diff options
Diffstat (limited to 'libgomp/target.c')
-rw-r--r-- | libgomp/target.c | 171 |
1 files changed, 114 insertions, 57 deletions
diff --git a/libgomp/target.c b/libgomp/target.c index 2cffa4986f9..58fa82ea965 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -187,18 +187,44 @@ gomp_device_copy (struct gomp_device_descr *devicep, } } -static void +static inline void +goacc_device_copy_async (struct gomp_device_descr *devicep, + bool (*copy_func) (int, void *, const void *, size_t, + struct goacc_asyncqueue *), + const char *dst, void *dstaddr, + const char *src, const void *srcaddr, + size_t size, struct goacc_asyncqueue *aq) +{ + if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq)) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", + src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); + } +} + +attribute_hidden void gomp_copy_host2dev (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, void *d, const void *h, size_t sz) { - gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz); + if (aq) + goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func, + "dev", d, "host", h, sz, aq); + else + gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz); } -static void +attribute_hidden void gomp_copy_dev2host (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, void *h, const void *d, size_t sz) { - gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); + if (aq) + goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func, + "host", h, "dev", d, sz, aq); + else + gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); } static void @@ -216,7 +242,8 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) Helper function of gomp_map_vars. */ static inline void -gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, +gomp_map_vars_existing (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, splay_tree_key oldn, splay_tree_key newn, struct target_var_desc *tgt_var, unsigned char kind) { @@ -238,7 +265,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, } if (GOMP_MAP_ALWAYS_TO_P (kind)) - gomp_copy_host2dev (devicep, + gomp_copy_host2dev (devicep, aq, (void *) (oldn->tgt->tgt_start + oldn->tgt_offset + newn->host_start - oldn->host_start), (void *) newn->host_start, @@ -256,8 +283,8 @@ get_kind (bool short_mapkind, void *kinds, int idx) } static void -gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, - uintptr_t target_offset, uintptr_t bias) +gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, + uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias) { struct gomp_device_descr *devicep = tgt->device_descr; struct splay_tree_s *mem_map = &devicep->mem_map; @@ -268,7 +295,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, { cur_node.tgt_offset = (uintptr_t) NULL; /* FIXME: see comment about coalescing host/dev transfers below. */ - gomp_copy_host2dev (devicep, + gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset), (void *) &cur_node.tgt_offset, sizeof (void *)); @@ -291,7 +318,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, to initialize the pointer with. */ cur_node.tgt_offset -= bias; /* FIXME: see comment about coalescing host/dev transfers below. */ - gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset), + gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset), (void *) &cur_node.tgt_offset, sizeof (void *)); } @@ -329,9 +356,9 @@ gomp_map_pset (struct target_mem_desc *tgt, uintptr_t host_ptr, } static void -gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n, - size_t first, size_t i, void **hostaddrs, - size_t *sizes, void *kinds) +gomp_map_fields_existing (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, + splay_tree_key n, size_t first, size_t i, + void **hostaddrs, size_t *sizes, void *kinds) { struct gomp_device_descr *devicep = tgt->device_descr; struct splay_tree_s *mem_map = &devicep->mem_map; @@ -348,7 +375,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n, && n2->tgt == n->tgt && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) { - gomp_map_vars_existing (devicep, n2, &cur_node, + gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], kind & typemask); return; } @@ -364,7 +391,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n, && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) { - gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i], + gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], kind & typemask); return; } @@ -376,7 +403,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n, && n2->tgt == n->tgt && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) { - gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i], + gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], kind & typemask); return; } @@ -547,6 +574,18 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, bool short_mapkind, enum gomp_map_vars_kind pragma_kind) { + struct target_mem_desc *tgt; + tgt = gomp_map_vars_async (devicep, NULL, mapnum, hostaddrs, devaddrs, + sizes, kinds, short_mapkind, pragma_kind); + return tgt; +} + +attribute_hidden struct target_mem_desc * +gomp_map_vars_async (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, size_t mapnum, + void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, + bool short_mapkind, enum gomp_map_vars_kind pragma_kind) +{ size_t i, tgt_align, tgt_size, not_found_cnt = 0; bool has_firstprivate = false; const int rshift = short_mapkind ? 8 : 3; @@ -665,7 +704,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, continue; } for (i = first; i <= last; i++) - gomp_map_fields_existing (tgt, n, first, i, hostaddrs, + gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, sizes, kinds); i--; continue; @@ -722,7 +761,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, else n = splay_tree_lookup (mem_map, &cur_node); if (n && n->refcount != REFCOUNT_LINK) - gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i], + gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i], kind & typemask); else { @@ -790,7 +829,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, if (n) { assert (n->refcount != REFCOUNT_LINK); - gomp_map_vars_existing (devicep, n, &cur_node, row_desc, + gomp_map_vars_existing (devicep, aq, n, &cur_node, row_desc, kind & typemask); } else @@ -866,7 +905,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, tgt_size = (tgt_size + align - 1) & ~(align - 1); tgt->list[i].offset = tgt_size; len = sizes[i]; - gomp_copy_host2dev (devicep, + gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + tgt_size), (void *) hostaddrs[i], len); tgt_size += len; @@ -900,7 +939,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, continue; } for (i = first; i <= last; i++) - gomp_map_fields_existing (tgt, n, first, i, hostaddrs, + gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, sizes, kinds); i--; continue; @@ -920,7 +959,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1); if (cur_node.tgt_offset) cur_node.tgt_offset -= sizes[i]; - gomp_copy_host2dev (devicep, + gomp_copy_host2dev (devicep, aq, (void *) (n->tgt->tgt_start + n->tgt_offset + cur_node.host_start @@ -950,7 +989,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, k->host_end = k->host_start + sizeof (void *); splay_tree_key n = splay_tree_lookup (mem_map, k); if (n && n->refcount != REFCOUNT_LINK) - gomp_map_vars_existing (devicep, n, k, &tgt->list[i], + gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i], kind & typemask); else { @@ -1006,14 +1045,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, /* FIXME: Perhaps add some smarts, like if copying several adjacent fields from host to target, use some host buffer to avoid sending each var individually. */ - gomp_copy_host2dev (devicep, + gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, k->host_end - k->host_start); break; case GOMP_MAP_POINTER: - gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start, + gomp_map_pointer (tgt, aq, + (uintptr_t) *(void **) k->host_start, k->tgt_offset, sizes[i]); break; case GOMP_MAP_TO_PSET: @@ -1042,7 +1082,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, sizes[j]); tptr = *(uintptr_t *) hostaddrs[i]; *(uintptr_t *) hostaddrs[i]= toffset; - gomp_copy_host2dev (devicep, + gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, @@ -1052,7 +1092,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, found_pointer = true; } if (!found_pointer) - gomp_copy_host2dev (devicep, + gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, @@ -1079,7 +1119,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, break; case GOMP_MAP_FORCE_DEVICEPTR: assert (k->host_end - k->host_start == sizeof (void *)); - gomp_copy_host2dev (devicep, + gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, @@ -1096,9 +1136,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, /* Set link pointer on target to the device address of the mapped object. */ void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset); - devicep->host2dev_func (devicep->target_id, - (void *) n->tgt_offset, - &tgt_addr, sizeof (void *)); + gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset, + &tgt_addr, sizeof (void *)); } array++; } @@ -1142,7 +1181,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, if (n) { assert (n->refcount != REFCOUNT_LINK); - gomp_map_vars_existing (devicep, n, &cur_node, row_desc, + gomp_map_vars_existing (devicep, aq, n, &cur_node, row_desc, kind & typemask); target_row_addr = n->tgt->tgt_start + n->tgt_offset; } @@ -1166,7 +1205,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, row_desc->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); row_desc->always_copy_from - = GOMP_MAP_COPY_FROM_P (kind & typemask); + = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); row_desc->offset = 0; row_desc->length = da->data_row_size; @@ -1175,7 +1214,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, splay_tree_insert (mem_map, array); if (GOMP_MAP_COPY_TO_P (kind & typemask)) - gomp_copy_host2dev (devicep, + gomp_copy_host2dev (devicep, aq, (void *) tgt->tgt_start + k->tgt_offset, (void *) k->host_start, da->data_row_size); @@ -1191,9 +1230,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, { void *ptrblock = gomp_dynamic_array_create_ptrblock (da, target_ptrblock, target_data_rows + row_start); - gomp_copy_host2dev (devicep, target_ptrblock, ptrblock, + gomp_copy_host2dev (devicep, aq, target_ptrblock, ptrblock, da->ptrblock_size); - free (ptrblock); + /* Freeing of the ptrblock must be scheduled after the host2dev + copy completes. */ + goacc_async_free (devicep, aq, ptrblock); } row_start += da->data_row_num; @@ -1213,7 +1254,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, { cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i); /* FIXME: see above FIXME comment. */ - gomp_copy_host2dev (devicep, + gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + i * sizeof (void *)), (void *) &cur_node.tgt_offset, sizeof (void *)); } @@ -1232,7 +1273,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, return tgt; } -static void +attribute_hidden void gomp_unmap_tgt (struct target_mem_desc *tgt) { /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */ @@ -1267,6 +1308,13 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k) attribute_hidden void gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) { + gomp_unmap_vars_async (tgt, do_copyfrom, NULL); +} + +attribute_hidden void +gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom, + struct goacc_asyncqueue *aq) +{ struct gomp_device_descr *devicep = tgt->device_descr; if (tgt->list_count == 0) @@ -1302,7 +1350,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) if ((do_unmap && do_copyfrom && tgt->list[i].copy_from) || tgt->list[i].always_copy_from) - gomp_copy_dev2host (devicep, + gomp_copy_dev2host (devicep, aq, (void *) (k->host_start + tgt->list[i].offset), (void *) (k->tgt->tgt_start + k->tgt_offset + tgt->list[i].offset), @@ -1368,9 +1416,9 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, size_t size = cur_node.host_end - cur_node.host_start; if (GOMP_MAP_COPY_TO_P (kind & typemask)) - gomp_copy_host2dev (devicep, devaddr, hostaddr, size); + gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size); if (GOMP_MAP_COPY_FROM_P (kind & typemask)) - gomp_copy_dev2host (devicep, hostaddr, devaddr, size); + gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size); } } gomp_mutex_unlock (&devicep->lock); @@ -1691,9 +1739,21 @@ gomp_init_device (struct gomp_device_descr *devicep) false); } + /* Initialize OpenACC asynchronous queues. */ + goacc_init_asyncqueues (devicep); + devicep->state = GOMP_DEVICE_INITIALIZED; } +attribute_hidden bool +gomp_fini_device (struct gomp_device_descr *devicep) +{ + devicep->state = GOMP_DEVICE_FINALIZED; + bool ret = goacc_fini_asyncqueues (devicep); + ret &= devicep->fini_device_func (devicep->target_id); + return ret; +} + attribute_hidden void gomp_unload_device (struct gomp_device_descr *devicep) { @@ -2222,7 +2282,7 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, if ((kind == GOMP_MAP_FROM && k->refcount == 0) || kind == GOMP_MAP_ALWAYS_FROM) - gomp_copy_dev2host (devicep, (void *) cur_node.host_start, + gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start, (void *) (k->tgt->tgt_start + k->tgt_offset + cur_node.host_start - k->host_start), @@ -2848,20 +2908,20 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) { if (!DLSYM_OPT (openacc.exec, openacc_exec) - || !DLSYM_OPT (openacc.register_async_cleanup, - openacc_register_async_cleanup) - || !DLSYM_OPT (openacc.async_test, openacc_async_test) - || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all) - || !DLSYM_OPT (openacc.async_wait, openacc_async_wait) - || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async) - || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all) - || !DLSYM_OPT (openacc.async_wait_all_async, - openacc_async_wait_all_async) - || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async) || !DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data) || !DLSYM_OPT (openacc.destroy_thread_data, - openacc_destroy_thread_data)) + openacc_destroy_thread_data) + || !DLSYM_OPT (openacc.async.construct, openacc_async_construct) + || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct) + || !DLSYM_OPT (openacc.async.test, openacc_async_test) + || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize) + || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize) + || !DLSYM_OPT (openacc.async.queue_callback, + openacc_async_queue_callback) + || !DLSYM_OPT (openacc.async.exec, openacc_async_exec) + || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host) + || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)) { /* Require all the OpenACC handlers if we have GOMP_OFFLOAD_CAP_OPENACC_200. */ @@ -2912,10 +2972,7 @@ gomp_target_fini (void) struct gomp_device_descr *devicep = &devices[i]; gomp_mutex_lock (&devicep->lock); if (devicep->state == GOMP_DEVICE_INITIALIZED) - { - ret = devicep->fini_device_func (devicep->target_id); - devicep->state = GOMP_DEVICE_FINALIZED; - } + ret = gomp_fini_device (devicep); gomp_mutex_unlock (&devicep->lock); if (!ret) gomp_fatal ("device finalization failed"); |