aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/config/gcn/gcn.c28
-rw-r--r--gcc/config/gcn/mkoffload.c37
-rw-r--r--gcc/omp-low.c4
3 files changed, 67 insertions, 2 deletions
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 5317e18a78e..8193f5b1d30 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -68,6 +68,11 @@ static bool ext_gcn_constants_init = 0;
enum gcn_isa gcn_isa = ISA_GCN3; /* Default to GCN3. */
+/* Record whether the host compiler added "omp unified memory" attributes to
+ any functions. We can then pass this on to mkoffload to ensure xnack is
+ compatible there too. */
+static bool unified_shared_memory_enabled = false;
+
/* Reserve this much space for LDS (for propagating variables from
worker-single mode to worker-partitioned mode), per workgroup. Global
analysis could calculate an exact bound, but we don't do that yet.
@@ -2541,6 +2546,25 @@ gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* Argument info to init */ ,
if (!caller && cfun->machine->normal_function)
gcn_detect_incoming_pointer_arg (fndecl);
+ if (fndecl && lookup_attribute ("omp unified memory",
+ DECL_ATTRIBUTES (fndecl)))
+ {
+ unified_shared_memory_enabled = true;
+
+ switch (gcn_arch)
+ {
+ case PROCESSOR_FIJI:
+ case PROCESSOR_VEGA10:
+ case PROCESSOR_VEGA20:
+ error ("GPU architecture does not support Unified Shared Memory");
+ default:
+ ;
+ }
+
+ if (flag_xnack == HSACO_ATTR_OFF)
+ error ("Unified Shared Memory is enabled, but XNACK is disabled");
+ }
+
reinit_regs ();
}
@@ -5430,12 +5454,14 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
assemble_name (file, name);
fputs (":\n", file);
- /* This comment is read by mkoffload. */
+ /* These comments are read by mkoffload. */
if (flag_openacc)
fprintf (file, "\t;; OPENACC-DIMS: %d, %d, %d : %s\n",
oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_GANG),
oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_WORKER),
oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_VECTOR), name);
+ if (unified_shared_memory_enabled)
+ fprintf (asm_out_file, "\t;; MKOFFLOAD OPTIONS: USM+\n");
}
/* Implement TARGET_ASM_SELECT_SECTION.
diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c
index febc8461197..7bd1d2f7692 100644
--- a/gcc/config/gcn/mkoffload.c
+++ b/gcc/config/gcn/mkoffload.c
@@ -80,6 +80,8 @@
== EF_AMDGPU_FEATURE_XNACK_ANY_V4)
#define TEST_XNACK_ON(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
== EF_AMDGPU_FEATURE_XNACK_ON_V4)
+#define TEST_XNACK_OFF(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
+ == EF_AMDGPU_FEATURE_XNACK_OFF_V4)
#define SET_SRAM_ECC_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
| EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
@@ -474,6 +476,7 @@ static void
process_asm (FILE *in, FILE *out, FILE *cfile)
{
int fn_count = 0, var_count = 0, dims_count = 0, regcount_count = 0;
+ bool unified_shared_memory_enabled = false;
struct obstack fns_os, dims_os, regcounts_os;
obstack_init (&fns_os);
obstack_init (&dims_os);
@@ -498,6 +501,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
fn_count += 2;
char buf[1000];
+ char dummy;
enum
{ IN_CODE,
IN_METADATA,
@@ -517,6 +521,9 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
dims_count++;
}
+ if (sscanf (buf, " ;; MKOFFLOAD OPTIONS: USM+%c", &dummy) > 0)
+ unified_shared_memory_enabled = true;
+
break;
}
case IN_METADATA:
@@ -565,7 +572,6 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
}
}
- char dummy;
if (sscanf (buf, " .section .gnu.offload_vars%c", &dummy) > 0)
{
state = IN_VARS;
@@ -616,6 +622,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
fprintf (cfile, "#include <stdlib.h>\n");
fprintf (cfile, "#include <stdbool.h>\n\n");
+ fprintf (cfile, "#include <stdio.h>\n\n");
fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count);
@@ -656,6 +663,34 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
}
fprintf (cfile, "\n};\n\n");
+ /* Emit a constructor function to set the HSA_XNACK environment variable.
+ This must be done before the ROCr runtime library is loaded.
+ We never override a user value (except empty string), but we do emit a
+ useful diagnostic in the wrong mode (the ROCr message is not good). */
+ if (TEST_XNACK_OFF (elf_flags) && unified_shared_memory_enabled)
+ fatal_error (input_location,
+ "conflicting settings; XNACK is forced off but Unified "
+ "Shared Memory is on");
+ if (!TEST_XNACK_ANY (elf_flags) || unified_shared_memory_enabled)
+ fprintf (cfile,
+ "static __attribute__((constructor))\n"
+ "void configure_xnack (void)\n"
+ "{\n"
+ " const char *val = getenv (\"HSA_XNACK\");\n"
+ " if (!val || val[0] == '\\0')\n"
+ " setenv (\"HSA_XNACK\", \"%d\", true);\n"
+ " else if (%s)\n"
+ " {\n"
+ " fprintf (stderr, \"error: HSA_XNACK=%%s is incompatible; "
+ "please unset\\n\", val);\n"
+ " exit (1);\n"
+ " }\n"
+ "}\n\n",
+ unified_shared_memory_enabled || TEST_XNACK_ON (elf_flags),
+ (unified_shared_memory_enabled || TEST_XNACK_ON (elf_flags)
+ ? "val[0] != '1' || val[1] != '\\0'"
+ : "val[0] == '1' && val[1] == '\\0'"));
+
obstack_free (&fns_os, NULL);
for (i = 0; i < dims_count; i++)
free (dims[i].name);
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 77779f48cc0..30408655662 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -2362,6 +2362,10 @@ create_omp_child_function (omp_context *ctx, bool task_copy)
DECL_ATTRIBUTES (decl)
= tree_cons (get_identifier (target_attr),
NULL_TREE, DECL_ATTRIBUTES (decl));
+ if (flag_offload_memory == OFFLOAD_MEMORY_UNIFIED)
+ DECL_ATTRIBUTES (decl)
+ = tree_cons (get_identifier ("omp unified memory"),
+ NULL_TREE, DECL_ATTRIBUTES (decl));
}
t = build_decl (DECL_SOURCE_LOCATION (decl),