Diffstat (limited to 'drivers/gpu/arm/midgard/backend/gpu')
49 files changed, 13746 insertions, 0 deletions
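Nearly everything in this backend funnels register access through kbase_reg_read()/kbase_reg_write(), added in mali_kbase_device_hw.c below. As a minimal sketch of that usage contract, assuming only functions added in this series (kbase_probe_gpu_id() itself is a hypothetical caller, not part of the patch):

/* Hypothetical helper, for illustration only: kbase_reg_read() and
 * kbase_reg_write() assert kbdev->pm.backend.gpu_powered, so early users
 * bracket their accesses with the PM register-access window, exactly as
 * kbase_backend_early_init() does in mali_kbase_gpu.c below.
 */
static u32 kbase_probe_gpu_id(struct kbase_device *kbdev)
{
	u32 gpu_id;

	/* Power up just enough of the GPU for register I/O */
	kbase_pm_register_access_enable(kbdev);

	gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);

	/* Close the register-access window again */
	kbase_pm_register_access_disable(kbdev);

	return gpu_id;
}
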
diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild new file mode 100644 index 000000000000..5f700e9b6b44 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild @@ -0,0 +1,60 @@ +# +# (C) COPYRIGHT 2014,2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + +BACKEND += \ + backend/gpu/mali_kbase_cache_policy_backend.c \ + backend/gpu/mali_kbase_device_hw.c \ + backend/gpu/mali_kbase_gpu.c \ + backend/gpu/mali_kbase_gpuprops_backend.c \ + backend/gpu/mali_kbase_debug_job_fault_backend.c \ + backend/gpu/mali_kbase_irq_linux.c \ + backend/gpu/mali_kbase_instr_backend.c \ + backend/gpu/mali_kbase_jm_as.c \ + backend/gpu/mali_kbase_jm_hw.c \ + backend/gpu/mali_kbase_jm_rb.c \ + backend/gpu/mali_kbase_js_affinity.c \ + backend/gpu/mali_kbase_js_backend.c \ + backend/gpu/mali_kbase_mmu_hw_direct.c \ + backend/gpu/mali_kbase_pm_backend.c \ + backend/gpu/mali_kbase_pm_driver.c \ + backend/gpu/mali_kbase_pm_metrics.c \ + backend/gpu/mali_kbase_pm_ca.c \ + backend/gpu/mali_kbase_pm_ca_fixed.c \ + backend/gpu/mali_kbase_pm_always_on.c \ + backend/gpu/mali_kbase_pm_coarse_demand.c \ + backend/gpu/mali_kbase_pm_demand.c \ + backend/gpu/mali_kbase_pm_policy.c \ + backend/gpu/mali_kbase_time.c + +ifeq ($(MALI_CUSTOMER_RELEASE),0) +BACKEND += \ + backend/gpu/mali_kbase_pm_ca_random.c \ + backend/gpu/mali_kbase_pm_demand_always_powered.c \ + backend/gpu/mali_kbase_pm_fast_start.c +endif + +ifeq ($(CONFIG_MALI_DEVFREQ),y) +BACKEND += \ + backend/gpu/mali_kbase_devfreq.c \ + backend/gpu/mali_kbase_pm_ca_devfreq.c +endif + +ifeq ($(CONFIG_MALI_NO_MALI),y) + # Dummy model + BACKEND += backend/gpu/mali_kbase_model_dummy.c + BACKEND += backend/gpu/mali_kbase_model_linux.c + # HW error simulation + BACKEND += backend/gpu/mali_kbase_model_error_generator.c +endif diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h new file mode 100644 index 000000000000..c8ae87eb84a2 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h @@ -0,0 +1,29 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+
+
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100644
index 000000000000..fef9a2cb743e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	kbdev->current_gpu_coherency_mode = mode;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100644
index 000000000000..fe9869109a82
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+ *				      in the GPU.
+ * @kbdev:	Device pointer
+ * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+ */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
new file mode 100644
index 000000000000..7851ea6466c7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <mali_kbase.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include "mali_kbase_debug_job_fault.h" + +#ifdef CONFIG_DEBUG_FS + +/*GPU_CONTROL_REG(r)*/ +static int gpu_control_reg_snapshot[] = { + GPU_ID, + SHADER_READY_LO, + SHADER_READY_HI, + TILER_READY_LO, + TILER_READY_HI, + L2_READY_LO, + L2_READY_HI +}; + +/* JOB_CONTROL_REG(r) */ +static int job_control_reg_snapshot[] = { + JOB_IRQ_MASK, + JOB_IRQ_STATUS +}; + +/* JOB_SLOT_REG(n,r) */ +static int job_slot_reg_snapshot[] = { + JS_HEAD_LO, + JS_HEAD_HI, + JS_TAIL_LO, + JS_TAIL_HI, + JS_AFFINITY_LO, + JS_AFFINITY_HI, + JS_CONFIG, + JS_STATUS, + JS_HEAD_NEXT_LO, + JS_HEAD_NEXT_HI, + JS_AFFINITY_NEXT_LO, + JS_AFFINITY_NEXT_HI, + JS_CONFIG_NEXT +}; + +/*MMU_REG(r)*/ +static int mmu_reg_snapshot[] = { + MMU_IRQ_MASK, + MMU_IRQ_STATUS +}; + +/* MMU_AS_REG(n,r) */ +static int as_reg_snapshot[] = { + AS_TRANSTAB_LO, + AS_TRANSTAB_HI, + AS_MEMATTR_LO, + AS_MEMATTR_HI, + AS_FAULTSTATUS, + AS_FAULTADDRESS_LO, + AS_FAULTADDRESS_HI, + AS_STATUS +}; + +bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, + int reg_range) +{ + int i, j; + int offset = 0; + int slot_number; + int as_number; + + if (kctx->reg_dump == NULL) + return false; + + slot_number = kctx->kbdev->gpu_props.num_job_slots; + as_number = kctx->kbdev->gpu_props.num_address_spaces; + + /* get the GPU control registers*/ + for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + GPU_CONTROL_REG(gpu_control_reg_snapshot[i]); + offset += 2; + } + + /* get the Job control registers*/ + for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + JOB_CONTROL_REG(job_control_reg_snapshot[i]); + offset += 2; + } + + /* get the Job Slot registers*/ + for (j = 0; j < slot_number; j++) { + for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + JOB_SLOT_REG(j, job_slot_reg_snapshot[i]); + offset += 2; + } + } + + /* get the MMU registers*/ + for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); + offset += 2; + } + + /* get the Address space registers*/ + for (j = 0; j < as_number; j++) { + for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + MMU_AS_REG(j, as_reg_snapshot[i]); + offset += 2; + } + } + + WARN_ON(offset >= (reg_range*2/4)); + + /* set the termination flag*/ + kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG; + kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG; + + dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", + offset); + + return true; +} + +bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx) +{ + int offset = 0; + + if (kctx->reg_dump == NULL) + return false; + + while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) { + kctx->reg_dump[offset+1] = + kbase_reg_read(kctx->kbdev, + kctx->reg_dump[offset], NULL); + offset += 2; + } + return true; +} + + +#endif diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c new file mode 100644 index 000000000000..308b971b6896 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c @@ -0,0 +1,451 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <mali_kbase.h> +#include <mali_kbase_tlstream.h> +#include <mali_kbase_config_defaults.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +#include <linux/of.h> +#include <linux/clk.h> +#include <linux/devfreq.h> +#ifdef CONFIG_DEVFREQ_THERMAL +#include <linux/devfreq_cooling.h> +#endif + +#include <linux/version.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) +#include <linux/pm_opp.h> +#else /* Linux >= 3.13 */ +/* In 3.13 the OPP include header file, types, and functions were all + * renamed. Use the old filename for the include, and define the new names to + * the old, when an old kernel is detected. + */ +#include <linux/opp.h> +#define dev_pm_opp opp +#define dev_pm_opp_get_voltage opp_get_voltage +#define dev_pm_opp_get_opp_count opp_get_opp_count +#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil +#define dev_pm_opp_find_freq_floor opp_find_freq_floor +#endif /* Linux >= 3.13 */ + +#ifdef CONFIG_ARM_SCMI_PROTOCOL +#include <linux/scmi_protocol.h> +extern int scmi_gpu_domain_id_get(void); +#endif + +/** + * opp_translate - Translate nominal OPP frequency from devicetree into real + * frequency and core mask + * @kbdev: Device pointer + * @freq: Nominal frequency + * @core_mask: Pointer to u64 to store core mask to + * + * Return: Real target frequency + * + * This function will only perform translation if an operating-points-v2-mali + * table is present in devicetree. If one is not present then it will return an + * untranslated frequency and all cores enabled. 
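+ *
+ * Worked example (values are illustrative, not from any real devicetree):
+ * given an OPP entry declaring opp-hz = <600000000>, opp-hz-real =
+ * <550000000> and opp-core-mask = <0x3>, opp_translate(kbdev, 600000000,
+ * &mask) sets mask to 0x3 and returns 550000000.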
+ */
+static unsigned long opp_translate(struct kbase_device *kbdev,
+		unsigned long freq, u64 *core_mask)
+{
+	int i;
+
+	for (i = 0; i < kbdev->num_opps; i++) {
+		if (kbdev->opp_table[i].opp_freq == freq) {
+			*core_mask = kbdev->opp_table[i].core_mask;
+			return kbdev->opp_table[i].real_freq;
+		}
+	}
+
+	/* Failed to find OPP - return all cores enabled & nominal frequency */
+	*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
+
+	return freq;
+}
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long nominal_freq;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err = 0;
+	u64 core_mask;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	if (IS_ERR_OR_NULL(opp)) {
+		rcu_read_unlock();
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+	/* Only dereference the OPP once we know it is valid */
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+
+	nominal_freq = freq;
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (kbdev->current_nominal_freq == nominal_freq) {
+		*target_freq = nominal_freq;
+		return 0;
+	}
+
+	freq = opp_translate(kbdev, nominal_freq, &core_mask);
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq < freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to increase voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	if (kbdev->clock)
+		err = clk_set_rate(kbdev->clock, freq);
+#ifdef CONFIG_ARM_SCMI_PROTOCOL
+	else if (kbdev->scmi_handle)
+		err = kbdev->scmi_handle->perf_ops->freq_set(kbdev->scmi_handle,
+				scmi_gpu_domain_id_get(), freq);
+#endif
+
+	if (err) {
+		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+				freq, *target_freq);
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq > freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	if (kbdev->pm.backend.ca_current_policy->id ==
+			KBASE_PM_CA_POLICY_ID_DEVFREQ)
+		kbase_devfreq_set_core_mask(kbdev, core_mask);
+
+	*target_freq = nominal_freq;
+	kbdev->current_voltage = voltage;
+	kbdev->current_nominal_freq = nominal_freq;
+	kbdev->current_freq = freq;
+	kbdev->current_core_mask = core_mask;
+
+	KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq);
+
+	kbase_pm_reset_dvfs_utilisation(kbdev);
+
+	return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_nominal_freq;
+
+	return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = kbdev->current_nominal_freq;
+
+	kbase_pm_get_dvfs_utilisation(kbdev,
+			&stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+	return 0;
+}
+
+/* Weak definition to be overridden by platforms */
+int __weak setup_opps(void)
+{
+	return 0;
+}
+
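+/*
+ * Note: a platform may provide a strong setup_opps() definition to register
+ * its OPPs (for example from firmware tables) before the frequency table
+ * below is enumerated; the weak stub above simply reports success.
+ */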
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int err, count;
+	int i = 0;
+	unsigned long freq = 0;
+	struct dev_pm_opp *opp;
+
+	err = setup_opps();
+	if (err)
+		return err;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) {
+		opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d)\n",
+				count, i);
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+}
+
+static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
+{
+	struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node,
+			"operating-points-v2", 0);
+	struct device_node *node;
+	int i = 0;
+	int count;
+
+	if (!opp_node)
+		return 0;
+	if (!of_device_is_compatible(opp_node, "operating-points-v2-mali"))
+		return 0;
+
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	kbdev->opp_table = kmalloc_array(count,
+			sizeof(struct kbase_devfreq_opp), GFP_KERNEL);
+	if (!kbdev->opp_table)
+		return -ENOMEM;
+
+	for_each_available_child_of_node(opp_node, node) {
+		u64 core_mask;
+		u64 opp_freq, real_freq;
+		const void *core_count_p;
+
+		if (of_property_read_u64(node, "opp-hz", &opp_freq)) {
+			dev_warn(kbdev->dev, "OPP is missing required opp-hz property\n");
+			continue;
+		}
+		if (of_property_read_u64(node, "opp-hz-real", &real_freq))
+			real_freq = opp_freq;
+		if (of_property_read_u64(node, "opp-core-mask", &core_mask))
+			core_mask =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		core_count_p = of_get_property(node, "opp-core-count", NULL);
+		if (core_count_p) {
+			u64 remaining_core_mask =
+				kbdev->gpu_props.props.raw_props.shader_present;
+			int core_count = be32_to_cpup(core_count_p);
+
+			core_mask = 0;
+
+			for (; core_count > 0; core_count--) {
+				int core = ffs(remaining_core_mask);
+
+				if (!core) {
+					dev_err(kbdev->dev, "OPP has more cores than GPU\n");
+					return -ENODEV;
+				}
+
+				core_mask |= (1ull << (core-1));
+				remaining_core_mask &= ~(1ull << (core-1));
+			}
+		}
+
+		if (!core_mask) {
+			dev_err(kbdev->dev, "OPP has invalid core mask of 0\n");
+			return -ENODEV;
+		}
+
+		kbdev->opp_table[i].opp_freq = opp_freq;
+		kbdev->opp_table[i].real_freq = real_freq;
+		kbdev->opp_table[i].core_mask = core_mask;
+
+		dev_info(kbdev->dev, "OPP %d : opp_freq=%llu real_freq=%llu core_mask=%llx\n",
+				i, opp_freq, real_freq, core_mask);
+
+		i++;
+	}
+
+	kbdev->num_opps = i;
+
+	return 0;
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	int err;
+
+#ifdef CONFIG_ARM_SCMI_PROTOCOL
+	if (!kbdev->scmi_handle) {
+#else
+	if (!kbdev->clock) {
+#endif
+		dev_err(kbdev->dev, "Clock not available for devfreq\n");
+		return -ENODEV;
+	}
+
+#ifndef CONFIG_ARM_SCMI_PROTOCOL
+	if (kbdev->clock)
+		kbdev->current_freq = clk_get_rate(kbdev->clock);
+#else
+	if (kbdev->scmi_handle) {
+		struct scmi_perf_ops *perf_ops = kbdev->scmi_handle->perf_ops;
+
+		perf_ops->freq_get(kbdev->scmi_handle,
+				scmi_gpu_domain_id_get(),
+				&kbdev->current_freq);
+	}
+#endif
+
+	kbdev->current_nominal_freq = kbdev->current_freq;
+
+	dp = &kbdev->devfreq_profile;
+
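+	/* Populate the devfreq profile: initial frequency and polling period,
+	 * plus the target/status/cur_freq/exit callbacks defined above. The
+	 * devfreq core invokes ->target() based on the utilisation reported
+	 * via ->get_dev_status().
+	 */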
dp->initial_freq = kbdev->current_freq; + dp->polling_ms = 100; + dp->target = kbase_devfreq_target; + dp->get_dev_status = kbase_devfreq_status; + dp->get_cur_freq = kbase_devfreq_cur_freq; + dp->exit = kbase_devfreq_exit; + + if (kbase_devfreq_init_freq_table(kbdev, dp)) + return -EFAULT; + + err = kbase_devfreq_init_core_mask_table(kbdev); + if (err) + return err; + + kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, + "simple_ondemand", NULL); + if (IS_ERR(kbdev->devfreq)) { + kbase_devfreq_term_freq_table(kbdev); + return PTR_ERR(kbdev->devfreq); + } + + /* devfreq_add_device only copies a few of kbdev->dev's fields, so + * set drvdata explicitly so IPA models can access kbdev. */ + dev_set_drvdata(&kbdev->devfreq->dev, kbdev); + + err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); + if (err) { + dev_err(kbdev->dev, + "Failed to register OPP notifier (%d)\n", err); + goto opp_notifier_failed; + } + +#ifdef CONFIG_DEVFREQ_THERMAL + err = kbase_ipa_init(kbdev); + if (err) { + dev_err(kbdev->dev, "IPA initialization failed\n"); + goto cooling_failed; + } + + kbdev->devfreq_cooling = of_devfreq_cooling_register_power( + kbdev->dev->of_node, + kbdev->devfreq, + &kbase_ipa_power_model_ops); + if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { + err = PTR_ERR(kbdev->devfreq_cooling); + dev_err(kbdev->dev, + "Failed to register cooling device (%d)\n", + err); + goto cooling_failed; + } +#endif + + return 0; + +#ifdef CONFIG_DEVFREQ_THERMAL +cooling_failed: + devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); +#endif /* CONFIG_DEVFREQ_THERMAL */ +opp_notifier_failed: + if (devfreq_remove_device(kbdev->devfreq)) + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); + else + kbdev->devfreq = NULL; + + return err; +} + +void kbase_devfreq_term(struct kbase_device *kbdev) +{ + int err; + + dev_dbg(kbdev->dev, "Term Mali devfreq\n"); + +#ifdef CONFIG_DEVFREQ_THERMAL + if (kbdev->devfreq_cooling) + devfreq_cooling_unregister(kbdev->devfreq_cooling); + + kbase_ipa_term(kbdev); +#endif + + devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); + + err = devfreq_remove_device(kbdev->devfreq); + if (err) + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); + else + kbdev->devfreq = NULL; + + kfree(kbdev->opp_table); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h new file mode 100644 index 000000000000..c0bf8b15b3bc --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h @@ -0,0 +1,24 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +#ifndef _BASE_DEVFREQ_H_ +#define _BASE_DEVFREQ_H_ + +int kbase_devfreq_init(struct kbase_device *kbdev); +void kbase_devfreq_term(struct kbase_device *kbdev); + +#endif /* _BASE_DEVFREQ_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c new file mode 100644 index 000000000000..dcdf15cdc3e8 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c @@ -0,0 +1,255 @@ +/* + * + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * + */ +#include <mali_kbase.h> +#include <backend/gpu/mali_kbase_instr_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +#include <backend/gpu/mali_kbase_device_internal.h> + +#if !defined(CONFIG_MALI_NO_MALI) + + +#ifdef CONFIG_DEBUG_FS + + +int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) +{ + struct kbase_io_access *old_buf; + struct kbase_io_access *new_buf; + unsigned long flags; + + if (!new_size) + goto out_err; /* The new size must not be 0 */ + + new_buf = vmalloc(new_size * sizeof(*h->buf)); + if (!new_buf) + goto out_err; + + spin_lock_irqsave(&h->lock, flags); + + old_buf = h->buf; + + /* Note: we won't bother with copying the old data over. The dumping + * logic wouldn't work properly as it relies on 'count' both as a + * counter and as an index to the buffer which would have changed with + * the new array. This is a corner case that we don't need to support. 
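+	 * A resize therefore discards all previously recorded accesses.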
+ */ + h->count = 0; + h->size = new_size; + h->buf = new_buf; + + spin_unlock_irqrestore(&h->lock, flags); + + vfree(old_buf); + + return 0; + +out_err: + return -1; +} + + +int kbase_io_history_init(struct kbase_io_history *h, u16 n) +{ + h->enabled = false; + spin_lock_init(&h->lock); + h->count = 0; + h->size = 0; + h->buf = NULL; + if (kbase_io_history_resize(h, n)) + return -1; + + return 0; +} + + +void kbase_io_history_term(struct kbase_io_history *h) +{ + vfree(h->buf); + h->buf = NULL; +} + + +/* kbase_io_history_add - add new entry to the register access history + * + * @h: Pointer to the history data structure + * @addr: Register address + * @value: The value that is either read from or written to the register + * @write: 1 if it's a register write, 0 if it's a read + */ +static void kbase_io_history_add(struct kbase_io_history *h, + void __iomem const *addr, u32 value, u8 write) +{ + struct kbase_io_access *io; + unsigned long flags; + + spin_lock_irqsave(&h->lock, flags); + + io = &h->buf[h->count % h->size]; + io->addr = (uintptr_t)addr | write; + io->value = value; + ++h->count; + /* If count overflows, move the index by the buffer size so the entire + * buffer will still be dumped later */ + if (unlikely(!h->count)) + h->count = h->size; + + spin_unlock_irqrestore(&h->lock, flags); +} + + +void kbase_io_history_dump(struct kbase_device *kbdev) +{ + struct kbase_io_history *const h = &kbdev->io_history; + u16 i; + size_t iters; + unsigned long flags; + + if (!unlikely(h->enabled)) + return; + + spin_lock_irqsave(&h->lock, flags); + + dev_err(kbdev->dev, "Register IO History:"); + iters = (h->size > h->count) ? h->count : h->size; + dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, + h->count); + for (i = 0; i < iters; ++i) { + struct kbase_io_access *io = + &h->buf[(h->count - iters + i) % h->size]; + char const access = (io->addr & 1) ? 
'w' : 'r'; + + dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access, + (void *)(io->addr & ~0x1), io->value); + } + + spin_unlock_irqrestore(&h->lock, flags); +} + + +#endif /* CONFIG_DEBUG_FS */ + + +void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, + struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + + writel(value, kbdev->reg + offset); + +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbdev->io_history.enabled)) + kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, + value, 1); +#endif /* CONFIG_DEBUG_FS */ + dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); + + if (kctx && kctx->jctx.tb) + kbase_device_trace_register_access(kctx, REG_WRITE, offset, + value); +} + +KBASE_EXPORT_TEST_API(kbase_reg_write); + +u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, + struct kbase_context *kctx) +{ + u32 val; + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + + val = readl(kbdev->reg + offset); + +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbdev->io_history.enabled)) + kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, + val, 0); +#endif /* CONFIG_DEBUG_FS */ + dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val); + + if (kctx && kctx->jctx.tb) + kbase_device_trace_register_access(kctx, REG_READ, offset, val); + return val; +} + +KBASE_EXPORT_TEST_API(kbase_reg_read); +#endif /* !defined(CONFIG_MALI_NO_MALI) */ + +/** + * kbase_report_gpu_fault - Report a GPU fault. + * @kbdev: Kbase device pointer + * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS + * was also set + * + * This function is called from the interrupt handler when a GPU fault occurs. + * It reports the details of the fault using dev_warn(). + */ +static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) +{ + u32 status; + u64 address; + + status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL); + address = (u64) kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32; + address |= kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL); + + dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", + status & 0xFF, + kbase_exception_name(kbdev, status), + address); + if (multiple) + dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); +} + +void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) +{ + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val); + if (val & GPU_FAULT) + kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS); + + if (val & RESET_COMPLETED) + kbase_pm_reset_done(kbdev); + + if (val & PRFCNT_SAMPLE_COMPLETED) + kbase_instr_hwcnt_sample_done(kbdev); + + if (val & CLEAN_CACHES_COMPLETED) + kbase_clean_caches_done(kbdev); + + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL); + + /* kbase_pm_check_transitions must be called after the IRQ has been + * cleared. This is because it might trigger further power transitions + * and we don't want to miss the interrupt raised to notify us that + * these further transitions have finished. 
+ */ + if (val & POWER_CHANGED_ALL) + kbase_pm_power_changed(kbdev); + + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h new file mode 100644 index 000000000000..5b20445932fb --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h @@ -0,0 +1,67 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Backend-specific HW access device APIs + */ + +#ifndef _KBASE_DEVICE_INTERNAL_H_ +#define _KBASE_DEVICE_INTERNAL_H_ + +/** + * kbase_reg_write - write to GPU register + * @kbdev: Kbase device pointer + * @offset: Offset of register + * @value: Value to write + * @kctx: Kbase context pointer. May be NULL + * + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If + * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr + * != KBASEP_AS_NR_INVALID). + */ +void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, + struct kbase_context *kctx); + +/** + * kbase_reg_read - read from GPU register + * @kbdev: Kbase device pointer + * @offset: Offset of register + * @kctx: Kbase context pointer. May be NULL + * + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If + * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr + * != KBASEP_AS_NR_INVALID). + * + * Return: Value in desired register + */ +u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, + struct kbase_context *kctx); + + +/** + * kbase_gpu_interrupt - GPU interrupt handler + * @kbdev: Kbase device pointer + * @val: The value of the GPU IRQ status register which triggered the call + * + * This function is called from the interrupt handler when a GPU irq is to be + * handled. + */ +void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); + +#endif /* _KBASE_DEVICE_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c new file mode 100644 index 000000000000..3b78100ec6df --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c @@ -0,0 +1,123 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	err = kbase_hwaccess_pm_init(kbdev);
+	if (err)
+		goto fail_pm;
+
+	return 0;
+
+fail_pm:
+	kbase_release_interrupts(kbdev);
+fail_interrupts:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_term(kbdev);
+	kbase_release_interrupts(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		return err;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assignment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+fail_job_slot:
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+
+	return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100644
index 000000000000..b395325b556b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,110 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ * + */ + + + + + +/* + * Base kernel property query backend APIs + */ + +#include <mali_kbase.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> +#include <mali_kbase_hwaccess_gpuprops.h> + +void kbase_backend_gpuprops_get(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) +{ + int i; + + /* Fill regdump with the content of the relevant registers */ + regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL); + + regdump->l2_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_FEATURES), NULL); + regdump->suspend_size = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SUSPEND_SIZE), NULL); + regdump->tiler_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_FEATURES), NULL); + regdump->mem_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(MEM_FEATURES), NULL); + regdump->mmu_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(MMU_FEATURES), NULL); + regdump->as_present = kbase_reg_read(kbdev, + GPU_CONTROL_REG(AS_PRESENT), NULL); + regdump->js_present = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JS_PRESENT), NULL); + + for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) + regdump->js_features[i] = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL); + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + regdump->texture_features[i] = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL); + + regdump->thread_max_threads = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL); + regdump->thread_max_workgroup_size = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE), + NULL); + regdump->thread_max_barrier_size = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL); + regdump->thread_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_FEATURES), NULL); + + regdump->shader_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL); + regdump->shader_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL); + + regdump->tiler_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_PRESENT_LO), NULL); + regdump->tiler_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_PRESENT_HI), NULL); + + regdump->l2_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_LO), NULL); + regdump->l2_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_HI), NULL); + + regdump->stack_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(STACK_PRESENT_LO), NULL); + regdump->stack_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(STACK_PRESENT_HI), NULL); +} + +void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) +{ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) { + /* Ensure we can access the GPU registers */ + kbase_pm_register_access_enable(kbdev); + + regdump->coherency_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); + + /* We're done accessing the GPU registers for now. 
*/ + kbase_pm_register_access_disable(kbdev); + } else { + /* Pre COHERENCY_FEATURES we only supported ACE_LITE */ + regdump->coherency_features = + COHERENCY_FEATURE_BIT(COHERENCY_NONE) | + COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); + } +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c new file mode 100644 index 000000000000..7ad309e8d7f4 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c @@ -0,0 +1,492 @@ +/* + * + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * GPU backend instrumentation APIs. + */ + +#include <mali_kbase.h> +#include <mali_midg_regmap.h> +#include <mali_kbase_hwaccess_instr.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> +#include <backend/gpu/mali_kbase_instr_internal.h> + +/** + * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to + * hardware + * + * @kbdev: Kbase device + */ +static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) +{ + unsigned long flags; + unsigned long pm_flags; + u32 irq_mask; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_REQUEST_CLEAN); + + /* Enable interrupt */ + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask | CLEAN_CACHES_COMPLETED, NULL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + + /* clean&invalidate the caches so we're sure the mmu tables for the dump + * buffer is valid */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES, NULL); + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} + +int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_uk_hwcnt_setup *setup) +{ + unsigned long flags, pm_flags; + int err = -EINVAL; + u32 irq_mask; + int ret; + u64 shader_cores_needed; + u32 prfcnt_config; + + shader_cores_needed = kbase_pm_get_present_cores(kbdev, + KBASE_PM_CORE_SHADER); + + /* alignment failure */ + if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1))) + goto out_err; + + /* Override core availability policy to ensure all cores are available + */ + kbase_pm_ca_instr_enable(kbdev); + + /* Request the cores early on synchronously - we'll release them on any + * errors (e.g. 
instrumentation already active) */ + kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { + /* Instrumentation is already enabled */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out_unrequest_cores; + } + + /* Enable interrupt */ + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | + PRFCNT_SAMPLE_COMPLETED, NULL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + + /* In use, this context is the owner */ + kbdev->hwcnt.kctx = kctx; + /* Remember the dump address so we can reprogram it later */ + kbdev->hwcnt.addr = setup->dump_buffer; + + /* Request the clean */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; + kbdev->hwcnt.backend.triggered = 0; + /* Clean&invalidate the caches so we're sure the mmu tables for the dump + * buffer is valid */ + ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, + &kbdev->hwcnt.backend.cache_clean_work); + KBASE_DEBUG_ASSERT(ret); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Wait for cacheclean to complete */ + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_IDLE); + + kbase_pm_request_l2_caches(kbdev); + + /* Configure */ + prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + { + u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) + >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id); + + if (arch_v6) + prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; + } +#endif + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), + prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), + setup->dump_buffer & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), + setup->dump_buffer >> 32, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), + setup->jm_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), + setup->shader_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), + setup->mmu_l2_bm, kctx); + /* Due to PRLAM-8186 we need to disable the Tiler before we enable the + * HW counter dump. 
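+	 * (The tiler counters are re-enabled below, once the manual counter
+	 * mode has been programmed.)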
*/ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0, + kctx); + else + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), + setup->tiler_bm, kctx); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), + prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx); + + /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump + */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), + setup->tiler_bm, kctx); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + err = 0; + + dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); + return err; + out_unrequest_cores: + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + out_err: + return err; +} + +int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) +{ + unsigned long flags, pm_flags; + int err = -EINVAL; + u32 irq_mask; + struct kbase_device *kbdev = kctx->kbdev; + + while (1) { + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) { + /* Instrumentation is not enabled */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out; + } + + if (kbdev->hwcnt.kctx != kctx) { + /* Instrumentation has been setup for another context */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out; + } + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) + break; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Ongoing dump/setup - wait for its completion */ + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + } + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + kbdev->hwcnt.backend.triggered = 0; + + /* Disable interrupt */ + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL); + + /* Disable the counters */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx); + + kbdev->hwcnt.kctx = NULL; + kbdev->hwcnt.addr = 0ULL; + + kbase_pm_ca_instr_disable(kbdev); + + kbase_pm_unrequest_cores(kbdev, true, + kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER)); + + kbase_pm_release_l2_caches(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", + kctx); + + err = 0; + + out: + return err; +} + +int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) +{ + unsigned long flags; + int err = -EINVAL; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.kctx != kctx) { + /* The instrumentation has been setup for another context */ + goto unlock; + } + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) { + /* HW counters are disabled or another dump is ongoing, or we're + * resetting */ + goto unlock; + } + + kbdev->hwcnt.backend.triggered = 0; + + /* Mark that we're dumping - the PF handler can signal that we faulted + */ + kbdev->hwcnt.backend.state = 
KBASE_INSTR_STATE_DUMPING; + + /* Reconfigure the dump address */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), + kbdev->hwcnt.addr & 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), + kbdev->hwcnt.addr >> 32, NULL); + + /* Start dumping */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL, + kbdev->hwcnt.addr, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_PRFCNT_SAMPLE, kctx); + + dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx); + + err = 0; + + unlock: + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); + +bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, + bool * const success) +{ + unsigned long flags; + bool complete = false; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) { + *success = true; + complete = true; + } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + *success = false; + complete = true; + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + return complete; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); + +void kbasep_cache_clean_worker(struct work_struct *data) +{ + struct kbase_device *kbdev; + unsigned long flags; + + kbdev = container_of(data, struct kbase_device, + hwcnt.backend.cache_clean_work); + + mutex_lock(&kbdev->cacheclean_lock); + kbasep_instr_hwcnt_cacheclean(kbdev); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* Wait for our condition, and any reset to complete */ + while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.cache_clean_wait, + kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_CLEANING); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_CLEANED); + + /* All finished and idle */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + mutex_unlock(&kbdev->cacheclean_lock); +} + +void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) { + int ret; + /* Always clean and invalidate the cache after a successful dump + */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; + ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, + &kbdev->hwcnt.backend.cache_clean_work); + KBASE_DEBUG_ASSERT(ret); + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} + +void kbase_clean_caches_done(struct kbase_device *kbdev) +{ + u32 irq_mask; + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { + unsigned long flags; + unsigned long pm_flags; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* Disable interrupt */ + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~CLEAN_CACHES_COMPLETED, NULL); + 
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + + /* Wakeup... */ + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { + /* Only wake if we weren't resetting */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED; + wake_up(&kbdev->hwcnt.backend.cache_clean_wait); + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + } +} + +int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + unsigned long flags; + int err; + + /* Wait for dump & cacheclean to complete */ + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + err = -EINVAL; + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + } else { + /* Dump done */ + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_IDLE); + err = 0; + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + return err; +} + +int kbase_instr_hwcnt_clear(struct kbase_context *kctx) +{ + unsigned long flags; + int err = -EINVAL; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + /* Check it's the context previously set up and we're not already + * dumping */ + if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_IDLE) + goto out; + + /* Clear the counters */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_PRFCNT_CLEAR, kctx); + + err = 0; + +out: + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); + +int kbase_instr_backend_init(struct kbase_device *kbdev) +{ + int ret = 0; + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + + init_waitqueue_head(&kbdev->hwcnt.backend.wait); + init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait); + INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work, + kbasep_cache_clean_worker); + kbdev->hwcnt.backend.triggered = 0; + + kbdev->hwcnt.backend.cache_clean_wq = + alloc_workqueue("Mali cache cleaning workqueue", 0, 1); + if (NULL == kbdev->hwcnt.backend.cache_clean_wq) + ret = -EINVAL; + + return ret; +} + +void kbase_instr_backend_term(struct kbase_device *kbdev) +{ + destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h new file mode 100644 index 000000000000..4794672da8f0 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h @@ -0,0 +1,58 @@ +/* + * + * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+
+
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* Hardware is currently cleaning and invalidating caches. */
+	KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be set up. */
+	KBASE_INSTR_STATE_CLEANED,
+	/* An error has occurred during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	wait_queue_head_t wait;
+	int triggered;
+
+	enum kbase_instr_state state;
+	wait_queue_head_t cache_clean_wait;
+	struct workqueue_struct *cache_clean_wq;
+	struct work_struct cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100644
index 000000000000..e96aeae786e1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100644
index 000000000000..8781561e73d0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ * + */ + + + +/* + * Backend specific IRQ APIs + */ + +#ifndef _KBASE_IRQ_INTERNAL_H_ +#define _KBASE_IRQ_INTERNAL_H_ + +int kbase_install_interrupts(struct kbase_device *kbdev); + +void kbase_release_interrupts(struct kbase_device *kbdev); + +/** + * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed + * execution + * @kbdev: The kbase device + */ +void kbase_synchronize_irqs(struct kbase_device *kbdev); + +int kbasep_common_test_interrupt_handlers( + struct kbase_device * const kbdev); + +#endif /* _KBASE_IRQ_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c new file mode 100644 index 000000000000..8416b80e8b77 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c @@ -0,0 +1,469 @@ +/* + * + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <mali_kbase.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_irq_internal.h> + +#include <linux/interrupt.h> + +#if !defined(CONFIG_MALI_NO_MALI) + +/* GPU IRQ Tags */ +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 + +static void *kbase_tag(void *ptr, u32 tag) +{ + return (void *)(((uintptr_t) ptr) | tag); +} + +static void *kbase_untag(void *ptr) +{ + return (void *)(((uintptr_t) ptr) & ~3); +} + +static irqreturn_t kbase_job_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_job_done(kbdev, val); + + return IRQ_HANDLED; +} + +KBASE_EXPORT_TEST_API(kbase_job_irq_handler); + +static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + atomic_inc(&kbdev->faults_pending); + + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + 
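+	/* The "data" cookie received here was produced by kbase_tag(), so a
+	 * single kbase_device can be registered against all three (possibly
+	 * shared) IRQ lines and recovered again with kbase_untag(). A minimal
+	 * sketch of the round trip, assuming kbdev is at least 4-byte aligned:
+	 *
+	 *	void *cookie = kbase_tag(kbdev, MMU_IRQ_TAG);   // ptr | 1
+	 *	struct kbase_device *dev = kbase_untag(cookie); // ptr & ~3
+	 */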
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) { + atomic_dec(&kbdev->faults_pending); + return IRQ_NONE; + } + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_mmu_interrupt(kbdev, val); + + atomic_dec(&kbdev->faults_pending); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_gpu_interrupt(kbdev, val); + + return IRQ_HANDLED; +} + +KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler); + +static irq_handler_t kbase_handler_table[] = { + [JOB_IRQ_TAG] = kbase_job_irq_handler, + [MMU_IRQ_TAG] = kbase_mmu_irq_handler, + [GPU_IRQ_TAG] = kbase_gpu_irq_handler, +}; + +#ifdef CONFIG_MALI_DEBUG +#define JOB_IRQ_HANDLER JOB_IRQ_TAG +#define MMU_IRQ_HANDLER MMU_IRQ_TAG +#define GPU_IRQ_HANDLER GPU_IRQ_TAG + +/** + * kbase_set_custom_irq_handler - Set a custom IRQ handler + * @kbdev: Device for which the handler is to be registered + * @custom_handler: Handler to be registered + * @irq_type: Interrupt type + * + * Registers given interrupt handler for requested interrupt type + * In the case where irq handler is not specified, the default handler shall be + * registered + * + * Return: 0 case success, error code otherwise + */ +int kbase_set_custom_irq_handler(struct kbase_device *kbdev, + irq_handler_t custom_handler, + int irq_type) +{ + int result = 0; + irq_handler_t requested_irq_handler = NULL; + + KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) && + (GPU_IRQ_HANDLER >= irq_type)); + + /* Release previous handler */ + if (kbdev->irqs[irq_type].irq) + free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); + + requested_irq_handler = (NULL != custom_handler) ? 
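+			/* Fall back to the default entry in kbase_handler_table
+			 * when no custom handler is supplied, so passing NULL
+			 * restores the stock handler for this irq_type. */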
custom_handler : + kbase_handler_table[irq_type]; + + if (0 != request_irq(kbdev->irqs[irq_type].irq, + requested_irq_handler, + kbdev->irqs[irq_type].flags | IRQF_SHARED, + dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) { + result = -EINVAL; + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", + kbdev->irqs[irq_type].irq, irq_type); +#ifdef CONFIG_SPARSE_IRQ + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); +#endif /* CONFIG_SPARSE_IRQ */ + } + + return result; +} + +KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); + +/* test correct interrupt assigment and reception by cpu */ +struct kbasep_irq_test { + struct hrtimer timer; + wait_queue_head_t wait; + int triggered; + u32 timeout; +}; + +static struct kbasep_irq_test kbasep_irq_test_data; + +#define IRQ_TEST_TIMEOUT 500 + +static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); + + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbasep_irq_test_data.triggered = 1; + wake_up(&kbasep_irq_test_data.wait); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); + + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbasep_irq_test_data.triggered = 1; + wake_up(&kbasep_irq_test_data.wait); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL); + + return IRQ_HANDLED; +} + +static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) +{ + struct kbasep_irq_test *test_data = container_of(timer, + struct kbasep_irq_test, timer); + + test_data->timeout = 1; + test_data->triggered = 1; + wake_up(&test_data->wait); + return HRTIMER_NORESTART; +} + +static int kbasep_common_test_interrupt( + struct kbase_device * const kbdev, u32 tag) +{ + int err = 0; + irq_handler_t test_handler; + + u32 old_mask_val; + u16 mask_offset; + u16 rawstat_offset; + + switch (tag) { + case JOB_IRQ_TAG: + test_handler = kbase_job_irq_test_handler; + rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT); + mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK); + break; + case MMU_IRQ_TAG: + test_handler = kbase_mmu_irq_test_handler; + rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); + mask_offset = MMU_REG(MMU_IRQ_MASK); + break; + case GPU_IRQ_TAG: + /* already tested by pm_driver - bail out */ + default: + return 0; + } + + /* store old mask */ + old_mask_val = kbase_reg_read(kbdev, 
mask_offset, NULL); + /* mask interrupts */ + kbase_reg_write(kbdev, mask_offset, 0x0, NULL); + + if (kbdev->irqs[tag].irq) { + /* release original handler and install test handler */ + if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) { + err = -EINVAL; + } else { + kbasep_irq_test_data.timeout = 0; + hrtimer_init(&kbasep_irq_test_data.timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kbasep_irq_test_data.timer.function = + kbasep_test_interrupt_timeout; + + /* trigger interrupt */ + kbase_reg_write(kbdev, mask_offset, 0x1, NULL); + kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL); + + hrtimer_start(&kbasep_irq_test_data.timer, + HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), + HRTIMER_MODE_REL); + + wait_event(kbasep_irq_test_data.wait, + kbasep_irq_test_data.triggered != 0); + + if (kbasep_irq_test_data.timeout != 0) { + dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", + kbdev->irqs[tag].irq, tag); + err = -EINVAL; + } else { + dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n", + kbdev->irqs[tag].irq, tag); + } + + hrtimer_cancel(&kbasep_irq_test_data.timer); + kbasep_irq_test_data.triggered = 0; + + /* mask interrupts */ + kbase_reg_write(kbdev, mask_offset, 0x0, NULL); + + /* release test handler */ + free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); + } + + /* restore original interrupt */ + if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], + kbdev->irqs[tag].flags | IRQF_SHARED, + dev_name(kbdev->dev), kbase_tag(kbdev, tag))) { + dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", + kbdev->irqs[tag].irq, tag); + err = -EINVAL; + } + } + /* restore old mask */ + kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL); + + return err; +} + +int kbasep_common_test_interrupt_handlers( + struct kbase_device * const kbdev) +{ + int err; + + init_waitqueue_head(&kbasep_irq_test_data.wait); + kbasep_irq_test_data.triggered = 0; + + /* A suspend won't happen during startup/insmod */ + kbase_pm_context_active(kbdev); + + err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG); + if (err) { + dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); + goto out; + } + + err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG); + if (err) { + dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. 
Check interrupt assignments.\n"); + goto out; + } + + dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n"); + + out: + kbase_pm_context_idle(kbdev); + + return err; +} +#endif /* CONFIG_MALI_DEBUG */ + +int kbase_install_interrupts(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + int err; + u32 i; + + for (i = 0; i < nr; i++) { + err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], + kbdev->irqs[i].flags | IRQF_SHARED, + dev_name(kbdev->dev), + kbase_tag(kbdev, i)); + if (err) { + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", + kbdev->irqs[i].irq, i); +#ifdef CONFIG_SPARSE_IRQ + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); +#endif /* CONFIG_SPARSE_IRQ */ + goto release; + } + } + + return 0; + + release: + while (i-- > 0) + free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + + return err; +} + +void kbase_release_interrupts(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + u32 i; + + for (i = 0; i < nr; i++) { + if (kbdev->irqs[i].irq) + free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + } +} + +void kbase_synchronize_irqs(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + u32 i; + + for (i = 0; i < nr; i++) { + if (kbdev->irqs[i].irq) + synchronize_irq(kbdev->irqs[i].irq); + } +} + +#endif /* !defined(CONFIG_MALI_NO_MALI) */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c new file mode 100644 index 000000000000..c660c80341f4 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c @@ -0,0 +1,235 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Register backend context / address space management + */ + +#include <mali_kbase.h> +#include <mali_kbase_hwaccess_jm.h> +#include <mali_kbase_ctx_sched.h> + +/** + * assign_and_activate_kctx_addr_space - Assign an AS to a context + * @kbdev: Kbase device + * @kctx: Kbase context + * @current_as: Address Space to assign + * + * Assign an Address Space (AS) to a context, and add the context to the Policy. + * + * This includes + * setting up the global runpool_irq structure and the context on the AS, + * Activating the MMU on the AS, + * Allowing jobs to be submitted on the AS. 
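+ *
+ * A sketch of the expected call pattern (illustrative only, assuming the
+ * jsctx/runpool mutexes listed below are already held and the hwaccess
+ * spinlock is taken last):
+ *
+ *	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ *	assign_and_activate_kctx_addr_space(kbdev, kctx, &kbdev->as[as_nr]);
+ *	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);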
+ * + * Context: + * kbasep_js_kctx_info.jsctx_mutex held, + * kbasep_js_device_data.runpool_mutex held, + * AS transaction mutex held, + * Runpool IRQ lock held + */ +static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_as *current_as) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Attribute handling */ + kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx); + + /* Allow it to run jobs */ + kbasep_js_set_submit_allowed(js_devdata, kctx); + + kbase_js_runpool_inc_context_count(kbdev, kctx); +} + +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + int i; + + if (kbdev->hwaccess.active_kctx == kctx) { + /* Context is already active */ + return true; + } + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + if (kbdev->as_to_kctx[i] == kctx) { + /* Context already has ASID - mark as active */ + return true; + } + } + + /* Context does not have address space assigned */ + return false; +} + +void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + int as_nr = kctx->as_nr; + + if (as_nr == KBASEP_AS_NR_INVALID) { + WARN(1, "Attempting to release context without ASID\n"); + return; + } + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (atomic_read(&kctx->refcount) != 1) { + WARN(1, "Attempting to release active ASID\n"); + return; + } + + kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx); + + kbase_ctx_sched_release_ctx(kctx); + kbase_js_runpool_dec_context_count(kbdev, kctx); +} + +void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ +} + +int kbase_backend_find_and_release_free_address_space( + struct kbase_device *kbdev, struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; + int i; + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + struct kbasep_js_kctx_info *as_js_kctx_info; + struct kbase_context *as_kctx; + + as_kctx = kbdev->as_to_kctx[i]; + as_js_kctx_info = &as_kctx->jctx.sched_info; + + /* Don't release privileged or active contexts, or contexts with + * jobs running. + * Note that a context will have at least 1 reference (which + * was previously taken by kbasep_js_schedule_ctx()) until + * descheduled. 
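+ * A refcount of exactly 1 therefore means only that scheduler-held
+ * reference remains, i.e. no jobs are still in flight for as_kctx.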
+ */ + if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && + atomic_read(&as_kctx->refcount) == 1) { + if (!kbasep_js_runpool_retain_ctx_nolock(kbdev, + as_kctx)) { + WARN(1, "Failed to retain active context\n"); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return KBASEP_AS_NR_INVALID; + } + + kbasep_js_clear_submit_allowed(js_devdata, as_kctx); + + /* Drop and retake locks to take the jsctx_mutex on the + * context we're about to release without violating lock + * ordering + */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + + /* Release context from address space */ + mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); + + if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, + as_kctx, + true); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); + + return i; + } + + /* Context was retained while locks were dropped, + * continue looking for free AS */ + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return KBASEP_AS_NR_INVALID; +} + +bool kbase_backend_use_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, + int as_nr) +{ + struct kbasep_js_device_data *js_devdata; + struct kbase_as *new_address_space = NULL; + + js_devdata = &kbdev->js_data; + + if (kbdev->hwaccess.active_kctx == kctx) { + WARN(1, "Context is already scheduled in\n"); + return false; + } + + new_address_space = &kbdev->as[as_nr]; + + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space); + + if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { + /* We need to retain it to keep the corresponding address space + */ + kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + } + + return true; +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h new file mode 100644 index 000000000000..08a7400e66d5 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h @@ -0,0 +1,123 @@ +/* + * + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + +/* + * Register-based HW access backend specific definitions + */ + +#ifndef _KBASE_HWACCESS_GPU_DEFS_H_ +#define _KBASE_HWACCESS_GPU_DEFS_H_ + +/* SLOT_RB_SIZE must be < 256 */ +#define SLOT_RB_SIZE 2 +#define SLOT_RB_MASK (SLOT_RB_SIZE - 1) + +/** + * struct rb_entry - Ringbuffer entry + * @katom: Atom associated with this entry + */ +struct rb_entry { + struct kbase_jd_atom *katom; +}; + +/** + * struct slot_rb - Slot ringbuffer + * @entries: Ringbuffer entries + * @last_context: The last context to submit a job on this slot + * @read_idx: Current read index of buffer + * @write_idx: Current write index of buffer + * @job_chain_flag: Flag used to implement jobchain disambiguation + */ +struct slot_rb { + struct rb_entry entries[SLOT_RB_SIZE]; + + struct kbase_context *last_context; + + u8 read_idx; + u8 write_idx; + + u8 job_chain_flag; +}; + +/** + * struct kbase_backend_data - GPU backend specific data for HW access layer + * @slot_rb: Slot ringbuffers + * @rmu_workaround_flag: When PRLAM-8987 is present, this flag determines + * whether slots 0/1 or slot 2 are currently being + * pulled from + * @scheduling_timer: The timer tick used for rescheduling jobs + * @timer_running: Is the timer running? The runpool_mutex must be + * held whilst modifying this. + * @suspend_timer: Is the timer suspended? Set when a suspend + * occurs and cleared on resume. The runpool_mutex + * must be held whilst modifying this. + * @reset_gpu: Set to a KBASE_RESET_xxx value (see comments) + * @reset_workq: Work queue for performing the reset + * @reset_work: Work item for performing the reset + * @reset_wait: Wait event signalled when the reset is complete + * @reset_timer: Timeout for soft-stops before the reset + * @timeouts_updated: Have timeout values just been updated? + * + * The hwaccess_lock (a spinlock) must be held when accessing this structure + */ +struct kbase_backend_data { + struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; + + bool rmu_workaround_flag; + + struct hrtimer scheduling_timer; + + bool timer_running; + bool suspend_timer; + + atomic_t reset_gpu; + +/* The GPU reset isn't pending */ +#define KBASE_RESET_GPU_NOT_PENDING 0 +/* kbase_prepare_to_reset_gpu has been called */ +#define KBASE_RESET_GPU_PREPARED 1 +/* kbase_reset_gpu has been called - the reset will now definitely happen + * within the timeout period */ +#define KBASE_RESET_GPU_COMMITTED 2 +/* The GPU reset process is currently occuring (timeout has expired or + * kbasep_try_reset_gpu_early was called) */ +#define KBASE_RESET_GPU_HAPPENING 3 +/* Reset the GPU silently, used when resetting the GPU as part of normal + * behavior (e.g. when exiting protected mode). */ +#define KBASE_RESET_GPU_SILENT 4 + struct workqueue_struct *reset_workq; + struct work_struct reset_work; + wait_queue_head_t reset_wait; + struct hrtimer reset_timer; + + bool timeouts_updated; +}; + +/** + * struct kbase_jd_atom_backend - GPU backend specific katom data + */ +struct kbase_jd_atom_backend { +}; + +/** + * struct kbase_context_backend - GPU backend specific context data + */ +struct kbase_context_backend { +}; + +#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c new file mode 100644 index 000000000000..be88ec8eb0d7 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -0,0 +1,1512 @@ +/* + * + * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * Base kernel job manager APIs + */ + +#include <mali_kbase.h> +#include <mali_kbase_config.h> +#include <mali_midg_regmap.h> +#if defined(CONFIG_MALI_GATOR_SUPPORT) +#include <mali_kbase_gator.h> +#endif +#include <mali_kbase_tlstream.h> +#include <mali_kbase_vinstr.h> +#include <mali_kbase_hw.h> +#include <mali_kbase_hwaccess_jm.h> +#include <mali_kbase_ctx_sched.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_irq_internal.h> +#include <backend/gpu/mali_kbase_js_affinity.h> +#include <backend/gpu/mali_kbase_jm_internal.h> + +#define beenthere(kctx, f, a...) \ + dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) + +#if KBASE_GPU_RESET_EN +static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev); +static void kbasep_reset_timeout_worker(struct work_struct *data); +static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer); +#endif /* KBASE_GPU_RESET_EN */ + +static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, + struct kbase_context *kctx) +{ + return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx); +} + +void kbase_job_hw_submit(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js) +{ + struct kbase_context *kctx; + u32 cfg; + u64 jc_head = katom->jc; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(katom); + + kctx = katom->kctx; + + /* Command register must be available */ + KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); + /* Affinity is not violating */ + kbase_js_debug_log_current_affinities(kbdev); + KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js, + katom->affinity)); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), + jc_head & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), + jc_head >> 32, kctx); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), + katom->affinity & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), + katom->affinity >> 32, kctx); + + /* start MMU, medium priority, cache clean/flush on end, clean/flush on + * start */ + cfg = kctx->as_nr; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) && + !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) + cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; + + if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) + cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; + else + cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; + + if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && + !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) + cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; + else + cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649)) + cfg |= JS_CONFIG_START_MMU; + + cfg |= JS_CONFIG_THREAD_PRI(8); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) && + (katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED)) + cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; + + if (kbase_hw_has_feature(kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + if 
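+		/* The flag alternates on every submission to this slot, so a
+		 * later soft/hard-stop can target exactly one of the two
+		 * chains that may occupy the slot (see the
+		 * JS_COMMAND_*_STOP_0/_1 selection in the stop path). */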
(!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) { + cfg |= JS_CONFIG_JOB_CHAIN_FLAG; + katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN; + kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = + true; + } else { + katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN; + kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = + false; + } + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), + katom->flush_id, kctx); + + /* Write an approximate start timestamp. + * It's approximate because there might be a job in the HEAD register. + */ + katom->start_timestamp = ktime_get(); + + /* GO ! */ + dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx", + katom, kctx, js, jc_head, katom->affinity); + + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, + (u32) katom->affinity); + +#if defined(CONFIG_MALI_GATOR_SUPPORT) + kbase_trace_mali_job_slots_event( + GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), + kctx, kbase_jd_atom_id(kctx, katom)); +#endif + KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head, + katom->affinity, cfg); + KBASE_TLSTREAM_TL_RET_CTX_LPU( + kctx, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); + KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]); + KBASE_TLSTREAM_TL_RET_ATOM_LPU( + katom, + &kbdev->gpu_props.props.raw_props.js_features[js], + "ctx_nr,atom_nr"); +#ifdef CONFIG_GPU_TRACEPOINTS + if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { + /* If this is the only job on the slot, trace it as starting */ + char js_string[16]; + + trace_gpu_sched_switch( + kbasep_make_job_slot_string(js, js_string, + sizeof(js_string)), + ktime_to_ns(katom->start_timestamp), + (u32)katom->kctx->id, 0, katom->work_id); + kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; + } +#endif + kbase_timeline_job_slot_submit(kbdev, kctx, katom, js); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), + JS_COMMAND_START, katom->kctx); +} + +/** + * kbasep_job_slot_update_head_start_timestamp - Update timestamp + * @kbdev: kbase device + * @js: job slot + * @end_timestamp: timestamp + * + * Update the start_timestamp of the job currently in the HEAD, based on the + * fact that we got an IRQ for the previous set of completed jobs. + * + * The estimate also takes into account the time the job was submitted, to + * work out the best estimate (which might still result in an over-estimate to + * the calculated time spent) + */ +static void kbasep_job_slot_update_head_start_timestamp( + struct kbase_device *kbdev, + int js, + ktime_t end_timestamp) +{ + if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) { + struct kbase_jd_atom *katom; + ktime_t timestamp_diff; + /* The atom in the HEAD */ + katom = kbase_gpu_inspect(kbdev, js, 0); + + KBASE_DEBUG_ASSERT(katom != NULL); + + timestamp_diff = ktime_sub(end_timestamp, + katom->start_timestamp); + if (ktime_to_ns(timestamp_diff) >= 0) { + /* Only update the timestamp if it's a better estimate + * than what's currently stored. 
This is because our + * estimate that accounts for the throttle time may be + * too much of an overestimate */ + katom->start_timestamp = end_timestamp; + } + } +} + +/** + * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline + * tracepoint + * @kbdev: kbase device + * @js: job slot + * + * Make a tracepoint call to the instrumentation module informing that + * softstop happened on given lpu (job slot). + */ +static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, + int js) +{ + KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( + &kbdev->gpu_props.props.raw_props.js_features[js]); +} + +void kbase_job_done(struct kbase_device *kbdev, u32 done) +{ + unsigned long flags; + int i; + u32 count = 0; + ktime_t end_timestamp = ktime_get(); + struct kbasep_js_device_data *js_devdata; + + KBASE_DEBUG_ASSERT(kbdev); + js_devdata = &kbdev->js_data; + + KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done); + + memset(&kbdev->slot_submit_count_irq[0], 0, + sizeof(kbdev->slot_submit_count_irq)); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + while (done) { + u32 failed = done >> 16; + + /* treat failed slots as finished slots */ + u32 finished = (done & 0xFFFF) | failed; + + /* Note: This is inherently unfair, as we always check + * for lower numbered interrupts before the higher + * numbered ones.*/ + i = ffs(finished) - 1; + KBASE_DEBUG_ASSERT(i >= 0); + + do { + int nr_done; + u32 active; + u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */ + u64 job_tail = 0; + + if (failed & (1u << i)) { + /* read out the job slot status code if the job + * slot reported failure */ + completion_code = kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_STATUS), NULL); + + switch (completion_code) { + case BASE_JD_EVENT_STOPPED: +#if defined(CONFIG_MALI_GATOR_SUPPORT) + kbase_trace_mali_job_slots_event( + GATOR_MAKE_EVENT( + GATOR_JOB_SLOT_SOFT_STOPPED, i), + NULL, 0); +#endif + + kbasep_trace_tl_event_lpu_softstop( + kbdev, i); + + /* Soft-stopped job - read the value of + * JS<n>_TAIL so that the job chain can + * be resumed */ + job_tail = (u64)kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_TAIL_LO), + NULL) | + ((u64)kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_TAIL_HI), + NULL) << 32); + break; + case BASE_JD_EVENT_NOT_STARTED: + /* PRLAM-10673 can cause a TERMINATED + * job to come back as NOT_STARTED, but + * the error interrupt helps us detect + * it */ + completion_code = + BASE_JD_EVENT_TERMINATED; + /* fall through */ + default: + dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", + i, completion_code, + kbase_exception_name + (kbdev, + completion_code)); + } + + kbase_gpu_irq_evict(kbdev, i); + } + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), + done & ((1 << i) | (1 << (i + 16))), + NULL); + active = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_JS_STATE), + NULL); + + if (((active >> i) & 1) == 0 && + (((done >> (i + 16)) & 1) == 0)) { + /* There is a potential race we must work + * around: + * + * 1. A job slot has a job in both current and + * next registers + * 2. The job in current completes + * successfully, the IRQ handler reads + * RAWSTAT and calls this function with the + * relevant bit set in "done" + * 3. The job in the next registers becomes the + * current job on the GPU + * 4. Sometime before the JOB_IRQ_CLEAR line + * above the job on the GPU _fails_ + * 5. The IRQ_CLEAR clears the done bit but not + * the failed bit. This atomically sets + * JOB_IRQ_JS_STATE. 
However, since both jobs
+				 * have now completed, the relevant bits for
+				 * the slot are set to 0.
+				 *
+				 * If we now did nothing then we'd incorrectly
+				 * assume that _both_ jobs had completed
+				 * successfully (since we haven't yet observed
+				 * the fail bit being set in RAWSTAT).
+				 *
+				 * So at this point if there are no active jobs
+				 * left we check to see if RAWSTAT has a failure
+				 * bit set for the job slot. If it does we know
+				 * that there has been a new failure that we
+				 * didn't previously know about, so we make sure
+				 * that we record this in active (but we wait
+				 * for the next loop to deal with it).
+				 *
+				 * If we were handling a job failure (i.e. done
+				 * has the relevant high bit set) then we know
+				 * that the value read back from
+				 * JOB_IRQ_JS_STATE is the correct number of
+				 * remaining jobs because the failed job will
+				 * have prevented any further jobs from starting
+				 * execution.
+				 */
+				u32 rawstat = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+				if ((rawstat >> (i + 16)) & 1) {
+					/* There is a failed job that we've
+					 * missed - add it back to active */
+					active |= (1u << i);
+				}
+			}
+
+			dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+							completion_code);
+
+			nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+			nr_done -= (active >> i) & 1;
+			nr_done -= (active >> (i + 16)) & 1;
+
+			if (nr_done <= 0) {
+				dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+									i);
+
+				goto spurious;
+			}
+
+			count += nr_done;
+
+			while (nr_done) {
+				if (nr_done == 1) {
+					kbase_gpu_complete_hw(kbdev, i,
+							completion_code,
+							job_tail,
+							&end_timestamp);
+					kbase_jm_try_kick_all(kbdev);
+				} else {
+					/* More than one job has completed.
+					 * Since this is not the last job being
+					 * reported this time it must have
+					 * passed. This is because the hardware
+					 * will not allow further jobs in a job
+					 * slot to complete until the failed job
+					 * is cleared from the IRQ status.
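+					 *
+					 * For example, with nr_done == 2 the
+					 * first pass completes the older atom
+					 * with BASE_JD_EVENT_DONE and only the
+					 * final pass (nr_done == 1) reports
+					 * the completion_code read from
+					 * JS<n>_STATUS above.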
+ */ + kbase_gpu_complete_hw(kbdev, i, + BASE_JD_EVENT_DONE, + 0, + &end_timestamp); + } + nr_done--; + } + spurious: + done = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) { + /* Workaround for missing interrupt caused by + * PRLAM-10883 */ + if (((active >> i) & 1) && (0 == + kbase_reg_read(kbdev, + JOB_SLOT_REG(i, + JS_STATUS), NULL))) { + /* Force job slot to be processed again + */ + done |= (1u << i); + } + } + + failed = done >> 16; + finished = (done & 0xFFFF) | failed; + if (done) + end_timestamp = ktime_get(); + } while (finished & (1 << i)); + + kbasep_job_slot_update_head_start_timestamp(kbdev, i, + end_timestamp); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#if KBASE_GPU_RESET_EN + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_COMMITTED) { + /* If we're trying to reset the GPU then we might be able to do + * it early (without waiting for a timeout) because some jobs + * have completed + */ + kbasep_try_reset_gpu_early(kbdev); + } +#endif /* KBASE_GPU_RESET_EN */ + KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count); +} +KBASE_EXPORT_TEST_API(kbase_job_done); + +static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + bool soft_stops_allowed = true; + + if (kbase_jd_katom_is_protected(katom)) { + soft_stops_allowed = false; + } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { + if ((katom->core_req & BASE_JD_REQ_T) != 0) + soft_stops_allowed = false; + } + return soft_stops_allowed; +} + +static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev, + base_jd_core_req core_reqs) +{ + bool hard_stops_allowed = true; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) { + if ((core_reqs & BASE_JD_REQ_T) != 0) + hard_stops_allowed = false; + } + return hard_stops_allowed; +} + +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, + int js, + u32 action, + base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom) +{ + struct kbase_context *kctx = target_katom->kctx; +#if KBASE_TRACE_ENABLE + u32 status_reg_before; + u64 job_in_head_before; + u32 status_reg_after; + + KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK))); + + /* Check the head pointer */ + job_in_head_before = ((u64) kbase_reg_read(kbdev, + JOB_SLOT_REG(js, JS_HEAD_LO), NULL)) + | (((u64) kbase_reg_read(kbdev, + JOB_SLOT_REG(js, JS_HEAD_HI), NULL)) + << 32); + status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), + NULL); +#endif + + if (action == JS_COMMAND_SOFT_STOP) { + bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev, + target_katom); + + if (!soft_stop_allowed) { +#ifdef CONFIG_MALI_DEBUG + dev_dbg(kbdev->dev, + "Attempt made to soft-stop a job that cannot be soft-stopped. 
core_reqs = 0x%X", + (unsigned int)core_reqs); +#endif /* CONFIG_MALI_DEBUG */ + return; + } + + /* We are about to issue a soft stop, so mark the atom as having + * been soft stopped */ + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; + + /* Mark the point where we issue the soft-stop command */ + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { + int i; + + for (i = 0; + i < kbase_backend_nr_atoms_submitted(kbdev, js); + i++) { + struct kbase_jd_atom *katom; + + katom = kbase_gpu_inspect(kbdev, js, i); + + KBASE_DEBUG_ASSERT(katom); + + /* For HW_ISSUE_8316, only 'bad' jobs attacking + * the system can cause this issue: normally, + * all memory should be allocated in multiples + * of 4 pages, and growable memory should be + * changed size in multiples of 4 pages. + * + * Whilst such 'bad' jobs can be cleared by a + * GPU reset, the locking up of a uTLB entry + * caused by the bad job could also stall other + * ASs, meaning that other ASs' jobs don't + * complete in the 'grace' period before the + * reset. We don't want to lose other ASs' jobs + * when they would normally complete fine, so we + * must 'poke' the MMU regularly to help other + * ASs complete */ + kbase_as_poking_timer_retain_atom( + kbdev, katom->kctx, katom); + } + } + + if (kbase_hw_has_feature( + kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_SOFT_STOP_1 : + JS_COMMAND_SOFT_STOP_0; + } + } else if (action == JS_COMMAND_HARD_STOP) { + bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev, + core_reqs); + + if (!hard_stop_allowed) { + /* Jobs can be hard-stopped for the following reasons: + * * CFS decides the job has been running too long (and + * soft-stop has not occurred). In this case the GPU + * will be reset by CFS if the job remains on the + * GPU. + * + * * The context is destroyed, kbase_jd_zap_context + * will attempt to hard-stop the job. However it also + * has a watchdog which will cause the GPU to be + * reset if the job remains on the GPU. + * + * * An (unhandled) MMU fault occurred. As long as + * BASE_HW_ISSUE_8245 is defined then the GPU will be + * reset. + * + * All three cases result in the GPU being reset if the + * hard-stop fails, so it is safe to just return and + * ignore the hard-stop request. + */ + dev_warn(kbdev->dev, + "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", + (unsigned int)core_reqs); + return; + } + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; + + if (kbase_hw_has_feature( + kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? 
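+			/* Narrow the generic stop command to the _0/_1
+			 * variant matching the job-chain flag the atom was
+			 * submitted with, so only the intended chain on this
+			 * slot is stopped. */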
+ JS_COMMAND_HARD_STOP_1 : + JS_COMMAND_HARD_STOP_0; + } + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx); + +#if KBASE_TRACE_ENABLE + status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), + NULL); + if (status_reg_after == BASE_JD_EVENT_ACTIVE) { + struct kbase_jd_atom *head; + struct kbase_context *head_kctx; + + head = kbase_gpu_inspect(kbdev, js, 0); + head_kctx = head->kctx; + + if (status_reg_before == BASE_JD_EVENT_ACTIVE) + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, + head, job_in_head_before, js); + else + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, + 0, js); + + switch (action) { + case JS_COMMAND_SOFT_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_SOFT_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_SOFT_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, + head, head->jc, js); + break; + default: + BUG(); + break; + } + } else { + if (status_reg_before == BASE_JD_EVENT_ACTIVE) + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, + job_in_head_before, js); + else + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, + 0, js); + + switch (action) { + case JS_COMMAND_SOFT_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, + js); + break; + case JS_COMMAND_SOFT_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, + 0, js); + break; + case JS_COMMAND_SOFT_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, + 0, js); + break; + case JS_COMMAND_HARD_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, + js); + break; + case JS_COMMAND_HARD_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, + 0, js); + break; + case JS_COMMAND_HARD_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, + 0, js); + break; + default: + BUG(); + break; + } + } +#endif +} + +void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) +{ + unsigned long flags; + struct kbase_device *kbdev; + int i; + + KBASE_DEBUG_ASSERT(kctx != NULL); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + /* Cancel any remaining running jobs for this kctx */ + mutex_lock(&kctx->jctx.lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Invalidate all jobs in context, to prevent re-submitting */ + for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { + if (!work_pending(&kctx->jctx.atoms[i].work)) + kctx->jctx.atoms[i].event_code = + BASE_JD_EVENT_JOB_CANCELLED; + } + + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_hardstop(kctx, i, NULL); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->jctx.lock); +} + +void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, + struct kbase_jd_atom *target_katom) +{ + struct kbase_device *kbdev; + int js = target_katom->slot_nr; + int priority = target_katom->sched_priority; + int i; + bool stop_sent = false; + + KBASE_DEBUG_ASSERT(kctx != NULL); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < 
kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		if (katom->kctx != kctx)
+			continue;
+
+		if (katom->sched_priority > priority) {
+			if (!stop_sent)
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE(
+						target_katom);
+
+			kbase_job_slot_softstop(kbdev, js, katom);
+			stop_sent = true;
+		}
+	}
+}
+
+struct zap_reset_data {
+	/* The stages are:
+	 * 1. The timer has never been called
+	 * 2. The zap has timed out, all slots are soft-stopped - the GPU reset
+	 *    will happen. The GPU has been reset when
+	 *    kbdev->hwaccess.backend.reset_wait is signalled
+	 *
+	 * (-1 - The timer has been cancelled)
+	 */
+	int stage;
+	struct kbase_device *kbdev;
+	struct hrtimer timer;
+	spinlock_t lock; /* protects updates to the stage member */
+};
+
+static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer)
+{
+	struct zap_reset_data *reset_data = container_of(timer,
+						struct zap_reset_data, timer);
+	struct kbase_device *kbdev = reset_data->kbdev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&reset_data->lock, flags);
+
+	if (reset_data->stage == -1)
+		goto out;
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_prepare_to_reset_gpu(kbdev)) {
+		dev_err(kbdev->dev, "Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+								ZAP_TIMEOUT);
+		kbase_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	reset_data->stage = 2;
+
+ out:
+	spin_unlock_irqrestore(&reset_data->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct zap_reset_data reset_data;
+	unsigned long flags;
+
+	hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	reset_data.timer.function = zap_timeout_callback;
+
+	spin_lock_init(&reset_data.lock);
+
+	reset_data.kbdev = kbdev;
+	reset_data.stage = 1;
+
+	hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for all jobs to finish, and for the context to be not-scheduled
+	 * (due to kbase_jd_zap_context(); we also guarantee it's not in the JS
+	 * policy queue either) */
+	wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
+	wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	spin_lock_irqsave(&reset_data.lock, flags);
+	if (reset_data.stage == 1) {
+		/* The timer hasn't run yet - so cancel it */
+		reset_data.stage = -1;
+	}
+	spin_unlock_irqrestore(&reset_data.lock, flags);
+
+	hrtimer_cancel(&reset_data.timer);
+
+	if (reset_data.stage == 2) {
+		/* The reset has already started.
+		 * Wait for the reset to complete
+		 */
+		wait_event(kbdev->hwaccess.backend.reset_wait,
+				atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+						== KBASE_RESET_GPU_NOT_PENDING);
+	}
+	destroy_hrtimer_on_stack(&reset_data.timer);
+
+	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+	/* Ensure that the signallers of the waitqs have finished */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+	u32 flush_id = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return flush_id;
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (NULL == kbdev->hwaccess.backend.reset_workq)
+		return -EINVAL;
+
+	KBASE_DEBUG_ASSERT(0 ==
+		object_is_on_stack(&kbdev->hwaccess.backend.reset_work));
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+#endif
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
+ * @kbdev: kbase device pointer
+ * @kctx: context to check against
+ * @js: slot to check
+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on
+ *                slot @js should be checked
+ *
+ * These checks are based upon the parameters that would normally be passed
+ * to kbase_job_slot_hardstop().
+ *
+ * In the event of @target_katom being NULL, this will check the last jobs that
+ * are likely to be running on the slot to see if a) they belong to kctx, and
+ * so would be stopped, and b) whether they have AFBC.
+ *
+ * In that case, it's guaranteed that a job currently executing on the HW with
+ * AFBC will be detected. However, this is a conservative check because it also
+ * detects jobs that have just completed too.
+ *
+ * Return: true when hard-stop _might_ stop an AFBC atom, else false.
+ */
+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
+		struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom)
+{
+	bool ret = false;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* When we have an atom the decision can be made straight away. */
+	if (target_katom)
+		return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC);
+
+	/* Otherwise, we must check the hardware to see if it has atoms from
+	 * this context with AFBC.
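+	 * Only the atoms' core_req flags are consulted; the job chains
+	 * themselves are not decoded, which is why the result is conservative.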
*/ + for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { + struct kbase_jd_atom *katom; + + katom = kbase_gpu_inspect(kbdev, js, i); + if (!katom) + continue; + + /* Ignore atoms from other contexts, they won't be stopped when + * we use this for checking if we should hard-stop them */ + if (katom->kctx != kctx) + continue; + + /* An atom on this slot and this context: check for AFBC */ + if (katom->core_req & BASE_JD_REQ_FS_AFBC) { + ret = true; + break; + } + } + + return ret; +} +#endif /* KBASE_GPU_RESET_EN */ + +/** + * kbase_job_slot_softstop_swflags - Soft-stop a job with flags + * @kbdev: The kbase device + * @js: The job slot to soft-stop + * @target_katom: The job that should be soft-stopped (or NULL for any job) + * @sw_flags: Flags to pass in about the soft-stop + * + * Context: + * The job slot lock must be held when calling this function. + * The job slot must not already be in the process of being soft-stopped. + * + * Soft-stop the specified job slot, with extra information about the stop + * + * Where possible any job in the next register is evicted before the soft-stop. + */ +void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, + struct kbase_jd_atom *target_katom, u32 sw_flags) +{ + KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); + kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, + JS_COMMAND_SOFT_STOP | sw_flags); +} + +/** + * kbase_job_slot_softstop - Soft-stop the specified job slot + * @kbdev: The kbase device + * @js: The job slot to soft-stop + * @target_katom: The job that should be soft-stopped (or NULL for any job) + * Context: + * The job slot lock must be held when calling this function. + * The job slot must not already be in the process of being soft-stopped. + * + * Where possible any job in the next register is evicted before the soft-stop. + */ +void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, + struct kbase_jd_atom *target_katom) +{ + kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); +} + +/** + * kbase_job_slot_hardstop - Hard-stop the specified job slot + * @kctx: The kbase context that contains the job(s) that should + * be hard-stopped + * @js: The job slot to hard-stop + * @target_katom: The job that should be hard-stopped (or NULL for all + * jobs from the context) + * Context: + * The job slot lock must be held when calling this function. + */ +void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, + struct kbase_jd_atom *target_katom) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool stopped; +#if KBASE_GPU_RESET_EN + /* We make the check for AFBC before evicting/stopping atoms. Note + * that no other thread can modify the slots whilst we have the + * hwaccess_lock. */ + int needs_workaround_for_afbc = + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542) + && kbasep_check_for_afbc_on_slot(kbdev, kctx, js, + target_katom); +#endif + + stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, + target_katom, + JS_COMMAND_HARD_STOP); +#if KBASE_GPU_RESET_EN + if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) || + kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) || + needs_workaround_for_afbc)) { + /* MIDBASE-2916 if a fragment job with AFBC encoding is + * hardstopped, ensure to do a soft reset also in order to + * clear the GPU status. + * Workaround for HW issue 8401 has an issue,so after + * hard-stopping just reset the GPU. 
This will ensure that the
+		 * jobs leave the GPU. */
+		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+			dev_err(kbdev->dev, "Issuing GPU soft-reset after hard stopping due to hardware issue");
+			kbase_reset_gpu_locked(kbdev);
+		}
+	}
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period.
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context).
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete.
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	/* For hard-stop, don't enter if hard-stop not allowed */
+	if (hw_action == JS_COMMAND_HARD_STOP &&
+			!kbasep_hard_stop_allowed(kbdev, core_reqs))
+		return;
+
+	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
+	 * causing disjoint */
+	if (hw_action == JS_COMMAND_SOFT_STOP &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
+				(action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+		return;
+
+	/* Nothing to do if already logged disjoint state on this atom */
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+		return;
+
+	target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+	kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_leave_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
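+ *
+ * Illustrative pairing (assuming the stop action set
+ * KBASE_KATOM_FLAG_IN_DISJOINT on the atom):
+ *
+ *	kbase_job_check_enter_disjoint(kbdev, action, core_reqs, katom);
+ *	...stop completes and the atom is removed from the hardware...
+ *	kbase_job_check_leave_disjoint(kbdev, katom);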
+ */ +void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, + struct kbase_jd_atom *target_katom) +{ + if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) { + target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT; + kbase_disjoint_state_down(kbdev); + } +} + + +#if KBASE_GPU_RESET_EN +static void kbase_debug_dump_registers(struct kbase_device *kbdev) +{ + int i; + + kbase_io_history_dump(kbdev); + + dev_err(kbdev->dev, "Register state:"); + dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL)); + dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x", + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL)); + for (i = 0; i < 3; i++) { + dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", + i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS), + NULL), + i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO), + NULL)); + } + dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL)); + dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL), + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL)); + dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL)); + dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL)); + dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG), NULL)); +} + +static void kbasep_reset_timeout_worker(struct work_struct *data) +{ + unsigned long flags; + struct kbase_device *kbdev; + ktime_t end_timestamp = ktime_get(); + struct kbasep_js_device_data *js_devdata; + bool try_schedule = false; + bool silent = false; + u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + + KBASE_DEBUG_ASSERT(data); + + kbdev = container_of(data, struct kbase_device, + hwaccess.backend.reset_work); + + KBASE_DEBUG_ASSERT(kbdev); + js_devdata = &kbdev->js_data; + + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_SILENT) + silent = true; + + KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); + + /* Suspend vinstr. + * This call will block until vinstr is suspended. */ + kbase_vinstr_suspend(kbdev->vinstr_ctx); + + /* Make sure the timer has completed - this cannot be done from + * interrupt context, so this cannot be done within + * kbasep_try_reset_gpu_early. */ + hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); + + if (kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + /* This would re-activate the GPU. 
Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	spin_lock(&kbdev->hwaccess_lock);
+	spin_lock(&kbdev->mmu_mask_change);
+	/* We're about to flush out the IRQs and their bottom halves */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQs to prevent handlers from running after the spinlocks
+	 * are released; this also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts_nolock(kbdev);
+
+	spin_unlock(&kbdev->mmu_mask_change);
+	spin_unlock(&kbdev->hwaccess_lock);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Ensure that any IRQ handlers have finished. This must be done
+	 * without holding any locks that the IRQ handlers may take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
+		/* Ensure that L2 is not transitioning when we send the reset
+		 * command */
+		while (--max_loops && kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_L2))
+			;
+
+		WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
+	}
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slots have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear. At this point we
+	 * assume that anything still left on the GPU is stuck there, and
+	 * we'll kill it when we reset the GPU */
+
+	if (!silent)
+		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+								RESET_TIMEOUT);
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	if (!silent)
+		kbase_debug_dump_registers(kbdev);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->protected_mode = false;
+	kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_restore_all_as(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	kbase_disjoint_state_down(kbdev);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
+
+	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+		try_schedule = true;
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately.
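+	 * (The synchronous wait itself is performed by
+	 * kbase_pm_check_transitions_sync() below, which does not return
+	 * until the requested power transitions have completed.)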
+	 */
+	kbase_pm_check_transitions_sync(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Try submitting some jobs to restart processing */
+	if (try_schedule) {
+		KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+									0);
+		kbase_js_sched_all(kbdev);
+	}
+
+	/* Process any pending slot updates */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_context_idle(kbdev);
+
+	/* Release vinstr */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+						KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent reading incorrect registers when dumping a failed job,
+	 * skip the early reset.
+	 */
+	if (kbdev->job_fault_debug != false)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+						KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu_locked
+ *   should not be called.
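+ *
+ * Typical usage pairs the two calls (an illustrative sketch; the same
+ * pattern appears in kbase_job_slot_hardstop() above):
+ *
+ *   if (kbase_prepare_to_reset_gpu_locked(kbdev))
+ *           kbase_reset_gpu_locked(kbdev);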
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_wait to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occurring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occurring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+
+void kbase_reset_gpu_silent(struct kbase_device *kbdev)
+{
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_SILENT) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+bool kbase_reset_gpu_active(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_NOT_PENDING)
+		return false;
+
+	return true;
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100644
index 000000000000..1f382b3c1af4
---
/dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h @@ -0,0 +1,164 @@ +/* + * + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Job Manager backend-specific low-level APIs. + */ + +#ifndef _KBASE_JM_HWACCESS_H_ +#define _KBASE_JM_HWACCESS_H_ + +#include <mali_kbase_hw.h> +#include <mali_kbase_debug.h> +#include <linux/atomic.h> + +#include <backend/gpu/mali_kbase_jm_rb.h> + +/** + * kbase_job_submit_nolock() - Submit a job to a certain job-slot + * @kbdev: Device pointer + * @katom: Atom to submit + * @js: Job slot to submit on + * + * The caller must check kbasep_jm_is_submit_slots_free() != false before + * calling this. + * + * The following locking conditions are made on the caller: + * - it must hold the hwaccess_lock + */ +void kbase_job_submit_nolock(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, int js); + +/** + * kbase_job_done_slot() - Complete the head job on a particular job-slot + * @kbdev: Device pointer + * @s: Job slot + * @completion_code: Completion code of job reported by GPU + * @job_tail: Job tail address reported by GPU + * @end_timestamp: Timestamp of job completion + */ +void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, + u64 job_tail, ktime_t *end_timestamp); + +#ifdef CONFIG_GPU_TRACEPOINTS +static inline char *kbasep_make_job_slot_string(int js, char *js_string, + size_t js_size) +{ + snprintf(js_string, js_size, "job_slot_%i", js); + return js_string; +} +#endif + +/** + * kbase_job_hw_submit() - Submit a job to the GPU + * @kbdev: Device pointer + * @katom: Atom to submit + * @js: Job slot to submit on + * + * The caller must check kbasep_jm_is_submit_slots_free() != false before + * calling this. + * + * The following locking conditions are made on the caller: + * - it must hold the hwaccess_lock + */ +void kbase_job_hw_submit(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js); + +/** + * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop + * on the specified atom + * @kbdev: Device pointer + * @js: Job slot to stop on + * @action: The action to perform, either JSn_COMMAND_HARD_STOP or + * JSn_COMMAND_SOFT_STOP + * @core_reqs: Core requirements of atom to stop + * @target_katom: Atom to stop + * + * The following locking conditions are made on the caller: + * - it must hold the hwaccess_lock + */ +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, + int js, + u32 action, + base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom); + +/** + * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job + * slot belonging to a given context. + * @kbdev: Device pointer + * @kctx: Context pointer. May be NULL + * @katom: Specific atom to stop. May be NULL + * @js: Job slot to hard stop + * @action: The action to perform, either JSn_COMMAND_HARD_STOP or + * JSn_COMMAND_SOFT_STOP + * + * If no context is provided then all jobs on the slot will be soft or hard + * stopped. 
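+ *
+ * For example, the following illustrative call (hwaccess lock held by the
+ * caller) soft-stops everything the given context currently has on slot 1:
+ *
+ *   kbase_backend_soft_hard_stop_slot(kbdev, kctx, 1, NULL,
+ *                                     JS_COMMAND_SOFT_STOP);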
+ * + * If a katom is provided then only that specific atom will be stopped. In this + * case the kctx parameter is ignored. + * + * Jobs that are on the slot but are not yet on the GPU will be unpulled and + * returned to the job scheduler. + * + * Return: true if an atom was stopped, false otherwise + */ +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js, + struct kbase_jd_atom *katom, + u32 action); + +/** + * kbase_job_slot_init - Initialise job slot framework + * @kbdev: Device pointer + * + * Called on driver initialisation + * + * Return: 0 on success + */ +int kbase_job_slot_init(struct kbase_device *kbdev); + +/** + * kbase_job_slot_halt - Halt the job slot framework + * @kbdev: Device pointer + * + * Should prevent any further job slot processing + */ +void kbase_job_slot_halt(struct kbase_device *kbdev); + +/** + * kbase_job_slot_term - Terminate job slot framework + * @kbdev: Device pointer + * + * Called on driver termination + */ +void kbase_job_slot_term(struct kbase_device *kbdev); + +/** + * kbase_gpu_cacheclean - Cause a GPU cache clean & flush + * @kbdev: Device pointer + * + * Caller must not be in IRQ context + */ +void kbase_gpu_cacheclean(struct kbase_device *kbdev); + +#endif /* _KBASE_JM_HWACCESS_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c new file mode 100644 index 000000000000..a41e7b5b7afb --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c @@ -0,0 +1,1947 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Register-based HW access backend specific APIs + */ + +#include <mali_kbase.h> +#include <mali_kbase_hwaccess_jm.h> +#include <mali_kbase_jm.h> +#include <mali_kbase_js.h> +#include <mali_kbase_tlstream.h> +#include <mali_kbase_10969_workaround.h> +#include <backend/gpu/mali_kbase_cache_policy_backend.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_jm_internal.h> +#include <backend/gpu/mali_kbase_js_affinity.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +/* Return whether the specified ringbuffer is empty. HW access lock must be + * held */ +#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) +/* Return number of atoms currently in the specified ringbuffer. 
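+ * (A worked example, assuming the 8-bit indices that the s8 cast implies:
+ * write_idx == 1 and read_idx == 255 give (s8)(1 - 255) == 2, i.e. two
+ * entries in flight across the wraparound.)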
HW access lock + * must be held */ +#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) + +static void kbase_gpu_release_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + ktime_t *end_timestamp); + +/** + * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer + * @kbdev: Device pointer + * @katom: Atom to enqueue + * + * Context: Caller must hold the HW access lock + */ +static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr]; + + WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom; + rb->write_idx++; + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; +} + +/** + * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once + * it has been completed + * @kbdev: Device pointer + * @js: Job slot to remove atom from + * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in + * which case current time will be used. + * + * Context: Caller must hold the HW access lock + * + * Return: Atom removed from ringbuffer + */ +static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, + int js, + ktime_t *end_timestamp) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + struct kbase_jd_atom *katom; + + if (SLOT_RB_EMPTY(rb)) { + WARN(1, "GPU ringbuffer unexpectedly empty\n"); + return NULL; + } + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; + + kbase_gpu_release_atom(kbdev, katom, end_timestamp); + + rb->read_idx++; + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB; + + kbase_js_debug_log_current_affinities(kbdev); + + return katom; +} + +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, + int idx) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if ((SLOT_RB_ENTRIES(rb) - 1) < idx) + return NULL; /* idx out of range */ + + return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; +} + +struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev, + int js) +{ + return kbase_gpu_inspect(kbdev, js, 0); +} + +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, + int js) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + + if (SLOT_RB_EMPTY(rb)) + return NULL; + + return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom; +} + +/** + * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently + * on the GPU + * @kbdev: Device pointer + * @js: Job slot to inspect + * + * Return: true if there are atoms on the GPU for slot js, + * false otherwise + */ +static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js) +{ + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (!katom) + return false; + if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED || + katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY) + return true; + } + + return false; +} + +/** + * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms + * currently on the GPU + * @kbdev: Device pointer + * + * Return: true if there are any atoms on the GPU, false otherwise + */ +static bool kbase_gpu_atoms_submitted_any(struct 
kbase_device *kbdev) +{ + int js; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + return true; + } + } + return false; +} + +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && (katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED)) + nr++; + } + + return nr; +} + +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + if (kbase_gpu_inspect(kbdev, js, i)) + nr++; + } + + return nr; +} + +static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, + enum kbase_atom_gpu_rb_state min_rb_state) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && (katom->gpu_rb_state >= min_rb_state)) + nr++; + } + + return nr; +} + +/** + * check_secure_atom - Check if the given atom is in the given secure state and + * has a ringbuffer state of at least + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @katom: Atom pointer + * @secure: Desired secure state + * + * Return: true if atom is in the given state, false otherwise + */ +static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) +{ + if (katom->gpu_rb_state >= + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && + ((kbase_jd_katom_is_protected(katom) && secure) || + (!kbase_jd_katom_is_protected(katom) && !secure))) + return true; + + return false; +} + +/** + * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given + * secure state in the ringbuffers of at least + * state + * KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE + * @kbdev: Device pointer + * @secure: Desired secure state + * + * Return: true if any atoms are in the given state, false otherwise + */ +static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, + bool secure) +{ + int js, i; + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, i); + + if (katom) { + if (check_secure_atom(katom, secure)) + return true; + } + } + } + + return false; +} + +int kbase_backend_slot_free(struct kbase_device *kbdev, int js) +{ + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != + KBASE_RESET_GPU_NOT_PENDING) { + /* The GPU is being reset - so prevent submission */ + return 0; + } + + return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); +} + + +static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev, + int js, + struct kbase_jd_atom *katom) +{ + /* The most recently checked affinity. Having this at this scope allows + * us to guarantee that we've checked the affinity in this function + * call. 
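+	 *
+	 * (Illustrative happy path through the state machine below, when
+	 * every request succeeds at the first attempt:
+	 * NO_CORES_REQUESTED -> WAITING_FOR_REQUESTED_CORES ->
+	 * RECHECK_AFFINITY -> CHECK_AFFINITY_VIOLATIONS -> READY.)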
+ */ + u64 recently_chosen_affinity = 0; + bool chosen_affinity = false; + bool retry; + + do { + retry = false; + + /* NOTE: The following uses a number of FALLTHROUGHs to optimize + * the calls to this function. Ending of the function is + * indicated by BREAK OUT */ + switch (katom->coreref_state) { + /* State when job is first attempted to be run */ + case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: + KBASE_DEBUG_ASSERT(katom->affinity == 0); + + /* Compute affinity */ + if (false == kbase_js_choose_affinity( + &recently_chosen_affinity, kbdev, katom, + js)) { + /* No cores are currently available */ + /* *** BREAK OUT: No state transition *** */ + break; + } + + chosen_affinity = true; + + /* Request the cores */ + kbase_pm_request_cores(kbdev, + katom->core_req & BASE_JD_REQ_T, + recently_chosen_affinity); + + katom->affinity = recently_chosen_affinity; + + /* Proceed to next state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: + { + enum kbase_pm_cores_ready cores_ready; + + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + + cores_ready = kbase_pm_register_inuse_cores( + kbdev, + katom->core_req & BASE_JD_REQ_T, + katom->affinity); + if (cores_ready == KBASE_NEW_AFFINITY) { + /* Affinity no longer valid - return to + * previous state */ + kbasep_js_job_check_deref_cores(kbdev, + katom); + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REGISTER_INUSE_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) katom->affinity); + /* *** BREAK OUT: Return to previous + * state, retry *** */ + retry = true; + break; + } + if (cores_ready == KBASE_CORES_NOT_READY) { + /* Stay in this state and return, to + * retry at this state later */ + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REGISTER_INUSE_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) katom->affinity); + /* *** BREAK OUT: No state transition + * *** */ + break; + } + /* Proceed to next state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; + } + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + + /* Optimize out choosing the affinity twice in the same + * function call */ + if (chosen_affinity == false) { + /* See if the affinity changed since a previous + * call. 
*/ + if (false == kbase_js_choose_affinity( + &recently_chosen_affinity, + kbdev, katom, js)) { + /* No cores are currently available */ + kbasep_js_job_check_deref_cores(kbdev, + katom); + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REQUEST_ON_RECHECK_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) recently_chosen_affinity); + /* *** BREAK OUT: Transition to lower + * state *** */ + break; + } + chosen_affinity = true; + } + + /* Now see if this requires a different set of cores */ + if (recently_chosen_affinity != katom->affinity) { + enum kbase_pm_cores_ready cores_ready; + + kbase_pm_request_cores(kbdev, + katom->core_req & BASE_JD_REQ_T, + recently_chosen_affinity); + + /* Register new cores whilst we still hold the + * old ones, to minimize power transitions */ + cores_ready = + kbase_pm_register_inuse_cores(kbdev, + katom->core_req & BASE_JD_REQ_T, + recently_chosen_affinity); + kbasep_js_job_check_deref_cores(kbdev, katom); + + /* Fixup the state that was reduced by + * deref_cores: */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; + katom->affinity = recently_chosen_affinity; + if (cores_ready == KBASE_NEW_AFFINITY) { + /* Affinity no longer valid - return to + * previous state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; + + kbasep_js_job_check_deref_cores(kbdev, + katom); + + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REGISTER_INUSE_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) katom->affinity); + /* *** BREAK OUT: Return to previous + * state, retry *** */ + retry = true; + break; + } + /* Now might be waiting for powerup again, with + * a new affinity */ + if (cores_ready == KBASE_CORES_NOT_READY) { + /* Return to previous state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REGISTER_ON_RECHECK_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) katom->affinity); + /* *** BREAK OUT: Transition to lower + * state *** */ + break; + } + } + /* Proceed to next state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS: + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + KBASE_DEBUG_ASSERT(katom->affinity == + recently_chosen_affinity); + + /* Note: this is where the caller must've taken the + * hwaccess_lock */ + + /* Check for affinity violations - if there are any, + * then we just ask the caller to requeue and try again + * later */ + if (kbase_js_affinity_would_violate(kbdev, js, + katom->affinity) != false) { + /* Return to previous state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; + /* *** BREAK OUT: Transition to lower state *** + */ + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_AFFINITY_WOULD_VIOLATE, + katom->kctx, katom, katom->jc, js, + (u32) katom->affinity); + break; + } + + /* No affinity violations would result, so the cores are + * ready */ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY; + /* *** BREAK OUT: Cores Ready *** */ + break; + + default: + KBASE_DEBUG_ASSERT_MSG(false, + "Unhandled kbase_atom_coreref_state %d", + katom->coreref_state); + break; + } + } while (retry != false); + + return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY); +} + +static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + 
KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + + switch (katom->coreref_state) { + case KBASE_ATOM_COREREF_STATE_READY: + /* State where atom was submitted to the HW - just proceed to + * power-down */ + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + + /* *** FALLTHROUGH *** */ + + case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: + /* State where cores were registered */ + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T, + katom->affinity); + + break; + + case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: + /* State where cores were requested, but not registered */ + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T, + katom->affinity); + break; + + case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: + /* Initial state - nothing required */ + KBASE_DEBUG_ASSERT(katom->affinity == 0); + break; + + default: + KBASE_DEBUG_ASSERT_MSG(false, + "Unhandled coreref_state: %d", + katom->coreref_state); + break; + } + + katom->affinity = 0; + katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; +} + +static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev, + base_jd_core_req core_req, u64 affinity, + enum kbase_atom_coreref_state coreref_state) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + switch (coreref_state) { + case KBASE_ATOM_COREREF_STATE_READY: + /* State where atom was submitted to the HW - just proceed to + * power-down */ + KBASE_DEBUG_ASSERT(affinity != 0 || + (core_req & BASE_JD_REQ_T)); + + /* *** FALLTHROUGH *** */ + + case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: + /* State where cores were registered */ + KBASE_DEBUG_ASSERT(affinity != 0 || + (core_req & BASE_JD_REQ_T)); + kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T, + affinity); + + break; + + case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: + /* State where cores were requested, but not registered */ + KBASE_DEBUG_ASSERT(affinity != 0 || + (core_req & BASE_JD_REQ_T)); + kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T, + affinity); + break; + + case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: + /* Initial state - nothing required */ + KBASE_DEBUG_ASSERT(affinity == 0); + break; + + default: + KBASE_DEBUG_ASSERT_MSG(false, + "Unhandled coreref_state: %d", + coreref_state); + break; + } +} + +static void kbase_gpu_release_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + ktime_t *end_timestamp) +{ + struct kbase_context *kctx = katom->kctx; + + switch (katom->gpu_rb_state) { + case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: + /* Should be impossible */ + WARN(1, "Attempting to release atom not in ringbuffer\n"); + break; + + case KBASE_ATOM_GPU_RB_SUBMITTED: + /* Inform power management at start/finish of atom so it can + * update its GPU utilisation metrics. Mark atom as not + * submitted beforehand. 
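+		 *
+		 * (More generally, note that the switch below is a deliberate
+		 * fall-through chain: release starts at the atom's current
+		 * ringbuffer state and undoes each acquisition in turn, down
+		 * to KBASE_ATOM_GPU_RB_WAITING_BLOCKED.)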
*/ + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + kbase_pm_metrics_update(kbdev, end_timestamp); + + if (katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom, + &kbdev->gpu_props.props.raw_props.js_features + [katom->slot_nr]); + KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]); + KBASE_TLSTREAM_TL_NRET_CTX_LPU(kctx, + &kbdev->gpu_props.props.raw_props.js_features + [katom->slot_nr]); + + case KBASE_ATOM_GPU_RB_READY: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: + kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr, + katom->affinity); + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + break; + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + if (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_CHECK || + katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_CHECK) + kbdev->protected_mode_transition = false; + + if (kbase_jd_katom_is_protected(katom) && + (katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) { + kbase_vinstr_resume(kbdev->vinstr_ctx); + + /* Go back to configured model for IPA */ + kbase_ipa_model_use_configured_locked(kbdev); + } + + + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_RETURN_TO_JS: + break; + } + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; + katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; +} + +static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + kbase_gpu_release_atom(kbdev, katom, NULL); + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; +} + +static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + bool slot_busy[3]; + + if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) + return true; + slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0, + KBASE_ATOM_GPU_RB_WAITING_AFFINITY); + slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1, + KBASE_ATOM_GPU_RB_WAITING_AFFINITY); + slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2, + KBASE_ATOM_GPU_RB_WAITING_AFFINITY); + + if ((js == 2 && !(slot_busy[0] || slot_busy[1])) || + (js != 2 && !slot_busy[2])) + return true; + + /* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */ + if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) || + kbase_gpu_atoms_submitted(kbdev, 1) || + backend->rmu_workaround_flag)) + return false; + + /* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */ + if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) || + !backend->rmu_workaround_flag)) + return false; + + backend->rmu_workaround_flag = !backend->rmu_workaround_flag; + + return true; +} + +/** + * other_slots_busy - Determine if any job slots other than @js are currently + * running atoms + * @kbdev: Device pointer + * @js: Job slot + * + * Return: true if any slots other than @js are busy, false otherwise + */ +static inline bool other_slots_busy(struct kbase_device *kbdev, int js) +{ + int slot; + + for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { + if (slot == js) + 
continue;
+
+		if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot,
+				KBASE_ATOM_GPU_RB_SUBMITTED))
+			return true;
+	}
+
+	return false;
+}
+
+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev)
+{
+	return kbdev->protected_mode;
+}
+
+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot enter protected mode: protected callbacks not specified.\n");
+
+	/*
+	 * When entering into protected mode, we must ensure that the
+	 * GPU is not operating in coherent mode as well. This is to
+	 * ensure that no protected memory can be leaked.
+	 */
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
+
+	if (kbdev->protected_ops) {
+		/* Switch GPU to protected mode */
+		err = kbdev->protected_ops->protected_mode_enable(
+				kbdev->protected_dev);
+
+		if (err)
+			dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
+					err);
+		else
+			kbdev->protected_mode = true;
+	}
+
+	return err;
+}
+
+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot exit protected mode: protected callbacks not specified.\n");
+
+	if (!kbdev->protected_ops)
+		return -EINVAL;
+
+	/* The protected mode disable callback will be called as part of reset
+	 */
+	kbase_reset_gpu_silent(kbdev);
+
+	return 0;
+}
+
+static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	switch (katom[idx]->protected_state.enter) {
+	case KBASE_ATOM_ENTER_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitioning, and
+		 * that there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		kbdev->protected_mode_transition = true;
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_VINSTR;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
+		if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
+			/*
+			 * We can't switch now because
+			 * the vinstr core state switch
+			 * is not done yet.
+			 */
+			return -EAGAIN;
+		}
+
+		/* Use generic model for IPA in protected mode */
+		kbase_ipa_model_use_fallback_locked(kbdev);
+
+		/* Once this point is reached, the GPU must either be switched
+		 * to protected mode or vinstr must be re-enabled. */
+
+		/*
+		 * Not in the correct mode, begin the protected mode switch.
+		 * Entering protected mode requires us to power down the L2,
+		 * and drop out of fully coherent mode.
+		 */
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
+		/* Avoid unnecessary waiting on non-ACE platforms. */
+		if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) {
+			if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+				/*
+				 * The L2 is still powered, wait for all the
+				 * users to finish with it before doing the
+				 * actual reset.
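+				 *
+				 * (Returning -EAGAIN leaves the atom in the
+				 * WAITING_PROTECTED_MODE_TRANSITION ringbuffer
+				 * state; the state machine is simply re-entered
+				 * on the next kbase_backend_slot_update() call.)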
+				 */
+				return -EAGAIN;
+			}
+		}
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_FINISHED;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
+
+		/* No jobs running, so we can switch GPU mode right now. */
+		err = kbase_gpu_protected_mode_enter(kbdev);
+
+		/*
+		 * Regardless of result, we are no longer transitioning
+		 * the GPU.
+		 */
+		kbdev->protected_mode_transition = false;
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev);
+		if (err) {
+			/*
+			 * Failed to switch into protected mode, resume
+			 * vinstr core and fail atom.
+			 */
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order. */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			/* Go back to configured model for IPA */
+			kbase_ipa_model_use_configured_locked(kbdev);
+
+			return -EINVAL;
+		}
+
+		/* Protected mode sanity checks. */
+		KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) ==
+			kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]),
+			kbase_gpu_in_protected_mode(kbdev));
+		katom[idx]->gpu_rb_state =
+			KBASE_ATOM_GPU_RB_READY;
+	}
+
+	return 0;
+}
+
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	switch (katom[idx]->protected_state.exit) {
+	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitioning, and
+		 * that there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		/*
+		 * Exiting protected mode requires a reset, but first the L2
+		 * needs to be powered down to ensure it's not active when the
+		 * reset is issued.
+		 */
+		katom[idx]->protected_state.exit =
+			KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
+
+		kbdev->protected_mode_transition = true;
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/*
+			 * The L2 is still powered, wait for all the users to
+			 * finish with it before doing the actual reset.
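+			 *
+			 * (Once the L2 is down, the EXIT_PROTECTED_RESET step
+			 * below calls kbase_gpu_protected_mode_reset(), whose
+			 * silent GPU reset is what actually takes the GPU out
+			 * of protected mode.)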
+ */ + return -EAGAIN; + } + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_RESET; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_EXIT_PROTECTED_RESET: + /* Issue the reset to the GPU */ + err = kbase_gpu_protected_mode_reset(kbdev); + + if (err) { + kbdev->protected_mode_transition = false; + + /* Failed to exit protected mode, fail atom */ + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + + kbase_vinstr_resume(kbdev->vinstr_ctx); + + /* Use generic model for IPA in protected mode */ + kbase_ipa_model_use_fallback_locked(kbdev); + + return -EINVAL; + } + + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: + /* A GPU reset is issued when exiting protected mode. Once the + * reset is done all atoms' state will also be reset. For this + * reason, if the atom is still in this state we can safely + * say that the reset has not completed i.e., we have not + * finished exiting protected mode yet. + */ + return -EAGAIN; + } + + return 0; +} + +void kbase_backend_slot_update(struct kbase_device *kbdev) +{ + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + struct kbase_jd_atom *katom[2]; + int idx; + + katom[0] = kbase_gpu_inspect(kbdev, js, 0); + katom[1] = kbase_gpu_inspect(kbdev, js, 1); + WARN_ON(katom[1] && !katom[0]); + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + bool cores_ready; + int ret; + + if (!katom[idx]) + continue; + + switch (katom[idx]->gpu_rb_state) { + case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: + /* Should be impossible */ + WARN(1, "Attempting to update atom not in ringbuffer\n"); + break; + + case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: + if (katom[idx]->atom_flags & + KBASE_KATOM_FLAG_X_DEP_BLOCKED) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: + if (kbase_gpu_check_secure_atoms(kbdev, + !kbase_jd_katom_is_protected( + katom[idx]))) + break; + + if ((idx == 1) && (kbase_jd_katom_is_protected( + katom[0]) != + kbase_jd_katom_is_protected( + katom[1]))) + break; + + if (kbdev->protected_mode_transition) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + + /* + * Exiting protected mode must be done before + * the references on the cores are taken as + * a power down the L2 is required which + * can't happen after the references for this + * atom are taken. + */ + + if (!kbase_gpu_in_protected_mode(kbdev) && + kbase_jd_katom_is_protected(katom[idx])) { + /* Atom needs to transition into protected mode. */ + ret = kbase_jm_enter_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + } else if (kbase_gpu_in_protected_mode(kbdev) && + !kbase_jd_katom_is_protected(katom[idx])) { + /* Atom needs to transition out of protected mode. 
*/ + ret = kbase_jm_exit_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + } + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_CHECK; + + /* Atom needs no protected mode transition. */ + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + if (katom[idx]->will_fail_event_code) { + kbase_gpu_mark_atom_for_return(kbdev, + katom[idx]); + /* Set EVENT_DONE so this atom will be + completed, not unpulled. */ + katom[idx]->event_code = + BASE_JD_EVENT_DONE; + /* Only return if head atom or previous + * atom already removed - as atoms must + * be returned in order. */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + break; + } + + cores_ready = + kbasep_js_job_check_ref_cores(kbdev, js, + katom[idx]); + + if (katom[idx]->event_code == + BASE_JD_EVENT_PM_EVENT) { + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_RETURN_TO_JS; + break; + } + + if (!cores_ready) + break; + + kbase_js_affinity_retain_slot_cores(kbdev, js, + katom[idx]->affinity); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_AFFINITY; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: + if (!kbase_gpu_rmu_workaround(kbdev, js)) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_READY; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_READY: + + if (idx == 1) { + /* Only submit if head atom or previous + * atom already submitted */ + if ((katom[0]->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED && + katom[0]->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + break; + + /* If intra-slot serialization in use + * then don't submit atom to NEXT slot + */ + if (kbdev->serialize_jobs & + KBASE_SERIALIZE_INTRA_SLOT) + break; + } + + /* If inter-slot serialization in use then don't + * submit atom if any other slots are in use */ + if ((kbdev->serialize_jobs & + KBASE_SERIALIZE_INTER_SLOT) && + other_slots_busy(kbdev, js)) + break; + + if ((kbdev->serialize_jobs & + KBASE_SERIALIZE_RESET) && + kbase_reset_gpu_active(kbdev)) + break; + + /* Check if this job needs the cycle counter + * enabled before submission */ + if (katom[idx]->core_req & BASE_JD_REQ_PERMON) + kbase_pm_request_gpu_cycle_counter_l2_is_on( + kbdev); + + kbase_job_hw_submit(kbdev, katom[idx], js); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_SUBMITTED; + + /* Inform power management at start/finish of + * atom so it can update its GPU utilisation + * metrics. 
*/ + kbase_pm_metrics_update(kbdev, + &katom[idx]->start_timestamp); + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_SUBMITTED: + /* Atom submitted to HW, nothing else to do */ + break; + + case KBASE_ATOM_GPU_RB_RETURN_TO_JS: + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, + katom[idx]); + } + break; + } + } + } + + /* Warn if PRLAM-8987 affinity restrictions are violated */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) + WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) || + kbase_gpu_atoms_submitted(kbdev, 1)) && + kbase_gpu_atoms_submitted(kbdev, 2)); +} + + +void kbase_backend_run_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + kbase_gpu_enqueue_atom(kbdev, katom); + kbase_backend_slot_update(kbdev); +} + +#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \ + (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER)) + +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js) +{ + struct kbase_jd_atom *katom; + struct kbase_jd_atom *next_katom; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom = kbase_gpu_inspect(kbdev, js, 0); + next_katom = kbase_gpu_inspect(kbdev, js, 1); + + if (next_katom && katom->kctx == next_katom->kctx && + next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && + HAS_DEP(next_katom) && + (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL) + != 0 || + kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL) + != 0)) { + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), + JS_COMMAND_NOP, NULL); + next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + + KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom, + &kbdev->gpu_props.props.raw_props.js_features + [katom->slot_nr]); + KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as + [katom->kctx->as_nr]); + KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx, + &kbdev->gpu_props.props.raw_props.js_features + [katom->slot_nr]); + + return true; + } + + return false; +} + +void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + u32 completion_code, + u64 job_tail, + ktime_t *end_timestamp) +{ + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + struct kbase_context *kctx = katom->kctx; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* + * When a hard-stop is followed close after a soft-stop, the completion + * code may be set to STOPPED, even though the job is terminated + */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) { + if (completion_code == BASE_JD_EVENT_STOPPED && + (katom->atom_flags & + KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { + completion_code = BASE_JD_EVENT_TERMINATED; + } + } + + if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) || (katom->core_req & + BASE_JD_REQ_SKIP_CACHE_END)) && + completion_code != BASE_JD_EVENT_DONE && + !(completion_code & BASE_JD_SW_EVENT)) { + /* When a job chain fails, on a T60x or when + * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not + * flushed. To prevent future evictions causing possible memory + * corruption we need to flush the cache manually before any + * affected memory gets reused. 
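+		 *
+		 * (The flush itself is deferred: the affected cores are
+		 * retained here, and the actual clean & flush is performed
+		 * later via kbase_gpu_cacheclean(), declared in
+		 * mali_kbase_jm_internal.h above.)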
*/ + katom->need_cache_flush_cores_retained = katom->affinity; + kbase_pm_request_cores(kbdev, false, katom->affinity); + } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) { + if (kbdev->gpu_props.num_core_groups > 1 && + !(katom->affinity & + kbdev->gpu_props.props.coherency_info.group[0].core_mask + ) && + (katom->affinity & + kbdev->gpu_props.props.coherency_info.group[1].core_mask + )) { + dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n"); + katom->need_cache_flush_cores_retained = + katom->affinity; + kbase_pm_request_cores(kbdev, false, + katom->affinity); + } + } + + katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); + kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0); + + if (completion_code == BASE_JD_EVENT_STOPPED) { + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, + 0); + + /* + * Dequeue next atom from ringbuffers on same slot if required. + * This atom will already have been removed from the NEXT + * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that + * the atoms on this slot are returned in the correct order. + */ + if (next_katom && katom->kctx == next_katom->kctx && + next_katom->sched_priority == + katom->sched_priority) { + kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); + kbase_jm_return_atom_to_js(kbdev, next_katom); + } + } else if (completion_code != BASE_JD_EVENT_DONE) { + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + int i; + +#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0 + KBASE_TRACE_DUMP(kbdev); +#endif + kbasep_js_clear_submit_allowed(js_devdata, katom->kctx); + + /* + * Remove all atoms on the same context from ringbuffers. This + * will not remove atoms that are already on the GPU, as these + * are guaranteed not to have fail dependencies on the failed + * atom. 
+ */ + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { + struct kbase_jd_atom *katom_idx0 = + kbase_gpu_inspect(kbdev, i, 0); + struct kbase_jd_atom *katom_idx1 = + kbase_gpu_inspect(kbdev, i, 1); + + if (katom_idx0 && katom_idx0->kctx == katom->kctx && + HAS_DEP(katom_idx0) && + katom_idx0->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Dequeue katom_idx0 from ringbuffer */ + kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); + + if (katom_idx1 && + katom_idx1->kctx == katom->kctx + && HAS_DEP(katom_idx1) && + katom_idx0->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Dequeue katom_idx1 from ringbuffer */ + kbase_gpu_dequeue_atom(kbdev, i, + end_timestamp); + + katom_idx1->event_code = + BASE_JD_EVENT_STOPPED; + kbase_jm_return_atom_to_js(kbdev, + katom_idx1); + } + katom_idx0->event_code = BASE_JD_EVENT_STOPPED; + kbase_jm_return_atom_to_js(kbdev, katom_idx0); + + } else if (katom_idx1 && + katom_idx1->kctx == katom->kctx && + HAS_DEP(katom_idx1) && + katom_idx1->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Can not dequeue this atom yet - will be + * dequeued when atom at idx0 completes */ + katom_idx1->event_code = BASE_JD_EVENT_STOPPED; + kbase_gpu_mark_atom_for_return(kbdev, + katom_idx1); + } + } + } + + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, + js, completion_code); + + if (job_tail != 0 && job_tail != katom->jc) { + bool was_updated = (job_tail != katom->jc); + + /* Some of the job has been executed, so we update the job chain + * address to where we should resume from */ + katom->jc = job_tail; + if (was_updated) + KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, + katom, job_tail, js); + } + + /* Only update the event code for jobs that weren't cancelled */ + if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) + katom->event_code = (base_jd_event_code)completion_code; + + kbase_device_trace_register_access(kctx, REG_WRITE, + JOB_CONTROL_REG(JOB_IRQ_CLEAR), + 1 << js); + + /* Complete the job, and start new ones + * + * Also defer remaining work onto the workqueue: + * - Re-queue Soft-stopped jobs + * - For any other jobs, queue the job back into the dependency system + * - Schedule out the parent context if necessary, and schedule a new + * one in. + */ +#ifdef CONFIG_GPU_TRACEPOINTS + { + /* The atom in the HEAD */ + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, + 0); + + if (next_katom && next_katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + char js_string[16]; + + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, + js_string, + sizeof(js_string)), + ktime_to_ns(*end_timestamp), + (u32)next_katom->kctx->id, 0, + next_katom->work_id); + kbdev->hwaccess.backend.slot_rb[js].last_context = + next_katom->kctx; + } else { + char js_string[16]; + + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, + js_string, + sizeof(js_string)), + ktime_to_ns(ktime_get()), 0, 0, + 0); + kbdev->hwaccess.backend.slot_rb[js].last_context = 0; + } + } +#endif + + if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET) + kbase_reset_gpu_silent(kbdev); + + if (completion_code == BASE_JD_EVENT_STOPPED) + katom = kbase_jm_return_atom_to_js(kbdev, katom); + else + katom = kbase_jm_complete(kbdev, katom, end_timestamp); + + if (katom) { + /* Cross-slot dependency has now become runnable. Try to submit + * it. 
*/ + + /* Check if there are lower priority jobs to soft stop */ + kbase_job_slot_ctx_priority_check_locked(kctx, katom); + + kbase_jm_try_kick(kbdev, 1 << katom->slot_nr); + } + + /* Job completion may have unblocked other atoms. Try to update all job + * slots */ + kbase_backend_slot_update(kbdev); +} + +void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) +{ + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Reset should always take the GPU out of protected mode */ + WARN_ON(kbase_gpu_in_protected_mode(kbdev)); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int atom_idx = 0; + int idx; + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, atom_idx); + bool keep_in_jm_rb = false; + + if (!katom) + break; + if (katom->protected_state.exit == + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) + { + KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev); + + kbase_vinstr_resume(kbdev->vinstr_ctx); + + /* protected mode sanity checks */ + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev)); + KBASE_DEBUG_ASSERT_MSG( + (kbase_jd_katom_is_protected(katom) && js == 0) || + !kbase_jd_katom_is_protected(katom), + "Protected atom on JS%d not supported", js); + } + if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) + keep_in_jm_rb = true; + + kbase_gpu_release_atom(kbdev, katom, NULL); + + /* + * If the atom wasn't on HW when the reset was issued + * then leave it in the RB and next time we're kicked + * it will be processed again from the starting state. + */ + if (keep_in_jm_rb) { + kbasep_js_job_check_deref_cores(kbdev, katom); + katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; + katom->affinity = 0; + katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; + /* As the atom was not removed, increment the + * index so that we read the correct atom in the + * next iteration. */ + atom_idx++; + continue; + } + + /* + * The atom was on the HW when the reset was issued + * all we can do is fail the atom. 
+ */ + kbase_gpu_dequeue_atom(kbdev, js, NULL); + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + kbase_jm_complete(kbdev, katom, end_timestamp); + } + } + + kbdev->protected_mode_transition = false; +} + +static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, + int js, + struct kbase_jd_atom *katom, + u32 action) +{ + u32 hw_action = action & JS_COMMAND_MASK; + + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); + kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, + katom->core_req, katom); + katom->kctx->blocked_js[js][katom->sched_priority] = true; +} + +static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + u32 action, + bool disjoint) +{ + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_gpu_mark_atom_for_return(kbdev, katom); + katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true; + + if (disjoint) + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, + katom); +} + +static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) +{ + if (katom->x_post_dep) { + struct kbase_jd_atom *dep_atom = katom->x_post_dep; + + if (dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && + dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_RETURN_TO_JS) + return dep_atom->slot_nr; + } + return -1; +} + +static void kbase_job_evicted(struct kbase_jd_atom *katom) +{ + kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom, + katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT); +} + +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js, + struct kbase_jd_atom *katom, + u32 action) +{ + struct kbase_jd_atom *katom_idx0; + struct kbase_jd_atom *katom_idx1; + + bool katom_idx0_valid, katom_idx1_valid; + + bool ret = false; + + int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1; + int prio_idx0 = 0, prio_idx1 = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); + katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); + + if (katom_idx0) + prio_idx0 = katom_idx0->sched_priority; + if (katom_idx1) + prio_idx1 = katom_idx1->sched_priority; + + if (katom) { + katom_idx0_valid = (katom_idx0 == katom); + /* If idx0 is to be removed and idx1 is on the same context, + * then idx1 must also be removed otherwise the atoms might be + * returned out of order */ + if (katom_idx1) + katom_idx1_valid = (katom_idx1 == katom) || + (katom_idx0_valid && + (katom_idx0->kctx == + katom_idx1->kctx)); + else + katom_idx1_valid = false; + } else { + katom_idx0_valid = (katom_idx0 && + (!kctx || katom_idx0->kctx == kctx)); + katom_idx1_valid = (katom_idx1 && + (!kctx || katom_idx1->kctx == kctx) && + prio_idx0 == prio_idx1); + } + + if (katom_idx0_valid) + stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); + if (katom_idx1_valid) + stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1); + + if (katom_idx0_valid) { + if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Simple case - just dequeue and return */ + kbase_gpu_dequeue_atom(kbdev, js, NULL); + if (katom_idx1_valid) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + katom_idx1->event_code = + BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_jm_return_atom_to_js(kbdev, katom_idx1); + katom_idx1->kctx->blocked_js[js][prio_idx1] = + true; + } + + katom_idx0->event_code = + BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_jm_return_atom_to_js(kbdev, katom_idx0); + katom_idx0->kctx->blocked_js[js][prio_idx0] = true; + } else { + /* 
katom_idx0 is on GPU */ + if (katom_idx1 && katom_idx1->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* katom_idx0 and katom_idx1 are on GPU */ + + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT), NULL) == 0) { + /* idx0 has already completed - stop + * idx1 if needed*/ + if (katom_idx1_valid) { + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } else { + /* idx1 is in NEXT registers - attempt + * to remove */ + kbase_reg_write(kbdev, + JOB_SLOT_REG(js, + JS_COMMAND_NEXT), + JS_COMMAND_NOP, NULL); + + if (kbase_reg_read(kbdev, + JOB_SLOT_REG(js, + JS_HEAD_NEXT_LO), NULL) + != 0 || + kbase_reg_read(kbdev, + JOB_SLOT_REG(js, + JS_HEAD_NEXT_HI), NULL) + != 0) { + /* idx1 removed successfully, + * will be handled in IRQ */ + kbase_job_evicted(katom_idx1); + kbase_gpu_remove_atom(kbdev, + katom_idx1, + action, true); + stop_x_dep_idx1 = + should_stop_x_dep_slot(katom_idx1); + + /* stop idx0 if still on GPU */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx0, + action); + ret = true; + } else if (katom_idx1_valid) { + /* idx0 has already completed, + * stop idx1 if needed */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } + } else if (katom_idx1_valid) { + /* idx1 not on GPU but must be dequeued*/ + + /* idx1 will be handled in IRQ */ + kbase_gpu_remove_atom(kbdev, katom_idx1, action, + false); + /* stop idx0 */ + /* This will be repeated for anything removed + * from the next registers, since their normal + * flow was also interrupted, and this function + * might not enter disjoint state e.g. if we + * don't actually do a hard stop on the head + * atom */ + kbase_gpu_stop_atom(kbdev, js, katom_idx0, + action); + ret = true; + } else { + /* no atom in idx1 */ + /* just stop idx0 */ + kbase_gpu_stop_atom(kbdev, js, katom_idx0, + action); + ret = true; + } + } + } else if (katom_idx1_valid) { + if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Mark for return */ + /* idx1 will be returned once idx0 completes */ + kbase_gpu_remove_atom(kbdev, katom_idx1, action, + false); + } else { + /* idx1 is on GPU */ + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT), NULL) == 0) { + /* idx0 has already completed - stop idx1 */ + kbase_gpu_stop_atom(kbdev, js, katom_idx1, + action); + ret = true; + } else { + /* idx1 is in NEXT registers - attempt to + * remove */ + kbase_reg_write(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT), + JS_COMMAND_NOP, NULL); + + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_HEAD_NEXT_LO), NULL) != 0 || + kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_HEAD_NEXT_HI), NULL) != 0) { + /* idx1 removed successfully, will be + * handled in IRQ once idx0 completes */ + kbase_job_evicted(katom_idx1); + kbase_gpu_remove_atom(kbdev, katom_idx1, + action, + false); + } else { + /* idx0 has already completed - stop + * idx1 */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } + } + } + + + if (stop_x_dep_idx0 != -1) + kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0, + NULL, action); + + if (stop_x_dep_idx1 != -1) + kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1, + NULL, action); + + return ret; +} + +void kbase_gpu_cacheclean(struct kbase_device *kbdev) +{ + /* Limit the number of loops to avoid a hang if the interrupt is missed + */ + u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + + mutex_lock(&kbdev->cacheclean_lock); + + /* use GPU_COMMAND completion solution */ + /* clean & invalidate the caches */ + KBASE_TRACE_ADD(kbdev, 
CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+	/* wait for cache flush to complete before continuing */
+	while (--max_loops &&
+		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+			CLEAN_CACHES_COMPLETED) == 0)
+		;
+
+	/* clear the CLEAN_CACHES_COMPLETED irq */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+							CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+						CLEAN_CACHES_COMPLETED, NULL);
+	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING,
+		"Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_backend_cacheclean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->need_cache_flush_cores_retained) {
+		unsigned long flags;
+
+		kbase_gpu_cacheclean(kbdev);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_pm_unrequest_cores(kbdev, false,
+					katom->need_cache_flush_cores_retained);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		katom->need_cache_flush_cores_retained = 0;
+	}
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	/*
+	 * If cache flush required due to HW workaround then perform the flush
+	 * now
+	 */
+	kbase_backend_cacheclean(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) &&
+			(katom->core_req & BASE_JD_REQ_FS) &&
+			katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT &&
+			(katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+			!(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after it was
+			 * soft-stopped. Due to an HW issue we try to execute
+			 * the job again.
+			 */
+			dev_dbg(kbdev->dev,
+				"Clamping has been executed, try to rerun the job\n"
+			);
+			katom->event_code = BASE_JD_EVENT_STOPPED;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+		}
+	}
+
+	/* Clear the coreref_state now - while check_deref_cores() may not have
+	 * been called yet, the caller will have taken a copy of this field. If
+	 * this is not done, then if the atom is re-scheduled (following a soft
+	 * stop) then the core reference would not be retaken.
*/ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; + katom->affinity = 0; +} + +void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, + base_jd_core_req core_req, u64 affinity, + enum kbase_atom_coreref_state coreref_state) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity, + coreref_state); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.active_count) { + mutex_lock(&kbdev->js_data.runpool_mutex); + mutex_lock(&kbdev->pm.lock); + kbase_pm_update_active(kbdev); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&kbdev->js_data.runpool_mutex); + } +} + +void kbase_gpu_dump_slots(struct kbase_device *kbdev) +{ + unsigned long flags; + int js; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n"); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int idx; + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, + idx); + + if (katom) + dev_info(kbdev->dev, + " js%d idx%d : katom=%p gpu_rb_state=%d\n", + js, idx, katom, katom->gpu_rb_state); + else + dev_info(kbdev->dev, " js%d idx%d : empty\n", + js, idx); + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + + + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h new file mode 100644 index 000000000000..1e0e05ad3ea4 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h @@ -0,0 +1,76 @@ +/* + * + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Register-based HW access backend specific APIs + */ + +#ifndef _KBASE_HWACCESS_GPU_H_ +#define _KBASE_HWACCESS_GPU_H_ + +#include <backend/gpu/mali_kbase_pm_internal.h> + +/** + * kbase_gpu_irq_evict - Evict an atom from a NEXT slot + * + * @kbdev: Device pointer + * @js: Job slot to evict from + * + * Evict the atom in the NEXT slot for the specified job slot. This function is + * called from the job complete IRQ handler when the previous job has failed. + * + * Return: true if job evicted from NEXT registers, false otherwise + */ +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js); + +/** + * kbase_gpu_complete_hw - Complete an atom on job slot js + * + * @kbdev: Device pointer + * @js: Job slot that has completed + * @completion_code: Event code from job that has completed + * @job_tail: The tail address from the hardware if the job has partially + * completed + * @end_timestamp: Time of completion + */ +void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + u32 completion_code, + u64 job_tail, + ktime_t *end_timestamp); + +/** + * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer + * + * @kbdev: Device pointer + * @js: Job slot to inspect + * @idx: Index into ringbuffer. 0 is the job currently running on + * the slot, 1 is the job waiting, all other values are invalid. 
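+ *
+ * Example (hypothetical caller; this backend's ringbuffer holds at most
+ * two atoms per slot):
+ *
+ *   struct kbase_jd_atom *head = kbase_gpu_inspect(kbdev, js, 0);
+ *   struct kbase_jd_atom *next = kbase_gpu_inspect(kbdev, js, 1);
+ *
+ * head is the atom currently on the slot (or NULL), next the one queued
+ * behind it (or NULL).
+ *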
+ * Return: The atom at that position in the ringbuffer
+ * or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev: Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
new file mode 100644
index 000000000000..54d8ddd80097
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -0,0 +1,303 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+#include "mali_kbase_hw.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+								int js)
+{
+	/*
+	 * Here are the reasons for using job slot 2:
+	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+	 * - In absence of the above, then:
+	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
+	 *  - But, only when there aren't contexts with
+	 *  KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+	 *  all cores on slot 1 could be blocked by those using a coherent group
+	 *  on slot 2
+	 *  - And, only when you actually have 2 or more coregroups - if you
+	 *  only have 1 coregroup, then having jobs for slot 2 implies they'd
+	 *  also be for slot 1, meaning you'll get interference from them. Jobs
+	 *  able to run on slot 2 could also block jobs that can only run on
+	 *  slot 1 (tiler jobs)
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+
+	if (js != 2)
+		return true;
+
+	/* Only deal with js==2 now: */
+	if (kbdev->gpu_props.num_core_groups > 1) {
+		/* Only use slot 2 in the 2+ coregroup case */
+		if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+					KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
+								false) {
+			/* ...But only when we *don't* have atoms that run on
+			 * all cores */
+
+			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
+			 * atoms - the policy will sort that out */
+			return true;
+		}
+	}
+
+	/* If the above checks failed, we shouldn't use slot 2 */
+	return false;
+}
+
+/*
+ * A deeper modification of what the job scheduler, power manager and
+ * affinity manager will implement has already been decided upon, so
+ * this function is just an intermediate step that assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ *   and high cores in a core split will be evenly distributed.
+ * - Odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - This function is frequently called and can be optimized,
+ *   (see notes in loops), but as the functionality will likely
+ *   be modified, optimization has not been addressed.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js)
+{
+	base_jd_core_req core_req = katom->core_req;
+	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+	u64 core_availability_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+	/*
+	 * If no cores are currently available (core availability policy is
+	 * transitioning) then fail.
+	 */
+	if (0 == core_availability_mask) {
+		*affinity = 0;
+		return false;
+	}
+
+	KBASE_DEBUG_ASSERT(js >= 0);
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+								BASE_JD_REQ_T) {
+		/* If the hardware supports XAFFINITY then we'll only enable
+		 * the tiler (which is the default so this is a no-op),
+		 * otherwise enable shader core 0. */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = 1;
+		else
+			*affinity = 0;
+
+		return true;
+	}
+
+	if (1 == kbdev->gpu_props.num_cores) {
+		/* trivial case: only one core, nothing to do */
+		*affinity = core_availability_mask &
+				kbdev->pm.debug_core_mask[js];
+	} else {
+		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+			if (js == 0 || num_core_groups == 1) {
+				/* js[0] and single-core-group systems just get
+				 * the first core group */
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[0].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+			} else {
+				/* js[1], js[2] use core groups 0, 1 for
+				 * dual-core-group systems */
+				u32 core_group_idx = ((u32) js) - 1;
+
+				KBASE_DEBUG_ASSERT(core_group_idx <
+							num_core_groups);
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+
+				/* If the job is specifically targeting core
+				 * group 1 and the core availability policy is
+				 * keeping that core group off, then fail */
+				if (*affinity == 0 && core_group_idx == 1 &&
+						kbdev->pm.backend.cg1_disabled
+								== true)
+					katom->event_code =
+							BASE_JD_EVENT_PM_EVENT;
+			}
+		} else {
+			/* All cores are available when no core split is
+			 * required */
+			*affinity = core_availability_mask &
+					kbdev->pm.debug_core_mask[js];
+		}
+	}
+
+	/*
+	 * If no cores are currently available in the desired core group(s)
+	 * (core availability policy is transitioning) then fail.
+	 */
+	if (*affinity == 0)
+		return false;
+
+	/* Enable core 0 if tiler required for hardware without XAFFINITY
+	 * support (notes above) */
+	if (core_req & BASE_JD_REQ_T) {
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = *affinity | 1;
+	}
+
+	return true;
+}
+
+static inline bool kbase_js_affinity_is_violating(
+						struct kbase_device *kbdev,
+								u64 *affinities)
+{
+	/* This implementation checks whether the two slots involved in Generic
+	 * thread creation have intersecting affinity. This is due to micro-
+	 * architectural issues where a job in slot A targeting cores used by
+	 * slot B could prevent the job in slot B from making progress until the
+	 * job in slot A has completed.
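+	 *
+	 * Worked example (illustrative masks): affinities[1] = 0x0f and
+	 * affinities[2] = 0x3c intersect in 0x0c, so this reports a
+	 * violation; 0x0f and 0x30 would not.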
+ */ + u64 affinity_set_left; + u64 affinity_set_right; + u64 intersection; + + KBASE_DEBUG_ASSERT(affinities != NULL); + + affinity_set_left = affinities[1]; + + affinity_set_right = affinities[2]; + + /* A violation occurs when any bit in the left_set is also in the + * right_set */ + intersection = affinity_set_left & affinity_set_right; + + return (bool) (intersection != (u64) 0u); +} + +bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, + u64 affinity) +{ + struct kbasep_js_device_data *js_devdata; + u64 new_affinities[BASE_JM_MAX_NR_SLOTS]; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); + js_devdata = &kbdev->js_data; + + memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities, + sizeof(js_devdata->runpool_irq.slot_affinities)); + + new_affinities[js] |= affinity; + + return kbase_js_affinity_is_violating(kbdev, new_affinities); +} + +void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, + u64 affinity) +{ + struct kbasep_js_device_data *js_devdata; + u64 cores; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); + js_devdata = &kbdev->js_data; + + KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity) + == false); + + cores = affinity; + while (cores) { + int bitnum = fls64(cores) - 1; + u64 bit = 1ULL << bitnum; + s8 cnt; + + cnt = + ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); + + if (cnt == 1) + js_devdata->runpool_irq.slot_affinities[js] |= bit; + + cores &= ~bit; + } +} + +void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, + u64 affinity) +{ + struct kbasep_js_device_data *js_devdata; + u64 cores; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); + js_devdata = &kbdev->js_data; + + cores = affinity; + while (cores) { + int bitnum = fls64(cores) - 1; + u64 bit = 1ULL << bitnum; + s8 cnt; + + KBASE_DEBUG_ASSERT( + js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0); + + cnt = + --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); + + if (0 == cnt) + js_devdata->runpool_irq.slot_affinities[js] &= ~bit; + + cores &= ~bit; + } +} + +#if KBASE_TRACE_ENABLE +void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata; + int slot_nr; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + + for (slot_nr = 0; slot_nr < 3; ++slot_nr) + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL, + NULL, 0u, slot_nr, + (u32) js_devdata->runpool_irq.slot_affinities[slot_nr]); +} +#endif /* KBASE_TRACE_ENABLE */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h new file mode 100644 index 000000000000..35d9781ae092 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h @@ -0,0 +1,129 @@ +/* + * + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Affinity Manager internal APIs. 
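+ *
+ * A minimal sketch of how these APIs are intended to compose on the
+ * submission path (hypothetical caller, error handling elided):
+ *
+ *   u64 affinity;
+ *
+ *   if (kbase_js_choose_affinity(&affinity, kbdev, katom, js) &&
+ *       !kbase_js_affinity_would_violate(kbdev, js, affinity))
+ *           kbase_js_affinity_retain_slot_cores(kbdev, js, affinity);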
+ */ + +#ifndef _KBASE_JS_AFFINITY_H_ +#define _KBASE_JS_AFFINITY_H_ + +/** + * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to + * submit a job to a particular job slot in the current status + * + * @kbdev: The kbase device structure of the device + * @js: Job slot number to check for allowance + * + * Will check if submitting to the given job slot is allowed in the current + * status. For example using job slot 2 while in soft-stoppable state and only + * having 1 coregroup is not allowed by the policy. This function should be + * called prior to submitting a job to a slot to make sure policy rules are not + * violated. + * + * The following locking conditions are made on the caller + * - it must hold hwaccess_lock + */ +bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js); + +/** + * kbase_js_choose_affinity - Compute affinity for a given job. + * + * @affinity: Affinity bitmap computed + * @kbdev: The kbase device structure of the device + * @katom: Job chain of which affinity is going to be found + * @js: Slot the job chain is being submitted + * + * Currently assumes an all-on/all-off power management policy. + * Also assumes there is at least one core with tiler available. + * + * Returns true if a valid affinity was chosen, false if + * no cores were available. + */ +bool kbase_js_choose_affinity(u64 * const affinity, + struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js); + +/** + * kbase_js_affinity_would_violate - Determine whether a proposed affinity on + * job slot @js would cause a violation of affinity restrictions. + * + * @kbdev: Kbase device structure + * @js: The job slot to test + * @affinity: The affinity mask to test + * + * The following locks must be held by the caller + * - hwaccess_lock + * + * Return: true if the affinity would violate the restrictions + */ +bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, + u64 affinity); + +/** + * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by + * a slot + * + * @kbdev: Kbase device structure + * @js: The job slot retaining the cores + * @affinity: The cores to retain + * + * The following locks must be held by the caller + * - hwaccess_lock + */ +void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, + u64 affinity); + +/** + * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used + * by a slot + * + * @kbdev: Kbase device structure + * @js: Job slot + * @affinity: Bit mask of core to be released + * + * Cores must be released as soon as a job is dequeued from a slot's 'submit + * slots', and before another job is submitted to those slots. Otherwise, the + * refcount could exceed the maximum number submittable to a slot, + * %BASE_JM_SUBMIT_SLOTS. 
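+ *
+ * Each retain is expected to be balanced by exactly one release of the
+ * same mask; a sketch of the intended pairing (hypothetical caller):
+ *
+ *   kbase_js_affinity_retain_slot_cores(kbdev, js, katom->affinity);
+ *   (submit the atom and wait for it to complete)
+ *   kbase_js_affinity_release_slot_cores(kbdev, js, katom->affinity);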
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_debug_log_current_affinities - log the current affinities
+ *
+ * @kbdev: Kbase device structure
+ *
+ * Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else /* KBASE_TRACE_ENABLE */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+#endif /* _KBASE_JS_AFFINITY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100644
index 000000000000..a8c1af23a369
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,356 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is
+ * enabled for a particular level is down to the integrator. However this is
+ * being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	s8 nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_pullable is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif /* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: a context that has both Compute and Non-Compute jobs
+		 * will be treated as an OpenCL context (hence, we don't check
+		 * KBASEP_JS_CTX_ATTR_NON_COMPUTE).
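+		 *
+		 * e.g. (illustrative counts): three runnable contexts of
+		 * which two are compute gives nr_noncompute_ctxs == 1, so
+		 * the timer runs; a single pure-compute context (one
+		 * compute, zero non-compute) leaves it off.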
+ */ + { + s8 nr_compute_ctxs = + kbasep_js_ctx_attr_count_on_runpool(kbdev, + KBASEP_JS_CTX_ATTR_COMPUTE); + s8 nr_noncompute_ctxs = nr_running_ctxs - + nr_compute_ctxs; + + return (bool) (nr_compute_ctxs >= 2 || + nr_noncompute_ctxs > 0); + } + } else { + /* Run the timer callback whenever you have at least 1 context + */ + return (bool) (nr_running_ctxs > 0); + } +} + +static enum hrtimer_restart timer_callback(struct hrtimer *timer) +{ + unsigned long flags; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + struct kbase_backend_data *backend; + int s; + bool reset_needed = false; + + KBASE_DEBUG_ASSERT(timer != NULL); + + backend = container_of(timer, struct kbase_backend_data, + scheduling_timer); + kbdev = container_of(backend, struct kbase_device, hwaccess.backend); + js_devdata = &kbdev->js_data; + + /* Loop through the slots */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { + struct kbase_jd_atom *atom = NULL; + + if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) { + atom = kbase_gpu_inspect(kbdev, s, 0); + KBASE_DEBUG_ASSERT(atom != NULL); + } + + if (atom != NULL) { + /* The current version of the model doesn't support + * Soft-Stop */ + if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { + u32 ticks = atom->ticks++; + +#if !CINSTR_DUMPING_ENABLED + u32 soft_stop_ticks, hard_stop_ticks, + gpu_reset_ticks; + if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + soft_stop_ticks = + js_devdata->soft_stop_ticks_cl; + hard_stop_ticks = + js_devdata->hard_stop_ticks_cl; + gpu_reset_ticks = + js_devdata->gpu_reset_ticks_cl; + } else { + soft_stop_ticks = + js_devdata->soft_stop_ticks; + hard_stop_ticks = + js_devdata->hard_stop_ticks_ss; + gpu_reset_ticks = + js_devdata->gpu_reset_ticks_ss; + } + + /* If timeouts have been changed then ensure + * that atom tick count is not greater than the + * new soft_stop timeout. This ensures that + * atoms do not miss any of the timeouts due to + * races between this worker and the thread + * changing the timeouts. */ + if (backend->timeouts_updated && + ticks > soft_stop_ticks) + ticks = atom->ticks = soft_stop_ticks; + + /* Job is Soft-Stoppable */ + if (ticks == soft_stop_ticks) { + int disjoint_threshold = + KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; + u32 softstop_flags = 0u; + /* Job has been scheduled for at least + * js_devdata->soft_stop_ticks ticks. + * Soft stop the slot so we can run + * other jobs. + */ + dev_dbg(kbdev->dev, "Soft-stop"); +#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS + /* nr_user_contexts_running is updated + * with the runpool_mutex, but we can't + * take that here. + * + * However, if it's about to be + * increased then the new context can't + * run any jobs until they take the + * hwaccess_lock, so it's OK to observe + * the older value. + * + * Similarly, if it's about to be + * decreased, the last job from another + * context has already finished, so it's + * not too bad that we observe the older + * value and register a disjoint event + * when we try soft-stopping */ + if (js_devdata->nr_user_contexts_running + >= disjoint_threshold) + softstop_flags |= + JS_COMMAND_SW_CAUSES_DISJOINT; + + kbase_job_slot_softstop_swflags(kbdev, + s, atom, softstop_flags); +#endif + } else if (ticks == hard_stop_ticks) { + /* Job has been scheduled for at least + * js_devdata->hard_stop_ticks_ss ticks. + * It should have been soft-stopped by + * now. Hard stop the slot. 
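+					 *
+					 * Illustrative numbers (the real
+					 * values come from js_devdata and are
+					 * integration-tunable): with
+					 * soft_stop_ticks == 1 and
+					 * hard_stop_ticks_ss == 50 at a
+					 * 100 ms scheduling period, a job is
+					 * soft-stopped after roughly 100 ms
+					 * and hard-stopped after roughly 5 s.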
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else /* !CINSTR_DUMPING_ENABLED */
+				/* NOTE: During CINSTR_DUMPING_ENABLED, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CINSTR_DUMPING_ENABLED, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif /* !CINSTR_DUMPING_ENABLED */
+			}
+		}
+	}
+#if KBASE_GPU_RESET_EN
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there are contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	backend->timeouts_updated = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself.
Its return value + * cannot change, because it depends on variables updated with + * the runpool_mutex held, which the caller of this must also + * hold */ + hrtimer_cancel(&backend->scheduling_timer); + } + + if (timer_callback_should_run(kbdev) && !backend->timer_running) { + /* Take spinlock to force synchronisation with timer */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + backend->timer_running = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + hrtimer_start(&backend->scheduling_timer, + HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), + HRTIMER_MODE_REL); + + KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, + 0u); + } +} + +int kbase_backend_timer_init(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + backend->scheduling_timer.function = timer_callback; + + backend->timer_running = false; + + return 0; +} + +void kbase_backend_timer_term(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + hrtimer_cancel(&backend->scheduling_timer); +} + +void kbase_backend_timer_suspend(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->suspend_timer = true; + + kbase_backend_ctx_count_changed(kbdev); +} + +void kbase_backend_timer_resume(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->suspend_timer = false; + + kbase_backend_ctx_count_changed(kbdev); +} + +void kbase_backend_timeouts_changed(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->timeouts_updated = true; +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h new file mode 100644 index 000000000000..3f53779c6747 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h @@ -0,0 +1,69 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Register-based HW access backend specific job scheduler APIs + */ + +#ifndef _KBASE_JS_BACKEND_H_ +#define _KBASE_JS_BACKEND_H_ + +/** + * kbase_backend_timer_init() - Initialise the JS scheduling timer + * @kbdev: Device pointer + * + * This function should be called at driver initialisation + * + * Return: 0 on success + */ +int kbase_backend_timer_init(struct kbase_device *kbdev); + +/** + * kbase_backend_timer_term() - Terminate the JS scheduling timer + * @kbdev: Device pointer + * + * This function should be called at driver termination + */ +void kbase_backend_timer_term(struct kbase_device *kbdev); + +/** + * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling + * timer + * @kbdev: Device pointer + * + * This function should be called on suspend, after the active count has reached + * zero. 
This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ * scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that it is not guaranteed to
+ * re-start the timer, only to evaluate whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100644
index 000000000000..aa1817c8bca9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,401 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	u64 region;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	region = pfn << PAGE_SHIFT;
+	/*
+	 * fls returns (given the ASSERT above):
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
+	 */
+
+	/* defensive check: num_pages being zero cannot happen given the
+	 * assert above, but keep the fallback for release builds where
+	 * KBASE_DEBUG_ASSERT compiles out */
+	if (0 == num_pages) {
+		region |= 11;
+	} else {
+		u8 region_width;
+
+		region_width = 10 + fls(num_pages);
+		if (num_pages != (1ul << (region_width - 11))) {
+			/* not pow2, so must go up to the next pow2 */
+			region_width += 1;
+		}
+		KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+		KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+		region |= region_width;
+	}
+
+	return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr, struct kbase_context *kctx)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+
+	if (max_loops == 0) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value.
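+	 * The polling loop above reads with a NULL context so those accesses
+	 * are not logged; the extra read below records the final AS_STATUS
+	 * value in the register access trace.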
*/ + if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops) + kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx); + + return 0; +} + +static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd, + struct kbase_context *kctx) +{ + int status; + + /* write AS_COMMAND when MMU is ready to accept another command */ + status = wait_ready(kbdev, as_nr, kctx); + if (status == 0) + kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd, + kctx); + + return status; +} + +static void validate_protected_page_fault(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + /* GPUs which support (native) protected mode shall not report page + * fault addresses unless it has protected debug mode and protected + * debug mode is turned on */ + u32 protected_debug_mode = 0; + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) + return; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { + protected_debug_mode = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_STATUS), + kctx) & GPU_DBGEN; + } + + if (!protected_debug_mode) { + /* fault_addr should never be reported in protected mode. + * However, we just continue by printing an error message */ + dev_err(kbdev->dev, "Fault address reported in protected mode\n"); + } +} + +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) +{ + const int num_as = 16; + const int busfault_shift = MMU_PAGE_FAULT_FLAGS; + const int pf_shift = 0; + const unsigned long as_bit_mask = (1UL << num_as) - 1; + unsigned long flags; + u32 new_mask; + u32 tmp; + + /* bus faults */ + u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; + /* page faults (note: Ignore ASes with both pf and bf) */ + u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + + /* remember current mask */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); + /* mask interrupts for now */ + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); + + while (bf_bits | pf_bits) { + struct kbase_as *as; + int as_no; + struct kbase_context *kctx; + + /* + * the while logic ensures we have a bit set, no need to check + * for not-found here + */ + as_no = ffs(bf_bits | pf_bits) - 1; + as = &kbdev->as[as_no]; + + /* + * Refcount the kctx ASAP - it shouldn't disappear anyway, since + * Bus/Page faults _should_ only occur whilst jobs are running, + * and a job causing the Bus/Page fault shouldn't complete until + * the MMU is updated + */ + kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no); + + + /* find faulting address */ + as->fault_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, + AS_FAULTADDRESS_HI), + kctx); + as->fault_addr <<= 32; + as->fault_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, + AS_FAULTADDRESS_LO), + kctx); + + /* Mark the fault protected or not */ + as->protected_mode = kbdev->protected_mode; + + if (kbdev->protected_mode && as->fault_addr) + { + /* check if address reporting is allowed */ + validate_protected_page_fault(kbdev, kctx); + } + + /* report the fault to debugfs */ + kbase_as_fault_debugfs_new(kbdev, as_no); + + /* record the fault status */ + as->fault_status = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, + AS_FAULTSTATUS), + kctx); + + /* find the fault type */ + as->fault_type = (bf_bits & (1 << as_no)) ? 
+ KBASE_MMU_FAULT_TYPE_BUS : + KBASE_MMU_FAULT_TYPE_PAGE; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { + as->fault_extra_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_HI), + kctx); + as->fault_extra_addr <<= 32; + as->fault_extra_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_LO), + kctx); + } + + if (kbase_as_has_bus_fault(as)) { + /* Mark bus fault as handled. + * Note that a bus fault is processed first in case + * where both a bus fault and page fault occur. + */ + bf_bits &= ~(1UL << as_no); + + /* remove the queued BF (and PF) from the mask */ + new_mask &= ~(MMU_BUS_ERROR(as_no) | + MMU_PAGE_FAULT(as_no)); + } else { + /* Mark page fault as handled */ + pf_bits &= ~(1UL << as_no); + + /* remove the queued PF from the mask */ + new_mask &= ~MMU_PAGE_FAULT(as_no); + } + + /* Process the interrupt for this address space */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_mmu_interrupt_process(kbdev, kctx, as); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + + /* reenable interrupts */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); + new_mask |= tmp; + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} + +void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx) +{ + struct kbase_mmu_setup *current_setup = &as->current_setup; + u32 transcfg = 0; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { + transcfg = current_setup->transcfg & 0xFFFFFFFFUL; + + /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */ + /* Clear PTW_MEMATTR bits */ + transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; + /* Enable correct PTW_MEMATTR bits */ + transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */ + /* Clear PTW_SH bits */ + transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); + /* Enable correct PTW_SH bits */ + transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); + } + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), + transcfg, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), + (current_setup->transcfg >> 32) & 0xFFFFFFFFUL, + kctx); + } else { + if (kbdev->system_coherency == COHERENCY_ACE) + current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; + } + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), + current_setup->transtab & 0xFFFFFFFFUL, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), + (current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx); + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO), + current_setup->memattr & 0xFFFFFFFFUL, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), + (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx); + + KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, + current_setup->transtab, + current_setup->memattr, + transcfg); + + write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx); +} + +int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op, + unsigned int handling_irq) +{ + int ret; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + if (op == AS_COMMAND_UNLOCK) { + /* Unlock doesn't require a lock first */ + ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); + } else { + u64 lock_addr = lock_region(kbdev, vpfn, 
nr); + + /* Lock the region that needs to be updated */ + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO), + lock_addr & 0xFFFFFFFFUL, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI), + (lock_addr >> 32) & 0xFFFFFFFFUL, kctx); + write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx); + + /* Run the MMU operation */ + write_cmd(kbdev, as->number, op, kctx); + + /* Wait for the flush to complete */ + ret = wait_ready(kbdev, as->number, kctx); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) { + /* Issue an UNLOCK command to ensure that valid page + tables are re-read by the GPU after an update. + Note that, the FLUSH command should perform all the + actions necessary, however the bus logs show that if + multiple page faults occur within an 8 page region + the MMU does not always re-read the updated page + table entries for later faults or is only partially + read, it subsequently raises the page fault IRQ for + the same addresses, the unlock ensures that the MMU + cache is flushed, so updates can be re-read. As the + region is now unlocked we need to issue 2 UNLOCK + commands in order to flush the MMU/uTLB, + see PRLAM-8812. + */ + write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); + write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); + } + } + + return ret; +} + +void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx, enum kbase_mmu_fault_type type) +{ + unsigned long flags; + u32 pf_bf_mask; + + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + + /* + * A reset is in-flight and we're flushing the IRQ + bottom half + * so don't update anything as it could race with the reset code. + */ + if (kbdev->irq_reset_flush) + goto unlock; + + /* Clear the page (and bus fault IRQ as well in case one occurred) */ + pf_bf_mask = MMU_PAGE_FAULT(as->number); + if (type == KBASE_MMU_FAULT_TYPE_BUS || + type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) + pf_bf_mask |= MMU_BUS_ERROR(as->number); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx); + +unlock: + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} + +void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx, enum kbase_mmu_fault_type type) +{ + unsigned long flags; + u32 irq_mask; + + /* Enable the page fault IRQ (and bus fault IRQ as well in case one + * occurred) */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + + /* + * A reset is in-flight and we're flushing the IRQ + bottom half + * so don't update anything as it could race with the reset code. + */ + if (kbdev->irq_reset_flush) + goto unlock; + + irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) | + MMU_PAGE_FAULT(as->number); + + if (type == KBASE_MMU_FAULT_TYPE_BUS || + type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) + irq_mask |= MMU_BUS_ERROR(as->number); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx); + +unlock: + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h new file mode 100644 index 000000000000..c02253c6acc3 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h @@ -0,0 +1,42 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * Interface file for the direct implementation for MMU hardware access + * + * Direct MMU hardware interface + * + * This module provides the interface(s) that are required by the direct + * register access implementation of the MMU hardware interface + */ + +#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_ +#define _MALI_KBASE_MMU_HW_DIRECT_H_ + +#include <mali_kbase_defs.h> + +/** + * kbase_mmu_interrupt - Process an MMU interrupt. + * + * Process the MMU interrupt that was reported by the &kbase_device. + * + * @kbdev: kbase context to clear the fault from. + * @irq_stat: Value of the MMU_IRQ_STATUS register + */ +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + +#endif /* _MALI_KBASE_MMU_HW_DIRECT_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c new file mode 100644 index 000000000000..0614348e935a --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c @@ -0,0 +1,63 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * "Always on" power management policy + */ + +#include <mali_kbase.h> +#include <mali_kbase_pm.h> + +static u64 always_on_get_core_mask(struct kbase_device *kbdev) +{ + return kbdev->gpu_props.props.raw_props.shader_present; +} + +static bool always_on_get_core_active(struct kbase_device *kbdev) +{ + return true; +} + +static void always_on_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static void always_on_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +/* + * The struct kbase_pm_policy structure for the demand power policy. + * + * This is the static structure that defines the demand power policy's callback + * and name. + */ +const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { + "always_on", /* name */ + always_on_init, /* init */ + always_on_term, /* term */ + always_on_get_core_mask, /* get_core_mask */ + always_on_get_core_active, /* get_core_active */ + 0u, /* flags */ + KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h new file mode 100644 index 000000000000..f9d244b01bc2 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h @@ -0,0 +1,77 @@ + +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * "Always on" power management policy + */ + +#ifndef MALI_KBASE_PM_ALWAYS_ON_H +#define MALI_KBASE_PM_ALWAYS_ON_H + +/** + * DOC: + * The "Always on" power management policy has the following + * characteristics: + * + * - When KBase indicates that the GPU will be powered up, but we don't yet + * know which Job Chains are to be run: + * All Shader Cores are powered up, regardless of whether or not they will + * be needed later. + * + * - When KBase indicates that a set of Shader Cores are needed to submit the + * currently queued Job Chains: + * All Shader Cores are kept powered, regardless of whether or not they will + * be needed + * + * - When KBase indicates that the GPU need not be powered: + * The Shader Cores are kept powered, regardless of whether or not they will + * be needed. The GPU itself is also kept powered, even though it is not + * needed. + * + * This policy is automatically overridden during system suspend: the desired + * core state is ignored, and the cores are forced off regardless of what the + * policy requests. After resuming from suspend, new changes to the desired + * core state made by the policy are honored. + * + * Note: + * + * - KBase indicates the GPU will be powered up when it has a User Process that + * has just started to submit Job Chains. + * + * - KBase indicates the GPU need not be powered when all the Job Chains from + * User Processes have finished, and it is waiting for a User Process to + * submit some more Job Chains. + */ + +/** + * struct kbasep_pm_policy_always_on - Private struct for policy instance data + * @dummy: unused dummy variable + * + * This contains data that is private to the particular power policy that is + * active. + */ +struct kbasep_pm_policy_always_on { + int dummy; +}; + +extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops; + +#endif /* MALI_KBASE_PM_ALWAYS_ON_H */ + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c new file mode 100644 index 000000000000..c88b80a325dd --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -0,0 +1,478 @@ +/* + * + * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + +/* + * GPU backend implementation of base kernel power management APIs + */ + +#include <mali_kbase.h> +#include <mali_midg_regmap.h> +#include <mali_kbase_config_defaults.h> + +#include <mali_kbase_pm.h> +#include <mali_kbase_hwaccess_jm.h> +#include <backend/gpu/mali_kbase_js_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> +#include <backend/gpu/mali_kbase_jm_internal.h> + +static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); + +void kbase_pm_register_access_enable(struct kbase_device *kbdev) +{ + struct kbase_pm_callback_conf *callbacks; + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + + if (callbacks) + callbacks->power_on_callback(kbdev); + + kbdev->pm.backend.gpu_powered = true; +} + +void kbase_pm_register_access_disable(struct kbase_device *kbdev) +{ + struct kbase_pm_callback_conf *callbacks; + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + + if (callbacks) + callbacks->power_off_callback(kbdev); + + kbdev->pm.backend.gpu_powered = false; +} + +int kbase_hwaccess_pm_init(struct kbase_device *kbdev) +{ + int ret = 0; + struct kbase_pm_callback_conf *callbacks; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + mutex_init(&kbdev->pm.lock); + + kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!kbdev->pm.backend.gpu_poweroff_wait_wq) + return -ENOMEM; + + INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, + kbase_pm_gpu_poweroff_wait_wq); + + kbdev->pm.backend.gpu_powered = false; + kbdev->pm.suspending = false; +#ifdef CONFIG_MALI_DEBUG + kbdev->pm.backend.driver_ready_for_irqs = false; +#endif /* CONFIG_MALI_DEBUG */ + kbdev->pm.backend.gpu_in_desired_state = true; + init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + if (callbacks) { + kbdev->pm.backend.callback_power_on = + callbacks->power_on_callback; + kbdev->pm.backend.callback_power_off = + callbacks->power_off_callback; + kbdev->pm.backend.callback_power_suspend = + callbacks->power_suspend_callback; + kbdev->pm.backend.callback_power_resume = + callbacks->power_resume_callback; + kbdev->pm.callback_power_runtime_init = + callbacks->power_runtime_init_callback; + kbdev->pm.callback_power_runtime_term = + callbacks->power_runtime_term_callback; + kbdev->pm.backend.callback_power_runtime_on = + callbacks->power_runtime_on_callback; + kbdev->pm.backend.callback_power_runtime_off = + callbacks->power_runtime_off_callback; + kbdev->pm.backend.callback_power_runtime_idle = + callbacks->power_runtime_idle_callback; + } else { + kbdev->pm.backend.callback_power_on = NULL; + kbdev->pm.backend.callback_power_off = NULL; + kbdev->pm.backend.callback_power_suspend = NULL; + kbdev->pm.backend.callback_power_resume = NULL; + kbdev->pm.callback_power_runtime_init = NULL; + kbdev->pm.callback_power_runtime_term = NULL; + kbdev->pm.backend.callback_power_runtime_on = NULL; + kbdev->pm.backend.callback_power_runtime_off = NULL; + kbdev->pm.backend.callback_power_runtime_idle = NULL; + } + + /* Initialise the metrics subsystem */ + ret = kbasep_pm_metrics_init(kbdev); + if (ret) + return ret; + + init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait); + kbdev->pm.backend.l2_powered = 0; + + init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); + kbdev->pm.backend.reset_done = false; + + init_waitqueue_head(&kbdev->pm.zero_active_count_wait); + kbdev->pm.active_count = 0; + + 
spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
+
+	if (kbase_pm_ca_init(kbdev) != 0)
+		goto workq_fail;
+
+	if (kbase_pm_policy_init(kbdev) != 0)
+		goto pm_policy_fail;
+
+	return 0;
+
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+workq_fail:
+	kbasep_pm_metrics_term(kbdev);
+	return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	/* Update core status as required by the policy */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+	kbase_pm_update_cores_state(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.gpu_poweroff_wait_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+#if !PLATFORM_POWER_DOWN_ONLY
+	/* Wait for power transitions to complete. We do this with no locks held
+	 * so that we don't deadlock with any pending workqueues */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+	kbase_pm_check_transitions_sync(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+#if PLATFORM_POWER_DOWN_ONLY
+	if (kbdev->pm.backend.gpu_powered) {
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/* If L2 cache is powered then we must flush it before
+			 * we power off the GPU. Normally this would have been
+			 * handled when the L2 was powered off. */
+			kbase_gpu_cacheclean(kbdev);
+		}
+	}
+#endif /* PLATFORM_POWER_DOWN_ONLY */
+
+	if (!backend->poweron_required) {
+#if !PLATFORM_POWER_DOWN_ONLY
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		WARN_ON(kbdev->l2_available_bitmap ||
+			kbdev->shader_available_bitmap ||
+			kbdev->tiler_available_bitmap);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+		/* Consume any change-state events */
+		kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		/* Disable interrupts and turn the clock off */
+		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
+			/*
+			 * Page/bus faults are pending, must drop locks to
+			 * process. Interrupts are disabled so no more faults
+			 * should be generated at this point.
+			 */
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			kbase_flush_mmu_wqs(kbdev);
+			mutex_lock(&js_devdata->runpool_mutex);
+			mutex_lock(&kbdev->pm.lock);
+
+			/* Turn off the clock now that the faults have been
+			 * handled. We dropped locks, so poweron_required may
+			 * have changed - power back on if this is the case. */
+			if (backend->poweron_required)
+				kbase_pm_clock_on(kbdev, false);
+			else
+				WARN_ON(!kbase_pm_clock_off(kbdev,
+						backend->poweroff_is_suspend));
+		}
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	backend->poweroff_wait_in_progress = false;
+	if (backend->poweron_required) {
+		backend->poweron_required = false;
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	wake_up(&kbdev->pm.backend.poweroff_wait);
+}
+
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (!kbdev->pm.backend.poweroff_wait_in_progress) {
+		/* Force all cores off */
+		kbdev->pm.backend.desired_shader_state = 0;
+		kbdev->pm.backend.desired_tiler_state = 0;
+
+		/* Force all cores to be unavailable, in the situation where
+		 * transitions are in progress for some cores but not others,
+		 * and kbase_pm_check_transitions_nolock can not immediately
+		 * power off the cores */
+		kbdev->shader_available_bitmap = 0;
+		kbdev->tiler_available_bitmap = 0;
+		kbdev->l2_available_bitmap = 0;
+
+		kbdev->pm.backend.poweroff_wait_in_progress = true;
+		kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/* Kick off the wq here. Callers will have to wait. */
+		queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
+				&kbdev->pm.backend.gpu_poweroff_wait_work);
+	} else {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+}
+
+/* Used as a wait_event() predicate: despite the name, this returns true once
+ * no GPU power-off is in progress, i.e. the power-off wait work completed. */
+static bool is_poweroff_in_progress(struct kbase_device *kbdev)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
+{
+	wait_event_killable(kbdev->pm.backend.poweroff_wait,
+			is_poweroff_in_progress(kbdev));
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them.
*/ + ret = kbase_pm_init_hw(kbdev, flags); + if (ret) { + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + return ret; + } + + kbasep_pm_init_core_use_bitmaps(kbdev); + + kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = + kbdev->pm.debug_core_mask[1] = + kbdev->pm.debug_core_mask[2] = + kbdev->gpu_props.props.raw_props.shader_present; + + /* Pretend the GPU is active to prevent a power policy turning the GPU + * cores off */ + kbdev->pm.active_count = 1; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + /* Ensure cycle counter is off */ + kbdev->pm.backend.gpu_cycle_counter_requests = 0; + spin_unlock_irqrestore( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + + /* We are ready to receive IRQ's now as power policy is set up, so + * enable them now. */ +#ifdef CONFIG_MALI_DEBUG + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags); + kbdev->pm.backend.driver_ready_for_irqs = true; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags); +#endif + kbase_pm_enable_interrupts(kbdev); + + /* Turn on the GPU and any cores needed by the policy */ + kbase_pm_do_poweron(kbdev, false); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + + /* Idle the GPU and/or cores, if the policy wants it to */ + kbase_pm_context_idle(kbdev); + + return 0; +} + +void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + mutex_lock(&kbdev->pm.lock); + kbase_pm_cancel_deferred_poweroff(kbdev); + kbase_pm_do_poweroff(kbdev, false); + mutex_unlock(&kbdev->pm.lock); +} + +KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); + +void kbase_hwaccess_pm_term(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0); + + /* Free any resources the policy allocated */ + kbase_pm_policy_term(kbdev); + kbase_pm_ca_term(kbdev); + + /* Shut down the metrics subsystem */ + kbasep_pm_metrics_term(kbdev); + + destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); +} + +void kbase_pm_power_changed(struct kbase_device *kbdev) +{ + bool cores_are_available; + unsigned long flags; + + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, + SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, + SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END); + + if (cores_are_available) { + /* Log timelining information that a change in state has + * completed */ + kbase_timeline_pm_handle_event(kbdev, + KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + + kbase_backend_slot_update(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, + u64 new_core_mask_js0, u64 new_core_mask_js1, + u64 new_core_mask_js2) +{ + kbdev->pm.debug_core_mask[0] = new_core_mask_js0; + kbdev->pm.debug_core_mask[1] = new_core_mask_js1; + kbdev->pm.debug_core_mask[2] = new_core_mask_js2; + kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | + new_core_mask_js2; + + kbase_pm_update_cores_state_nolock(kbdev); +} + +void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev) +{ + kbase_pm_update_active(kbdev); +} + +void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev) +{ + kbase_pm_update_active(kbdev); +} + +void 
kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	/* Force power off the GPU and all cores (regardless of policy), only
+	 * after the PM active count reaches zero (otherwise, we risk turning it
+	 * off prematurely) */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, true);
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100644
index 000000000000..85890f1e85f5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,182 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+	&kbase_pm_ca_fixed_policy_ops,
+#ifdef CONFIG_MALI_DEVFREQ
+	&kbase_pm_ca_devfreq_policy_ops,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_ca_random_policy_ops
+#endif
+};
+
+/**
+ * POLICY_COUNT - The number of policies available in the system.
+ *
+ * This is derived from the number of policies listed in policy_list.
+ */ +#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list)) + +int kbase_pm_ca_init(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbdev->pm.backend.ca_current_policy = policy_list[0]; + + kbdev->pm.backend.ca_current_policy->init(kbdev); + + return 0; +} + +void kbase_pm_ca_term(struct kbase_device *kbdev) +{ + kbdev->pm.backend.ca_current_policy->term(kbdev); +} + +int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list) +{ + if (!list) + return POLICY_COUNT; + + *list = policy_list; + + return POLICY_COUNT; +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies); + +const struct kbase_pm_ca_policy +*kbase_pm_ca_get_policy(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + return kbdev->pm.backend.ca_current_policy; +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy); + +void kbase_pm_ca_set_policy(struct kbase_device *kbdev, + const struct kbase_pm_ca_policy *new_policy) +{ + const struct kbase_pm_ca_policy *old_policy; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(new_policy != NULL); + + KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u, + new_policy->id); + + /* During a policy change we pretend the GPU is active */ + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread */ + kbase_pm_context_active(kbdev); + + mutex_lock(&kbdev->pm.lock); + + /* Remove the policy to prevent IRQ handlers from working on it */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + old_policy = kbdev->pm.backend.ca_current_policy; + kbdev->pm.backend.ca_current_policy = NULL; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (old_policy->term) + old_policy->term(kbdev); + + if (new_policy->init) + new_policy->init(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.ca_current_policy = new_policy; + + /* If any core power state changes were previously attempted, but + * couldn't be made because the policy was changing (current_policy was + * NULL), then re-try them here. 
*/ + kbase_pm_update_cores_state_nolock(kbdev); + + kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, + kbdev->shader_ready_bitmap, + kbdev->shader_transitioning_bitmap); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + mutex_unlock(&kbdev->pm.lock); + + /* Now the policy change is finished, we release our fake context active + * reference */ + kbase_pm_context_idle(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy); + +u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* All cores must be enabled when instrumentation is in use */ + if (kbdev->pm.backend.instr_enabled) + return kbdev->gpu_props.props.raw_props.shader_present & + kbdev->pm.debug_core_mask_all; + + if (kbdev->pm.backend.ca_current_policy == NULL) + return kbdev->gpu_props.props.raw_props.shader_present & + kbdev->pm.debug_core_mask_all; + + return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) & + kbdev->pm.debug_core_mask_all; +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); + +void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, + u64 cores_transitioning) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->pm.backend.ca_current_policy != NULL) + kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, + cores_ready, + cores_transitioning); +} + +void kbase_pm_ca_instr_enable(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.instr_enabled = true; + + kbase_pm_update_cores_state_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_pm_ca_instr_disable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + kbdev->pm.backend.instr_enabled = false; + + kbase_pm_update_cores_state_nolock(kbdev); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h new file mode 100644 index 000000000000..ee9e751f2d79 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h @@ -0,0 +1,92 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
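A usage sketch for the switch API above: select a core availability policy by name via the list/set pair. The by-name loop is the sort of thing a sysfs store handler might do; this helper itself is illustrative, not driver code.

static int example_select_ca_policy(struct kbase_device *kbdev,
		const char *name)
{
	const struct kbase_pm_ca_policy *const *policies;
	int i, count;

	count = kbase_pm_ca_list_policies(&policies);
	for (i = 0; i < count; i++) {
		if (!strcmp(policies[i]->name, name)) {
			/* Handles the fake active reference and the
			 * term/init handover internally */
			kbase_pm_ca_set_policy(kbdev, policies[i]);
			return 0;
		}
	}

	return -EINVAL;
}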
+ * + */ + + + +/* + * Base kernel core availability APIs + */ + +#ifndef _KBASE_PM_CA_H_ +#define _KBASE_PM_CA_H_ + +/** + * kbase_pm_ca_init - Initialize core availability framework + * + * Must be called before calling any other core availability function + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if the core availability framework was successfully initialized, + * -errno otherwise + */ +int kbase_pm_ca_init(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_term - Terminate core availability framework + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_ca_term(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_get_core_mask - Get currently available shaders core mask + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Returns a mask of the currently available shader cores. + * Calls into the core availability policy + * + * Return: The bit mask of available cores + */ +u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_update_core_status - Update core status + * + * @kbdev: The kbase device structure for the device (must be + * a valid pointer) + * @cores_ready: The bit mask of cores ready for job submission + * @cores_transitioning: The bit mask of cores that are transitioning power + * state + * + * Update core availability policy with current core power status + * + * Calls into the core availability policy + */ +void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, + u64 cores_transitioning); + +/** + * kbase_pm_ca_instr_enable - Enable override for instrumentation + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This overrides the output of the core availability policy, ensuring that all + * cores are available + */ +void kbase_pm_ca_instr_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_instr_disable - Disable override for instrumentation + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This disables any previously enabled override, and resumes normal policy + * functionality + */ +void kbase_pm_ca_instr_disable(struct kbase_device *kbdev); + +#endif /* _KBASE_PM_CA_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c new file mode 100644 index 000000000000..66bf660cffb6 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c @@ -0,0 +1,129 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
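A pairing sketch for the two instrumentation overrides documented above. Note the locking asymmetry in the implementations (mali_kbase_pm_ca.c): kbase_pm_ca_instr_enable() takes hwaccess_lock itself, while kbase_pm_ca_instr_disable() asserts that the caller already holds it. The dump step in the middle is a placeholder, not a real counter API.

static void example_counter_dump(struct kbase_device *kbdev)
{
	unsigned long flags;

	/* Force all shader cores available for the dump */
	kbase_pm_ca_instr_enable(kbdev);

	/* ... program and read the hardware counters here ... */

	/* Restore normal core availability policy behaviour */
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_ca_instr_disable(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}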
+ * + */ + + + +/* + * A core availability policy implementing core mask selection from devfreq OPPs + * + */ + +#include <mali_kbase.h> +#include <mali_kbase_pm.h> +#include <backend/gpu/mali_kbase_pm_internal.h> +#include <linux/version.h> + +void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) +{ + struct kbasep_pm_ca_policy_devfreq *data = + &kbdev->pm.backend.ca_policy_data.devfreq; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + data->cores_desired = core_mask; + + /* Disable any cores that are now unwanted */ + data->cores_enabled &= data->cores_desired; + + kbdev->pm.backend.ca_in_transition = true; + + /* If there are no cores to be powered off then power on desired cores + */ + if (!(data->cores_used & ~data->cores_desired)) { + data->cores_enabled = data->cores_desired; + kbdev->pm.backend.ca_in_transition = false; + } + + kbase_pm_update_cores_state_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX %llX\n", + data->cores_desired, data->cores_enabled); +} + +static void devfreq_init(struct kbase_device *kbdev) +{ + struct kbasep_pm_ca_policy_devfreq *data = + &kbdev->pm.backend.ca_policy_data.devfreq; + + if (kbdev->current_core_mask) { + data->cores_enabled = kbdev->current_core_mask; + data->cores_desired = kbdev->current_core_mask; + } else { + data->cores_enabled = + kbdev->gpu_props.props.raw_props.shader_present; + data->cores_desired = + kbdev->gpu_props.props.raw_props.shader_present; + } + data->cores_used = 0; + kbdev->pm.backend.ca_in_transition = false; +} + +static void devfreq_term(struct kbase_device *kbdev) +{ +} + +static u64 devfreq_get_core_mask(struct kbase_device *kbdev) +{ + return kbdev->pm.backend.ca_policy_data.devfreq.cores_enabled; +} + +static void devfreq_update_core_status(struct kbase_device *kbdev, + u64 cores_ready, + u64 cores_transitioning) +{ + struct kbasep_pm_ca_policy_devfreq *data = + &kbdev->pm.backend.ca_policy_data.devfreq; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + data->cores_used = cores_ready | cores_transitioning; + + /* If in desired state then clear transition flag */ + if (data->cores_enabled == data->cores_desired) + kbdev->pm.backend.ca_in_transition = false; + + /* If all undesired cores are now off then power on desired cores. + * The direct comparison against cores_enabled limits potential + * recursion to one level */ + if (!(data->cores_used & ~data->cores_desired) && + data->cores_enabled != data->cores_desired) { + data->cores_enabled = data->cores_desired; + + kbase_pm_update_cores_state_nolock(kbdev); + + kbdev->pm.backend.ca_in_transition = false; + } +} + +/* + * The struct kbase_pm_ca_policy structure for the devfreq core availability + * policy. + * + * This is the static structure that defines the devfreq core availability power + * policy's callback and name. 
+ */ +const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops = { + "devfreq", /* name */ + devfreq_init, /* init */ + devfreq_term, /* term */ + devfreq_get_core_mask, /* get_core_mask */ + devfreq_update_core_status, /* update_core_status */ + 0u, /* flags */ + KBASE_PM_CA_POLICY_ID_DEVFREQ, /* id */ +}; + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h new file mode 100644 index 000000000000..7ab3cd4d8460 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h @@ -0,0 +1,55 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * A core availability policy for use with devfreq, where core masks are + * associated with OPPs. + */ + +#ifndef MALI_KBASE_PM_CA_DEVFREQ_H +#define MALI_KBASE_PM_CA_DEVFREQ_H + +/** + * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy + * + * This contains data that is private to the devfreq core availability + * policy. + * + * @cores_desired: Cores that the policy wants to be available + * @cores_enabled: Cores that the policy is currently returning as available + * @cores_used: Cores currently powered or transitioning + */ +struct kbasep_pm_ca_policy_devfreq { + u64 cores_desired; + u64 cores_enabled; + u64 cores_used; +}; + +extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops; + +/** + * kbase_devfreq_set_core_mask - Set core mask for policy to use + * @kbdev: Device pointer + * @core_mask: New core mask + * + * The new core mask will have immediate effect if the GPU is powered, or will + * take effect when it is next powered on. + */ +void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); + +#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */ + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c new file mode 100644 index 000000000000..864612d31f9b --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c @@ -0,0 +1,65 @@ +/* + * + * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
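A sketch of how a devfreq transition might feed this policy: on an OPP change, look up the shader core mask associated with the new operating point and pass it to kbase_devfreq_set_core_mask(). Only kbase_devfreq_set_core_mask() is real here; opp_to_core_mask() is a hypothetical platform helper, since how masks are attached to OPPs is platform-specific.

/* Hypothetical helper: the core mask associated with an OPP frequency */
extern u64 opp_to_core_mask(struct kbase_device *kbdev, unsigned long freq);

static void example_devfreq_transition(struct kbase_device *kbdev,
		unsigned long new_freq)
{
	/* Takes effect immediately if the GPU is powered, otherwise on the
	 * next power-up, per the function's documentation above */
	kbase_devfreq_set_core_mask(kbdev, opp_to_core_mask(kbdev, new_freq));
}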
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+		u64 cores_ready,
+		u64 cores_transitioning)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(cores_ready);
+	CSTD_UNUSED(cores_transitioning);
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the fixed core availability
+ * policy.
+ *
+ * This is the static structure that defines the fixed core availability
+ * policy's callbacks and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+	"fixed",			/* name */
+	fixed_init,			/* init */
+	fixed_term,			/* term */
+	fixed_get_core_mask,		/* get_core_mask */
+	fixed_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_FIXED,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100644
index 000000000000..a763155cb703
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+	int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100644
index 000000000000..f891fa225a89
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return 0;
+
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy structure for the coarse demand power policy.
+ *
+ * This is the static structure that defines the coarse demand power policy's
+ * callbacks and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",		/* name */
+	coarse_demand_init,		/* init */
+	coarse_demand_term,		/* term */
+	coarse_demand_get_core_mask,	/* get_core_mask */
+	coarse_demand_get_core_active,	/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100644
index 000000000000..749d305eee9a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC:
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * This contains data that is private to the coarse demand power policy.
+ *
+ * @dummy: Dummy member - no state needed
+ */
+struct kbasep_pm_policy_coarse_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100644
index 000000000000..352744ee6d73
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,519 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#include "mali_kbase_pm_ca_devfreq.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definitions - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on. These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ * @KBASE_PM_CORE_STACK: Core stacks
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO,
+	KBASE_PM_CORE_STACK = STACK_PRESENT_LO
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power
+ *                                 management framework.
+ *
+ * @time_period_start: time at which busy/idle measurements started
+ * @time_busy: number of ns the GPU was busy executing jobs since the
+ *             @time_period_start timestamp.
+ * @time_idle: number of ns the GPU was not executing jobs since the
+ *             @time_period_start timestamp.
+ * @prev_busy: busy time in ns of previous time period.
+ *             Updated when metrics are reset.
+ * @prev_idle: idle time in ns of previous time period.
+ *             Updated when metrics are reset.
+ * @gpu_active: true when the GPU is executing jobs, false when
+ *              not. Updated when the job scheduler informs us a job is
+ *              submitted to or removed from a GPU slot.
+ * @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ * @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ * @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device. + * @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As + * GL jobs never run on slot 2 this slot is not recorded. + * @lock: spinlock protecting the kbasep_pm_metrics_data structure + * @timer: timer to regularly make DVFS decisions based on the power + * management metrics. + * @timer_active: boolean indicating @timer is running + * @platform_data: pointer to data controlled by platform specific code + * @kbdev: pointer to kbase device for which metrics are collected + * + */ +struct kbasep_pm_metrics_data { + ktime_t time_period_start; + u32 time_busy; + u32 time_idle; + u32 prev_busy; + u32 prev_idle; + bool gpu_active; + u32 busy_cl[2]; + u32 busy_gl; + u32 active_cl_ctx[2]; + u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */ + spinlock_t lock; + +#ifdef CONFIG_MALI_MIDGARD_DVFS + struct hrtimer timer; + bool timer_active; +#endif + + void *platform_data; + struct kbase_device *kbdev; +}; + +union kbase_pm_policy_data { + struct kbasep_pm_policy_always_on always_on; + struct kbasep_pm_policy_coarse_demand coarse_demand; + struct kbasep_pm_policy_demand demand; +#if !MALI_CUSTOMER_RELEASE + struct kbasep_pm_policy_demand_always_powered demand_always_powered; + struct kbasep_pm_policy_fast_start fast_start; +#endif +}; + +union kbase_pm_ca_policy_data { + struct kbasep_pm_ca_policy_fixed fixed; + struct kbasep_pm_ca_policy_devfreq devfreq; +#if !MALI_CUSTOMER_RELEASE + struct kbasep_pm_ca_policy_random random; +#endif +}; + +/** + * struct kbase_pm_backend_data - Data stored per device for power management. + * + * This structure contains data for the power management framework. There is one + * instance of this structure per device in the system. + * + * @ca_current_policy: The policy that is currently actively controlling core + * availability. + * @pm_current_policy: The policy that is currently actively controlling the + * power state. + * @ca_policy_data: Private data for current CA policy + * @pm_policy_data: Private data for current PM policy + * @ca_in_transition: Flag indicating when core availability policy is + * transitioning cores. The core availability policy must + * set this when a change in core availability is occurring. + * power_change_lock must be held when accessing this. + * @reset_done: Flag when a reset is complete + * @reset_done_wait: Wait queue to wait for changes to @reset_done + * @l2_powered_wait: Wait queue for whether the l2 cache has been powered as + * requested + * @l2_powered: State indicating whether all the l2 caches are powered. + * Non-zero indicates they're *all* powered + * Zero indicates that some (or all) are not powered + * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter + * users + * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests + * @desired_shader_state: A bit mask identifying the shader cores that the + * power policy would like to be on. The current state + * of the cores may be different, but there should be + * transitions in progress that will eventually achieve + * this state (assuming that the policy doesn't change + * its mind in the mean time). + * @powering_on_shader_state: A bit mask indicating which shader cores are + * currently in a power-on transition + * @desired_tiler_state: A bit mask identifying the tiler cores that the power + * policy would like to be on. 
See @desired_shader_state + * @powering_on_tiler_state: A bit mask indicating which tiler core are + * currently in a power-on transition + * @powering_on_l2_state: A bit mask indicating which l2-caches are currently + * in a power-on transition + * @powering_on_stack_state: A bit mask indicating which core stacks are + * currently in a power-on transition + * @gpu_in_desired_state: This flag is set if the GPU is powered as requested + * by the desired_xxx_state variables + * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0 + * @gpu_powered: Set to true when the GPU is powered and register + * accesses are possible, false otherwise + * @instr_enabled: Set to true when instrumentation is enabled, + * false otherwise + * @cg1_disabled: Set if the policy wants to keep the second core group + * powered off + * @driver_ready_for_irqs: Debug state indicating whether sufficient + * initialization of the driver has occurred to handle + * IRQs + * @gpu_powered_lock: Spinlock that must be held when writing @gpu_powered or + * accessing @driver_ready_for_irqs + * @metrics: Structure to hold metrics for the GPU + * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is + * powered off + * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders + * and/or timers are powered off + * @gpu_poweroff_timer: Timer for powering off GPU + * @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires + * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq + * @shader_poweroff_pending: Bit mask of shaders to be powered off on next + * timer callback + * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer + * callback + * @poweroff_timer_needed: true if the poweroff timer is currently required, + * false otherwise + * @poweroff_timer_running: true if the poweroff timer is currently running, + * false otherwise + * power_change_lock should be held when accessing, + * unless there is no way the timer can be running (eg + * hrtimer_cancel() was called immediately before) + * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. + * hwaccess_lock must be held when accessing + * @poweron_required: true if a GPU power on is required. Should only be set + * when poweroff_wait_in_progress is true, and therefore the + * GPU can not immediately be powered on. pm.lock must be + * held when accessing + * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend + * request. pm.lock must be held when accessing + * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off + * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq + * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete + * @callback_power_on: Callback when the GPU needs to be turned on. See + * &struct kbase_pm_callback_conf + * @callback_power_off: Callback when the GPU may be turned off. See + * &struct kbase_pm_callback_conf + * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to + * be turned off. See &struct kbase_pm_callback_conf + * @callback_power_resume: Callback when a resume occurs and the GPU needs to + * be turned on. See &struct kbase_pm_callback_conf + * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See + * &struct kbase_pm_callback_conf + * @callback_power_runtime_off: Callback when the GPU may be turned off. 
See + * &struct kbase_pm_callback_conf + * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See + * &struct kbase_pm_callback_conf + * + * Note: + * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the + * policy is being changed with kbase_pm_ca_set_policy() or + * kbase_pm_set_policy(). The change is protected under + * kbase_device.pm.power_change_lock. Direct access to this + * from IRQ context must therefore check for NULL. If NULL, then + * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy + * functions that would have been done under IRQ. + */ +struct kbase_pm_backend_data { + const struct kbase_pm_ca_policy *ca_current_policy; + const struct kbase_pm_policy *pm_current_policy; + union kbase_pm_ca_policy_data ca_policy_data; + union kbase_pm_policy_data pm_policy_data; + bool ca_in_transition; + bool reset_done; + wait_queue_head_t reset_done_wait; + wait_queue_head_t l2_powered_wait; + int l2_powered; + int gpu_cycle_counter_requests; + spinlock_t gpu_cycle_counter_requests_lock; + + u64 desired_shader_state; + u64 powering_on_shader_state; + u64 desired_tiler_state; + u64 powering_on_tiler_state; + u64 powering_on_l2_state; +#ifdef CONFIG_MALI_CORESTACK + u64 powering_on_stack_state; +#endif /* CONFIG_MALI_CORESTACK */ + + bool gpu_in_desired_state; + wait_queue_head_t gpu_in_desired_state_wait; + + bool gpu_powered; + + bool instr_enabled; + + bool cg1_disabled; + +#ifdef CONFIG_MALI_DEBUG + bool driver_ready_for_irqs; +#endif /* CONFIG_MALI_DEBUG */ + + spinlock_t gpu_powered_lock; + + + struct kbasep_pm_metrics_data metrics; + + int gpu_poweroff_pending; + int shader_poweroff_pending_time; + + struct hrtimer gpu_poweroff_timer; + struct workqueue_struct *gpu_poweroff_wq; + struct work_struct gpu_poweroff_work; + + u64 shader_poweroff_pending; + u64 tiler_poweroff_pending; + + bool poweroff_timer_needed; + bool poweroff_timer_running; + + bool poweroff_wait_in_progress; + bool poweron_required; + bool poweroff_is_suspend; + + struct workqueue_struct *gpu_poweroff_wait_wq; + struct work_struct gpu_poweroff_wait_work; + + wait_queue_head_t poweroff_wait; + + int (*callback_power_on)(struct kbase_device *kbdev); + void (*callback_power_off)(struct kbase_device *kbdev); + void (*callback_power_suspend)(struct kbase_device *kbdev); + void (*callback_power_resume)(struct kbase_device *kbdev); + int (*callback_power_runtime_on)(struct kbase_device *kbdev); + void (*callback_power_runtime_off)(struct kbase_device *kbdev); + int (*callback_power_runtime_idle)(struct kbase_device *kbdev); +}; + + +/* List of policy IDs */ +enum kbase_pm_policy_id { + KBASE_PM_POLICY_ID_DEMAND = 1, + KBASE_PM_POLICY_ID_ALWAYS_ON, + KBASE_PM_POLICY_ID_COARSE_DEMAND, +#if !MALI_CUSTOMER_RELEASE + KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED, + KBASE_PM_POLICY_ID_FAST_START +#endif +}; + +typedef u32 kbase_pm_policy_flags; + +/** + * struct kbase_pm_policy - Power policy structure. + * + * Each power policy exposes a (static) instance of this structure which + * contains function pointers to the policy's methods. + * + * @name: The name of this policy + * @init: Function called when the policy is selected + * @term: Function called when the policy is unselected + * @get_core_mask: Function called to get the current shader core mask + * @get_core_active: Function called to get the current overall GPU power + * state + * @flags: Field indicating flags for this policy + * @id: Field indicating an ID for this policy. 
This is not + * necessarily the same as its index in the list returned + * by kbase_pm_list_policies(). + * It is used purely for debugging. + */ +struct kbase_pm_policy { + char *name; + + /** + * Function called when the policy is selected + * + * This should initialize the kbdev->pm.pm_policy_data structure. It + * should not attempt to make any changes to hardware state. + * + * It is undefined what state the cores are in when the function is + * called. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + */ + void (*init)(struct kbase_device *kbdev); + + /** + * Function called when the policy is unselected. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + */ + void (*term)(struct kbase_device *kbdev); + + /** + * Function called to get the current shader core mask + * + * The returned mask should meet or exceed (kbdev->shader_needed_bitmap + * | kbdev->shader_inuse_bitmap). + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * + * Return: The mask of shader cores to be powered + */ + u64 (*get_core_mask)(struct kbase_device *kbdev); + + /** + * Function called to get the current overall GPU power state + * + * This function should consider the state of kbdev->pm.active_count. If + * this count is greater than 0 then there is at least one active + * context on the device and the GPU should be powered. If it is equal + * to 0 then there are no active contexts and the GPU could be powered + * off if desired. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * + * Return: true if the GPU should be powered, false otherwise + */ + bool (*get_core_active)(struct kbase_device *kbdev); + + kbase_pm_policy_flags flags; + enum kbase_pm_policy_id id; +}; + + +enum kbase_pm_ca_policy_id { + KBASE_PM_CA_POLICY_ID_FIXED = 1, + KBASE_PM_CA_POLICY_ID_DEVFREQ, + KBASE_PM_CA_POLICY_ID_RANDOM +}; + +typedef u32 kbase_pm_ca_policy_flags; + +/** + * Maximum length of a CA policy names + */ +#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15 + +/** + * struct kbase_pm_ca_policy - Core availability policy structure. + * + * Each core availability policy exposes a (static) instance of this structure + * which contains function pointers to the policy's methods. + * + * @name: The name of this policy + * @init: Function called when the policy is selected + * @term: Function called when the policy is unselected + * @get_core_mask: Function called to get the current shader core + * availability mask + * @update_core_status: Function called to update the current core status + * @flags: Field indicating flags for this policy + * @id: Field indicating an ID for this policy. This is not + * necessarily the same as its index in the list returned + * by kbase_pm_list_policies(). + * It is used purely for debugging. + */ +struct kbase_pm_ca_policy { + char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1]; + + /** + * Function called when the policy is selected + * + * This should initialize the kbdev->pm.ca_policy_data structure. It + * should not attempt to make any changes to hardware state. + * + * It is undefined what state the cores are in when the function is + * called. + * + * @kbdev The kbase device structure for the device (must be a + * valid pointer) + */ + void (*init)(struct kbase_device *kbdev); + + /** + * Function called when the policy is unselected. 
+ * + * @kbdev The kbase device structure for the device (must be a + * valid pointer) + */ + void (*term)(struct kbase_device *kbdev); + + /** + * Function called to get the current shader core availability mask + * + * When a change in core availability is occurring, the policy must set + * kbdev->pm.ca_in_transition to true. This is to indicate that + * reporting changes in power state cannot be optimized out, even if + * kbdev->pm.desired_shader_state remains unchanged. This must be done + * by any functions internal to the Core Availability Policy that change + * the return value of kbase_pm_ca_policy::get_core_mask. + * + * @kbdev The kbase device structure for the device (must be a + * valid pointer) + * + * Return: The current core availability mask + */ + u64 (*get_core_mask)(struct kbase_device *kbdev); + + /** + * Function called to update the current core status + * + * If none of the cores in core group 0 are ready or transitioning, then + * the policy must ensure that the next call to get_core_mask does not + * return 0 for all cores in core group 0. It is an error to disable + * core group 0 through the core availability policy. + * + * When a change in core availability has finished, the policy must set + * kbdev->pm.ca_in_transition to false. This is to indicate that + * changes in power state can once again be optimized out when + * kbdev->pm.desired_shader_state is unchanged. + * + * @kbdev: The kbase device structure for the device + * (must be a valid pointer) + * @cores_ready: The mask of cores currently powered and + * ready to run jobs + * @cores_transitioning: The mask of cores currently transitioning + * power state + */ + void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready, + u64 cores_transitioning); + + kbase_pm_ca_policy_flags flags; + + /** + * Field indicating an ID for this policy. This is not necessarily the + * same as its index in the list returned by kbase_pm_list_policies(). + * It is used purely for debugging. + */ + enum kbase_pm_ca_policy_id id; +}; + +#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c new file mode 100644 index 000000000000..81322fd0dd17 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c @@ -0,0 +1,73 @@ +/* + * + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
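Pulling the interface together, a minimal skeleton of a custom core availability policy (a sketch only: a real policy would need its own entry in policy_list in mali_kbase_pm_ca.c and its own enum kbase_pm_ca_policy_id value; the FIXED id below is a stand-in):

static void skel_init(struct kbase_device *kbdev)
{
	/* No availability transition is pending at selection time */
	kbdev->pm.backend.ca_in_transition = false;
}

static void skel_term(struct kbase_device *kbdev)
{
}

static u64 skel_get_core_mask(struct kbase_device *kbdev)
{
	/* Core group 0 must never be disabled - see the contract above */
	return kbdev->gpu_props.props.raw_props.shader_present;
}

static void skel_update_core_status(struct kbase_device *kbdev,
		u64 cores_ready, u64 cores_transitioning)
{
	/* A stateless policy has nothing to track here */
}

static const struct kbase_pm_ca_policy example_skel_ca_policy_ops = {
	"skel",				/* name */
	skel_init,			/* init */
	skel_term,			/* term */
	skel_get_core_mask,		/* get_core_mask */
	skel_update_core_status,	/* update_core_status */
	0u,				/* flags */
	KBASE_PM_CA_POLICY_ID_FIXED,	/* id - placeholder, see note above */
};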
+ * + */ + + + + + +/* + * A simple demand based power management policy + */ + +#include <mali_kbase.h> +#include <mali_kbase_pm.h> + +static u64 demand_get_core_mask(struct kbase_device *kbdev) +{ + u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap; + + if (0 == kbdev->pm.active_count) + return 0; + + return desired; +} + +static bool demand_get_core_active(struct kbase_device *kbdev) +{ + if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | + kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt + && !kbdev->tiler_inuse_cnt) + return false; + + return true; +} + +static void demand_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static void demand_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +/* + * The struct kbase_pm_policy structure for the demand power policy. + * + * This is the static structure that defines the demand power policy's callback + * and name. + */ +const struct kbase_pm_policy kbase_pm_demand_policy_ops = { + "demand", /* name */ + demand_init, /* init */ + demand_term, /* term */ + demand_get_core_mask, /* get_core_mask */ + demand_get_core_active, /* get_core_active */ + 0u, /* flags */ + KBASE_PM_POLICY_ID_DEMAND, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h new file mode 100644 index 000000000000..c0c84b6e9189 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h @@ -0,0 +1,64 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * A simple demand based power management policy + */ + +#ifndef MALI_KBASE_PM_DEMAND_H +#define MALI_KBASE_PM_DEMAND_H + +/** + * DOC: Demand power management policy + * + * The demand power management policy has the following characteristics: + * - When KBase indicates that the GPU will be powered up, but we don't yet + * know which Job Chains are to be run: + * - The Shader Cores are not powered up + * + * - When KBase indicates that a set of Shader Cores are needed to submit the + * currently queued Job Chains: + * - Only those Shader Cores are powered up + * + * - When KBase indicates that the GPU need not be powered: + * - The Shader Cores are powered off, and the GPU itself is powered off too. + * + * Note: + * - KBase indicates the GPU will be powered up when it has a User Process that + * has just started to submit Job Chains. + * + * - KBase indicates the GPU need not be powered when all the Job Chains from + * User Processes have finished, and it is waiting for a User Process to + * submit some more Job Chains. + */ + +/** + * struct kbasep_pm_policy_demand - Private structure for policy instance data + * + * @dummy: No state is needed, a dummy variable + * + * This contains data that is private to the demand power policy. 
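+ *
+ * The demand policy keeps no state of its own: its callbacks derive
+ * everything they need at call time from kbdev->pm.active_count and the
+ * shader_needed/shader_inuse bitmaps (see mali_kbase_pm_demand.c), so this
+ * structure exists only to satisfy the policy interface.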
+ */ +struct kbasep_pm_policy_demand { + int dummy; +}; + +extern const struct kbase_pm_policy kbase_pm_demand_policy_ops; + +#endif /* MALI_KBASE_PM_DEMAND_H */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c new file mode 100644 index 000000000000..cbc258cb361b --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -0,0 +1,1672 @@ +/* + * + * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Base kernel Power Management hardware control + */ + +#include <mali_kbase.h> +#include <mali_kbase_config_defaults.h> +#include <mali_midg_regmap.h> +#if defined(CONFIG_MALI_GATOR_SUPPORT) +#include <mali_kbase_gator.h> +#endif +#include <mali_kbase_tlstream.h> +#include <mali_kbase_pm.h> +#include <mali_kbase_config_defaults.h> +#include <mali_kbase_smc.h> +#include <mali_kbase_hwaccess_jm.h> +#include <mali_kbase_ctx_sched.h> +#include <backend/gpu/mali_kbase_cache_policy_backend.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_irq_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +#include <linux/of.h> + +#if MALI_MOCK_TEST +#define MOCKABLE(function) function##_original +#else +#define MOCKABLE(function) function +#endif /* MALI_MOCK_TEST */ + +/** + * enum kbasep_pm_action - Actions that can be performed on a core. + * + * This enumeration is private to the file. Its values are set to allow + * core_type_to_reg() function, which decodes this enumeration, to be simpler + * and more efficient. + * + * @ACTION_PRESENT: The cores that are present + * @ACTION_READY: The cores that are ready + * @ACTION_PWRON: Power on the cores specified + * @ACTION_PWROFF: Power off the cores specified + * @ACTION_PWRTRANS: The cores that are transitioning + * @ACTION_PWRACTIVE: The cores that are active + */ +enum kbasep_pm_action { + ACTION_PRESENT = 0, + ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO), + ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO), + ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO), + ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO), + ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO) +}; + +static u64 kbase_pm_get_state( + struct kbase_device *kbdev, + enum kbase_pm_core_type core_type, + enum kbasep_pm_action action); + +/** + * core_type_to_reg - Decode a core type and action to a register. + * + * Given a core type (defined by kbase_pm_core_type) and an action (defined + * by kbasep_pm_action) this function will return the register offset that + * will perform the action on the core type. The register returned is the _LO + * register and an offset must be applied to use the _HI register. + * + * @core_type: The type of core + * @action: The type of action + * + * Return: The register offset of the _LO register that performs an action of + * type @action on a core of type @core_type. 
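+ *
+ * Illustrative example (this assumes, as the arithmetic in the function
+ * body implies, that each enum kbase_pm_core_type value is the offset of
+ * that core type's _PRESENT_LO register): for a shader core power-up,
+ * core_type_to_reg(KBASE_PM_CORE_SHADER, ACTION_PWRON) evaluates to
+ * SHADER_PRESENT_LO + (SHADER_PWRON_LO - SHADER_PRESENT_LO), i.e.
+ * SHADER_PWRON_LO.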
+ */ +static u32 core_type_to_reg(enum kbase_pm_core_type core_type, + enum kbasep_pm_action action) +{ +#ifdef CONFIG_MALI_CORESTACK + if (core_type == KBASE_PM_CORE_STACK) { + switch (action) { + case ACTION_PRESENT: + return STACK_PRESENT_LO; + case ACTION_READY: + return STACK_READY_LO; + case ACTION_PWRON: + return STACK_PWRON_LO; + case ACTION_PWROFF: + return STACK_PWROFF_LO; + case ACTION_PWRTRANS: + return STACK_PWRTRANS_LO; + default: + BUG(); + } + } +#endif /* CONFIG_MALI_CORESTACK */ + + return (u32)core_type + (u32)action; +} + +#ifdef CONFIG_ARM64 +static void mali_cci_flush_l2(struct kbase_device *kbdev) +{ + const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; + u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + u32 raw; + + /* + * Note that we don't take the cache flush mutex here since + * we expect to be the last user of the L2, all other L2 users + * would have dropped their references, to initiate L2 power + * down, L2 power down being the only valid place for this + * to be called from. + */ + + kbase_reg_write(kbdev, + GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES, + NULL); + + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), + NULL); + + /* Wait for cache flush to complete before continuing, exit on + * gpu resets or loop expiry. */ + while (((raw & mask) == 0) && --loops) { + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), + NULL); + } +} +#endif + +/** + * kbase_pm_invoke - Invokes an action on a core set + * + * This function performs the action given by @action on a set of cores of a + * type given by @core_type. It is a static function used by + * kbase_pm_transition_core_type() + * + * @kbdev: The kbase device structure of the device + * @core_type: The type of core that the action should be performed on + * @cores: A bit mask of cores to perform the action on (low 32 bits) + * @action: The action to perform on the cores + */ +static void kbase_pm_invoke(struct kbase_device *kbdev, + enum kbase_pm_core_type core_type, + u64 cores, + enum kbasep_pm_action action) +{ + u32 reg; + u32 lo = cores & 0xFFFFFFFF; + u32 hi = (cores >> 32) & 0xFFFFFFFF; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + reg = core_type_to_reg(core_type, action); + + KBASE_DEBUG_ASSERT(reg); +#if defined(CONFIG_MALI_GATOR_SUPPORT) + if (cores) { + if (action == ACTION_PWRON) + kbase_trace_mali_pm_power_on(core_type, cores); + else if (action == ACTION_PWROFF) + kbase_trace_mali_pm_power_off(core_type, cores); + } +#endif + + if (cores) { + u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); + + if (action == ACTION_PWRON) + state |= cores; + else if (action == ACTION_PWROFF) + state &= ~cores; + KBASE_TLSTREAM_AUX_PM_STATE(core_type, state); + } + + /* Tracing */ + if (cores) { + if (action == ACTION_PWRON) + switch (core_type) { + case KBASE_PM_CORE_SHADER: + KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u, + lo); + break; + case KBASE_PM_CORE_TILER: + KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL, + NULL, 0u, lo); + break; + case KBASE_PM_CORE_L2: + KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL, + 0u, lo); + break; + default: + break; + } + else if (action == ACTION_PWROFF) + switch (core_type) { + case KBASE_PM_CORE_SHADER: + KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL, + 0u, lo); + break; + case KBASE_PM_CORE_TILER: + KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL, + NULL, 0u, lo); + break; + case KBASE_PM_CORE_L2: + KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL, + 0u, lo); + /* disable snoops before L2 is turned off */ + 
kbase_pm_cache_snoop_disable(kbdev);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (lo != 0)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+ if (hi != 0)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+/**
+ * kbase_pm_get_state - Get information about a core set
+ *
+ * This function gets information (chosen by @action) about a set of cores of
+ * a type given by @core_type. It is a static function used by
+ * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and
+ * kbase_pm_get_ready_cores().
+ *
+ * @kbdev: The kbase device structure of the device
+ * @core_type: The type of core that should be queried
+ * @action: The property of the cores to query
+ *
+ * Return: A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+ enum kbase_pm_core_type core_type,
+ enum kbasep_pm_action action)
+{
+ u32 reg;
+ u32 lo, hi;
+
+ reg = core_type_to_reg(core_type, action);
+
+ KBASE_DEBUG_ASSERT(reg);
+
+ lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+ hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+ return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev)
+{
+ kbdev->shader_inuse_bitmap = 0;
+ kbdev->shader_needed_bitmap = 0;
+ kbdev->shader_available_bitmap = 0;
+ kbdev->tiler_available_bitmap = 0;
+ kbdev->l2_users_count = 0;
+ kbdev->l2_available_bitmap = 0;
+ kbdev->tiler_needed_cnt = 0;
+ kbdev->tiler_inuse_cnt = 0;
+
+ memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+/**
+ * kbase_pm_get_present_cores - Get the cores that are present
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ switch (type) {
+ case KBASE_PM_CORE_L2:
+ return kbdev->gpu_props.props.raw_props.l2_present;
+ case KBASE_PM_CORE_SHADER:
+ return kbdev->gpu_props.props.raw_props.shader_present;
+ case KBASE_PM_CORE_TILER:
+ return kbdev->gpu_props.props.raw_props.tiler_present;
+#ifdef CONFIG_MALI_CORESTACK
+ case KBASE_PM_CORE_STACK:
+ return kbdev->gpu_props.props.raw_props.stack_present;
+#endif /* CONFIG_MALI_CORESTACK */
+ default:
+ break;
+ }
+ KBASE_DEBUG_ASSERT(0);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * kbase_pm_get_active_cores - Get the cores that are "active"
+ * (busy processing work)
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are active
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between
+ * power states
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are transitioning
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * kbase_pm_get_ready_cores - Get the cores that are powered on
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are ready (powered on)
+ */
+u64
kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ u64 result;
+
+ result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+ switch (type) {
+ case KBASE_PM_CORE_SHADER:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+ (u32) result);
+ break;
+ case KBASE_PM_CORE_TILER:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+ (u32) result);
+ break;
+ case KBASE_PM_CORE_L2:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+ (u32) result);
+ break;
+ default:
+ break;
+ }
+
+ return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * kbase_pm_transition_core_type - Perform power transitions for a particular
+ * core type.
+ *
+ * This function will perform any available power transitions to make the
+ * actual hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that core cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask,
+ * however the return value will always be 0 in this case.
+ *
+ * @kbdev: The kbase device
+ * @type: The core type to perform transitions for
+ * @desired_state: A bit mask of the desired state of the cores
+ * @in_use: A bit mask of the cores that are currently running
+ * jobs. These cores have to be kept powered up because
+ * there are jobs running (or about to run) on them.
+ * @available: Receives a bit mask of the cores that the job
+ * scheduler can use to submit jobs to. May be NULL if
+ * this is not needed.
+ * @powering_on: Bit mask to update with cores that are
+ * transitioning to a power-on state.
+ *
+ * Return: true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type,
+ u64 desired_state,
+ u64 in_use,
+ u64 * const available,
+ u64 *powering_on)
+{
+ u64 present;
+ u64 ready;
+ u64 trans;
+ u64 powerup;
+ u64 powerdown;
+ u64 powering_on_trans;
+ u64 desired_state_in_use;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* Get current state */
+ present = kbase_pm_get_present_cores(kbdev, type);
+ trans = kbase_pm_get_trans_cores(kbdev, type);
+ ready = kbase_pm_get_ready_cores(kbdev, type);
+ /* mask off ready from trans in case transitions finished between the
+ * register reads */
+ trans &= ~ready;
+
+ if (trans) /* Do not progress if any cores are transitioning */
+ return false;
+
+ powering_on_trans = trans & *powering_on;
+ *powering_on = powering_on_trans;
+
+ if (available != NULL)
+ *available = (ready | powering_on_trans) & desired_state;
+
+ /* Update desired state to include the in-use cores. These have to be
+ * kept powered up because there are jobs running or about to run on
+ * these cores
+ */
+ desired_state_in_use = desired_state | in_use;
+
+ /* Update state of whether l2 caches are powered */
+ if (type == KBASE_PM_CORE_L2) {
+ if ((ready == present) && (desired_state_in_use == ready) &&
+ (trans == 0)) {
+ /* All are ready, none will be turned off, and none are
+ * transitioning */
+ kbdev->pm.backend.l2_powered = 1;
+ /*
+ * Ensure snoops are enabled after L2 is powered up,
+ * note that kbase keeps track of the snoop state, so
+ * safe to repeatedly call.
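+ * (kbase_pm_cache_snoop_enable() checks kbdev->cci_snoop_enabled and is
+ * a no-op when snoops are already on.)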
+ */ + kbase_pm_cache_snoop_enable(kbdev); + if (kbdev->l2_users_count > 0) { + /* Notify any registered l2 cache users + * (optimized out when no users waiting) */ + wake_up(&kbdev->pm.backend.l2_powered_wait); + } + } else + kbdev->pm.backend.l2_powered = 0; + } + + if (desired_state == ready && (trans == 0)) + return true; + + /* Restrict the cores to those that are actually present */ + powerup = desired_state_in_use & present; + powerdown = (~desired_state_in_use) & present; + + /* Restrict to cores that are not already in the desired state */ + powerup &= ~ready; + powerdown &= ready; + + /* Don't transition any cores that are already transitioning, except for + * Mali cores that support the following case: + * + * If the SHADER_PWRON or TILER_PWRON registers are written to turn on + * a core that is currently transitioning to power off, then this is + * remembered and the shader core is automatically powered up again once + * the original transition completes. Once the automatic power on is + * complete any job scheduled on the shader core should start. + */ + powerdown &= ~trans; + + if (kbase_hw_has_feature(kbdev, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS)) + if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type) + trans = powering_on_trans; /* for exception cases, only + * mask off cores in power on + * transitions */ + + powerup &= ~trans; + + /* Perform transitions if any */ + kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON); +#if !PLATFORM_POWER_DOWN_ONLY + kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF); +#endif + + /* Recalculate cores transitioning on, and re-evaluate our state */ + powering_on_trans |= powerup; + *powering_on = powering_on_trans; + if (available != NULL) + *available = (ready | powering_on_trans) & desired_state; + + return false; +} + +KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type); + +/** + * get_desired_cache_status - Determine which caches should be on for a + * particular core state + * + * This function takes a bit mask of the present caches and the cores (or + * caches) that are attached to the caches that will be powered. It then + * computes which caches should be turned on to allow the cores requested to be + * powered up. 
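+ *
+ * Worked example (illustrative values, not from the original source): with
+ * @present = 0x3 (two caches) and @cores_powered = 0xF0, the highest present
+ * bit is 1 and powered cores exist at or above it, so bit 1 is selected;
+ * nothing powered remains below, so the result is 0x2 (bit 0 is added only
+ * if @tilers_powered is non-zero).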
+ *
+ * @present: The bit mask of present caches
+ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
+ * be powered
+ * @tilers_powered: The bit mask of tilers that are desired to be powered
+ *
+ * Return: A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered,
+ u64 tilers_powered)
+{
+ u64 desired = 0;
+
+ while (present) {
+ /* Find out which is the highest set bit */
+ u64 bit = fls64(present) - 1;
+ u64 bit_mask = 1ull << bit;
+ /* Create a mask which has all bits from 'bit' upwards set */
+ u64 mask = ~(bit_mask - 1);
+
+ /* If there are any cores powered at this bit or above (that
+ * haven't previously been processed) then we need this core on
+ */
+ if (cores_powered & mask)
+ desired |= bit_mask;
+
+ /* Remove bits from cores_powered and present */
+ cores_powered &= ~mask;
+ present &= ~bit_mask;
+ }
+
+ /* Power up the required L2(s) for the tiler */
+ if (tilers_powered)
+ desired |= 1;
+
+ return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
+
+#ifdef CONFIG_MALI_CORESTACK
+u64 kbase_pm_core_stack_mask(u64 cores)
+{
+ u64 stack_mask = 0;
+ size_t const MAX_CORE_ID = 31;
+ size_t const NUM_CORES_PER_STACK = 4;
+ size_t i;
+
+ for (i = 0; i <= MAX_CORE_ID; ++i) {
+ if (test_bit(i, (unsigned long *)&cores)) {
+ /* Every core with ID >= 16 is mapped to stacks 4-7
+ * instead of 0-3 */
+ size_t const stack_num = (i >= 16) ?
+ (i % NUM_CORES_PER_STACK) + 4 :
+ (i % NUM_CORES_PER_STACK);
+ set_bit(stack_num, (unsigned long *)&stack_mask);
+ }
+ }
+
+ return stack_mask;
+}
+#endif /* CONFIG_MALI_CORESTACK */
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+ bool cores_are_available = false;
+ bool in_desired_state = true;
+ u64 desired_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+ u64 desired_stack_state;
+ u64 stacks_powered;
+#endif /* CONFIG_MALI_CORESTACK */
+ u64 cores_powered;
+ u64 tilers_powered;
+ u64 tiler_available_bitmap;
+ u64 tiler_transitioning_bitmap;
+ u64 shader_available_bitmap;
+ u64 shader_ready_bitmap;
+ u64 shader_transitioning_bitmap;
+ u64 l2_available_bitmap;
+ u64 prev_l2_available_bitmap;
+ u64 l2_inuse_bitmap;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+ if (kbdev->pm.backend.gpu_powered == false) {
+ spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+ if (kbdev->pm.backend.desired_shader_state == 0 &&
+ kbdev->pm.backend.desired_tiler_state == 0)
+ return true;
+ return false;
+ }
+
+ /* Trace that a change-state is being requested, and that it took
+ * (effectively) no time to start it.
This is useful for counting how
+ * many state changes occurred, in a way that's backwards-compatible
+ * with processing the trace data */
+ kbase_timeline_pm_send_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+ kbase_timeline_pm_handle_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+ /* If any cores are already powered then we must keep the caches on */
+ shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+ KBASE_PM_CORE_SHADER);
+ cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+ cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+#ifdef CONFIG_MALI_CORESTACK
+ /* Work out which core stacks want to be powered */
+ desired_stack_state = kbase_pm_core_stack_mask(cores_powered);
+ stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) |
+ desired_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+ /* Work out which tilers want to be powered */
+ tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+ KBASE_PM_CORE_TILER);
+ tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+ tilers_powered |= kbdev->pm.backend.desired_tiler_state;
+
+ /* If there are l2 cache users registered, keep all l2s powered even if
+ * all other cores are off. */
+ if (kbdev->l2_users_count > 0)
+ cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
+
+ desired_l2_state = get_desired_cache_status(
+ kbdev->gpu_props.props.raw_props.l2_present,
+ cores_powered, tilers_powered);
+
+ l2_inuse_bitmap = get_desired_cache_status(
+ kbdev->gpu_props.props.raw_props.l2_present,
+ cores_powered | shader_transitioning_bitmap,
+ tilers_powered | tiler_transitioning_bitmap);
+
+#ifdef CONFIG_MALI_CORESTACK
+ if (stacks_powered)
+ desired_l2_state |= 1;
+#endif /* CONFIG_MALI_CORESTACK */
+
+ /* If any l2 cache is on, then enable l2 #0, for use by job manager */
+ if (0 != desired_l2_state)
+ desired_l2_state |= 1;
+
+ prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap,
+ &l2_available_bitmap,
+ &kbdev->pm.backend.powering_on_l2_state);
+
+ if (kbdev->l2_available_bitmap != l2_available_bitmap)
+ KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+
+ kbdev->l2_available_bitmap = l2_available_bitmap;
+
+
+#ifdef CONFIG_MALI_CORESTACK
+ if (in_desired_state) {
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_STACK, desired_stack_state, 0,
+ &kbdev->stack_available_bitmap,
+ &kbdev->pm.backend.powering_on_stack_state);
+ }
+#endif /* CONFIG_MALI_CORESTACK */
+
+ if (in_desired_state) {
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_TILER,
+ kbdev->pm.backend.desired_tiler_state,
+ 0, &tiler_available_bitmap,
+ &kbdev->pm.backend.powering_on_tiler_state);
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_SHADER,
+ kbdev->pm.backend.desired_shader_state,
+ kbdev->shader_inuse_bitmap,
+ &shader_available_bitmap,
+ &kbdev->pm.backend.powering_on_shader_state);
+
+ if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+ NULL, 0u,
+ (u32) shader_available_bitmap);
+ KBASE_TIMELINE_POWER_SHADER(kbdev,
+ shader_available_bitmap);
+ }
+
+ kbdev->shader_available_bitmap = shader_available_bitmap;
+
+ if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+ NULL, NULL, 0u,
+ (u32) tiler_available_bitmap);
+ KBASE_TIMELINE_POWER_TILER(kbdev,
+ tiler_available_bitmap);
+ }
+
+ kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+ } else if ((l2_available_bitmap &
+ kbdev->gpu_props.props.raw_props.tiler_present) !=
+ kbdev->gpu_props.props.raw_props.tiler_present) {
+ tiler_available_bitmap = 0;
+
+ if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+ KBASE_TIMELINE_POWER_TILER(kbdev,
+ tiler_available_bitmap);
+
+ kbdev->tiler_available_bitmap = tiler_available_bitmap;
+ }
+
+ /* State updated for slow-path waiters */
+ kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+ shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_SHADER);
+ shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+ KBASE_PM_CORE_SHADER);
+
+ /* Determine whether the cores are now available (even if the set of
+ * available cores is empty). Note that they can be available even if
+ * we've not finished transitioning to the desired state */
+ if ((kbdev->shader_available_bitmap &
+ kbdev->pm.backend.desired_shader_state)
+ == kbdev->pm.backend.desired_shader_state &&
+ (kbdev->tiler_available_bitmap &
+ kbdev->pm.backend.desired_tiler_state)
+ == kbdev->pm.backend.desired_tiler_state) {
+ cores_are_available = true;
+
+ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+ (u32)(kbdev->shader_available_bitmap &
+ kbdev->pm.backend.desired_shader_state));
+ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+ (u32)(kbdev->tiler_available_bitmap &
+ kbdev->pm.backend.desired_tiler_state));
+
+ /* Log timelining information about handling events that power
+ * up cores, to match up with immediate submission, either
+ * because cores were already available, or from the PM IRQ */
+ if (!in_desired_state)
+ kbase_timeline_pm_send_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+ }
+
+ if (in_desired_state) {
+ KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_L2));
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_SHADER));
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+#endif
+
+ KBASE_TLSTREAM_AUX_PM_STATE(
+ KBASE_PM_CORE_L2,
+ kbase_pm_get_ready_cores(
+ kbdev, KBASE_PM_CORE_L2));
+ KBASE_TLSTREAM_AUX_PM_STATE(
+ KBASE_PM_CORE_SHADER,
+ kbase_pm_get_ready_cores(
+ kbdev, KBASE_PM_CORE_SHADER));
+ KBASE_TLSTREAM_AUX_PM_STATE(
+ KBASE_PM_CORE_TILER,
+ kbase_pm_get_ready_cores(
+ kbdev,
+ KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+ KBASE_TLSTREAM_AUX_PM_STATE(
+ KBASE_PM_CORE_STACK,
+ kbase_pm_get_ready_cores(
+ kbdev,
+ KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+
+ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+ kbdev->pm.backend.gpu_in_desired_state,
+ (u32)kbdev->pm.backend.desired_shader_state);
+ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+ (u32)kbdev->pm.backend.desired_tiler_state);
+
+ /* Log timelining information for synchronous waiters */
+ kbase_timeline_pm_send_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+ /* Wake slow-path waiters. Job scheduler does not use this.
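+ * The slow-path waiters are callers of kbase_pm_check_transitions_sync(),
+ * which sleep on kbdev->pm.backend.gpu_in_desired_state_wait.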
*/ + KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); + + wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); + } + + spin_unlock(&kbdev->pm.backend.gpu_powered_lock); + + /* kbase_pm_ca_update_core_status can cause one-level recursion into + * this function, so it must only be called once all changes to kbdev + * have been committed, and after the gpu_powered_lock has been + * dropped. */ + if (kbdev->shader_ready_bitmap != shader_ready_bitmap || + kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) { + kbdev->shader_ready_bitmap = shader_ready_bitmap; + kbdev->shader_transitioning_bitmap = + shader_transitioning_bitmap; + + kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap, + shader_transitioning_bitmap); + } + + /* The core availability policy is not allowed to keep core group 0 + * turned off (unless it was changing the l2 power state) */ + if (!((shader_ready_bitmap | shader_transitioning_bitmap) & + kbdev->gpu_props.props.coherency_info.group[0].core_mask) && + (prev_l2_available_bitmap == desired_l2_state) && + !(kbase_pm_ca_get_core_mask(kbdev) & + kbdev->gpu_props.props.coherency_info.group[0].core_mask)) + BUG(); + + /* The core availability policy is allowed to keep core group 1 off, + * but all jobs specifically targeting CG1 must fail */ + if (!((shader_ready_bitmap | shader_transitioning_bitmap) & + kbdev->gpu_props.props.coherency_info.group[1].core_mask) && + !(kbase_pm_ca_get_core_mask(kbdev) & + kbdev->gpu_props.props.coherency_info.group[1].core_mask)) + kbdev->pm.backend.cg1_disabled = true; + else + kbdev->pm.backend.cg1_disabled = false; + + return cores_are_available; +} +KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock); + +/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has + * aborted due to a fatal signal. If the time spent waiting has exceeded this + * threshold then there is most likely a hardware issue. */ +#define PM_TIMEOUT (5*HZ) /* 5s */ + +void kbase_pm_check_transitions_sync(struct kbase_device *kbdev) +{ + unsigned long flags; + unsigned long timeout; + bool cores_are_available; + int ret; + + /* Force the transition to be checked and reported - the cores may be + * 'available' (for job submission) but not fully powered up. 
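+ * If the wait below is aborted by a fatal signal and more than PM_TIMEOUT
+ * has elapsed, the desired and current core states are dumped and a GPU
+ * reset is attempted.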
*/ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + + /* Don't need 'cores_are_available', because we don't return anything */ + CSTD_UNUSED(cores_are_available); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + timeout = jiffies + PM_TIMEOUT; + + /* Wait for cores */ + ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, + kbdev->pm.backend.gpu_in_desired_state); + + if (ret < 0 && time_after(jiffies, timeout)) { + dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); + dev_err(kbdev->dev, "Desired state :\n"); + dev_err(kbdev->dev, "\tShader=%016llx\n", + kbdev->pm.backend.desired_shader_state); + dev_err(kbdev->dev, "\tTiler =%016llx\n", + kbdev->pm.backend.desired_tiler_state); + dev_err(kbdev->dev, "Current state :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_HI), NULL), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_LO), + NULL)); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_HI), NULL), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_LO), NULL)); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_HI), NULL), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_LO), NULL)); + dev_err(kbdev->dev, "Cores transitioning :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_HI), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_LO), NULL)); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_HI), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_LO), NULL)); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_HI), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_LO), NULL)); +#if KBASE_GPU_RESET_EN + dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); +#endif /* KBASE_GPU_RESET_EN */ + } else { + /* Log timelining information that a change in state has + * completed */ + kbase_timeline_pm_handle_event(kbdev, + KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + } +} +KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync); + +void kbase_pm_enable_interrupts(struct kbase_device *kbdev) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + /* + * Clear all interrupts, + * and unmask them all. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, + NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, + NULL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, + NULL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL); +} + +KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); + +void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(NULL != kbdev); + /* + * Mask all interrupts, + * and clear them all. 
+ */ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, + NULL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, + NULL); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); +} + +void kbase_pm_disable_interrupts(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_disable_interrupts_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); + + +/* + * pmu layout: + * 0x0000: PMU TAG (RO) (0xCAFECAFE) + * 0x0004: PMU VERSION ID (RO) (0x00000000) + * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE) + */ +void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) +{ + bool reset_required = is_resume; + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->pm.lock); + + if (kbdev->pm.backend.gpu_powered) { + /* Already turned on */ + if (kbdev->poweroff_pending) + kbase_pm_enable_interrupts(kbdev); + kbdev->poweroff_pending = false; + KBASE_DEBUG_ASSERT(!is_resume); + return; + } + + kbdev->poweroff_pending = false; + + KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u); + + if (is_resume && kbdev->pm.backend.callback_power_resume) { + kbdev->pm.backend.callback_power_resume(kbdev); + return; + } else if (kbdev->pm.backend.callback_power_on) { + kbdev->pm.backend.callback_power_on(kbdev); + /* If your platform properly keeps the GPU state you may use the + * return value of the callback_power_on function to + * conditionally reset the GPU on power up. Currently we are + * conservative and always reset the GPU. */ + reset_required = true; + } + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + kbdev->pm.backend.gpu_powered = true; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (reset_required) { + /* GPU state was lost, reset GPU to ensure it is in a + * consistent state */ + kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); + } + + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_ctx_sched_restore_all_as(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + /* Lastly, enable the interrupts */ + kbase_pm_enable_interrupts(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_clock_on); + +bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* ASSERT that the cores should now be unavailable. No lock needed. */ + KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u); + + kbdev->poweroff_pending = true; + + if (!kbdev->pm.backend.gpu_powered) { + /* Already turned off */ + if (is_suspend && kbdev->pm.backend.callback_power_suspend) + kbdev->pm.backend.callback_power_suspend(kbdev); + return true; + } + + KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u); + + /* Disable interrupts. 
This also clears any outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure that any IRQ handlers have finished */ + kbase_synchronize_irqs(kbdev); + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (atomic_read(&kbdev->faults_pending)) { + /* Page/bus faults are still being processed. The GPU can not + * be powered off until they have completed */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return false; + } + + kbase_pm_cache_snoop_disable(kbdev); + + /* The GPU power may be turned off from this point */ + kbdev->pm.backend.gpu_powered = false; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (is_suspend && kbdev->pm.backend.callback_power_suspend) + kbdev->pm.backend.callback_power_suspend(kbdev); + else if (kbdev->pm.backend.callback_power_off) + kbdev->pm.backend.callback_power_off(kbdev); + return true; +} + +KBASE_EXPORT_TEST_API(kbase_pm_clock_off); + +struct kbasep_reset_timeout_data { + struct hrtimer timer; + bool timed_out; + struct kbase_device *kbdev; +}; + +void kbase_pm_reset_done(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + kbdev->pm.backend.reset_done = true; + wake_up(&kbdev->pm.backend.reset_done_wait); +} + +/** + * kbase_pm_wait_for_reset - Wait for a reset to happen + * + * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state. + * + * @kbdev: Kbase device + */ +static void kbase_pm_wait_for_reset(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.lock); + + wait_event(kbdev->pm.backend.reset_done_wait, + (kbdev->pm.backend.reset_done)); + kbdev->pm.backend.reset_done = false; +} + +KBASE_EXPORT_TEST_API(kbase_pm_reset_done); + +static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) +{ + struct kbasep_reset_timeout_data *rtdata = + container_of(timer, struct kbasep_reset_timeout_data, timer); + + rtdata->timed_out = 1; + + /* Set the wait queue to wake up kbase_pm_init_hw even though the reset + * hasn't completed */ + kbase_pm_reset_done(rtdata->kbdev); + + return HRTIMER_NORESTART; +} + +static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) +{ + struct device_node *np = kbdev->dev->of_node; + u32 jm_values[4]; + const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> + GPU_ID_VERSION_PRODUCT_ID_SHIFT; + const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >> + GPU_ID_VERSION_MAJOR_SHIFT; + + kbdev->hw_quirks_sc = 0; + + /* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443. + * and needed due to MIDGLES-3539. See PRLAM-11035 */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) || + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035)) + kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE; + + /* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327. + */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327)) + kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD; + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + /* Enable alternative hardware counter selection if configured. */ + if (!GPU_ID_IS_NEW_FORMAT(prod_id)) + kbdev->hw_quirks_sc |= SC_ALT_COUNTERS; +#endif + + /* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. 
*/ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797)) + kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS; + + if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) { + if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ + kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; + else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ + kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; + } + + if (!kbdev->hw_quirks_sc) + kbdev->hw_quirks_sc = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_CONFIG), NULL); + + kbdev->hw_quirks_tiler = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_CONFIG), NULL); + + /* Set tiler clock gate override if required */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953)) + kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE; + + /* Limit the GPU bus bandwidth if the platform needs this. */ + kbdev->hw_quirks_mmu = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_MMU_CONFIG), NULL); + + /* Limit read ID width for AXI */ + kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS); + kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) << + L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT; + + /* Limit write ID width for AXI */ + kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES); + kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) << + L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT; + + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Allow memory configuration disparity to be ignored, we + * optimize the use of shared memory and thus we expect + * some disparity in the memory configuration */ + kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; + } + + kbdev->hw_quirks_jm = 0; + /* Only for T86x/T88x-based products after r2p0 */ + if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) { + + if (of_property_read_u32_array(np, + "jm_config", + &jm_values[0], + ARRAY_SIZE(jm_values))) { + /* Entry not in device tree, use defaults */ + jm_values[0] = 0; + jm_values[1] = 0; + jm_values[2] = 0; + jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT; + } + + /* Limit throttle limit to 6 bits*/ + if (jm_values[3] > JM_MAX_JOB_THROTTLE_LIMIT) { + dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63)."); + jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT; + } + + /* Aggregate to one integer. */ + kbdev->hw_quirks_jm |= (jm_values[0] ? + JM_TIMESTAMP_OVERRIDE : 0); + kbdev->hw_quirks_jm |= (jm_values[1] ? + JM_CLOCK_GATE_OVERRIDE : 0); + kbdev->hw_quirks_jm |= (jm_values[2] ? + JM_JOB_THROTTLE_ENABLE : 0); + kbdev->hw_quirks_jm |= (jm_values[3] << + JM_JOB_THROTTLE_LIMIT_SHIFT); + + } else if (GPU_ID_IS_NEW_FORMAT(prod_id) && + (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == + GPU_ID2_PRODUCT_TMIX)) { + /* Only for tMIx */ + u32 coherency_features; + + coherency_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); + + /* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly + * documented for tMIx so force correct value here. 
+ */ + if (coherency_features == + COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { + kbdev->hw_quirks_jm |= + (COHERENCY_ACE_LITE | COHERENCY_ACE) << + JM_FORCE_COHERENCY_FEATURES_SHIFT; + } + } + + + if (!kbdev->hw_quirks_jm) + kbdev->hw_quirks_jm = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JM_CONFIG), NULL); + +#ifdef CONFIG_MALI_CORESTACK +#define MANUAL_POWER_CONTROL ((u32)(1 << 8)) + kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; +#endif /* CONFIG_MALI_CORESTACK */ +} + +static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) +{ + kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), + kbdev->hw_quirks_sc, NULL); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), + kbdev->hw_quirks_tiler, NULL); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), + kbdev->hw_quirks_mmu, NULL); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), + kbdev->hw_quirks_jm, NULL); + +} + +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) +{ + if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && + !kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_enable_smc != 0) + kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); + kbdev->cci_snoop_enabled = true; + } +} + +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) +{ + if (kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_disable_smc != 0) { + mali_cci_flush_l2(kbdev); + kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); + } +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); + kbdev->cci_snoop_enabled = false; + } +} + +static int kbase_pm_do_reset(struct kbase_device *kbdev) +{ + struct kbasep_reset_timeout_data rtdata; + + KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); + + KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_SOFT_RESET, NULL); + + /* Unmask the reset complete interrupt only */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED, + NULL); + + /* Initialize a structure for tracking the status of the reset */ + rtdata.kbdev = kbdev; + rtdata.timed_out = 0; + + /* Create a timer to use as a timeout on the reset */ + hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + rtdata.timer.function = kbasep_reset_timeout; + + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), + HRTIMER_MODE_REL); + + /* Wait for the RESET_COMPLETED interrupt to be raised */ + kbase_pm_wait_for_reset(kbdev); + + if (rtdata.timed_out == 0) { + /* GPU has been reset */ + hrtimer_cancel(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); + return 0; + } + + /* No interrupt has been received - check if the RAWSTAT register says + * the reset has completed */ + if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & + RESET_COMPLETED) { + /* The interrupt is set in the RAWSTAT; this suggests that the + * interrupts are not getting to the CPU */ + dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); + /* If interrupts aren't working we can't continue. 
*/ + destroy_hrtimer_on_stack(&rtdata.timer); + return -EINVAL; + } + + /* The GPU doesn't seem to be responding to the reset so try a hard + * reset */ + dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", + RESET_TIMEOUT); + KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_HARD_RESET, NULL); + + /* Restart the timer to wait for the hard reset to complete */ + rtdata.timed_out = 0; + + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), + HRTIMER_MODE_REL); + + /* Wait for the RESET_COMPLETED interrupt to be raised */ + kbase_pm_wait_for_reset(kbdev); + + if (rtdata.timed_out == 0) { + /* GPU has been reset */ + hrtimer_cancel(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); + return 0; + } + + destroy_hrtimer_on_stack(&rtdata.timer); + + dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", + RESET_TIMEOUT); + + return -EINVAL; +} + +static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) +{ + struct kbase_device *kbdev = pdev->data; + + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_SET_PROTECTED_MODE, NULL); + return 0; +} + +static int kbasep_protected_mode_disable(struct protected_mode_device *pdev) +{ + struct kbase_device *kbdev = pdev->data; + + lockdep_assert_held(&kbdev->pm.lock); + + return kbase_pm_do_reset(kbdev); +} + +struct protected_mode_ops kbase_native_protected_ops = { + .protected_mode_enable = kbasep_protected_mode_enable, + .protected_mode_disable = kbasep_protected_mode_disable +}; + +int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) +{ + unsigned long irq_flags; + int err; + bool resume_vinstr = false; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* Ensure the clock is on before attempting to access the hardware */ + if (!kbdev->pm.backend.gpu_powered) { + if (kbdev->pm.backend.callback_power_on) + kbdev->pm.backend.callback_power_on(kbdev); + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, + irq_flags); + kbdev->pm.backend.gpu_powered = true; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + irq_flags); + } + + /* Ensure interrupts are off to begin with, this also clears any + * outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure cache snoops are disabled before reset. 
*/ + kbase_pm_cache_snoop_disable(kbdev); + /* Prepare for the soft-reset */ + kbdev->pm.backend.reset_done = false; + + /* The cores should be made unavailable due to the reset */ + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + if (kbdev->shader_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, + NULL, 0u, (u32)0u); + if (kbdev->tiler_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, + NULL, NULL, 0u, (u32)0u); + kbdev->shader_available_bitmap = 0u; + kbdev->tiler_available_bitmap = 0u; + kbdev->l2_available_bitmap = 0u; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + + /* Soft reset the GPU */ + if (kbdev->protected_mode_support) + err = kbdev->protected_ops->protected_mode_disable( + kbdev->protected_dev); + else + err = kbase_pm_do_reset(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + if (kbdev->protected_mode) + resume_vinstr = true; + kbdev->protected_mode = false; + kbase_ipa_model_use_configured_locked(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + + if (err) + goto exit; + + if (flags & PM_HW_ISSUES_DETECT) + kbase_pm_hw_issues_detect(kbdev); + + kbase_pm_hw_issues_apply(kbdev); + kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); + + /* Sanity check protected mode was left after reset */ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { + u32 gpu_status = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_STATUS), NULL); + + WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE); + } + + /* If cycle counter was in use re-enable it, enable_irqs will only be + * false when called from kbase_pm_powerup */ + if (kbdev->pm.backend.gpu_cycle_counter_requests && + (flags & PM_ENABLE_IRQS)) { + /* enable interrupts as the L2 may have to be powered on */ + kbase_pm_enable_interrupts(kbdev); + kbase_pm_request_l2_caches(kbdev); + + /* Re-enable the counters if we need to */ + spin_lock_irqsave( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + if (kbdev->pm.backend.gpu_cycle_counter_requests) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START, NULL); + spin_unlock_irqrestore( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + kbase_pm_release_l2_caches(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + + kbase_pm_disable_interrupts(kbdev); + } + + if (flags & PM_ENABLE_IRQS) + kbase_pm_enable_interrupts(kbdev); + +exit: + /* If GPU is leaving protected mode resume vinstr operation. 
*/
+ if (kbdev->vinstr_ctx && resume_vinstr)
+ kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+ return err;
+}
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ *
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * kbase_pm_request_gpu_cycle_counter() or
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to
+ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_is_on()
+ *
+ * @kbdev: The kbase device structure of the device
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+
+ ++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+ if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+ spin_unlock_irqrestore(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+ INT_MAX);
+
+ kbase_pm_request_l2_caches(kbdev);
+
+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+ INT_MAX);
+
+ kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+ --kbdev->pm.backend.gpu_cycle_counter_requests;
+
+ if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+ spin_unlock_irqrestore(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+
+ kbase_pm_release_l2_caches(kbdev);
+}
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100644
index 000000000000..6804f45ac27b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,548 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ * the device.
+ *
+ * This function can be called by the active power policy to return a bitmask
+ * of the cores (of a specified type) present in the GPU device.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ * pointer)
+ * @type: The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ * active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask
+ * of the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type: The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ * transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask
+ * of the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type: The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ * powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask
+ * of the cores (of a specified type) that are powered and ready for jobs (they
+ * may or may not be currently executing jobs).
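+ * (Contrast with kbase_pm_get_active_cores(), which returns only the
+ * subset of powered cores that is busy processing work.)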
+ * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * + * Return: The bit mask of ready cores + */ +u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type); + +/** + * kbase_pm_clock_on - Turn the clock for the device on, and enable device + * interrupts. + * + * This function can be used by a power policy to turn the clock for the GPU on. + * It should be modified during integration to perform the necessary actions to + * ensure that the GPU is fully powered and clocked. + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_resume: true if clock on due to resume after suspend, false otherwise + */ +void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); + +/** + * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the + * device off. + * + * This function can be used by a power policy to turn the clock for the GPU + * off. It should be modified during integration to perform the necessary + * actions to turn the clock off (if this is possible in the integration). + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_suspend: true if clock off due to suspend, false otherwise + * + * Return: true if clock was turned off, or + * false if clock can not be turned off due to pending page/bus fault + * workers. Caller must flush MMU workqueues and retry + */ +bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend); + +/** + * kbase_pm_enable_interrupts - Enable interrupts on the device. + * + * Interrupts are also enabled after a call to kbase_pm_clock_on(). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_enable_interrupts(struct kbase_device *kbdev); + +/** + * kbase_pm_disable_interrupts - Disable interrupts on the device. + * + * This prevents delivery of Power Management interrupts to the CPU so that + * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler + * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called. + * + * Interrupts are also disabled after a call to kbase_pm_clock_off(). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_disable_interrupts(struct kbase_device *kbdev); + +/** + * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts() + * that does not take the hwaccess_lock + * + * Caller must hold the hwaccess_lock. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev); + +/** + * kbase_pm_init_hw - Initialize the hardware. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @flags: Flags specifying the type of PM init + * + * This function checks the GPU ID register to ensure that the GPU is supported + * by the driver and performs a reset on the device so that it is in a known + * state before the device is used. + * + * Return: 0 if the device is supported and successfully reset. + */ +int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); + +/** + * kbase_pm_reset_done - The GPU has been reset successfully. + * + * This function must be called by the GPU interrupt handler when the + * RESET_COMPLETED bit is set. 
It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions
+ * to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state requests no cores:
+ * if no cores are desired, then none are unavailable. In this case, the
+ * caller may still need to try submitting jobs. This is because the Core
+ * Availability Policy might have taken us to an intermediate state where no
+ * cores are powered, before powering on more cores (e.g. for core rotation).
+ *
+ * The caller must hold kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: true when all desired cores are available, i.e. it's worthwhile
+ * for the caller to submit a job; false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of
+ * kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device.pm.power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ * where the caller must hold
+ * kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ * the Power Policy, and begin any power
+ * transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It
+ * will then begin power transitions to make the hardware achieve the desired
+ * shader core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
+ * the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
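+ *
+ * An editorial sketch (not part of the original header) of the expected
+ * call order in a direct power-off path, with pm.lock held as this
+ * function requires:
+ *
+ *   mutex_lock(&kbdev->pm.lock);
+ *   kbase_pm_cancel_deferred_poweroff(kbdev);
+ *   kbase_pm_do_poweroff(kbdev, false);
+ *   mutex_unlock(&kbdev->pm.lock);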
+ * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev); + +/** + * kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required + * and used cores. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev); + +/** + * kbasep_pm_metrics_init - Initialize the metrics gathering framework. + * + * This must be called before other metric gathering APIs are called. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 on success, error code on error + */ +int kbasep_pm_metrics_init(struct kbase_device *kbdev); + +/** + * kbasep_pm_metrics_term - Terminate the metrics gathering framework. + * + * This must be called when metric gathering is no longer required. It is an + * error to call any metrics gathering function (other than + * kbasep_pm_metrics_init()) after calling this function. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbasep_pm_metrics_term(struct kbase_device *kbdev); + +/** + * kbase_pm_report_vsync - Function to be called by the frame buffer driver to + * update the vsync metric. + * + * This function should be called by the frame buffer driver to update whether + * the system is hitting the vsync target or not. buffer_updated should be true + * if the vsync corresponded with a new frame being displayed, otherwise it + * should be false. This function does not need to be called every vsync, but + * only when the value of @buffer_updated differs from a previous call. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @buffer_updated: True if the buffer has been updated on this VSync, + * false otherwise + */ +void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated); + +/** + * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change + * the clock speed of the GPU. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function should be called regularly by the DVFS system to check whether + * the clock speed of the GPU needs updating. + */ +void kbase_pm_get_dvfs_action(struct kbase_device *kbdev); + +/** + * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is + * needed + * + * If the caller is the first caller then the GPU cycle counters will be enabled + * along with the l2 cache + * + * The GPU must be powered when calling this function (i.e. + * kbase_pm_context_active() must have been called). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev); + +/** + * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is + * needed (l2 cache already on) + * + * This is a version of the above function + * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the + * l2 cache is known to be on and assured to be on until the subsequent call of + * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does + * not sleep and can be called from atomic functions. + * + * The GPU must be powered when calling this function (i.e. + * kbase_pm_context_active() must have been called) and the l2 cache must be + * powered on. 
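+ *
+ * An editorial sketch of the expected pairing (not part of the original
+ * header), from a context that already guarantees the L2 is powered:
+ *
+ *   kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
+ *   ... submit the job that uses the cycle counter ...
+ *   kbase_pm_release_gpu_cycle_counter(kbdev);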
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ * longer in use
+ *
+ * If the caller is the last caller then the GPU cycle counters will be
+ * disabled. A request must have been made before a call to this.
+ *
+ * Caller must not hold the hwaccess_lock, as it will be taken in this
+ * function. If the caller is already holding this lock then
+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
+ * that does not take hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
+ * complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * Access to registers should be done using kbase_os_reg_read()/write() at this
+ * stage, not kbase_reg_read()/write().
+ *
+ * This causes the power management callbacks provided in the driver
+ * configuration to be called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This causes the power management callbacks provided in the driver
+ * configuration to be called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ * collection is active.
+ *
+ * Note that this returns whether the power management metrics collection was
+ * active at the time of the call; the collection may have been enabled or
+ * disabled again by the time the caller acts on the result.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active, false otherwise.
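+ *
+ * For illustration only (an editorial sketch; poll_utilisation() is an
+ * assumed name), callers should treat the result as a hint, e.g. to
+ * avoid polling manually while timer-driven collection is running:
+ *
+ *   if (!kbase_pm_metrics_is_active(kbdev))
+ *           poll_utilisation(kbdev);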
+ */ +bool kbase_pm_metrics_is_active(struct kbase_device *kbdev); + +/** + * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested. + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_resume: true if power on due to resume after suspend, + * false otherwise + */ +void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); + +/** + * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been + * requested. + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_suspend: true if power off due to suspend, + * false otherwise + */ +void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend); + +#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) +void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev, + unsigned long *total, unsigned long *busy); +void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev); +#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */ + +#ifdef CONFIG_MALI_MIDGARD_DVFS + +/** + * kbase_platform_dvfs_event - Report utilisation to DVFS code + * + * Function provided by platform specific code when DVFS is enabled to allow + * the power management metrics system to report utilisation. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @utilisation: The current calculated utilisation by the metrics system. + * @util_gl_share: The current calculated gl share of utilisation. + * @util_cl_share: The current calculated cl share of utilisation per core + * group. + * Return: Returns 0 on failure and non zero on success. + */ + +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, + u32 util_gl_share, u32 util_cl_share[2]); +#endif + +void kbase_pm_power_changed(struct kbase_device *kbdev); + +/** + * kbase_pm_metrics_update - Inform the metrics system that an atom is either + * about to be run or has just completed. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @now: Pointer to the timestamp of the change, or NULL to use current time + * + * Caller must hold hwaccess_lock + */ +void kbase_pm_metrics_update(struct kbase_device *kbdev, + ktime_t *now); + +/** + * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called after L2 power up. + */ + +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called before L2 power off. + */ +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); + +#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c new file mode 100644 index 000000000000..024248ca7123 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c @@ -0,0 +1,401 @@ +/* + * + * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Metrics for power management + */ + +#include <mali_kbase.h> +#include <mali_kbase_pm.h> +#include <backend/gpu/mali_kbase_pm_internal.h> +#include <backend/gpu/mali_kbase_jm_rb.h> + +/* When VSync is being hit aim for utilisation between 70-90% */ +#define KBASE_PM_VSYNC_MIN_UTILISATION 70 +#define KBASE_PM_VSYNC_MAX_UTILISATION 90 +/* Otherwise aim for 10-40% */ +#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10 +#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40 + +/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns + * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly + * under 11s. Exceeding this will cause overflow */ +#define KBASE_PM_TIME_SHIFT 8 + +/* Maximum time between sampling of utilization data, without resetting the + * counters. */ +#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */ + +#ifdef CONFIG_MALI_MIDGARD_DVFS +static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) +{ + unsigned long flags; + struct kbasep_pm_metrics_data *metrics; + + KBASE_DEBUG_ASSERT(timer != NULL); + + metrics = container_of(timer, struct kbasep_pm_metrics_data, timer); + kbase_pm_get_dvfs_action(metrics->kbdev); + + spin_lock_irqsave(&metrics->lock, flags); + + if (metrics->timer_active) + hrtimer_start(timer, + HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); + + spin_unlock_irqrestore(&metrics->lock, flags); + + return HRTIMER_NORESTART; +} +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + +int kbasep_pm_metrics_init(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbdev->pm.backend.metrics.kbdev = kbdev; + + kbdev->pm.backend.metrics.time_period_start = ktime_get(); + kbdev->pm.backend.metrics.time_busy = 0; + kbdev->pm.backend.metrics.time_idle = 0; + kbdev->pm.backend.metrics.prev_busy = 0; + kbdev->pm.backend.metrics.prev_idle = 0; + kbdev->pm.backend.metrics.gpu_active = false; + kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; + kbdev->pm.backend.metrics.busy_cl[0] = 0; + kbdev->pm.backend.metrics.busy_cl[1] = 0; + kbdev->pm.backend.metrics.busy_gl = 0; + + spin_lock_init(&kbdev->pm.backend.metrics.lock); + +#ifdef CONFIG_MALI_MIDGARD_DVFS + kbdev->pm.backend.metrics.timer_active = true; + hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + kbdev->pm.backend.metrics.timer.function = dvfs_callback; + + hrtimer_start(&kbdev->pm.backend.metrics.timer, + HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + + return 0; +} + +KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); + +void kbasep_pm_metrics_term(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_MIDGARD_DVFS + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbdev->pm.backend.metrics.timer_active = false; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + hrtimer_cancel(&kbdev->pm.backend.metrics.timer); +#endif /* CONFIG_MALI_MIDGARD_DVFS */ +} + +KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); + +/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this + * function + */ +static 
void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, + ktime_t now) +{ + ktime_t diff; + + lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + + diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); + if (ktime_to_ns(diff) < 0) + return; + + if (kbdev->pm.backend.metrics.gpu_active) { + u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + + kbdev->pm.backend.metrics.time_busy += ns_time; + if (kbdev->pm.backend.metrics.active_cl_ctx[0]) + kbdev->pm.backend.metrics.busy_cl[0] += ns_time; + if (kbdev->pm.backend.metrics.active_cl_ctx[1]) + kbdev->pm.backend.metrics.busy_cl[1] += ns_time; + if (kbdev->pm.backend.metrics.active_gl_ctx[0]) + kbdev->pm.backend.metrics.busy_gl += ns_time; + if (kbdev->pm.backend.metrics.active_gl_ctx[1]) + kbdev->pm.backend.metrics.busy_gl += ns_time; + } else { + kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff) + >> KBASE_PM_TIME_SHIFT); + } + + kbdev->pm.backend.metrics.time_period_start = now; +} + +#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) +/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this + * function. + */ +static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev, + ktime_t now) +{ + /* Store previous value */ + kbdev->pm.backend.metrics.prev_idle = + kbdev->pm.backend.metrics.time_idle; + kbdev->pm.backend.metrics.prev_busy = + kbdev->pm.backend.metrics.time_busy; + + /* Reset current values */ + kbdev->pm.backend.metrics.time_period_start = now; + kbdev->pm.backend.metrics.time_idle = 0; + kbdev->pm.backend.metrics.time_busy = 0; + kbdev->pm.backend.metrics.busy_cl[0] = 0; + kbdev->pm.backend.metrics.busy_cl[1] = 0; + kbdev->pm.backend.metrics.busy_gl = 0; +} + +void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get()); + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} + +void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev, + unsigned long *total_out, unsigned long *busy_out) +{ + ktime_t now = ktime_get(); + unsigned long flags, busy, total; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbase_pm_get_dvfs_utilisation_calc(kbdev, now); + + busy = kbdev->pm.backend.metrics.time_busy; + total = busy + kbdev->pm.backend.metrics.time_idle; + + /* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default + * 100ms) */ + if (total >= MALI_UTILIZATION_MAX_PERIOD) { + kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now); + } else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) { + total += kbdev->pm.backend.metrics.prev_idle + + kbdev->pm.backend.metrics.prev_busy; + busy += kbdev->pm.backend.metrics.prev_busy; + } + + *total_out = total; + *busy_out = busy; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} +#endif + +#ifdef CONFIG_MALI_MIDGARD_DVFS + +/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this + * function + */ +int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev, + int *util_gl_share, + int util_cl_share[2], + ktime_t now) +{ + int utilisation; + int busy; + + kbase_pm_get_dvfs_utilisation_calc(kbdev, now); + + if (kbdev->pm.backend.metrics.time_idle + + kbdev->pm.backend.metrics.time_busy == 0) { + /* No data - so we return NOP */ + utilisation = -1; + if (util_gl_share) + *util_gl_share = -1; + if (util_cl_share) { + util_cl_share[0] = -1; + 
util_cl_share[1] = -1; + } + goto out; + } + + utilisation = (100 * kbdev->pm.backend.metrics.time_busy) / + (kbdev->pm.backend.metrics.time_idle + + kbdev->pm.backend.metrics.time_busy); + + busy = kbdev->pm.backend.metrics.busy_gl + + kbdev->pm.backend.metrics.busy_cl[0] + + kbdev->pm.backend.metrics.busy_cl[1]; + + if (busy != 0) { + if (util_gl_share) + *util_gl_share = + (100 * kbdev->pm.backend.metrics.busy_gl) / + busy; + if (util_cl_share) { + util_cl_share[0] = + (100 * kbdev->pm.backend.metrics.busy_cl[0]) / + busy; + util_cl_share[1] = + (100 * kbdev->pm.backend.metrics.busy_cl[1]) / + busy; + } + } else { + if (util_gl_share) + *util_gl_share = -1; + if (util_cl_share) { + util_cl_share[0] = -1; + util_cl_share[1] = -1; + } + } + +out: + return utilisation; +} + +void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) +{ + unsigned long flags; + int utilisation, util_gl_share; + int util_cl_share[2]; + ktime_t now; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + + now = ktime_get(); + + utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share, + util_cl_share, now); + + if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 || + util_cl_share[1] < 0) { + utilisation = 0; + util_gl_share = 0; + util_cl_share[0] = 0; + util_cl_share[1] = 0; + goto out; + } + +out: +#ifdef CONFIG_MALI_MIDGARD_DVFS + kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, + util_cl_share); +#endif /*CONFIG_MALI_MIDGARD_DVFS */ + + kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now); + + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} + +bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) +{ + bool isactive; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + isactive = kbdev->pm.backend.metrics.timer_active; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + return isactive; +} +KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); + +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + +/** + * kbase_pm_metrics_active_calc - Update PM active counts based on currently + * running atoms + * @kbdev: Device pointer + * + * The caller must hold kbdev->pm.backend.metrics.lock + */ +static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) +{ + int js; + + lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + + kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; + kbdev->pm.backend.metrics.gpu_active = false; + + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + + /* Head atom may have just completed, so if it isn't running + * then try the next atom */ + if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) + katom = kbase_gpu_inspect(kbdev, js, 1); + + if (katom && katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + int device_nr = (katom->core_req & + BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) + ? katom->device_nr : 0; + if (!WARN_ON(device_nr >= 2)) + kbdev->pm.backend.metrics. + active_cl_ctx[device_nr] = 1; + } else { + /* Slot 2 should not be running non-compute + * atoms */ + if (!WARN_ON(js >= 2)) + kbdev->pm.backend.metrics. 
+					active_gl_ctx[js] = 1;
+			}
+			kbdev->pm.backend.metrics.gpu_active = true;
+		}
+	}
+}
+
+/* called when job is submitted to or removed from a GPU slot */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	unsigned long flags;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	if (!timestamp) {
+		now = ktime_get();
+		timestamp = &now;
+	}
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100644
index 000000000000..075f020c66e6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,973 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else /* CONFIG_MALI_NO_MALI */
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+	&kbase_pm_coarse_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/* The number of policies available in the system.
+ * This is derived from the number of policy operation structures listed in
+ * policy_list above.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT.
*/ + + /* Must be the last */ + KBASE_PM_FUNC_ID_COUNT +}; + + +/* State changes during request/unrequest/release-ing cores */ +enum { + KBASE_PM_CHANGE_STATE_SHADER = (1u << 0), + KBASE_PM_CHANGE_STATE_TILER = (1u << 1), + + /* These two must be last */ + KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER | + KBASE_PM_CHANGE_STATE_SHADER), + KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1 +}; +typedef u32 kbase_pm_change_state; + + +#ifdef CONFIG_MALI_TRACE_TIMELINE +/* Timeline Trace code lookups for each function */ +static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT] + [KBASE_PM_CHANGE_STATE_COUNT] = { + /* kbase_pm_request_cores */ + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0, + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER | + KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, + + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0, + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER | + KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, + + /* kbase_pm_release_cores */ + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0, + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER | + KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, + + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0, + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER | + KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END +}; + +static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, + enum kbase_pm_func_id func_id, + kbase_pm_change_state state) +{ + int trace_code; + + KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT); + KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) == + state); + + trace_code = kbase_pm_change_state_trace_code[func_id][state]; + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code); +} + +#else /* CONFIG_MALI_TRACE_TIMELINE */ +static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, + enum kbase_pm_func_id func_id, kbase_pm_change_state state) +{ +} + +#endif /* CONFIG_MALI_TRACE_TIMELINE */ + +/** + * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any + * requested shader cores + * @kbdev: Device pointer + */ +static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev) +{ + u64 prev_shader_state = kbdev->pm.backend.desired_shader_state; + u64 prev_tiler_state = 
kbdev->pm.backend.desired_tiler_state;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->pm.backend.desired_shader_state &=
+			~kbdev->pm.backend.shader_poweroff_pending;
+	kbdev->pm.backend.desired_tiler_state &=
+			~kbdev->pm.backend.tiler_poweroff_pending;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+
+	if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
+			prev_tiler_state !=
+				kbdev->pm.backend.desired_tiler_state ||
+			kbdev->pm.backend.ca_in_transition) {
+		bool cores_are_available;
+
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+		/* Don't need 'cores_are_available',
+		 * because we don't return anything */
+		CSTD_UNUSED(cores_are_available);
+	}
+}
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(timer, struct kbase_device,
+						pm.backend.gpu_poweroff_timer);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* It is safe for this call to do nothing if the work item is already
+	 * queued. The worker function will read the most up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending_time--;
+
+		KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending_time)
+			kbasep_pm_do_poweroff_cores(kbdev);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	kbdev->pm.backend.poweroff_timer_running = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	bool do_poweroff = false;
+
+	kbdev = container_of(data, struct kbase_device,
+						pm.backend.gpu_poweroff_work);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	kbdev->pm.backend.gpu_poweroff_pending--;
+
+	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Only power off the GPU if a request is still pending */
+	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+		do_poweroff = true;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (do_poweroff) {
+		kbdev->pm.backend.poweroff_timer_needed = false;
+		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+		kbdev->pm.backend.poweroff_timer_running = false;
+
+		/* Power off the GPU */
+		kbase_pm_do_poweroff(kbdev, false);
+	}
+
mutex_unlock(&kbdev->pm.lock); +} + +int kbase_pm_policy_init(struct kbase_device *kbdev) +{ + struct workqueue_struct *wq; + + wq = alloc_workqueue("kbase_pm_do_poweroff", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!wq) + return -ENOMEM; + + kbdev->pm.backend.gpu_poweroff_wq = wq; + INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work, + kbasep_pm_do_gpu_poweroff_wq); + hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kbdev->pm.backend.gpu_poweroff_timer.function = + kbasep_pm_do_gpu_poweroff_callback; + kbdev->pm.backend.pm_current_policy = policy_list[0]; + kbdev->pm.backend.pm_current_policy->init(kbdev); + kbdev->pm.gpu_poweroff_time = + HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); + kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; + kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU; + + return 0; +} + +void kbase_pm_policy_term(struct kbase_device *kbdev) +{ + kbdev->pm.backend.pm_current_policy->term(kbdev); + destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq); +} + +void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) +{ + unsigned long flags; + + lockdep_assert_held(&kbdev->pm.lock); + + kbdev->pm.backend.poweroff_timer_needed = false; + hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.poweroff_timer_running = false; + + /* If wq is already running but is held off by pm.lock, make sure it has + * no effect */ + kbdev->pm.backend.gpu_poweroff_pending = 0; + + kbdev->pm.backend.shader_poweroff_pending = 0; + kbdev->pm.backend.tiler_poweroff_pending = 0; + kbdev->pm.backend.shader_poweroff_pending_time = 0; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_pm_update_active(struct kbase_device *kbdev) +{ + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; + bool active; + + lockdep_assert_held(&pm->lock); + + /* pm_current_policy will never be NULL while pm.lock is held */ + KBASE_DEBUG_ASSERT(backend->pm_current_policy); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + active = backend->pm_current_policy->get_core_active(kbdev); + + if (active) { + if (backend->gpu_poweroff_pending) { + /* Cancel any pending power off request */ + backend->gpu_poweroff_pending = 0; + + /* If a request was pending then the GPU was still + * powered, so no need to continue */ + if (!kbdev->poweroff_pending) { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + return; + } + } + + if (!backend->poweroff_timer_running && !backend->gpu_powered && + (pm->poweroff_gpu_ticks || + pm->poweroff_shader_ticks)) { + backend->poweroff_timer_needed = true; + backend->poweroff_timer_running = true; + hrtimer_start(&backend->gpu_poweroff_timer, + pm->gpu_poweroff_time, + HRTIMER_MODE_REL); + } + + /* Power on the GPU and any cores requested by the policy */ + if (pm->backend.poweroff_wait_in_progress) { + pm->backend.poweron_required = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_pm_do_poweron(kbdev, false); + } + } else { + /* It is an error for the power policy to power off the GPU + * when there are contexts active */ + KBASE_DEBUG_ASSERT(pm->active_count == 0); + + if (backend->shader_poweroff_pending || + backend->tiler_poweroff_pending) { + backend->shader_poweroff_pending = 0; + backend->tiler_poweroff_pending = 0; + 
backend->shader_poweroff_pending_time = 0; + } + + /* Request power off */ + if (pm->backend.gpu_powered) { + if (pm->poweroff_gpu_ticks) { + backend->gpu_poweroff_pending = + pm->poweroff_gpu_ticks; + backend->poweroff_timer_needed = true; + if (!backend->poweroff_timer_running) { + /* Start timer if not running (eg if + * power policy has been changed from + * always_on to something else). This + * will ensure the GPU is actually + * powered off */ + backend->poweroff_timer_running + = true; + hrtimer_start( + &backend->gpu_poweroff_timer, + pm->gpu_poweroff_time, + HRTIMER_MODE_REL); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + + /* Power off the GPU immediately */ + kbase_pm_do_poweroff(kbdev, false); + } + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + } +} + +void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) +{ + u64 desired_bitmap; + u64 desired_tiler_bitmap; + bool cores_are_available; + bool do_poweroff = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->pm.backend.pm_current_policy == NULL) + return; + if (kbdev->pm.backend.poweroff_wait_in_progress) + return; + + if (kbdev->protected_mode_transition && !kbdev->shader_needed_bitmap && + !kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt + && !kbdev->tiler_inuse_cnt) { + /* We are trying to change in/out of protected mode - force all + * cores off so that the L2 powers down */ + desired_bitmap = 0; + desired_tiler_bitmap = 0; + } else { + desired_bitmap = + kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); + desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); + + if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) + desired_tiler_bitmap = 1; + else + desired_tiler_bitmap = 0; + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { + /* Unless XAFFINITY is supported, enable core 0 if tiler + * required, regardless of core availability */ + if (kbdev->tiler_needed_cnt > 0 || + kbdev->tiler_inuse_cnt > 0) + desired_bitmap |= 1; + } + } + + if (kbdev->pm.backend.desired_shader_state != desired_bitmap) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, + (u32)desired_bitmap); + /* Are any cores being powered on? 
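	 * That is: does the new desired bitmap include cores that are not in
	 * the current desired state, or is the core availability policy in
	 * the middle of a transition?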
*/ + if (~kbdev->pm.backend.desired_shader_state & desired_bitmap || + ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap || + kbdev->pm.backend.ca_in_transition) { + /* Check if we are powering off any cores before updating shader + * state */ + if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || + kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap) { + /* Start timer to power off cores */ + kbdev->pm.backend.shader_poweroff_pending |= + (kbdev->pm.backend.desired_shader_state & + ~desired_bitmap); + kbdev->pm.backend.tiler_poweroff_pending |= + (kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap); + + if (kbdev->pm.poweroff_shader_ticks && + !kbdev->protected_mode_transition) + kbdev->pm.backend.shader_poweroff_pending_time = + kbdev->pm.poweroff_shader_ticks; + else + do_poweroff = true; + } + + kbdev->pm.backend.desired_shader_state = desired_bitmap; + kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap; + + /* If any cores are being powered on, transition immediately */ + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || + kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap) { + /* Start timer to power off cores */ + kbdev->pm.backend.shader_poweroff_pending |= + (kbdev->pm.backend.desired_shader_state & + ~desired_bitmap); + kbdev->pm.backend.tiler_poweroff_pending |= + (kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap); + if (kbdev->pm.poweroff_shader_ticks && + !kbdev->protected_mode_transition) + kbdev->pm.backend.shader_poweroff_pending_time = + kbdev->pm.poweroff_shader_ticks; + else + kbasep_pm_do_poweroff_cores(kbdev); + } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 && + desired_tiler_bitmap != 0 && + kbdev->pm.backend.poweroff_timer_needed) { + /* If power policy is keeping cores on despite there being no + * active contexts then disable poweroff timer as it isn't + * required. 
+ * Only reset poweroff_timer_needed if we're not in the middle + * of the power off callback */ + kbdev->pm.backend.poweroff_timer_needed = false; + } + + /* Ensure timer does not power off wanted cores and make sure to power + * off unwanted cores */ + if (kbdev->pm.backend.shader_poweroff_pending || + kbdev->pm.backend.tiler_poweroff_pending) { + kbdev->pm.backend.shader_poweroff_pending &= + ~(kbdev->pm.backend.desired_shader_state & + desired_bitmap); + kbdev->pm.backend.tiler_poweroff_pending &= + ~(kbdev->pm.backend.desired_tiler_state & + desired_tiler_bitmap); + + if (!kbdev->pm.backend.shader_poweroff_pending && + !kbdev->pm.backend.tiler_poweroff_pending) + kbdev->pm.backend.shader_poweroff_pending_time = 0; + } + + /* Shader poweroff is deferred to the end of the function, to eliminate + * issues caused by the core availability policy recursing into this + * function */ + if (do_poweroff) + kbasep_pm_do_poweroff_cores(kbdev); + + /* Don't need 'cores_are_available', because we don't return anything */ + CSTD_UNUSED(cores_are_available); +} + +void kbase_pm_update_cores_state(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbase_pm_update_cores_state_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +int kbase_pm_list_policies(const struct kbase_pm_policy * const **list) +{ + if (!list) + return POLICY_COUNT; + + *list = policy_list; + + return POLICY_COUNT; +} + +KBASE_EXPORT_TEST_API(kbase_pm_list_policies); + +const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + return kbdev->pm.backend.pm_current_policy; +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_policy); + +void kbase_pm_set_policy(struct kbase_device *kbdev, + const struct kbase_pm_policy *new_policy) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + const struct kbase_pm_policy *old_policy; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(new_policy != NULL); + + KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id); + + /* During a policy change we pretend the GPU is active */ + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread */ + kbase_pm_context_active(kbdev); + + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + + /* Remove the policy to prevent IRQ handlers from working on it */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + old_policy = kbdev->pm.backend.pm_current_policy; + kbdev->pm.backend.pm_current_policy = NULL; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u, + old_policy->id); + if (old_policy->term) + old_policy->term(kbdev); + + KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u, + new_policy->id); + if (new_policy->init) + new_policy->init(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.pm_current_policy = new_policy; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* If any core power state changes were previously attempted, but + * couldn't be made because the policy was changing (current_policy was + * NULL), then re-try them here. 
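	 * kbase_pm_update_active() and kbase_pm_update_cores_state() below
	 * perform that re-check.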
*/ + kbase_pm_update_active(kbdev); + kbase_pm_update_cores_state(kbdev); + + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + + /* Now the policy change is finished, we release our fake context active + * reference */ + kbase_pm_context_idle(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_set_policy); + +/* Check whether a state change has finished, and trace it as completed */ +static void +kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev) +{ + if ((kbdev->shader_available_bitmap & + kbdev->pm.backend.desired_shader_state) + == kbdev->pm.backend.desired_shader_state && + (kbdev->tiler_available_bitmap & + kbdev->pm.backend.desired_tiler_state) + == kbdev->pm.backend.desired_tiler_state) + kbase_timeline_pm_check_handle_event(kbdev, + KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); +} + +void kbase_pm_request_cores(struct kbase_device *kbdev, + bool tiler_required, u64 shader_cores) +{ + u64 cores; + + kbase_pm_change_state change_gpu_state = 0u; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + cores = shader_cores; + while (cores) { + int bitnum = fls64(cores) - 1; + u64 bit = 1ULL << bitnum; + + /* It should be almost impossible for this to overflow. It would + * require 2^32 atoms to request a particular core, which would + * require 2^24 contexts to submit. This would require an amount + * of memory that is impossible on a 32-bit system and extremely + * unlikely on a 64-bit system. */ + int cnt = ++kbdev->shader_needed_cnt[bitnum]; + + if (1 == cnt) { + kbdev->shader_needed_bitmap |= bit; + change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; + } + + cores &= ~bit; + } + + if (tiler_required) { + int cnt = ++kbdev->tiler_needed_cnt; + + if (1 == cnt) + change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; + + KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0); + } + + if (change_gpu_state) { + KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL, + NULL, 0u, (u32) kbdev->shader_needed_bitmap); + + kbase_timeline_pm_cores_func(kbdev, + KBASE_PM_FUNC_ID_REQUEST_CORES_START, + change_gpu_state); + kbase_pm_update_cores_state_nolock(kbdev); + kbase_timeline_pm_cores_func(kbdev, + KBASE_PM_FUNC_ID_REQUEST_CORES_END, + change_gpu_state); + } +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_cores); + +void kbase_pm_unrequest_cores(struct kbase_device *kbdev, + bool tiler_required, u64 shader_cores) +{ + kbase_pm_change_state change_gpu_state = 0u; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + while (shader_cores) { + int bitnum = fls64(shader_cores) - 1; + u64 bit = 1ULL << bitnum; + int cnt; + + KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0); + + cnt = --kbdev->shader_needed_cnt[bitnum]; + + if (0 == cnt) { + kbdev->shader_needed_bitmap &= ~bit; + + change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; + } + + shader_cores &= ~bit; + } + + if (tiler_required) { + int cnt; + + KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0); + + cnt = --kbdev->tiler_needed_cnt; + + if (0 == cnt) + change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; + } + + if (change_gpu_state) { + KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL, + NULL, 0u, (u32) kbdev->shader_needed_bitmap); + + kbase_pm_update_cores_state_nolock(kbdev); + + /* Trace that any state change effectively completes immediately + * - no-one will wait on the state change */ + kbase_pm_trace_check_and_finish_state_change(kbdev); + } +} + +KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores); + +enum 
kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 prev_shader_needed;	/* Just for tracing */
+	u64 prev_shader_inuse;	/* Just for tracing */
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	prev_shader_needed = kbdev->shader_needed_bitmap;
+	prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then
+	 * power management is not attempting to power those cores (most
+	 * likely due to core availability policy) and a new job affinity must
+	 * be chosen */
+	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+							shader_cores) {
+		return (kbdev->pm.backend.poweroff_wait_in_progress ||
+				kbdev->pm.backend.pm_current_policy == NULL) ?
+				KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
+	}
+
+	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+			(tiler_required && !kbdev->tiler_available_bitmap)) {
+		/* Trace ongoing core transition */
+		kbase_timeline_pm_l2_transition_start(kbdev);
+		return KBASE_CORES_NOT_READY;
+	}
+
+	/* If we started to trace a state change, then trace it as being
+	 * finished by now, at the very latest */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+	/* Trace core transition done */
+	kbase_timeline_pm_l2_transition_done(kbdev);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt)
+			kbdev->shader_needed_bitmap &= ~bit;
+
+		/* shader_inuse_cnt should not overflow because there can only
+		 * be a very limited number of jobs on the h/w at one time */
+
+		kbdev->shader_inuse_cnt[bitnum]++;
+		kbdev->shader_inuse_bitmap |= bit;
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		--kbdev->tiler_needed_cnt;
+
+		kbdev->tiler_inuse_cnt++;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+	}
+
+	if (prev_shader_needed != kbdev->shader_needed_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_inuse_bitmap &= ~bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+		cnt = --kbdev->tiler_inuse_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+ kbase_timeline_pm_cores_func(kbdev, + KBASE_PM_FUNC_ID_RELEASE_CORES_END, + change_gpu_state); + + /* Trace that any state change completed immediately */ + kbase_pm_trace_check_and_finish_state_change(kbdev); + } +} + +KBASE_EXPORT_TEST_API(kbase_pm_release_cores); + +void kbase_pm_request_cores_sync(struct kbase_device *kbdev, + bool tiler_required, + u64 shader_cores) +{ + unsigned long flags; + + kbase_pm_wait_for_poweroff_complete(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_request_cores(kbdev, tiler_required, shader_cores); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_pm_check_transitions_sync(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync); + +void kbase_pm_request_l2_caches(struct kbase_device *kbdev) +{ + unsigned long flags; + u32 prior_l2_users_count; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + prior_l2_users_count = kbdev->l2_users_count++; + + KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0); + + /* if the GPU is reset while the l2 is on, l2 will be off but + * prior_l2_users_count will be > 0. l2_available_bitmap will have been + * set to 0 though by kbase_pm_init_hw */ + if (!prior_l2_users_count || !kbdev->l2_available_bitmap) + kbase_pm_check_transitions_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + wait_event(kbdev->pm.backend.l2_powered_wait, + kbdev->pm.backend.l2_powered == 1); + + /* Trace that any state change completed immediately */ + kbase_pm_trace_check_and_finish_state_change(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches); + +void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->l2_users_count++; +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on); + +void kbase_pm_release_l2_caches(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0); + + --kbdev->l2_users_count; + + if (!kbdev->l2_users_count) { + kbase_pm_check_transitions_nolock(kbdev); + /* Trace that any state change completed immediately */ + kbase_pm_trace_check_and_finish_state_change(kbdev); + } +} + +KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h new file mode 100644 index 000000000000..611a90e66e65 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h @@ -0,0 +1,227 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * Power policy API definitions + */ + +#ifndef _KBASE_PM_POLICY_H_ +#define _KBASE_PM_POLICY_H_ + +/** + * kbase_pm_policy_init - Initialize power policy framework + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Must be called before calling any other policy function + * + * Return: 0 if the power policy framework was successfully + * initialized, -errno otherwise. 
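+ *
+ * An editorial sketch of the expected init/term pairing during device
+ * bring-up (not part of the original header; the surrounding probe code
+ * is assumed):
+ *
+ *   err = kbase_pm_policy_init(kbdev);
+ *   if (err)
+ *           return err;
+ *   ...
+ *   kbase_pm_policy_term(kbdev);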
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of
+ *                               kbase_pm_request_cores()
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is a safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as
+ * appropriate) is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ *                          for jobs to be submitted
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete (and the cores might not be
+ * available) until a Power Management IRQ is received.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ *                            jobs to be submitted.
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores())
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job
+ * is cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ is
+ * received.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
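+
+/*
+ * Illustrative sketch (not part of the driver): the synchronous variant for a
+ * caller that must not proceed until the cores are powered, e.g. a
+ * hypothetical debug/dump path. shader_present_bitmap is assumed to hold the
+ * full set of physically present shader cores.
+ *
+ *	kbase_pm_request_cores_sync(kbdev, false,
+ *				    kbdev->shader_present_bitmap);
+ *	// ... read shader core registers safely here ...
+ *	kbase_pm_unrequest_cores(kbdev, false,
+ *				 kbdev->shader_present_bitmap);
+ */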
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores())
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and, if so, update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ *
+ *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ *
+ *         %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+						struct kbase_device *kbdev,
+						bool tiler_required,
+						u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_register_inuse_cores())
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented,
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power
+ * policy may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
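+
+/*
+ * Illustrative sketch (not part of the driver): the two legal lifecycles for
+ * a core request. 'affinity' is a hypothetical shader-core bitmask and
+ * job_was_cancelled()/run_job_on_hw() are stand-ins for scheduler logic.
+ *
+ *	kbase_pm_request_cores(kbdev, true, affinity);
+ *
+ *	if (job_was_cancelled(job)) {
+ *		// never submitted: drop the 'needed' reference
+ *		kbase_pm_unrequest_cores(kbdev, true, affinity);
+ *	} else if (kbase_pm_register_inuse_cores(kbdev, true, affinity) ==
+ *						KBASE_CORES_READY) {
+ *		run_job_on_hw(kbdev, job);
+ *		// once the job completes: drop the 'inuse' reference
+ *		kbase_pm_release_cores(kbdev, true, affinity);
+ *	}
+ */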
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of the l2 caches for all core groups: power them up, wait
+ * for them to become available, and prevent the power manager from powering
+ * them down.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2.
+ *
+ * It is the caller's responsibility to ensure that the l2 is already powered
+ * up and to eventually call kbase_pm_release_l2_caches().
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power
+ * manager to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100644
index 000000000000..d992989123e8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,103 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
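+
+/*
+ * Illustrative sketch (not part of the driver): sampling all three time
+ * sources at once, e.g. to correlate GPU-side events with CPU-side logs.
+ * The local variables exist only for the example.
+ *
+ *	u64 cycles, system_time;
+ *	struct timespec ts;
+ *
+ *	kbase_backend_get_gpu_time(kbdev, &cycles, &system_time, &ts);
+ *	dev_dbg(kbdev->dev, "cycles=%llu ts=%ld.%09ld\n",
+ *		cycles, ts.tv_sec, ts.tv_nsec);
+ */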
+
+/**
+ * kbase_wait_write_flush - Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to
+ * flush its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367.
+ *
+ * Note: if GPU resets occur then the counters are reset to zero, so the delay
+ * may not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+	u32 base_count = 0;
+
+	/*
+	 * The caller must be holding onto the kctx or the call is from
+	 * userspace.
+	 */
+	kbase_pm_context_active(kctx->kbdev);
+	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+	while (true) {
+		u32 new_count;
+
+		new_count = kbase_reg_read(kctx->kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		/* First time around, just store the count. */
+		if (base_count == 0) {
+			base_count = new_count;
+			continue;
+		}
+
+		/* No need to handle wrapping, unsigned maths works for this. */
+		if ((new_count - base_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+	kbase_pm_context_idle(kctx->kbdev);
+}
+#endif /* CONFIG_MALI_NO_MALI */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100644
index 000000000000..35088abc8fe5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:         Device pointer
+ * @cycle_counter: Pointer to u64 to store cycle counter in
+ * @system_time:   Pointer to u64 to store system time in
+ * @ts:            Pointer to struct timespec to store current monotonic
+ *                 time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() - Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to
+ * flush its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, so the delay may
+ * not be as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367.
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */