Diffstat (limited to 'drivers/gpu/arm/midgard/backend/gpu')
49 files changed, 13746 insertions, 0 deletions
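Nearly everything in this backend funnels register access through kbase_reg_read()/kbase_reg_write(), added in mali_kbase_device_hw.c below. As a minimal sketch of that usage contract, assuming only functions added in this series (kbase_probe_gpu_id() itself is a hypothetical caller, not part of the patch):

/* Hypothetical helper, for illustration only: kbase_reg_read() and
 * kbase_reg_write() assert kbdev->pm.backend.gpu_powered, so early users
 * bracket their accesses with the PM register-access window, exactly as
 * kbase_backend_early_init() does in mali_kbase_gpu.c below.
 */
static u32 kbase_probe_gpu_id(struct kbase_device *kbdev)
{
	u32 gpu_id;

	/* Power up just enough of the GPU for register I/O */
	kbase_pm_register_access_enable(kbdev);

	gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);

	/* Close the register-access window again */
	kbase_pm_register_access_disable(kbdev);

	return gpu_id;
}
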
diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild new file mode 100644 index 000000000000..5f700e9b6b44 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild @@ -0,0 +1,60 @@ +# +# (C) COPYRIGHT 2014,2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + +BACKEND += \ + backend/gpu/mali_kbase_cache_policy_backend.c \ + backend/gpu/mali_kbase_device_hw.c \ + backend/gpu/mali_kbase_gpu.c \ + backend/gpu/mali_kbase_gpuprops_backend.c \ + backend/gpu/mali_kbase_debug_job_fault_backend.c \ + backend/gpu/mali_kbase_irq_linux.c \ + backend/gpu/mali_kbase_instr_backend.c \ + backend/gpu/mali_kbase_jm_as.c \ + backend/gpu/mali_kbase_jm_hw.c \ + backend/gpu/mali_kbase_jm_rb.c \ + backend/gpu/mali_kbase_js_affinity.c \ + backend/gpu/mali_kbase_js_backend.c \ + backend/gpu/mali_kbase_mmu_hw_direct.c \ + backend/gpu/mali_kbase_pm_backend.c \ + backend/gpu/mali_kbase_pm_driver.c \ + backend/gpu/mali_kbase_pm_metrics.c \ + backend/gpu/mali_kbase_pm_ca.c \ + backend/gpu/mali_kbase_pm_ca_fixed.c \ + backend/gpu/mali_kbase_pm_always_on.c \ + backend/gpu/mali_kbase_pm_coarse_demand.c \ + backend/gpu/mali_kbase_pm_demand.c \ + backend/gpu/mali_kbase_pm_policy.c \ + backend/gpu/mali_kbase_time.c + +ifeq ($(MALI_CUSTOMER_RELEASE),0) +BACKEND += \ + backend/gpu/mali_kbase_pm_ca_random.c \ + backend/gpu/mali_kbase_pm_demand_always_powered.c \ + backend/gpu/mali_kbase_pm_fast_start.c +endif + +ifeq ($(CONFIG_MALI_DEVFREQ),y) +BACKEND += \ + backend/gpu/mali_kbase_devfreq.c \ + backend/gpu/mali_kbase_pm_ca_devfreq.c +endif + +ifeq ($(CONFIG_MALI_NO_MALI),y) + # Dummy model + BACKEND += backend/gpu/mali_kbase_model_dummy.c + BACKEND += backend/gpu/mali_kbase_model_linux.c + # HW error simulation + BACKEND += backend/gpu/mali_kbase_model_error_generator.c +endif diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h new file mode 100644 index 000000000000..c8ae87eb84a2 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h @@ -0,0 +1,29 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+
+
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100644
index 000000000000..fef9a2cb743e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	kbdev->current_gpu_coherency_mode = mode;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100644
index 000000000000..fe9869109a82
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+ *				      in the GPU.
+ * @kbdev:	Device pointer
+ * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+ */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
new file mode 100644
index 000000000000..7851ea6466c7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <mali_kbase.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include "mali_kbase_debug_job_fault.h" + +#ifdef CONFIG_DEBUG_FS + +/*GPU_CONTROL_REG(r)*/ +static int gpu_control_reg_snapshot[] = { + GPU_ID, + SHADER_READY_LO, + SHADER_READY_HI, + TILER_READY_LO, + TILER_READY_HI, + L2_READY_LO, + L2_READY_HI +}; + +/* JOB_CONTROL_REG(r) */ +static int job_control_reg_snapshot[] = { + JOB_IRQ_MASK, + JOB_IRQ_STATUS +}; + +/* JOB_SLOT_REG(n,r) */ +static int job_slot_reg_snapshot[] = { + JS_HEAD_LO, + JS_HEAD_HI, + JS_TAIL_LO, + JS_TAIL_HI, + JS_AFFINITY_LO, + JS_AFFINITY_HI, + JS_CONFIG, + JS_STATUS, + JS_HEAD_NEXT_LO, + JS_HEAD_NEXT_HI, + JS_AFFINITY_NEXT_LO, + JS_AFFINITY_NEXT_HI, + JS_CONFIG_NEXT +}; + +/*MMU_REG(r)*/ +static int mmu_reg_snapshot[] = { + MMU_IRQ_MASK, + MMU_IRQ_STATUS +}; + +/* MMU_AS_REG(n,r) */ +static int as_reg_snapshot[] = { + AS_TRANSTAB_LO, + AS_TRANSTAB_HI, + AS_MEMATTR_LO, + AS_MEMATTR_HI, + AS_FAULTSTATUS, + AS_FAULTADDRESS_LO, + AS_FAULTADDRESS_HI, + AS_STATUS +}; + +bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, + int reg_range) +{ + int i, j; + int offset = 0; + int slot_number; + int as_number; + + if (kctx->reg_dump == NULL) + return false; + + slot_number = kctx->kbdev->gpu_props.num_job_slots; + as_number = kctx->kbdev->gpu_props.num_address_spaces; + + /* get the GPU control registers*/ + for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + GPU_CONTROL_REG(gpu_control_reg_snapshot[i]); + offset += 2; + } + + /* get the Job control registers*/ + for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + JOB_CONTROL_REG(job_control_reg_snapshot[i]); + offset += 2; + } + + /* get the Job Slot registers*/ + for (j = 0; j < slot_number; j++) { + for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + JOB_SLOT_REG(j, job_slot_reg_snapshot[i]); + offset += 2; + } + } + + /* get the MMU registers*/ + for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); + offset += 2; + } + + /* get the Address space registers*/ + for (j = 0; j < as_number; j++) { + for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + MMU_AS_REG(j, as_reg_snapshot[i]); + offset += 2; + } + } + + WARN_ON(offset >= (reg_range*2/4)); + + /* set the termination flag*/ + kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG; + kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG; + + dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", + offset); + + return true; +} + +bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx) +{ + int offset = 0; + + if (kctx->reg_dump == NULL) + return false; + + while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) { + kctx->reg_dump[offset+1] = + kbase_reg_read(kctx->kbdev, + kctx->reg_dump[offset], NULL); + offset += 2; + } + return true; +} + + +#endif diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c new file mode 100644 index 000000000000..308b971b6896 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c @@ -0,0 +1,451 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <mali_kbase.h> +#include <mali_kbase_tlstream.h> +#include <mali_kbase_config_defaults.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +#include <linux/of.h> +#include <linux/clk.h> +#include <linux/devfreq.h> +#ifdef CONFIG_DEVFREQ_THERMAL +#include <linux/devfreq_cooling.h> +#endif + +#include <linux/version.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) +#include <linux/pm_opp.h> +#else /* Linux >= 3.13 */ +/* In 3.13 the OPP include header file, types, and functions were all + * renamed. Use the old filename for the include, and define the new names to + * the old, when an old kernel is detected. + */ +#include <linux/opp.h> +#define dev_pm_opp opp +#define dev_pm_opp_get_voltage opp_get_voltage +#define dev_pm_opp_get_opp_count opp_get_opp_count +#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil +#define dev_pm_opp_find_freq_floor opp_find_freq_floor +#endif /* Linux >= 3.13 */ + +#ifdef CONFIG_ARM_SCMI_PROTOCOL +#include <linux/scmi_protocol.h> +extern int scmi_gpu_domain_id_get(void); +#endif + +/** + * opp_translate - Translate nominal OPP frequency from devicetree into real + * frequency and core mask + * @kbdev: Device pointer + * @freq: Nominal frequency + * @core_mask: Pointer to u64 to store core mask to + * + * Return: Real target frequency + * + * This function will only perform translation if an operating-points-v2-mali + * table is present in devicetree. If one is not present then it will return an + * untranslated frequency and all cores enabled. 
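+ *
+ * Worked example (values are illustrative, not from any real devicetree):
+ * given an OPP entry declaring opp-hz = <600000000>, opp-hz-real =
+ * <550000000> and opp-core-mask = <0x3>, opp_translate(kbdev, 600000000,
+ * &mask) sets mask to 0x3 and returns 550000000.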
+ */
+static unsigned long opp_translate(struct kbase_device *kbdev,
+		unsigned long freq, u64 *core_mask)
+{
+	int i;
+
+	for (i = 0; i < kbdev->num_opps; i++) {
+		if (kbdev->opp_table[i].opp_freq == freq) {
+			*core_mask = kbdev->opp_table[i].core_mask;
+			return kbdev->opp_table[i].real_freq;
+		}
+	}
+
+	/* Failed to find OPP - return all cores enabled & nominal frequency */
+	*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
+
+	return freq;
+}
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long nominal_freq;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err = 0;
+	u64 core_mask;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	if (IS_ERR_OR_NULL(opp)) {
+		rcu_read_unlock();
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+	/* Only dereference the OPP once we know it is valid */
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+
+	nominal_freq = freq;
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (kbdev->current_nominal_freq == nominal_freq) {
+		*target_freq = nominal_freq;
+		return 0;
+	}
+
+	freq = opp_translate(kbdev, nominal_freq, &core_mask);
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq < freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to increase voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	if (kbdev->clock)
+		err = clk_set_rate(kbdev->clock, freq);
+#ifdef CONFIG_ARM_SCMI_PROTOCOL
+	else if (kbdev->scmi_handle)
+		err = kbdev->scmi_handle->perf_ops->freq_set(kbdev->scmi_handle,
+				scmi_gpu_domain_id_get(), freq);
+#endif
+
+	if (err) {
+		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+				freq, *target_freq);
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq > freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	if (kbdev->pm.backend.ca_current_policy->id ==
+			KBASE_PM_CA_POLICY_ID_DEVFREQ)
+		kbase_devfreq_set_core_mask(kbdev, core_mask);
+
+	*target_freq = nominal_freq;
+	kbdev->current_voltage = voltage;
+	kbdev->current_nominal_freq = nominal_freq;
+	kbdev->current_freq = freq;
+	kbdev->current_core_mask = core_mask;
+
+	KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq);
+
+	kbase_pm_reset_dvfs_utilisation(kbdev);
+
+	return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_nominal_freq;
+
+	return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	stat->current_frequency = kbdev->current_nominal_freq;
+
+	kbase_pm_get_dvfs_utilisation(kbdev,
+			&stat->total_time, &stat->busy_time);
+
+	stat->private_data = NULL;
+
+	return 0;
+}
+
+/* Weak definition to be overridden by platforms */
+int __weak setup_opps(void)
+{
+	return 0;
+}
+
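+/*
+ * Note: a platform may provide a strong setup_opps() definition to register
+ * its OPPs (for example from firmware tables) before the frequency table
+ * below is enumerated; the weak stub above simply reports success.
+ */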
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int err, count;
+	int i = 0;
+	unsigned long freq = 0;
+	struct dev_pm_opp *opp;
+
+	err = setup_opps();
+	if (err)
+		return err;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	if (count < 0) {
+		rcu_read_unlock();
+		return count;
+	}
+	rcu_read_unlock();
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) {
+		opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d)\n",
+				count, i);
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+	kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+}
+
+static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
+{
+	struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node,
+			"operating-points-v2", 0);
+	struct device_node *node;
+	int i = 0;
+	int count;
+
+	if (!opp_node)
+		return 0;
+	if (!of_device_is_compatible(opp_node, "operating-points-v2-mali"))
+		return 0;
+
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	kbdev->opp_table = kmalloc_array(count,
+			sizeof(struct kbase_devfreq_opp), GFP_KERNEL);
+	if (!kbdev->opp_table)
+		return -ENOMEM;
+
+	for_each_available_child_of_node(opp_node, node) {
+		u64 core_mask;
+		u64 opp_freq, real_freq;
+		const void *core_count_p;
+
+		if (of_property_read_u64(node, "opp-hz", &opp_freq)) {
+			dev_warn(kbdev->dev, "OPP is missing required opp-hz property\n");
+			continue;
+		}
+		if (of_property_read_u64(node, "opp-hz-real", &real_freq))
+			real_freq = opp_freq;
+		if (of_property_read_u64(node, "opp-core-mask", &core_mask))
+			core_mask =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		core_count_p = of_get_property(node, "opp-core-count", NULL);
+		if (core_count_p) {
+			u64 remaining_core_mask =
+				kbdev->gpu_props.props.raw_props.shader_present;
+			int core_count = be32_to_cpup(core_count_p);
+
+			core_mask = 0;
+
+			for (; core_count > 0; core_count--) {
+				int core = ffs(remaining_core_mask);
+
+				if (!core) {
+					dev_err(kbdev->dev, "OPP has more cores than GPU\n");
+					return -ENODEV;
+				}
+
+				core_mask |= (1ull << (core-1));
+				remaining_core_mask &= ~(1ull << (core-1));
+			}
+		}
+
+		if (!core_mask) {
+			dev_err(kbdev->dev, "OPP has invalid core mask of 0\n");
+			return -ENODEV;
+		}
+
+		kbdev->opp_table[i].opp_freq = opp_freq;
+		kbdev->opp_table[i].real_freq = real_freq;
+		kbdev->opp_table[i].core_mask = core_mask;
+
+		dev_info(kbdev->dev, "OPP %d : opp_freq=%llu real_freq=%llu core_mask=%llx\n",
+				i, opp_freq, real_freq, core_mask);
+
+		i++;
+	}
+
+	kbdev->num_opps = i;
+
+	return 0;
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	int err;
+
+#ifdef CONFIG_ARM_SCMI_PROTOCOL
+	if (!kbdev->scmi_handle) {
+#else
+	if (!kbdev->clock) {
+#endif
+		dev_err(kbdev->dev, "Clock not available for devfreq\n");
+		return -ENODEV;
+	}
+
+#ifndef CONFIG_ARM_SCMI_PROTOCOL
+	if (kbdev->clock)
+		kbdev->current_freq = clk_get_rate(kbdev->clock);
+#else
+	if (kbdev->scmi_handle) {
+		struct scmi_perf_ops *perf_ops = kbdev->scmi_handle->perf_ops;
+
+		perf_ops->freq_get(kbdev->scmi_handle,
+				scmi_gpu_domain_id_get(),
+				&kbdev->current_freq);
+	}
+#endif
+
+	kbdev->current_nominal_freq = kbdev->current_freq;
+
+	dp = &kbdev->devfreq_profile;
+
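+	/* Populate the devfreq profile: initial frequency and polling period,
+	 * plus the target/status/cur_freq/exit callbacks defined above. The
+	 * devfreq core invokes ->target() based on the utilisation reported
+	 * via ->get_dev_status().
+	 */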
dp->initial_freq = kbdev->current_freq; + dp->polling_ms = 100; + dp->target = kbase_devfreq_target; + dp->get_dev_status = kbase_devfreq_status; + dp->get_cur_freq = kbase_devfreq_cur_freq; + dp->exit = kbase_devfreq_exit; + + if (kbase_devfreq_init_freq_table(kbdev, dp)) + return -EFAULT; + + err = kbase_devfreq_init_core_mask_table(kbdev); + if (err) + return err; + + kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, + "simple_ondemand", NULL); + if (IS_ERR(kbdev->devfreq)) { + kbase_devfreq_term_freq_table(kbdev); + return PTR_ERR(kbdev->devfreq); + } + + /* devfreq_add_device only copies a few of kbdev->dev's fields, so + * set drvdata explicitly so IPA models can access kbdev. */ + dev_set_drvdata(&kbdev->devfreq->dev, kbdev); + + err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); + if (err) { + dev_err(kbdev->dev, + "Failed to register OPP notifier (%d)\n", err); + goto opp_notifier_failed; + } + +#ifdef CONFIG_DEVFREQ_THERMAL + err = kbase_ipa_init(kbdev); + if (err) { + dev_err(kbdev->dev, "IPA initialization failed\n"); + goto cooling_failed; + } + + kbdev->devfreq_cooling = of_devfreq_cooling_register_power( + kbdev->dev->of_node, + kbdev->devfreq, + &kbase_ipa_power_model_ops); + if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { + err = PTR_ERR(kbdev->devfreq_cooling); + dev_err(kbdev->dev, + "Failed to register cooling device (%d)\n", + err); + goto cooling_failed; + } +#endif + + return 0; + +#ifdef CONFIG_DEVFREQ_THERMAL +cooling_failed: + devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); +#endif /* CONFIG_DEVFREQ_THERMAL */ +opp_notifier_failed: + if (devfreq_remove_device(kbdev->devfreq)) + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); + else + kbdev->devfreq = NULL; + + return err; +} + +void kbase_devfreq_term(struct kbase_device *kbdev) +{ + int err; + + dev_dbg(kbdev->dev, "Term Mali devfreq\n"); + +#ifdef CONFIG_DEVFREQ_THERMAL + if (kbdev->devfreq_cooling) + devfreq_cooling_unregister(kbdev->devfreq_cooling); + + kbase_ipa_term(kbdev); +#endif + + devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); + + err = devfreq_remove_device(kbdev->devfreq); + if (err) + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); + else + kbdev->devfreq = NULL; + + kfree(kbdev->opp_table); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h new file mode 100644 index 000000000000..c0bf8b15b3bc --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h @@ -0,0 +1,24 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +#ifndef _BASE_DEVFREQ_H_ +#define _BASE_DEVFREQ_H_ + +int kbase_devfreq_init(struct kbase_device *kbdev); +void kbase_devfreq_term(struct kbase_device *kbdev); + +#endif /* _BASE_DEVFREQ_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c new file mode 100644 index 000000000000..dcdf15cdc3e8 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c @@ -0,0 +1,255 @@ +/* + * + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * + */ +#include <mali_kbase.h> +#include <backend/gpu/mali_kbase_instr_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +#include <backend/gpu/mali_kbase_device_internal.h> + +#if !defined(CONFIG_MALI_NO_MALI) + + +#ifdef CONFIG_DEBUG_FS + + +int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) +{ + struct kbase_io_access *old_buf; + struct kbase_io_access *new_buf; + unsigned long flags; + + if (!new_size) + goto out_err; /* The new size must not be 0 */ + + new_buf = vmalloc(new_size * sizeof(*h->buf)); + if (!new_buf) + goto out_err; + + spin_lock_irqsave(&h->lock, flags); + + old_buf = h->buf; + + /* Note: we won't bother with copying the old data over. The dumping + * logic wouldn't work properly as it relies on 'count' both as a + * counter and as an index to the buffer which would have changed with + * the new array. This is a corner case that we don't need to support. 
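+	 * A resize therefore discards all previously recorded accesses.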
+ */ + h->count = 0; + h->size = new_size; + h->buf = new_buf; + + spin_unlock_irqrestore(&h->lock, flags); + + vfree(old_buf); + + return 0; + +out_err: + return -1; +} + + +int kbase_io_history_init(struct kbase_io_history *h, u16 n) +{ + h->enabled = false; + spin_lock_init(&h->lock); + h->count = 0; + h->size = 0; + h->buf = NULL; + if (kbase_io_history_resize(h, n)) + return -1; + + return 0; +} + + +void kbase_io_history_term(struct kbase_io_history *h) +{ + vfree(h->buf); + h->buf = NULL; +} + + +/* kbase_io_history_add - add new entry to the register access history + * + * @h: Pointer to the history data structure + * @addr: Register address + * @value: The value that is either read from or written to the register + * @write: 1 if it's a register write, 0 if it's a read + */ +static void kbase_io_history_add(struct kbase_io_history *h, + void __iomem const *addr, u32 value, u8 write) +{ + struct kbase_io_access *io; + unsigned long flags; + + spin_lock_irqsave(&h->lock, flags); + + io = &h->buf[h->count % h->size]; + io->addr = (uintptr_t)addr | write; + io->value = value; + ++h->count; + /* If count overflows, move the index by the buffer size so the entire + * buffer will still be dumped later */ + if (unlikely(!h->count)) + h->count = h->size; + + spin_unlock_irqrestore(&h->lock, flags); +} + + +void kbase_io_history_dump(struct kbase_device *kbdev) +{ + struct kbase_io_history *const h = &kbdev->io_history; + u16 i; + size_t iters; + unsigned long flags; + + if (!unlikely(h->enabled)) + return; + + spin_lock_irqsave(&h->lock, flags); + + dev_err(kbdev->dev, "Register IO History:"); + iters = (h->size > h->count) ? h->count : h->size; + dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, + h->count); + for (i = 0; i < iters; ++i) { + struct kbase_io_access *io = + &h->buf[(h->count - iters + i) % h->size]; + char const access = (io->addr & 1) ? 
'w' : 'r'; + + dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access, + (void *)(io->addr & ~0x1), io->value); + } + + spin_unlock_irqrestore(&h->lock, flags); +} + + +#endif /* CONFIG_DEBUG_FS */ + + +void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, + struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + + writel(value, kbdev->reg + offset); + +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbdev->io_history.enabled)) + kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, + value, 1); +#endif /* CONFIG_DEBUG_FS */ + dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); + + if (kctx && kctx->jctx.tb) + kbase_device_trace_register_access(kctx, REG_WRITE, offset, + value); +} + +KBASE_EXPORT_TEST_API(kbase_reg_write); + +u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, + struct kbase_context *kctx) +{ + u32 val; + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + + val = readl(kbdev->reg + offset); + +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbdev->io_history.enabled)) + kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, + val, 0); +#endif /* CONFIG_DEBUG_FS */ + dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val); + + if (kctx && kctx->jctx.tb) + kbase_device_trace_register_access(kctx, REG_READ, offset, val); + return val; +} + +KBASE_EXPORT_TEST_API(kbase_reg_read); +#endif /* !defined(CONFIG_MALI_NO_MALI) */ + +/** + * kbase_report_gpu_fault - Report a GPU fault. + * @kbdev: Kbase device pointer + * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS + * was also set + * + * This function is called from the interrupt handler when a GPU fault occurs. + * It reports the details of the fault using dev_warn(). + */ +static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) +{ + u32 status; + u64 address; + + status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL); + address = (u64) kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32; + address |= kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL); + + dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", + status & 0xFF, + kbase_exception_name(kbdev, status), + address); + if (multiple) + dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); +} + +void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) +{ + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val); + if (val & GPU_FAULT) + kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS); + + if (val & RESET_COMPLETED) + kbase_pm_reset_done(kbdev); + + if (val & PRFCNT_SAMPLE_COMPLETED) + kbase_instr_hwcnt_sample_done(kbdev); + + if (val & CLEAN_CACHES_COMPLETED) + kbase_clean_caches_done(kbdev); + + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL); + + /* kbase_pm_check_transitions must be called after the IRQ has been + * cleared. This is because it might trigger further power transitions + * and we don't want to miss the interrupt raised to notify us that + * these further transitions have finished. 
+ */ + if (val & POWER_CHANGED_ALL) + kbase_pm_power_changed(kbdev); + + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h new file mode 100644 index 000000000000..5b20445932fb --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h @@ -0,0 +1,67 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Backend-specific HW access device APIs + */ + +#ifndef _KBASE_DEVICE_INTERNAL_H_ +#define _KBASE_DEVICE_INTERNAL_H_ + +/** + * kbase_reg_write - write to GPU register + * @kbdev: Kbase device pointer + * @offset: Offset of register + * @value: Value to write + * @kctx: Kbase context pointer. May be NULL + * + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If + * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr + * != KBASEP_AS_NR_INVALID). + */ +void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, + struct kbase_context *kctx); + +/** + * kbase_reg_read - read from GPU register + * @kbdev: Kbase device pointer + * @offset: Offset of register + * @kctx: Kbase context pointer. May be NULL + * + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If + * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr + * != KBASEP_AS_NR_INVALID). + * + * Return: Value in desired register + */ +u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, + struct kbase_context *kctx); + + +/** + * kbase_gpu_interrupt - GPU interrupt handler + * @kbdev: Kbase device pointer + * @val: The value of the GPU IRQ status register which triggered the call + * + * This function is called from the interrupt handler when a GPU irq is to be + * handled. + */ +void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); + +#endif /* _KBASE_DEVICE_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c new file mode 100644 index 000000000000..3b78100ec6df --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c @@ -0,0 +1,123 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	err = kbase_hwaccess_pm_init(kbdev);
+	if (err)
+		goto fail_pm;
+
+	return 0;
+
+fail_pm:
+	kbase_release_interrupts(kbdev);
+fail_interrupts:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_term(kbdev);
+	kbase_release_interrupts(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		return err;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assignment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+fail_job_slot:
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+
+	return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100644
index 000000000000..b395325b556b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,110 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ * + */ + + + + + +/* + * Base kernel property query backend APIs + */ + +#include <mali_kbase.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> +#include <mali_kbase_hwaccess_gpuprops.h> + +void kbase_backend_gpuprops_get(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) +{ + int i; + + /* Fill regdump with the content of the relevant registers */ + regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL); + + regdump->l2_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_FEATURES), NULL); + regdump->suspend_size = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SUSPEND_SIZE), NULL); + regdump->tiler_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_FEATURES), NULL); + regdump->mem_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(MEM_FEATURES), NULL); + regdump->mmu_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(MMU_FEATURES), NULL); + regdump->as_present = kbase_reg_read(kbdev, + GPU_CONTROL_REG(AS_PRESENT), NULL); + regdump->js_present = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JS_PRESENT), NULL); + + for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) + regdump->js_features[i] = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL); + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + regdump->texture_features[i] = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL); + + regdump->thread_max_threads = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL); + regdump->thread_max_workgroup_size = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE), + NULL); + regdump->thread_max_barrier_size = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL); + regdump->thread_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_FEATURES), NULL); + + regdump->shader_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL); + regdump->shader_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL); + + regdump->tiler_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_PRESENT_LO), NULL); + regdump->tiler_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_PRESENT_HI), NULL); + + regdump->l2_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_LO), NULL); + regdump->l2_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_HI), NULL); + + regdump->stack_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(STACK_PRESENT_LO), NULL); + regdump->stack_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(STACK_PRESENT_HI), NULL); +} + +void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) +{ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) { + /* Ensure we can access the GPU registers */ + kbase_pm_register_access_enable(kbdev); + + regdump->coherency_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); + + /* We're done accessing the GPU registers for now. 
*/ + kbase_pm_register_access_disable(kbdev); + } else { + /* Pre COHERENCY_FEATURES we only supported ACE_LITE */ + regdump->coherency_features = + COHERENCY_FEATURE_BIT(COHERENCY_NONE) | + COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); + } +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c new file mode 100644 index 000000000000..7ad309e8d7f4 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c @@ -0,0 +1,492 @@ +/* + * + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * GPU backend instrumentation APIs. + */ + +#include <mali_kbase.h> +#include <mali_midg_regmap.h> +#include <mali_kbase_hwaccess_instr.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> +#include <backend/gpu/mali_kbase_instr_internal.h> + +/** + * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to + * hardware + * + * @kbdev: Kbase device + */ +static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) +{ + unsigned long flags; + unsigned long pm_flags; + u32 irq_mask; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_REQUEST_CLEAN); + + /* Enable interrupt */ + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask | CLEAN_CACHES_COMPLETED, NULL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + + /* clean&invalidate the caches so we're sure the mmu tables for the dump + * buffer is valid */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES, NULL); + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} + +int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_uk_hwcnt_setup *setup) +{ + unsigned long flags, pm_flags; + int err = -EINVAL; + u32 irq_mask; + int ret; + u64 shader_cores_needed; + u32 prfcnt_config; + + shader_cores_needed = kbase_pm_get_present_cores(kbdev, + KBASE_PM_CORE_SHADER); + + /* alignment failure */ + if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1))) + goto out_err; + + /* Override core availability policy to ensure all cores are available + */ + kbase_pm_ca_instr_enable(kbdev); + + /* Request the cores early on synchronously - we'll release them on any + * errors (e.g. 
instrumentation already active) */ + kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { + /* Instrumentation is already enabled */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out_unrequest_cores; + } + + /* Enable interrupt */ + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | + PRFCNT_SAMPLE_COMPLETED, NULL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + + /* In use, this context is the owner */ + kbdev->hwcnt.kctx = kctx; + /* Remember the dump address so we can reprogram it later */ + kbdev->hwcnt.addr = setup->dump_buffer; + + /* Request the clean */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; + kbdev->hwcnt.backend.triggered = 0; + /* Clean&invalidate the caches so we're sure the mmu tables for the dump + * buffer is valid */ + ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, + &kbdev->hwcnt.backend.cache_clean_work); + KBASE_DEBUG_ASSERT(ret); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Wait for cacheclean to complete */ + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_IDLE); + + kbase_pm_request_l2_caches(kbdev); + + /* Configure */ + prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + { + u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) + >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id); + + if (arch_v6) + prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; + } +#endif + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), + prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), + setup->dump_buffer & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), + setup->dump_buffer >> 32, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), + setup->jm_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), + setup->shader_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), + setup->mmu_l2_bm, kctx); + /* Due to PRLAM-8186 we need to disable the Tiler before we enable the + * HW counter dump. 
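+	 * (The tiler counters are re-enabled below, once the manual counter
+	 * mode has been programmed.)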
*/ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0, + kctx); + else + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), + setup->tiler_bm, kctx); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), + prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx); + + /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump + */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), + setup->tiler_bm, kctx); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + err = 0; + + dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); + return err; + out_unrequest_cores: + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + out_err: + return err; +} + +int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) +{ + unsigned long flags, pm_flags; + int err = -EINVAL; + u32 irq_mask; + struct kbase_device *kbdev = kctx->kbdev; + + while (1) { + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) { + /* Instrumentation is not enabled */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out; + } + + if (kbdev->hwcnt.kctx != kctx) { + /* Instrumentation has been setup for another context */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out; + } + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) + break; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Ongoing dump/setup - wait for its completion */ + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + } + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + kbdev->hwcnt.backend.triggered = 0; + + /* Disable interrupt */ + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL); + + /* Disable the counters */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx); + + kbdev->hwcnt.kctx = NULL; + kbdev->hwcnt.addr = 0ULL; + + kbase_pm_ca_instr_disable(kbdev); + + kbase_pm_unrequest_cores(kbdev, true, + kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER)); + + kbase_pm_release_l2_caches(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", + kctx); + + err = 0; + + out: + return err; +} + +int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) +{ + unsigned long flags; + int err = -EINVAL; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.kctx != kctx) { + /* The instrumentation has been setup for another context */ + goto unlock; + } + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) { + /* HW counters are disabled or another dump is ongoing, or we're + * resetting */ + goto unlock; + } + + kbdev->hwcnt.backend.triggered = 0; + + /* Mark that we're dumping - the PF handler can signal that we faulted + */ + kbdev->hwcnt.backend.state = 
KBASE_INSTR_STATE_DUMPING; + + /* Reconfigure the dump address */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), + kbdev->hwcnt.addr & 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), + kbdev->hwcnt.addr >> 32, NULL); + + /* Start dumping */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL, + kbdev->hwcnt.addr, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_PRFCNT_SAMPLE, kctx); + + dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx); + + err = 0; + + unlock: + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); + +bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, + bool * const success) +{ + unsigned long flags; + bool complete = false; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) { + *success = true; + complete = true; + } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + *success = false; + complete = true; + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + return complete; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); + +void kbasep_cache_clean_worker(struct work_struct *data) +{ + struct kbase_device *kbdev; + unsigned long flags; + + kbdev = container_of(data, struct kbase_device, + hwcnt.backend.cache_clean_work); + + mutex_lock(&kbdev->cacheclean_lock); + kbasep_instr_hwcnt_cacheclean(kbdev); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* Wait for our condition, and any reset to complete */ + while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.cache_clean_wait, + kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_CLEANING); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_CLEANED); + + /* All finished and idle */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + mutex_unlock(&kbdev->cacheclean_lock); +} + +void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) { + int ret; + /* Always clean and invalidate the cache after a successful dump + */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; + ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, + &kbdev->hwcnt.backend.cache_clean_work); + KBASE_DEBUG_ASSERT(ret); + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} + +void kbase_clean_caches_done(struct kbase_device *kbdev) +{ + u32 irq_mask; + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { + unsigned long flags; + unsigned long pm_flags; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* Disable interrupt */ + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~CLEAN_CACHES_COMPLETED, NULL); + 
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + + /* Wakeup... */ + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { + /* Only wake if we weren't resetting */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED; + wake_up(&kbdev->hwcnt.backend.cache_clean_wait); + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + } +} + +int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + unsigned long flags; + int err; + + /* Wait for dump & cacheclean to complete */ + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + err = -EINVAL; + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + } else { + /* Dump done */ + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_IDLE); + err = 0; + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + return err; +} + +int kbase_instr_hwcnt_clear(struct kbase_context *kctx) +{ + unsigned long flags; + int err = -EINVAL; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + /* Check it's the context previously set up and we're not already + * dumping */ + if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_IDLE) + goto out; + + /* Clear the counters */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_PRFCNT_CLEAR, kctx); + + err = 0; + +out: + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); + +int kbase_instr_backend_init(struct kbase_device *kbdev) +{ + int ret = 0; + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + + init_waitqueue_head(&kbdev->hwcnt.backend.wait); + init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait); + INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work, + kbasep_cache_clean_worker); + kbdev->hwcnt.backend.triggered = 0; + + kbdev->hwcnt.backend.cache_clean_wq = + alloc_workqueue("Mali cache cleaning workqueue", 0, 1); + if (NULL == kbdev->hwcnt.backend.cache_clean_wq) + ret = -EINVAL; + + return ret; +} + +void kbase_instr_backend_term(struct kbase_device *kbdev) +{ + destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h new file mode 100644 index 000000000000..4794672da8f0 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h @@ -0,0 +1,58 @@ +/* + * + * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+
+
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* Hardware is currently cleaning and invalidating caches. */
+	KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be set up. */
+	KBASE_INSTR_STATE_CLEANED,
+	/* An error has occurred during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	wait_queue_head_t wait;
+	int triggered;
+
+	enum kbase_instr_state state;
+	wait_queue_head_t cache_clean_wait;
+	struct workqueue_struct *cache_clean_wq;
+	struct work_struct cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100644
index 000000000000..e96aeae786e1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100644
index 000000000000..8781561e73d0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ * + */ + + + +/* + * Backend specific IRQ APIs + */ + +#ifndef _KBASE_IRQ_INTERNAL_H_ +#define _KBASE_IRQ_INTERNAL_H_ + +int kbase_install_interrupts(struct kbase_device *kbdev); + +void kbase_release_interrupts(struct kbase_device *kbdev); + +/** + * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed + * execution + * @kbdev: The kbase device + */ +void kbase_synchronize_irqs(struct kbase_device *kbdev); + +int kbasep_common_test_interrupt_handlers( + struct kbase_device * const kbdev); + +#endif /* _KBASE_IRQ_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c new file mode 100644 index 000000000000..8416b80e8b77 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c @@ -0,0 +1,469 @@ +/* + * + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <mali_kbase.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_irq_internal.h> + +#include <linux/interrupt.h> + +#if !defined(CONFIG_MALI_NO_MALI) + +/* GPU IRQ Tags */ +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 + +static void *kbase_tag(void *ptr, u32 tag) +{ + return (void *)(((uintptr_t) ptr) | tag); +} + +static void *kbase_untag(void *ptr) +{ + return (void *)(((uintptr_t) ptr) & ~3); +} + +static irqreturn_t kbase_job_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_job_done(kbdev, val); + + return IRQ_HANDLED; +} + +KBASE_EXPORT_TEST_API(kbase_job_irq_handler); + +static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + atomic_inc(&kbdev->faults_pending); + + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + 
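+	/* The "data" cookie received here was produced by kbase_tag(), so a
+	 * single kbase_device can be registered against all three (possibly
+	 * shared) IRQ lines and recovered again with kbase_untag(). A minimal
+	 * sketch of the round trip, assuming kbdev is at least 4-byte aligned:
+	 *
+	 *	void *cookie = kbase_tag(kbdev, MMU_IRQ_TAG);   // ptr | 1
+	 *	struct kbase_device *dev = kbase_untag(cookie); // ptr & ~3
+	 */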
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) { + atomic_dec(&kbdev->faults_pending); + return IRQ_NONE; + } + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_mmu_interrupt(kbdev, val); + + atomic_dec(&kbdev->faults_pending); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_gpu_interrupt(kbdev, val); + + return IRQ_HANDLED; +} + +KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler); + +static irq_handler_t kbase_handler_table[] = { + [JOB_IRQ_TAG] = kbase_job_irq_handler, + [MMU_IRQ_TAG] = kbase_mmu_irq_handler, + [GPU_IRQ_TAG] = kbase_gpu_irq_handler, +}; + +#ifdef CONFIG_MALI_DEBUG +#define JOB_IRQ_HANDLER JOB_IRQ_TAG +#define MMU_IRQ_HANDLER MMU_IRQ_TAG +#define GPU_IRQ_HANDLER GPU_IRQ_TAG + +/** + * kbase_set_custom_irq_handler - Set a custom IRQ handler + * @kbdev: Device for which the handler is to be registered + * @custom_handler: Handler to be registered + * @irq_type: Interrupt type + * + * Registers given interrupt handler for requested interrupt type + * In the case where irq handler is not specified, the default handler shall be + * registered + * + * Return: 0 case success, error code otherwise + */ +int kbase_set_custom_irq_handler(struct kbase_device *kbdev, + irq_handler_t custom_handler, + int irq_type) +{ + int result = 0; + irq_handler_t requested_irq_handler = NULL; + + KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) && + (GPU_IRQ_HANDLER >= irq_type)); + + /* Release previous handler */ + if (kbdev->irqs[irq_type].irq) + free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); + + requested_irq_handler = (NULL != custom_handler) ? 
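+			/* Fall back to the default entry in kbase_handler_table
+			 * when no custom handler is supplied, so passing NULL
+			 * restores the stock handler for this irq_type. */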
custom_handler : + kbase_handler_table[irq_type]; + + if (0 != request_irq(kbdev->irqs[irq_type].irq, + requested_irq_handler, + kbdev->irqs[irq_type].flags | IRQF_SHARED, + dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) { + result = -EINVAL; + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", + kbdev->irqs[irq_type].irq, irq_type); +#ifdef CONFIG_SPARSE_IRQ + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); +#endif /* CONFIG_SPARSE_IRQ */ + } + + return result; +} + +KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); + +/* test correct interrupt assigment and reception by cpu */ +struct kbasep_irq_test { + struct hrtimer timer; + wait_queue_head_t wait; + int triggered; + u32 timeout; +}; + +static struct kbasep_irq_test kbasep_irq_test_data; + +#define IRQ_TEST_TIMEOUT 500 + +static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); + + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbasep_irq_test_data.triggered = 1; + wake_up(&kbasep_irq_test_data.wait); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); + + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbasep_irq_test_data.triggered = 1; + wake_up(&kbasep_irq_test_data.wait); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL); + + return IRQ_HANDLED; +} + +static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) +{ + struct kbasep_irq_test *test_data = container_of(timer, + struct kbasep_irq_test, timer); + + test_data->timeout = 1; + test_data->triggered = 1; + wake_up(&test_data->wait); + return HRTIMER_NORESTART; +} + +static int kbasep_common_test_interrupt( + struct kbase_device * const kbdev, u32 tag) +{ + int err = 0; + irq_handler_t test_handler; + + u32 old_mask_val; + u16 mask_offset; + u16 rawstat_offset; + + switch (tag) { + case JOB_IRQ_TAG: + test_handler = kbase_job_irq_test_handler; + rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT); + mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK); + break; + case MMU_IRQ_TAG: + test_handler = kbase_mmu_irq_test_handler; + rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); + mask_offset = MMU_REG(MMU_IRQ_MASK); + break; + case GPU_IRQ_TAG: + /* already tested by pm_driver - bail out */ + default: + return 0; + } + + /* store old mask */ + old_mask_val = kbase_reg_read(kbdev, 
mask_offset, NULL); + /* mask interrupts */ + kbase_reg_write(kbdev, mask_offset, 0x0, NULL); + + if (kbdev->irqs[tag].irq) { + /* release original handler and install test handler */ + if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) { + err = -EINVAL; + } else { + kbasep_irq_test_data.timeout = 0; + hrtimer_init(&kbasep_irq_test_data.timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kbasep_irq_test_data.timer.function = + kbasep_test_interrupt_timeout; + + /* trigger interrupt */ + kbase_reg_write(kbdev, mask_offset, 0x1, NULL); + kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL); + + hrtimer_start(&kbasep_irq_test_data.timer, + HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), + HRTIMER_MODE_REL); + + wait_event(kbasep_irq_test_data.wait, + kbasep_irq_test_data.triggered != 0); + + if (kbasep_irq_test_data.timeout != 0) { + dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", + kbdev->irqs[tag].irq, tag); + err = -EINVAL; + } else { + dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n", + kbdev->irqs[tag].irq, tag); + } + + hrtimer_cancel(&kbasep_irq_test_data.timer); + kbasep_irq_test_data.triggered = 0; + + /* mask interrupts */ + kbase_reg_write(kbdev, mask_offset, 0x0, NULL); + + /* release test handler */ + free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); + } + + /* restore original interrupt */ + if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], + kbdev->irqs[tag].flags | IRQF_SHARED, + dev_name(kbdev->dev), kbase_tag(kbdev, tag))) { + dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", + kbdev->irqs[tag].irq, tag); + err = -EINVAL; + } + } + /* restore old mask */ + kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL); + + return err; +} + +int kbasep_common_test_interrupt_handlers( + struct kbase_device * const kbdev) +{ + int err; + + init_waitqueue_head(&kbasep_irq_test_data.wait); + kbasep_irq_test_data.triggered = 0; + + /* A suspend won't happen during startup/insmod */ + kbase_pm_context_active(kbdev); + + err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG); + if (err) { + dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); + goto out; + } + + err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG); + if (err) { + dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. 
Check interrupt assignments.\n"); + goto out; + } + + dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n"); + + out: + kbase_pm_context_idle(kbdev); + + return err; +} +#endif /* CONFIG_MALI_DEBUG */ + +int kbase_install_interrupts(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + int err; + u32 i; + + for (i = 0; i < nr; i++) { + err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], + kbdev->irqs[i].flags | IRQF_SHARED, + dev_name(kbdev->dev), + kbase_tag(kbdev, i)); + if (err) { + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", + kbdev->irqs[i].irq, i); +#ifdef CONFIG_SPARSE_IRQ + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); +#endif /* CONFIG_SPARSE_IRQ */ + goto release; + } + } + + return 0; + + release: + while (i-- > 0) + free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + + return err; +} + +void kbase_release_interrupts(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + u32 i; + + for (i = 0; i < nr; i++) { + if (kbdev->irqs[i].irq) + free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + } +} + +void kbase_synchronize_irqs(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + u32 i; + + for (i = 0; i < nr; i++) { + if (kbdev->irqs[i].irq) + synchronize_irq(kbdev->irqs[i].irq); + } +} + +#endif /* !defined(CONFIG_MALI_NO_MALI) */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c new file mode 100644 index 000000000000..c660c80341f4 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c @@ -0,0 +1,235 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Register backend context / address space management + */ + +#include <mali_kbase.h> +#include <mali_kbase_hwaccess_jm.h> +#include <mali_kbase_ctx_sched.h> + +/** + * assign_and_activate_kctx_addr_space - Assign an AS to a context + * @kbdev: Kbase device + * @kctx: Kbase context + * @current_as: Address Space to assign + * + * Assign an Address Space (AS) to a context, and add the context to the Policy. + * + * This includes + * setting up the global runpool_irq structure and the context on the AS, + * Activating the MMU on the AS, + * Allowing jobs to be submitted on the AS. 
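+ *
+ * A sketch of the expected call pattern (illustrative only, assuming the
+ * jsctx/runpool mutexes listed below are already held and the hwaccess
+ * spinlock is taken last):
+ *
+ *	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ *	assign_and_activate_kctx_addr_space(kbdev, kctx, &kbdev->as[as_nr]);
+ *	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);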
+ * + * Context: + * kbasep_js_kctx_info.jsctx_mutex held, + * kbasep_js_device_data.runpool_mutex held, + * AS transaction mutex held, + * Runpool IRQ lock held + */ +static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_as *current_as) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Attribute handling */ + kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx); + + /* Allow it to run jobs */ + kbasep_js_set_submit_allowed(js_devdata, kctx); + + kbase_js_runpool_inc_context_count(kbdev, kctx); +} + +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + int i; + + if (kbdev->hwaccess.active_kctx == kctx) { + /* Context is already active */ + return true; + } + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + if (kbdev->as_to_kctx[i] == kctx) { + /* Context already has ASID - mark as active */ + return true; + } + } + + /* Context does not have address space assigned */ + return false; +} + +void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + int as_nr = kctx->as_nr; + + if (as_nr == KBASEP_AS_NR_INVALID) { + WARN(1, "Attempting to release context without ASID\n"); + return; + } + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (atomic_read(&kctx->refcount) != 1) { + WARN(1, "Attempting to release active ASID\n"); + return; + } + + kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx); + + kbase_ctx_sched_release_ctx(kctx); + kbase_js_runpool_dec_context_count(kbdev, kctx); +} + +void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ +} + +int kbase_backend_find_and_release_free_address_space( + struct kbase_device *kbdev, struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; + int i; + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + struct kbasep_js_kctx_info *as_js_kctx_info; + struct kbase_context *as_kctx; + + as_kctx = kbdev->as_to_kctx[i]; + as_js_kctx_info = &as_kctx->jctx.sched_info; + + /* Don't release privileged or active contexts, or contexts with + * jobs running. + * Note that a context will have at least 1 reference (which + * was previously taken by kbasep_js_schedule_ctx()) until + * descheduled. 
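+ * A refcount of exactly 1 therefore means only that scheduler-held
+ * reference remains, i.e. no jobs are still in flight for as_kctx.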
+ */ + if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && + atomic_read(&as_kctx->refcount) == 1) { + if (!kbasep_js_runpool_retain_ctx_nolock(kbdev, + as_kctx)) { + WARN(1, "Failed to retain active context\n"); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return KBASEP_AS_NR_INVALID; + } + + kbasep_js_clear_submit_allowed(js_devdata, as_kctx); + + /* Drop and retake locks to take the jsctx_mutex on the + * context we're about to release without violating lock + * ordering + */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + + /* Release context from address space */ + mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); + + if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, + as_kctx, + true); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); + + return i; + } + + /* Context was retained while locks were dropped, + * continue looking for free AS */ + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return KBASEP_AS_NR_INVALID; +} + +bool kbase_backend_use_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, + int as_nr) +{ + struct kbasep_js_device_data *js_devdata; + struct kbase_as *new_address_space = NULL; + + js_devdata = &kbdev->js_data; + + if (kbdev->hwaccess.active_kctx == kctx) { + WARN(1, "Context is already scheduled in\n"); + return false; + } + + new_address_space = &kbdev->as[as_nr]; + + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space); + + if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { + /* We need to retain it to keep the corresponding address space + */ + kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + } + + return true; +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h new file mode 100644 index 000000000000..08a7400e66d5 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h @@ -0,0 +1,123 @@ +/* + * + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + +/* + * Register-based HW access backend specific definitions + */ + +#ifndef _KBASE_HWACCESS_GPU_DEFS_H_ +#define _KBASE_HWACCESS_GPU_DEFS_H_ + +/* SLOT_RB_SIZE must be < 256 */ +#define SLOT_RB_SIZE 2 +#define SLOT_RB_MASK (SLOT_RB_SIZE - 1) + +/** + * struct rb_entry - Ringbuffer entry + * @katom: Atom associated with this entry + */ +struct rb_entry { + struct kbase_jd_atom *katom; +}; + +/** + * struct slot_rb - Slot ringbuffer + * @entries: Ringbuffer entries + * @last_context: The last context to submit a job on this slot + * @read_idx: Current read index of buffer + * @write_idx: Current write index of buffer + * @job_chain_flag: Flag used to implement jobchain disambiguation + */ +struct slot_rb { + struct rb_entry entries[SLOT_RB_SIZE]; + + struct kbase_context *last_context; + + u8 read_idx; + u8 write_idx; + + u8 job_chain_flag; +}; + +/** + * struct kbase_backend_data - GPU backend specific data for HW access layer + * @slot_rb: Slot ringbuffers + * @rmu_workaround_flag: When PRLAM-8987 is present, this flag determines + * whether slots 0/1 or slot 2 are currently being + * pulled from + * @scheduling_timer: The timer tick used for rescheduling jobs + * @timer_running: Is the timer running? The runpool_mutex must be + * held whilst modifying this. + * @suspend_timer: Is the timer suspended? Set when a suspend + * occurs and cleared on resume. The runpool_mutex + * must be held whilst modifying this. + * @reset_gpu: Set to a KBASE_RESET_xxx value (see comments) + * @reset_workq: Work queue for performing the reset + * @reset_work: Work item for performing the reset + * @reset_wait: Wait event signalled when the reset is complete + * @reset_timer: Timeout for soft-stops before the reset + * @timeouts_updated: Have timeout values just been updated? + * + * The hwaccess_lock (a spinlock) must be held when accessing this structure + */ +struct kbase_backend_data { + struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; + + bool rmu_workaround_flag; + + struct hrtimer scheduling_timer; + + bool timer_running; + bool suspend_timer; + + atomic_t reset_gpu; + +/* The GPU reset isn't pending */ +#define KBASE_RESET_GPU_NOT_PENDING 0 +/* kbase_prepare_to_reset_gpu has been called */ +#define KBASE_RESET_GPU_PREPARED 1 +/* kbase_reset_gpu has been called - the reset will now definitely happen + * within the timeout period */ +#define KBASE_RESET_GPU_COMMITTED 2 +/* The GPU reset process is currently occuring (timeout has expired or + * kbasep_try_reset_gpu_early was called) */ +#define KBASE_RESET_GPU_HAPPENING 3 +/* Reset the GPU silently, used when resetting the GPU as part of normal + * behavior (e.g. when exiting protected mode). */ +#define KBASE_RESET_GPU_SILENT 4 + struct workqueue_struct *reset_workq; + struct work_struct reset_work; + wait_queue_head_t reset_wait; + struct hrtimer reset_timer; + + bool timeouts_updated; +}; + +/** + * struct kbase_jd_atom_backend - GPU backend specific katom data + */ +struct kbase_jd_atom_backend { +}; + +/** + * struct kbase_context_backend - GPU backend specific context data + */ +struct kbase_context_backend { +}; + +#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c new file mode 100644 index 000000000000..be88ec8eb0d7 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -0,0 +1,1512 @@ +/* + * + * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * Base kernel job manager APIs + */ + +#include <mali_kbase.h> +#include <mali_kbase_config.h> +#include <mali_midg_regmap.h> +#if defined(CONFIG_MALI_GATOR_SUPPORT) +#include <mali_kbase_gator.h> +#endif +#include <mali_kbase_tlstream.h> +#include <mali_kbase_vinstr.h> +#include <mali_kbase_hw.h> +#include <mali_kbase_hwaccess_jm.h> +#include <mali_kbase_ctx_sched.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_irq_internal.h> +#include <backend/gpu/mali_kbase_js_affinity.h> +#include <backend/gpu/mali_kbase_jm_internal.h> + +#define beenthere(kctx, f, a...) \ + dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) + +#if KBASE_GPU_RESET_EN +static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev); +static void kbasep_reset_timeout_worker(struct work_struct *data); +static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer); +#endif /* KBASE_GPU_RESET_EN */ + +static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, + struct kbase_context *kctx) +{ + return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx); +} + +void kbase_job_hw_submit(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js) +{ + struct kbase_context *kctx; + u32 cfg; + u64 jc_head = katom->jc; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(katom); + + kctx = katom->kctx; + + /* Command register must be available */ + KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); + /* Affinity is not violating */ + kbase_js_debug_log_current_affinities(kbdev); + KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js, + katom->affinity)); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), + jc_head & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), + jc_head >> 32, kctx); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), + katom->affinity & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), + katom->affinity >> 32, kctx); + + /* start MMU, medium priority, cache clean/flush on end, clean/flush on + * start */ + cfg = kctx->as_nr; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) && + !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) + cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; + + if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) + cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; + else + cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; + + if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && + !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) + cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; + else + cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649)) + cfg |= JS_CONFIG_START_MMU; + + cfg |= JS_CONFIG_THREAD_PRI(8); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) && + (katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED)) + cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; + + if (kbase_hw_has_feature(kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + if 
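+		/* The flag alternates on every submission to this slot, so a
+		 * later soft/hard-stop can target exactly one of the two
+		 * chains that may occupy the slot (see the
+		 * JS_COMMAND_*_STOP_0/_1 selection in the stop path). */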
(!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) { + cfg |= JS_CONFIG_JOB_CHAIN_FLAG; + katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN; + kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = + true; + } else { + katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN; + kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = + false; + } + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), + katom->flush_id, kctx); + + /* Write an approximate start timestamp. + * It's approximate because there might be a job in the HEAD register. + */ + katom->start_timestamp = ktime_get(); + + /* GO ! */ + dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx", + katom, kctx, js, jc_head, katom->affinity); + + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, + (u32) katom->affinity); + +#if defined(CONFIG_MALI_GATOR_SUPPORT) + kbase_trace_mali_job_slots_event( + GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), + kctx, kbase_jd_atom_id(kctx, katom)); +#endif + KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head, + katom->affinity, cfg); + KBASE_TLSTREAM_TL_RET_CTX_LPU( + kctx, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); + KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]); + KBASE_TLSTREAM_TL_RET_ATOM_LPU( + katom, + &kbdev->gpu_props.props.raw_props.js_features[js], + "ctx_nr,atom_nr"); +#ifdef CONFIG_GPU_TRACEPOINTS + if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { + /* If this is the only job on the slot, trace it as starting */ + char js_string[16]; + + trace_gpu_sched_switch( + kbasep_make_job_slot_string(js, js_string, + sizeof(js_string)), + ktime_to_ns(katom->start_timestamp), + (u32)katom->kctx->id, 0, katom->work_id); + kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; + } +#endif + kbase_timeline_job_slot_submit(kbdev, kctx, katom, js); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), + JS_COMMAND_START, katom->kctx); +} + +/** + * kbasep_job_slot_update_head_start_timestamp - Update timestamp + * @kbdev: kbase device + * @js: job slot + * @end_timestamp: timestamp + * + * Update the start_timestamp of the job currently in the HEAD, based on the + * fact that we got an IRQ for the previous set of completed jobs. + * + * The estimate also takes into account the time the job was submitted, to + * work out the best estimate (which might still result in an over-estimate to + * the calculated time spent) + */ +static void kbasep_job_slot_update_head_start_timestamp( + struct kbase_device *kbdev, + int js, + ktime_t end_timestamp) +{ + if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) { + struct kbase_jd_atom *katom; + ktime_t timestamp_diff; + /* The atom in the HEAD */ + katom = kbase_gpu_inspect(kbdev, js, 0); + + KBASE_DEBUG_ASSERT(katom != NULL); + + timestamp_diff = ktime_sub(end_timestamp, + katom->start_timestamp); + if (ktime_to_ns(timestamp_diff) >= 0) { + /* Only update the timestamp if it's a better estimate + * than what's currently stored. 
This is because our + * estimate that accounts for the throttle time may be + * too much of an overestimate */ + katom->start_timestamp = end_timestamp; + } + } +} + +/** + * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline + * tracepoint + * @kbdev: kbase device + * @js: job slot + * + * Make a tracepoint call to the instrumentation module informing that + * softstop happened on given lpu (job slot). + */ +static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, + int js) +{ + KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( + &kbdev->gpu_props.props.raw_props.js_features[js]); +} + +void kbase_job_done(struct kbase_device *kbdev, u32 done) +{ + unsigned long flags; + int i; + u32 count = 0; + ktime_t end_timestamp = ktime_get(); + struct kbasep_js_device_data *js_devdata; + + KBASE_DEBUG_ASSERT(kbdev); + js_devdata = &kbdev->js_data; + + KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done); + + memset(&kbdev->slot_submit_count_irq[0], 0, + sizeof(kbdev->slot_submit_count_irq)); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + while (done) { + u32 failed = done >> 16; + + /* treat failed slots as finished slots */ + u32 finished = (done & 0xFFFF) | failed; + + /* Note: This is inherently unfair, as we always check + * for lower numbered interrupts before the higher + * numbered ones.*/ + i = ffs(finished) - 1; + KBASE_DEBUG_ASSERT(i >= 0); + + do { + int nr_done; + u32 active; + u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */ + u64 job_tail = 0; + + if (failed & (1u << i)) { + /* read out the job slot status code if the job + * slot reported failure */ + completion_code = kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_STATUS), NULL); + + switch (completion_code) { + case BASE_JD_EVENT_STOPPED: +#if defined(CONFIG_MALI_GATOR_SUPPORT) + kbase_trace_mali_job_slots_event( + GATOR_MAKE_EVENT( + GATOR_JOB_SLOT_SOFT_STOPPED, i), + NULL, 0); +#endif + + kbasep_trace_tl_event_lpu_softstop( + kbdev, i); + + /* Soft-stopped job - read the value of + * JS<n>_TAIL so that the job chain can + * be resumed */ + job_tail = (u64)kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_TAIL_LO), + NULL) | + ((u64)kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_TAIL_HI), + NULL) << 32); + break; + case BASE_JD_EVENT_NOT_STARTED: + /* PRLAM-10673 can cause a TERMINATED + * job to come back as NOT_STARTED, but + * the error interrupt helps us detect + * it */ + completion_code = + BASE_JD_EVENT_TERMINATED; + /* fall through */ + default: + dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", + i, completion_code, + kbase_exception_name + (kbdev, + completion_code)); + } + + kbase_gpu_irq_evict(kbdev, i); + } + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), + done & ((1 << i) | (1 << (i + 16))), + NULL); + active = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_JS_STATE), + NULL); + + if (((active >> i) & 1) == 0 && + (((done >> (i + 16)) & 1) == 0)) { + /* There is a potential race we must work + * around: + * + * 1. A job slot has a job in both current and + * next registers + * 2. The job in current completes + * successfully, the IRQ handler reads + * RAWSTAT and calls this function with the + * relevant bit set in "done" + * 3. The job in the next registers becomes the + * current job on the GPU + * 4. Sometime before the JOB_IRQ_CLEAR line + * above the job on the GPU _fails_ + * 5. The IRQ_CLEAR clears the done bit but not + * the failed bit. This atomically sets + * JOB_IRQ_JS_STATE. 
However, since both jobs
+				 * have now completed, the relevant bits for
+				 * the slot are set to 0.
+				 *
+				 * If we now did nothing then we'd incorrectly
+				 * assume that _both_ jobs had completed
+				 * successfully (since we haven't yet observed
+				 * the fail bit being set in RAWSTAT).
+				 *
+				 * So at this point if there are no active jobs
+				 * left we check to see if RAWSTAT has a failure
+				 * bit set for the job slot. If it does we know
+				 * that there has been a new failure that we
+				 * didn't previously know about, so we make sure
+				 * that we record this in active (but we wait
+				 * for the next loop to deal with it).
+				 *
+				 * If we were handling a job failure (i.e. done
+				 * has the relevant high bit set) then we know
+				 * that the value read back from
+				 * JOB_IRQ_JS_STATE is the correct number of
+				 * remaining jobs because the failed job will
+				 * have prevented any further jobs from starting
+				 * execution.
+				 */
+				u32 rawstat = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+				if ((rawstat >> (i + 16)) & 1) {
+					/* There is a failed job that we've
+					 * missed - add it back to active */
+					active |= (1u << i);
+				}
+			}
+
+			dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+							completion_code);
+
+			nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+			nr_done -= (active >> i) & 1;
+			nr_done -= (active >> (i + 16)) & 1;
+
+			if (nr_done <= 0) {
+				dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+									i);
+
+				goto spurious;
+			}
+
+			count += nr_done;
+
+			while (nr_done) {
+				if (nr_done == 1) {
+					kbase_gpu_complete_hw(kbdev, i,
+							completion_code,
+							job_tail,
+							&end_timestamp);
+					kbase_jm_try_kick_all(kbdev);
+				} else {
+					/* More than one job has completed.
+					 * Since this is not the last job being
+					 * reported this time it must have
+					 * passed. This is because the hardware
+					 * will not allow further jobs in a job
+					 * slot to complete until the failed job
+					 * is cleared from the IRQ status.
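+					 *
+					 * For example, with nr_done == 2 the
+					 * first pass completes the older atom
+					 * with BASE_JD_EVENT_DONE and only the
+					 * final pass (nr_done == 1) reports
+					 * the completion_code read from
+					 * JS<n>_STATUS above.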
+ */ + kbase_gpu_complete_hw(kbdev, i, + BASE_JD_EVENT_DONE, + 0, + &end_timestamp); + } + nr_done--; + } + spurious: + done = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) { + /* Workaround for missing interrupt caused by + * PRLAM-10883 */ + if (((active >> i) & 1) && (0 == + kbase_reg_read(kbdev, + JOB_SLOT_REG(i, + JS_STATUS), NULL))) { + /* Force job slot to be processed again + */ + done |= (1u << i); + } + } + + failed = done >> 16; + finished = (done & 0xFFFF) | failed; + if (done) + end_timestamp = ktime_get(); + } while (finished & (1 << i)); + + kbasep_job_slot_update_head_start_timestamp(kbdev, i, + end_timestamp); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#if KBASE_GPU_RESET_EN + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_COMMITTED) { + /* If we're trying to reset the GPU then we might be able to do + * it early (without waiting for a timeout) because some jobs + * have completed + */ + kbasep_try_reset_gpu_early(kbdev); + } +#endif /* KBASE_GPU_RESET_EN */ + KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count); +} +KBASE_EXPORT_TEST_API(kbase_job_done); + +static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + bool soft_stops_allowed = true; + + if (kbase_jd_katom_is_protected(katom)) { + soft_stops_allowed = false; + } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { + if ((katom->core_req & BASE_JD_REQ_T) != 0) + soft_stops_allowed = false; + } + return soft_stops_allowed; +} + +static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev, + base_jd_core_req core_reqs) +{ + bool hard_stops_allowed = true; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) { + if ((core_reqs & BASE_JD_REQ_T) != 0) + hard_stops_allowed = false; + } + return hard_stops_allowed; +} + +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, + int js, + u32 action, + base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom) +{ + struct kbase_context *kctx = target_katom->kctx; +#if KBASE_TRACE_ENABLE + u32 status_reg_before; + u64 job_in_head_before; + u32 status_reg_after; + + KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK))); + + /* Check the head pointer */ + job_in_head_before = ((u64) kbase_reg_read(kbdev, + JOB_SLOT_REG(js, JS_HEAD_LO), NULL)) + | (((u64) kbase_reg_read(kbdev, + JOB_SLOT_REG(js, JS_HEAD_HI), NULL)) + << 32); + status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), + NULL); +#endif + + if (action == JS_COMMAND_SOFT_STOP) { + bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev, + target_katom); + + if (!soft_stop_allowed) { +#ifdef CONFIG_MALI_DEBUG + dev_dbg(kbdev->dev, + "Attempt made to soft-stop a job that cannot be soft-stopped. 
core_reqs = 0x%X", + (unsigned int)core_reqs); +#endif /* CONFIG_MALI_DEBUG */ + return; + } + + /* We are about to issue a soft stop, so mark the atom as having + * been soft stopped */ + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; + + /* Mark the point where we issue the soft-stop command */ + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { + int i; + + for (i = 0; + i < kbase_backend_nr_atoms_submitted(kbdev, js); + i++) { + struct kbase_jd_atom *katom; + + katom = kbase_gpu_inspect(kbdev, js, i); + + KBASE_DEBUG_ASSERT(katom); + + /* For HW_ISSUE_8316, only 'bad' jobs attacking + * the system can cause this issue: normally, + * all memory should be allocated in multiples + * of 4 pages, and growable memory should be + * changed size in multiples of 4 pages. + * + * Whilst such 'bad' jobs can be cleared by a + * GPU reset, the locking up of a uTLB entry + * caused by the bad job could also stall other + * ASs, meaning that other ASs' jobs don't + * complete in the 'grace' period before the + * reset. We don't want to lose other ASs' jobs + * when they would normally complete fine, so we + * must 'poke' the MMU regularly to help other + * ASs complete */ + kbase_as_poking_timer_retain_atom( + kbdev, katom->kctx, katom); + } + } + + if (kbase_hw_has_feature( + kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_SOFT_STOP_1 : + JS_COMMAND_SOFT_STOP_0; + } + } else if (action == JS_COMMAND_HARD_STOP) { + bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev, + core_reqs); + + if (!hard_stop_allowed) { + /* Jobs can be hard-stopped for the following reasons: + * * CFS decides the job has been running too long (and + * soft-stop has not occurred). In this case the GPU + * will be reset by CFS if the job remains on the + * GPU. + * + * * The context is destroyed, kbase_jd_zap_context + * will attempt to hard-stop the job. However it also + * has a watchdog which will cause the GPU to be + * reset if the job remains on the GPU. + * + * * An (unhandled) MMU fault occurred. As long as + * BASE_HW_ISSUE_8245 is defined then the GPU will be + * reset. + * + * All three cases result in the GPU being reset if the + * hard-stop fails, so it is safe to just return and + * ignore the hard-stop request. + */ + dev_warn(kbdev->dev, + "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", + (unsigned int)core_reqs); + return; + } + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; + + if (kbase_hw_has_feature( + kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? 
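+			/* Narrow the generic stop command to the _0/_1
+			 * variant matching the job-chain flag the atom was
+			 * submitted with, so only the intended chain on this
+			 * slot is stopped. */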
+ JS_COMMAND_HARD_STOP_1 : + JS_COMMAND_HARD_STOP_0; + } + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx); + +#if KBASE_TRACE_ENABLE + status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), + NULL); + if (status_reg_after == BASE_JD_EVENT_ACTIVE) { + struct kbase_jd_atom *head; + struct kbase_context *head_kctx; + + head = kbase_gpu_inspect(kbdev, js, 0); + head_kctx = head->kctx; + + if (status_reg_before == BASE_JD_EVENT_ACTIVE) + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, + head, job_in_head_before, js); + else + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, + 0, js); + + switch (action) { + case JS_COMMAND_SOFT_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_SOFT_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_SOFT_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, + head, head->jc, js); + break; + default: + BUG(); + break; + } + } else { + if (status_reg_before == BASE_JD_EVENT_ACTIVE) + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, + job_in_head_before, js); + else + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, + 0, js); + + switch (action) { + case JS_COMMAND_SOFT_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, + js); + break; + case JS_COMMAND_SOFT_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, + 0, js); + break; + case JS_COMMAND_SOFT_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, + 0, js); + break; + case JS_COMMAND_HARD_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, + js); + break; + case JS_COMMAND_HARD_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, + 0, js); + break; + case JS_COMMAND_HARD_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, + 0, js); + break; + default: + BUG(); + break; + } + } +#endif +} + +void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) +{ + unsigned long flags; + struct kbase_device *kbdev; + int i; + + KBASE_DEBUG_ASSERT(kctx != NULL); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + /* Cancel any remaining running jobs for this kctx */ + mutex_lock(&kctx->jctx.lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Invalidate all jobs in context, to prevent re-submitting */ + for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { + if (!work_pending(&kctx->jctx.atoms[i].work)) + kctx->jctx.atoms[i].event_code = + BASE_JD_EVENT_JOB_CANCELLED; + } + + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_hardstop(kctx, i, NULL); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->jctx.lock); +} + +void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, + struct kbase_jd_atom *target_katom) +{ + struct kbase_device *kbdev; + int js = target_katom->slot_nr; + int priority = target_katom->sched_priority; + int i; + bool stop_sent = false; + + KBASE_DEBUG_ASSERT(kctx != NULL); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < 
kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		if (katom->kctx != kctx)
+			continue;
+
+		if (katom->sched_priority > priority) {
+			if (!stop_sent)
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY_CHANGE(
+						target_katom);
+
+			kbase_job_slot_softstop(kbdev, js, katom);
+			stop_sent = true;
+		}
+	}
+}
+
+struct zap_reset_data {
+	/* The stages are:
+	 * 1. The timer has never been called
+	 * 2. The zap has timed out, all slots are soft-stopped - the GPU reset
+	 *    will happen. The GPU has been reset when
+	 *    kbdev->hwaccess.backend.reset_wait is signalled
+	 *
+	 * (-1 - The timer has been cancelled)
+	 */
+	int stage;
+	struct kbase_device *kbdev;
+	struct hrtimer timer;
+	spinlock_t lock; /* protects updates to the stage member */
+};
+
+static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer)
+{
+	struct zap_reset_data *reset_data = container_of(timer,
+						struct zap_reset_data, timer);
+	struct kbase_device *kbdev = reset_data->kbdev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&reset_data->lock, flags);
+
+	if (reset_data->stage == -1)
+		goto out;
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_prepare_to_reset_gpu(kbdev)) {
+		dev_err(kbdev->dev, "Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+								ZAP_TIMEOUT);
+		kbase_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	reset_data->stage = 2;
+
+ out:
+	spin_unlock_irqrestore(&reset_data->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct zap_reset_data reset_data;
+	unsigned long flags;
+
+	hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	reset_data.timer.function = zap_timeout_callback;
+
+	spin_lock_init(&reset_data.lock);
+
+	reset_data.kbdev = kbdev;
+	reset_data.stage = 1;
+
+	hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for all jobs to finish, and for the context to be not-scheduled
+	 * (due to kbase_jd_zap_context(); we also guarantee it's not in the JS
+	 * policy queue either) */
+	wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
+	wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	spin_lock_irqsave(&reset_data.lock, flags);
+	if (reset_data.stage == 1) {
+		/* The timer hasn't run yet - so cancel it */
+		reset_data.stage = -1;
+	}
+	spin_unlock_irqrestore(&reset_data.lock, flags);
+
+	hrtimer_cancel(&reset_data.timer);
+
+	if (reset_data.stage == 2) {
+		/* The reset has already started.
+		 * Wait for the reset to complete
+		 */
+		wait_event(kbdev->hwaccess.backend.reset_wait,
+				atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+						== KBASE_RESET_GPU_NOT_PENDING);
+	}
+	destroy_hrtimer_on_stack(&reset_data.timer);
+
+	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+	/* Ensure that the signallers of the waitqs have finished */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+	u32 flush_id = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return flush_id;
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (NULL == kbdev->hwaccess.backend.reset_workq)
+		return -EINVAL;
+
+	KBASE_DEBUG_ASSERT(0 ==
+		object_is_on_stack(&kbdev->hwaccess.backend.reset_work));
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+#endif
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
+ * @kbdev: kbase device pointer
+ * @kctx: context to check against
+ * @js: slot to check
+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on
+ *                slot @js should be checked
+ *
+ * These checks are based upon the parameters that would normally be passed
+ * to kbase_job_slot_hardstop().
+ *
+ * In the event of @target_katom being NULL, this will check the last jobs that
+ * are likely to be running on the slot to see if a) they belong to kctx, and
+ * so would be stopped, and b) whether they have AFBC.
+ *
+ * In that case, it's guaranteed that a job currently executing on the HW with
+ * AFBC will be detected. However, this is a conservative check because it also
+ * detects jobs that have just completed too.
+ *
+ * Return: true when hard-stop _might_ stop an AFBC atom, else false.
+ */
+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
+		struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom)
+{
+	bool ret = false;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* When we have an atom the decision can be made straight away. */
+	if (target_katom)
+		return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC);
+
+	/* Otherwise, we must check the hardware to see if it has atoms from
+	 * this context with AFBC.
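+	 * Only the atoms' core_req flags are consulted; the job chains
+	 * themselves are not decoded, which is why the result is conservative.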
*/ + for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { + struct kbase_jd_atom *katom; + + katom = kbase_gpu_inspect(kbdev, js, i); + if (!katom) + continue; + + /* Ignore atoms from other contexts, they won't be stopped when + * we use this for checking if we should hard-stop them */ + if (katom->kctx != kctx) + continue; + + /* An atom on this slot and this context: check for AFBC */ + if (katom->core_req & BASE_JD_REQ_FS_AFBC) { + ret = true; + break; + } + } + + return ret; +} +#endif /* KBASE_GPU_RESET_EN */ + +/** + * kbase_job_slot_softstop_swflags - Soft-stop a job with flags + * @kbdev: The kbase device + * @js: The job slot to soft-stop + * @target_katom: The job that should be soft-stopped (or NULL for any job) + * @sw_flags: Flags to pass in about the soft-stop + * + * Context: + * The job slot lock must be held when calling this function. + * The job slot must not already be in the process of being soft-stopped. + * + * Soft-stop the specified job slot, with extra information about the stop + * + * Where possible any job in the next register is evicted before the soft-stop. + */ +void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, + struct kbase_jd_atom *target_katom, u32 sw_flags) +{ + KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); + kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, + JS_COMMAND_SOFT_STOP | sw_flags); +} + +/** + * kbase_job_slot_softstop - Soft-stop the specified job slot + * @kbdev: The kbase device + * @js: The job slot to soft-stop + * @target_katom: The job that should be soft-stopped (or NULL for any job) + * Context: + * The job slot lock must be held when calling this function. + * The job slot must not already be in the process of being soft-stopped. + * + * Where possible any job in the next register is evicted before the soft-stop. + */ +void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, + struct kbase_jd_atom *target_katom) +{ + kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); +} + +/** + * kbase_job_slot_hardstop - Hard-stop the specified job slot + * @kctx: The kbase context that contains the job(s) that should + * be hard-stopped + * @js: The job slot to hard-stop + * @target_katom: The job that should be hard-stopped (or NULL for all + * jobs from the context) + * Context: + * The job slot lock must be held when calling this function. + */ +void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, + struct kbase_jd_atom *target_katom) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool stopped; +#if KBASE_GPU_RESET_EN + /* We make the check for AFBC before evicting/stopping atoms. Note + * that no other thread can modify the slots whilst we have the + * hwaccess_lock. */ + int needs_workaround_for_afbc = + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542) + && kbasep_check_for_afbc_on_slot(kbdev, kctx, js, + target_katom); +#endif + + stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, + target_katom, + JS_COMMAND_HARD_STOP); +#if KBASE_GPU_RESET_EN + if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) || + kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) || + needs_workaround_for_afbc)) { + /* MIDBASE-2916 if a fragment job with AFBC encoding is + * hardstopped, ensure to do a soft reset also in order to + * clear the GPU status. + * Workaround for HW issue 8401 has an issue,so after + * hard-stopping just reset the GPU. 
This will ensure that the
+		 * jobs leave the GPU. */
+		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+			dev_err(kbdev->dev, "Issuing GPU soft-reset after hard stopping due to hardware issue");
+			kbase_reset_gpu_locked(kbdev);
+		}
+	}
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period.
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context).
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete.
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	/* For hard-stop, don't enter if hard-stop not allowed */
+	if (hw_action == JS_COMMAND_HARD_STOP &&
+			!kbasep_hard_stop_allowed(kbdev, core_reqs))
+		return;
+
+	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
+	 * causing disjoint */
+	if (hw_action == JS_COMMAND_SOFT_STOP &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
+				(action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+		return;
+
+	/* Nothing to do if already logged disjoint state on this atom */
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+		return;
+
+	target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+	kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_leave_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
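+ *
+ * Illustrative pairing (assuming the stop action set
+ * KBASE_KATOM_FLAG_IN_DISJOINT on the atom):
+ *
+ *	kbase_job_check_enter_disjoint(kbdev, action, core_reqs, katom);
+ *	...stop completes and the atom is removed from the hardware...
+ *	kbase_job_check_leave_disjoint(kbdev, katom);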
+ */ +void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, + struct kbase_jd_atom *target_katom) +{ + if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) { + target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT; + kbase_disjoint_state_down(kbdev); + } +} + + +#if KBASE_GPU_RESET_EN +static void kbase_debug_dump_registers(struct kbase_device *kbdev) +{ + int i; + + kbase_io_history_dump(kbdev); + + dev_err(kbdev->dev, "Register state:"); + dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL)); + dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x", + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL)); + for (i = 0; i < 3; i++) { + dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", + i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS), + NULL), + i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO), + NULL)); + } + dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL)); + dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL), + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL)); + dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL)); + dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL)); + dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG), NULL)); +} + +static void kbasep_reset_timeout_worker(struct work_struct *data) +{ + unsigned long flags; + struct kbase_device *kbdev; + ktime_t end_timestamp = ktime_get(); + struct kbasep_js_device_data *js_devdata; + bool try_schedule = false; + bool silent = false; + u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + + KBASE_DEBUG_ASSERT(data); + + kbdev = container_of(data, struct kbase_device, + hwaccess.backend.reset_work); + + KBASE_DEBUG_ASSERT(kbdev); + js_devdata = &kbdev->js_data; + + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_SILENT) + silent = true; + + KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); + + /* Suspend vinstr. + * This call will block until vinstr is suspended. */ + kbase_vinstr_suspend(kbdev->vinstr_ctx); + + /* Make sure the timer has completed - this cannot be done from + * interrupt context, so this cannot be done within + * kbasep_try_reset_gpu_early. */ + hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); + + if (kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + /* This would re-activate the GPU. 
Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	spin_lock(&kbdev->hwaccess_lock);
+	spin_lock(&kbdev->mmu_mask_change);
+	/* We're about to flush out the IRQs and their bottom halves */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQs to prevent handlers from running after the spinlocks
+	 * are released; this also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts_nolock(kbdev);
+
+	spin_unlock(&kbdev->mmu_mask_change);
+	spin_unlock(&kbdev->hwaccess_lock);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Ensure that any IRQ handlers have finished. This must be done
+	 * without holding any locks that the IRQ handlers may take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
+		/* Ensure that L2 is not transitioning when we send the reset
+		 * command */
+		while (--max_loops && kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_L2))
+			;
+
+		WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
+	}
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slots have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear. At this point we
+	 * assume that anything still left on the GPU is stuck there, and
+	 * we'll kill it when we reset the GPU */
+
+	if (!silent)
+		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+								RESET_TIMEOUT);
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	if (!silent)
+		kbase_debug_dump_registers(kbdev);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->protected_mode = false;
+	kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_restore_all_as(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	kbase_disjoint_state_down(kbdev);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
+
+	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+		try_schedule = true;
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately.
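+	 * (The synchronous wait itself is performed by
+	 * kbase_pm_check_transitions_sync() below, which does not return
+	 * until the requested power transitions have completed.)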
+	 */
+	kbase_pm_check_transitions_sync(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Try submitting some jobs to restart processing */
+	if (try_schedule) {
+		KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+									0);
+		kbase_js_sched_all(kbdev);
+	}
+
+	/* Process any pending slot updates */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_context_idle(kbdev);
+
+	/* Release vinstr */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+						KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent reading incorrect registers when dumping a failed job,
+	 * skip the early reset.
+	 */
+	if (kbdev->job_fault_debug != false)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+						KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu_locked
+ *   should not be called.
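+ *
+ * Typical usage pairs the two calls (an illustrative sketch; the same
+ * pattern appears in kbase_job_slot_hardstop() above):
+ *
+ *   if (kbase_prepare_to_reset_gpu_locked(kbdev))
+ *           kbase_reset_gpu_locked(kbdev);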
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_wait to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occurring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occurring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+
+void kbase_reset_gpu_silent(struct kbase_device *kbdev)
+{
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_SILENT) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+bool kbase_reset_gpu_active(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_NOT_PENDING)
+		return false;
+
+	return true;
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100644
index 000000000000..1f382b3c1af4
---
/dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h @@ -0,0 +1,164 @@ +/* + * + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Job Manager backend-specific low-level APIs. + */ + +#ifndef _KBASE_JM_HWACCESS_H_ +#define _KBASE_JM_HWACCESS_H_ + +#include <mali_kbase_hw.h> +#include <mali_kbase_debug.h> +#include <linux/atomic.h> + +#include <backend/gpu/mali_kbase_jm_rb.h> + +/** + * kbase_job_submit_nolock() - Submit a job to a certain job-slot + * @kbdev: Device pointer + * @katom: Atom to submit + * @js: Job slot to submit on + * + * The caller must check kbasep_jm_is_submit_slots_free() != false before + * calling this. + * + * The following locking conditions are made on the caller: + * - it must hold the hwaccess_lock + */ +void kbase_job_submit_nolock(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, int js); + +/** + * kbase_job_done_slot() - Complete the head job on a particular job-slot + * @kbdev: Device pointer + * @s: Job slot + * @completion_code: Completion code of job reported by GPU + * @job_tail: Job tail address reported by GPU + * @end_timestamp: Timestamp of job completion + */ +void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, + u64 job_tail, ktime_t *end_timestamp); + +#ifdef CONFIG_GPU_TRACEPOINTS +static inline char *kbasep_make_job_slot_string(int js, char *js_string, + size_t js_size) +{ + snprintf(js_string, js_size, "job_slot_%i", js); + return js_string; +} +#endif + +/** + * kbase_job_hw_submit() - Submit a job to the GPU + * @kbdev: Device pointer + * @katom: Atom to submit + * @js: Job slot to submit on + * + * The caller must check kbasep_jm_is_submit_slots_free() != false before + * calling this. + * + * The following locking conditions are made on the caller: + * - it must hold the hwaccess_lock + */ +void kbase_job_hw_submit(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js); + +/** + * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop + * on the specified atom + * @kbdev: Device pointer + * @js: Job slot to stop on + * @action: The action to perform, either JSn_COMMAND_HARD_STOP or + * JSn_COMMAND_SOFT_STOP + * @core_reqs: Core requirements of atom to stop + * @target_katom: Atom to stop + * + * The following locking conditions are made on the caller: + * - it must hold the hwaccess_lock + */ +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, + int js, + u32 action, + base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom); + +/** + * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job + * slot belonging to a given context. + * @kbdev: Device pointer + * @kctx: Context pointer. May be NULL + * @katom: Specific atom to stop. May be NULL + * @js: Job slot to hard stop + * @action: The action to perform, either JSn_COMMAND_HARD_STOP or + * JSn_COMMAND_SOFT_STOP + * + * If no context is provided then all jobs on the slot will be soft or hard + * stopped. 
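+ *
+ * For example, the following illustrative call (hwaccess lock held by the
+ * caller) soft-stops everything the given context currently has on slot 1:
+ *
+ *   kbase_backend_soft_hard_stop_slot(kbdev, kctx, 1, NULL,
+ *                                     JS_COMMAND_SOFT_STOP);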
+ * + * If a katom is provided then only that specific atom will be stopped. In this + * case the kctx parameter is ignored. + * + * Jobs that are on the slot but are not yet on the GPU will be unpulled and + * returned to the job scheduler. + * + * Return: true if an atom was stopped, false otherwise + */ +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js, + struct kbase_jd_atom *katom, + u32 action); + +/** + * kbase_job_slot_init - Initialise job slot framework + * @kbdev: Device pointer + * + * Called on driver initialisation + * + * Return: 0 on success + */ +int kbase_job_slot_init(struct kbase_device *kbdev); + +/** + * kbase_job_slot_halt - Halt the job slot framework + * @kbdev: Device pointer + * + * Should prevent any further job slot processing + */ +void kbase_job_slot_halt(struct kbase_device *kbdev); + +/** + * kbase_job_slot_term - Terminate job slot framework + * @kbdev: Device pointer + * + * Called on driver termination + */ +void kbase_job_slot_term(struct kbase_device *kbdev); + +/** + * kbase_gpu_cacheclean - Cause a GPU cache clean & flush + * @kbdev: Device pointer + * + * Caller must not be in IRQ context + */ +void kbase_gpu_cacheclean(struct kbase_device *kbdev); + +#endif /* _KBASE_JM_HWACCESS_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c new file mode 100644 index 000000000000..a41e7b5b7afb --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c @@ -0,0 +1,1947 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Register-based HW access backend specific APIs + */ + +#include <mali_kbase.h> +#include <mali_kbase_hwaccess_jm.h> +#include <mali_kbase_jm.h> +#include <mali_kbase_js.h> +#include <mali_kbase_tlstream.h> +#include <mali_kbase_10969_workaround.h> +#include <backend/gpu/mali_kbase_cache_policy_backend.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_jm_internal.h> +#include <backend/gpu/mali_kbase_js_affinity.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +/* Return whether the specified ringbuffer is empty. HW access lock must be + * held */ +#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) +/* Return number of atoms currently in the specified ringbuffer. 
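+ * (A worked example, assuming the 8-bit indices that the s8 cast implies:
+ * write_idx == 1 and read_idx == 255 give (s8)(1 - 255) == 2, i.e. two
+ * entries in flight across the wraparound.)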
HW access lock + * must be held */ +#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) + +static void kbase_gpu_release_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + ktime_t *end_timestamp); + +/** + * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer + * @kbdev: Device pointer + * @katom: Atom to enqueue + * + * Context: Caller must hold the HW access lock + */ +static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr]; + + WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom; + rb->write_idx++; + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; +} + +/** + * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once + * it has been completed + * @kbdev: Device pointer + * @js: Job slot to remove atom from + * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in + * which case current time will be used. + * + * Context: Caller must hold the HW access lock + * + * Return: Atom removed from ringbuffer + */ +static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, + int js, + ktime_t *end_timestamp) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + struct kbase_jd_atom *katom; + + if (SLOT_RB_EMPTY(rb)) { + WARN(1, "GPU ringbuffer unexpectedly empty\n"); + return NULL; + } + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; + + kbase_gpu_release_atom(kbdev, katom, end_timestamp); + + rb->read_idx++; + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB; + + kbase_js_debug_log_current_affinities(kbdev); + + return katom; +} + +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, + int idx) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if ((SLOT_RB_ENTRIES(rb) - 1) < idx) + return NULL; /* idx out of range */ + + return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; +} + +struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev, + int js) +{ + return kbase_gpu_inspect(kbdev, js, 0); +} + +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, + int js) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + + if (SLOT_RB_EMPTY(rb)) + return NULL; + + return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom; +} + +/** + * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently + * on the GPU + * @kbdev: Device pointer + * @js: Job slot to inspect + * + * Return: true if there are atoms on the GPU for slot js, + * false otherwise + */ +static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js) +{ + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (!katom) + return false; + if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED || + katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY) + return true; + } + + return false; +} + +/** + * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms + * currently on the GPU + * @kbdev: Device pointer + * + * Return: true if there are any atoms on the GPU, false otherwise + */ +static bool kbase_gpu_atoms_submitted_any(struct 
kbase_device *kbdev) +{ + int js; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + return true; + } + } + return false; +} + +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && (katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED)) + nr++; + } + + return nr; +} + +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + if (kbase_gpu_inspect(kbdev, js, i)) + nr++; + } + + return nr; +} + +static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, + enum kbase_atom_gpu_rb_state min_rb_state) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && (katom->gpu_rb_state >= min_rb_state)) + nr++; + } + + return nr; +} + +/** + * check_secure_atom - Check if the given atom is in the given secure state and + * has a ringbuffer state of at least + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @katom: Atom pointer + * @secure: Desired secure state + * + * Return: true if atom is in the given state, false otherwise + */ +static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) +{ + if (katom->gpu_rb_state >= + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && + ((kbase_jd_katom_is_protected(katom) && secure) || + (!kbase_jd_katom_is_protected(katom) && !secure))) + return true; + + return false; +} + +/** + * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given + * secure state in the ringbuffers of at least + * state + * KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE + * @kbdev: Device pointer + * @secure: Desired secure state + * + * Return: true if any atoms are in the given state, false otherwise + */ +static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, + bool secure) +{ + int js, i; + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, i); + + if (katom) { + if (check_secure_atom(katom, secure)) + return true; + } + } + } + + return false; +} + +int kbase_backend_slot_free(struct kbase_device *kbdev, int js) +{ + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != + KBASE_RESET_GPU_NOT_PENDING) { + /* The GPU is being reset - so prevent submission */ + return 0; + } + + return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); +} + + +static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev, + int js, + struct kbase_jd_atom *katom) +{ + /* The most recently checked affinity. Having this at this scope allows + * us to guarantee that we've checked the affinity in this function + * call. 
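+	 *
+	 * (Illustrative happy path through the state machine below, when
+	 * every request succeeds at the first attempt:
+	 * NO_CORES_REQUESTED -> WAITING_FOR_REQUESTED_CORES ->
+	 * RECHECK_AFFINITY -> CHECK_AFFINITY_VIOLATIONS -> READY.)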
+ */ + u64 recently_chosen_affinity = 0; + bool chosen_affinity = false; + bool retry; + + do { + retry = false; + + /* NOTE: The following uses a number of FALLTHROUGHs to optimize + * the calls to this function. Ending of the function is + * indicated by BREAK OUT */ + switch (katom->coreref_state) { + /* State when job is first attempted to be run */ + case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: + KBASE_DEBUG_ASSERT(katom->affinity == 0); + + /* Compute affinity */ + if (false == kbase_js_choose_affinity( + &recently_chosen_affinity, kbdev, katom, + js)) { + /* No cores are currently available */ + /* *** BREAK OUT: No state transition *** */ + break; + } + + chosen_affinity = true; + + /* Request the cores */ + kbase_pm_request_cores(kbdev, + katom->core_req & BASE_JD_REQ_T, + recently_chosen_affinity); + + katom->affinity = recently_chosen_affinity; + + /* Proceed to next state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: + { + enum kbase_pm_cores_ready cores_ready; + + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + + cores_ready = kbase_pm_register_inuse_cores( + kbdev, + katom->core_req & BASE_JD_REQ_T, + katom->affinity); + if (cores_ready == KBASE_NEW_AFFINITY) { + /* Affinity no longer valid - return to + * previous state */ + kbasep_js_job_check_deref_cores(kbdev, + katom); + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REGISTER_INUSE_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) katom->affinity); + /* *** BREAK OUT: Return to previous + * state, retry *** */ + retry = true; + break; + } + if (cores_ready == KBASE_CORES_NOT_READY) { + /* Stay in this state and return, to + * retry at this state later */ + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REGISTER_INUSE_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) katom->affinity); + /* *** BREAK OUT: No state transition + * *** */ + break; + } + /* Proceed to next state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; + } + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + + /* Optimize out choosing the affinity twice in the same + * function call */ + if (chosen_affinity == false) { + /* See if the affinity changed since a previous + * call. 
*/ + if (false == kbase_js_choose_affinity( + &recently_chosen_affinity, + kbdev, katom, js)) { + /* No cores are currently available */ + kbasep_js_job_check_deref_cores(kbdev, + katom); + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REQUEST_ON_RECHECK_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) recently_chosen_affinity); + /* *** BREAK OUT: Transition to lower + * state *** */ + break; + } + chosen_affinity = true; + } + + /* Now see if this requires a different set of cores */ + if (recently_chosen_affinity != katom->affinity) { + enum kbase_pm_cores_ready cores_ready; + + kbase_pm_request_cores(kbdev, + katom->core_req & BASE_JD_REQ_T, + recently_chosen_affinity); + + /* Register new cores whilst we still hold the + * old ones, to minimize power transitions */ + cores_ready = + kbase_pm_register_inuse_cores(kbdev, + katom->core_req & BASE_JD_REQ_T, + recently_chosen_affinity); + kbasep_js_job_check_deref_cores(kbdev, katom); + + /* Fixup the state that was reduced by + * deref_cores: */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; + katom->affinity = recently_chosen_affinity; + if (cores_ready == KBASE_NEW_AFFINITY) { + /* Affinity no longer valid - return to + * previous state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; + + kbasep_js_job_check_deref_cores(kbdev, + katom); + + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REGISTER_INUSE_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) katom->affinity); + /* *** BREAK OUT: Return to previous + * state, retry *** */ + retry = true; + break; + } + /* Now might be waiting for powerup again, with + * a new affinity */ + if (cores_ready == KBASE_CORES_NOT_READY) { + /* Return to previous state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REGISTER_ON_RECHECK_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) katom->affinity); + /* *** BREAK OUT: Transition to lower + * state *** */ + break; + } + } + /* Proceed to next state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS: + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + KBASE_DEBUG_ASSERT(katom->affinity == + recently_chosen_affinity); + + /* Note: this is where the caller must've taken the + * hwaccess_lock */ + + /* Check for affinity violations - if there are any, + * then we just ask the caller to requeue and try again + * later */ + if (kbase_js_affinity_would_violate(kbdev, js, + katom->affinity) != false) { + /* Return to previous state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; + /* *** BREAK OUT: Transition to lower state *** + */ + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_AFFINITY_WOULD_VIOLATE, + katom->kctx, katom, katom->jc, js, + (u32) katom->affinity); + break; + } + + /* No affinity violations would result, so the cores are + * ready */ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY; + /* *** BREAK OUT: Cores Ready *** */ + break; + + default: + KBASE_DEBUG_ASSERT_MSG(false, + "Unhandled kbase_atom_coreref_state %d", + katom->coreref_state); + break; + } + } while (retry != false); + + return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY); +} + +static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + 
KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + + switch (katom->coreref_state) { + case KBASE_ATOM_COREREF_STATE_READY: + /* State where atom was submitted to the HW - just proceed to + * power-down */ + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + + /* *** FALLTHROUGH *** */ + + case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: + /* State where cores were registered */ + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T, + katom->affinity); + + break; + + case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: + /* State where cores were requested, but not registered */ + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T, + katom->affinity); + break; + + case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: + /* Initial state - nothing required */ + KBASE_DEBUG_ASSERT(katom->affinity == 0); + break; + + default: + KBASE_DEBUG_ASSERT_MSG(false, + "Unhandled coreref_state: %d", + katom->coreref_state); + break; + } + + katom->affinity = 0; + katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; +} + +static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev, + base_jd_core_req core_req, u64 affinity, + enum kbase_atom_coreref_state coreref_state) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + switch (coreref_state) { + case KBASE_ATOM_COREREF_STATE_READY: + /* State where atom was submitted to the HW - just proceed to + * power-down */ + KBASE_DEBUG_ASSERT(affinity != 0 || + (core_req & BASE_JD_REQ_T)); + + /* *** FALLTHROUGH *** */ + + case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: + /* State where cores were registered */ + KBASE_DEBUG_ASSERT(affinity != 0 || + (core_req & BASE_JD_REQ_T)); + kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T, + affinity); + + break; + + case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: + /* State where cores were requested, but not registered */ + KBASE_DEBUG_ASSERT(affinity != 0 || + (core_req & BASE_JD_REQ_T)); + kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T, + affinity); + break; + + case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: + /* Initial state - nothing required */ + KBASE_DEBUG_ASSERT(affinity == 0); + break; + + default: + KBASE_DEBUG_ASSERT_MSG(false, + "Unhandled coreref_state: %d", + coreref_state); + break; + } +} + +static void kbase_gpu_release_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + ktime_t *end_timestamp) +{ + struct kbase_context *kctx = katom->kctx; + + switch (katom->gpu_rb_state) { + case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: + /* Should be impossible */ + WARN(1, "Attempting to release atom not in ringbuffer\n"); + break; + + case KBASE_ATOM_GPU_RB_SUBMITTED: + /* Inform power management at start/finish of atom so it can + * update its GPU utilisation metrics. Mark atom as not + * submitted beforehand. 
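+		 *
+		 * (More generally, note that the switch below is a deliberate
+		 * fall-through chain: release starts at the atom's current
+		 * ringbuffer state and undoes each acquisition in turn, down
+		 * to KBASE_ATOM_GPU_RB_WAITING_BLOCKED.)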
*/ + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + kbase_pm_metrics_update(kbdev, end_timestamp); + + if (katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom, + &kbdev->gpu_props.props.raw_props.js_features + [katom->slot_nr]); + KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]); + KBASE_TLSTREAM_TL_NRET_CTX_LPU(kctx, + &kbdev->gpu_props.props.raw_props.js_features + [katom->slot_nr]); + + case KBASE_ATOM_GPU_RB_READY: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: + kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr, + katom->affinity); + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + break; + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + if (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_CHECK || + katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_CHECK) + kbdev->protected_mode_transition = false; + + if (kbase_jd_katom_is_protected(katom) && + (katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) { + kbase_vinstr_resume(kbdev->vinstr_ctx); + + /* Go back to configured model for IPA */ + kbase_ipa_model_use_configured_locked(kbdev); + } + + + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_RETURN_TO_JS: + break; + } + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; + katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; +} + +static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + kbase_gpu_release_atom(kbdev, katom, NULL); + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; +} + +static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + bool slot_busy[3]; + + if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) + return true; + slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0, + KBASE_ATOM_GPU_RB_WAITING_AFFINITY); + slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1, + KBASE_ATOM_GPU_RB_WAITING_AFFINITY); + slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2, + KBASE_ATOM_GPU_RB_WAITING_AFFINITY); + + if ((js == 2 && !(slot_busy[0] || slot_busy[1])) || + (js != 2 && !slot_busy[2])) + return true; + + /* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */ + if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) || + kbase_gpu_atoms_submitted(kbdev, 1) || + backend->rmu_workaround_flag)) + return false; + + /* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */ + if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) || + !backend->rmu_workaround_flag)) + return false; + + backend->rmu_workaround_flag = !backend->rmu_workaround_flag; + + return true; +} + +/** + * other_slots_busy - Determine if any job slots other than @js are currently + * running atoms + * @kbdev: Device pointer + * @js: Job slot + * + * Return: true if any slots other than @js are busy, false otherwise + */ +static inline bool other_slots_busy(struct kbase_device *kbdev, int js) +{ + int slot; + + for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { + if (slot == js) + 
continue;
+
+		if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot,
+				KBASE_ATOM_GPU_RB_SUBMITTED))
+			return true;
+	}
+
+	return false;
+}
+
+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev)
+{
+	return kbdev->protected_mode;
+}
+
+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot enter protected mode: protected callbacks not specified.\n");
+
+	/*
+	 * When entering into protected mode, we must ensure that the
+	 * GPU is not operating in coherent mode as well. This is to
+	 * ensure that no protected memory can be leaked.
+	 */
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
+
+	if (kbdev->protected_ops) {
+		/* Switch GPU to protected mode */
+		err = kbdev->protected_ops->protected_mode_enable(
+				kbdev->protected_dev);
+
+		if (err)
+			dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
+					err);
+		else
+			kbdev->protected_mode = true;
+	}
+
+	return err;
+}
+
+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot exit protected mode: protected callbacks not specified.\n");
+
+	if (!kbdev->protected_ops)
+		return -EINVAL;
+
+	/* The protected mode disable callback will be called as part of reset
+	 */
+	kbase_reset_gpu_silent(kbdev);
+
+	return 0;
+}
+
+static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	switch (katom[idx]->protected_state.enter) {
+	case KBASE_ATOM_ENTER_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitioning, and
+		 * that there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		kbdev->protected_mode_transition = true;
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_VINSTR;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
+		if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
+			/*
+			 * We can't switch now because
+			 * the vinstr core state switch
+			 * is not done yet.
+			 */
+			return -EAGAIN;
+		}
+
+		/* Use generic model for IPA in protected mode */
+		kbase_ipa_model_use_fallback_locked(kbdev);
+
+		/* Once this point is reached, the GPU must either be switched
+		 * to protected mode or vinstr must be re-enabled. */
+
+		/*
+		 * Not in the correct mode, begin the protected mode switch.
+		 * Entering protected mode requires us to power down the L2,
+		 * and drop out of fully coherent mode.
+		 */
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
+		/* Avoid unnecessary waiting on non-ACE platforms. */
+		if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) {
+			if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+				/*
+				 * The L2 is still powered, wait for all the
+				 * users to finish with it before doing the
+				 * actual reset.
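+				 *
+				 * (Returning -EAGAIN leaves the atom in the
+				 * WAITING_PROTECTED_MODE_TRANSITION ringbuffer
+				 * state; the state machine is simply re-entered
+				 * on the next kbase_backend_slot_update() call.)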
+				 */
+				return -EAGAIN;
+			}
+		}
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_FINISHED;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
+
+		/* No jobs running, so we can switch GPU mode right now. */
+		err = kbase_gpu_protected_mode_enter(kbdev);
+
+		/*
+		 * Regardless of result, we are no longer transitioning
+		 * the GPU.
+		 */
+		kbdev->protected_mode_transition = false;
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev);
+		if (err) {
+			/*
+			 * Failed to switch into protected mode, resume
+			 * vinstr core and fail atom.
+			 */
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order. */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			/* Go back to configured model for IPA */
+			kbase_ipa_model_use_configured_locked(kbdev);
+
+			return -EINVAL;
+		}
+
+		/* Protected mode sanity checks. */
+		KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) ==
+			kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]),
+			kbase_gpu_in_protected_mode(kbdev));
+		katom[idx]->gpu_rb_state =
+			KBASE_ATOM_GPU_RB_READY;
+	}
+
+	return 0;
+}
+
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	switch (katom[idx]->protected_state.exit) {
+	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitioning, and
+		 * that there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		/*
+		 * Exiting protected mode requires a reset, but first the L2
+		 * needs to be powered down to ensure it's not active when the
+		 * reset is issued.
+		 */
+		katom[idx]->protected_state.exit =
+			KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
+
+		kbdev->protected_mode_transition = true;
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/*
+			 * The L2 is still powered, wait for all the users to
+			 * finish with it before doing the actual reset.
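+			 *
+			 * (Once the L2 is down, the EXIT_PROTECTED_RESET step
+			 * below calls kbase_gpu_protected_mode_reset(), whose
+			 * silent GPU reset is what actually takes the GPU out
+			 * of protected mode.)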
+ */ + return -EAGAIN; + } + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_RESET; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_EXIT_PROTECTED_RESET: + /* Issue the reset to the GPU */ + err = kbase_gpu_protected_mode_reset(kbdev); + + if (err) { + kbdev->protected_mode_transition = false; + + /* Failed to exit protected mode, fail atom */ + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + + kbase_vinstr_resume(kbdev->vinstr_ctx); + + /* Use generic model for IPA in protected mode */ + kbase_ipa_model_use_fallback_locked(kbdev); + + return -EINVAL; + } + + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: + /* A GPU reset is issued when exiting protected mode. Once the + * reset is done all atoms' state will also be reset. For this + * reason, if the atom is still in this state we can safely + * say that the reset has not completed i.e., we have not + * finished exiting protected mode yet. + */ + return -EAGAIN; + } + + return 0; +} + +void kbase_backend_slot_update(struct kbase_device *kbdev) +{ + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + struct kbase_jd_atom *katom[2]; + int idx; + + katom[0] = kbase_gpu_inspect(kbdev, js, 0); + katom[1] = kbase_gpu_inspect(kbdev, js, 1); + WARN_ON(katom[1] && !katom[0]); + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + bool cores_ready; + int ret; + + if (!katom[idx]) + continue; + + switch (katom[idx]->gpu_rb_state) { + case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: + /* Should be impossible */ + WARN(1, "Attempting to update atom not in ringbuffer\n"); + break; + + case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: + if (katom[idx]->atom_flags & + KBASE_KATOM_FLAG_X_DEP_BLOCKED) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: + if (kbase_gpu_check_secure_atoms(kbdev, + !kbase_jd_katom_is_protected( + katom[idx]))) + break; + + if ((idx == 1) && (kbase_jd_katom_is_protected( + katom[0]) != + kbase_jd_katom_is_protected( + katom[1]))) + break; + + if (kbdev->protected_mode_transition) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + + /* + * Exiting protected mode must be done before + * the references on the cores are taken as + * a power down the L2 is required which + * can't happen after the references for this + * atom are taken. + */ + + if (!kbase_gpu_in_protected_mode(kbdev) && + kbase_jd_katom_is_protected(katom[idx])) { + /* Atom needs to transition into protected mode. */ + ret = kbase_jm_enter_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + } else if (kbase_gpu_in_protected_mode(kbdev) && + !kbase_jd_katom_is_protected(katom[idx])) { + /* Atom needs to transition out of protected mode. 
*/ + ret = kbase_jm_exit_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + } + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_CHECK; + + /* Atom needs no protected mode transition. */ + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + if (katom[idx]->will_fail_event_code) { + kbase_gpu_mark_atom_for_return(kbdev, + katom[idx]); + /* Set EVENT_DONE so this atom will be + completed, not unpulled. */ + katom[idx]->event_code = + BASE_JD_EVENT_DONE; + /* Only return if head atom or previous + * atom already removed - as atoms must + * be returned in order. */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + break; + } + + cores_ready = + kbasep_js_job_check_ref_cores(kbdev, js, + katom[idx]); + + if (katom[idx]->event_code == + BASE_JD_EVENT_PM_EVENT) { + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_RETURN_TO_JS; + break; + } + + if (!cores_ready) + break; + + kbase_js_affinity_retain_slot_cores(kbdev, js, + katom[idx]->affinity); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_AFFINITY; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: + if (!kbase_gpu_rmu_workaround(kbdev, js)) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_READY; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_READY: + + if (idx == 1) { + /* Only submit if head atom or previous + * atom already submitted */ + if ((katom[0]->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED && + katom[0]->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + break; + + /* If intra-slot serialization in use + * then don't submit atom to NEXT slot + */ + if (kbdev->serialize_jobs & + KBASE_SERIALIZE_INTRA_SLOT) + break; + } + + /* If inter-slot serialization in use then don't + * submit atom if any other slots are in use */ + if ((kbdev->serialize_jobs & + KBASE_SERIALIZE_INTER_SLOT) && + other_slots_busy(kbdev, js)) + break; + + if ((kbdev->serialize_jobs & + KBASE_SERIALIZE_RESET) && + kbase_reset_gpu_active(kbdev)) + break; + + /* Check if this job needs the cycle counter + * enabled before submission */ + if (katom[idx]->core_req & BASE_JD_REQ_PERMON) + kbase_pm_request_gpu_cycle_counter_l2_is_on( + kbdev); + + kbase_job_hw_submit(kbdev, katom[idx], js); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_SUBMITTED; + + /* Inform power management at start/finish of + * atom so it can update its GPU utilisation + * metrics. 
*/ + kbase_pm_metrics_update(kbdev, + &katom[idx]->start_timestamp); + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_SUBMITTED: + /* Atom submitted to HW, nothing else to do */ + break; + + case KBASE_ATOM_GPU_RB_RETURN_TO_JS: + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, + katom[idx]); + } + break; + } + } + } + + /* Warn if PRLAM-8987 affinity restrictions are violated */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) + WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) || + kbase_gpu_atoms_submitted(kbdev, 1)) && + kbase_gpu_atoms_submitted(kbdev, 2)); +} + + +void kbase_backend_run_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + kbase_gpu_enqueue_atom(kbdev, katom); + kbase_backend_slot_update(kbdev); +} + +#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \ + (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER)) + +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js) +{ + struct kbase_jd_atom *katom; + struct kbase_jd_atom *next_katom; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom = kbase_gpu_inspect(kbdev, js, 0); + next_katom = kbase_gpu_inspect(kbdev, js, 1); + + if (next_katom && katom->kctx == next_katom->kctx && + next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && + HAS_DEP(next_katom) && + (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL) + != 0 || + kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL) + != 0)) { + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), + JS_COMMAND_NOP, NULL); + next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + + KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom, + &kbdev->gpu_props.props.raw_props.js_features + [katom->slot_nr]); + KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as + [katom->kctx->as_nr]); + KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx, + &kbdev->gpu_props.props.raw_props.js_features + [katom->slot_nr]); + + return true; + } + + return false; +} + +void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + u32 completion_code, + u64 job_tail, + ktime_t *end_timestamp) +{ + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + struct kbase_context *kctx = katom->kctx; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* + * When a hard-stop is followed close after a soft-stop, the completion + * code may be set to STOPPED, even though the job is terminated + */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) { + if (completion_code == BASE_JD_EVENT_STOPPED && + (katom->atom_flags & + KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { + completion_code = BASE_JD_EVENT_TERMINATED; + } + } + + if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) || (katom->core_req & + BASE_JD_REQ_SKIP_CACHE_END)) && + completion_code != BASE_JD_EVENT_DONE && + !(completion_code & BASE_JD_SW_EVENT)) { + /* When a job chain fails, on a T60x or when + * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not + * flushed. To prevent future evictions causing possible memory + * corruption we need to flush the cache manually before any + * affected memory gets reused. 
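+		 *
+		 * (The flush itself is deferred: the affected cores are
+		 * retained here, and the actual clean & flush is performed
+		 * later via kbase_gpu_cacheclean(), declared in
+		 * mali_kbase_jm_internal.h above.)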
*/ + katom->need_cache_flush_cores_retained = katom->affinity; + kbase_pm_request_cores(kbdev, false, katom->affinity); + } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) { + if (kbdev->gpu_props.num_core_groups > 1 && + !(katom->affinity & + kbdev->gpu_props.props.coherency_info.group[0].core_mask + ) && + (katom->affinity & + kbdev->gpu_props.props.coherency_info.group[1].core_mask + )) { + dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n"); + katom->need_cache_flush_cores_retained = + katom->affinity; + kbase_pm_request_cores(kbdev, false, + katom->affinity); + } + } + + katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); + kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0); + + if (completion_code == BASE_JD_EVENT_STOPPED) { + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, + 0); + + /* + * Dequeue next atom from ringbuffers on same slot if required. + * This atom will already have been removed from the NEXT + * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that + * the atoms on this slot are returned in the correct order. + */ + if (next_katom && katom->kctx == next_katom->kctx && + next_katom->sched_priority == + katom->sched_priority) { + kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); + kbase_jm_return_atom_to_js(kbdev, next_katom); + } + } else if (completion_code != BASE_JD_EVENT_DONE) { + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + int i; + +#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0 + KBASE_TRACE_DUMP(kbdev); +#endif + kbasep_js_clear_submit_allowed(js_devdata, katom->kctx); + + /* + * Remove all atoms on the same context from ringbuffers. This + * will not remove atoms that are already on the GPU, as these + * are guaranteed not to have fail dependencies on the failed + * atom. 
+ */ + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { + struct kbase_jd_atom *katom_idx0 = + kbase_gpu_inspect(kbdev, i, 0); + struct kbase_jd_atom *katom_idx1 = + kbase_gpu_inspect(kbdev, i, 1); + + if (katom_idx0 && katom_idx0->kctx == katom->kctx && + HAS_DEP(katom_idx0) && + katom_idx0->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Dequeue katom_idx0 from ringbuffer */ + kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); + + if (katom_idx1 && + katom_idx1->kctx == katom->kctx + && HAS_DEP(katom_idx1) && + katom_idx0->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Dequeue katom_idx1 from ringbuffer */ + kbase_gpu_dequeue_atom(kbdev, i, + end_timestamp); + + katom_idx1->event_code = + BASE_JD_EVENT_STOPPED; + kbase_jm_return_atom_to_js(kbdev, + katom_idx1); + } + katom_idx0->event_code = BASE_JD_EVENT_STOPPED; + kbase_jm_return_atom_to_js(kbdev, katom_idx0); + + } else if (katom_idx1 && + katom_idx1->kctx == katom->kctx && + HAS_DEP(katom_idx1) && + katom_idx1->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Can not dequeue this atom yet - will be + * dequeued when atom at idx0 completes */ + katom_idx1->event_code = BASE_JD_EVENT_STOPPED; + kbase_gpu_mark_atom_for_return(kbdev, + katom_idx1); + } + } + } + + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, + js, completion_code); + + if (job_tail != 0 && job_tail != katom->jc) { + bool was_updated = (job_tail != katom->jc); + + /* Some of the job has been executed, so we update the job chain + * address to where we should resume from */ + katom->jc = job_tail; + if (was_updated) + KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, + katom, job_tail, js); + } + + /* Only update the event code for jobs that weren't cancelled */ + if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) + katom->event_code = (base_jd_event_code)completion_code; + + kbase_device_trace_register_access(kctx, REG_WRITE, + JOB_CONTROL_REG(JOB_IRQ_CLEAR), + 1 << js); + + /* Complete the job, and start new ones + * + * Also defer remaining work onto the workqueue: + * - Re-queue Soft-stopped jobs + * - For any other jobs, queue the job back into the dependency system + * - Schedule out the parent context if necessary, and schedule a new + * one in. + */ +#ifdef CONFIG_GPU_TRACEPOINTS + { + /* The atom in the HEAD */ + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, + 0); + + if (next_katom && next_katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + char js_string[16]; + + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, + js_string, + sizeof(js_string)), + ktime_to_ns(*end_timestamp), + (u32)next_katom->kctx->id, 0, + next_katom->work_id); + kbdev->hwaccess.backend.slot_rb[js].last_context = + next_katom->kctx; + } else { + char js_string[16]; + + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, + js_string, + sizeof(js_string)), + ktime_to_ns(ktime_get()), 0, 0, + 0); + kbdev->hwaccess.backend.slot_rb[js].last_context = 0; + } + } +#endif + + if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET) + kbase_reset_gpu_silent(kbdev); + + if (completion_code == BASE_JD_EVENT_STOPPED) + katom = kbase_jm_return_atom_to_js(kbdev, katom); + else + katom = kbase_jm_complete(kbdev, katom, end_timestamp); + + if (katom) { + /* Cross-slot dependency has now become runnable. Try to submit + * it. 
*/ + + /* Check if there are lower priority jobs to soft stop */ + kbase_job_slot_ctx_priority_check_locked(kctx, katom); + + kbase_jm_try_kick(kbdev, 1 << katom->slot_nr); + } + + /* Job completion may have unblocked other atoms. Try to update all job + * slots */ + kbase_backend_slot_update(kbdev); +} + +void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) +{ + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Reset should always take the GPU out of protected mode */ + WARN_ON(kbase_gpu_in_protected_mode(kbdev)); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int atom_idx = 0; + int idx; + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, atom_idx); + bool keep_in_jm_rb = false; + + if (!katom) + break; + if (katom->protected_state.exit == + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) + { + KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev); + + kbase_vinstr_resume(kbdev->vinstr_ctx); + + /* protected mode sanity checks */ + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev)); + KBASE_DEBUG_ASSERT_MSG( + (kbase_jd_katom_is_protected(katom) && js == 0) || + !kbase_jd_katom_is_protected(katom), + "Protected atom on JS%d not supported", js); + } + if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) + keep_in_jm_rb = true; + + kbase_gpu_release_atom(kbdev, katom, NULL); + + /* + * If the atom wasn't on HW when the reset was issued + * then leave it in the RB and next time we're kicked + * it will be processed again from the starting state. + */ + if (keep_in_jm_rb) { + kbasep_js_job_check_deref_cores(kbdev, katom); + katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; + katom->affinity = 0; + katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; + /* As the atom was not removed, increment the + * index so that we read the correct atom in the + * next iteration. */ + atom_idx++; + continue; + } + + /* + * The atom was on the HW when the reset was issued + * all we can do is fail the atom. 
+ */ + kbase_gpu_dequeue_atom(kbdev, js, NULL); + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + kbase_jm_complete(kbdev, katom, end_timestamp); + } + } + + kbdev->protected_mode_transition = false; +} + +static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, + int js, + struct kbase_jd_atom *katom, + u32 action) +{ + u32 hw_action = action & JS_COMMAND_MASK; + + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); + kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, + katom->core_req, katom); + katom->kctx->blocked_js[js][katom->sched_priority] = true; +} + +static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + u32 action, + bool disjoint) +{ + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_gpu_mark_atom_for_return(kbdev, katom); + katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true; + + if (disjoint) + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, + katom); +} + +static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) +{ + if (katom->x_post_dep) { + struct kbase_jd_atom *dep_atom = katom->x_post_dep; + + if (dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && + dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_RETURN_TO_JS) + return dep_atom->slot_nr; + } + return -1; +} + +static void kbase_job_evicted(struct kbase_jd_atom *katom) +{ + kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom, + katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT); +} + +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js, + struct kbase_jd_atom *katom, + u32 action) +{ + struct kbase_jd_atom *katom_idx0; + struct kbase_jd_atom *katom_idx1; + + bool katom_idx0_valid, katom_idx1_valid; + + bool ret = false; + + int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1; + int prio_idx0 = 0, prio_idx1 = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); + katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); + + if (katom_idx0) + prio_idx0 = katom_idx0->sched_priority; + if (katom_idx1) + prio_idx1 = katom_idx1->sched_priority; + + if (katom) { + katom_idx0_valid = (katom_idx0 == katom); + /* If idx0 is to be removed and idx1 is on the same context, + * then idx1 must also be removed otherwise the atoms might be + * returned out of order */ + if (katom_idx1) + katom_idx1_valid = (katom_idx1 == katom) || + (katom_idx0_valid && + (katom_idx0->kctx == + katom_idx1->kctx)); + else + katom_idx1_valid = false; + } else { + katom_idx0_valid = (katom_idx0 && + (!kctx || katom_idx0->kctx == kctx)); + katom_idx1_valid = (katom_idx1 && + (!kctx || katom_idx1->kctx == kctx) && + prio_idx0 == prio_idx1); + } + + if (katom_idx0_valid) + stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); + if (katom_idx1_valid) + stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1); + + if (katom_idx0_valid) { + if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Simple case - just dequeue and return */ + kbase_gpu_dequeue_atom(kbdev, js, NULL); + if (katom_idx1_valid) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + katom_idx1->event_code = + BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_jm_return_atom_to_js(kbdev, katom_idx1); + katom_idx1->kctx->blocked_js[js][prio_idx1] = + true; + } + + katom_idx0->event_code = + BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_jm_return_atom_to_js(kbdev, katom_idx0); + katom_idx0->kctx->blocked_js[js][prio_idx0] = true; + } else { + /* 
katom_idx0 is on GPU */ + if (katom_idx1 && katom_idx1->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* katom_idx0 and katom_idx1 are on GPU */ + + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT), NULL) == 0) { + /* idx0 has already completed - stop + * idx1 if needed*/ + if (katom_idx1_valid) { + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } else { + /* idx1 is in NEXT registers - attempt + * to remove */ + kbase_reg_write(kbdev, + JOB_SLOT_REG(js, + JS_COMMAND_NEXT), + JS_COMMAND_NOP, NULL); + + if (kbase_reg_read(kbdev, + JOB_SLOT_REG(js, + JS_HEAD_NEXT_LO), NULL) + != 0 || + kbase_reg_read(kbdev, + JOB_SLOT_REG(js, + JS_HEAD_NEXT_HI), NULL) + != 0) { + /* idx1 removed successfully, + * will be handled in IRQ */ + kbase_job_evicted(katom_idx1); + kbase_gpu_remove_atom(kbdev, + katom_idx1, + action, true); + stop_x_dep_idx1 = + should_stop_x_dep_slot(katom_idx1); + + /* stop idx0 if still on GPU */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx0, + action); + ret = true; + } else if (katom_idx1_valid) { + /* idx0 has already completed, + * stop idx1 if needed */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } + } else if (katom_idx1_valid) { + /* idx1 not on GPU but must be dequeued*/ + + /* idx1 will be handled in IRQ */ + kbase_gpu_remove_atom(kbdev, katom_idx1, action, + false); + /* stop idx0 */ + /* This will be repeated for anything removed + * from the next registers, since their normal + * flow was also interrupted, and this function + * might not enter disjoint state e.g. if we + * don't actually do a hard stop on the head + * atom */ + kbase_gpu_stop_atom(kbdev, js, katom_idx0, + action); + ret = true; + } else { + /* no atom in idx1 */ + /* just stop idx0 */ + kbase_gpu_stop_atom(kbdev, js, katom_idx0, + action); + ret = true; + } + } + } else if (katom_idx1_valid) { + if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Mark for return */ + /* idx1 will be returned once idx0 completes */ + kbase_gpu_remove_atom(kbdev, katom_idx1, action, + false); + } else { + /* idx1 is on GPU */ + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT), NULL) == 0) { + /* idx0 has already completed - stop idx1 */ + kbase_gpu_stop_atom(kbdev, js, katom_idx1, + action); + ret = true; + } else { + /* idx1 is in NEXT registers - attempt to + * remove */ + kbase_reg_write(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT), + JS_COMMAND_NOP, NULL); + + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_HEAD_NEXT_LO), NULL) != 0 || + kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_HEAD_NEXT_HI), NULL) != 0) { + /* idx1 removed successfully, will be + * handled in IRQ once idx0 completes */ + kbase_job_evicted(katom_idx1); + kbase_gpu_remove_atom(kbdev, katom_idx1, + action, + false); + } else { + /* idx0 has already completed - stop + * idx1 */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } + } + } + + + if (stop_x_dep_idx0 != -1) + kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0, + NULL, action); + + if (stop_x_dep_idx1 != -1) + kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1, + NULL, action); + + return ret; +} + +void kbase_gpu_cacheclean(struct kbase_device *kbdev) +{ + /* Limit the number of loops to avoid a hang if the interrupt is missed + */ + u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + + mutex_lock(&kbdev->cacheclean_lock); + + /* use GPU_COMMAND completion solution */ + /* clean & invalidate the caches */ + KBASE_TRACE_ADD(kbdev, 
CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+	/* wait for cache flush to complete before continuing */
+	while (--max_loops &&
+		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+			CLEAN_CACHES_COMPLETED) == 0)
+		;
+
+	/* clear the CLEAN_CACHES_COMPLETED irq */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+							CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+						CLEAN_CACHES_COMPLETED, NULL);
+	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING,
+		"Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_backend_cacheclean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->need_cache_flush_cores_retained) {
+		unsigned long flags;
+
+		kbase_gpu_cacheclean(kbdev);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_pm_unrequest_cores(kbdev, false,
+					katom->need_cache_flush_cores_retained);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		katom->need_cache_flush_cores_retained = 0;
+	}
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	/*
+	 * If cache flush required due to HW workaround then perform the flush
+	 * now
+	 */
+	kbase_backend_cacheclean(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) &&
+			(katom->core_req & BASE_JD_REQ_FS) &&
+			katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT &&
+			(katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+			!(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after it was
+			 * soft-stopped. Due to an HW issue we try to execute
+			 * the job again.
+			 */
+			dev_dbg(kbdev->dev,
+				"Clamping has been executed, try to rerun the job\n"
+			);
+			katom->event_code = BASE_JD_EVENT_STOPPED;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+		}
+	}
+
+	/* Clear the coreref_state now - while check_deref_cores() may not have
+	 * been called yet, the caller will have taken a copy of this field. If
+	 * this is not done, then if the atom is re-scheduled (following a soft
+	 * stop) then the core reference would not be retaken.
*/ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; + katom->affinity = 0; +} + +void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, + base_jd_core_req core_req, u64 affinity, + enum kbase_atom_coreref_state coreref_state) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity, + coreref_state); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.active_count) { + mutex_lock(&kbdev->js_data.runpool_mutex); + mutex_lock(&kbdev->pm.lock); + kbase_pm_update_active(kbdev); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&kbdev->js_data.runpool_mutex); + } +} + +void kbase_gpu_dump_slots(struct kbase_device *kbdev) +{ + unsigned long flags; + int js; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n"); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int idx; + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, + idx); + + if (katom) + dev_info(kbdev->dev, + " js%d idx%d : katom=%p gpu_rb_state=%d\n", + js, idx, katom, katom->gpu_rb_state); + else + dev_info(kbdev->dev, " js%d idx%d : empty\n", + js, idx); + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + + + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h new file mode 100644 index 000000000000..1e0e05ad3ea4 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h @@ -0,0 +1,76 @@ +/* + * + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Register-based HW access backend specific APIs + */ + +#ifndef _KBASE_HWACCESS_GPU_H_ +#define _KBASE_HWACCESS_GPU_H_ + +#include <backend/gpu/mali_kbase_pm_internal.h> + +/** + * kbase_gpu_irq_evict - Evict an atom from a NEXT slot + * + * @kbdev: Device pointer + * @js: Job slot to evict from + * + * Evict the atom in the NEXT slot for the specified job slot. This function is + * called from the job complete IRQ handler when the previous job has failed. + * + * Return: true if job evicted from NEXT registers, false otherwise + */ +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js); + +/** + * kbase_gpu_complete_hw - Complete an atom on job slot js + * + * @kbdev: Device pointer + * @js: Job slot that has completed + * @completion_code: Event code from job that has completed + * @job_tail: The tail address from the hardware if the job has partially + * completed + * @end_timestamp: Time of completion + */ +void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + u32 completion_code, + u64 job_tail, + ktime_t *end_timestamp); + +/** + * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer + * + * @kbdev: Device pointer + * @js: Job slot to inspect + * @idx: Index into ringbuffer. 0 is the job currently running on + * the slot, 1 is the job waiting, all other values are invalid. 
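+ *
+ * Example (hypothetical caller; this backend's ringbuffer holds at most
+ * two atoms per slot):
+ *
+ *   struct kbase_jd_atom *head = kbase_gpu_inspect(kbdev, js, 0);
+ *   struct kbase_jd_atom *next = kbase_gpu_inspect(kbdev, js, 1);
+ *
+ * head is the atom currently on the slot (or NULL), next the one queued
+ * behind it (or NULL).
+ *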
+ * Return: The atom at that position in the ringbuffer
+ * or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev: Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
new file mode 100644
index 000000000000..54d8ddd80097
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -0,0 +1,303 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+#include "mali_kbase_hw.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+								int js)
+{
+	/*
+	 * Here are the reasons for using job slot 2:
+	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+	 * - In absence of the above, then:
+	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
+	 *  - But, only when there aren't contexts with
+	 *  KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+	 *  all cores on slot 1 could be blocked by those using a coherent group
+	 *  on slot 2
+	 *  - And, only when you actually have 2 or more coregroups - if you
+	 *  only have 1 coregroup, then having jobs for slot 2 implies they'd
+	 *  also be for slot 1, meaning you'll get interference from them. Jobs
+	 *  able to run on slot 2 could also block jobs that can only run on
+	 *  slot 1 (tiler jobs)
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+
+	if (js != 2)
+		return true;
+
+	/* Only deal with js==2 now: */
+	if (kbdev->gpu_props.num_core_groups > 1) {
+		/* Only use slot 2 in the 2+ coregroup case */
+		if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+					KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
+								false) {
+			/* ...But only when we *don't* have atoms that run on
+			 * all cores */
+
+			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
+			 * atoms - the policy will sort that out */
+			return true;
+		}
+	}
+
+	/* If the above checks failed, we shouldn't use slot 2 */
+	return false;
+}
+
+/*
+ * A deeper modification of what the job scheduler, power manager and
+ * affinity manager will implement has already been decided upon, so
+ * this function is just an intermediate step that assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ *   and high cores in a core split will be evenly distributed.
+ * - Odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - This function is frequently called and can be optimized,
+ *   (see notes in loops), but as the functionality will likely
+ *   be modified, optimization has not been addressed.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js)
+{
+	base_jd_core_req core_req = katom->core_req;
+	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+	u64 core_availability_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+	/*
+	 * If no cores are currently available (core availability policy is
+	 * transitioning) then fail.
+	 */
+	if (0 == core_availability_mask) {
+		*affinity = 0;
+		return false;
+	}
+
+	KBASE_DEBUG_ASSERT(js >= 0);
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+								BASE_JD_REQ_T) {
+		/* If the hardware supports XAFFINITY then we'll only enable
+		 * the tiler (which is the default so this is a no-op),
+		 * otherwise enable shader core 0. */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = 1;
+		else
+			*affinity = 0;
+
+		return true;
+	}
+
+	if (1 == kbdev->gpu_props.num_cores) {
+		/* trivial case: only one core, nothing to do */
+		*affinity = core_availability_mask &
+				kbdev->pm.debug_core_mask[js];
+	} else {
+		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+			if (js == 0 || num_core_groups == 1) {
+				/* js[0] and single-core-group systems just get
+				 * the first core group */
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[0].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+			} else {
+				/* js[1], js[2] use core groups 0, 1 for
+				 * dual-core-group systems */
+				u32 core_group_idx = ((u32) js) - 1;
+
+				KBASE_DEBUG_ASSERT(core_group_idx <
+							num_core_groups);
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+
+				/* If the job is specifically targeting core
+				 * group 1 and the core availability policy is
+				 * keeping that core group off, then fail */
+				if (*affinity == 0 && core_group_idx == 1 &&
+						kbdev->pm.backend.cg1_disabled
+								== true)
+					katom->event_code =
+							BASE_JD_EVENT_PM_EVENT;
+			}
+		} else {
+			/* All cores are available when no core split is
+			 * required */
+			*affinity = core_availability_mask &
+					kbdev->pm.debug_core_mask[js];
+		}
+	}
+
+	/*
+	 * If no cores are currently available in the desired core group(s)
+	 * (core availability policy is transitioning) then fail.
+	 */
+	if (*affinity == 0)
+		return false;
+
+	/* Enable core 0 if tiler required for hardware without XAFFINITY
+	 * support (notes above) */
+	if (core_req & BASE_JD_REQ_T) {
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = *affinity | 1;
+	}
+
+	return true;
+}
+
+static inline bool kbase_js_affinity_is_violating(
+						struct kbase_device *kbdev,
+								u64 *affinities)
+{
+	/* This implementation checks whether the two slots involved in Generic
+	 * thread creation have intersecting affinity. This is due to micro-
+	 * architectural issues where a job in slot A targeting cores used by
+	 * slot B could prevent the job in slot B from making progress until the
+	 * job in slot A has completed.
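+	 *
+	 * Worked example (illustrative masks): affinities[1] = 0x0f and
+	 * affinities[2] = 0x3c intersect in 0x0c, so this reports a
+	 * violation; 0x0f and 0x30 would not.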
+ */ + u64 affinity_set_left; + u64 affinity_set_right; + u64 intersection; + + KBASE_DEBUG_ASSERT(affinities != NULL); + + affinity_set_left = affinities[1]; + + affinity_set_right = affinities[2]; + + /* A violation occurs when any bit in the left_set is also in the + * right_set */ + intersection = affinity_set_left & affinity_set_right; + + return (bool) (intersection != (u64) 0u); +} + +bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, + u64 affinity) +{ + struct kbasep_js_device_data *js_devdata; + u64 new_affinities[BASE_JM_MAX_NR_SLOTS]; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); + js_devdata = &kbdev->js_data; + + memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities, + sizeof(js_devdata->runpool_irq.slot_affinities)); + + new_affinities[js] |= affinity; + + return kbase_js_affinity_is_violating(kbdev, new_affinities); +} + +void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, + u64 affinity) +{ + struct kbasep_js_device_data *js_devdata; + u64 cores; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); + js_devdata = &kbdev->js_data; + + KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity) + == false); + + cores = affinity; + while (cores) { + int bitnum = fls64(cores) - 1; + u64 bit = 1ULL << bitnum; + s8 cnt; + + cnt = + ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); + + if (cnt == 1) + js_devdata->runpool_irq.slot_affinities[js] |= bit; + + cores &= ~bit; + } +} + +void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, + u64 affinity) +{ + struct kbasep_js_device_data *js_devdata; + u64 cores; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); + js_devdata = &kbdev->js_data; + + cores = affinity; + while (cores) { + int bitnum = fls64(cores) - 1; + u64 bit = 1ULL << bitnum; + s8 cnt; + + KBASE_DEBUG_ASSERT( + js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0); + + cnt = + --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); + + if (0 == cnt) + js_devdata->runpool_irq.slot_affinities[js] &= ~bit; + + cores &= ~bit; + } +} + +#if KBASE_TRACE_ENABLE +void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata; + int slot_nr; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + + for (slot_nr = 0; slot_nr < 3; ++slot_nr) + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL, + NULL, 0u, slot_nr, + (u32) js_devdata->runpool_irq.slot_affinities[slot_nr]); +} +#endif /* KBASE_TRACE_ENABLE */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h new file mode 100644 index 000000000000..35d9781ae092 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h @@ -0,0 +1,129 @@ +/* + * + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Affinity Manager internal APIs. 
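+ *
+ * A minimal sketch of how these APIs are intended to compose on the
+ * submission path (hypothetical caller, error handling elided):
+ *
+ *   u64 affinity;
+ *
+ *   if (kbase_js_choose_affinity(&affinity, kbdev, katom, js) &&
+ *       !kbase_js_affinity_would_violate(kbdev, js, affinity))
+ *           kbase_js_affinity_retain_slot_cores(kbdev, js, affinity);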
+ */ + +#ifndef _KBASE_JS_AFFINITY_H_ +#define _KBASE_JS_AFFINITY_H_ + +/** + * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to + * submit a job to a particular job slot in the current status + * + * @kbdev: The kbase device structure of the device + * @js: Job slot number to check for allowance + * + * Will check if submitting to the given job slot is allowed in the current + * status. For example using job slot 2 while in soft-stoppable state and only + * having 1 coregroup is not allowed by the policy. This function should be + * called prior to submitting a job to a slot to make sure policy rules are not + * violated. + * + * The following locking conditions are made on the caller + * - it must hold hwaccess_lock + */ +bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js); + +/** + * kbase_js_choose_affinity - Compute affinity for a given job. + * + * @affinity: Affinity bitmap computed + * @kbdev: The kbase device structure of the device + * @katom: Job chain of which affinity is going to be found + * @js: Slot the job chain is being submitted + * + * Currently assumes an all-on/all-off power management policy. + * Also assumes there is at least one core with tiler available. + * + * Returns true if a valid affinity was chosen, false if + * no cores were available. + */ +bool kbase_js_choose_affinity(u64 * const affinity, + struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js); + +/** + * kbase_js_affinity_would_violate - Determine whether a proposed affinity on + * job slot @js would cause a violation of affinity restrictions. + * + * @kbdev: Kbase device structure + * @js: The job slot to test + * @affinity: The affinity mask to test + * + * The following locks must be held by the caller + * - hwaccess_lock + * + * Return: true if the affinity would violate the restrictions + */ +bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, + u64 affinity); + +/** + * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by + * a slot + * + * @kbdev: Kbase device structure + * @js: The job slot retaining the cores + * @affinity: The cores to retain + * + * The following locks must be held by the caller + * - hwaccess_lock + */ +void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, + u64 affinity); + +/** + * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used + * by a slot + * + * @kbdev: Kbase device structure + * @js: Job slot + * @affinity: Bit mask of core to be released + * + * Cores must be released as soon as a job is dequeued from a slot's 'submit + * slots', and before another job is submitted to those slots. Otherwise, the + * refcount could exceed the maximum number submittable to a slot, + * %BASE_JM_SUBMIT_SLOTS. 
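+ *
+ * Each retain is expected to be balanced by exactly one release of the
+ * same mask; a sketch of the intended pairing (hypothetical caller):
+ *
+ *   kbase_js_affinity_retain_slot_cores(kbdev, js, katom->affinity);
+ *   (submit the atom and wait for it to complete)
+ *   kbase_js_affinity_release_slot_cores(kbdev, js, katom->affinity);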
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_debug_log_current_affinities - log the current affinities
+ *
+ * @kbdev: Kbase device structure
+ *
+ * Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else /* KBASE_TRACE_ENABLE */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+#endif /* _KBASE_JS_AFFINITY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100644
index 000000000000..a8c1af23a369
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,356 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is
+ * enabled for a particular level is down to the integrator. However this is
+ * being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	s8 nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_pullable is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif /* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: a context that has both Compute and Non-Compute jobs
+		 * will be treated as an OpenCL context (hence, we don't check
+		 * KBASEP_JS_CTX_ATTR_NON_COMPUTE).
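+		 *
+		 * e.g. (illustrative counts): three runnable contexts of
+		 * which two are compute gives nr_noncompute_ctxs == 1, so
+		 * the timer runs; a single pure-compute context (one
+		 * compute, zero non-compute) leaves it off.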
+ */ + { + s8 nr_compute_ctxs = + kbasep_js_ctx_attr_count_on_runpool(kbdev, + KBASEP_JS_CTX_ATTR_COMPUTE); + s8 nr_noncompute_ctxs = nr_running_ctxs - + nr_compute_ctxs; + + return (bool) (nr_compute_ctxs >= 2 || + nr_noncompute_ctxs > 0); + } + } else { + /* Run the timer callback whenever you have at least 1 context + */ + return (bool) (nr_running_ctxs > 0); + } +} + +static enum hrtimer_restart timer_callback(struct hrtimer *timer) +{ + unsigned long flags; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + struct kbase_backend_data *backend; + int s; + bool reset_needed = false; + + KBASE_DEBUG_ASSERT(timer != NULL); + + backend = container_of(timer, struct kbase_backend_data, + scheduling_timer); + kbdev = container_of(backend, struct kbase_device, hwaccess.backend); + js_devdata = &kbdev->js_data; + + /* Loop through the slots */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { + struct kbase_jd_atom *atom = NULL; + + if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) { + atom = kbase_gpu_inspect(kbdev, s, 0); + KBASE_DEBUG_ASSERT(atom != NULL); + } + + if (atom != NULL) { + /* The current version of the model doesn't support + * Soft-Stop */ + if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { + u32 ticks = atom->ticks++; + +#if !CINSTR_DUMPING_ENABLED + u32 soft_stop_ticks, hard_stop_ticks, + gpu_reset_ticks; + if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + soft_stop_ticks = + js_devdata->soft_stop_ticks_cl; + hard_stop_ticks = + js_devdata->hard_stop_ticks_cl; + gpu_reset_ticks = + js_devdata->gpu_reset_ticks_cl; + } else { + soft_stop_ticks = + js_devdata->soft_stop_ticks; + hard_stop_ticks = + js_devdata->hard_stop_ticks_ss; + gpu_reset_ticks = + js_devdata->gpu_reset_ticks_ss; + } + + /* If timeouts have been changed then ensure + * that atom tick count is not greater than the + * new soft_stop timeout. This ensures that + * atoms do not miss any of the timeouts due to + * races between this worker and the thread + * changing the timeouts. */ + if (backend->timeouts_updated && + ticks > soft_stop_ticks) + ticks = atom->ticks = soft_stop_ticks; + + /* Job is Soft-Stoppable */ + if (ticks == soft_stop_ticks) { + int disjoint_threshold = + KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; + u32 softstop_flags = 0u; + /* Job has been scheduled for at least + * js_devdata->soft_stop_ticks ticks. + * Soft stop the slot so we can run + * other jobs. + */ + dev_dbg(kbdev->dev, "Soft-stop"); +#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS + /* nr_user_contexts_running is updated + * with the runpool_mutex, but we can't + * take that here. + * + * However, if it's about to be + * increased then the new context can't + * run any jobs until they take the + * hwaccess_lock, so it's OK to observe + * the older value. + * + * Similarly, if it's about to be + * decreased, the last job from another + * context has already finished, so it's + * not too bad that we observe the older + * value and register a disjoint event + * when we try soft-stopping */ + if (js_devdata->nr_user_contexts_running + >= disjoint_threshold) + softstop_flags |= + JS_COMMAND_SW_CAUSES_DISJOINT; + + kbase_job_slot_softstop_swflags(kbdev, + s, atom, softstop_flags); +#endif + } else if (ticks == hard_stop_ticks) { + /* Job has been scheduled for at least + * js_devdata->hard_stop_ticks_ss ticks. + * It should have been soft-stopped by + * now. Hard stop the slot. 
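+					 *
+					 * Illustrative numbers (the real
+					 * values come from js_devdata and are
+					 * integration-tunable): with
+					 * soft_stop_ticks == 1 and
+					 * hard_stop_ticks_ss == 50 at a
+					 * 100 ms scheduling period, a job is
+					 * soft-stopped after roughly 100 ms
+					 * and hard-stopped after roughly 5 s.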
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else /* !CINSTR_DUMPING_ENABLED */
+				/* NOTE: During CINSTR_DUMPING_ENABLED, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CINSTR_DUMPING_ENABLED, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif /* !CINSTR_DUMPING_ENABLED */
+			}
+		}
+	}
+#if KBASE_GPU_RESET_EN
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there are contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	backend->timeouts_updated = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself.
Its return value + * cannot change, because it depends on variables updated with + * the runpool_mutex held, which the caller of this must also + * hold */ + hrtimer_cancel(&backend->scheduling_timer); + } + + if (timer_callback_should_run(kbdev) && !backend->timer_running) { + /* Take spinlock to force synchronisation with timer */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + backend->timer_running = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + hrtimer_start(&backend->scheduling_timer, + HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), + HRTIMER_MODE_REL); + + KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, + 0u); + } +} + +int kbase_backend_timer_init(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + backend->scheduling_timer.function = timer_callback; + + backend->timer_running = false; + + return 0; +} + +void kbase_backend_timer_term(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + hrtimer_cancel(&backend->scheduling_timer); +} + +void kbase_backend_timer_suspend(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->suspend_timer = true; + + kbase_backend_ctx_count_changed(kbdev); +} + +void kbase_backend_timer_resume(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->suspend_timer = false; + + kbase_backend_ctx_count_changed(kbdev); +} + +void kbase_backend_timeouts_changed(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->timeouts_updated = true; +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h new file mode 100644 index 000000000000..3f53779c6747 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h @@ -0,0 +1,69 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Register-based HW access backend specific job scheduler APIs + */ + +#ifndef _KBASE_JS_BACKEND_H_ +#define _KBASE_JS_BACKEND_H_ + +/** + * kbase_backend_timer_init() - Initialise the JS scheduling timer + * @kbdev: Device pointer + * + * This function should be called at driver initialisation + * + * Return: 0 on success + */ +int kbase_backend_timer_init(struct kbase_device *kbdev); + +/** + * kbase_backend_timer_term() - Terminate the JS scheduling timer + * @kbdev: Device pointer + * + * This function should be called at driver termination + */ +void kbase_backend_timer_term(struct kbase_device *kbdev); + +/** + * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling + * timer + * @kbdev: Device pointer + * + * This function should be called on suspend, after the active count has reached + * zero. 
This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ * scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that it is not guaranteed to
+ * re-start the timer, only to evaluate whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100644
index 000000000000..aa1817c8bca9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,401 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	u64 region;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	region = pfn << PAGE_SHIFT;
+	/*
+	 * fls returns (given the ASSERT above):
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
+	 */
+
+	/* defensive check: num_pages being zero cannot happen given the
+	 * assert above, but keep the fallback for release builds where
+	 * KBASE_DEBUG_ASSERT compiles out */
+	if (0 == num_pages) {
+		region |= 11;
+	} else {
+		u8 region_width;
+
+		region_width = 10 + fls(num_pages);
+		if (num_pages != (1ul << (region_width - 11))) {
+			/* not pow2, so must go up to the next pow2 */
+			region_width += 1;
+		}
+		KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+		KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+		region |= region_width;
+	}
+
+	return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr, struct kbase_context *kctx)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+
+	if (max_loops == 0) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value.
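+	 * The polling loop above reads with a NULL context so those accesses
+	 * are not logged; the extra read below records the final AS_STATUS
+	 * value in the register access trace.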
*/ + if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops) + kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx); + + return 0; +} + +static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd, + struct kbase_context *kctx) +{ + int status; + + /* write AS_COMMAND when MMU is ready to accept another command */ + status = wait_ready(kbdev, as_nr, kctx); + if (status == 0) + kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd, + kctx); + + return status; +} + +static void validate_protected_page_fault(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + /* GPUs which support (native) protected mode shall not report page + * fault addresses unless it has protected debug mode and protected + * debug mode is turned on */ + u32 protected_debug_mode = 0; + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) + return; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { + protected_debug_mode = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_STATUS), + kctx) & GPU_DBGEN; + } + + if (!protected_debug_mode) { + /* fault_addr should never be reported in protected mode. + * However, we just continue by printing an error message */ + dev_err(kbdev->dev, "Fault address reported in protected mode\n"); + } +} + +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) +{ + const int num_as = 16; + const int busfault_shift = MMU_PAGE_FAULT_FLAGS; + const int pf_shift = 0; + const unsigned long as_bit_mask = (1UL << num_as) - 1; + unsigned long flags; + u32 new_mask; + u32 tmp; + + /* bus faults */ + u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; + /* page faults (note: Ignore ASes with both pf and bf) */ + u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + + /* remember current mask */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); + /* mask interrupts for now */ + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); + + while (bf_bits | pf_bits) { + struct kbase_as *as; + int as_no; + struct kbase_context *kctx; + + /* + * the while logic ensures we have a bit set, no need to check + * for not-found here + */ + as_no = ffs(bf_bits | pf_bits) - 1; + as = &kbdev->as[as_no]; + + /* + * Refcount the kctx ASAP - it shouldn't disappear anyway, since + * Bus/Page faults _should_ only occur whilst jobs are running, + * and a job causing the Bus/Page fault shouldn't complete until + * the MMU is updated + */ + kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no); + + + /* find faulting address */ + as->fault_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, + AS_FAULTADDRESS_HI), + kctx); + as->fault_addr <<= 32; + as->fault_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, + AS_FAULTADDRESS_LO), + kctx); + + /* Mark the fault protected or not */ + as->protected_mode = kbdev->protected_mode; + + if (kbdev->protected_mode && as->fault_addr) + { + /* check if address reporting is allowed */ + validate_protected_page_fault(kbdev, kctx); + } + + /* report the fault to debugfs */ + kbase_as_fault_debugfs_new(kbdev, as_no); + + /* record the fault status */ + as->fault_status = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, + AS_FAULTSTATUS), + kctx); + + /* find the fault type */ + as->fault_type = (bf_bits & (1 << as_no)) ? 
+ KBASE_MMU_FAULT_TYPE_BUS : + KBASE_MMU_FAULT_TYPE_PAGE; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { + as->fault_extra_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_HI), + kctx); + as->fault_extra_addr <<= 32; + as->fault_extra_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_LO), + kctx); + } + + if (kbase_as_has_bus_fault(as)) { + /* Mark bus fault as handled. + * Note that a bus fault is processed first in case + * where both a bus fault and page fault occur. + */ + bf_bits &= ~(1UL << as_no); + + /* remove the queued BF (and PF) from the mask */ + new_mask &= ~(MMU_BUS_ERROR(as_no) | + MMU_PAGE_FAULT(as_no)); + } else { + /* Mark page fault as handled */ + pf_bits &= ~(1UL << as_no); + + /* remove the queued PF from the mask */ + new_mask &= ~MMU_PAGE_FAULT(as_no); + } + + /* Process the interrupt for this address space */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_mmu_interrupt_process(kbdev, kctx, as); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + + /* reenable interrupts */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); + new_mask |= tmp; + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} + +void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx) +{ + struct kbase_mmu_setup *current_setup = &as->current_setup; + u32 transcfg = 0; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { + transcfg = current_setup->transcfg & 0xFFFFFFFFUL; + + /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */ + /* Clear PTW_MEMATTR bits */ + transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; + /* Enable correct PTW_MEMATTR bits */ + transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */ + /* Clear PTW_SH bits */ + transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); + /* Enable correct PTW_SH bits */ + transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); + } + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), + transcfg, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), + (current_setup->transcfg >> 32) & 0xFFFFFFFFUL, + kctx); + } else { + if (kbdev->system_coherency == COHERENCY_ACE) + current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; + } + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), + current_setup->transtab & 0xFFFFFFFFUL, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), + (current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx); + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO), + current_setup->memattr & 0xFFFFFFFFUL, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), + (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx); + + KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, + current_setup->transtab, + current_setup->memattr, + transcfg); + + write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx); +} + +int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op, + unsigned int handling_irq) +{ + int ret; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + if (op == AS_COMMAND_UNLOCK) { + /* Unlock doesn't require a lock first */ + ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); + } else { + u64 lock_addr = lock_region(kbdev, vpfn, 
nr); + + /* Lock the region that needs to be updated */ + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO), + lock_addr & 0xFFFFFFFFUL, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI), + (lock_addr >> 32) & 0xFFFFFFFFUL, kctx); + write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx); + + /* Run the MMU operation */ + write_cmd(kbdev, as->number, op, kctx); + + /* Wait for the flush to complete */ + ret = wait_ready(kbdev, as->number, kctx); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) { + /* Issue an UNLOCK command to ensure that valid page + tables are re-read by the GPU after an update. + Note that, the FLUSH command should perform all the + actions necessary, however the bus logs show that if + multiple page faults occur within an 8 page region + the MMU does not always re-read the updated page + table entries for later faults or is only partially + read, it subsequently raises the page fault IRQ for + the same addresses, the unlock ensures that the MMU + cache is flushed, so updates can be re-read. As the + region is now unlocked we need to issue 2 UNLOCK + commands in order to flush the MMU/uTLB, + see PRLAM-8812. + */ + write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); + write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); + } + } + + return ret; +} + +void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx, enum kbase_mmu_fault_type type) +{ + unsigned long flags; + u32 pf_bf_mask; + + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + + /* + * A reset is in-flight and we're flushing the IRQ + bottom half + * so don't update anything as it could race with the reset code. + */ + if (kbdev->irq_reset_flush) + goto unlock; + + /* Clear the page (and bus fault IRQ as well in case one occurred) */ + pf_bf_mask = MMU_PAGE_FAULT(as->number); + if (type == KBASE_MMU_FAULT_TYPE_BUS || + type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) + pf_bf_mask |= MMU_BUS_ERROR(as->number); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx); + +unlock: + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} + +void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx, enum kbase_mmu_fault_type type) +{ + unsigned long flags; + u32 irq_mask; + + /* Enable the page fault IRQ (and bus fault IRQ as well in case one + * occurred) */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + + /* + * A reset is in-flight and we're flushing the IRQ + bottom half + * so don't update anything as it could race with the reset code. + */ + if (kbdev->irq_reset_flush) + goto unlock; + + irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) | + MMU_PAGE_FAULT(as->number); + + if (type == KBASE_MMU_FAULT_TYPE_BUS || + type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) + irq_mask |= MMU_BUS_ERROR(as->number); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx); + +unlock: + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h new file mode 100644 index 000000000000..c02253c6acc3 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h @@ -0,0 +1,42 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * Interface file for the direct implementation for MMU hardware access + * + * Direct MMU hardware interface + * + * This module provides the interface(s) that are required by the direct + * register access implementation of the MMU hardware interface + */ + +#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_ +#define _MALI_KBASE_MMU_HW_DIRECT_H_ + +#include <mali_kbase_defs.h> + +/** + * kbase_mmu_interrupt - Process an MMU interrupt. + * + * Process the MMU interrupt that was reported by the &kbase_device. + * + * @kbdev: kbase context to clear the fault from. + * @irq_stat: Value of the MMU_IRQ_STATUS register + */ +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + +#endif /* _MALI_KBASE_MMU_HW_DIRECT_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c new file mode 100644 index 000000000000..0614348e935a --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c @@ -0,0 +1,63 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * "Always on" power management policy + */ + +#include <mali_kbase.h> +#include <mali_kbase_pm.h> + +static u64 always_on_get_core_mask(struct kbase_device *kbdev) +{ + return kbdev->gpu_props.props.raw_props.shader_present; +} + +static bool always_on_get_core_active(struct kbase_device *kbdev) +{ + return true; +} + +static void always_on_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static void always_on_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +/* + * The struct kbase_pm_policy structure for the demand power policy. + * + * This is the static structure that defines the demand power policy's callback + * and name. + */ +const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { + "always_on", /* name */ + always_on_init, /* init */ + always_on_term, /* term */ + always_on_get_core_mask, /* get_core_mask */ + always_on_get_core_active, /* get_core_active */ + 0u, /* flags */ + KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h new file mode 100644 index 000000000000..f9d244b01bc2 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h @@ -0,0 +1,77 @@ + +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * "Always on" power management policy + */ + +#ifndef MALI_KBASE_PM_ALWAYS_ON_H +#define MALI_KBASE_PM_ALWAYS_ON_H + +/** + * DOC: + * The "Always on" power management policy has the following + * characteristics: + * + * - When KBase indicates that the GPU will be powered up, but we don't yet + * know which Job Chains are to be run: + * All Shader Cores are powered up, regardless of whether or not they will + * be needed later. + * + * - When KBase indicates that a set of Shader Cores are needed to submit the + * currently queued Job Chains: + * All Shader Cores are kept powered, regardless of whether or not they will + * be needed + * + * - When KBase indicates that the GPU need not be powered: + * The Shader Cores are kept powered, regardless of whether or not they will + * be needed. The GPU itself is also kept powered, even though it is not + * needed. + * + * This policy is automatically overridden during system suspend: the desired + * core state is ignored, and the cores are forced off regardless of what the + * policy requests. After resuming from suspend, new changes to the desired + * core state made by the policy are honored. + * + * Note: + * + * - KBase indicates the GPU will be powered up when it has a User Process that + * has just started to submit Job Chains. + * + * - KBase indicates the GPU need not be powered when all the Job Chains from + * User Processes have finished, and it is waiting for a User Process to + * submit some more Job Chains. + */ + +/** + * struct kbasep_pm_policy_always_on - Private struct for policy instance data + * @dummy: unused dummy variable + * + * This contains data that is private to the particular power policy that is + * active. + */ +struct kbasep_pm_policy_always_on { + int dummy; +}; + +extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops; + +#endif /* MALI_KBASE_PM_ALWAYS_ON_H */ + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c new file mode 100644 index 000000000000..c88b80a325dd --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -0,0 +1,478 @@ +/* + * + * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + +/* + * GPU backend implementation of base kernel power management APIs + */ + +#include <mali_kbase.h> +#include <mali_midg_regmap.h> +#include <mali_kbase_config_defaults.h> + +#include <mali_kbase_pm.h> +#include <mali_kbase_hwaccess_jm.h> +#include <backend/gpu/mali_kbase_js_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> +#include <backend/gpu/mali_kbase_jm_internal.h> + +static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); + +void kbase_pm_register_access_enable(struct kbase_device *kbdev) +{ + struct kbase_pm_callback_conf *callbacks; + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + + if (callbacks) + callbacks->power_on_callback(kbdev); + + kbdev->pm.backend.gpu_powered = true; +} + +void kbase_pm_register_access_disable(struct kbase_device *kbdev) +{ + struct kbase_pm_callback_conf *callbacks; + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + + if (callbacks) + callbacks->power_off_callback(kbdev); + + kbdev->pm.backend.gpu_powered = false; +} + +int kbase_hwaccess_pm_init(struct kbase_device *kbdev) +{ + int ret = 0; + struct kbase_pm_callback_conf *callbacks; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + mutex_init(&kbdev->pm.lock); + + kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!kbdev->pm.backend.gpu_poweroff_wait_wq) + return -ENOMEM; + + INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, + kbase_pm_gpu_poweroff_wait_wq); + + kbdev->pm.backend.gpu_powered = false; + kbdev->pm.suspending = false; +#ifdef CONFIG_MALI_DEBUG + kbdev->pm.backend.driver_ready_for_irqs = false; +#endif /* CONFIG_MALI_DEBUG */ + kbdev->pm.backend.gpu_in_desired_state = true; + init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + if (callbacks) { + kbdev->pm.backend.callback_power_on = + callbacks->power_on_callback; + kbdev->pm.backend.callback_power_off = + callbacks->power_off_callback; + kbdev->pm.backend.callback_power_suspend = + callbacks->power_suspend_callback; + kbdev->pm.backend.callback_power_resume = + callbacks->power_resume_callback; + kbdev->pm.callback_power_runtime_init = + callbacks->power_runtime_init_callback; + kbdev->pm.callback_power_runtime_term = + callbacks->power_runtime_term_callback; + kbdev->pm.backend.callback_power_runtime_on = + callbacks->power_runtime_on_callback; + kbdev->pm.backend.callback_power_runtime_off = + callbacks->power_runtime_off_callback; + kbdev->pm.backend.callback_power_runtime_idle = + callbacks->power_runtime_idle_callback; + } else { + kbdev->pm.backend.callback_power_on = NULL; + kbdev->pm.backend.callback_power_off = NULL; + kbdev->pm.backend.callback_power_suspend = NULL; + kbdev->pm.backend.callback_power_resume = NULL; + kbdev->pm.callback_power_runtime_init = NULL; + kbdev->pm.callback_power_runtime_term = NULL; + kbdev->pm.backend.callback_power_runtime_on = NULL; + kbdev->pm.backend.callback_power_runtime_off = NULL; + kbdev->pm.backend.callback_power_runtime_idle = NULL; + } + + /* Initialise the metrics subsystem */ + ret = kbasep_pm_metrics_init(kbdev); + if (ret) + return ret; + + init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait); + kbdev->pm.backend.l2_powered = 0; + + init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); + kbdev->pm.backend.reset_done = false; + + init_waitqueue_head(&kbdev->pm.zero_active_count_wait); + kbdev->pm.active_count = 0; + + 
spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
+
+	if (kbase_pm_ca_init(kbdev) != 0)
+		goto workq_fail;
+
+	if (kbase_pm_policy_init(kbdev) != 0)
+		goto pm_policy_fail;
+
+	return 0;
+
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+workq_fail:
+	kbasep_pm_metrics_term(kbdev);
+	return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	/* Update core status as required by the policy */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+	kbase_pm_update_cores_state(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.gpu_poweroff_wait_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+#if !PLATFORM_POWER_DOWN_ONLY
+	/* Wait for power transitions to complete. We do this with no locks held
+	 * so that we don't deadlock with any pending workqueues */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+	kbase_pm_check_transitions_sync(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+#if PLATFORM_POWER_DOWN_ONLY
+	if (kbdev->pm.backend.gpu_powered) {
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/* If L2 cache is powered then we must flush it before
+			 * we power off the GPU. Normally this would have been
+			 * handled when the L2 was powered off. */
+			kbase_gpu_cacheclean(kbdev);
+		}
+	}
+#endif /* PLATFORM_POWER_DOWN_ONLY */
+
+	if (!backend->poweron_required) {
+#if !PLATFORM_POWER_DOWN_ONLY
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		WARN_ON(kbdev->l2_available_bitmap ||
+			kbdev->shader_available_bitmap ||
+			kbdev->tiler_available_bitmap);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+		/* Consume any change-state events */
+		kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		/* Disable interrupts and turn the clock off */
+		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
+			/*
+			 * Page/bus faults are pending, must drop locks to
+			 * process. Interrupts are disabled so no more faults
+			 * should be generated at this point.
+			 */
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			kbase_flush_mmu_wqs(kbdev);
+			mutex_lock(&js_devdata->runpool_mutex);
+			mutex_lock(&kbdev->pm.lock);
+
+			/* Turn off the clock now that the faults have been
+			 * handled. We dropped locks, so poweron_required may
+			 * have changed - power back on if this is the case. */
+			if (backend->poweron_required)
+				kbase_pm_clock_on(kbdev, false);
+			else
+				WARN_ON(!kbase_pm_clock_off(kbdev,
+						backend->poweroff_is_suspend));
+		}
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	backend->poweroff_wait_in_progress = false;
+	if (backend->poweron_required) {
+		backend->poweron_required = false;
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	wake_up(&kbdev->pm.backend.poweroff_wait);
+}
+
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (!kbdev->pm.backend.poweroff_wait_in_progress) {
+		/* Force all cores off */
+		kbdev->pm.backend.desired_shader_state = 0;
+		kbdev->pm.backend.desired_tiler_state = 0;
+
+		/* Force all cores to be unavailable, in the situation where
+		 * transitions are in progress for some cores but not others,
+		 * and kbase_pm_check_transitions_nolock can not immediately
+		 * power off the cores */
+		kbdev->shader_available_bitmap = 0;
+		kbdev->tiler_available_bitmap = 0;
+		kbdev->l2_available_bitmap = 0;
+
+		kbdev->pm.backend.poweroff_wait_in_progress = true;
+		kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/* Kick off the wq here. Callers will have to wait. */
+		queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
+				&kbdev->pm.backend.gpu_poweroff_wait_work);
+	} else {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+}
+
+/* Used as a wait_event() predicate: despite the name, this returns true once
+ * no GPU power-off is in progress, i.e. the power-off wait work completed. */
+static bool is_poweroff_in_progress(struct kbase_device *kbdev)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
+{
+	wait_event_killable(kbdev->pm.backend.poweroff_wait,
+			is_poweroff_in_progress(kbdev));
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them.
*/ + ret = kbase_pm_init_hw(kbdev, flags); + if (ret) { + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + return ret; + } + + kbasep_pm_init_core_use_bitmaps(kbdev); + + kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = + kbdev->pm.debug_core_mask[1] = + kbdev->pm.debug_core_mask[2] = + kbdev->gpu_props.props.raw_props.shader_present; + + /* Pretend the GPU is active to prevent a power policy turning the GPU + * cores off */ + kbdev->pm.active_count = 1; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + /* Ensure cycle counter is off */ + kbdev->pm.backend.gpu_cycle_counter_requests = 0; + spin_unlock_irqrestore( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + + /* We are ready to receive IRQ's now as power policy is set up, so + * enable them now. */ +#ifdef CONFIG_MALI_DEBUG + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags); + kbdev->pm.backend.driver_ready_for_irqs = true; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags); +#endif + kbase_pm_enable_interrupts(kbdev); + + /* Turn on the GPU and any cores needed by the policy */ + kbase_pm_do_poweron(kbdev, false); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + + /* Idle the GPU and/or cores, if the policy wants it to */ + kbase_pm_context_idle(kbdev); + + return 0; +} + +void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + mutex_lock(&kbdev->pm.lock); + kbase_pm_cancel_deferred_poweroff(kbdev); + kbase_pm_do_poweroff(kbdev, false); + mutex_unlock(&kbdev->pm.lock); +} + +KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); + +void kbase_hwaccess_pm_term(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0); + + /* Free any resources the policy allocated */ + kbase_pm_policy_term(kbdev); + kbase_pm_ca_term(kbdev); + + /* Shut down the metrics subsystem */ + kbasep_pm_metrics_term(kbdev); + + destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); +} + +void kbase_pm_power_changed(struct kbase_device *kbdev) +{ + bool cores_are_available; + unsigned long flags; + + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, + SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, + SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END); + + if (cores_are_available) { + /* Log timelining information that a change in state has + * completed */ + kbase_timeline_pm_handle_event(kbdev, + KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + + kbase_backend_slot_update(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, + u64 new_core_mask_js0, u64 new_core_mask_js1, + u64 new_core_mask_js2) +{ + kbdev->pm.debug_core_mask[0] = new_core_mask_js0; + kbdev->pm.debug_core_mask[1] = new_core_mask_js1; + kbdev->pm.debug_core_mask[2] = new_core_mask_js2; + kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | + new_core_mask_js2; + + kbase_pm_update_cores_state_nolock(kbdev); +} + +void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev) +{ + kbase_pm_update_active(kbdev); +} + +void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev) +{ + kbase_pm_update_active(kbdev); +} + +void 
kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	/* Force power off the GPU and all cores (regardless of policy), only
+	 * after the PM active count reaches zero (otherwise, we risk turning it
+	 * off prematurely) */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, true);
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100644
index 000000000000..85890f1e85f5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,182 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+	&kbase_pm_ca_fixed_policy_ops,
+#ifdef CONFIG_MALI_DEVFREQ
+	&kbase_pm_ca_devfreq_policy_ops,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_ca_random_policy_ops
+#endif
+};
+
+/**
+ * POLICY_COUNT - The number of policies available in the system.
+ *
+ * This is derived from the number of policies listed in policy_list.
+ */ +#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list)) + +int kbase_pm_ca_init(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbdev->pm.backend.ca_current_policy = policy_list[0]; + + kbdev->pm.backend.ca_current_policy->init(kbdev); + + return 0; +} + +void kbase_pm_ca_term(struct kbase_device *kbdev) +{ + kbdev->pm.backend.ca_current_policy->term(kbdev); +} + +int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list) +{ + if (!list) + return POLICY_COUNT; + + *list = policy_list; + + return POLICY_COUNT; +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies); + +const struct kbase_pm_ca_policy +*kbase_pm_ca_get_policy(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + return kbdev->pm.backend.ca_current_policy; +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy); + +void kbase_pm_ca_set_policy(struct kbase_device *kbdev, + const struct kbase_pm_ca_policy *new_policy) +{ + const struct kbase_pm_ca_policy *old_policy; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(new_policy != NULL); + + KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u, + new_policy->id); + + /* During a policy change we pretend the GPU is active */ + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread */ + kbase_pm_context_active(kbdev); + + mutex_lock(&kbdev->pm.lock); + + /* Remove the policy to prevent IRQ handlers from working on it */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + old_policy = kbdev->pm.backend.ca_current_policy; + kbdev->pm.backend.ca_current_policy = NULL; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (old_policy->term) + old_policy->term(kbdev); + + if (new_policy->init) + new_policy->init(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.ca_current_policy = new_policy; + + /* If any core power state changes were previously attempted, but + * couldn't be made because the policy was changing (current_policy was + * NULL), then re-try them here. 
*/ + kbase_pm_update_cores_state_nolock(kbdev); + + kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, + kbdev->shader_ready_bitmap, + kbdev->shader_transitioning_bitmap); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + mutex_unlock(&kbdev->pm.lock); + + /* Now the policy change is finished, we release our fake context active + * reference */ + kbase_pm_context_idle(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy); + +u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* All cores must be enabled when instrumentation is in use */ + if (kbdev->pm.backend.instr_enabled) + return kbdev->gpu_props.props.raw_props.shader_present & + kbdev->pm.debug_core_mask_all; + + if (kbdev->pm.backend.ca_current_policy == NULL) + return kbdev->gpu_props.props.raw_props.shader_present & + kbdev->pm.debug_core_mask_all; + + return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) & + kbdev->pm.debug_core_mask_all; +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); + +void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, + u64 cores_transitioning) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->pm.backend.ca_current_policy != NULL) + kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, + cores_ready, + cores_transitioning); +} + +void kbase_pm_ca_instr_enable(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.instr_enabled = true; + + kbase_pm_update_cores_state_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_pm_ca_instr_disable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + kbdev->pm.backend.instr_enabled = false; + + kbase_pm_update_cores_state_nolock(kbdev); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h new file mode 100644 index 000000000000..ee9e751f2d79 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h @@ -0,0 +1,92 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
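A usage sketch for the switch API above: select a core availability policy by name via the list/set pair. The by-name loop is the sort of thing a sysfs store handler might do; this helper itself is illustrative, not driver code.

static int example_select_ca_policy(struct kbase_device *kbdev,
		const char *name)
{
	const struct kbase_pm_ca_policy *const *policies;
	int i, count;

	count = kbase_pm_ca_list_policies(&policies);
	for (i = 0; i < count; i++) {
		if (!strcmp(policies[i]->name, name)) {
			/* Handles the fake active reference and the
			 * term/init handover internally */
			kbase_pm_ca_set_policy(kbdev, policies[i]);
			return 0;
		}
	}

	return -EINVAL;
}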
+ * + */ + + + +/* + * Base kernel core availability APIs + */ + +#ifndef _KBASE_PM_CA_H_ +#define _KBASE_PM_CA_H_ + +/** + * kbase_pm_ca_init - Initialize core availability framework + * + * Must be called before calling any other core availability function + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if the core availability framework was successfully initialized, + * -errno otherwise + */ +int kbase_pm_ca_init(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_term - Terminate core availability framework + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_ca_term(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_get_core_mask - Get currently available shaders core mask + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Returns a mask of the currently available shader cores. + * Calls into the core availability policy + * + * Return: The bit mask of available cores + */ +u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_update_core_status - Update core status + * + * @kbdev: The kbase device structure for the device (must be + * a valid pointer) + * @cores_ready: The bit mask of cores ready for job submission + * @cores_transitioning: The bit mask of cores that are transitioning power + * state + * + * Update core availability policy with current core power status + * + * Calls into the core availability policy + */ +void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, + u64 cores_transitioning); + +/** + * kbase_pm_ca_instr_enable - Enable override for instrumentation + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This overrides the output of the core availability policy, ensuring that all + * cores are available + */ +void kbase_pm_ca_instr_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_instr_disable - Disable override for instrumentation + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This disables any previously enabled override, and resumes normal policy + * functionality + */ +void kbase_pm_ca_instr_disable(struct kbase_device *kbdev); + +#endif /* _KBASE_PM_CA_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c new file mode 100644 index 000000000000..66bf660cffb6 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c @@ -0,0 +1,129 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
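A pairing sketch for the two instrumentation overrides documented above. Note the locking asymmetry in the implementations (mali_kbase_pm_ca.c): kbase_pm_ca_instr_enable() takes hwaccess_lock itself, while kbase_pm_ca_instr_disable() asserts that the caller already holds it. The dump step in the middle is a placeholder, not a real counter API.

static void example_counter_dump(struct kbase_device *kbdev)
{
	unsigned long flags;

	/* Force all shader cores available for the dump */
	kbase_pm_ca_instr_enable(kbdev);

	/* ... program and read the hardware counters here ... */

	/* Restore normal core availability policy behaviour */
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_ca_instr_disable(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}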
+ * + */ + + + +/* + * A core availability policy implementing core mask selection from devfreq OPPs + * + */ + +#include <mali_kbase.h> +#include <mali_kbase_pm.h> +#include <backend/gpu/mali_kbase_pm_internal.h> +#include <linux/version.h> + +void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) +{ + struct kbasep_pm_ca_policy_devfreq *data = + &kbdev->pm.backend.ca_policy_data.devfreq; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + data->cores_desired = core_mask; + + /* Disable any cores that are now unwanted */ + data->cores_enabled &= data->cores_desired; + + kbdev->pm.backend.ca_in_transition = true; + + /* If there are no cores to be powered off then power on desired cores + */ + if (!(data->cores_used & ~data->cores_desired)) { + data->cores_enabled = data->cores_desired; + kbdev->pm.backend.ca_in_transition = false; + } + + kbase_pm_update_cores_state_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX %llX\n", + data->cores_desired, data->cores_enabled); +} + +static void devfreq_init(struct kbase_device *kbdev) +{ + struct kbasep_pm_ca_policy_devfreq *data = + &kbdev->pm.backend.ca_policy_data.devfreq; + + if (kbdev->current_core_mask) { + data->cores_enabled = kbdev->current_core_mask; + data->cores_desired = kbdev->current_core_mask; + } else { + data->cores_enabled = + kbdev->gpu_props.props.raw_props.shader_present; + data->cores_desired = + kbdev->gpu_props.props.raw_props.shader_present; + } + data->cores_used = 0; + kbdev->pm.backend.ca_in_transition = false; +} + +static void devfreq_term(struct kbase_device *kbdev) +{ +} + +static u64 devfreq_get_core_mask(struct kbase_device *kbdev) +{ + return kbdev->pm.backend.ca_policy_data.devfreq.cores_enabled; +} + +static void devfreq_update_core_status(struct kbase_device *kbdev, + u64 cores_ready, + u64 cores_transitioning) +{ + struct kbasep_pm_ca_policy_devfreq *data = + &kbdev->pm.backend.ca_policy_data.devfreq; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + data->cores_used = cores_ready | cores_transitioning; + + /* If in desired state then clear transition flag */ + if (data->cores_enabled == data->cores_desired) + kbdev->pm.backend.ca_in_transition = false; + + /* If all undesired cores are now off then power on desired cores. + * The direct comparison against cores_enabled limits potential + * recursion to one level */ + if (!(data->cores_used & ~data->cores_desired) && + data->cores_enabled != data->cores_desired) { + data->cores_enabled = data->cores_desired; + + kbase_pm_update_cores_state_nolock(kbdev); + + kbdev->pm.backend.ca_in_transition = false; + } +} + +/* + * The struct kbase_pm_ca_policy structure for the devfreq core availability + * policy. + * + * This is the static structure that defines the devfreq core availability power + * policy's callback and name. 
+ */ +const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops = { + "devfreq", /* name */ + devfreq_init, /* init */ + devfreq_term, /* term */ + devfreq_get_core_mask, /* get_core_mask */ + devfreq_update_core_status, /* update_core_status */ + 0u, /* flags */ + KBASE_PM_CA_POLICY_ID_DEVFREQ, /* id */ +}; + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h new file mode 100644 index 000000000000..7ab3cd4d8460 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h @@ -0,0 +1,55 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * A core availability policy for use with devfreq, where core masks are + * associated with OPPs. + */ + +#ifndef MALI_KBASE_PM_CA_DEVFREQ_H +#define MALI_KBASE_PM_CA_DEVFREQ_H + +/** + * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy + * + * This contains data that is private to the devfreq core availability + * policy. + * + * @cores_desired: Cores that the policy wants to be available + * @cores_enabled: Cores that the policy is currently returning as available + * @cores_used: Cores currently powered or transitioning + */ +struct kbasep_pm_ca_policy_devfreq { + u64 cores_desired; + u64 cores_enabled; + u64 cores_used; +}; + +extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops; + +/** + * kbase_devfreq_set_core_mask - Set core mask for policy to use + * @kbdev: Device pointer + * @core_mask: New core mask + * + * The new core mask will have immediate effect if the GPU is powered, or will + * take effect when it is next powered on. + */ +void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); + +#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */ + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c new file mode 100644 index 000000000000..864612d31f9b --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c @@ -0,0 +1,65 @@ +/* + * + * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
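A sketch of how a devfreq transition might feed this policy: on an OPP change, look up the shader core mask associated with the new operating point and pass it to kbase_devfreq_set_core_mask(). Only kbase_devfreq_set_core_mask() is real here; opp_to_core_mask() is a hypothetical platform helper, since how masks are attached to OPPs is platform-specific.

/* Hypothetical helper: the core mask associated with an OPP frequency */
extern u64 opp_to_core_mask(struct kbase_device *kbdev, unsigned long freq);

static void example_devfreq_transition(struct kbase_device *kbdev,
		unsigned long new_freq)
{
	/* Takes effect immediately if the GPU is powered, otherwise on the
	 * next power-up, per the function's documentation above */
	kbase_devfreq_set_core_mask(kbdev, opp_to_core_mask(kbdev, new_freq));
}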
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+		u64 cores_ready,
+		u64 cores_transitioning)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(cores_ready);
+	CSTD_UNUSED(cores_transitioning);
+}
+
+/*
+ * The struct kbase_pm_ca_policy structure for the fixed core availability
+ * policy.
+ *
+ * This is the static structure that defines the fixed core availability
+ * policy's callbacks and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+	"fixed",			/* name */
+	fixed_init,			/* init */
+	fixed_term,			/* term */
+	fixed_get_core_mask,		/* get_core_mask */
+	fixed_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_FIXED,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100644
index 000000000000..a763155cb703
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+	int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100644
index 000000000000..f891fa225a89
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return 0;
+
+	return kbdev->gpu_props.props.raw_props.shader_present;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
+		return false;
+
+	return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy structure for the coarse demand power policy.
+ *
+ * This is the static structure that defines the coarse demand power policy's
+ * callbacks and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",		/* name */
+	coarse_demand_init,		/* init */
+	coarse_demand_term,		/* term */
+	coarse_demand_get_core_mask,	/* get_core_mask */
+	coarse_demand_get_core_active,	/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100644
index 000000000000..749d305eee9a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC:
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * This contains data that is private to the coarse demand power policy.
+ *
+ * @dummy: Dummy member - no state needed
+ */
+struct kbasep_pm_policy_coarse_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100644
index 000000000000..352744ee6d73
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,519 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#include "mali_kbase_pm_ca_devfreq.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definitions - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on. These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ * @KBASE_PM_CORE_STACK: Core stacks
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO,
+	KBASE_PM_CORE_STACK = STACK_PRESENT_LO
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power
+ *                                 management framework.
+ *
+ * @time_period_start: time at which busy/idle measurements started
+ * @time_busy: number of ns the GPU was busy executing jobs since the
+ *             @time_period_start timestamp.
+ * @time_idle: number of ns the GPU was not executing jobs since the
+ *             @time_period_start timestamp.
+ * @prev_busy: busy time in ns of previous time period.
+ *             Updated when metrics are reset.
+ * @prev_idle: idle time in ns of previous time period.
+ *             Updated when metrics are reset.
+ * @gpu_active: true when the GPU is executing jobs, false when
+ *              not. Updated when the job scheduler informs us a job is
+ *              submitted to or removed from a GPU slot.
+ * @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ * @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ * @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device. + * @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As + * GL jobs never run on slot 2 this slot is not recorded. + * @lock: spinlock protecting the kbasep_pm_metrics_data structure + * @timer: timer to regularly make DVFS decisions based on the power + * management metrics. + * @timer_active: boolean indicating @timer is running + * @platform_data: pointer to data controlled by platform specific code + * @kbdev: pointer to kbase device for which metrics are collected + * + */ +struct kbasep_pm_metrics_data { + ktime_t time_period_start; + u32 time_busy; + u32 time_idle; + u32 prev_busy; + u32 prev_idle; + bool gpu_active; + u32 busy_cl[2]; + u32 busy_gl; + u32 active_cl_ctx[2]; + u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */ + spinlock_t lock; + +#ifdef CONFIG_MALI_MIDGARD_DVFS + struct hrtimer timer; + bool timer_active; +#endif + + void *platform_data; + struct kbase_device *kbdev; +}; + +union kbase_pm_policy_data { + struct kbasep_pm_policy_always_on always_on; + struct kbasep_pm_policy_coarse_demand coarse_demand; + struct kbasep_pm_policy_demand demand; +#if !MALI_CUSTOMER_RELEASE + struct kbasep_pm_policy_demand_always_powered demand_always_powered; + struct kbasep_pm_policy_fast_start fast_start; +#endif +}; + +union kbase_pm_ca_policy_data { + struct kbasep_pm_ca_policy_fixed fixed; + struct kbasep_pm_ca_policy_devfreq devfreq; +#if !MALI_CUSTOMER_RELEASE + struct kbasep_pm_ca_policy_random random; +#endif +}; + +/** + * struct kbase_pm_backend_data - Data stored per device for power management. + * + * This structure contains data for the power management framework. There is one + * instance of this structure per device in the system. + * + * @ca_current_policy: The policy that is currently actively controlling core + * availability. + * @pm_current_policy: The policy that is currently actively controlling the + * power state. + * @ca_policy_data: Private data for current CA policy + * @pm_policy_data: Private data for current PM policy + * @ca_in_transition: Flag indicating when core availability policy is + * transitioning cores. The core availability policy must + * set this when a change in core availability is occurring. + * power_change_lock must be held when accessing this. + * @reset_done: Flag when a reset is complete + * @reset_done_wait: Wait queue to wait for changes to @reset_done + * @l2_powered_wait: Wait queue for whether the l2 cache has been powered as + * requested + * @l2_powered: State indicating whether all the l2 caches are powered. + * Non-zero indicates they're *all* powered + * Zero indicates that some (or all) are not powered + * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter + * users + * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests + * @desired_shader_state: A bit mask identifying the shader cores that the + * power policy would like to be on. The current state + * of the cores may be different, but there should be + * transitions in progress that will eventually achieve + * this state (assuming that the policy doesn't change + * its mind in the mean time). + * @powering_on_shader_state: A bit mask indicating which shader cores are + * currently in a power-on transition + * @desired_tiler_state: A bit mask identifying the tiler cores that the power + * policy would like to be on. 
See @desired_shader_state + * @powering_on_tiler_state: A bit mask indicating which tiler core are + * currently in a power-on transition + * @powering_on_l2_state: A bit mask indicating which l2-caches are currently + * in a power-on transition + * @powering_on_stack_state: A bit mask indicating which core stacks are + * currently in a power-on transition + * @gpu_in_desired_state: This flag is set if the GPU is powered as requested + * by the desired_xxx_state variables + * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0 + * @gpu_powered: Set to true when the GPU is powered and register + * accesses are possible, false otherwise + * @instr_enabled: Set to true when instrumentation is enabled, + * false otherwise + * @cg1_disabled: Set if the policy wants to keep the second core group + * powered off + * @driver_ready_for_irqs: Debug state indicating whether sufficient + * initialization of the driver has occurred to handle + * IRQs + * @gpu_powered_lock: Spinlock that must be held when writing @gpu_powered or + * accessing @driver_ready_for_irqs + * @metrics: Structure to hold metrics for the GPU + * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is + * powered off + * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders + * and/or timers are powered off + * @gpu_poweroff_timer: Timer for powering off GPU + * @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires + * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq + * @shader_poweroff_pending: Bit mask of shaders to be powered off on next + * timer callback + * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer + * callback + * @poweroff_timer_needed: true if the poweroff timer is currently required, + * false otherwise + * @poweroff_timer_running: true if the poweroff timer is currently running, + * false otherwise + * power_change_lock should be held when accessing, + * unless there is no way the timer can be running (eg + * hrtimer_cancel() was called immediately before) + * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. + * hwaccess_lock must be held when accessing + * @poweron_required: true if a GPU power on is required. Should only be set + * when poweroff_wait_in_progress is true, and therefore the + * GPU can not immediately be powered on. pm.lock must be + * held when accessing + * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend + * request. pm.lock must be held when accessing + * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off + * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq + * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete + * @callback_power_on: Callback when the GPU needs to be turned on. See + * &struct kbase_pm_callback_conf + * @callback_power_off: Callback when the GPU may be turned off. See + * &struct kbase_pm_callback_conf + * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to + * be turned off. See &struct kbase_pm_callback_conf + * @callback_power_resume: Callback when a resume occurs and the GPU needs to + * be turned on. See &struct kbase_pm_callback_conf + * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See + * &struct kbase_pm_callback_conf + * @callback_power_runtime_off: Callback when the GPU may be turned off. 
See + * &struct kbase_pm_callback_conf + * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See + * &struct kbase_pm_callback_conf + * + * Note: + * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the + * policy is being changed with kbase_pm_ca_set_policy() or + * kbase_pm_set_policy(). The change is protected under + * kbase_device.pm.power_change_lock. Direct access to this + * from IRQ context must therefore check for NULL. If NULL, then + * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy + * functions that would have been done under IRQ. + */ +struct kbase_pm_backend_data { + const struct kbase_pm_ca_policy *ca_current_policy; + const struct kbase_pm_policy *pm_current_policy; + union kbase_pm_ca_policy_data ca_policy_data; + union kbase_pm_policy_data pm_policy_data; + bool ca_in_transition; + bool reset_done; + wait_queue_head_t reset_done_wait; + wait_queue_head_t l2_powered_wait; + int l2_powered; + int gpu_cycle_counter_requests; + spinlock_t gpu_cycle_counter_requests_lock; + + u64 desired_shader_state; + u64 powering_on_shader_state; + u64 desired_tiler_state; + u64 powering_on_tiler_state; + u64 powering_on_l2_state; +#ifdef CONFIG_MALI_CORESTACK + u64 powering_on_stack_state; +#endif /* CONFIG_MALI_CORESTACK */ + + bool gpu_in_desired_state; + wait_queue_head_t gpu_in_desired_state_wait; + + bool gpu_powered; + + bool instr_enabled; + + bool cg1_disabled; + +#ifdef CONFIG_MALI_DEBUG + bool driver_ready_for_irqs; +#endif /* CONFIG_MALI_DEBUG */ + + spinlock_t gpu_powered_lock; + + + struct kbasep_pm_metrics_data metrics; + + int gpu_poweroff_pending; + int shader_poweroff_pending_time; + + struct hrtimer gpu_poweroff_timer; + struct workqueue_struct *gpu_poweroff_wq; + struct work_struct gpu_poweroff_work; + + u64 shader_poweroff_pending; + u64 tiler_poweroff_pending; + + bool poweroff_timer_needed; + bool poweroff_timer_running; + + bool poweroff_wait_in_progress; + bool poweron_required; + bool poweroff_is_suspend; + + struct workqueue_struct *gpu_poweroff_wait_wq; + struct work_struct gpu_poweroff_wait_work; + + wait_queue_head_t poweroff_wait; + + int (*callback_power_on)(struct kbase_device *kbdev); + void (*callback_power_off)(struct kbase_device *kbdev); + void (*callback_power_suspend)(struct kbase_device *kbdev); + void (*callback_power_resume)(struct kbase_device *kbdev); + int (*callback_power_runtime_on)(struct kbase_device *kbdev); + void (*callback_power_runtime_off)(struct kbase_device *kbdev); + int (*callback_power_runtime_idle)(struct kbase_device *kbdev); +}; + + +/* List of policy IDs */ +enum kbase_pm_policy_id { + KBASE_PM_POLICY_ID_DEMAND = 1, + KBASE_PM_POLICY_ID_ALWAYS_ON, + KBASE_PM_POLICY_ID_COARSE_DEMAND, +#if !MALI_CUSTOMER_RELEASE + KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED, + KBASE_PM_POLICY_ID_FAST_START +#endif +}; + +typedef u32 kbase_pm_policy_flags; + +/** + * struct kbase_pm_policy - Power policy structure. + * + * Each power policy exposes a (static) instance of this structure which + * contains function pointers to the policy's methods. + * + * @name: The name of this policy + * @init: Function called when the policy is selected + * @term: Function called when the policy is unselected + * @get_core_mask: Function called to get the current shader core mask + * @get_core_active: Function called to get the current overall GPU power + * state + * @flags: Field indicating flags for this policy + * @id: Field indicating an ID for this policy. 
This is not + * necessarily the same as its index in the list returned + * by kbase_pm_list_policies(). + * It is used purely for debugging. + */ +struct kbase_pm_policy { + char *name; + + /** + * Function called when the policy is selected + * + * This should initialize the kbdev->pm.pm_policy_data structure. It + * should not attempt to make any changes to hardware state. + * + * It is undefined what state the cores are in when the function is + * called. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + */ + void (*init)(struct kbase_device *kbdev); + + /** + * Function called when the policy is unselected. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + */ + void (*term)(struct kbase_device *kbdev); + + /** + * Function called to get the current shader core mask + * + * The returned mask should meet or exceed (kbdev->shader_needed_bitmap + * | kbdev->shader_inuse_bitmap). + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * + * Return: The mask of shader cores to be powered + */ + u64 (*get_core_mask)(struct kbase_device *kbdev); + + /** + * Function called to get the current overall GPU power state + * + * This function should consider the state of kbdev->pm.active_count. If + * this count is greater than 0 then there is at least one active + * context on the device and the GPU should be powered. If it is equal + * to 0 then there are no active contexts and the GPU could be powered + * off if desired. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * + * Return: true if the GPU should be powered, false otherwise + */ + bool (*get_core_active)(struct kbase_device *kbdev); + + kbase_pm_policy_flags flags; + enum kbase_pm_policy_id id; +}; + + +enum kbase_pm_ca_policy_id { + KBASE_PM_CA_POLICY_ID_FIXED = 1, + KBASE_PM_CA_POLICY_ID_DEVFREQ, + KBASE_PM_CA_POLICY_ID_RANDOM +}; + +typedef u32 kbase_pm_ca_policy_flags; + +/** + * Maximum length of a CA policy names + */ +#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15 + +/** + * struct kbase_pm_ca_policy - Core availability policy structure. + * + * Each core availability policy exposes a (static) instance of this structure + * which contains function pointers to the policy's methods. + * + * @name: The name of this policy + * @init: Function called when the policy is selected + * @term: Function called when the policy is unselected + * @get_core_mask: Function called to get the current shader core + * availability mask + * @update_core_status: Function called to update the current core status + * @flags: Field indicating flags for this policy + * @id: Field indicating an ID for this policy. This is not + * necessarily the same as its index in the list returned + * by kbase_pm_list_policies(). + * It is used purely for debugging. + */ +struct kbase_pm_ca_policy { + char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1]; + + /** + * Function called when the policy is selected + * + * This should initialize the kbdev->pm.ca_policy_data structure. It + * should not attempt to make any changes to hardware state. + * + * It is undefined what state the cores are in when the function is + * called. + * + * @kbdev The kbase device structure for the device (must be a + * valid pointer) + */ + void (*init)(struct kbase_device *kbdev); + + /** + * Function called when the policy is unselected. 
+ * + * @kbdev The kbase device structure for the device (must be a + * valid pointer) + */ + void (*term)(struct kbase_device *kbdev); + + /** + * Function called to get the current shader core availability mask + * + * When a change in core availability is occurring, the policy must set + * kbdev->pm.ca_in_transition to true. This is to indicate that + * reporting changes in power state cannot be optimized out, even if + * kbdev->pm.desired_shader_state remains unchanged. This must be done + * by any functions internal to the Core Availability Policy that change + * the return value of kbase_pm_ca_policy::get_core_mask. + * + * @kbdev The kbase device structure for the device (must be a + * valid pointer) + * + * Return: The current core availability mask + */ + u64 (*get_core_mask)(struct kbase_device *kbdev); + + /** + * Function called to update the current core status + * + * If none of the cores in core group 0 are ready or transitioning, then + * the policy must ensure that the next call to get_core_mask does not + * return 0 for all cores in core group 0. It is an error to disable + * core group 0 through the core availability policy. + * + * When a change in core availability has finished, the policy must set + * kbdev->pm.ca_in_transition to false. This is to indicate that + * changes in power state can once again be optimized out when + * kbdev->pm.desired_shader_state is unchanged. + * + * @kbdev: The kbase device structure for the device + * (must be a valid pointer) + * @cores_ready: The mask of cores currently powered and + * ready to run jobs + * @cores_transitioning: The mask of cores currently transitioning + * power state + */ + void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready, + u64 cores_transitioning); + + kbase_pm_ca_policy_flags flags; + + /** + * Field indicating an ID for this policy. This is not necessarily the + * same as its index in the list returned by kbase_pm_list_policies(). + * It is used purely for debugging. + */ + enum kbase_pm_ca_policy_id id; +}; + +#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c new file mode 100644 index 000000000000..81322fd0dd17 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c @@ -0,0 +1,73 @@ +/* + * + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
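Pulling the interface together, a minimal skeleton of a custom core availability policy (a sketch only: a real policy would need its own entry in policy_list in mali_kbase_pm_ca.c and its own enum kbase_pm_ca_policy_id value; the FIXED id below is a stand-in):

static void skel_init(struct kbase_device *kbdev)
{
	/* No availability transition is pending at selection time */
	kbdev->pm.backend.ca_in_transition = false;
}

static void skel_term(struct kbase_device *kbdev)
{
}

static u64 skel_get_core_mask(struct kbase_device *kbdev)
{
	/* Core group 0 must never be disabled - see the contract above */
	return kbdev->gpu_props.props.raw_props.shader_present;
}

static void skel_update_core_status(struct kbase_device *kbdev,
		u64 cores_ready, u64 cores_transitioning)
{
	/* A stateless policy has nothing to track here */
}

static const struct kbase_pm_ca_policy example_skel_ca_policy_ops = {
	"skel",				/* name */
	skel_init,			/* init */
	skel_term,			/* term */
	skel_get_core_mask,		/* get_core_mask */
	skel_update_core_status,	/* update_core_status */
	0u,				/* flags */
	KBASE_PM_CA_POLICY_ID_FIXED,	/* id - placeholder, see note above */
};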
+ * + */ + + + + + +/* + * A simple demand based power management policy + */ + +#include <mali_kbase.h> +#include <mali_kbase_pm.h> + +static u64 demand_get_core_mask(struct kbase_device *kbdev) +{ + u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap; + + if (0 == kbdev->pm.active_count) + return 0; + + return desired; +} + +static bool demand_get_core_active(struct kbase_device *kbdev) +{ + if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | + kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt + && !kbdev->tiler_inuse_cnt) + return false; + + return true; +} + +static void demand_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static void demand_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +/* + * The struct kbase_pm_policy structure for the demand power policy. + * + * This is the static structure that defines the demand power policy's callback + * and name. + */ +const struct kbase_pm_policy kbase_pm_demand_policy_ops = { + "demand", /* name */ + demand_init, /* init */ + demand_term, /* term */ + demand_get_core_mask, /* get_core_mask */ + demand_get_core_active, /* get_core_active */ + 0u, /* flags */ + KBASE_PM_POLICY_ID_DEMAND, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h new file mode 100644 index 000000000000..c0c84b6e9189 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h @@ -0,0 +1,64 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * A simple demand based power management policy + */ + +#ifndef MALI_KBASE_PM_DEMAND_H +#define MALI_KBASE_PM_DEMAND_H + +/** + * DOC: Demand power management policy + * + * The demand power management policy has the following characteristics: + * - When KBase indicates that the GPU will be powered up, but we don't yet + * know which Job Chains are to be run: + * - The Shader Cores are not powered up + * + * - When KBase indicates that a set of Shader Cores are needed to submit the + * currently queued Job Chains: + * - Only those Shader Cores are powered up + * + * - When KBase indicates that the GPU need not be powered: + * - The Shader Cores are powered off, and the GPU itself is powered off too. + * + * Note: + * - KBase indicates the GPU will be powered up when it has a User Process that + * has just started to submit Job Chains. + * + * - KBase indicates the GPU need not be powered when all the Job Chains from + * User Processes have finished, and it is waiting for a User Process to + * submit some more Job Chains. + */ + +/** + * struct kbasep_pm_policy_demand - Private structure for policy instance data + * + * @dummy: No state is needed, a dummy variable + * + * This contains data that is private to the demand power policy. 
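+ *
+ * The demand policy keeps no state of its own: its callbacks derive
+ * everything they need at call time from kbdev->pm.active_count and the
+ * shader_needed/shader_inuse bitmaps (see mali_kbase_pm_demand.c), so this
+ * structure exists only to satisfy the policy interface.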
+ */ +struct kbasep_pm_policy_demand { + int dummy; +}; + +extern const struct kbase_pm_policy kbase_pm_demand_policy_ops; + +#endif /* MALI_KBASE_PM_DEMAND_H */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c new file mode 100644 index 000000000000..cbc258cb361b --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -0,0 +1,1672 @@ +/* + * + * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Base kernel Power Management hardware control + */ + +#include <mali_kbase.h> +#include <mali_kbase_config_defaults.h> +#include <mali_midg_regmap.h> +#if defined(CONFIG_MALI_GATOR_SUPPORT) +#include <mali_kbase_gator.h> +#endif +#include <mali_kbase_tlstream.h> +#include <mali_kbase_pm.h> +#include <mali_kbase_config_defaults.h> +#include <mali_kbase_smc.h> +#include <mali_kbase_hwaccess_jm.h> +#include <mali_kbase_ctx_sched.h> +#include <backend/gpu/mali_kbase_cache_policy_backend.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_irq_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +#include <linux/of.h> + +#if MALI_MOCK_TEST +#define MOCKABLE(function) function##_original +#else +#define MOCKABLE(function) function +#endif /* MALI_MOCK_TEST */ + +/** + * enum kbasep_pm_action - Actions that can be performed on a core. + * + * This enumeration is private to the file. Its values are set to allow + * core_type_to_reg() function, which decodes this enumeration, to be simpler + * and more efficient. + * + * @ACTION_PRESENT: The cores that are present + * @ACTION_READY: The cores that are ready + * @ACTION_PWRON: Power on the cores specified + * @ACTION_PWROFF: Power off the cores specified + * @ACTION_PWRTRANS: The cores that are transitioning + * @ACTION_PWRACTIVE: The cores that are active + */ +enum kbasep_pm_action { + ACTION_PRESENT = 0, + ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO), + ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO), + ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO), + ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO), + ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO) +}; + +static u64 kbase_pm_get_state( + struct kbase_device *kbdev, + enum kbase_pm_core_type core_type, + enum kbasep_pm_action action); + +/** + * core_type_to_reg - Decode a core type and action to a register. + * + * Given a core type (defined by kbase_pm_core_type) and an action (defined + * by kbasep_pm_action) this function will return the register offset that + * will perform the action on the core type. The register returned is the _LO + * register and an offset must be applied to use the _HI register. + * + * @core_type: The type of core + * @action: The type of action + * + * Return: The register offset of the _LO register that performs an action of + * type @action on a core of type @core_type. 
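+ *
+ * Illustrative example (this assumes, as the arithmetic in the function
+ * body implies, that each enum kbase_pm_core_type value is the offset of
+ * that core type's _PRESENT_LO register): for a shader core power-up,
+ * core_type_to_reg(KBASE_PM_CORE_SHADER, ACTION_PWRON) evaluates to
+ * SHADER_PRESENT_LO + (SHADER_PWRON_LO - SHADER_PRESENT_LO), i.e.
+ * SHADER_PWRON_LO.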
+ */ +static u32 core_type_to_reg(enum kbase_pm_core_type core_type, + enum kbasep_pm_action action) +{ +#ifdef CONFIG_MALI_CORESTACK + if (core_type == KBASE_PM_CORE_STACK) { + switch (action) { + case ACTION_PRESENT: + return STACK_PRESENT_LO; + case ACTION_READY: + return STACK_READY_LO; + case ACTION_PWRON: + return STACK_PWRON_LO; + case ACTION_PWROFF: + return STACK_PWROFF_LO; + case ACTION_PWRTRANS: + return STACK_PWRTRANS_LO; + default: + BUG(); + } + } +#endif /* CONFIG_MALI_CORESTACK */ + + return (u32)core_type + (u32)action; +} + +#ifdef CONFIG_ARM64 +static void mali_cci_flush_l2(struct kbase_device *kbdev) +{ + const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; + u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + u32 raw; + + /* + * Note that we don't take the cache flush mutex here since + * we expect to be the last user of the L2, all other L2 users + * would have dropped their references, to initiate L2 power + * down, L2 power down being the only valid place for this + * to be called from. + */ + + kbase_reg_write(kbdev, + GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES, + NULL); + + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), + NULL); + + /* Wait for cache flush to complete before continuing, exit on + * gpu resets or loop expiry. */ + while (((raw & mask) == 0) && --loops) { + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), + NULL); + } +} +#endif + +/** + * kbase_pm_invoke - Invokes an action on a core set + * + * This function performs the action given by @action on a set of cores of a + * type given by @core_type. It is a static function used by + * kbase_pm_transition_core_type() + * + * @kbdev: The kbase device structure of the device + * @core_type: The type of core that the action should be performed on + * @cores: A bit mask of cores to perform the action on (low 32 bits) + * @action: The action to perform on the cores + */ +static void kbase_pm_invoke(struct kbase_device *kbdev, + enum kbase_pm_core_type core_type, + u64 cores, + enum kbasep_pm_action action) +{ + u32 reg; + u32 lo = cores & 0xFFFFFFFF; + u32 hi = (cores >> 32) & 0xFFFFFFFF; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + reg = core_type_to_reg(core_type, action); + + KBASE_DEBUG_ASSERT(reg); +#if defined(CONFIG_MALI_GATOR_SUPPORT) + if (cores) { + if (action == ACTION_PWRON) + kbase_trace_mali_pm_power_on(core_type, cores); + else if (action == ACTION_PWROFF) + kbase_trace_mali_pm_power_off(core_type, cores); + } +#endif + + if (cores) { + u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); + + if (action == ACTION_PWRON) + state |= cores; + else if (action == ACTION_PWROFF) + state &= ~cores; + KBASE_TLSTREAM_AUX_PM_STATE(core_type, state); + } + + /* Tracing */ + if (cores) { + if (action == ACTION_PWRON) + switch (core_type) { + case KBASE_PM_CORE_SHADER: + KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u, + lo); + break; + case KBASE_PM_CORE_TILER: + KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL, + NULL, 0u, lo); + break; + case KBASE_PM_CORE_L2: + KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL, + 0u, lo); + break; + default: + break; + } + else if (action == ACTION_PWROFF) + switch (core_type) { + case KBASE_PM_CORE_SHADER: + KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL, + 0u, lo); + break; + case KBASE_PM_CORE_TILER: + KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL, + NULL, 0u, lo); + break; + case KBASE_PM_CORE_L2: + KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL, + 0u, lo); + /* disable snoops before L2 is turned off */ + 
kbase_pm_cache_snoop_disable(kbdev);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (lo != 0)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+ if (hi != 0)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+/**
+ * kbase_pm_get_state - Get information about a core set
+ *
+ * This function gets information (chosen by @action) about a set of cores of
+ * a type given by @core_type. It is a static function used by
+ * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and
+ * kbase_pm_get_ready_cores().
+ *
+ * @kbdev: The kbase device structure of the device
+ * @core_type: The type of core that should be queried
+ * @action: The property of the cores to query
+ *
+ * Return: A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+ enum kbase_pm_core_type core_type,
+ enum kbasep_pm_action action)
+{
+ u32 reg;
+ u32 lo, hi;
+
+ reg = core_type_to_reg(core_type, action);
+
+ KBASE_DEBUG_ASSERT(reg);
+
+ lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+ hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+ return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev)
+{
+ kbdev->shader_inuse_bitmap = 0;
+ kbdev->shader_needed_bitmap = 0;
+ kbdev->shader_available_bitmap = 0;
+ kbdev->tiler_available_bitmap = 0;
+ kbdev->l2_users_count = 0;
+ kbdev->l2_available_bitmap = 0;
+ kbdev->tiler_needed_cnt = 0;
+ kbdev->tiler_inuse_cnt = 0;
+
+ memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+/**
+ * kbase_pm_get_present_cores - Get the cores that are present
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ switch (type) {
+ case KBASE_PM_CORE_L2:
+ return kbdev->gpu_props.props.raw_props.l2_present;
+ case KBASE_PM_CORE_SHADER:
+ return kbdev->gpu_props.props.raw_props.shader_present;
+ case KBASE_PM_CORE_TILER:
+ return kbdev->gpu_props.props.raw_props.tiler_present;
+#ifdef CONFIG_MALI_CORESTACK
+ case KBASE_PM_CORE_STACK:
+ return kbdev->gpu_props.props.raw_props.stack_present;
+#endif /* CONFIG_MALI_CORESTACK */
+ default:
+ break;
+ }
+ KBASE_DEBUG_ASSERT(0);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * kbase_pm_get_active_cores - Get the cores that are "active"
+ * (busy processing work)
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are active
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between
+ * power states
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are transitioning
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * kbase_pm_get_ready_cores - Get the cores that are powered on
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are ready (powered on)
+ */
+u64
kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ u64 result;
+
+ result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+ switch (type) {
+ case KBASE_PM_CORE_SHADER:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+ (u32) result);
+ break;
+ case KBASE_PM_CORE_TILER:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+ (u32) result);
+ break;
+ case KBASE_PM_CORE_L2:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+ (u32) result);
+ break;
+ default:
+ break;
+ }
+
+ return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * kbase_pm_transition_core_type - Perform power transitions for a particular
+ * core type.
+ *
+ * This function will perform any available power transitions to make the
+ * actual hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that core cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask,
+ * however the return value will always be 0 in this case.
+ *
+ * @kbdev: The kbase device
+ * @type: The core type to perform transitions for
+ * @desired_state: A bit mask of the desired state of the cores
+ * @in_use: A bit mask of the cores that are currently running
+ * jobs. These cores have to be kept powered up because
+ * there are jobs running (or about to run) on them.
+ * @available: Receives a bit mask of the cores that the job
+ * scheduler can use to submit jobs to. May be NULL if
+ * this is not needed.
+ * @powering_on: Bit mask to update with cores that are
+ * transitioning to a power-on state.
+ *
+ * Return: true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type,
+ u64 desired_state,
+ u64 in_use,
+ u64 * const available,
+ u64 *powering_on)
+{
+ u64 present;
+ u64 ready;
+ u64 trans;
+ u64 powerup;
+ u64 powerdown;
+ u64 powering_on_trans;
+ u64 desired_state_in_use;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* Get current state */
+ present = kbase_pm_get_present_cores(kbdev, type);
+ trans = kbase_pm_get_trans_cores(kbdev, type);
+ ready = kbase_pm_get_ready_cores(kbdev, type);
+ /* mask off ready from trans in case transitions finished between the
+ * register reads */
+ trans &= ~ready;
+
+ if (trans) /* Do not progress if any cores are transitioning */
+ return false;
+
+ powering_on_trans = trans & *powering_on;
+ *powering_on = powering_on_trans;
+
+ if (available != NULL)
+ *available = (ready | powering_on_trans) & desired_state;
+
+ /* Update desired state to include the in-use cores. These have to be
+ * kept powered up because there are jobs running or about to run on
+ * these cores
+ */
+ desired_state_in_use = desired_state | in_use;
+
+ /* Update state of whether l2 caches are powered */
+ if (type == KBASE_PM_CORE_L2) {
+ if ((ready == present) && (desired_state_in_use == ready) &&
+ (trans == 0)) {
+ /* All are ready, none will be turned off, and none are
+ * transitioning */
+ kbdev->pm.backend.l2_powered = 1;
+ /*
+ * Ensure snoops are enabled after L2 is powered up,
+ * note that kbase keeps track of the snoop state, so
+ * safe to repeatedly call.
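+ * (kbase_pm_cache_snoop_enable() checks kbdev->cci_snoop_enabled and is
+ * a no-op when snoops are already on.)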
+ */ + kbase_pm_cache_snoop_enable(kbdev); + if (kbdev->l2_users_count > 0) { + /* Notify any registered l2 cache users + * (optimized out when no users waiting) */ + wake_up(&kbdev->pm.backend.l2_powered_wait); + } + } else + kbdev->pm.backend.l2_powered = 0; + } + + if (desired_state == ready && (trans == 0)) + return true; + + /* Restrict the cores to those that are actually present */ + powerup = desired_state_in_use & present; + powerdown = (~desired_state_in_use) & present; + + /* Restrict to cores that are not already in the desired state */ + powerup &= ~ready; + powerdown &= ready; + + /* Don't transition any cores that are already transitioning, except for + * Mali cores that support the following case: + * + * If the SHADER_PWRON or TILER_PWRON registers are written to turn on + * a core that is currently transitioning to power off, then this is + * remembered and the shader core is automatically powered up again once + * the original transition completes. Once the automatic power on is + * complete any job scheduled on the shader core should start. + */ + powerdown &= ~trans; + + if (kbase_hw_has_feature(kbdev, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS)) + if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type) + trans = powering_on_trans; /* for exception cases, only + * mask off cores in power on + * transitions */ + + powerup &= ~trans; + + /* Perform transitions if any */ + kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON); +#if !PLATFORM_POWER_DOWN_ONLY + kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF); +#endif + + /* Recalculate cores transitioning on, and re-evaluate our state */ + powering_on_trans |= powerup; + *powering_on = powering_on_trans; + if (available != NULL) + *available = (ready | powering_on_trans) & desired_state; + + return false; +} + +KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type); + +/** + * get_desired_cache_status - Determine which caches should be on for a + * particular core state + * + * This function takes a bit mask of the present caches and the cores (or + * caches) that are attached to the caches that will be powered. It then + * computes which caches should be turned on to allow the cores requested to be + * powered up. 
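+ *
+ * Worked example (illustrative values, not from the original source): with
+ * @present = 0x3 (two caches) and @cores_powered = 0xF0, the highest present
+ * bit is 1 and powered cores exist at or above it, so bit 1 is selected;
+ * nothing powered remains below, so the result is 0x2 (bit 0 is added only
+ * if @tilers_powered is non-zero).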
+ *
+ * @present: The bit mask of present caches
+ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
+ * be powered
+ * @tilers_powered: The bit mask of tilers that are desired to be powered
+ *
+ * Return: A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered,
+ u64 tilers_powered)
+{
+ u64 desired = 0;
+
+ while (present) {
+ /* Find out which is the highest set bit */
+ u64 bit = fls64(present) - 1;
+ u64 bit_mask = 1ull << bit;
+ /* Create a mask which has all bits from 'bit' upwards set */
+ u64 mask = ~(bit_mask - 1);
+
+ /* If there are any cores powered at this bit or above (that
+ * haven't previously been processed) then we need this core on
+ */
+ if (cores_powered & mask)
+ desired |= bit_mask;
+
+ /* Remove bits from cores_powered and present */
+ cores_powered &= ~mask;
+ present &= ~bit_mask;
+ }
+
+ /* Power up the required L2(s) for the tiler */
+ if (tilers_powered)
+ desired |= 1;
+
+ return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
+
+#ifdef CONFIG_MALI_CORESTACK
+u64 kbase_pm_core_stack_mask(u64 cores)
+{
+ u64 stack_mask = 0;
+ size_t const MAX_CORE_ID = 31;
+ size_t const NUM_CORES_PER_STACK = 4;
+ size_t i;
+
+ for (i = 0; i <= MAX_CORE_ID; ++i) {
+ if (test_bit(i, (unsigned long *)&cores)) {
+ /* Every core with ID >= 16 is mapped to stacks 4-7
+ * instead of 0-3 */
+ size_t const stack_num = (i >= 16) ?
+ (i % NUM_CORES_PER_STACK) + 4 :
+ (i % NUM_CORES_PER_STACK);
+ set_bit(stack_num, (unsigned long *)&stack_mask);
+ }
+ }
+
+ return stack_mask;
+}
+#endif /* CONFIG_MALI_CORESTACK */
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+ bool cores_are_available = false;
+ bool in_desired_state = true;
+ u64 desired_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+ u64 desired_stack_state;
+ u64 stacks_powered;
+#endif /* CONFIG_MALI_CORESTACK */
+ u64 cores_powered;
+ u64 tilers_powered;
+ u64 tiler_available_bitmap;
+ u64 tiler_transitioning_bitmap;
+ u64 shader_available_bitmap;
+ u64 shader_ready_bitmap;
+ u64 shader_transitioning_bitmap;
+ u64 l2_available_bitmap;
+ u64 prev_l2_available_bitmap;
+ u64 l2_inuse_bitmap;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+ if (kbdev->pm.backend.gpu_powered == false) {
+ spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+ if (kbdev->pm.backend.desired_shader_state == 0 &&
+ kbdev->pm.backend.desired_tiler_state == 0)
+ return true;
+ return false;
+ }
+
+ /* Trace that a change-state is being requested, and that it took
+ * (effectively) no time to start it.
This is useful for counting how
+ * many state changes occurred, in a way that's backwards-compatible
+ * with processing the trace data */
+ kbase_timeline_pm_send_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+ kbase_timeline_pm_handle_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+ /* If any cores are already powered then we must keep the caches on */
+ shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+ KBASE_PM_CORE_SHADER);
+ cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+ cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+#ifdef CONFIG_MALI_CORESTACK
+ /* Work out which core stacks want to be powered */
+ desired_stack_state = kbase_pm_core_stack_mask(cores_powered);
+ stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) |
+ desired_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+ /* Work out which tilers want to be powered */
+ tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+ KBASE_PM_CORE_TILER);
+ tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+ tilers_powered |= kbdev->pm.backend.desired_tiler_state;
+
+ /* If there are l2 cache users registered, keep all l2s powered even if
+ * all other cores are off. */
+ if (kbdev->l2_users_count > 0)
+ cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
+
+ desired_l2_state = get_desired_cache_status(
+ kbdev->gpu_props.props.raw_props.l2_present,
+ cores_powered, tilers_powered);
+
+ l2_inuse_bitmap = get_desired_cache_status(
+ kbdev->gpu_props.props.raw_props.l2_present,
+ cores_powered | shader_transitioning_bitmap,
+ tilers_powered | tiler_transitioning_bitmap);
+
+#ifdef CONFIG_MALI_CORESTACK
+ if (stacks_powered)
+ desired_l2_state |= 1;
+#endif /* CONFIG_MALI_CORESTACK */
+
+ /* If any l2 cache is on, then enable l2 #0, for use by job manager */
+ if (0 != desired_l2_state)
+ desired_l2_state |= 1;
+
+ prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap,
+ &l2_available_bitmap,
+ &kbdev->pm.backend.powering_on_l2_state);
+
+ if (kbdev->l2_available_bitmap != l2_available_bitmap)
+ KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+
+ kbdev->l2_available_bitmap = l2_available_bitmap;
+
+
+#ifdef CONFIG_MALI_CORESTACK
+ if (in_desired_state) {
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_STACK, desired_stack_state, 0,
+ &kbdev->stack_available_bitmap,
+ &kbdev->pm.backend.powering_on_stack_state);
+ }
+#endif /* CONFIG_MALI_CORESTACK */
+
+ if (in_desired_state) {
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_TILER,
+ kbdev->pm.backend.desired_tiler_state,
+ 0, &tiler_available_bitmap,
+ &kbdev->pm.backend.powering_on_tiler_state);
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_SHADER,
+ kbdev->pm.backend.desired_shader_state,
+ kbdev->shader_inuse_bitmap,
+ &shader_available_bitmap,
+ &kbdev->pm.backend.powering_on_shader_state);
+
+ if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+ NULL, 0u,
+ (u32) shader_available_bitmap);
+ KBASE_TIMELINE_POWER_SHADER(kbdev,
+ shader_available_bitmap);
+ }
+
+ kbdev->shader_available_bitmap = shader_available_bitmap;
+
+ if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+ NULL, NULL, 0u,
+ (u32) tiler_available_bitmap);
+ KBASE_TIMELINE_POWER_TILER(kbdev,
+ tiler_available_bitmap);
+ }
+
+ kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+ } else if ((l2_available_bitmap &
+ kbdev->gpu_props.props.raw_props.tiler_present) !=
+ kbdev->gpu_props.props.raw_props.tiler_present) {
+ tiler_available_bitmap = 0;
+
+ if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+ KBASE_TIMELINE_POWER_TILER(kbdev,
+ tiler_available_bitmap);
+
+ kbdev->tiler_available_bitmap = tiler_available_bitmap;
+ }
+
+ /* State updated for slow-path waiters */
+ kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+ shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_SHADER);
+ shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+ KBASE_PM_CORE_SHADER);
+
+ /* Determine whether the cores are now available (even if the set of
+ * available cores is empty). Note that they can be available even if
+ * we've not finished transitioning to the desired state */
+ if ((kbdev->shader_available_bitmap &
+ kbdev->pm.backend.desired_shader_state)
+ == kbdev->pm.backend.desired_shader_state &&
+ (kbdev->tiler_available_bitmap &
+ kbdev->pm.backend.desired_tiler_state)
+ == kbdev->pm.backend.desired_tiler_state) {
+ cores_are_available = true;
+
+ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+ (u32)(kbdev->shader_available_bitmap &
+ kbdev->pm.backend.desired_shader_state));
+ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+ (u32)(kbdev->tiler_available_bitmap &
+ kbdev->pm.backend.desired_tiler_state));
+
+ /* Log timelining information about handling events that power
+ * up cores, to match up with immediate submission, either
+ * because cores were already available, or from the PM IRQ */
+ if (!in_desired_state)
+ kbase_timeline_pm_send_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+ }
+
+ if (in_desired_state) {
+ KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_L2));
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_SHADER));
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+#endif
+
+ KBASE_TLSTREAM_AUX_PM_STATE(
+ KBASE_PM_CORE_L2,
+ kbase_pm_get_ready_cores(
+ kbdev, KBASE_PM_CORE_L2));
+ KBASE_TLSTREAM_AUX_PM_STATE(
+ KBASE_PM_CORE_SHADER,
+ kbase_pm_get_ready_cores(
+ kbdev, KBASE_PM_CORE_SHADER));
+ KBASE_TLSTREAM_AUX_PM_STATE(
+ KBASE_PM_CORE_TILER,
+ kbase_pm_get_ready_cores(
+ kbdev,
+ KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+ KBASE_TLSTREAM_AUX_PM_STATE(
+ KBASE_PM_CORE_STACK,
+ kbase_pm_get_ready_cores(
+ kbdev,
+ KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+
+ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+ kbdev->pm.backend.gpu_in_desired_state,
+ (u32)kbdev->pm.backend.desired_shader_state);
+ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+ (u32)kbdev->pm.backend.desired_tiler_state);
+
+ /* Log timelining information for synchronous waiters */
+ kbase_timeline_pm_send_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+ /* Wake slow-path waiters. Job scheduler does not use this.
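+ * The slow-path waiters are callers of kbase_pm_check_transitions_sync(),
+ * which sleep on kbdev->pm.backend.gpu_in_desired_state_wait.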
*/ + KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); + + wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); + } + + spin_unlock(&kbdev->pm.backend.gpu_powered_lock); + + /* kbase_pm_ca_update_core_status can cause one-level recursion into + * this function, so it must only be called once all changes to kbdev + * have been committed, and after the gpu_powered_lock has been + * dropped. */ + if (kbdev->shader_ready_bitmap != shader_ready_bitmap || + kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) { + kbdev->shader_ready_bitmap = shader_ready_bitmap; + kbdev->shader_transitioning_bitmap = + shader_transitioning_bitmap; + + kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap, + shader_transitioning_bitmap); + } + + /* The core availability policy is not allowed to keep core group 0 + * turned off (unless it was changing the l2 power state) */ + if (!((shader_ready_bitmap | shader_transitioning_bitmap) & + kbdev->gpu_props.props.coherency_info.group[0].core_mask) && + (prev_l2_available_bitmap == desired_l2_state) && + !(kbase_pm_ca_get_core_mask(kbdev) & + kbdev->gpu_props.props.coherency_info.group[0].core_mask)) + BUG(); + + /* The core availability policy is allowed to keep core group 1 off, + * but all jobs specifically targeting CG1 must fail */ + if (!((shader_ready_bitmap | shader_transitioning_bitmap) & + kbdev->gpu_props.props.coherency_info.group[1].core_mask) && + !(kbase_pm_ca_get_core_mask(kbdev) & + kbdev->gpu_props.props.coherency_info.group[1].core_mask)) + kbdev->pm.backend.cg1_disabled = true; + else + kbdev->pm.backend.cg1_disabled = false; + + return cores_are_available; +} +KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock); + +/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has + * aborted due to a fatal signal. If the time spent waiting has exceeded this + * threshold then there is most likely a hardware issue. */ +#define PM_TIMEOUT (5*HZ) /* 5s */ + +void kbase_pm_check_transitions_sync(struct kbase_device *kbdev) +{ + unsigned long flags; + unsigned long timeout; + bool cores_are_available; + int ret; + + /* Force the transition to be checked and reported - the cores may be + * 'available' (for job submission) but not fully powered up. 
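+ * If the wait below is aborted by a fatal signal and more than PM_TIMEOUT
+ * has elapsed, the desired and current core states are dumped and a GPU
+ * reset is attempted.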
*/ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + + /* Don't need 'cores_are_available', because we don't return anything */ + CSTD_UNUSED(cores_are_available); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + timeout = jiffies + PM_TIMEOUT; + + /* Wait for cores */ + ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, + kbdev->pm.backend.gpu_in_desired_state); + + if (ret < 0 && time_after(jiffies, timeout)) { + dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); + dev_err(kbdev->dev, "Desired state :\n"); + dev_err(kbdev->dev, "\tShader=%016llx\n", + kbdev->pm.backend.desired_shader_state); + dev_err(kbdev->dev, "\tTiler =%016llx\n", + kbdev->pm.backend.desired_tiler_state); + dev_err(kbdev->dev, "Current state :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_HI), NULL), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_LO), + NULL)); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_HI), NULL), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_LO), NULL)); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_HI), NULL), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_LO), NULL)); + dev_err(kbdev->dev, "Cores transitioning :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_HI), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_LO), NULL)); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_HI), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_LO), NULL)); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_HI), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_LO), NULL)); +#if KBASE_GPU_RESET_EN + dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); +#endif /* KBASE_GPU_RESET_EN */ + } else { + /* Log timelining information that a change in state has + * completed */ + kbase_timeline_pm_handle_event(kbdev, + KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + } +} +KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync); + +void kbase_pm_enable_interrupts(struct kbase_device *kbdev) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + /* + * Clear all interrupts, + * and unmask them all. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, + NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, + NULL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, + NULL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL); +} + +KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); + +void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(NULL != kbdev); + /* + * Mask all interrupts, + * and clear them all. 
+ */ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, + NULL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, + NULL); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); +} + +void kbase_pm_disable_interrupts(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_disable_interrupts_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); + + +/* + * pmu layout: + * 0x0000: PMU TAG (RO) (0xCAFECAFE) + * 0x0004: PMU VERSION ID (RO) (0x00000000) + * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE) + */ +void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) +{ + bool reset_required = is_resume; + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->pm.lock); + + if (kbdev->pm.backend.gpu_powered) { + /* Already turned on */ + if (kbdev->poweroff_pending) + kbase_pm_enable_interrupts(kbdev); + kbdev->poweroff_pending = false; + KBASE_DEBUG_ASSERT(!is_resume); + return; + } + + kbdev->poweroff_pending = false; + + KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u); + + if (is_resume && kbdev->pm.backend.callback_power_resume) { + kbdev->pm.backend.callback_power_resume(kbdev); + return; + } else if (kbdev->pm.backend.callback_power_on) { + kbdev->pm.backend.callback_power_on(kbdev); + /* If your platform properly keeps the GPU state you may use the + * return value of the callback_power_on function to + * conditionally reset the GPU on power up. Currently we are + * conservative and always reset the GPU. */ + reset_required = true; + } + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + kbdev->pm.backend.gpu_powered = true; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (reset_required) { + /* GPU state was lost, reset GPU to ensure it is in a + * consistent state */ + kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); + } + + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_ctx_sched_restore_all_as(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + /* Lastly, enable the interrupts */ + kbase_pm_enable_interrupts(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_clock_on); + +bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* ASSERT that the cores should now be unavailable. No lock needed. */ + KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u); + + kbdev->poweroff_pending = true; + + if (!kbdev->pm.backend.gpu_powered) { + /* Already turned off */ + if (is_suspend && kbdev->pm.backend.callback_power_suspend) + kbdev->pm.backend.callback_power_suspend(kbdev); + return true; + } + + KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u); + + /* Disable interrupts. 
This also clears any outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure that any IRQ handlers have finished */ + kbase_synchronize_irqs(kbdev); + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (atomic_read(&kbdev->faults_pending)) { + /* Page/bus faults are still being processed. The GPU can not + * be powered off until they have completed */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return false; + } + + kbase_pm_cache_snoop_disable(kbdev); + + /* The GPU power may be turned off from this point */ + kbdev->pm.backend.gpu_powered = false; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (is_suspend && kbdev->pm.backend.callback_power_suspend) + kbdev->pm.backend.callback_power_suspend(kbdev); + else if (kbdev->pm.backend.callback_power_off) + kbdev->pm.backend.callback_power_off(kbdev); + return true; +} + +KBASE_EXPORT_TEST_API(kbase_pm_clock_off); + +struct kbasep_reset_timeout_data { + struct hrtimer timer; + bool timed_out; + struct kbase_device *kbdev; +}; + +void kbase_pm_reset_done(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + kbdev->pm.backend.reset_done = true; + wake_up(&kbdev->pm.backend.reset_done_wait); +} + +/** + * kbase_pm_wait_for_reset - Wait for a reset to happen + * + * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state. + * + * @kbdev: Kbase device + */ +static void kbase_pm_wait_for_reset(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.lock); + + wait_event(kbdev->pm.backend.reset_done_wait, + (kbdev->pm.backend.reset_done)); + kbdev->pm.backend.reset_done = false; +} + +KBASE_EXPORT_TEST_API(kbase_pm_reset_done); + +static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) +{ + struct kbasep_reset_timeout_data *rtdata = + container_of(timer, struct kbasep_reset_timeout_data, timer); + + rtdata->timed_out = 1; + + /* Set the wait queue to wake up kbase_pm_init_hw even though the reset + * hasn't completed */ + kbase_pm_reset_done(rtdata->kbdev); + + return HRTIMER_NORESTART; +} + +static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) +{ + struct device_node *np = kbdev->dev->of_node; + u32 jm_values[4]; + const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> + GPU_ID_VERSION_PRODUCT_ID_SHIFT; + const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >> + GPU_ID_VERSION_MAJOR_SHIFT; + + kbdev->hw_quirks_sc = 0; + + /* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443. + * and needed due to MIDGLES-3539. See PRLAM-11035 */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) || + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035)) + kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE; + + /* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327. + */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327)) + kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD; + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + /* Enable alternative hardware counter selection if configured. */ + if (!GPU_ID_IS_NEW_FORMAT(prod_id)) + kbdev->hw_quirks_sc |= SC_ALT_COUNTERS; +#endif + + /* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. 
*/ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797)) + kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS; + + if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) { + if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ + kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; + else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ + kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; + } + + if (!kbdev->hw_quirks_sc) + kbdev->hw_quirks_sc = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_CONFIG), NULL); + + kbdev->hw_quirks_tiler = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_CONFIG), NULL); + + /* Set tiler clock gate override if required */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953)) + kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE; + + /* Limit the GPU bus bandwidth if the platform needs this. */ + kbdev->hw_quirks_mmu = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_MMU_CONFIG), NULL); + + /* Limit read ID width for AXI */ + kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS); + kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) << + L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT; + + /* Limit write ID width for AXI */ + kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES); + kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) << + L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT; + + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Allow memory configuration disparity to be ignored, we + * optimize the use of shared memory and thus we expect + * some disparity in the memory configuration */ + kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; + } + + kbdev->hw_quirks_jm = 0; + /* Only for T86x/T88x-based products after r2p0 */ + if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) { + + if (of_property_read_u32_array(np, + "jm_config", + &jm_values[0], + ARRAY_SIZE(jm_values))) { + /* Entry not in device tree, use defaults */ + jm_values[0] = 0; + jm_values[1] = 0; + jm_values[2] = 0; + jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT; + } + + /* Limit throttle limit to 6 bits*/ + if (jm_values[3] > JM_MAX_JOB_THROTTLE_LIMIT) { + dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63)."); + jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT; + } + + /* Aggregate to one integer. */ + kbdev->hw_quirks_jm |= (jm_values[0] ? + JM_TIMESTAMP_OVERRIDE : 0); + kbdev->hw_quirks_jm |= (jm_values[1] ? + JM_CLOCK_GATE_OVERRIDE : 0); + kbdev->hw_quirks_jm |= (jm_values[2] ? + JM_JOB_THROTTLE_ENABLE : 0); + kbdev->hw_quirks_jm |= (jm_values[3] << + JM_JOB_THROTTLE_LIMIT_SHIFT); + + } else if (GPU_ID_IS_NEW_FORMAT(prod_id) && + (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == + GPU_ID2_PRODUCT_TMIX)) { + /* Only for tMIx */ + u32 coherency_features; + + coherency_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); + + /* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly + * documented for tMIx so force correct value here. 
+ */ + if (coherency_features == + COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { + kbdev->hw_quirks_jm |= + (COHERENCY_ACE_LITE | COHERENCY_ACE) << + JM_FORCE_COHERENCY_FEATURES_SHIFT; + } + } + + + if (!kbdev->hw_quirks_jm) + kbdev->hw_quirks_jm = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JM_CONFIG), NULL); + +#ifdef CONFIG_MALI_CORESTACK +#define MANUAL_POWER_CONTROL ((u32)(1 << 8)) + kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; +#endif /* CONFIG_MALI_CORESTACK */ +} + +static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) +{ + kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), + kbdev->hw_quirks_sc, NULL); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), + kbdev->hw_quirks_tiler, NULL); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), + kbdev->hw_quirks_mmu, NULL); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), + kbdev->hw_quirks_jm, NULL); + +} + +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) +{ + if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && + !kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_enable_smc != 0) + kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); + kbdev->cci_snoop_enabled = true; + } +} + +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) +{ + if (kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_disable_smc != 0) { + mali_cci_flush_l2(kbdev); + kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); + } +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); + kbdev->cci_snoop_enabled = false; + } +} + +static int kbase_pm_do_reset(struct kbase_device *kbdev) +{ + struct kbasep_reset_timeout_data rtdata; + + KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); + + KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_SOFT_RESET, NULL); + + /* Unmask the reset complete interrupt only */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED, + NULL); + + /* Initialize a structure for tracking the status of the reset */ + rtdata.kbdev = kbdev; + rtdata.timed_out = 0; + + /* Create a timer to use as a timeout on the reset */ + hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + rtdata.timer.function = kbasep_reset_timeout; + + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), + HRTIMER_MODE_REL); + + /* Wait for the RESET_COMPLETED interrupt to be raised */ + kbase_pm_wait_for_reset(kbdev); + + if (rtdata.timed_out == 0) { + /* GPU has been reset */ + hrtimer_cancel(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); + return 0; + } + + /* No interrupt has been received - check if the RAWSTAT register says + * the reset has completed */ + if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & + RESET_COMPLETED) { + /* The interrupt is set in the RAWSTAT; this suggests that the + * interrupts are not getting to the CPU */ + dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); + /* If interrupts aren't working we can't continue. 
*/ + destroy_hrtimer_on_stack(&rtdata.timer); + return -EINVAL; + } + + /* The GPU doesn't seem to be responding to the reset so try a hard + * reset */ + dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", + RESET_TIMEOUT); + KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_HARD_RESET, NULL); + + /* Restart the timer to wait for the hard reset to complete */ + rtdata.timed_out = 0; + + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), + HRTIMER_MODE_REL); + + /* Wait for the RESET_COMPLETED interrupt to be raised */ + kbase_pm_wait_for_reset(kbdev); + + if (rtdata.timed_out == 0) { + /* GPU has been reset */ + hrtimer_cancel(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); + return 0; + } + + destroy_hrtimer_on_stack(&rtdata.timer); + + dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", + RESET_TIMEOUT); + + return -EINVAL; +} + +static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) +{ + struct kbase_device *kbdev = pdev->data; + + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_SET_PROTECTED_MODE, NULL); + return 0; +} + +static int kbasep_protected_mode_disable(struct protected_mode_device *pdev) +{ + struct kbase_device *kbdev = pdev->data; + + lockdep_assert_held(&kbdev->pm.lock); + + return kbase_pm_do_reset(kbdev); +} + +struct protected_mode_ops kbase_native_protected_ops = { + .protected_mode_enable = kbasep_protected_mode_enable, + .protected_mode_disable = kbasep_protected_mode_disable +}; + +int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) +{ + unsigned long irq_flags; + int err; + bool resume_vinstr = false; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* Ensure the clock is on before attempting to access the hardware */ + if (!kbdev->pm.backend.gpu_powered) { + if (kbdev->pm.backend.callback_power_on) + kbdev->pm.backend.callback_power_on(kbdev); + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, + irq_flags); + kbdev->pm.backend.gpu_powered = true; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + irq_flags); + } + + /* Ensure interrupts are off to begin with, this also clears any + * outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure cache snoops are disabled before reset. 
*/ + kbase_pm_cache_snoop_disable(kbdev); + /* Prepare for the soft-reset */ + kbdev->pm.backend.reset_done = false; + + /* The cores should be made unavailable due to the reset */ + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + if (kbdev->shader_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, + NULL, 0u, (u32)0u); + if (kbdev->tiler_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, + NULL, NULL, 0u, (u32)0u); + kbdev->shader_available_bitmap = 0u; + kbdev->tiler_available_bitmap = 0u; + kbdev->l2_available_bitmap = 0u; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + + /* Soft reset the GPU */ + if (kbdev->protected_mode_support) + err = kbdev->protected_ops->protected_mode_disable( + kbdev->protected_dev); + else + err = kbase_pm_do_reset(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + if (kbdev->protected_mode) + resume_vinstr = true; + kbdev->protected_mode = false; + kbase_ipa_model_use_configured_locked(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + + if (err) + goto exit; + + if (flags & PM_HW_ISSUES_DETECT) + kbase_pm_hw_issues_detect(kbdev); + + kbase_pm_hw_issues_apply(kbdev); + kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); + + /* Sanity check protected mode was left after reset */ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { + u32 gpu_status = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_STATUS), NULL); + + WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE); + } + + /* If cycle counter was in use re-enable it, enable_irqs will only be + * false when called from kbase_pm_powerup */ + if (kbdev->pm.backend.gpu_cycle_counter_requests && + (flags & PM_ENABLE_IRQS)) { + /* enable interrupts as the L2 may have to be powered on */ + kbase_pm_enable_interrupts(kbdev); + kbase_pm_request_l2_caches(kbdev); + + /* Re-enable the counters if we need to */ + spin_lock_irqsave( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + if (kbdev->pm.backend.gpu_cycle_counter_requests) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START, NULL); + spin_unlock_irqrestore( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + kbase_pm_release_l2_caches(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + + kbase_pm_disable_interrupts(kbdev); + } + + if (flags & PM_ENABLE_IRQS) + kbase_pm_enable_interrupts(kbdev); + +exit: + /* If GPU is leaving protected mode resume vinstr operation. 
*/
+ if (kbdev->vinstr_ctx && resume_vinstr)
+ kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+ return err;
+}
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ *
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * kbase_pm_request_gpu_cycle_counter() or
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to
+ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_is_on()
+ *
+ * @kbdev: The kbase device structure of the device
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+
+ ++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+ if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+ spin_unlock_irqrestore(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+ INT_MAX);
+
+ kbase_pm_request_l2_caches(kbdev);
+
+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+ INT_MAX);
+
+ kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+ --kbdev->pm.backend.gpu_cycle_counter_requests;
+
+ if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+ spin_unlock_irqrestore(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+
+ kbase_pm_release_l2_caches(kbdev);
+}
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100644
index 000000000000..6804f45ac27b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,548 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ * the device.
+ *
+ * This function can be called by the active power policy to return a bitmask
+ * of the cores (of a specified type) present in the GPU device.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ * pointer)
+ * @type: The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ * active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask
+ * of the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type: The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ * transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask
+ * of the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type: The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ * powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask
+ * of the cores (of a specified type) that are powered and ready for jobs (they
+ * may or may not be currently executing jobs).
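+ * (Contrast with kbase_pm_get_active_cores(), which returns only the
+ * subset of powered cores that is busy processing work.)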
+ * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * + * Return: The bit mask of ready cores + */ +u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type); + +/** + * kbase_pm_clock_on - Turn the clock for the device on, and enable device + * interrupts. + * + * This function can be used by a power policy to turn the clock for the GPU on. + * It should be modified during integration to perform the necessary actions to + * ensure that the GPU is fully powered and clocked. + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_resume: true if clock on due to resume after suspend, false otherwise + */ +void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); + +/** + * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the + * device off. + * + * This function can be used by a power policy to turn the clock for the GPU + * off. It should be modified during integration to perform the necessary + * actions to turn the clock off (if this is possible in the integration). + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_suspend: true if clock off due to suspend, false otherwise + * + * Return: true if clock was turned off, or + * false if clock can not be turned off due to pending page/bus fault + * workers. Caller must flush MMU workqueues and retry + */ +bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend); + +/** + * kbase_pm_enable_interrupts - Enable interrupts on the device. + * + * Interrupts are also enabled after a call to kbase_pm_clock_on(). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_enable_interrupts(struct kbase_device *kbdev); + +/** + * kbase_pm_disable_interrupts - Disable interrupts on the device. + * + * This prevents delivery of Power Management interrupts to the CPU so that + * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler + * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called. + * + * Interrupts are also disabled after a call to kbase_pm_clock_off(). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_disable_interrupts(struct kbase_device *kbdev); + +/** + * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts() + * that does not take the hwaccess_lock + * + * Caller must hold the hwaccess_lock. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev); + +/** + * kbase_pm_init_hw - Initialize the hardware. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @flags: Flags specifying the type of PM init + * + * This function checks the GPU ID register to ensure that the GPU is supported + * by the driver and performs a reset on the device so that it is in a known + * state before the device is used. + * + * Return: 0 if the device is supported and successfully reset. + */ +int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); + +/** + * kbase_pm_reset_done - The GPU has been reset successfully. + * + * This function must be called by the GPU interrupt handler when the + * RESET_COMPLETED bit is set. 
It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions
+ * to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state requests no cores:
+ * if no cores are desired, then none are unavailable. In this case, the
+ * caller may still need to try submitting jobs. This is because the Core
+ * Availability Policy might have taken us to an intermediate state where no
+ * cores are powered, before powering on more cores (e.g. for core rotation).
+ *
+ * The caller must hold kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: true when all desired cores are available, i.e. it's worthwhile
+ * for the caller to submit a job; false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of
+ * kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device.pm.power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ * where the caller must hold
+ * kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ * the Power Policy, and begin any power
+ * transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It
+ * will then begin power transitions to make the hardware achieve the desired
+ * shader core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
+ * the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
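+ *
+ * An editorial sketch (not part of the original header) of the expected
+ * call order in a direct power-off path, with pm.lock held as this
+ * function requires:
+ *
+ *   mutex_lock(&kbdev->pm.lock);
+ *   kbase_pm_cancel_deferred_poweroff(kbdev);
+ *   kbase_pm_do_poweroff(kbdev, false);
+ *   mutex_unlock(&kbdev->pm.lock);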
+ * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev); + +/** + * kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required + * and used cores. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev); + +/** + * kbasep_pm_metrics_init - Initialize the metrics gathering framework. + * + * This must be called before other metric gathering APIs are called. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 on success, error code on error + */ +int kbasep_pm_metrics_init(struct kbase_device *kbdev); + +/** + * kbasep_pm_metrics_term - Terminate the metrics gathering framework. + * + * This must be called when metric gathering is no longer required. It is an + * error to call any metrics gathering function (other than + * kbasep_pm_metrics_init()) after calling this function. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbasep_pm_metrics_term(struct kbase_device *kbdev); + +/** + * kbase_pm_report_vsync - Function to be called by the frame buffer driver to + * update the vsync metric. + * + * This function should be called by the frame buffer driver to update whether + * the system is hitting the vsync target or not. buffer_updated should be true + * if the vsync corresponded with a new frame being displayed, otherwise it + * should be false. This function does not need to be called every vsync, but + * only when the value of @buffer_updated differs from a previous call. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @buffer_updated: True if the buffer has been updated on this VSync, + * false otherwise + */ +void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated); + +/** + * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change + * the clock speed of the GPU. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function should be called regularly by the DVFS system to check whether + * the clock speed of the GPU needs updating. + */ +void kbase_pm_get_dvfs_action(struct kbase_device *kbdev); + +/** + * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is + * needed + * + * If the caller is the first caller then the GPU cycle counters will be enabled + * along with the l2 cache + * + * The GPU must be powered when calling this function (i.e. + * kbase_pm_context_active() must have been called). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev); + +/** + * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is + * needed (l2 cache already on) + * + * This is a version of the above function + * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the + * l2 cache is known to be on and assured to be on until the subsequent call of + * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does + * not sleep and can be called from atomic functions. + * + * The GPU must be powered when calling this function (i.e. + * kbase_pm_context_active() must have been called) and the l2 cache must be + * powered on. 
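+ *
+ * An editorial sketch of the expected pairing (not part of the original
+ * header), from a context that already guarantees the L2 is powered:
+ *
+ *   kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
+ *   ... submit the job that uses the cycle counter ...
+ *   kbase_pm_release_gpu_cycle_counter(kbdev);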
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ * longer in use
+ *
+ * If the caller is the last caller then the GPU cycle counters will be
+ * disabled. A request must have been made before a call to this.
+ *
+ * Caller must not hold the hwaccess_lock, as it will be taken in this
+ * function. If the caller is already holding this lock then
+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
+ * that does not take hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
+ * complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * Access to registers should be done using kbase_os_reg_read()/write() at this
+ * stage, not kbase_reg_read()/write().
+ *
+ * This causes the power management callbacks provided in the driver
+ * configuration to be called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This causes the power management callbacks provided in the driver
+ * configuration to be called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ * collection is active.
+ *
+ * Note that this returns whether the power management metrics collection was
+ * active at the time of the call; the collection may have been enabled or
+ * disabled again by the time the caller acts on the result.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active, false otherwise.
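+ *
+ * For illustration only (an editorial sketch; poll_utilisation() is an
+ * assumed name), callers should treat the result as a hint, e.g. to
+ * avoid polling manually while timer-driven collection is running:
+ *
+ *   if (!kbase_pm_metrics_is_active(kbdev))
+ *           poll_utilisation(kbdev);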
+ */ +bool kbase_pm_metrics_is_active(struct kbase_device *kbdev); + +/** + * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested. + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_resume: true if power on due to resume after suspend, + * false otherwise + */ +void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); + +/** + * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been + * requested. + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_suspend: true if power off due to suspend, + * false otherwise + */ +void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend); + +#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) +void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev, + unsigned long *total, unsigned long *busy); +void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev); +#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */ + +#ifdef CONFIG_MALI_MIDGARD_DVFS + +/** + * kbase_platform_dvfs_event - Report utilisation to DVFS code + * + * Function provided by platform specific code when DVFS is enabled to allow + * the power management metrics system to report utilisation. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @utilisation: The current calculated utilisation by the metrics system. + * @util_gl_share: The current calculated gl share of utilisation. + * @util_cl_share: The current calculated cl share of utilisation per core + * group. + * Return: Returns 0 on failure and non zero on success. + */ + +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, + u32 util_gl_share, u32 util_cl_share[2]); +#endif + +void kbase_pm_power_changed(struct kbase_device *kbdev); + +/** + * kbase_pm_metrics_update - Inform the metrics system that an atom is either + * about to be run or has just completed. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @now: Pointer to the timestamp of the change, or NULL to use current time + * + * Caller must hold hwaccess_lock + */ +void kbase_pm_metrics_update(struct kbase_device *kbdev, + ktime_t *now); + +/** + * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called after L2 power up. + */ + +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called before L2 power off. + */ +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); + +#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c new file mode 100644 index 000000000000..024248ca7123 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c @@ -0,0 +1,401 @@ +/* + * + * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Metrics for power management + */ + +#include <mali_kbase.h> +#include <mali_kbase_pm.h> +#include <backend/gpu/mali_kbase_pm_internal.h> +#include <backend/gpu/mali_kbase_jm_rb.h> + +/* When VSync is being hit aim for utilisation between 70-90% */ +#define KBASE_PM_VSYNC_MIN_UTILISATION 70 +#define KBASE_PM_VSYNC_MAX_UTILISATION 90 +/* Otherwise aim for 10-40% */ +#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10 +#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40 + +/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns + * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly + * under 11s. Exceeding this will cause overflow */ +#define KBASE_PM_TIME_SHIFT 8 + +/* Maximum time between sampling of utilization data, without resetting the + * counters. */ +#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */ + +#ifdef CONFIG_MALI_MIDGARD_DVFS +static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) +{ + unsigned long flags; + struct kbasep_pm_metrics_data *metrics; + + KBASE_DEBUG_ASSERT(timer != NULL); + + metrics = container_of(timer, struct kbasep_pm_metrics_data, timer); + kbase_pm_get_dvfs_action(metrics->kbdev); + + spin_lock_irqsave(&metrics->lock, flags); + + if (metrics->timer_active) + hrtimer_start(timer, + HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); + + spin_unlock_irqrestore(&metrics->lock, flags); + + return HRTIMER_NORESTART; +} +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + +int kbasep_pm_metrics_init(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbdev->pm.backend.metrics.kbdev = kbdev; + + kbdev->pm.backend.metrics.time_period_start = ktime_get(); + kbdev->pm.backend.metrics.time_busy = 0; + kbdev->pm.backend.metrics.time_idle = 0; + kbdev->pm.backend.metrics.prev_busy = 0; + kbdev->pm.backend.metrics.prev_idle = 0; + kbdev->pm.backend.metrics.gpu_active = false; + kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; + kbdev->pm.backend.metrics.busy_cl[0] = 0; + kbdev->pm.backend.metrics.busy_cl[1] = 0; + kbdev->pm.backend.metrics.busy_gl = 0; + + spin_lock_init(&kbdev->pm.backend.metrics.lock); + +#ifdef CONFIG_MALI_MIDGARD_DVFS + kbdev->pm.backend.metrics.timer_active = true; + hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + kbdev->pm.backend.metrics.timer.function = dvfs_callback; + + hrtimer_start(&kbdev->pm.backend.metrics.timer, + HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + + return 0; +} + +KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); + +void kbasep_pm_metrics_term(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_MIDGARD_DVFS + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbdev->pm.backend.metrics.timer_active = false; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + hrtimer_cancel(&kbdev->pm.backend.metrics.timer); +#endif /* CONFIG_MALI_MIDGARD_DVFS */ +} + +KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); + +/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this + * function + */ +static 
void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, + ktime_t now) +{ + ktime_t diff; + + lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + + diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); + if (ktime_to_ns(diff) < 0) + return; + + if (kbdev->pm.backend.metrics.gpu_active) { + u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + + kbdev->pm.backend.metrics.time_busy += ns_time; + if (kbdev->pm.backend.metrics.active_cl_ctx[0]) + kbdev->pm.backend.metrics.busy_cl[0] += ns_time; + if (kbdev->pm.backend.metrics.active_cl_ctx[1]) + kbdev->pm.backend.metrics.busy_cl[1] += ns_time; + if (kbdev->pm.backend.metrics.active_gl_ctx[0]) + kbdev->pm.backend.metrics.busy_gl += ns_time; + if (kbdev->pm.backend.metrics.active_gl_ctx[1]) + kbdev->pm.backend.metrics.busy_gl += ns_time; + } else { + kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff) + >> KBASE_PM_TIME_SHIFT); + } + + kbdev->pm.backend.metrics.time_period_start = now; +} + +#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) +/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this + * function. + */ +static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev, + ktime_t now) +{ + /* Store previous value */ + kbdev->pm.backend.metrics.prev_idle = + kbdev->pm.backend.metrics.time_idle; + kbdev->pm.backend.metrics.prev_busy = + kbdev->pm.backend.metrics.time_busy; + + /* Reset current values */ + kbdev->pm.backend.metrics.time_period_start = now; + kbdev->pm.backend.metrics.time_idle = 0; + kbdev->pm.backend.metrics.time_busy = 0; + kbdev->pm.backend.metrics.busy_cl[0] = 0; + kbdev->pm.backend.metrics.busy_cl[1] = 0; + kbdev->pm.backend.metrics.busy_gl = 0; +} + +void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get()); + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} + +void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev, + unsigned long *total_out, unsigned long *busy_out) +{ + ktime_t now = ktime_get(); + unsigned long flags, busy, total; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbase_pm_get_dvfs_utilisation_calc(kbdev, now); + + busy = kbdev->pm.backend.metrics.time_busy; + total = busy + kbdev->pm.backend.metrics.time_idle; + + /* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default + * 100ms) */ + if (total >= MALI_UTILIZATION_MAX_PERIOD) { + kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now); + } else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) { + total += kbdev->pm.backend.metrics.prev_idle + + kbdev->pm.backend.metrics.prev_busy; + busy += kbdev->pm.backend.metrics.prev_busy; + } + + *total_out = total; + *busy_out = busy; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} +#endif + +#ifdef CONFIG_MALI_MIDGARD_DVFS + +/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this + * function + */ +int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev, + int *util_gl_share, + int util_cl_share[2], + ktime_t now) +{ + int utilisation; + int busy; + + kbase_pm_get_dvfs_utilisation_calc(kbdev, now); + + if (kbdev->pm.backend.metrics.time_idle + + kbdev->pm.backend.metrics.time_busy == 0) { + /* No data - so we return NOP */ + utilisation = -1; + if (util_gl_share) + *util_gl_share = -1; + if (util_cl_share) { + util_cl_share[0] = -1; + 
util_cl_share[1] = -1; + } + goto out; + } + + utilisation = (100 * kbdev->pm.backend.metrics.time_busy) / + (kbdev->pm.backend.metrics.time_idle + + kbdev->pm.backend.metrics.time_busy); + + busy = kbdev->pm.backend.metrics.busy_gl + + kbdev->pm.backend.metrics.busy_cl[0] + + kbdev->pm.backend.metrics.busy_cl[1]; + + if (busy != 0) { + if (util_gl_share) + *util_gl_share = + (100 * kbdev->pm.backend.metrics.busy_gl) / + busy; + if (util_cl_share) { + util_cl_share[0] = + (100 * kbdev->pm.backend.metrics.busy_cl[0]) / + busy; + util_cl_share[1] = + (100 * kbdev->pm.backend.metrics.busy_cl[1]) / + busy; + } + } else { + if (util_gl_share) + *util_gl_share = -1; + if (util_cl_share) { + util_cl_share[0] = -1; + util_cl_share[1] = -1; + } + } + +out: + return utilisation; +} + +void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) +{ + unsigned long flags; + int utilisation, util_gl_share; + int util_cl_share[2]; + ktime_t now; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + + now = ktime_get(); + + utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share, + util_cl_share, now); + + if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 || + util_cl_share[1] < 0) { + utilisation = 0; + util_gl_share = 0; + util_cl_share[0] = 0; + util_cl_share[1] = 0; + goto out; + } + +out: +#ifdef CONFIG_MALI_MIDGARD_DVFS + kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, + util_cl_share); +#endif /*CONFIG_MALI_MIDGARD_DVFS */ + + kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now); + + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} + +bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) +{ + bool isactive; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + isactive = kbdev->pm.backend.metrics.timer_active; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + return isactive; +} +KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); + +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + +/** + * kbase_pm_metrics_active_calc - Update PM active counts based on currently + * running atoms + * @kbdev: Device pointer + * + * The caller must hold kbdev->pm.backend.metrics.lock + */ +static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) +{ + int js; + + lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + + kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; + kbdev->pm.backend.metrics.gpu_active = false; + + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + + /* Head atom may have just completed, so if it isn't running + * then try the next atom */ + if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) + katom = kbase_gpu_inspect(kbdev, js, 1); + + if (katom && katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + int device_nr = (katom->core_req & + BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) + ? katom->device_nr : 0; + if (!WARN_ON(device_nr >= 2)) + kbdev->pm.backend.metrics. + active_cl_ctx[device_nr] = 1; + } else { + /* Slot 2 should not be running non-compute + * atoms */ + if (!WARN_ON(js >= 2)) + kbdev->pm.backend.metrics. 
+					active_gl_ctx[js] = 1;
+			}
+			kbdev->pm.backend.metrics.gpu_active = true;
+		}
+	}
+}
+
+/* called when job is submitted to or removed from a GPU slot */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	unsigned long flags;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	if (!timestamp) {
+		now = ktime_get();
+		timestamp = &now;
+	}
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100644
index 000000000000..075f020c66e6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,973 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else /* CONFIG_MALI_NO_MALI */
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+	&kbase_pm_coarse_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/* The number of policies available in the system.
+ * This is derived from the number of policy operation structures listed in
+ * policy_list above.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT.
*/ + + /* Must be the last */ + KBASE_PM_FUNC_ID_COUNT +}; + + +/* State changes during request/unrequest/release-ing cores */ +enum { + KBASE_PM_CHANGE_STATE_SHADER = (1u << 0), + KBASE_PM_CHANGE_STATE_TILER = (1u << 1), + + /* These two must be last */ + KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER | + KBASE_PM_CHANGE_STATE_SHADER), + KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1 +}; +typedef u32 kbase_pm_change_state; + + +#ifdef CONFIG_MALI_TRACE_TIMELINE +/* Timeline Trace code lookups for each function */ +static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT] + [KBASE_PM_CHANGE_STATE_COUNT] = { + /* kbase_pm_request_cores */ + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0, + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER | + KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, + + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0, + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER | + KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, + + /* kbase_pm_release_cores */ + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0, + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER | + KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, + + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0, + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER | + KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END +}; + +static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, + enum kbase_pm_func_id func_id, + kbase_pm_change_state state) +{ + int trace_code; + + KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT); + KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) == + state); + + trace_code = kbase_pm_change_state_trace_code[func_id][state]; + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code); +} + +#else /* CONFIG_MALI_TRACE_TIMELINE */ +static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, + enum kbase_pm_func_id func_id, kbase_pm_change_state state) +{ +} + +#endif /* CONFIG_MALI_TRACE_TIMELINE */ + +/** + * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any + * requested shader cores + * @kbdev: Device pointer + */ +static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev) +{ + u64 prev_shader_state = kbdev->pm.backend.desired_shader_state; + u64 prev_tiler_state = 
kbdev->pm.backend.desired_tiler_state;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->pm.backend.desired_shader_state &=
+			~kbdev->pm.backend.shader_poweroff_pending;
+	kbdev->pm.backend.desired_tiler_state &=
+			~kbdev->pm.backend.tiler_poweroff_pending;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+
+	if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
+			prev_tiler_state !=
+				kbdev->pm.backend.desired_tiler_state ||
+			kbdev->pm.backend.ca_in_transition) {
+		bool cores_are_available;
+
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+			SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+		/* Don't need 'cores_are_available',
+		 * because we don't return anything */
+		CSTD_UNUSED(cores_are_available);
+	}
+}
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(timer, struct kbase_device,
+						pm.backend.gpu_poweroff_timer);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* It is safe for this call to do nothing if the work item is already
+	 * queued. The worker function will read the most up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending_time--;
+
+		KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending_time)
+			kbasep_pm_do_poweroff_cores(kbdev);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	kbdev->pm.backend.poweroff_timer_running = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	bool do_poweroff = false;
+
+	kbdev = container_of(data, struct kbase_device,
+						pm.backend.gpu_poweroff_work);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	kbdev->pm.backend.gpu_poweroff_pending--;
+
+	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Only power off the GPU if a request is still pending */
+	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+		do_poweroff = true;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (do_poweroff) {
+		kbdev->pm.backend.poweroff_timer_needed = false;
+		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+		kbdev->pm.backend.poweroff_timer_running = false;
+
+		/* Power off the GPU */
+		kbase_pm_do_poweroff(kbdev, false);
+	}
+
mutex_unlock(&kbdev->pm.lock); +} + +int kbase_pm_policy_init(struct kbase_device *kbdev) +{ + struct workqueue_struct *wq; + + wq = alloc_workqueue("kbase_pm_do_poweroff", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!wq) + return -ENOMEM; + + kbdev->pm.backend.gpu_poweroff_wq = wq; + INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work, + kbasep_pm_do_gpu_poweroff_wq); + hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kbdev->pm.backend.gpu_poweroff_timer.function = + kbasep_pm_do_gpu_poweroff_callback; + kbdev->pm.backend.pm_current_policy = policy_list[0]; + kbdev->pm.backend.pm_current_policy->init(kbdev); + kbdev->pm.gpu_poweroff_time = + HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); + kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; + kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU; + + return 0; +} + +void kbase_pm_policy_term(struct kbase_device *kbdev) +{ + kbdev->pm.backend.pm_current_policy->term(kbdev); + destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq); +} + +void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) +{ + unsigned long flags; + + lockdep_assert_held(&kbdev->pm.lock); + + kbdev->pm.backend.poweroff_timer_needed = false; + hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.poweroff_timer_running = false; + + /* If wq is already running but is held off by pm.lock, make sure it has + * no effect */ + kbdev->pm.backend.gpu_poweroff_pending = 0; + + kbdev->pm.backend.shader_poweroff_pending = 0; + kbdev->pm.backend.tiler_poweroff_pending = 0; + kbdev->pm.backend.shader_poweroff_pending_time = 0; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_pm_update_active(struct kbase_device *kbdev) +{ + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; + bool active; + + lockdep_assert_held(&pm->lock); + + /* pm_current_policy will never be NULL while pm.lock is held */ + KBASE_DEBUG_ASSERT(backend->pm_current_policy); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + active = backend->pm_current_policy->get_core_active(kbdev); + + if (active) { + if (backend->gpu_poweroff_pending) { + /* Cancel any pending power off request */ + backend->gpu_poweroff_pending = 0; + + /* If a request was pending then the GPU was still + * powered, so no need to continue */ + if (!kbdev->poweroff_pending) { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + return; + } + } + + if (!backend->poweroff_timer_running && !backend->gpu_powered && + (pm->poweroff_gpu_ticks || + pm->poweroff_shader_ticks)) { + backend->poweroff_timer_needed = true; + backend->poweroff_timer_running = true; + hrtimer_start(&backend->gpu_poweroff_timer, + pm->gpu_poweroff_time, + HRTIMER_MODE_REL); + } + + /* Power on the GPU and any cores requested by the policy */ + if (pm->backend.poweroff_wait_in_progress) { + pm->backend.poweron_required = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_pm_do_poweron(kbdev, false); + } + } else { + /* It is an error for the power policy to power off the GPU + * when there are contexts active */ + KBASE_DEBUG_ASSERT(pm->active_count == 0); + + if (backend->shader_poweroff_pending || + backend->tiler_poweroff_pending) { + backend->shader_poweroff_pending = 0; + backend->tiler_poweroff_pending = 0; + 
backend->shader_poweroff_pending_time = 0; + } + + /* Request power off */ + if (pm->backend.gpu_powered) { + if (pm->poweroff_gpu_ticks) { + backend->gpu_poweroff_pending = + pm->poweroff_gpu_ticks; + backend->poweroff_timer_needed = true; + if (!backend->poweroff_timer_running) { + /* Start timer if not running (eg if + * power policy has been changed from + * always_on to something else). This + * will ensure the GPU is actually + * powered off */ + backend->poweroff_timer_running + = true; + hrtimer_start( + &backend->gpu_poweroff_timer, + pm->gpu_poweroff_time, + HRTIMER_MODE_REL); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + + /* Power off the GPU immediately */ + kbase_pm_do_poweroff(kbdev, false); + } + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + } +} + +void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) +{ + u64 desired_bitmap; + u64 desired_tiler_bitmap; + bool cores_are_available; + bool do_poweroff = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->pm.backend.pm_current_policy == NULL) + return; + if (kbdev->pm.backend.poweroff_wait_in_progress) + return; + + if (kbdev->protected_mode_transition && !kbdev->shader_needed_bitmap && + !kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt + && !kbdev->tiler_inuse_cnt) { + /* We are trying to change in/out of protected mode - force all + * cores off so that the L2 powers down */ + desired_bitmap = 0; + desired_tiler_bitmap = 0; + } else { + desired_bitmap = + kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); + desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); + + if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) + desired_tiler_bitmap = 1; + else + desired_tiler_bitmap = 0; + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { + /* Unless XAFFINITY is supported, enable core 0 if tiler + * required, regardless of core availability */ + if (kbdev->tiler_needed_cnt > 0 || + kbdev->tiler_inuse_cnt > 0) + desired_bitmap |= 1; + } + } + + if (kbdev->pm.backend.desired_shader_state != desired_bitmap) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, + (u32)desired_bitmap); + /* Are any cores being powered on? 
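	 * That is: does the new desired bitmap include cores that are not in
	 * the current desired state, or is the core availability policy in
	 * the middle of a transition?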
*/ + if (~kbdev->pm.backend.desired_shader_state & desired_bitmap || + ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap || + kbdev->pm.backend.ca_in_transition) { + /* Check if we are powering off any cores before updating shader + * state */ + if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || + kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap) { + /* Start timer to power off cores */ + kbdev->pm.backend.shader_poweroff_pending |= + (kbdev->pm.backend.desired_shader_state & + ~desired_bitmap); + kbdev->pm.backend.tiler_poweroff_pending |= + (kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap); + + if (kbdev->pm.poweroff_shader_ticks && + !kbdev->protected_mode_transition) + kbdev->pm.backend.shader_poweroff_pending_time = + kbdev->pm.poweroff_shader_ticks; + else + do_poweroff = true; + } + + kbdev->pm.backend.desired_shader_state = desired_bitmap; + kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap; + + /* If any cores are being powered on, transition immediately */ + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || + kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap) { + /* Start timer to power off cores */ + kbdev->pm.backend.shader_poweroff_pending |= + (kbdev->pm.backend.desired_shader_state & + ~desired_bitmap); + kbdev->pm.backend.tiler_poweroff_pending |= + (kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap); + if (kbdev->pm.poweroff_shader_ticks && + !kbdev->protected_mode_transition) + kbdev->pm.backend.shader_poweroff_pending_time = + kbdev->pm.poweroff_shader_ticks; + else + kbasep_pm_do_poweroff_cores(kbdev); + } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 && + desired_tiler_bitmap != 0 && + kbdev->pm.backend.poweroff_timer_needed) { + /* If power policy is keeping cores on despite there being no + * active contexts then disable poweroff timer as it isn't + * required. 
+ * Only reset poweroff_timer_needed if we're not in the middle + * of the power off callback */ + kbdev->pm.backend.poweroff_timer_needed = false; + } + + /* Ensure timer does not power off wanted cores and make sure to power + * off unwanted cores */ + if (kbdev->pm.backend.shader_poweroff_pending || + kbdev->pm.backend.tiler_poweroff_pending) { + kbdev->pm.backend.shader_poweroff_pending &= + ~(kbdev->pm.backend.desired_shader_state & + desired_bitmap); + kbdev->pm.backend.tiler_poweroff_pending &= + ~(kbdev->pm.backend.desired_tiler_state & + desired_tiler_bitmap); + + if (!kbdev->pm.backend.shader_poweroff_pending && + !kbdev->pm.backend.tiler_poweroff_pending) + kbdev->pm.backend.shader_poweroff_pending_time = 0; + } + + /* Shader poweroff is deferred to the end of the function, to eliminate + * issues caused by the core availability policy recursing into this + * function */ + if (do_poweroff) + kbasep_pm_do_poweroff_cores(kbdev); + + /* Don't need 'cores_are_available', because we don't return anything */ + CSTD_UNUSED(cores_are_available); +} + +void kbase_pm_update_cores_state(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbase_pm_update_cores_state_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +int kbase_pm_list_policies(const struct kbase_pm_policy * const **list) +{ + if (!list) + return POLICY_COUNT; + + *list = policy_list; + + return POLICY_COUNT; +} + +KBASE_EXPORT_TEST_API(kbase_pm_list_policies); + +const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + return kbdev->pm.backend.pm_current_policy; +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_policy); + +void kbase_pm_set_policy(struct kbase_device *kbdev, + const struct kbase_pm_policy *new_policy) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + const struct kbase_pm_policy *old_policy; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(new_policy != NULL); + + KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id); + + /* During a policy change we pretend the GPU is active */ + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread */ + kbase_pm_context_active(kbdev); + + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + + /* Remove the policy to prevent IRQ handlers from working on it */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + old_policy = kbdev->pm.backend.pm_current_policy; + kbdev->pm.backend.pm_current_policy = NULL; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u, + old_policy->id); + if (old_policy->term) + old_policy->term(kbdev); + + KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u, + new_policy->id); + if (new_policy->init) + new_policy->init(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.pm_current_policy = new_policy; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* If any core power state changes were previously attempted, but + * couldn't be made because the policy was changing (current_policy was + * NULL), then re-try them here. 
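	 * kbase_pm_update_active() and kbase_pm_update_cores_state() below
	 * perform that re-check.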
*/ + kbase_pm_update_active(kbdev); + kbase_pm_update_cores_state(kbdev); + + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + + /* Now the policy change is finished, we release our fake context active + * reference */ + kbase_pm_context_idle(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_set_policy); + +/* Check whether a state change has finished, and trace it as completed */ +static void +kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev) +{ + if ((kbdev->shader_available_bitmap & + kbdev->pm.backend.desired_shader_state) + == kbdev->pm.backend.desired_shader_state && + (kbdev->tiler_available_bitmap & + kbdev->pm.backend.desired_tiler_state) + == kbdev->pm.backend.desired_tiler_state) + kbase_timeline_pm_check_handle_event(kbdev, + KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); +} + +void kbase_pm_request_cores(struct kbase_device *kbdev, + bool tiler_required, u64 shader_cores) +{ + u64 cores; + + kbase_pm_change_state change_gpu_state = 0u; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + cores = shader_cores; + while (cores) { + int bitnum = fls64(cores) - 1; + u64 bit = 1ULL << bitnum; + + /* It should be almost impossible for this to overflow. It would + * require 2^32 atoms to request a particular core, which would + * require 2^24 contexts to submit. This would require an amount + * of memory that is impossible on a 32-bit system and extremely + * unlikely on a 64-bit system. */ + int cnt = ++kbdev->shader_needed_cnt[bitnum]; + + if (1 == cnt) { + kbdev->shader_needed_bitmap |= bit; + change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; + } + + cores &= ~bit; + } + + if (tiler_required) { + int cnt = ++kbdev->tiler_needed_cnt; + + if (1 == cnt) + change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; + + KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0); + } + + if (change_gpu_state) { + KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL, + NULL, 0u, (u32) kbdev->shader_needed_bitmap); + + kbase_timeline_pm_cores_func(kbdev, + KBASE_PM_FUNC_ID_REQUEST_CORES_START, + change_gpu_state); + kbase_pm_update_cores_state_nolock(kbdev); + kbase_timeline_pm_cores_func(kbdev, + KBASE_PM_FUNC_ID_REQUEST_CORES_END, + change_gpu_state); + } +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_cores); + +void kbase_pm_unrequest_cores(struct kbase_device *kbdev, + bool tiler_required, u64 shader_cores) +{ + kbase_pm_change_state change_gpu_state = 0u; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + while (shader_cores) { + int bitnum = fls64(shader_cores) - 1; + u64 bit = 1ULL << bitnum; + int cnt; + + KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0); + + cnt = --kbdev->shader_needed_cnt[bitnum]; + + if (0 == cnt) { + kbdev->shader_needed_bitmap &= ~bit; + + change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; + } + + shader_cores &= ~bit; + } + + if (tiler_required) { + int cnt; + + KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0); + + cnt = --kbdev->tiler_needed_cnt; + + if (0 == cnt) + change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; + } + + if (change_gpu_state) { + KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL, + NULL, 0u, (u32) kbdev->shader_needed_bitmap); + + kbase_pm_update_cores_state_nolock(kbdev); + + /* Trace that any state change effectively completes immediately + * - no-one will wait on the state change */ + kbase_pm_trace_check_and_finish_state_change(kbdev); + } +} + +KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores); + +enum 
kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 prev_shader_needed;	/* Just for tracing */
+	u64 prev_shader_inuse;	/* Just for tracing */
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	prev_shader_needed = kbdev->shader_needed_bitmap;
+	prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then
+	 * power management is not attempting to power those cores (most
+	 * likely due to core availability policy) and a new job affinity must
+	 * be chosen */
+	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+							shader_cores) {
+		return (kbdev->pm.backend.poweroff_wait_in_progress ||
+				kbdev->pm.backend.pm_current_policy == NULL) ?
+				KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
+	}
+
+	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+			(tiler_required && !kbdev->tiler_available_bitmap)) {
+		/* Trace ongoing core transition */
+		kbase_timeline_pm_l2_transition_start(kbdev);
+		return KBASE_CORES_NOT_READY;
+	}
+
+	/* If we started to trace a state change, then trace it as being
+	 * finished by now, at the very latest */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+	/* Trace core transition done */
+	kbase_timeline_pm_l2_transition_done(kbdev);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt)
+			kbdev->shader_needed_bitmap &= ~bit;
+
+		/* shader_inuse_cnt should not overflow because there can only
+		 * be a very limited number of jobs on the h/w at one time */
+
+		kbdev->shader_inuse_cnt[bitnum]++;
+		kbdev->shader_inuse_bitmap |= bit;
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		--kbdev->tiler_needed_cnt;
+
+		kbdev->tiler_inuse_cnt++;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+	}
+
+	if (prev_shader_needed != kbdev->shader_needed_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_inuse_bitmap &= ~bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+		cnt = --kbdev->tiler_inuse_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+		kbase_timeline_pm_cores_func(kbdev,
+					KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+							change_gpu_state);
+		kbase_pm_update_cores_state_nolock(kbdev);
+ kbase_timeline_pm_cores_func(kbdev, + KBASE_PM_FUNC_ID_RELEASE_CORES_END, + change_gpu_state); + + /* Trace that any state change completed immediately */ + kbase_pm_trace_check_and_finish_state_change(kbdev); + } +} + +KBASE_EXPORT_TEST_API(kbase_pm_release_cores); + +void kbase_pm_request_cores_sync(struct kbase_device *kbdev, + bool tiler_required, + u64 shader_cores) +{ + unsigned long flags; + + kbase_pm_wait_for_poweroff_complete(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_request_cores(kbdev, tiler_required, shader_cores); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_pm_check_transitions_sync(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync); + +void kbase_pm_request_l2_caches(struct kbase_device *kbdev) +{ + unsigned long flags; + u32 prior_l2_users_count; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + prior_l2_users_count = kbdev->l2_users_count++; + + KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0); + + /* if the GPU is reset while the l2 is on, l2 will be off but + * prior_l2_users_count will be > 0. l2_available_bitmap will have been + * set to 0 though by kbase_pm_init_hw */ + if (!prior_l2_users_count || !kbdev->l2_available_bitmap) + kbase_pm_check_transitions_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + wait_event(kbdev->pm.backend.l2_powered_wait, + kbdev->pm.backend.l2_powered == 1); + + /* Trace that any state change completed immediately */ + kbase_pm_trace_check_and_finish_state_change(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches); + +void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->l2_users_count++; +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on); + +void kbase_pm_release_l2_caches(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0); + + --kbdev->l2_users_count; + + if (!kbdev->l2_users_count) { + kbase_pm_check_transitions_nolock(kbdev); + /* Trace that any state change completed immediately */ + kbase_pm_trace_check_and_finish_state_change(kbdev); + } +} + +KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h new file mode 100644 index 000000000000..611a90e66e65 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h @@ -0,0 +1,227 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * Power policy API definitions + */ + +#ifndef _KBASE_PM_POLICY_H_ +#define _KBASE_PM_POLICY_H_ + +/** + * kbase_pm_policy_init - Initialize power policy framework + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Must be called before calling any other policy function + * + * Return: 0 if the power policy framework was successfully + * initialized, -errno otherwise. 
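+ *
+ * An editorial sketch of the expected init/term pairing during device
+ * bring-up (not part of the original header; the surrounding probe code
+ * is assumed):
+ *
+ *   err = kbase_pm_policy_init(kbdev);
+ *   if (err)
+ *           return err;
+ *   ...
+ *   kbase_pm_policy_term(kbdev);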
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of
+ *                               kbase_pm_request_cores()
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is a safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as
+ * appropriate) is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ *                          for jobs to be submitted
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete (and the cores might not be
+ * available) until a Power Management IRQ is received.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ *                            jobs to be submitted.
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores())
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job
+ * is cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ is
+ * received.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
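+
+/*
+ * Illustrative sketch (not part of the driver): the synchronous variant for a
+ * caller that must not proceed until the cores are powered, e.g. a
+ * hypothetical debug/dump path. shader_present_bitmap is assumed to hold the
+ * full set of physically present shader cores.
+ *
+ *	kbase_pm_request_cores_sync(kbdev, false,
+ *				    kbdev->shader_present_bitmap);
+ *	// ... read shader core registers safely here ...
+ *	kbase_pm_unrequest_cores(kbdev, false,
+ *				 kbdev->shader_present_bitmap);
+ */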
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores())
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and, if so, update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ *
+ *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ *
+ *         %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+						struct kbase_device *kbdev,
+						bool tiler_required,
+						u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_register_inuse_cores())
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented,
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power
+ * policy may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
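+
+/*
+ * Illustrative sketch (not part of the driver): the two legal lifecycles for
+ * a core request. 'affinity' is a hypothetical shader-core bitmask and
+ * job_was_cancelled()/run_job_on_hw() are stand-ins for scheduler logic.
+ *
+ *	kbase_pm_request_cores(kbdev, true, affinity);
+ *
+ *	if (job_was_cancelled(job)) {
+ *		// never submitted: drop the 'needed' reference
+ *		kbase_pm_unrequest_cores(kbdev, true, affinity);
+ *	} else if (kbase_pm_register_inuse_cores(kbdev, true, affinity) ==
+ *						KBASE_CORES_READY) {
+ *		run_job_on_hw(kbdev, job);
+ *		// once the job completes: drop the 'inuse' reference
+ *		kbase_pm_release_cores(kbdev, true, affinity);
+ *	}
+ */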
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of the l2 caches for all core groups: power them up, wait
+ * for them to become available, and prevent the power manager from powering
+ * them down.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2.
+ *
+ * It is the caller's responsibility to ensure that the l2 is already powered
+ * up and to eventually call kbase_pm_release_l2_caches().
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power
+ * manager to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100644
index 000000000000..d992989123e8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,103 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
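+
+/*
+ * Illustrative sketch (not part of the driver): sampling all three time
+ * sources at once, e.g. to correlate GPU-side events with CPU-side logs.
+ * The local variables exist only for the example.
+ *
+ *	u64 cycles, system_time;
+ *	struct timespec ts;
+ *
+ *	kbase_backend_get_gpu_time(kbdev, &cycles, &system_time, &ts);
+ *	dev_dbg(kbdev->dev, "cycles=%llu ts=%ld.%09ld\n",
+ *		cycles, ts.tv_sec, ts.tv_nsec);
+ */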
+
+/**
+ * kbase_wait_write_flush - Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to
+ * flush its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367.
+ *
+ * Note: if GPU resets occur then the counters are reset to zero, so the delay
+ * may not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+	u32 base_count = 0;
+
+	/*
+	 * The caller must be holding onto the kctx or the call is from
+	 * userspace.
+	 */
+	kbase_pm_context_active(kctx->kbdev);
+	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+	while (true) {
+		u32 new_count;
+
+		new_count = kbase_reg_read(kctx->kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		/* First time around, just store the count. */
+		if (base_count == 0) {
+			base_count = new_count;
+			continue;
+		}
+
+		/* No need to handle wrapping, unsigned maths works for this. */
+		if ((new_count - base_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+	kbase_pm_context_idle(kctx->kbdev);
+}
+#endif /* CONFIG_MALI_NO_MALI */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100644
index 000000000000..35088abc8fe5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:         Device pointer
+ * @cycle_counter: Pointer to u64 to store cycle counter in
+ * @system_time:   Pointer to u64 to store system time in
+ * @ts:            Pointer to struct timespec to store current monotonic
+ *                 time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() - Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to
+ * flush its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, so the delay may
+ * not be as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367.
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */