From 238c17ed3d32f47c95f22647473e326600f83b22 Mon Sep 17 00:00:00 2001 From: Show Liu Date: Tue, 20 May 2014 15:23:52 +0800 Subject: add mali driver r4p0 --- drivers/gpu/arm/midgard/Kbuild | 228 ++ drivers/gpu/arm/midgard/Kconfig | 174 ++ drivers/gpu/arm/midgard/Makefile | 36 + drivers/gpu/arm/midgard/Makefile.kbase | 17 + drivers/gpu/arm/midgard/docs/Doxyfile | 126 + .../arm/midgard/docs/policy_operation_diagram.dot | 112 + drivers/gpu/arm/midgard/docs/policy_overview.dot | 63 + drivers/gpu/arm/midgard/mali_base_hwconfig.h | 709 +++++ drivers/gpu/arm/midgard/mali_base_kernel.h | 1743 +++++++++++ drivers/gpu/arm/midgard/mali_base_kernel_sync.h | 47 + drivers/gpu/arm/midgard/mali_base_mem_priv.h | 52 + .../arm/midgard/mali_base_vendor_specific_func.h | 26 + drivers/gpu/arm/midgard/mali_kbase.h | 455 +++ .../gpu/arm/midgard/mali_kbase_10969_workaround.c | 176 ++ .../gpu/arm/midgard/mali_kbase_10969_workaround.h | 23 + drivers/gpu/arm/midgard/mali_kbase_cache_policy.c | 41 + drivers/gpu/arm/midgard/mali_kbase_cache_policy.h | 47 + drivers/gpu/arm/midgard/mali_kbase_config.c | 358 +++ drivers/gpu/arm/midgard/mali_kbase_config.h | 843 ++++++ .../gpu/arm/midgard/mali_kbase_config_defaults.h | 202 ++ drivers/gpu/arm/midgard/mali_kbase_context.c | 257 ++ drivers/gpu/arm/midgard/mali_kbase_core_linux.c | 3057 ++++++++++++++++++++ drivers/gpu/arm/midgard/mali_kbase_cpuprops.c | 124 + drivers/gpu/arm/midgard/mali_kbase_cpuprops.h | 56 + drivers/gpu/arm/midgard/mali_kbase_debug.c | 39 + drivers/gpu/arm/midgard/mali_kbase_debug.h | 188 ++ drivers/gpu/arm/midgard/mali_kbase_defs.h | 873 ++++++ drivers/gpu/arm/midgard/mali_kbase_device.c | 774 +++++ drivers/gpu/arm/midgard/mali_kbase_event.c | 185 ++ drivers/gpu/arm/midgard/mali_kbase_gator.h | 44 + .../arm/midgard/mali_kbase_gpu_memory_debugfs.c | 100 + .../arm/midgard/mali_kbase_gpu_memory_debugfs.h | 43 + drivers/gpu/arm/midgard/mali_kbase_gpuprops.c | 336 +++ drivers/gpu/arm/midgard/mali_kbase_gpuprops.h | 54 + .../gpu/arm/midgard/mali_kbase_gpuprops_types.h | 103 + drivers/gpu/arm/midgard/mali_kbase_hw.c | 149 + drivers/gpu/arm/midgard/mali_kbase_hw.h | 52 + drivers/gpu/arm/midgard/mali_kbase_instr.c | 618 ++++ drivers/gpu/arm/midgard/mali_kbase_jd.c | 1598 ++++++++++ drivers/gpu/arm/midgard/mali_kbase_jm.c | 1400 +++++++++ drivers/gpu/arm/midgard/mali_kbase_jm.h | 199 ++ drivers/gpu/arm/midgard/mali_kbase_js.c | 2144 ++++++++++++++ drivers/gpu/arm/midgard/mali_kbase_js.h | 930 ++++++ drivers/gpu/arm/midgard/mali_kbase_js_affinity.c | 382 +++ drivers/gpu/arm/midgard/mali_kbase_js_affinity.h | 157 + drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c | 309 ++ drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h | 158 + drivers/gpu/arm/midgard/mali_kbase_js_defs.h | 479 +++ drivers/gpu/arm/midgard/mali_kbase_js_policy.h | 767 +++++ drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c | 1449 ++++++++++ drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h | 167 ++ drivers/gpu/arm/midgard/mali_kbase_linux.h | 47 + drivers/gpu/arm/midgard/mali_kbase_mem.c | 1287 ++++++++ drivers/gpu/arm/midgard/mali_kbase_mem.h | 616 ++++ drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c | 263 ++ drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h | 33 + drivers/gpu/arm/midgard/mali_kbase_mem_linux.c | 1655 +++++++++++ drivers/gpu/arm/midgard/mali_kbase_mem_linux.h | 67 + drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.c | 62 + drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h | 111 + drivers/gpu/arm/midgard/mali_kbase_mmu.c | 1685 +++++++++++ 
drivers/gpu/arm/midgard/mali_kbase_platform_fake.c | 142 + drivers/gpu/arm/midgard/mali_kbase_pm.c | 462 +++ drivers/gpu/arm/midgard/mali_kbase_pm.h | 849 ++++++ drivers/gpu/arm/midgard/mali_kbase_pm_always_on.c | 62 + drivers/gpu/arm/midgard/mali_kbase_pm_always_on.h | 68 + drivers/gpu/arm/midgard/mali_kbase_pm_ca.c | 173 ++ drivers/gpu/arm/midgard/mali_kbase_pm_ca.h | 170 ++ drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.c | 62 + drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.h | 37 + .../gpu/arm/midgard/mali_kbase_pm_coarse_demand.c | 68 + .../gpu/arm/midgard/mali_kbase_pm_coarse_demand.h | 60 + drivers/gpu/arm/midgard/mali_kbase_pm_demand.c | 70 + drivers/gpu/arm/midgard/mali_kbase_pm_demand.h | 57 + drivers/gpu/arm/midgard/mali_kbase_pm_driver.c | 949 ++++++ drivers/gpu/arm/midgard/mali_kbase_pm_metrics.c | 266 ++ .../gpu/arm/midgard/mali_kbase_pm_metrics_dummy.c | 39 + drivers/gpu/arm/midgard/mali_kbase_pm_policy.c | 792 +++++ drivers/gpu/arm/midgard/mali_kbase_pm_policy.h | 269 ++ .../arm/midgard/mali_kbase_profiling_gator_api.h | 40 + drivers/gpu/arm/midgard/mali_kbase_replay.c | 1069 +++++++ drivers/gpu/arm/midgard/mali_kbase_security.c | 78 + drivers/gpu/arm/midgard/mali_kbase_security.h | 52 + drivers/gpu/arm/midgard/mali_kbase_softjobs.c | 442 +++ drivers/gpu/arm/midgard/mali_kbase_sync.c | 195 ++ drivers/gpu/arm/midgard/mali_kbase_sync.h | 83 + drivers/gpu/arm/midgard/mali_kbase_sync_user.c | 155 + drivers/gpu/arm/midgard/mali_kbase_trace_defs.h | 232 ++ .../gpu/arm/midgard/mali_kbase_trace_timeline.c | 231 ++ .../gpu/arm/midgard/mali_kbase_trace_timeline.h | 368 +++ .../arm/midgard/mali_kbase_trace_timeline_defs.h | 132 + drivers/gpu/arm/midgard/mali_kbase_uku.h | 335 +++ drivers/gpu/arm/midgard/mali_kbase_utility.c | 32 + drivers/gpu/arm/midgard/mali_kbase_utility.h | 37 + drivers/gpu/arm/midgard/mali_linux_trace.h | 129 + drivers/gpu/arm/midgard/mali_midg_regmap.h | 513 ++++ drivers/gpu/arm/midgard/mali_timeline.h | 369 +++ drivers/gpu/arm/midgard/mali_uk.h | 143 + drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd.h | 481 +++ .../midgard/malisw/arm_cstd/arm_cstd_compilers.h | 617 ++++ .../midgard/malisw/arm_cstd/arm_cstd_pack_pop.h | 27 + .../midgard/malisw/arm_cstd/arm_cstd_pack_push.h | 27 + .../arm/midgard/malisw/arm_cstd/arm_cstd_types.h | 33 + .../midgard/malisw/arm_cstd/arm_cstd_types_gcc.h | 92 + .../midgard/malisw/arm_cstd/arm_cstd_types_rvct.h | 192 ++ drivers/gpu/arm/midgard/malisw/mali_malisw.h | 238 ++ drivers/gpu/arm/midgard/malisw/mali_stdtypes.h | 230 ++ drivers/gpu/arm/midgard/platform/Kbuild | 21 + .../midgard/platform/mali_kbase_platform_common.h | 26 + drivers/gpu/arm/midgard/platform/vexpress/Kbuild | 18 + .../platform/vexpress/mali_kbase_config_vexpress.c | 323 +++ .../platform/vexpress/mali_kbase_cpu_vexpress.c | 180 ++ .../platform/vexpress/mali_kbase_cpu_vexpress.h | 28 + .../platform/vexpress_6xvirtex7_10mhz/Kbuild | 18 + .../mali_kbase_config_vexpress.c | 324 +++ .../mali_kbase_cpu_vexpress.c | 71 + .../mali_kbase_cpu_vexpress.h | 28 + .../midgard/platform/vexpress_virtex7_40mhz/Kbuild | 17 + .../mali_kbase_config_vexpress.c | 323 +++ .../mali_kbase_cpu_vexpress.c | 178 ++ .../mali_kbase_cpu_vexpress.h | 26 + .../gpu/arm/midgard/platform_dummy/mali_ukk_os.h | 54 + drivers/gpu/arm/midgard/sconscript | 114 + 123 files changed, 41411 insertions(+) create mode 100755 drivers/gpu/arm/midgard/Kbuild create mode 100755 drivers/gpu/arm/midgard/Kconfig create mode 100755 drivers/gpu/arm/midgard/Makefile create mode 100755 
drivers/gpu/arm/midgard/Makefile.kbase create mode 100755 drivers/gpu/arm/midgard/docs/Doxyfile create mode 100755 drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot create mode 100755 drivers/gpu/arm/midgard/docs/policy_overview.dot create mode 100755 drivers/gpu/arm/midgard/mali_base_hwconfig.h create mode 100755 drivers/gpu/arm/midgard/mali_base_kernel.h create mode 100755 drivers/gpu/arm/midgard/mali_base_kernel_sync.h create mode 100755 drivers/gpu/arm/midgard/mali_base_mem_priv.h create mode 100755 drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_cache_policy.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_cache_policy.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_config.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_config.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_config_defaults.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_context.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_core_linux.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_cpuprops.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_cpuprops.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_debug.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_debug.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_defs.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_device.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_event.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_gator.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_gpuprops.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_gpuprops.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_hw.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_hw.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_instr.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_jd.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_jm.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_jm.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_js.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_js.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_js_affinity.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_js_affinity.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_js_defs.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_js_policy.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_linux.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_mem.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_mem.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_mem_linux.c create mode 100755 
drivers/gpu/arm/midgard/mali_kbase_mem_linux.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_mmu.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_platform_fake.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_always_on.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_always_on.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_ca.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_ca.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_demand.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_demand.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_driver.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_metrics.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_metrics_dummy.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_policy.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_pm_policy.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_replay.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_security.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_security.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_softjobs.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_sync.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_sync.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_sync_user.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_trace_defs.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_uku.h create mode 100755 drivers/gpu/arm/midgard/mali_kbase_utility.c create mode 100755 drivers/gpu/arm/midgard/mali_kbase_utility.h create mode 100755 drivers/gpu/arm/midgard/mali_linux_trace.h create mode 100755 drivers/gpu/arm/midgard/mali_midg_regmap.h create mode 100755 drivers/gpu/arm/midgard/mali_timeline.h create mode 100755 drivers/gpu/arm/midgard/mali_uk.h create mode 100755 drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd.h create mode 100755 drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_compilers.h create mode 100755 drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_pop.h create mode 100755 drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_push.h create mode 100755 drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types.h create mode 100755 drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_gcc.h create mode 100755 drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_rvct.h create mode 100755 drivers/gpu/arm/midgard/malisw/mali_malisw.h create mode 100755 drivers/gpu/arm/midgard/malisw/mali_stdtypes.h create mode 100755 drivers/gpu/arm/midgard/platform/Kbuild create mode 100755 drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h create mode 100755 
drivers/gpu/arm/midgard/platform/vexpress/Kbuild create mode 100755 drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c create mode 100755 drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c create mode 100755 drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h create mode 100755 drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild create mode 100755 drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c create mode 100755 drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c create mode 100755 drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h create mode 100755 drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/Kbuild create mode 100755 drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_config_vexpress.c create mode 100755 drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.c create mode 100755 drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.h create mode 100755 drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h create mode 100755 drivers/gpu/arm/midgard/sconscript diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild new file mode 100755 index 00000000000..0a86913df51 --- /dev/null +++ b/drivers/gpu/arm/midgard/Kbuild @@ -0,0 +1,228 @@ +# +# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +# Driver version string which is returned to userspace via an ioctl +MALI_RELEASE_NAME ?= "r4p0-02rel0" + +# Paths required for build +KBASE_PATH = $(src) +KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy +UMP_PATH = $(src)/../../../base + +ifeq ($(CONFIG_MALI_ERROR_INJECTION),y) +MALI_ERROR_INJECT_ON = 1 +endif + +# Set up defaults if not defined by build system +MALI_CUSTOMER_RELEASE ?= 1 +MALI_UNIT_TEST ?= 0 +MALI_KERNEL_TEST_API ?= 0 +MALI_ERROR_INJECT_ON ?= 0 +MALI_MOCK_TEST ?= 0 +MALI_COVERAGE ?= 0 +MALI_INSTRUMENTATION_LEVEL ?= 0 +# This workaround is for what seems to be a compiler bug we observed in +# GCC 4.7 on AOSP 4.3. The bug caused an intermittent failure compiling +# the "_Pragma" syntax, where an error message is returned: +# +# "internal compiler error: unspellable token PRAGMA" +# +# This regression has thus far only been seen on the GCC 4.7 compiler bundled +# with AOSP 4.3.0. So this makefile, intended for in-tree kernel builds +# which are not known to be used with AOSP, is hardcoded to disable the +# workaround, i.e. set the define to 0. 
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0 + +# Set up our defines, which will be passed to gcc +DEFINES = \ + -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ + -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ + -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ + -DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \ + -DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \ + -DMALI_COVERAGE=$(MALI_COVERAGE) \ + -DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \ + -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \ + -DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598) + +ifeq ($(KBUILD_EXTMOD),) +# in-tree +DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME) +else +# out-of-tree +DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME) +endif + +# Use our defines when compiling +ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) +subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) + +SRC := \ + mali_kbase_device.c \ + mali_kbase_cache_policy.c \ + mali_kbase_mem.c \ + mali_kbase_mmu.c \ + mali_kbase_jd.c \ + mali_kbase_jm.c \ + mali_kbase_cpuprops.c \ + mali_kbase_gpuprops.c \ + mali_kbase_js.c \ + mali_kbase_js_affinity.c \ + mali_kbase_js_ctx_attr.c \ + mali_kbase_event.c \ + mali_kbase_context.c \ + mali_kbase_pm.c \ + mali_kbase_pm_driver.c \ + mali_kbase_pm_metrics.c \ + mali_kbase_pm_ca.c \ + mali_kbase_pm_ca_fixed.c \ + mali_kbase_pm_always_on.c \ + mali_kbase_pm_coarse_demand.c \ + mali_kbase_pm_demand.c \ + mali_kbase_pm_policy.c \ + mali_kbase_config.c \ + mali_kbase_security.c \ + mali_kbase_instr.c \ + mali_kbase_softjobs.c \ + mali_kbase_10969_workaround.c \ + mali_kbase_hw.c \ + mali_kbase_utility.c \ + mali_kbase_mem_lowlevel.c \ + mali_kbase_debug.c \ + mali_kbase_trace_timeline.c \ + mali_kbase_gpu_memory_debugfs.c \ + mali_kbase_mem_linux.c \ + mali_kbase_core_linux.c \ + mali_kbase_sync.c \ + mali_kbase_sync_user.c \ + mali_kbase_replay.c \ + +ifeq ($(MALI_CUSTOMER_RELEASE),0) +SRC += \ + mali_kbase_pm_ca_random.c \ + mali_kbase_pm_demand_always_powered.c \ + mali_kbase_pm_fast_start.c +endif + +# Job Scheduler Policy: Completely Fair Scheduler +SRC += mali_kbase_js_policy_cfs.c + +ifeq ($(CONFIG_MACH_MANTA),y) + SRC += mali_kbase_mem_alloc_carveout.c +else + SRC += mali_kbase_mem_alloc.c +endif + +# ensure GPL version of malisw gets pulled in +ccflags-y += -I$(KBASE_PATH) + +ifeq ($(CONFIG_MALI_NO_MALI),y) + # Dummy model + SRC += mali_kbase_model_dummy.c + SRC += mali_kbase_model_linux.c + # HW error simulation + SRC += mali_kbase_model_error_generator.c +endif + +ifeq ($(MALI_MOCK_TEST),1) + # Test functionality + SRC += tests/internal/src/mock/mali_kbase_pm_driver_mock.c +endif + +# in-tree/out-of-tree logic needs to be slightly different to determine if a file is present +ifeq ($(KBUILD_EXTMOD),) +# in-tree +MALI_METRICS_PATH = $(srctree)/drivers/gpu/arm/midgard +else +# out-of-tree +MALI_METRICS_PATH = $(KBUILD_EXTMOD) +endif + +# Use vsync metrics example using PL111 driver, if available +ifeq ($(wildcard $(MALI_METRICS_PATH)/mali_kbase_pm_metrics_linux.c),) + SRC += mali_kbase_pm_metrics_dummy.c +else + SRC += mali_kbase_pm_metrics_linux.c +endif + +ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y) + SRC += mali_kbase_platform_fake.c + + ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y) + SRC += platform/vexpress/mali_kbase_config_vexpress.c \ + platform/vexpress/mali_kbase_cpu_vexpress.c + endif + + ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y) 
+ SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c + endif + + ifeq ($(CONFIG_MALI_PLATFORM_JUNO),y) + SRC += platform/juno/mali_kbase_config_vexpress.c + endif + + ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ),y) + SRC += platform/vexpress_virtex7_40mhz/mali_kbase_config_vexpress.c \ + platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.c + endif + + ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y) + SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \ + platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c + endif + + ifeq ($(CONFIG_MALI_PLATFORM_GOLDFISH),y) + SRC += platform/goldfish/mali_kbase_config_goldfish.c + endif + + ifeq ($(CONFIG_MALI_PLATFORM_PBX),y) + SRC += platform/pbx/mali_kbase_config_pbx.c + endif + + ifeq ($(CONFIG_MALI_PLATFORM_PANDA),y) + SRC += platform/panda/mali_kbase_config_panda.c + endif + + ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y) + ifeq ($(CONFIG_MALI_MIDGARD),m) + # remove begin and end quotes from the Kconfig string type + platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)) + MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name) + include $(src)/platform/$(platform_name)/Kbuild + else ifeq ($(CONFIG_MALI_MIDGARD),y) + obj-$(CONFIG_MALI_MIDGARD) += platform/ + endif + endif +endif # CONFIG_MALI_PLATFORM_FAKE=y + +ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y) +ifeq ($(CONFIG_MALI_MIDGARD),m) +# remove begin and end quotes from the Kconfig string type +platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)) +MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name) +include $(src)/platform/$(platform_name)/Kbuild +else ifeq ($(CONFIG_MALI_MIDGARD),y) +obj-$(CONFIG_MALI_MIDGARD) += platform/ +endif +endif + +# Tell the Linux build system from which .o file to create the kernel module +obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o + +# Tell the Linux build system to enable building of our .c files +mali_kbase-y := $(SRC:.c=.o) + + diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig new file mode 100755 index 00000000000..71be8bbe0d1 --- /dev/null +++ b/drivers/gpu/arm/midgard/Kconfig @@ -0,0 +1,174 @@ +# +# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +menuconfig MALI_MIDGARD + tristate "Mali Midgard series support" + default n + help + Enable this option to build support for a ARM Mali Midgard GPU. + + To compile this driver as a module, choose M here: + this will generate a single module, called mali_kbase. + +config MALI_GATOR_SUPPORT + bool "Streamline Debug support" + depends on MALI_MIDGARD + default n + help + Adds diagnostic support for use with the ARM Streamline Performance Analyzer. + You will need the Gator device driver already loaded before loading this driver when enabling + Streamline debug support. + +config MALI_MIDGARD_DVFS + bool "Enable DVFS" + depends on MALI_MIDGARD + default n + help + Choose this option to enable DVFS in the Mali Midgard DDK. 
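
For orientation, a Kconfig bool such as MALI_MIDGARD_DVFS above reaches the C sources as the preprocessor symbol CONFIG_MALI_MIDGARD_DVFS once it is set to y. The short sketch below is not part of the patch and the helper name is invented for illustration; it only shows the usual pattern for guarding such an optional path:

/* Minimal sketch, not from this patch: guarding an optional code path on a
 * Kconfig bool. The helper name is hypothetical. */
#include <linux/printk.h>

static inline void mali_example_log_dvfs_support(void)
{
#ifdef CONFIG_MALI_MIDGARD_DVFS
	pr_info("kbase: DVFS support compiled in\n");
#else
	pr_info("kbase: DVFS support compiled out\n");
#endif
}
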
+ +config MALI_MIDGARD_RT_PM + bool "Enable Runtime power management" + depends on MALI_MIDGARD + depends on PM_RUNTIME + default n + help + Choose this option to enable runtime power management in the Mali Midgard DDK. + +config MALI_MIDGARD_ENABLE_TRACE + bool "Enable kbase tracing" + depends on MALI_MIDGARD + default n + help + Enables tracing in kbase. The trace log is available through + the "mali_trace" debugfs file when CONFIG_DEBUG_FS is enabled. + +config MALI_MIDGARD_DEBUG_SYS + bool "Enable sysfs for the Mali Midgard DDK" + depends on MALI_MIDGARD && SYSFS + default n + help + Enables sysfs entries for the Mali Midgard DDK, used to set and monitor the Mali Midgard DDK. + +# MALI_EXPERT configuration options + +menuconfig MALI_EXPERT + depends on MALI_MIDGARD + bool "Enable Expert Settings" + default n + help + Enabling this option and modifying the default settings may produce a driver with performance or + other limitations. + +config MALI_DEBUG_SHADER_SPLIT_FS + bool "Allow mapping of shader cores via sysfs" + depends on MALI_MIDGARD && MALI_MIDGARD_DEBUG_SYS && MALI_EXPERT + default n + help + Select this option to provide a sysfs entry for runtime configuration of shader + core affinity masks. + +config MALI_PLATFORM_FAKE + bool "Enable fake platform device support" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + When you start to work with the Mali Midgard series device driver the platform-specific code of + the Linux kernel for your platform may not be complete. In this situation the kernel device driver + supports creating the platform device outside of the Linux platform-specific code. + Enable this option if you would like to use a platform device configuration from within the device driver. + +choice + prompt "Platform configuration" + depends on MALI_MIDGARD && MALI_EXPERT + default MALI_PLATFORM_VEXPRESS + help + Select the SoC platform that contains a Mali Midgard GPU. + +config MALI_PLATFORM_VEXPRESS + depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4) + bool "Versatile Express" +config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ + depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4) + bool "Versatile Express w/Virtex7 @ 40MHz" +config MALI_PLATFORM_GOLDFISH + depends on ARCH_GOLDFISH + bool "Android Goldfish virtual CPU" +config MALI_PLATFORM_PBX + depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX + bool "Realview PBX-A9" +config MALI_PLATFORM_THIRDPARTY + bool "Third Party Platform" +endchoice + +config MALI_PLATFORM_THIRDPARTY_NAME + depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT + string "Third party platform name" + help + Enter the name of a third party platform that is supported. The third party configuration + file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name + specified here. + +config MALI_DEBUG + bool "Debug build" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Select this option for increased checking and reporting of errors. + +config MALI_NO_MALI + bool "No Mali" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + This can be used to test the driver in a simulated environment + whereby the hardware is not physically present. If the hardware is physically + present it will not be used. This can be used to test the majority of the + driver without needing actual hardware or for software benchmarking. + All calls to the simulated hardware will complete immediately as if the hardware + completed the task.
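
In the same way, the plain -D defines collected in the Kbuild's DEFINES variable earlier in this patch arrive in every compilation unit as ordinary macros; MALI_RELEASE_NAME, described there as the version string returned to userspace via an ioctl, is one example. The following self-contained sketch of consuming it is not part of the patch: the fallback definition, module name, and init function are hypothetical.

/* Hypothetical example only: consuming a string define injected by the
 * Kbuild DEFINES list (-DMALI_RELEASE_NAME=\"...\"). */
#include <linux/module.h>
#include <linux/printk.h>

#ifndef MALI_RELEASE_NAME
#define MALI_RELEASE_NAME "r4p0-02rel0"	/* fallback matching the Kbuild default */
#endif

static int __init mali_release_name_example_init(void)
{
	pr_info("mali kbase example: release %s\n", MALI_RELEASE_NAME);
	return 0;
}

static void __exit mali_release_name_example_exit(void)
{
}

module_init(mali_release_name_example_init);
module_exit(mali_release_name_example_exit);
MODULE_LICENSE("GPL");
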
+ +config MALI_ERROR_INJECT + bool "Error injection" + depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI + default n + help + Enables insertion of errors to test module failure and recovery mechanisms. + +config MALI_TRACE_TIMELINE + bool "Timeline tracing" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Enables timeline tracing through the kernel tracepoint system. + +config MALI_SYSTEM_TRACE + bool "Enable system event tracing support" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Choose this option to enable system trace events for each + kbase event. This is typically used for debugging but has + minimal overhead when not in use. Enable only if you know what + you are doing. + +config MALI_GPU_TRACEPOINTS + bool "Enable GPU tracepoints" + depends on MALI_MIDGARD && ANDROID + select GPU_TRACEPOINTS + help + Enables GPU tracepoints using Android trace event definitions. + +source "drivers/gpu/arm/midgard/platform/Kconfig" diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile new file mode 100755 index 00000000000..9820be2b444 --- /dev/null +++ b/drivers/gpu/arm/midgard/Makefile @@ -0,0 +1,36 @@ +# +# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +KDIR ?= /lib/modules/$(shell uname -r)/build + +UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump +KBASE_PATH_RELATIVE = $(CURDIR) +KDS_PATH_RELATIVE = $(CURDIR)/../../../.. +EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers + +ifeq ($(MALI_UNIT_TEST), 1) + EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers +endif + +# GPL driver supports KDS +EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers + +# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions +all: + $(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + +clean: + $(MAKE) -C $(KDIR) M=$(CURDIR) clean diff --git a/drivers/gpu/arm/midgard/Makefile.kbase b/drivers/gpu/arm/midgard/Makefile.kbase new file mode 100755 index 00000000000..2bef9c25eae --- /dev/null +++ b/drivers/gpu/arm/midgard/Makefile.kbase @@ -0,0 +1,17 @@ +# +# (C) COPYRIGHT 2010 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. 
+# +# + + +EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM) + diff --git a/drivers/gpu/arm/midgard/docs/Doxyfile b/drivers/gpu/arm/midgard/docs/Doxyfile new file mode 100755 index 00000000000..e2662c2dfae --- /dev/null +++ b/drivers/gpu/arm/midgard/docs/Doxyfile @@ -0,0 +1,126 @@ +# +# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +############################################################################## + +# This file contains per-module Doxygen configuration. Please do not add +# extra settings to this file without consulting all stakeholders, as they +# may cause override project-wide settings. +# +# Additionally, when defining aliases, macros, sections etc, use the module +# name as a prefix e.g. gles_my_alias. + +############################################################################## + +@INCLUDE = ../../bldsys/Doxyfile_common + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT += ../../kernel/drivers/gpu/arm/midgard/ + +############################################################################## +# Everything below here is optional, and in most cases not required +############################################################################## + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES += + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS += + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 + +FILE_PATTERNS += + +# The EXCLUDE tag can be used to specify files and/or directories that should +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. 
+EXCLUDE += ../../kernel/drivers/gpu/arm/midgard/malisw/ ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile + + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS += + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS += + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH += + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain image that are included in the documentation (see +# the \image command). + +IMAGE_PATH += + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH += + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED += + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED += + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS += ../../kernel/drivers/gpu/arm/midgard/docs + diff --git a/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot new file mode 100755 index 00000000000..a70b5494708 --- /dev/null +++ b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot @@ -0,0 +1,112 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +digraph policy_objects_diagram { + rankdir=LR; + size="12,8"; + compound=true; + + node [ shape = box ]; + + subgraph cluster_policy_queues { + low_queue [ shape=record label = "LowP | {ctx_lo | ... | ctx_i | ... | ctx_hi}" ]; + queues_middle_sep [ label="" shape=plaintext width=0 height=0 ]; + + rt_queue [ shape=record label = "RT | {ctx_lo | ... | ctx_j | ... | ctx_hi}" ]; + + label = "Policy's Queue(s)"; + } + + call_enqueue [ shape=plaintext label="enqueue_ctx()" ]; + + { + rank=same; + ordering=out; + call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ]; + call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ]; + + call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ]; + } + + subgraph cluster_runpool { + + as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ]; + as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ]; + as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ]; + as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ]; + + label = "Policy's Run Pool"; + } + + { + rank=same; + call_jdequeue [ shape=plaintext label="dequeue_job()" ]; + sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ]; + } + + { + rank=same; + ordering=out; + sstop [ shape=ellipse label="SS-Timer expires" ] + jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ]; + + irq [ label="IRQ" shape=ellipse ]; + + job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ]; + } + + hstop [ shape=ellipse label="HS-Timer expires" ] + + /* + * Edges + */ + + call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ]; + + low_queue:qr -> call_dequeue:w; + rt_queue:qr -> call_dequeue:w; + + call_dequeue -> as1 [lhead=cluster_runpool]; + + as1->call_jdequeue [ltail=cluster_runpool]; + call_jdequeue->jobslots:0; + call_jdequeue->sstop_dotfixup [ arrowhead=none]; + sstop_dotfixup->sstop [label="Spawn SS-Timer"]; + sstop->jobslots [label="SoftStop"]; + sstop->hstop [label="Spawn HS-Timer"]; + hstop->jobslots:ne [label="HardStop"]; + + + as3->call_ctxfinish:ne [ ltail=cluster_runpool ]; + call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ]; + + call_ctxfinish->call_ctxdone [constraint=false]; + + call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false]; + + + { + jobslots->irq [constraint=false]; + + irq->job_finish [constraint=false]; + } + + irq->as2 [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ]; + +} diff --git a/drivers/gpu/arm/midgard/docs/policy_overview.dot b/drivers/gpu/arm/midgard/docs/policy_overview.dot new file mode 100755 index 00000000000..bd5e21b074c --- /dev/null +++ b/drivers/gpu/arm/midgard/docs/policy_overview.dot @@ -0,0 +1,63 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +digraph policy_objects_diagram { + rankdir=LR + size="6,6" + compound=true; + + node [ shape = box ]; + + call_enqueue [ shape=plaintext label="enqueue ctx" ]; + + + policy_queue [ label="Policy's Queue" ]; + + { + rank=same; + runpool [ label="Policy's Run Pool" ]; + + ctx_finish [ label="ctx finished" ]; + } + + { + rank=same; + jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ]; + + job_finish [ label="Job finished" ]; + } + + + + /* + * Edges + */ + + call_enqueue -> policy_queue; + + policy_queue->runpool [label="dequeue ctx" weight=0.1]; + runpool->policy_queue [label="requeue ctx" weight=0.1]; + + runpool->ctx_finish [ style=dotted ]; + + runpool->jobslots [label="dequeue job" weight=0.1]; + jobslots->runpool [label="requeue job" weight=0.1]; + + jobslots->job_finish [ style=dotted ]; +} diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig.h b/drivers/gpu/arm/midgard/mali_base_hwconfig.h new file mode 100755 index 00000000000..d770cfdd91a --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig.h @@ -0,0 +1,709 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file + * Software workarounds configuration for Hardware issues. + */ + +#ifndef _BASE_HWCONFIG_H_ +#define _BASE_HWCONFIG_H_ + +#include + +/** + * List of all hw features. + * + */ +typedef enum base_hw_feature { + /* Allow soft/hard stopping of job depending on job chain flag */ + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + + /* Allow writes to SHADER_PWRON and TILER_PWRON registers while these cores are currently transitioning to OFF power state */ + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + + /* The BASE_HW_FEATURE_END value must be the last feature listed in this enumeration + * and must be the last value in each array that contains the list of features + * for a particular HW version. + */ + BASE_HW_FEATURE_END +} base_hw_feature; + +static const base_hw_feature base_hw_features_generic[] = { + BASE_HW_FEATURE_END +}; + +static const base_hw_feature base_hw_features_t76x[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_END +}; + + +/** + * List of all workarounds. 
+ * + */ + +typedef enum base_hw_issue { + + /* The current version of the model doesn't support Soft-Stop */ + BASE_HW_ISSUE_5736, + + /* Need way to guarantee that all previously-translated memory accesses are commited */ + BASE_HW_ISSUE_6367, + + /* Result swizzling doesn't work for GRDESC/GRDESC_DER */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_6398, + + /* Unaligned load stores crossing 128 bit boundaries will fail */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_6402, + + /* On job complete with non-done the cache is not flushed */ + BASE_HW_ISSUE_6787, + + /* WLS allocation does not respect the Instances field in the Thread Storage Descriptor */ + BASE_HW_ISSUE_7027, + + /* The clamp integer coordinate flag bit of the sampler descriptor is reserved */ + BASE_HW_ISSUE_7144, + + /* TEX_INDEX LOD is always use converted */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_8073, + + /* Write of PRFCNT_CONFIG_MODE_MANUAL to PRFCNT_CONFIG causes a instrumentation dump if + PRFCNT_TILER_EN is enabled */ + BASE_HW_ISSUE_8186, + + /* Do not set .skip flag on the GRDESC, GRDESC_DER, DELTA, MOV, and NOP texturing instructions */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_8215, + + /* TIB: Reports faults from a vtile which has not yet been allocated */ + BASE_HW_ISSUE_8245, + + /* WLMA memory goes wrong when run on shader cores other than core 0. */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_8250, + + /* Hierz doesn't work when stenciling is enabled */ + BASE_HW_ISSUE_8260, + + /* Livelock in L0 icache */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_8280, + + /* uTLB deadlock could occur when writing to an invalid page at the same time as + * access to a valid page in the same uTLB cache line ( == 4 PTEs == 16K block of mapping) */ + BASE_HW_ISSUE_8316, + + /* TLS base address mismatch, must stay below 1MB TLS */ + BASE_HW_ISSUE_8381, + + /* HT: TERMINATE for RUN command ignored if previous LOAD_DESCRIPTOR is still executing */ + BASE_HW_ISSUE_8394, + + /* CSE : Sends a TERMINATED response for a task that should not be terminated */ + /* (Note that PRLAM-8379 also uses this workaround) */ + BASE_HW_ISSUE_8401, + + /* Repeatedly Soft-stopping a job chain consisting of (Vertex Shader, Cache Flush, Tiler) + * jobs causes 0x58 error on tiler job. */ + BASE_HW_ISSUE_8408, + + /* Disable the Pause Buffer in the LS pipe. 
*/ + BASE_HW_ISSUE_8443, + + /* Stencil test enable 1->0 sticks */ + BASE_HW_ISSUE_8456, + + /* Tiler heap issue using FBOs or multiple processes using the tiler simultaneously */ + /* (Note that PRLAM-9049 also uses this work-around) */ + BASE_HW_ISSUE_8564, + + /* Livelock issue using atomic instructions (particularly when using atomic_cmpxchg as a spinlock) */ + BASE_HW_ISSUE_8791, + + /* Fused jobs are not supported (for various reasons) */ + /* Jobs with relaxed dependencies do not support soft-stop */ + /* (Note that PRLAM-8803, PRLAM-8393, PRLAM-8559, PRLAM-8601 & PRLAM-8607 all use this work-around) */ + BASE_HW_ISSUE_8803, + + /* Blend shader output is wrong for certain formats */ + BASE_HW_ISSUE_8833, + + /* Occlusion queries can create false 0 result in boolean and counter modes */ + BASE_HW_ISSUE_8879, + + /* Output has half intensity with blend shaders enabled on 8xMSAA. */ + BASE_HW_ISSUE_8896, + + /* 8xMSAA does not work with CRC */ + BASE_HW_ISSUE_8975, + + /* Boolean occlusion queries don't work properly due to sdc issue. */ + BASE_HW_ISSUE_8986, + + /* Change in RMUs in use causes problems related with the core's SDC */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_8987, + + /* Occlusion query result is not updated if color writes are disabled. */ + BASE_HW_ISSUE_9010, + + /* Problem with number of work registers in the RSD if set to 0 */ + BASE_HW_ISSUE_9275, + + /* Translate load/store moves into decode instruction */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_9418, + + /* Incorrect coverage mask for 8xMSAA */ + BASE_HW_ISSUE_9423, + + /* Compute endpoint has a 4-deep queue of tasks, meaning a soft stop won't complete until all 4 tasks have completed */ + BASE_HW_ISSUE_9435, + + /* HT: Tiler returns TERMINATED for command that hasn't been terminated */ + BASE_HW_ISSUE_9510, + + /* Livelock issue using atomic_cmpxchg */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_9566, + + /* Occasionally the GPU will issue multiple page faults for the same address before the MMU page table has been read by the GPU */ + BASE_HW_ISSUE_9630, + + /* Must clear the 64 byte private state of the tiler information */ + BASE_HW_ISSUE_10127, + + /* RA DCD load request to SDC returns invalid load ignore causing colour buffer mismatch */ + BASE_HW_ISSUE_10327, + + /* Occlusion query result may be updated prematurely when fragment shader alters coverage */ + BASE_HW_ISSUE_10410, + + /* TEXGRD doesn't honor Sampler Descriptor LOD clamps nor bias */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_10471, + + /* MAG / MIN filter selection happens after image descriptor clamps were applied */ + BASE_HW_ISSUE_10472, + + /* GPU interprets sampler and image descriptor pointer array sizes as one bigger than they are defined in midg structures */ + BASE_HW_ISSUE_10487, + + /* ld_special 0x1n applies SRGB conversion */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_10607, + + /* LD_SPECIAL instruction reads incorrect RAW tile buffer value when internal tib format is R10G10B10A2 */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_10632, + + /* MMU TLB invalidation hazards */ + BASE_HW_ISSUE_10649, + + /* Missing cache flush in multi core-group configuration */ + BASE_HW_ISSUE_10676, + + /* 
Indexed format 95 cannot be used with a component swizzle of "set to 1" when sampled as integer texture */ + BASE_HW_ISSUE_10682, + + /* sometimes HW doesn't invalidate cached VPDs when it has to */ + BASE_HW_ISSUE_10684, + + /* Chicken bit on (t67x_r1p0 and t72x) to work for a HW workaround in compiler */ + BASE_HW_ISSUE_10797, + + /* Soft-stopping fragment jobs might fail with TILE_RANGE_FAULT */ + BASE_HW_ISSUE_10817, + + /* Fragment frontend heuristic bias to force early-z required */ + BASE_HW_ISSUE_10821, + + /* Intermittent missing interrupt on job completion */ + BASE_HW_ISSUE_10883, + + /* Depth bounds incorrectly normalized in hierz depth bounds test */ + BASE_HW_ISSUE_10931, + + /* Incorrect cubemap sampling */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_10946, + + /* Soft-stopping fragment jobs might fail with TILE_RANGE_ERROR (similar to issue 10817) and can use BASE_HW_ISSUE_10817 workaround */ + BASE_HW_ISSUE_10959, + + /* Soft-stopped fragment shader job can restart with out-of-bound restart index */ + BASE_HW_ISSUE_10969, + + /* Instanced arrays conformance fail, workaround by unrolling */ + BASE_HW_ISSUE_10984, + + /* TEX_INDEX lod selection (immediate , register) not working with 8.8 encoding for levels > 1 */ + /* NOTE: compiler workaround: keep in sync with _essl_hwrev_needs_workaround() */ + BASE_HW_ISSUE_10995, + + /* LD_SPECIAL instruction reads incorrect RAW tile buffer value when internal tib format is RGB565 or RGBA5551 */ + BASE_HW_ISSUE_11012, + + /* Race condition can cause tile list corruption */ + BASE_HW_ISSUE_11020, + + /* Write buffer can cause tile list corruption */ + BASE_HW_ISSUE_11024, + + /* T76X hw issues */ + + /* Partial 16xMSAA support */ + BASE_HW_ISSUE_T76X_26, + + /* Forward pixel kill doesn't work with MRT */ + BASE_HW_ISSUE_T76X_2121, + + /* CRC not working with MFBD and more than one render target */ + BASE_HW_ISSUE_T76X_2315, + + /* Some indexed formats not supported for MFBD preload. */ + BASE_HW_ISSUE_T76X_2686, + + /* Must disable CRC if the tile output size is 8 bytes or less. */ + BASE_HW_ISSUE_T76X_2712, + + /* DBD clean pixel enable bit is reserved */ + BASE_HW_ISSUE_T76X_2772, + + /* AFBC is not supported for T76X beta. */ + BASE_HW_ISSUE_T76X_2906, + + /* Prevent MMU deadlock for T76X beta. */ + BASE_HW_ISSUE_T76X_3285, + + /* Clear encoder state for a hard stopped fragment job which is AFBC + * encoded by soft resetting the GPU. Only for T76X r0p0 and r0p1 + */ + BASE_HW_ISSUE_T76X_3542, + + /* Do not use 8xMSAA with 16x8 pixel tile size or 16xMSAA with 8x8 pixel + * tile size. + */ + BASE_HW_ISSUE_T76X_3556, + + /* T76X cannot disable uses_discard even if depth and stencil are read-only. */ + BASE_HW_ISSUE_T76X_3700, + + /* ST_TILEBUFFER is not supported on T76X-r0p0-beta */ + BASE_HW_ISSUE_T76X_3759, + + /* Preload ignores any size or bounding box restrictions of the output image. */ + BASE_HW_ISSUE_T76X_3793, + + /* The BASE_HW_ISSUE_END value must be the last issue listed in this enumeration + * and must be the last value in each array that contains the list of workarounds + * for a particular HW version. 
+ */ + BASE_HW_ISSUE_END +} base_hw_issue; + +/** + * Workarounds configuration for each HW revision + */ +/* Mali T60x r0p0-15dev0 - 2011-W39-stable-9 */ +static const base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { + BASE_HW_ISSUE_6367, + BASE_HW_ISSUE_6398, + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_6787, + BASE_HW_ISSUE_7027, + BASE_HW_ISSUE_7144, + BASE_HW_ISSUE_8073, + BASE_HW_ISSUE_8186, + BASE_HW_ISSUE_8215, + BASE_HW_ISSUE_8245, + BASE_HW_ISSUE_8250, + BASE_HW_ISSUE_8260, + BASE_HW_ISSUE_8280, + BASE_HW_ISSUE_8316, + BASE_HW_ISSUE_8381, + BASE_HW_ISSUE_8394, + BASE_HW_ISSUE_8401, + BASE_HW_ISSUE_8408, + BASE_HW_ISSUE_8443, + BASE_HW_ISSUE_8456, + BASE_HW_ISSUE_8564, + BASE_HW_ISSUE_8791, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_8833, + BASE_HW_ISSUE_8896, + BASE_HW_ISSUE_8975, + BASE_HW_ISSUE_8986, + BASE_HW_ISSUE_8987, + BASE_HW_ISSUE_9010, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9418, + BASE_HW_ISSUE_9423, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_9510, + BASE_HW_ISSUE_9566, + BASE_HW_ISSUE_9630, + BASE_HW_ISSUE_10410, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10487, + BASE_HW_ISSUE_10607, + BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, + BASE_HW_ISSUE_10676, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_10969, + BASE_HW_ISSUE_10984, + BASE_HW_ISSUE_10995, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +/* Mali T60x r0p0-00rel0 - 2011-W46-stable-13c */ +static const base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { + BASE_HW_ISSUE_6367, + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_6787, + BASE_HW_ISSUE_7027, + BASE_HW_ISSUE_8408, + BASE_HW_ISSUE_8564, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_8975, + BASE_HW_ISSUE_9010, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9418, + BASE_HW_ISSUE_9423, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_9510, + BASE_HW_ISSUE_10410, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10487, + BASE_HW_ISSUE_10607, + BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, + BASE_HW_ISSUE_10676, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_10969, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +/* Mali T60x r0p1 */ +static const base_hw_issue base_hw_issues_t60x_r0p1[] = { + BASE_HW_ISSUE_6367, + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_6787, + BASE_HW_ISSUE_7027, + BASE_HW_ISSUE_8408, + BASE_HW_ISSUE_8564, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_8975, + BASE_HW_ISSUE_9010, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_9510, + BASE_HW_ISSUE_10410, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10487, + BASE_HW_ISSUE_10607, + BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, + BASE_HW_ISSUE_10676, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +/* Mali T62x r0p1 */ +static const base_hw_issue base_hw_issues_t62x_r0p1[] = { + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10127, + BASE_HW_ISSUE_10327, + BASE_HW_ISSUE_10410, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10487, + BASE_HW_ISSUE_10607, + BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, + BASE_HW_ISSUE_10676, 
+ BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10817, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_10959, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +/* Mali T62x r1p0 */ +static const base_hw_issue base_hw_issues_t62x_r1p0[] = { + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10649, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_10959, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +/* Mali T67x r1p0 */ +static const base_hw_issue base_hw_issues_t67x_r1p0[] = { + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10649, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10797, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_10959, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +/* Mali T76x r0p0 beta */ +static const base_hw_issue base_hw_issues_t76x_r0p0_beta[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10649, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_10959, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_T76X_26, + BASE_HW_ISSUE_T76X_2121, + BASE_HW_ISSUE_T76X_2315, + BASE_HW_ISSUE_T76X_2686, + BASE_HW_ISSUE_T76X_2712, + BASE_HW_ISSUE_T76X_2772, + BASE_HW_ISSUE_T76X_2906, + BASE_HW_ISSUE_T76X_3285, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3759, + BASE_HW_ISSUE_T76X_3793, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +/* Mali T76x r0p0 */ +static const base_hw_issue base_hw_issues_t76x_r0p0[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10649, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_T76X_26, + BASE_HW_ISSUE_T76X_3542, + BASE_HW_ISSUE_T76X_3556, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +/* Mali T76x r0p1 */ +static const base_hw_issue base_hw_issues_t76x_r0p1[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10649, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_T76X_26, + BASE_HW_ISSUE_T76X_3542, + BASE_HW_ISSUE_T76X_3556, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +/* Mali T76x r0p2 */ +static const base_hw_issue base_hw_issues_t76x_r0p2[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10649, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_T76X_26, + BASE_HW_ISSUE_T76X_3542, + BASE_HW_ISSUE_T76X_3556, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +/* Mali T76x r1p0 */ 
+static const base_hw_issue base_hw_issues_t76x_r1p0[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10649, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + + +/* Mali T72x r0p0 */ +static const base_hw_issue base_hw_issues_t72x_r0p0[] = { + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10649, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10797, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +/* Mali T72x r1p0 */ +static const base_hw_issue base_hw_issues_t72x_r1p0[] = { + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10649, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10797, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +/* Model configuration + */ +static const base_hw_issue base_hw_issues_model_t72x[] = +{ + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_6402, /* NOTE: Fix is present in model r125162 but is not enabled until RTL is fixed */ + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10797, + BASE_HW_ISSUE_10931, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +static const base_hw_issue base_hw_issues_model_t7xx[] = +{ + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +static const base_hw_issue base_hw_issues_model_t6xx[] = +{ + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_6402, /* NOTE: Fix is present in model r125162 but is not enabled until RTL is fixed */ + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + /* List of hardware issues must end with BASE_HW_ISSUE_END */ + BASE_HW_ISSUE_END +}; + +#endif /* _BASE_HWCONFIG_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h new file mode 100755 index 00000000000..4aff2bdf270 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_base_kernel.h @@ -0,0 +1,1743 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file + * Base structures shared with the kernel. 
+ */ + +#ifndef _BASE_KERNEL_H_ +#define _BASE_KERNEL_H_ + +/* For now we support the legacy API as well as the new API */ +#define BASE_LEGACY_JD_API 1 + +typedef mali_addr64 base_mem_handle; + +#include "mali_base_mem_priv.h" +#include "mali_kbase_profiling_gator_api.h" + +/* + * Dependency stuff, keep it private for now. May want to expose it if + * we decide to make the number of semaphores a configurable + * option. + */ +#define BASE_JD_ATOM_COUNT 256 + +#define BASEP_JD_SEM_PER_WORD_LOG2 5 +#define BASEP_JD_SEM_PER_WORD (1 << BASEP_JD_SEM_PER_WORD_LOG2) +#define BASEP_JD_SEM_WORD_NR(x) ((x) >> BASEP_JD_SEM_PER_WORD_LOG2) +#define BASEP_JD_SEM_MASK_IN_WORD(x) (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1))) +#define BASEP_JD_SEM_ARRAY_SIZE BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT) + +#if BASE_LEGACY_JD_API +/* Size of the ring buffer */ +#define BASEP_JCTX_RB_NRPAGES 4 +#endif /* BASE_LEGACY_JD_API */ + +#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3 + +#define BASE_MAX_COHERENT_GROUPS 16 + +#if defined CDBG_ASSERT +#define LOCAL_ASSERT CDBG_ASSERT +#elif defined KBASE_DEBUG_ASSERT +#define LOCAL_ASSERT KBASE_DEBUG_ASSERT +#else +#error assert macro not defined! +#endif + +#if defined PAGE_MASK +#define LOCAL_PAGE_LSB ~PAGE_MASK +#else +#include + +#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2 +#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1) +#else +#error Failed to find page size +#endif +#endif + +/** 32/64-bit neutral way to represent pointers */ +typedef union kbase_pointer { + void *value; /**< client should store their pointers here */ + u32 compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */ + u64 sizer; /**< Force 64-bit storage for all clients regardless */ +} kbase_pointer; + +/** + * @addtogroup base_user_api User-side Base APIs + * @{ + */ + +/** + * @addtogroup base_user_api_memory User-side Base Memory APIs + * @{ + */ + +/** + * @brief Memory allocation, access/hint flags + * + * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator + * in order to determine the best cache policy. Some combinations are + * of course invalid (eg @c MEM_PROT_CPU_WR | @c MEM_HINT_CPU_RD), + * which defines a @a write-only region on the CPU side, which is + * heavily read by the CPU... + * Other flags are only meaningful to a particular allocator. + * More flags can be added to this list, as long as they don't clash + * (see ::BASE_MEM_FLAGS_NR_BITS for the number of the first free bit). + */ +typedef u32 base_mem_alloc_flags; + +/** + * @brief Memory allocation, access/hint flags + * + * See ::base_mem_alloc_flags. + * + */ +enum { + BASE_MEM_PROT_CPU_RD = (1U << 0), /**< Read access CPU side */ + BASE_MEM_PROT_CPU_WR = (1U << 1), /**< Write access CPU side */ + BASE_MEM_PROT_GPU_RD = (1U << 2), /**< Read access GPU side */ + BASE_MEM_PROT_GPU_WR = (1U << 3), /**< Write access GPU side */ + BASE_MEM_PROT_GPU_EX = (1U << 4), /**< Execute allowed on the GPU side */ + + /* Note that the HINT flags are obsolete now. 
If you want the memory + * to be cached on the CPU please use the BASE_MEM_CACHED_CPU flag + */ + BASE_MEM_HINT_CPU_RD = (1U << 5), /**< Heavily read CPU side - OBSOLETE */ + BASE_MEM_HINT_CPU_WR = (1U << 6), /**< Heavily written CPU side - OBSOLETE */ + BASE_MEM_HINT_GPU_RD = (1U << 7), /**< Heavily read GPU side - OBSOLETE */ + BASE_MEM_HINT_GPU_WR = (1U << 8), /**< Heavily written GPU side - OBSOLETE */ + + BASE_MEM_GROW_ON_GPF = (1U << 9), /**< Grow backing store on GPU Page Fault */ + + BASE_MEM_COHERENT_SYSTEM = (1U << 10), /**< Page coherence Outer shareable */ + BASE_MEM_COHERENT_LOCAL = (1U << 11), /**< Page coherence Inner shareable */ + BASE_MEM_CACHED_CPU = (1U << 12), /**< Should be cached on the CPU */ + + BASE_MEM_SAME_VA = (1U << 13) /**< Must have same VA on both the GPU and the CPU */ +}; + +/** + * @brief Memory types supported by @a base_mem_import + * + * Each type defines what the supported handle type is. + * + * If any new type is added here ARM must be contacted + * to allocate a numeric value for it. + * Do not just add a new type without synchronizing with ARM + * as future releases from ARM might include other new types + * which could clash with your custom types. + */ +typedef enum base_mem_import_type { + BASE_MEM_IMPORT_TYPE_INVALID = 0, + /** UMP import. Handle type is ump_secure_id. */ + BASE_MEM_IMPORT_TYPE_UMP = 1, + /** UMM import. Handle type is a file descriptor (int) */ + BASE_MEM_IMPORT_TYPE_UMM = 2 +} base_mem_import_type; + +/* legacy API wrappers */ +#define base_tmem_import_type base_mem_import_type +#define BASE_TMEM_IMPORT_TYPE_INVALID BASE_MEM_IMPORT_TYPE_INVALID +#define BASE_TMEM_IMPORT_TYPE_UMP BASE_MEM_IMPORT_TYPE_UMP +#define BASE_TMEM_IMPORT_TYPE_UMM BASE_MEM_IMPORT_TYPE_UMM + +/** + * @brief Invalid memory handle type. + * Return value from functions returning @a base_mem_handle on error. + */ +#define BASE_MEM_INVALID_HANDLE (0ull << 12) +#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) +#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) +#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) +#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) +/* reserved handles ..-64< for future special handles */ +#define BASE_MEM_COOKIE_BASE (64ul << 12) +#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ + BASE_MEM_COOKIE_BASE) + +/* Bit mask of cookies used for for memory allocation setup */ +#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ + +/** + * @brief Number of bits used as flags for base memory management + * + * Must be kept in sync with the ::base_mem_alloc_flags flags + */ +#define BASE_MEM_FLAGS_NR_BITS 14 + +/** + * @brief Result codes of changing the size of the backing store allocated to a tmem region + */ +typedef enum base_backing_threshold_status { + BASE_BACKING_THRESHOLD_OK = 0, /**< Resize successful */ + BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1, /**< Not a growable tmem object */ + BASE_BACKING_THRESHOLD_ERROR_OOM = -2, /**< Increase failed due to an out-of-memory condition */ + BASE_BACKING_THRESHOLD_ERROR_MAPPED = -3, /**< Resize attempted on buffer while it was mapped, which is not permitted */ + BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */ +} base_backing_threshold_status; + +/** + * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs + * @{ + */ + +/** + * @brief a basic memory operation (sync-set). + * + * The content of this structure is private, and should only be used + * by the accessors. 
+ */ +typedef struct base_syncset { + basep_syncset basep_sset; +} base_syncset; + +/** @} end group base_user_api_memory_defered */ + +/** + * Handle to represent imported memory object. + * Simple opague handle to imported memory, can't be used + * with anything but base_external_resource_init to bind to an atom. + */ +typedef struct base_import_handle { + struct { + mali_addr64 handle; + } basep; +} base_import_handle; + +/** @} end group base_user_api_memory */ + +/** + * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs + * @{ + */ + +typedef int platform_fence_type; +#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1) + +/** + * Base stream handle. + * + * References an underlying base stream object. + */ +typedef struct base_stream { + struct { + int fd; + } basep; +} base_stream; + +/** + * Base fence handle. + * + * References an underlying base fence object. + */ +typedef struct base_fence { + struct { + int fd; + int stream_fd; + } basep; +} base_fence; + +#if BASE_LEGACY_JD_API +/** + * @brief A pre- or post- dual dependency. + * + * This structure is used to express either + * @li a single or dual pre-dependency (a job depending on one or two + * other jobs), + * @li a single or dual post-dependency (a job resolving a dependency + * for one or two other jobs). + * + * The dependency itself is specified as a u8, where 0 indicates no + * dependency. A single dependency is expressed by having one of the + * dependencies set to 0. + */ +typedef struct base_jd_dep { + u8 dep[2]; /**< pre/post dependencies */ +} base_jd_dep; +#endif /* BASE_LEGACY_JD_API */ + +/** + * @brief Per-job data + * + * This structure is used to store per-job data, and is completly unused + * by the Base driver. It can be used to store things such as callback + * function pointer, data to handle job completion. It is guaranteed to be + * untouched by the Base driver. + */ +typedef struct base_jd_udata { + u64 blob[2]; /**< per-job data array */ +} base_jd_udata; + +/** + * @brief Memory aliasing info + * + * Describes a memory handle to be aliased. + * A subset of the handle can be chosen for aliasing, given an offset and a + * length. + * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a + * region where a special page is mapped with a write-alloc cache setup, + * typically used when the write result of the GPU isn't needed, but the GPU + * must write anyway. + * + * Offset and length are specified in pages. + * Offset must be within the size of the handle. + * Offset+length must not overrun the size of the handle. + * + * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE + * @offset Offset within the handle to start aliasing from, in pages. + * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE. + * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE + * specifies the number of times the special page is needed. + */ +struct base_mem_aliasing_info { + base_mem_handle handle; + u64 offset; + u64 length; +}; + +/** + * @brief Job chain hardware requirements. + * + * A job chain must specify what GPU features it needs to allow the + * driver to schedule the job correctly. By not specifying the + * correct settings can/will cause an early job termination. Multiple + * values can be ORed together to specify multiple requirements. + * Special case is ::BASE_JD_REQ_DEP, which is used to express complex + * dependencies, and that doesn't execute anything on the hardware. 
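+ *
+ * As an illustrative sketch only (the variable name is arbitrary; the flag
+ * names are those defined below): a fragment job chain that must run on a
+ * coherent core group could combine its requirements as
+ * @code
+ * base_jd_core_req req = BASE_JD_REQ_FS | BASE_JD_REQ_COHERENT_GROUP;
+ * @endcode
+ * and store the result in the core_req member of its atom.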
+ */ +typedef u16 base_jd_core_req; + +/* Requirements that come from the HW */ +#define BASE_JD_REQ_DEP 0 /**< No requirement, dependency only */ +#define BASE_JD_REQ_FS (1U << 0) /**< Requires fragment shaders */ +/** + * Requires compute shaders + * This covers any of the following Midgard Job types: + * - Vertex Shader Job + * - Geometry Shader Job + * - An actual Compute Shader Job + * + * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the + * job is specifically just the "Compute Shader" job type, and not the "Vertex + * Shader" nor the "Geometry Shader" job type. + */ +#define BASE_JD_REQ_CS (1U << 1) +#define BASE_JD_REQ_T (1U << 2) /**< Requires tiling */ +#define BASE_JD_REQ_CF (1U << 3) /**< Requires cache flushes */ +#define BASE_JD_REQ_V (1U << 4) /**< Requires value writeback */ + +/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */ + +/* Requires fragment job with AFBC encoding */ +#define BASE_JD_REQ_FS_AFBC (1U << 13) + +/** + * SW Only requirement: the job chain requires a coherent core group. We don't + * mind which coherent core group is used. + */ +#define BASE_JD_REQ_COHERENT_GROUP (1U << 6) + +/** + * SW Only requirement: The performance counters should be enabled only when + * they are needed, to reduce power consumption. + */ + +#define BASE_JD_REQ_PERMON (1U << 7) + +/** + * SW Only requirement: External resources are referenced by this atom. + * When external resources are referenced no syncsets can be bundled with the atom + * but should instead be part of a NULL jobs inserted into the dependency tree. + * The first pre_dep object must be configured for the external resouces to use, + * the second pre_dep object can be used to create other dependencies. + */ +#define BASE_JD_REQ_EXTERNAL_RESOURCES (1U << 8) + +/** + * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted + * to the hardware but will cause some action to happen within the driver + */ +#define BASE_JD_REQ_SOFT_JOB (1U << 9) + +#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) +#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) +#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) + +/** + * SW Only requirement : Replay job. + * + * If the preceeding job fails, the replay job will cause the jobs specified in + * the list of base_jd_replay_payload pointed to by the jc pointer to be + * replayed. + * + * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT + * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay + * job is failed, as well as any following dependencies. + * + * The replayed jobs will require a number of atom IDs. If there are not enough + * free atom IDs then the replay job will fail. + * + * If the preceeding job does not fail, then the replay job is returned as + * completed. + * + * The replayed jobs will never be returned to userspace. The preceeding failed + * job will be returned to userspace as failed; the status of this job should + * be ignored. Completion should be determined by the status of the replay soft + * job. + * + * In order for the jobs to be replayed, the job headers will have to be + * modified. The Status field will be reset to NOT_STARTED. If the Job Type + * field indicates a Vertex Shader Job then it will be changed to Null Job. + * + * The replayed jobs have the following assumptions : + * + * - No external resources. 
Any required external resources will be held by the + * replay atom. + * - Pre-dependencies are created based on job order. + * - Atom numbers are automatically assigned. + * - device_nr is set to 0. This is not relevant as + * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set. + * - Priority is inherited from the replay job. + */ +#define BASE_JD_REQ_SOFT_REPLAY (BASE_JD_REQ_SOFT_JOB | 0x4) + +/** + * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) + * + * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type. + * + * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job + * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. + * + * @note This is a more flexible variant of the @ref BASE_CONTEXT_HINT_ONLY_COMPUTE flag, + * allowing specific jobs to be marked as 'Only Compute' instead of the entire context + */ +#define BASE_JD_REQ_ONLY_COMPUTE (1U << 10) + +/** + * HW Requirement: Use the base_jd_atom::device_nr field to specify a + * particular core group + * + * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority + * + * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms. + * + * If the core availability policy is keeping the required core group turned off, then + * the job will fail with a BASE_JD_EVENT_PM_EVENT error code. + */ +#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP (1U << 11) + +/** + * SW Flag: If this bit is set then the successful completion of this atom + * will not cause an event to be sent to userspace + */ +#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE (1U << 12) + +/** + * SW Flag: If this bit is set then completion of this atom will not cause an + * event to be sent to userspace, whether successful or not. + */ +#define BASEP_JD_REQ_EVENT_NEVER (1U << 14) + +/** +* These requirement bits are currently unused in base_jd_core_req (currently a u16) +*/ + +#define BASEP_JD_REQ_RESERVED_BIT5 (1U << 5) +#define BASEP_JD_REQ_RESERVED_BIT15 (1U << 15) + +/** +* Mask of all the currently unused requirement bits in base_jd_core_req. +*/ + +#define BASEP_JD_REQ_RESERVED (BASEP_JD_REQ_RESERVED_BIT5 | \ + BASEP_JD_REQ_RESERVED_BIT15) + +/** + * Mask of all bits in base_jd_core_req that control the type of the atom. + * + * This allows dependency only atoms to have flags set + */ +#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED | BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\ + BASE_JD_REQ_EXTERNAL_RESOURCES | BASEP_JD_REQ_EVENT_NEVER)) + +#if BASE_LEGACY_JD_API +/** + * @brief A single job chain, with pre/post dependendencies and mem ops + * + * This structure is used to describe a single job-chain to be submitted + * as part of a bag. + * It contains all the necessary information for Base to take care of this + * job-chain, including core requirements, priority, syncsets and + * dependencies. + */ +typedef struct base_jd_atom { + mali_addr64 jc; /**< job-chain GPU address */ + base_jd_udata udata; /**< user data */ + base_jd_dep pre_dep; /**< pre-dependencies */ + base_jd_dep post_dep; /**< post-dependencies */ + base_jd_core_req core_req; /**< core requirements */ + u16 nr_syncsets; /**< nr of syncsets following the atom */ + u16 nr_extres; /**< nr of external resources following the atom */ + + /** @brief Relative priority. + * + * A positive value requests a lower priority, whilst a negative value + * requests a higher priority. Only privileged processes may request a + * higher priority. 
For unprivileged processes, a negative priority will + * be interpreted as zero. + */ + s8 prio; + + /** + * @brief Device number to use, depending on @ref base_jd_core_req flags set. + * + * When BASE_JD_REQ_SPECIFIC_COHERENT_GROUP is set, a 'device' is one of + * the coherent core groups, and so this targets a particular coherent + * core-group. They are numbered from 0 to (mali_base_gpu_coherent_group_info::num_groups - 1), + * and the cores targeted by this device_nr will usually be those specified by + * (mali_base_gpu_coherent_group_info::group[device_nr].core_mask). + * Further, two atoms from different processes using the same \a device_nr + * at the same time will always target the same coherent core-group. + * + * There are exceptions to when the device_nr is ignored: + * - when any process in the system uses a BASE_JD_REQ_CS or + * BASE_JD_REQ_ONLY_COMPUTE atom that can run on all cores across all + * coherency groups (i.e. also does \b not have the + * BASE_JD_REQ_COHERENT_GROUP or BASE_JD_REQ_SPECIFIC_COHERENT_GROUP flags + * set). In this case, such atoms would block device_nr==1 being used due + * to restrictions on affinity, perhaps indefinitely. To ensure progress is + * made, the atoms targeted for device_nr 1 will instead be redirected to + * device_nr 0 + * - During certain HW workarounds, such as BASE_HW_ISSUE_8987, where + * BASE_JD_REQ_ONLY_COMPUTE atoms must not use the same cores as other + * atoms. In this case, all atoms are targeted to device_nr == min( num_groups, 1 ) + * + * Note that the 'device' number for a coherent coregroup cannot exceed + * (BASE_MAX_COHERENT_GROUPS - 1). + */ + u8 device_nr; +} base_jd_atom; +#endif /* BASE_LEGACY_JD_API */ + +typedef u8 base_atom_id; /**< Type big enough to store an atom number in */ + +typedef struct base_jd_atom_v2 { + mali_addr64 jc; /**< job-chain GPU address */ + base_jd_udata udata; /**< user data */ + kbase_pointer extres_list; /**< list of external resources */ + u16 nr_extres; /**< nr of external resources */ + base_jd_core_req core_req; /**< core requirements */ + base_atom_id pre_dep[2]; /**< pre-dependencies */ + base_atom_id atom_number; /**< unique number to identify the atom */ + s8 prio; /**< priority - smaller is higher priority */ + u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ + u8 padding[7]; +} base_jd_atom_v2; + +#if BASE_LEGACY_JD_API +/* Structure definition works around the fact that C89 doesn't allow arrays of size 0 */ +typedef struct basep_jd_atom_ss { + base_jd_atom atom; + base_syncset syncsets[1]; +} basep_jd_atom_ss; +#endif /* BASE_LEGACY_JD_API */ + +typedef enum base_external_resource_access { + BASE_EXT_RES_ACCESS_SHARED, + BASE_EXT_RES_ACCESS_EXCLUSIVE +} base_external_resource_access; + +typedef struct base_external_resource { + u64 ext_resource; +} base_external_resource; + +#if BASE_LEGACY_JD_API +/* Structure definition works around the fact that C89 doesn't allow arrays of size 0 */ +typedef struct basep_jd_atom_ext_res { + base_jd_atom atom; + base_external_resource resources[1]; +} basep_jd_atom_ext_res; + +static INLINE size_t base_jd_atom_size_ex(u32 syncset_count, u32 external_res_count) +{ + int size; + + LOCAL_ASSERT(0 == syncset_count || 0 == external_res_count); + + size = syncset_count ? offsetof(basep_jd_atom_ss, syncsets[0]) + (sizeof(base_syncset) * syncset_count) : external_res_count ? 
offsetof(basep_jd_atom_ext_res, resources[0]) + (sizeof(base_external_resource) * external_res_count) : sizeof(base_jd_atom); + + /* Atom minimum size set to 64 bytes to ensure that the maximum + * number of atoms in the ring buffer is limited to 256 */ + return MAX(64, size); +} + +/** + * @brief Atom size evaluator + * + * This function returns the size in bytes of a ::base_jd_atom + * containing @a n syncsets. It must be used to compute the size of a + * bag before allocation. + * + * @param nr the number of syncsets for this atom + * @return the atom size in bytes + */ +static INLINE size_t base_jd_atom_size(u32 nr) +{ + return base_jd_atom_size_ex(nr, 0); +} + +/** + * @brief Atom syncset accessor + * + * This function returns a pointer to the nth syncset allocated + * together with an atom. + * + * @param[in] atom The allocated atom + * @param n The number of the syncset to be returned + * @return a pointer to the nth syncset. + */ +static INLINE base_syncset *base_jd_get_atom_syncset(base_jd_atom *atom, u16 n) +{ + LOCAL_ASSERT(atom != NULL); + LOCAL_ASSERT(0 == (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)); + LOCAL_ASSERT(n <= atom->nr_syncsets); + return &((basep_jd_atom_ss *) atom)->syncsets[n]; +} +#endif /* BASE_LEGACY_JD_API */ + +/** + * @brief Soft-atom fence trigger setup. + * + * Sets up an atom to be a SW-only atom signaling a fence + * when it reaches the run state. + * + * Using the existing base dependency system the fence can + * be set to trigger when a GPU job has finished. + * + * The base fence object must not be terminated until the atom + * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned. + * + * @a fence must be a valid fence set up with @a base_fence_init. + * Calling this function with a uninitialized fence results in undefined behavior. + * + * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom + * @param[in] fence The base fence object to trigger. + */ +static INLINE void base_jd_fence_trigger_setup(base_jd_atom * const atom, base_fence *fence) +{ + LOCAL_ASSERT(atom); + LOCAL_ASSERT(fence); + LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE); + LOCAL_ASSERT(fence->basep.stream_fd >= 0); + atom->jc = (uintptr_t) fence; + atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER; +} + +static INLINE void base_jd_fence_trigger_setup_v2(base_jd_atom_v2 *atom, base_fence *fence) +{ + LOCAL_ASSERT(atom); + LOCAL_ASSERT(fence); + LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE); + LOCAL_ASSERT(fence->basep.stream_fd >= 0); + atom->jc = (uintptr_t) fence; + atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER; +} + +/** + * @brief Soft-atom fence wait setup. + * + * Sets up an atom to be a SW-only atom waiting on a fence. + * When the fence becomes triggered the atom becomes runnable + * and completes immediately. + * + * Using the existing base dependency system the fence can + * be set to block a GPU job until it has been triggered. + * + * The base fence object must not be terminated until the atom + * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned. + * + * @a fence must be a valid fence set up with @a base_fence_init or @a base_fence_import. + * Calling this function with a uninitialized fence results in undefined behavior. 
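+ *
+ * Illustrative sketch only (variable names are arbitrary; the fence is
+ * assumed to have been set up beforehand as described above):
+ * @code
+ * base_jd_atom_v2 atom;
+ * base_fence fence;
+ * // fence initialised elsewhere so that fence.basep.fd is a valid fd
+ * base_jd_fence_wait_setup_v2(&atom, &fence);
+ * @endcode
+ * After this call the atom's jc field references the fence and its core_req
+ * is BASE_JD_REQ_SOFT_FENCE_WAIT, ready for submission.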
+ * + * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom + * @param[in] fence The base fence object to wait on + */ +static INLINE void base_jd_fence_wait_setup(base_jd_atom * const atom, base_fence *fence) +{ + LOCAL_ASSERT(atom); + LOCAL_ASSERT(fence); + LOCAL_ASSERT(fence->basep.fd >= 0); + atom->jc = (uintptr_t) fence; + atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT; +} + +static INLINE void base_jd_fence_wait_setup_v2(base_jd_atom_v2 *atom, base_fence *fence) +{ + LOCAL_ASSERT(atom); + LOCAL_ASSERT(fence); + LOCAL_ASSERT(fence->basep.fd >= 0); + atom->jc = (uintptr_t) fence; + atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT; +} + +#if BASE_LEGACY_JD_API +/** + * @brief Atom external resource accessor + * + * This functions returns a pointer to the nth external resource tracked by the atom. + * + * @param[in] atom The allocated atom + * @param n The number of the external resource to return a pointer to + * @return a pointer to the nth external resource + */ +static INLINE base_external_resource * base_jd_get_external_resource(base_jd_atom *atom, u16 n) +{ + LOCAL_ASSERT(atom != NULL); + LOCAL_ASSERT(BASE_JD_REQ_EXTERNAL_RESOURCES == (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)); + LOCAL_ASSERT(n <= atom->nr_extres); + return &((basep_jd_atom_ext_res *) atom)->resources[n]; +} +#endif /* BASE_LEGACY_JD_API */ + +/** + * @brief External resource info initialization. + * + * Sets up a external resource object to reference + * a memory allocation and the type of access requested. + * + * @param[in] res The resource object to initialize + * @param handle The handle to the imported memory object + * @param access The type of access requested + */ +static INLINE void base_external_resource_init(base_external_resource * res, base_import_handle handle, base_external_resource_access access) +{ + mali_addr64 address; + address = handle.basep.handle; + + LOCAL_ASSERT(res != NULL); + LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB)); + LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE); + + res->ext_resource = address | (access & LOCAL_PAGE_LSB); +} + +#if BASE_LEGACY_JD_API +/** + * @brief Next atom accessor + * + * This function returns a pointer to the next allocated atom. It + * relies on the fact that the current atom has been correctly + * initialized (relies on the base_jd_atom::nr_syncsets field). + * + * @param[in] atom The allocated atom + * @return a pointer to the next atom. + */ +static INLINE base_jd_atom *base_jd_get_next_atom(base_jd_atom *atom) +{ + LOCAL_ASSERT(atom != NULL); + return (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) ? 
(base_jd_atom *) base_jd_get_external_resource(atom, atom->nr_extres) : (base_jd_atom *) base_jd_get_atom_syncset(atom, atom->nr_syncsets); +} +#endif /* BASE_LEGACY_JD_API */ + +/** + * @brief Job chain event code bits + * Defines the bits used to create ::base_jd_event_code + */ +enum { + BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */ + BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */ + BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */ + BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */ + BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */ + BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */ + BASE_JD_SW_EVENT_RESERVED = (3u << 11), /**< Reserved event type */ + BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) /**< Mask to extract the type from an event code */ +}; + +/** + * @brief Job chain event codes + * + * HW and low-level SW events are represented by event codes. + * The status of jobs which succeeded are also represented by + * an event code (see ::BASE_JD_EVENT_DONE). + * Events are usually reported as part of a ::base_jd_event. + * + * The event codes are encoded in the following way: + * @li 10:0 - subtype + * @li 12:11 - type + * @li 13 - SW success (only valid if the SW bit is set) + * @li 14 - SW event (HW event if not set) + * @li 15 - Kernel event (should never be seen in userspace) + * + * Events are split up into ranges as follows: + * - BASE_JD_EVENT_RANGE_\_START + * - BASE_JD_EVENT_RANGE_\_END + * + * \a code is in \'s range when: + * - BASE_JD_EVENT_RANGE_\_START <= code < BASE_JD_EVENT_RANGE_\_END + * + * Ranges can be asserted for adjacency by testing that the END of the previous + * is equal to the START of the next. This is useful for optimizing some tests + * for range. + * + * A limitation is that the last member of this enum must explicitly be handled + * (with an assert-unreachable statement) in switch statements that use + * variables of this type. Otherwise, the compiler warns that we have not + * handled that enum value. 
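+ *
+ * Illustrative sketch of decoding an event code with the bit definitions
+ * above (the code variable is assumed to hold a value reported by the
+ * driver, e.g. base_jd_event_v2::event_code):
+ * @code
+ * if (code & BASE_JD_SW_EVENT) {
+ *     u32 type = code & BASE_JD_SW_EVENT_TYPE_MASK;
+ *     int ok = (code & BASE_JD_SW_EVENT_SUCCESS) != 0;
+ * }
+ * @endcode
+ * where type is e.g. BASE_JD_SW_EVENT_JOB or BASE_JD_SW_EVENT_BAG, and ok is
+ * only meaningful because the SW bit is set.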
+ */ +typedef enum base_jd_event_code { + /* HW defined exceptions */ + + /** Start of HW Non-fault status codes + * + * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, + * because the job was hard-stopped + */ + BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0, + + /* non-fatal exceptions */ + BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */ + BASE_JD_EVENT_DONE = 0x01, + BASE_JD_EVENT_STOPPED = 0x03, /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */ + BASE_JD_EVENT_TERMINATED = 0x04, /**< This is actually a fault status code - the job was hard stopped */ + BASE_JD_EVENT_ACTIVE = 0x08, /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */ + + /** End of HW Non-fault status codes + * + * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, + * because the job was hard-stopped + */ + BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40, + + /** Start of HW fault and SW Error status codes */ + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40, + + /* job exceptions */ + BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40, + BASE_JD_EVENT_JOB_POWER_FAULT = 0x41, + BASE_JD_EVENT_JOB_READ_FAULT = 0x42, + BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43, + BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44, + BASE_JD_EVENT_JOB_BUS_FAULT = 0x48, + BASE_JD_EVENT_INSTR_INVALID_PC = 0x50, + BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51, + BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52, + BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53, + BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54, + BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55, + BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56, + BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58, + BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59, + BASE_JD_EVENT_STATE_FAULT = 0x5A, + BASE_JD_EVENT_OUT_OF_MEMORY = 0x60, + BASE_JD_EVENT_UNKNOWN = 0x7F, + + /* GPU exceptions */ + BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80, + BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88, + + /* MMU exceptions */ + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4, + BASE_JD_EVENT_PERMISSION_FAULT = 0xC8, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4, + BASE_JD_EVENT_ACCESS_FLAG = 0xD8, + + /* SW defined exceptions */ + BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_TIMED_OUT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, + BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, + BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, + BASE_JD_EVENT_PM_EVENT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, + + BASE_JD_EVENT_BAG_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, + + /** End of HW fault and SW Error status codes */ + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF, + + /** Start of SW Success status codes */ + BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000, + + BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000, + BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 
BASE_JD_SW_EVENT_INFO | 0x000, + + /** End of SW Success status codes */ + BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF, + + /** Start of Kernel-only status codes. Such codes are never returned to user-space */ + BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000, + BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, + + /** End of Kernel-only status codes. */ + BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF +} base_jd_event_code; + +/** + * @brief Event reporting structure + * + * This structure is used by the kernel driver to report information + * about GPU events. The can either be HW-specific events or low-level + * SW events, such as job-chain completion. + * + * The event code contains an event type field which can be extracted + * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK. + * + * Based on the event type base_jd_event::data holds: + * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed + * job-chain + * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has + * been completed (ie all contained job-chains have been completed). + * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used + */ +#if BASE_LEGACY_JD_API +typedef struct base_jd_event { + base_jd_event_code event_code; /**< event code */ + void *data; /**< event specific data */ +} base_jd_event; +#endif + +typedef struct base_jd_event_v2 { + base_jd_event_code event_code; /**< event code */ + base_atom_id atom_number; /**< the atom number that has completed */ + base_jd_udata udata; /**< user data */ +} base_jd_event_v2; + +/** + * Padding required to ensure that the @ref base_dump_cpu_gpu_counters structure fills + * a full cache line. + */ + +#define BASE_CPU_GPU_CACHE_LINE_PADDING (36) + + +/** + * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs. + * + * This structure is stored into the memory pointed to by the @c jc field of @ref base_jd_atom. + * + * This structure must be padded to ensure that it will occupy whole cache lines. This is to avoid + * cases where access to pages containing the structure is shared between cached and un-cached + * memory regions, which would cause memory corruption. Here we set the structure size to be 64 bytes + * which is the cache line for ARM A15 processors. + */ + +typedef struct base_dump_cpu_gpu_counters { + u64 system_time; + u64 cycle_counter; + u64 sec; + u32 usec; + u8 padding[BASE_CPU_GPU_CACHE_LINE_PADDING]; +} base_dump_cpu_gpu_counters; + + + +/** @} end group base_user_api_job_dispatch */ + +#ifdef __KERNEL__ +/* + * The following typedefs should be removed when a midg types header is added. + * See MIDCOM-1657 for details. 
+ */ +typedef u32 midg_product_id; +typedef u32 midg_cache_features; +typedef u32 midg_tiler_features; +typedef u32 midg_mem_features; +typedef u32 midg_mmu_features; +typedef u32 midg_js_features; +typedef u32 midg_as_present; +typedef u32 midg_js_present; + +#define MIDG_MAX_JOB_SLOTS 16 + +#else +#include +#endif + +/** + * @page page_base_user_api_gpuprops User-side Base GPU Property Query API + * + * The User-side Base GPU Property Query API encapsulates two + * sub-modules: + * + * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties" + * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties" + * + * There is a related third module outside of Base, which is owned by the MIDG + * module: + * - @ref midg_gpuprops_static "Midgard Compile-time GPU Properties" + * + * Base only deals with properties that vary between different Midgard + * implementations - the Dynamic GPU properties and the Platform Config + * properties. + * + * For properties that are constant for the Midgard Architecture, refer to the + * MIDG module. However, we will discuss their relevance here just to + * provide background information. + * + * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules + * + * The compile-time properties (Platform Config, Midgard Compile-time + * properties) are exposed as pre-processor macros. + * + * Complementing the compile-time properties are the Dynamic GPU + * Properties, which act as a conduit for the Midgard Configuration + * Discovery. + * + * In general, the dynamic properties are present to verify that the platform + * has been configured correctly with the right set of Platform Config + * Compile-time Properties. + * + * As a consistant guide across the entire DDK, the choice for dynamic or + * compile-time should consider the following, in order: + * -# Can the code be written so that it doesn't need to know the + * implementation limits at all? + * -# If you need the limits, get the information from the Dynamic Property + * lookup. This should be done once as you fetch the context, and then cached + * as part of the context data structure, so it's cheap to access. + * -# If there's a clear and arguable inefficiency in using Dynamic Properties, + * then use a Compile-Time Property (Platform Config, or Midgard Compile-time + * property). Examples of where this might be sensible follow: + * - Part of a critical inner-loop + * - Frequent re-use throughout the driver, causing significant extra load + * instructions or control flow that would be worthwhile optimizing out. + * + * We cannot provide an exhaustive set of examples, neither can we provide a + * rule for every possible situation. Use common sense, and think about: what + * the rest of the driver will be doing; how the compiler might represent the + * value if it is a compile-time constant; whether an OEM shipping multiple + * devices would benefit much more from a single DDK binary, instead of + * insignificant micro-optimizations. + * + * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties + * + * Dynamic GPU properties are presented in two sets: + * -# the commonly used properties in @ref base_gpu_props, which have been + * unpacked from GPU register bitfields. + * -# The full set of raw, unprocessed properties in @ref midg_raw_gpu_props + * (also a member of @ref base_gpu_props). All of these are presented in + * the packed form, as presented by the GPU registers themselves. 
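+ *
+ * As an illustrative sketch only (the exact way the property pointer is
+ * obtained is described further down this page; the ctx argument and the
+ * variable names are assumptions):
+ * @code
+ * const base_gpu_props *props = _mali_base_get_gpu_props(ctx);
+ * u32 max_khz = props->core_props.gpu_freq_khz_max;
+ * @endcode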
+ * + * @usecase The raw properties in @ref midg_raw_gpu_props are necessary to + * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device + * behaving differently?". In this case, all information about the + * configuration is potentially useful, but it does not need to be processed + * by the driver. Instead, the raw registers can be processed by the Mali + * Tools software on the host PC. + * + * The properties returned extend the Midgard Configuration Discovery + * registers. For example, GPU clock speed is not specified in the Midgard + * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function. + * + * The GPU properties are obtained by a call to + * _mali_base_get_gpu_props(). This simply returns a pointer to a const + * base_gpu_props structure. It is constant for the life of a base + * context. Multiple calls to _mali_base_get_gpu_props() to a base context + * return the same pointer to a constant structure. This avoids cache pollution + * of the common data. + * + * This pointer must not be freed, because it does not point to the start of a + * region allocated by the memory allocator; instead, just close the @ref + * base_context. + * + * + * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties + * + * The Platform Config File sets up gpu properties that are specific to a + * certain platform. Properties that are 'Implementation Defined' in the + * Midgard Architecture spec are placed here. + * + * @note Reference configurations are provided for Midgard Implementations, such as + * the Mali-T600 family. The customer need not repeat this information, and can select one of + * these reference configurations. For example, VA_BITS, PA_BITS and the + * maximum number of samples per pixel might vary between Midgard Implementations, but + * \b not for platforms using the Mali-T604. This information is placed in + * the reference configuration files. + * + * The System Integrator creates the following structure: + * - platform_XYZ + * - platform_XYZ/plat + * - platform_XYZ/plat/plat_config.h + * + * They then edit plat_config.h, using the example plat_config.h files as a + * guide. + * + * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will + * receive a helpful \#error message if they do not do this correctly. This + * selects the Reference Configuration for the Midgard Implementation. The rationale + * behind this decision (against asking the customer to write \#include + * in their plat_config.h) is as follows: + * - This mechanism 'looks' like a regular config file (such as Linux's + * .config) + * - It is difficult to get wrong in a way that will produce strange build + * errors: + * - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and + * so they won't accidentally pick another file with 'mali_t600' in its name + * - When the build doesn't work, the System Integrator may think the DDK is + * doesn't work, and attempt to fix it themselves: + * - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the + * error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells + * you. + * - For a \#include mechanism, checks must still be made elsewhere, which the + * System Integrator may try working around by setting \#defines (such as + * VA_BITS) themselves in their plat_config.h. In the worst case, they may + * set the prevention-mechanism \#define of + * "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN". 
+ * - In this case, they would believe they are on the right track, because + * the build progresses with their fix, but with errors elsewhere. + * + * However, there is nothing to prevent the customer using \#include to organize + * their own configurations files hierarchically. + * + * The mechanism for the header file processing is as follows: + * + * @dot + digraph plat_config_mechanism { + rankdir=BT + size="6,6" + + "mali_base.h"; + "midg/midg.h"; + + node [ shape=box ]; + { + rank = same; ordering = out; + + "midg/midg_gpu_props.h"; + "base/midg_gpus/mali_t600.h"; + "base/midg_gpus/other_midg_gpu.h"; + } + { rank = same; "plat/plat_config.h"; } + { + rank = same; + "midg/midg.h" [ shape=box ]; + gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ]; + select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ; + } + node [ shape=box ]; + { rank = same; "plat/plat_config.h"; } + { rank = same; "mali_base.h"; } + + "mali_base.h" -> "midg/midg.h" -> "midg/midg_gpu_props.h"; + "mali_base.h" -> "plat/plat_config.h" ; + "mali_base.h" -> select_gpu ; + + "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ; + gpu_chooser -> select_gpu [style="dotted,bold"] ; + + select_gpu -> "base/midg_gpus/mali_t600.h" ; + select_gpu -> "base/midg_gpus/other_midg_gpu.h" ; + } + @enddot + * + * + * @section sec_base_user_api_gpuprops_kernel Kernel Operation + * + * During Base Context Create time, user-side makes a single kernel call: + * - A call to fill user memory with GPU information structures + * + * The kernel-side will fill the provided the entire processed @ref base_gpu_props + * structure, because this information is required in both + * user and kernel side; it does not make sense to decode it twice. + * + * Coherency groups must be derived from the bitmasks, but this can be done + * kernel side, and just once at kernel startup: Coherency groups must already + * be known kernel-side, to support chains that specify a 'Only Coherent Group' + * SW requirement, or 'Only Coherent Group with Tiler' SW requirement. + * + * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation + * Creation of the coherent group data is done at device-driver startup, and so + * is one-time. This will most likely involve a loop with CLZ, shifting, and + * bit clearing on the L2_PRESENT or L3_PRESENT masks, depending on whether the + * system is L2 or L2+L3 Coherent. The number of shader cores is done by a + * population count, since faulty cores may be disabled during production, + * producing a non-contiguous mask. + * + * The memory requirements for this algoirthm can be determined either by a u64 + * population count on the L2/L3_PRESENT masks (a LUT helper already is + * requried for the above), or simple assumption that there can be no more than + * 16 coherent groups, since core groups are typically 4 cores. + */ + +/** + * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs + * @{ + */ + +/** + * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties + * @{ + */ + +#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3 + +#define BASE_MAX_COHERENT_GROUPS 16 + +struct mali_base_gpu_core_props { + /** + * Product specific value. + */ + midg_product_id product_id; + + /** + * Status of the GPU release. + * No defined values, but starts at 0 and increases by one for each release + * status (alpha, beta, EAC, etc.). + * 4 bit values (0-15). 
+ */ + u16 version_status; + + /** + * Minor release number of the GPU. "P" part of an "RnPn" release number. + * 8 bit values (0-255). + */ + u16 minor_revision; + + /** + * Major release number of the GPU. "R" part of an "RnPn" release number. + * 4 bit values (0-15). + */ + u16 major_revision; + + u16 padding; + + /** + * @usecase GPU clock speed is not specified in the Midgard Architecture, but is + * necessary for OpenCL's clGetDeviceInfo() function. + */ + u32 gpu_speed_mhz; + + /** + * @usecase GPU clock max/min speed is required for computing best/worst case + * in tasks as job scheduling ant irq_throttling. (It is not specified in the + * Midgard Architecture). + */ + u32 gpu_freq_khz_max; + u32 gpu_freq_khz_min; + + /** + * Size of the shader program counter, in bits. + */ + u32 log2_program_counter_size; + + /** + * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a + * bitpattern where a set bit indicates that the format is supported. + * + * Before using a texture format, it is recommended that the corresponding + * bit be checked. + */ + u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + + /** + * Theoretical maximum memory available to the GPU. It is unlikely that a + * client will be able to allocate all of this memory for their own + * purposes, but this at least provides an upper bound on the memory + * available to the GPU. + * + * This is required for OpenCL's clGetDeviceInfo() call when + * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The + * client will not be expecting to allocate anywhere near this value. + */ + u64 gpu_available_memory_size; +}; + +/** + * + * More information is possible - but associativity and bus width are not + * required by upper-level apis. + */ +struct mali_base_gpu_l2_cache_props { + u8 log2_line_size; + u8 log2_cache_size; + u8 num_l2_slices; /* Number of L2C slices. 1 or higher */ + u8 padding[5]; +}; + +struct mali_base_gpu_l3_cache_props { + u8 log2_line_size; + u8 log2_cache_size; + u8 padding[6]; +}; + +struct mali_base_gpu_tiler_props { + u32 bin_size_bytes; /* Max is 4*2^15 */ + u32 max_active_levels; /* Max is 2^15 */ +}; + +/** + * GPU threading system details. + */ +struct mali_base_gpu_thread_props { + u32 max_threads; /* Max. number of threads per core */ + u32 max_workgroup_size; /* Max. number of threads per workgroup */ + u32 max_barrier_size; /* Max. number of threads that can synchronize on a simple barrier */ + u16 max_registers; /* Total size [1..65535] of the register file available per core. */ + u8 max_task_queue; /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */ + u8 max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */ + u8 impl_tech; /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */ + u8 padding[7]; +}; + +/** + * @brief descriptor for a coherent group + * + * \c core_mask exposes all cores in that coherent group, and \c num_cores + * provides a cached population-count for that mask. + * + * @note Whilst all cores are exposed in the mask, not all may be available to + * the application, depending on the Kernel Job Scheduler policy. Therefore, + * the application should not further restrict the core mask itself, as it may + * result in an empty core mask. However, it can guarentee that there will be + * at least one core available for each core group exposed . + * + * @usecase Chains marked at certain user-side priorities (e.g. 
the Long-running + * (batch) priority ) can be prevented from running on entire core groups by the + * Kernel Chain Scheduler policy. + * + * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage. + */ +struct mali_base_gpu_coherent_group { + u64 core_mask; /**< Core restriction mask required for the group */ + u16 num_cores; /**< Number of cores in the group */ + u16 padding[3]; +}; + +/** + * @brief Coherency group information + * + * Note that the sizes of the members could be reduced. However, the \c group + * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte + * aligned, thus leading to wastage if the other members sizes were reduced. + * + * The groups are sorted by core mask. The core masks are non-repeating and do + * not intersect. + */ +struct mali_base_gpu_coherent_group_info { + u32 num_groups; + + /** + * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches. + * + * The GPU Counter dumping writes 2048 bytes per core group, regardless of + * whether the core groups are coherent or not. Hence this member is needed + * to calculate how much memory is required for dumping. + * + * @note Do not use it to work out how many valid elements are in the + * group[] member. Use num_groups instead. + */ + u32 num_core_groups; + + /** + * Coherency features of the memory, accessed by @ref midg_mem_features + * methods + */ + midg_mem_features coherency; + + u32 padding; + + /** + * Descriptors of coherent groups + */ + struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS]; +}; + +/** + * A complete description of the GPU's Hardware Configuration Discovery + * registers. + * + * The information is presented inefficiently for access. For frequent access, + * the values should be better expressed in an unpacked form in the + * base_gpu_props structure. + * + * @usecase The raw properties in @ref midg_raw_gpu_props are necessary to + * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device + * behaving differently?". In this case, all information about the + * configuration is potentially useful, but it does not need to be processed + * by the driver. Instead, the raw registers can be processed by the Mali + * Tools software on the host PC. + * + */ +struct midg_raw_gpu_props { + u64 shader_present; + u64 tiler_present; + u64 l2_present; + u64 l3_present; + + midg_cache_features l2_features; + midg_cache_features l3_features; + midg_mem_features mem_features; + midg_mmu_features mmu_features; + + midg_as_present as_present; + + u32 js_present; + midg_js_features js_features[MIDG_MAX_JOB_SLOTS]; + midg_tiler_features tiler_features; + u32 texture_features[3]; + + u32 gpu_id; + + u32 thread_max_threads; + u32 thread_max_workgroup_size; + u32 thread_max_barrier_size; + u32 thread_features; + + u32 padding; +}; + +/** + * Return structure for _mali_base_get_gpu_props(). + * + * NOTE: the raw_props member in this datastructure contains the register + * values from which the value of the other members are derived. The derived + * members exist to allow for efficient access and/or shielding the details + * of the layout of the registers. 
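+ *
+ * Illustrative sketch only (props is assumed to point at this structure,
+ * obtained as described earlier on this page): sizing a counter dump buffer
+ * from the coherency information, as documented for
+ * mali_base_gpu_coherent_group_info::num_core_groups:
+ * @code
+ * size_t dump_bytes = props->coherency_info.num_core_groups * 2048;
+ * @endcode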
+ * + */ +typedef struct mali_base_gpu_props { + struct mali_base_gpu_core_props core_props; + struct mali_base_gpu_l2_cache_props l2_props; + struct mali_base_gpu_l3_cache_props l3_props; + struct mali_base_gpu_tiler_props tiler_props; + struct mali_base_gpu_thread_props thread_props; + + /** This member is large, likely to be 128 bytes */ + struct midg_raw_gpu_props raw_props; + + /** This must be last member of the structure */ + struct mali_base_gpu_coherent_group_info coherency_info; +} base_gpu_props; + +/** @} end group base_user_api_gpuprops_dyn */ + +/** @} end group base_user_api_gpuprops */ + +/** + * @addtogroup base_user_api_core User-side Base core APIs + * @{ + */ + +/** + * \enum base_context_create_flags + * + * Flags to pass to ::base_context_init. + * Flags can be ORed together to enable multiple things. + * + * These share the same space as @ref basep_context_private_flags, and so must + * not collide with them. + */ +enum base_context_create_flags { + /** No flags set */ + BASE_CONTEXT_CREATE_FLAG_NONE = 0, + + /** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */ + BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0), + + /** Base context is a 'System Monitor' context for Hardware counters. + * + * One important side effect of this is that job submission is disabled. */ + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1), + + /** Base context flag indicating a 'hint' that this context uses Compute + * Jobs only. + * + * Specifially, this means that it only sends atoms that do not + * contain the following @ref base_jd_core_req : + * - BASE_JD_REQ_FS + * - BASE_JD_REQ_T + * + * Violation of these requirements will cause the Job-Chains to be rejected. + * + * In addition, it is inadvisable for the atom's Job-Chains to contain Jobs + * of the following @ref midg_job_type (whilst it may work now, it may not + * work in future) : + * - @ref MIDG_JOB_VERTEX + * - @ref MIDG_JOB_GEOMETRY + * + * @note An alternative to using this is to specify the BASE_JD_REQ_ONLY_COMPUTE + * requirement in atoms. + */ + BASE_CONTEXT_HINT_ONLY_COMPUTE = (1u << 2) +}; + +/** + * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init() + */ +#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \ + (((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \ + ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \ + ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE)) + +/** + * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel + */ +#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \ + (((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \ + ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE)) + +/** + * Private flags used on the base context + * + * These start at bit 31, and run down to zero. + * + * They share the same space as @ref base_context_create_flags, and so must + * not collide with them. + */ +enum basep_context_private_flags { + /** Private flag tracking whether job descriptor dumping is disabled */ + BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED = (1 << 31) +}; + +/** @} end group base_user_api_core */ + +/** @} end group base_user_api */ + +/** + * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties + * @{ + * + * C Pre-processor macros are exposed here to do with Platform + * Config. + * + * These include: + * - GPU Properties that are constant on a particular Midgard Family + * Implementation e.g. Maximum samples per pixel on Mali-T600. + * - General platform config for the GPU, such as the GPU major and minor + * revison. 
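+ *
+ * For illustration only, echoing the plat_config.h mechanism described in
+ * the GPU properties notes above (CONFIG_GPU_CORE_TYPE is the macro
+ * referenced there; the error text is an assumption):
+ * @code
+ * #ifndef CONFIG_GPU_CORE_TYPE
+ * #error "plat_config.h must select a reference GPU configuration"
+ * #endif
+ * @endcode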
+ */ + +/** @} end group base_plat_config_gpuprops */ + +/** + * @addtogroup base_api Base APIs + * @{ + */ +/** + * @addtogroup basecpuprops Base CPU Properties + * @{ + */ + +/** + * @brief CPU Property Flag for base_cpu_props::cpu_flags, indicating a + * Little Endian System. If not set in base_cpu_props::cpu_flags, then the + * system is Big Endian. + * + * The compile-time equivalent is @ref OSU_CONFIG_CPU_LITTLE_ENDIAN. + */ +#define BASE_CPU_PROPERTY_FLAG_LITTLE_ENDIAN F_BIT_0 + + +/** + * @brief Platform dynamic CPU ID properties structure + */ +typedef struct base_cpu_id_props +{ + /** + * CPU ID + */ + u32 id; + + /** + * CPU Part number + */ + u16 part; + + /** + * ASCII code of implementer trademark + */ + u8 implementer; + + /** + * CPU Variant + */ + u8 variant; + + /** + * CPU Architecture + */ + u8 arch; + + /** + * CPU revision + */ + u8 rev; + + /** + Validity of CPU id where 0-invalid and + 1-valid only if ALL the cpu_id props are valid + */ + u8 valid; + + u8 padding[1]; +}base_cpu_id_props; + + +/** @brief Platform Dynamic CPU properties structure */ +typedef struct base_cpu_props { + u32 nr_cores; /**< Number of CPU cores */ + + /** + * CPU page size as a Logarithm to Base 2. The compile-time + * equivalent is @ref OSU_CONFIG_CPU_PAGE_SIZE_LOG2 + */ + u32 cpu_page_size_log2; + + /** + * CPU L1 Data cache line size as a Logarithm to Base 2. The compile-time + * equivalent is @ref OSU_CONFIG_CPU_L1_DCACHE_LINE_SIZE_LOG2. + */ + u32 cpu_l1_dcache_line_size_log2; + + /** + * CPU L1 Data cache size, in bytes. The compile-time equivalient is + * @ref OSU_CONFIG_CPU_L1_DCACHE_SIZE. + * + * This CPU Property is mainly provided to implement OpenCL's + * clGetDeviceInfo(), which allows the CL_DEVICE_GLOBAL_MEM_CACHE_SIZE + * hint to be queried. + */ + u32 cpu_l1_dcache_size; + + /** + * CPU Property Flags bitpattern. + * + * This is a combination of bits as specified by the macros prefixed with + * 'BASE_CPU_PROPERTY_FLAG_'. + */ + u32 cpu_flags; + + /** + * Maximum clock speed in MHz. + * @usecase 'Maximum' CPU Clock Speed information is required by OpenCL's + * clGetDeviceInfo() function for the CL_DEVICE_MAX_CLOCK_FREQUENCY hint. + */ + u32 max_cpu_clock_speed_mhz; + + /** + * @brief Total memory, in bytes. + * + * This is the theoretical maximum memory available to the CPU. It is + * unlikely that a client will be able to allocate all of this memory for + * their own purposes, but this at least provides an upper bound on the + * memory available to the CPU. + * + * This is required for OpenCL's clGetDeviceInfo() call when + * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL CPU devices. + */ + u64 available_memory_size; + + /** + * CPU ID detailed info + */ + base_cpu_id_props cpu_id; + + u32 padding; +} base_cpu_props; +/** @} end group basecpuprops */ + +/** + * @brief The payload for a replay job. This must be in GPU memory. + */ +typedef struct base_jd_replay_payload { + /** + * Pointer to the first entry in the base_jd_replay_jc list. These + * will be replayed in @b reverse order (so that extra ones can be added + * to the head in future soft jobs without affecting this soft job) + */ + mali_addr64 tiler_jc_list; + + /** + * Pointer to the fragment job chain. + */ + mali_addr64 fragment_jc; + + /** + * Pointer to the tiler heap free FBD field to be modified. + */ + mali_addr64 tiler_heap_free; + + /** + * Hierarchy mask for the replayed fragment jobs. May be zero. + */ + u16 fragment_hierarchy_mask; + + /** + * Hierarchy mask for the replayed tiler jobs. 
May be zero. + */ + u16 tiler_hierarchy_mask; + + /** + * Default weight to be used for hierarchy levels not in the original + * mask. + */ + u32 hierarchy_default_weight; + + /** + * Core requirements for the tiler job chain + */ + base_jd_core_req tiler_core_req; + + /** + * Core requirements for the fragment job chain + */ + base_jd_core_req fragment_core_req; + + u8 padding[4]; +} base_jd_replay_payload; + +/** + * @brief An entry in the linked list of job chains to be replayed. This must + * be in GPU memory. + */ +typedef struct base_jd_replay_jc { + /** + * Pointer to next entry in the list. A setting of NULL indicates the + * end of the list. + */ + mali_addr64 next; + + /** + * Pointer to the job chain. + */ + mali_addr64 jc; + +} base_jd_replay_jc; + +/* Maximum number of jobs allowed in a fragment chain in the payload of a + * replay job */ +#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256 + +/** @} end group base_api */ + +typedef struct base_profiling_controls { + u32 profiling_controls[FBDUMP_CONTROL_MAX]; +} base_profiling_controls; + +#endif /* _BASE_KERNEL_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_kernel_sync.h b/drivers/gpu/arm/midgard/mali_base_kernel_sync.h new file mode 100755 index 00000000000..01a837cfae8 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_base_kernel_sync.h @@ -0,0 +1,47 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file + * Base cross-proccess sync API. + */ + +#ifndef _BASE_KERNEL_SYNC_H_ +#define _BASE_KERNEL_SYNC_H_ + +#include + +#define STREAM_IOC_MAGIC '~' + +/* Fence insert. + * + * Inserts a fence on the stream operated on. + * Fence can be waited via a base fence wait soft-job + * or triggered via a base fence trigger soft-job. + * + * Fences must be cleaned up with close when no longer needed. + * + * No input/output arguments. + * Returns + * >=0 fd + * <0 error code + */ +#define STREAM_IOC_FENCE_INSERT _IO(STREAM_IOC_MAGIC, 0) + +#endif /* _BASE_KERNEL_SYNC_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/drivers/gpu/arm/midgard/mali_base_mem_priv.h new file mode 100755 index 00000000000..40942283824 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_base_mem_priv.h @@ -0,0 +1,52 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _BASE_MEM_PRIV_H_ +#define _BASE_MEM_PRIV_H_ + +#define BASE_SYNCSET_OP_MSYNC (1U << 0) +#define BASE_SYNCSET_OP_CSYNC (1U << 1) + +/* + * This structure describe a basic memory coherency operation. 
+ * It can either be: + * @li a sync from CPU to Memory: + * - type = ::BASE_SYNCSET_OP_MSYNC + * - mem_handle = a handle to the memory object on which the operation + * is taking place + * - user_addr = the address of the range to be synced + * - size = the amount of data to be synced, in bytes + * - offset is ignored. + * @li a sync from Memory to CPU: + * - type = ::BASE_SYNCSET_OP_CSYNC + * - mem_handle = a handle to the memory object on which the operation + * is taking place + * - user_addr = the address of the range to be synced + * - size = the amount of data to be synced, in bytes. + * - offset is ignored. + */ +typedef struct basep_syncset { + base_mem_handle mem_handle; + u64 user_addr; + u64 size; + u8 type; + u8 padding[7]; +} basep_syncset; + +#endif diff --git a/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h new file mode 100755 index 00000000000..ce02e8b8e2b --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h @@ -0,0 +1,26 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +#ifndef _BASE_VENDOR_SPEC_FUNC_H_ +#define _BASE_VENDOR_SPEC_FUNC_H_ + +#include + +mali_error kbase_get_vendor_specific_cpu_clock_speed(u32 * const); + +#endif /*_BASE_VENDOR_SPEC_FUNC_H_*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h new file mode 100755 index 00000000000..5fd4b847a86 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase.h @@ -0,0 +1,455 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
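A minimal sketch of populating the basep_syncset structure above for a CPU-to-memory sync; `handle`, `ptr` and `length` are caller-supplied placeholders, not names introduced by this patch:

	basep_syncset sset;

	sset.mem_handle = handle;                /* memory object being synced   */
	sset.user_addr  = (u64)(uintptr_t)ptr;   /* CPU address of the range     */
	sset.size       = length;                /* bytes to sync                */
	sset.type       = BASE_SYNCSET_OP_MSYNC; /* CPU -> Memory direction      */
	memset(sset.padding, 0, sizeof(sset.padding));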
+ * + */ + + + + + +#ifndef _KBASE_H_ +#define _KBASE_H_ + +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mali_base_kernel.h" +#include +#include + +#include "mali_kbase_pm.h" +#include "mali_kbase_mem_lowlevel.h" +#include "mali_kbase_defs.h" +#include "mali_kbase_trace_timeline.h" +#include "mali_kbase_js.h" +#include "mali_kbase_mem.h" +#include "mali_kbase_security.h" +#include "mali_kbase_utility.h" +#include +#include "mali_kbase_cpuprops.h" +#include "mali_kbase_gpuprops.h" +#ifdef CONFIG_GPU_TRACEPOINTS +#include +#endif +/** + * @page page_base_kernel_main Kernel-side Base (KBase) APIs + * + * The Kernel-side Base (KBase) APIs are divided up as follows: + * - @subpage page_kbase_js_policy + */ + +/** + * @defgroup base_kbase_api Kernel-side Base (KBase) APIs + */ + +kbase_device *kbase_device_alloc(void); +/* +* note: configuration attributes member of kbdev needs to have +* been setup before calling kbase_device_init +*/ + +/* +* API to acquire device list semaphone and return pointer +* to the device list head +*/ +const struct list_head *kbase_dev_list_get(void); +/* API to release the device list semaphore */ +void kbase_dev_list_put(const struct list_head *dev_list); + +mali_error kbase_device_init(kbase_device * const kbdev); +void kbase_device_term(kbase_device *kbdev); +void kbase_device_free(kbase_device *kbdev); +int kbase_device_has_feature(kbase_device *kbdev, u32 feature); +kbase_midgard_type kbase_device_get_type(kbase_device *kbdev); +struct kbase_device *kbase_find_device(int minor); /* Only needed for gator integration */ + +void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value); + +u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control); + +/** + * Ensure that all IRQ handlers have completed execution + * + * @param kbdev The kbase device + */ +void kbase_synchronize_irqs(kbase_device *kbdev); + +kbase_context *kbase_create_context(kbase_device *kbdev); +void kbase_destroy_context(kbase_context *kctx); +mali_error kbase_context_set_create_flags(kbase_context *kctx, u32 flags); + +mali_error kbase_instr_hwcnt_setup(kbase_context *kctx, kbase_uk_hwcnt_setup *setup); +mali_error kbase_instr_hwcnt_enable(kbase_context *kctx, kbase_uk_hwcnt_setup *setup); +mali_error kbase_instr_hwcnt_disable(kbase_context *kctx); +mali_error kbase_instr_hwcnt_clear(kbase_context *kctx); +mali_error kbase_instr_hwcnt_dump(kbase_context *kctx); +mali_error kbase_instr_hwcnt_dump_irq(kbase_context *kctx); +mali_bool kbase_instr_hwcnt_dump_complete(kbase_context *kctx, mali_bool * const success); +void kbase_instr_hwcnt_suspend(kbase_device *kbdev); +void kbase_instr_hwcnt_resume(kbase_device *kbdev); + +void kbasep_cache_clean_worker(struct work_struct *data); +void kbase_clean_caches_done(kbase_device *kbdev); + +/** + * The GPU has completed performance count sampling successfully. 
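Taken together, the device and context declarations above imply the following bring-up and tear-down order. This is a sketch only: error handling is elided, MALI_ERROR_NONE is assumed to be the mali_error success value, and the configuration step is shown as a comment because its contents live in the platform config files of this patch:

	kbase_device *kbdev = kbase_device_alloc();
	kbase_context *kctx;

	/* ... populate kbdev->config_attributes before init, as required ... */
	if (kbase_device_init(kbdev) != MALI_ERROR_NONE)
		return;                  /* error handling elided */

	kctx = kbase_create_context(kbdev);
	/* ... submit jobs, dequeue events ... */
	kbase_destroy_context(kctx);

	kbase_device_term(kbdev);
	kbase_device_free(kbdev);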
+ */ +void kbase_instr_hwcnt_sample_done(kbase_device *kbdev); + +mali_error kbase_jd_init(kbase_context *kctx); +void kbase_jd_exit(kbase_context *kctx); +mali_error kbase_jd_submit(kbase_context *kctx, const kbase_uk_job_submit *user_bag); +void kbase_jd_done(kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, + kbasep_js_atom_done_code done_code); +void kbase_jd_cancel(kbase_device *kbdev, kbase_jd_atom *katom); +void kbase_jd_zap_context(kbase_context *kctx); +mali_bool jd_done_nolock(kbase_jd_atom *katom); +void kbase_jd_free_external_resources(kbase_jd_atom *katom); +mali_bool jd_submit_atom(kbase_context *kctx, + const base_jd_atom_v2 *user_atom, + kbase_jd_atom *katom); + +mali_error kbase_job_slot_init(kbase_device *kbdev); +void kbase_job_slot_halt(kbase_device *kbdev); +void kbase_job_slot_term(kbase_device *kbdev); +void kbase_job_done(kbase_device *kbdev, u32 done); +void kbase_job_zap_context(kbase_context *kctx); + +void kbase_job_slot_softstop(kbase_device *kbdev, int js, kbase_jd_atom *target_katom); +void kbase_job_slot_hardstop(kbase_context *kctx, int js, kbase_jd_atom *target_katom); + +void kbase_event_post(kbase_context *ctx, kbase_jd_atom *event); +int kbase_event_dequeue(kbase_context *ctx, base_jd_event_v2 *uevent); +int kbase_event_pending(kbase_context *ctx); +mali_error kbase_event_init(kbase_context *kctx); +void kbase_event_close(kbase_context *kctx); +void kbase_event_cleanup(kbase_context *kctx); +void kbase_event_wakeup(kbase_context *kctx); + +int kbase_process_soft_job(kbase_jd_atom *katom); +mali_error kbase_prepare_soft_job(kbase_jd_atom *katom); +void kbase_finish_soft_job(kbase_jd_atom *katom); +void kbase_cancel_soft_job(kbase_jd_atom *katom); +void kbase_resume_suspended_soft_jobs(kbase_device *kbdev); + +int kbase_replay_process(kbase_jd_atom *katom); + +/* api used internally for register access. Contains validation and tracing */ +void kbase_reg_write(kbase_device *kbdev, u16 offset, u32 value, kbase_context *kctx); +u32 kbase_reg_read(kbase_device *kbdev, u16 offset, kbase_context *kctx); +void kbase_device_trace_register_access(kbase_context *kctx, kbase_reg_access_type type, u16 reg_offset, u32 reg_value); +void kbase_device_trace_buffer_install(kbase_context *kctx, u32 *tb, size_t size); +void kbase_device_trace_buffer_uninstall(kbase_context *kctx); + +/* api to be ported per OS, only need to do the raw register access */ +void kbase_os_reg_write(kbase_device *kbdev, u16 offset, u32 value); +u32 kbase_os_reg_read(kbase_device *kbdev, u16 offset); + +/** Report a GPU fault. + * + * This function is called from the interrupt handler when a GPU fault occurs. + * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN. + * + * @param kbdev The kbase device that the GPU fault occurred from. + * @param multiple Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS was also set + */ +void kbase_report_gpu_fault(kbase_device *kbdev, int multiple); + +/** Kill all jobs that are currently running from a context + * + * This is used in response to a page fault to remove all jobs from the faulting context from the hardware. + * + * @param kctx The context to kill jobs from + */ +void kbase_job_kill_jobs_from_context(kbase_context *kctx); + +/** + * GPU interrupt handler + * + * This function is called from the interrupt handler when a GPU irq is to be handled. 
+ * + * @param kbdev The kbase device to handle an IRQ for + * @param val The value of the GPU IRQ status register which triggered the call + */ +void kbase_gpu_interrupt(kbase_device *kbdev, u32 val); + +/** + * Prepare for resetting the GPU. + * This function just soft-stops all the slots to ensure that as many jobs as possible are saved. + * + * The function returns a boolean which should be interpreted as follows: + * - MALI_TRUE - Prepared for reset, kbase_reset_gpu should be called. + * - MALI_FALSE - Another thread is performing a reset, kbase_reset_gpu should not be called. + * + * @return See description + */ +mali_bool kbase_prepare_to_reset_gpu(kbase_device *kbdev); + +/** + * Pre-locked version of @a kbase_prepare_to_reset_gpu. + * + * Identical to @a kbase_prepare_to_reset_gpu, except that the + * kbasep_js_device_data::runpool_irq::lock is externally locked. + * + * @see kbase_prepare_to_reset_gpu + */ +mali_bool kbase_prepare_to_reset_gpu_locked(kbase_device *kbdev); + +/** Reset the GPU + * + * This function should be called after kbase_prepare_to_reset_gpu iff it returns MALI_TRUE. + * It should never be called without a corresponding call to kbase_prepare_to_reset_gpu. + * + * After this function is called (or not called if kbase_prepare_to_reset_gpu returned MALI_FALSE), + * the caller should wait for kbdev->reset_waitq to be signalled to know when the reset has completed. + */ +void kbase_reset_gpu(kbase_device *kbdev); + +/** + * Pre-locked version of @a kbase_reset_gpu. + * + * Identical to @a kbase_reset_gpu, except that the + * kbasep_js_device_data::runpool_irq::lock is externally locked. + * + * @see kbase_reset_gpu + */ +void kbase_reset_gpu_locked(kbase_device *kbdev); + +/** Returns the name associated with a Mali exception code + * + * @param[in] exception_code exception code + * @return name associated with the exception code + */ +const char *kbase_exception_name(u32 exception_code); + +/** + * Check whether a system suspend is in progress, or has already been suspended + * + * The caller should ensure that either kbdev->pm.active_count_lock is held, or + * a dmb was executed recently (to ensure the value is most + * up-to-date). However, without a lock the value could change afterwards. + * + * @return MALI_FALSE if a suspend is not in progress + * @return !=MALI_FALSE otherwise + */ +static INLINE mali_bool kbase_pm_is_suspending(struct kbase_device *kbdev) { + return kbdev->pm.suspending; +} + +/** + * Return the atom's ID, as was originally supplied by userspace in + * base_jd_atom_v2::atom_number + */ +static INLINE int kbase_jd_atom_id(kbase_context *kctx, kbase_jd_atom *katom) +{ + int result; + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(katom); + KBASE_DEBUG_ASSERT(katom->kctx == kctx); + + result = katom - &kctx->jctx.atoms[0]; + KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT); + return result; +} + +#if KBASE_TRACE_ENABLE != 0 +/** Add trace values about a job-slot + * + * @note Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any + * functions called to get the parameters supplied to this macro must: + * - be static or static inline + * - must just return 0 and have no other statements present in the body. 
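The kbase_prepare_to_reset_gpu()/kbase_reset_gpu() comments above describe a small protocol; a sketch of a caller that honours it (the wait condition is omitted because it is defined elsewhere in the driver):

	/* Only the thread that 'wins' the prepare step performs the reset. */
	if (kbase_prepare_to_reset_gpu(kbdev))
		kbase_reset_gpu(kbdev);

	/* Either way, the caller then blocks on kbdev->reset_waitq until the
	 * reset has completed. */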
+ */ +#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \ + kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ + KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0) + +/** Add trace values about a job-slot, with info + * + * @note Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any + * functions called to get the parameters supplied to this macro must: + * - be static or static inline + * - must just return 0 and have no other statements present in the body. + */ +#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \ + kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ + KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val) + +/** Add trace values about a ctx refcount + * + * @note Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any + * functions called to get the parameters supplied to this macro must: + * - be static or static inline + * - must just return 0 and have no other statements present in the body. + */ +#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \ + kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ + KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0) +/** Add trace values about a ctx refcount, and info + * + * @note Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any + * functions called to get the parameters supplied to this macro must: + * - be static or static inline + * - must just return 0 and have no other statements present in the body. + */ +#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \ + kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ + KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val) + +/** Add trace values (no slot or refcount) + * + * @note Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any + * functions called to get the parameters supplied to this macro must: + * - be static or static inline + * - must just return 0 and have no other statements present in the body. + */ +#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val) \ + kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ + 0, 0, 0, info_val) + +/** Clear the trace */ +#define KBASE_TRACE_CLEAR(kbdev) \ + kbasep_trace_clear(kbdev) + +/** Dump the slot trace */ +#define KBASE_TRACE_DUMP(kbdev) \ + kbasep_trace_dump(kbdev) + +/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */ +void kbasep_trace_add(kbase_device *kbdev, kbase_trace_code code, void *ctx, kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val); +/** PRIVATE - do not use directly. 
Use KBASE_TRACE_CLEAR() instead */ +void kbasep_trace_clear(kbase_device *kbdev); +#else +#ifdef CONFIG_MALI_SYSTEM_TRACE +/* Dispatch kbase trace events as system trace events */ +#include +#define KBASE_TRACE_ADD_SLOT( kbdev, code, ctx, katom, gpu_addr, jobslot )\ + trace_mali_##code(jobslot, 0) + +#define KBASE_TRACE_ADD_SLOT_INFO( kbdev, code, ctx, katom, gpu_addr, jobslot, info_val )\ + trace_mali_##code(jobslot, info_val) + +#define KBASE_TRACE_ADD_REFCOUNT( kbdev, code, ctx, katom, gpu_addr, refcount )\ + trace_mali_##code(refcount, 0) + +#define KBASE_TRACE_ADD_REFCOUNT_INFO( kbdev, code, ctx, katom, gpu_addr, refcount, info_val )\ + trace_mali_##code(refcount, info_val) + +#define KBASE_TRACE_ADD( kbdev, code, ctx, katom, gpu_addr, info_val )\ + trace_mali_##code(gpu_addr, info_val) + +#define KBASE_TRACE_CLEAR( kbdev )\ + do{\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(0);\ + }while(0) +#define KBASE_TRACE_DUMP( kbdev )\ + do{\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(0);\ + }while(0) + +#else /* CONFIG_MALI_SYSTEM_TRACE */ +#define KBASE_TRACE_ADD_SLOT( kbdev, code, ctx, katom, gpu_addr, jobslot )\ + do{\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(ctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(jobslot);\ + } while (0) + +#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(ctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(jobslot);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(ctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(refcount);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(ctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(subcode);\ + CSTD_UNUSED(ctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(val);\ + CSTD_NOP(0);\ + }while(0) + +#define KBASE_TRACE_CLEAR( kbdev )\ + do{\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(0);\ + }while(0) +#define KBASE_TRACE_DUMP( kbdev )\ + do{\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(0);\ + }while(0) +#endif /* CONFIG_MALI_SYSTEM_TRACE */ +#endif +/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */ +void kbasep_trace_dump(kbase_device *kbdev); +#endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c new file mode 100755 index 00000000000..b08143020aa --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c @@ -0,0 +1,176 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
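The note attached to each KBASE_TRACE_* macro above constrains how argument expressions may be written. A hypothetical call site that respects it; JM_SUBMIT stands in for a trace code from mali_kbase_trace_defs.h and, like kbdev, kctx, katom and js, is assumed to be provided by the surrounding code:

/* Helper obeying the note: static inline, returns 0, no other statements, so
 * it stays safe when KBASE_TRACE_ENABLE == 0 compiles the macro body away. */
static inline unsigned long kbasep_example_info_val(void)
{
	return 0;
}

/* At a call site: */
KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, katom->jc, js,
			  kbasep_example_info_val());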
+ * + */ + + + +#include + +/* This function is used to solve an HW issue with single iterator GPUs. + * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the + * restart index is out of bounds and the rerun causes a tile range fault. If this happens + * we try to clamp the restart index to a correct value and rerun the job. + */ +/* Mask of X and Y coordinates for the coordinates words in the descriptors*/ +#define X_COORDINATE_MASK 0x00000FFF +#define Y_COORDINATE_MASK 0x0FFF0000 +/* Max number of words needed from the fragment shader job descriptor */ +#define JOB_HEADER_SIZE_IN_WORDS 10 +#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32)) + +/* Word 0: Status Word */ +#define JOB_DESC_STATUS_WORD 0 +/* Word 1: Restart Index */ +#define JOB_DESC_RESTART_INDEX_WORD 1 +/* Word 2: Fault address low word */ +#define JOB_DESC_FAULT_ADDR_LOW_WORD 2 +/* Word 8: Minimum Tile Coordinates */ +#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8 +/* Word 9: Maximum Tile Coordinates */ +#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9 + +int kbasep_10969_workaround_clamp_coordinates(kbase_jd_atom *katom) +{ + struct device *dev = katom->kctx->kbdev->dev; + u32 clamped = 0; + dev_warn(dev,"Called TILE_RANGE_FAULT workaround clamping function. \n"); + if (katom->core_req & BASE_JD_REQ_FS){ + kbase_va_region * region = kbase_region_tracker_find_region_enclosing_address(katom->kctx, katom->jc ); + + if (region){ + phys_addr_t * page_array = kbase_get_phy_pages(region); + + if (page_array){ + u64 page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn; + u32 offset = katom->jc & (~PAGE_MASK); + u32 * page_1 = NULL; + u32 * page_2 = NULL; + u32 job_header[JOB_HEADER_SIZE_IN_WORDS]; + void* dst = job_header; + + /* we need the first 10 words of the fragment shader job descriptor. We need to check + * that the offset + 10 words is less that the page size otherwise we need to load the next + * page. page_size_overflow will be equal to 0 in case the whole descriptor is within the page + * >0 otherwise. 
+ */ + u32 copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE); + + page_1 = kmap_atomic(pfn_to_page(PFN_DOWN(page_array[page_index]))); + + /* page_1 is a u32 pointer, offset is expressed in bytes */ + page_1 += offset>>2; + kbase_sync_to_cpu(page_array[page_index] + offset, page_1, copy_size); + memcpy(dst, page_1, copy_size); + + /* The data needed overflows page the dimension, need to map the subsequent page */ + if (copy_size < JOB_HEADER_SIZE){ + page_2 = kmap_atomic(pfn_to_page(PFN_DOWN(page_array[page_index + 1]))); + + kbase_sync_to_cpu(page_array[page_index + 1], page_2, JOB_HEADER_SIZE - copy_size); + memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size); + } + + /* We managed to correctly map one or two pages (in case of overflow ) */ + { + u32 minX,minY,maxX,maxY; + u32 restartX,restartY; + + /* Get Bounding Box data and restart index from fault address low word*/ + minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK; + minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK; + maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK; + maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK; + restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK; + restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK; + + dev_warn(dev, "Before Clamping: \n" \ + "Jobstatus: %08x \n" \ + "restartIdx: %08x \n" \ + "Fault_addr_low: %08x \n" \ + "minCoordsX: %08x minCoordsY: %08x \n" \ + "maxCoordsX: %08x maxCoordsY: %08x \n", + job_header[JOB_DESC_STATUS_WORD], + job_header[JOB_DESC_RESTART_INDEX_WORD], + job_header[JOB_DESC_FAULT_ADDR_LOW_WORD], + minX,minY, + maxX,maxY ); + + /* Set the restart index to the one which generated the fault*/ + job_header[JOB_DESC_RESTART_INDEX_WORD] = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD]; + + if (restartX < minX){ + job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY; + dev_warn(dev, + "Clamping restart X index to minimum. %08x clamped to %08x \n", + restartX, minX ); + clamped = 1; + } + if (restartY < minY){ + job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX; + dev_warn(dev, + "Clamping restart Y index to minimum. %08x clamped to %08x \n", + restartY, minY ); + clamped = 1; + } + if (restartX > maxX){ + job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY; + dev_warn(dev, + "Clamping restart X index to maximum. %08x clamped to %08x \n", + restartX, maxX ); + clamped = 1; + } + if (restartY > maxY){ + job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX; + dev_warn(dev, + "Clamping restart Y index to maximum. 
%08x clamped to %08x \n", + restartY, maxY ); + clamped = 1; + } + + if (clamped){ + /* Reset the fault address low word and set the job status to STOPPED */ + job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0; + job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED; + dev_warn(dev, "After Clamping: \n" \ + "Jobstatus: %08x \n" \ + "restartIdx: %08x \n" \ + "Fault_addr_low: %08x \n" \ + "minCoordsX: %08x minCoordsY: %08x \n" \ + "maxCoordsX: %08x maxCoordsY: %08x \n", + job_header[JOB_DESC_STATUS_WORD], + job_header[JOB_DESC_RESTART_INDEX_WORD], + job_header[JOB_DESC_FAULT_ADDR_LOW_WORD], + minX,minY, + maxX,maxY ); + + /* Flush CPU cache to update memory for future GPU reads*/ + memcpy(page_1, dst, copy_size); + kbase_sync_to_memory(page_array[page_index] + offset, page_1, copy_size); + + if (copy_size < JOB_HEADER_SIZE){ + memcpy(page_2, dst + copy_size, JOB_HEADER_SIZE - copy_size); + kbase_sync_to_memory(page_array[page_index + 1], page_2, JOB_HEADER_SIZE - copy_size); + } + + } + } + if (copy_size < JOB_HEADER_SIZE) + kunmap_atomic(page_2); + + kunmap_atomic(page_1); + } + } + } + return clamped; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h new file mode 100755 index 00000000000..85184c9f316 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h @@ -0,0 +1,23 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _KBASE_10969_WORKAROUND_ +#define _KBASE_10969_WORKAROUND_ + +int kbasep_10969_workaround_clamp_coordinates( kbase_jd_atom * katom ); + +#endif /* _KBASE_10969_WORKAROUND_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c new file mode 100755 index 00000000000..a1c3aa88307 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c @@ -0,0 +1,41 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_cache_policy.h + * Cache Policy API. 
+ */ + +#include "mali_kbase_cache_policy.h" + +/* + * The output flags should be a combination of the following values: + * KBASE_REG_CPU_CACHED: CPU cache should be enabled + */ +u32 kbase_cache_enabled(u32 flags, u32 nr_pages) +{ + u32 cache_flags = 0; + + CSTD_UNUSED(nr_pages); + + if (flags & BASE_MEM_CACHED_CPU) + cache_flags |= KBASE_REG_CPU_CACHED; + + return cache_flags; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h new file mode 100755 index 00000000000..70f8a4c8ada --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h @@ -0,0 +1,47 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_cache_policy.h + * Cache Policy API. + */ + +#ifndef _KBASE_CACHE_POLICY_H_ +#define _KBASE_CACHE_POLICY_H_ + +#include +#include "mali_kbase.h" +#include "mali_base_kernel.h" + +/** + * @brief Choose the cache policy for a specific region + * + * Tells whether the CPU and GPU caches should be enabled or not for a specific region. + * This function can be modified to customize the cache policy depending on the flags + * and size of the region. + * + * @param[in] flags flags describing attributes of the region + * @param[in] nr_pages total number of pages (backed or not) for the region + * + * @return a combination of KBASE_REG_CPU_CACHED and KBASE_REG_GPU_CACHED depending + * on the cache policy + */ +u32 kbase_cache_enabled(u32 flags, u32 nr_pages); + +#endif /* _KBASE_CACHE_POLICY_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.c b/drivers/gpu/arm/midgard/mali_kbase_config.c new file mode 100755 index 00000000000..fe9f0276455 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_config.c @@ -0,0 +1,358 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include +#include +#include +#include + +/* Specifies how many attributes are permitted in the config (excluding terminating attribute). + * This is used in validation function so we can detect if configuration is properly terminated. This value can be + * changed if we need to introduce more attributes or many memory regions need to be defined */ +#define ATTRIBUTE_COUNT_MAX 32 + +/* Limits for gpu frequency configuration parameters. These will use for config validation. 
*/ +#define MAX_GPU_ALLOWED_FREQ_KHZ 1000000 +#define MIN_GPU_ALLOWED_FREQ_KHZ 1 + +int kbasep_get_config_attribute_count(const kbase_attribute *attributes) +{ + int count = 1; + + if (!attributes) + return -EINVAL; + + while (attributes->id != KBASE_CONFIG_ATTR_END) { + attributes++; + count++; + } + + return count; +} + +const kbase_attribute *kbasep_get_next_attribute(const kbase_attribute *attributes, int attribute_id) +{ + KBASE_DEBUG_ASSERT(attributes != NULL); + + while (attributes->id != KBASE_CONFIG_ATTR_END) { + if (attributes->id == attribute_id) + return attributes; + + attributes++; + } + return NULL; +} + +KBASE_EXPORT_TEST_API(kbasep_get_next_attribute) + +uintptr_t kbasep_get_config_value(struct kbase_device *kbdev, const kbase_attribute *attributes, int attribute_id) +{ + const kbase_attribute *attr; + + KBASE_DEBUG_ASSERT(attributes != NULL); + + attr = kbasep_get_next_attribute(attributes, attribute_id); + if (attr != NULL) + return attr->data; + + /* default values */ + switch (attribute_id) { + case KBASE_CONFIG_ATTR_GPU_IRQ_THROTTLE_TIME_US: + return DEFAULT_IRQ_THROTTLE_TIME_US; + /* Begin scheduling defaults */ + case KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS: + return DEFAULT_JS_SCHEDULING_TICK_NS; + case KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS: + return DEFAULT_JS_SOFT_STOP_TICKS; + case KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL: + return DEFAULT_JS_SOFT_STOP_TICKS_CL; + case KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS: + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) + return DEFAULT_JS_HARD_STOP_TICKS_SS_HW_ISSUE_8408; + else + return DEFAULT_JS_HARD_STOP_TICKS_SS; + case KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL: + return DEFAULT_JS_HARD_STOP_TICKS_CL; + case KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS: + return DEFAULT_JS_HARD_STOP_TICKS_NSS; + case KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS: + return DEFAULT_JS_CTX_TIMESLICE_NS; + case KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_INIT_SLICES: + return DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES; + case KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_MIN_SLICES: + return DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES; + case KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS: + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) + return DEFAULT_JS_RESET_TICKS_SS_HW_ISSUE_8408; + else + return DEFAULT_JS_RESET_TICKS_SS; + case KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL: + return DEFAULT_JS_RESET_TICKS_CL; + case KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS: + return DEFAULT_JS_RESET_TICKS_NSS; + case KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS: + return DEFAULT_JS_RESET_TIMEOUT_MS; + /* End scheduling defaults */ + case KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS: + return 0; + case KBASE_CONFIG_ATTR_PLATFORM_FUNCS: + return 0; + case KBASE_CONFIG_ATTR_SECURE_BUT_LOSS_OF_PERFORMANCE: + return DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE; + case KBASE_CONFIG_ATTR_CPU_SPEED_FUNC: + return DEFAULT_CPU_SPEED_FUNC; + case KBASE_CONFIG_ATTR_GPU_SPEED_FUNC: + return 0; + case KBASE_CONFIG_ATTR_ARID_LIMIT: + return DEFAULT_ARID_LIMIT; + case KBASE_CONFIG_ATTR_AWID_LIMIT: + return DEFAULT_AWID_LIMIT; + case KBASE_CONFIG_ATTR_POWER_MANAGEMENT_DVFS_FREQ: + return DEFAULT_PM_DVFS_FREQ; + case KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS: + return DEFAULT_PM_GPU_POWEROFF_TICK_NS; + case KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_SHADER: + return DEFAULT_PM_POWEROFF_TICK_SHADER; + case KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_GPU: + return DEFAULT_PM_POWEROFF_TICK_GPU; + + default: + dev_err(kbdev->dev, "kbasep_get_config_value. 
Cannot get value of attribute with id=%d and no default value defined", attribute_id); + return 0; + } +} + +KBASE_EXPORT_TEST_API(kbasep_get_config_value) + +mali_bool kbasep_platform_device_init(kbase_device *kbdev) +{ + kbase_platform_funcs_conf *platform_funcs; + + platform_funcs = (kbase_platform_funcs_conf *) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_PLATFORM_FUNCS); + if (platform_funcs) { + if (platform_funcs->platform_init_func) + return platform_funcs->platform_init_func(kbdev); + } + return MALI_TRUE; +} + +void kbasep_platform_device_term(kbase_device *kbdev) +{ + kbase_platform_funcs_conf *platform_funcs; + + platform_funcs = (kbase_platform_funcs_conf *) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_PLATFORM_FUNCS); + if (platform_funcs) { + if (platform_funcs->platform_term_func) + platform_funcs->platform_term_func(kbdev); + } +} + +static mali_bool kbasep_validate_gpu_clock_freq(kbase_device *kbdev, const kbase_attribute *attributes) +{ + uintptr_t freq_min = kbasep_get_config_value(kbdev, attributes, KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MIN); + uintptr_t freq_max = kbasep_get_config_value(kbdev, attributes, KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX); + + if ((freq_min > MAX_GPU_ALLOWED_FREQ_KHZ) || (freq_min < MIN_GPU_ALLOWED_FREQ_KHZ) || (freq_max > MAX_GPU_ALLOWED_FREQ_KHZ) || (freq_max < MIN_GPU_ALLOWED_FREQ_KHZ) || (freq_min > freq_max)) { + dev_warn(kbdev->dev, "Invalid GPU frequencies found in configuration: min=%ldkHz, max=%ldkHz.", freq_min, freq_max); + return MALI_FALSE; + } + + return MALI_TRUE; +} + +static mali_bool kbasep_validate_pm_callback(const kbase_pm_callback_conf *callbacks, const kbase_device * kbdev ) +{ + if (callbacks == NULL) { + /* Having no callbacks is valid */ + return MALI_TRUE; + } + + if ((callbacks->power_off_callback != NULL && callbacks->power_on_callback == NULL) || (callbacks->power_off_callback == NULL && callbacks->power_on_callback != NULL)) { + dev_warn(kbdev->dev, "Invalid power management callbacks: Only one of power_off_callback and power_on_callback was specified"); + return MALI_FALSE; + } + return MALI_TRUE; +} + +static mali_bool kbasep_validate_cpu_speed_func(kbase_cpuprops_clock_speed_function fcn) +{ + return fcn != NULL; +} + +mali_bool kbasep_validate_configuration_attributes(kbase_device *kbdev, const kbase_attribute *attributes) +{ + int i; + mali_bool had_gpu_freq_min = MALI_FALSE, had_gpu_freq_max = MALI_FALSE; + + KBASE_DEBUG_ASSERT(attributes); + + for (i = 0; attributes[i].id != KBASE_CONFIG_ATTR_END; i++) { + if (i >= ATTRIBUTE_COUNT_MAX) { + dev_warn(kbdev->dev, "More than ATTRIBUTE_COUNT_MAX=%d configuration attributes defined. 
Is attribute list properly terminated?", ATTRIBUTE_COUNT_MAX); + return MALI_FALSE; + } + + switch (attributes[i].id) { + case KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MIN: + had_gpu_freq_min = MALI_TRUE; + if (MALI_FALSE == kbasep_validate_gpu_clock_freq(kbdev, attributes)) { + /* Warning message handled by kbasep_validate_gpu_clock_freq() */ + return MALI_FALSE; + } + break; + + case KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX: + had_gpu_freq_max = MALI_TRUE; + if (MALI_FALSE == kbasep_validate_gpu_clock_freq(kbdev, attributes)) { + /* Warning message handled by kbasep_validate_gpu_clock_freq() */ + return MALI_FALSE; + } + break; + + /* Only non-zero unsigned 32-bit values accepted */ + case KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS: +#if CSTD_CPU_64BIT + if (attributes[i].data == 0u || (u64) attributes[i].data > (u64) U32_MAX) +#else + if (attributes[i].data == 0u) +#endif + { + dev_warn(kbdev->dev, "Invalid Job Scheduling Configuration attribute for " "KBASE_CONFIG_ATTR_JS_SCHEDULING_TICKS_NS: %d", (int)attributes[i].data); + return MALI_FALSE; + } + break; + + /* All these Job Scheduling attributes are FALLTHROUGH: only unsigned 32-bit values accepted */ + case KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS: + case KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL: + case KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS: + case KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL: + case KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS: + case KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS: + case KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL: + case KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS: + case KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS: + case KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS: + case KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_INIT_SLICES: + case KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_MIN_SLICES: +#if CSTD_CPU_64BIT + if ((u64) attributes[i].data > (u64) U32_MAX) { + dev_warn(kbdev->dev, "Job Scheduling Configuration attribute exceeds 32-bits: " "id==%d val==%d", attributes[i].id, (int)attributes[i].data); + return MALI_FALSE; + } +#endif + break; + + case KBASE_CONFIG_ATTR_GPU_IRQ_THROTTLE_TIME_US: +#if CSTD_CPU_64BIT + if ((u64) attributes[i].data > (u64) U32_MAX) { + dev_warn(kbdev->dev, "IRQ throttle time attribute exceeds 32-bits: " "id==%d val==%d", attributes[i].id, (int)attributes[i].data); + return MALI_FALSE; + } +#endif + break; + + case KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS: + if (MALI_FALSE == kbasep_validate_pm_callback((kbase_pm_callback_conf *) attributes[i].data, kbdev)) { + /* Warning message handled by kbasep_validate_pm_callback() */ + return MALI_FALSE; + } + break; + + case KBASE_CONFIG_ATTR_SECURE_BUT_LOSS_OF_PERFORMANCE: + if (attributes[i].data != MALI_TRUE && attributes[i].data != MALI_FALSE) { + dev_warn(kbdev->dev, "Value for KBASE_CONFIG_ATTR_SECURE_BUT_LOSS_OF_PERFORMANCE was not " "MALI_TRUE or MALI_FALSE: %u", (unsigned int)attributes[i].data); + return MALI_FALSE; + } + break; + + case KBASE_CONFIG_ATTR_CPU_SPEED_FUNC: + if (MALI_FALSE == kbasep_validate_cpu_speed_func((kbase_cpuprops_clock_speed_function) attributes[i].data)) { + dev_warn(kbdev->dev, "Invalid function pointer in KBASE_CONFIG_ATTR_CPU_SPEED_FUNC"); + return MALI_FALSE; + } + break; + + case KBASE_CONFIG_ATTR_GPU_SPEED_FUNC: + if (0 == attributes[i].data) { + dev_warn(kbdev->dev, "Invalid function pointer in KBASE_CONFIG_ATTR_GPU_SPEED_FUNC"); + return MALI_FALSE; + } + break; + + case KBASE_CONFIG_ATTR_PLATFORM_FUNCS: + /* any value is allowed */ + break; + + case KBASE_CONFIG_ATTR_AWID_LIMIT: + case KBASE_CONFIG_ATTR_ARID_LIMIT: + if ((u32) attributes[i].data > 0x3) { + 
dev_warn(kbdev->dev, "Invalid AWID or ARID limit"); + return MALI_FALSE; + } + break; + + case KBASE_CONFIG_ATTR_POWER_MANAGEMENT_DVFS_FREQ: +#if CSTD_CPU_64BIT + if ((u64) attributes[i].data > (u64) U32_MAX) { + dev_warn(kbdev->dev, "PM DVFS interval exceeds 32-bits: " "id==%d val==%d", attributes[i].id, (int)attributes[i].data); + return MALI_FALSE; + } +#endif + break; + + case KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS: +#if CSTD_CPU_64BIT + if (attributes[i].data == 0u || (u64) attributes[i].data > (u64) U32_MAX) { +#else + if (attributes[i].data == 0u) { +#endif + dev_warn(kbdev->dev, "Invalid Power Manager Configuration attribute for " "KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS: %d", (int)attributes[i].data); + return MALI_FALSE; + } + break; + + case KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_SHADER: + case KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_GPU: +#if CSTD_CPU_64BIT + if ((u64) attributes[i].data > (u64) U32_MAX) { + dev_warn(kbdev->dev, "Power Manager Configuration attribute exceeds 32-bits: " "id==%d val==%d", attributes[i].id, (int)attributes[i].data); + return MALI_FALSE; + } +#endif + break; + + default: + dev_warn(kbdev->dev, "Invalid attribute found in configuration: %d", attributes[i].id); + return MALI_FALSE; + } + } + + if (!had_gpu_freq_min) { + dev_warn(kbdev->dev, "Configuration does not include mandatory attribute KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MIN"); + return MALI_FALSE; + } + + if (!had_gpu_freq_max) { + dev_warn(kbdev->dev, "Configuration does not include mandatory attribute KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX"); + return MALI_FALSE; + } + + return MALI_TRUE; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.h b/drivers/gpu/arm/midgard/mali_kbase_config.h new file mode 100755 index 00000000000..52d9eda7857 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_config.h @@ -0,0 +1,843 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_config.h + * Configuration API and Attributes for KBase + */ + +#ifndef _KBASE_CONFIG_H_ +#define _KBASE_CONFIG_H_ + +#include + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_config Configuration API and Attributes + * @{ + */ + +#if MALI_CUSTOMER_RELEASE == 0 +/* This flag is set for internal builds so we can run tests without credentials. */ +#define KBASE_HWCNT_DUMP_BYPASS_ROOT 1 +#else +#define KBASE_HWCNT_DUMP_BYPASS_ROOT 0 +#endif + +#include + +/** + * Device wide configuration + */ +enum { + /** + * Invalid attribute ID (reserve 0). + * + * Attached value: Ignored + * Default value: NA + * */ + KBASE_CONFIG_ATTR_INVALID, + + /** + * Maximum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX, + + /** + * Minimum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. 
+ * + * Attached value: number in kHz + * Default value: NA + */ + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MIN, + + /** + * Irq throttle. It is the minimum desired time in between two + * consecutive gpu interrupts (given in 'us'). The irq throttle + * gpu register will be configured after this, taking into + * account the configured max frequency. + * + * Attached value: number in micro seconds + * Default value: see DEFAULT_IRQ_THROTTLE_TIME_US + */ + KBASE_CONFIG_ATTR_GPU_IRQ_THROTTLE_TIME_US, + + /*** Begin Job Scheduling Configs ***/ + /** + * Job Scheduler scheduling tick granuality. This is in nanoseconds to + * allow HR timer support. + * + * On each scheduling tick, the scheduler may decide to: + * -# soft stop a job (the job will be re-run later, and other jobs will + * be able to run on the GPU now). This effectively controls the + * 'timeslice' given to a job. + * -# hard stop a job (to kill a job if it has spent too long on the GPU + * and didn't soft-stop). + * + * The numbers of ticks for these events are controlled by: + * - @ref KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS + * - @ref KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS + * - @ref KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS + * + * A soft-stopped job will later be resumed, allowing it to use more GPU + * time in total than that defined by any of the above. However, + * the scheduling policy attempts to limit the amount of \em uninterrupted + * time spent on the GPU using the above values (that is, the 'timeslice' + * of a job) + * + * This value is supported by the following scheduling policies: + * - The Completely Fair Share (CFS) policy + * + * Attached value: unsigned 32-bit kbasep_js_device_data::scheduling_tick_ns. + * The value might be rounded down to lower precision. Must be non-zero + * after rounding.
+ * Default value: @ref DEFAULT_JS_SCHEDULING_TICK_NS + * + * @note this value is allowed to be greater than + * @ref KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS. This allows jobs to run on (much) + * longer than the job-timeslice, but once this happens, the context gets + * scheduled in (much) less frequently than others that stay within the + * ctx-timeslice. + */ + KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS, + + /** + * Job Scheduler minimum number of scheduling ticks before non-CL jobs + * are soft-stopped. + * + * This defines the amount of time a job is allowed to stay on the GPU, + * before it is soft-stopped to allow other jobs to run. + * + * That is, this defines the 'timeslice' of the job. It is separate from the + * timeslice of the context that contains the job (see + * @ref KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS). + * + * This value is supported by the following scheduling policies: + * - The Completely Fair Share (CFS) policy + * + * Attached value: unsigned 32-bit kbasep_js_device_data::soft_stop_ticks
+ * Default value: @ref DEFAULT_JS_SOFT_STOP_TICKS + * + * @note a value of zero means "the quickest time to soft-stop a job", + * which is somewhere between instant and one tick later. + * + * @note this value is allowed to be greater than + * @ref KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS or + * @ref KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS. This effectively disables + * soft-stop, and just uses hard-stop instead. In this case, this value + * should be much greater than any of the hard stop values (to avoid + * soft-stop-after-hard-stop) + * + * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS + */ + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + + /** + * Job Scheduler minimum number of scheduling ticks before CL jobs + * are soft-stopped. + * + * This defines the amount of time a job is allowed to stay on the GPU, + * before it is soft-stopped to allow other jobs to run. + * + * That is, this defines the 'timeslice' of the job. It is separate + * from the timeslice of the context that contains the job (see + * @ref KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS). + * + * This value is supported by the following scheduling policies: + * - The Completely Fair Share (CFS) policy + * + * Attached value: unsigned 32-bit + * kbasep_js_device_data::soft_stop_ticks_cl
+ * Default value: @ref DEFAULT_JS_SOFT_STOP_TICKS_CL + * + * @note a value of zero means "the quickest time to soft-stop a job", + * which is somewhere between instant and one tick later. + * + * @note this value is allowed to be greater than + * @ref KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL. This effectively + * disables soft-stop, and just uses hard-stop instead. In this case, + * this value should be much greater than any of the hard stop values + * (to avoid soft-stop-after-hard-stop) + * + * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS + */ + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL, + + /** + * Job Scheduler minimum number of scheduling ticks before non-CL jobs + * are hard-stopped. + * + * This defines the amount of time a job is allowed to spend on the GPU before it + * is killed. Such jobs won't be resumed if killed. + * + * This value is supported by the following scheduling policies: + * - The Completely Fair Share (CFS) policy + * + * Attached value: unsigned 32-bit kbasep_js_device_data::hard_stop_ticks_ss
+ * Default value: @ref DEFAULT_JS_HARD_STOP_TICKS_SS + * + * @note a value of zero means "the quickest time to hard-stop a job", + * which is somewhere between instant and one tick later. + * + * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS + */ + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + + /** + * Job Scheduler minimum number of scheduling ticks before CL jobs are hard-stopped. + * + * This defines the amount of time a job is allowed to spend on the GPU before it + * is killed. Such jobs won't be resumed if killed. + * + * This value is supported by the following scheduling policies: + * - The Completely Fair Share (CFS) policy + * + * Attached value: unsigned 32-bit kbasep_js_device_data::hard_stop_ticks_cl
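A short worked example of how the tick-based attributes above combine with the scheduling tick into approximate wall-clock limits; the numbers are illustrative, not the defaults shipped in this patch, and the actual stop happens somewhere between N and N+1 ticks:

	u64 tick_ns       = 100ull * 1000 * 1000; /* JS_SCHEDULING_TICK_NS: 100 ms (example) */
	u32 soft_ticks    = 1;                    /* JS_SOFT_STOP_TICKS (example)            */
	u32 hard_ticks_ss = 50;                   /* JS_HARD_STOP_TICKS_SS (example)         */

	u64 soft_stop_after_ns = tick_ns * soft_ticks;    /* soft-stopped after ~100 ms */
	u64 hard_stop_after_ns = tick_ns * hard_ticks_ss; /* hard-stopped after ~5 s    */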
+ * Default value: @ref DEFAULT_JS_HARD_STOP_TICKS_CL + * + * @note a value of zero means "the quickest time to hard-stop a job", + * which is somewhere between instant and one tick later. + * + * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS + */ + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL, + + /** + * Job Scheduler minimum number of scheduling ticks before jobs are hard-stopped + * when dumping. + * + * This defines the amount of time a job is allowed to spend on the GPU before it + * is killed. Such jobs won't be resumed if killed. + * + * This value is supported by the following scheduling policies: + * - The Completely Fair Share (CFS) policy + * + * Attached value: unsigned 32-bit kbasep_js_device_data::hard_stop_ticks_nss
+ * Default value: @ref DEFAULT_JS_HARD_STOP_TICKS_NSS + * + * @note a value of zero means "the quickest time to hard-stop a job", + * which is somewhere between instant and one tick later. + * + * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS + */ + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + + /** + * Job Scheduler timeslice that a context is scheduled in for, in nanoseconds. + * + * When a context has used up this amount of time across its jobs, it is + * scheduled out to let another run. + * + * @note the resolution is nanoseconds (ns) here, because that's the format + * often used by the OS. + * + * This value controls affects the actual time defined by the following + * config values: + * - @ref KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_INIT_SLICES + * - @ref KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_MIN_SLICES + * + * This value is supported by the following scheduling policies: + * - The Completely Fair Share (CFS) policy + * + * Attached value: unsigned 32-bit kbasep_js_device_data::ctx_timeslice_ns. + * The value might be rounded down to lower precision.
+ * Default value: @ref DEFAULT_JS_CTX_TIMESLICE_NS + * + * @note a value of zero models a "Round Robin" scheduling policy, and + * disables @ref KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_INIT_SLICES + * (initially causing LIFO scheduling) and + * @ref KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_MIN_SLICES (allowing + * not-run-often contexts to get scheduled in quickly, but to only use + * a single timeslice when they get scheduled in). + */ + KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS, + + /** + * Job Scheduler initial runtime of a context for the CFS Policy, in time-slices. + * + * This value is relative to that of the least-run context, and defines + * where in the CFS queue a new context is added. A value of 1 means 'after + * the least-run context has used its timeslice'. Therefore, when all + * contexts consistently use the same amount of time, a value of 1 models a + * FIFO. A value of 0 would model a LIFO. + * + * The value is represented in "numbers of time slices". Multiply this + * value by that defined in @ref KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS to get + * the time value for this in nanoseconds. + * + * Attached value: unsigned 32-bit kbasep_js_device_data::cfs_ctx_runtime_init_slices
+ * Default value: @ref DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES + */ + KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_INIT_SLICES, + + /** + * Job Scheduler minimum runtime value of a context for CFS, in time_slices + * relative to that of the least-run context. + * + * This is a measure of how much preferential treatment is given to a + * context that is not run very often. + * + * Specifically, this value defines how many timeslices such a context is + * (initially) allowed to use at once. Such contexts (e.g. 'interactive' + * processes) will appear near the front of the CFS queue, and can initially + * use more time than contexts that run continuously (e.g. 'batch' + * processes). + * + * This limit \b prevents a "stored-up timeslices" DoS attack, where a ctx + * not run for a long time attacks the system by using a very large initial + * number of timeslices when it finally does run. + * + * Attached value: unsigned 32-bit kbasep_js_device_data::cfs_ctx_runtime_min_slices
+ * Default value: @ref DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES + * + * @note A value of zero allows not-run-often contexts to get scheduled in + * quickly, but to only use a single timeslice when they get scheduled in. + */ + KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_MIN_SLICES, + + /** + * Job Scheduler minimum number of scheduling ticks before non-CL jobs + * cause the GPU to be reset. + * + * This defines the amount of time a job is allowed to spend on the GPU before it + * is assumed that the GPU has hung and needs to be reset. This assumes that the job + * has been hard-stopped already and so the presence of a job that has remained on + * the GPU for so long indicates that the GPU has in some way hung. + * + * This value is supported by the following scheduling policies: + * - The Completely Fair Share (CFS) policy + * + * Attached value: unsigned 32-bit kbasep_js_device_data::gpu_reset_ticks_ss
+ * Default value: @ref DEFAULT_JS_RESET_TICKS_SS + * + * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS + */ + KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, + + /** + * Job Scheduler minimum number of scheduling ticks before CL jobs + * cause the GPU to be reset. + * + * This defines the amount of time a job is allowed to spend on the GPU before it + * is assumed that the GPU has hung and needs to be reset. This assumes that the job + * has been hard-stopped already and so the presence of a job that has remained on + * the GPU for so long indicates that the GPU has in some way hung. + * + * This value is supported by the following scheduling policies: + * - The Completely Fair Share (CFS) policy + * + * Attached value: unsigned 32-bit kbasep_js_device_data::gpu_reset_ticks_cl
+ * Default value: @ref DEFAULT_JS_RESET_TICKS_CL + * + * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS + */ + KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL, + + /** + * Job Scheduler minimum number of scheduling ticks before jobs cause the GPU to be + * reset when dumping. + * + * This defines the amount of time a job is allowed to spend on the GPU before it + * is assumed that the GPU has hung and needs to be reset. This assumes that the job + * has been hard-stopped already and so the presence of a job that has remained on + * the GPU for so long indicates that the GPU has in some way hung. + * + * This value is supported by the following scheduling policies: + * - The Completely Fair Share (CFS) policy + * + * Attached value: unsigned 32-bit kbasep_js_device_data::gpu_reset_ticks_nss
+ * Default value: @ref DEFAULT_JS_RESET_TICKS_NSS + * + * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS + */ + KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS, + + /** + * Number of milliseconds given for other jobs on the GPU to be + * soft-stopped when the GPU needs to be reset. + * + * Attached value: number in milliseconds + * Default value: @ref DEFAULT_JS_RESET_TIMEOUT_MS + */ + KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS, + /*** End Job Scheduling Configs ***/ + + /** Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ + KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS, + + /** + * Boolean indicating whether the driver is configured to be secure at + * a potential loss of performance. + * + * This currently affects only r0p0-15dev0 HW and earlier. + * + * On r0p0-15dev0 HW and earlier, there are tradeoffs between security and + * performance: + * + * - When this is set to MALI_TRUE, the driver remains fully secure, + * but potentially loses performance compared with setting this to + * MALI_FALSE. + * - When set to MALI_FALSE, the driver is open to certain security + * attacks. + * + * From r0p0-00rel0 and onwards, there is no security loss by setting + * this to MALI_FALSE, and no performance loss by setting it to + * MALI_TRUE. + * + * Attached value: mali_bool value + * Default value: @ref DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE + */ + KBASE_CONFIG_ATTR_SECURE_BUT_LOSS_OF_PERFORMANCE, + + /** + * A pointer to a function that calculates the CPU clock + * speed of the platform in MHz - see + * @ref kbase_cpuprops_clock_speed_function for the function + * prototype. + * + * Attached value: A @ref kbase_cpuprops_clock_speed_function. + * Default Value: Pointer to @ref DEFAULT_CPU_SPEED_FUNC - + * returns a clock speed of 100 MHz. + */ + KBASE_CONFIG_ATTR_CPU_SPEED_FUNC, + + /** + * A pointer to a function that calculates the GPU clock + * speed of the platform in MHz - see + * @ref kbase_gpuprops_clock_speed_function for the function + * prototype. + * + * Attached value: A @ref kbase_gpuprops_clock_speed_function. + * Default Value: NULL (in which case the driver assumes a current + * GPU frequency specified by KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX) + */ + KBASE_CONFIG_ATTR_GPU_SPEED_FUNC, + + /** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ + KBASE_CONFIG_ATTR_PLATFORM_FUNCS, + + /** + * Limit ARID width on the AXI bus. + * + * Attached value: u32 register value + * KBASE_AID_32 - use the full 32 IDs (5 ID bits) + * KBASE_AID_16 - use 16 IDs (4 ID bits) + * KBASE_AID_8 - use 8 IDs (3 ID bits) + * KBASE_AID_4 - use 4 IDs (2 ID bits) + * Default value: KBASE_AID_32 (no limit). Note hardware implementation + * may limit to a lower value. + */ + KBASE_CONFIG_ATTR_ARID_LIMIT, + + /** + * Limit AWID width on the AXI bus. + * + * Attached value: u32 register value + * KBASE_AID_32 - use the full 32 IDs (5 ID bits) + * KBASE_AID_16 - use 16 IDs (4 ID bits) + * KBASE_AID_8 - use 8 IDs (3 ID bits) + * KBASE_AID_4 - use 4 IDs (2 ID bits) + * Default value: KBASE_AID_32 (no limit). Note hardware implementation + * may limit to a lower value. + */ + KBASE_CONFIG_ATTR_AWID_LIMIT, + + /** + * Rate at which dvfs data should be collected. 
+ * + * Attached value: u32 value + * Default value: 500 milliseconds + */ + KBASE_CONFIG_ATTR_POWER_MANAGEMENT_DVFS_FREQ, + + /** + * Power Management poweroff tick granularity. This is in nanoseconds to + * allow HR timer support. + * + * On each scheduling tick, the power manager core may decide to: + * -# Power off one or more shader cores + * -# Power off the entire GPU + * + * Attached value: number in nanoseconds + * Default value: @ref DEFAULT_PM_GPU_POWEROFF_TICK_NS + */ + KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS, + + /** + * Power Manager number of ticks before shader cores are powered off + * + * Attached value: unsigned 32-bit kbasep_pm_device_data::poweroff_shader_ticks
+ * Default value: @ref DEFAULT_PM_POWEROFF_TICK_SHADER + * + * @see KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS + */ + KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_SHADER, + + /** + * Power Manager number of ticks before GPU is powered off + * + * Attached value: unsigned 32-bit kbasep_pm_device_data::poweroff_gpu_ticks
+ * Default value: @ref DEFAULT_PM_POWEROFF_TICK_GPU + * + * @see KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS + */ + KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_GPU, + + /** + * End of attribute list indicator. + * The configuration loader will stop processing any more elements + * when it encounters this attribute. + * + * Default value: NA + */ + KBASE_CONFIG_ATTR_END = 0x1FFFUL +}; + +enum { + /** + * Use unrestricted Address ID width on the AXI bus. + */ + KBASE_AID_32 = 0x0, + + /** + * Restrict GPU to a half of maximum Address ID count. + * This will reduce performance, but reduce bus load due to GPU. + */ + KBASE_AID_16 = 0x3, + + /** + * Restrict GPU to a quarter of maximum Address ID count. + * This will reduce performance, but reduce bus load due to GPU. + */ + KBASE_AID_8 = 0x2, + + /** + * Restrict GPU to an eighth of maximum Address ID count. + * This will reduce performance, but reduce bus load due to GPU. + */ + KBASE_AID_4 = 0x1 +}; + +/* + * @brief specifies a single attribute + * + * Attribute is identified by attr field. Data is either integer or a pointer to attribute-specific structure. + */ +typedef struct kbase_attribute { + int id; + uintptr_t data; +} kbase_attribute; + +/* Forward declaration of kbase_device */ +struct kbase_device; + +/* + * @brief Specifies the functions for platform specific initialization and termination + * + * By default no functions are required. No additional platform specific control is necessary. + */ +typedef struct kbase_platform_funcs_conf { + /** + * Function pointer for platform specific initialization or NULL if no initialization function is required. + * This function will be called \em before any other callbacks listed in the kbase_attribute struct (such as + * Power Management callbacks). + * The platform specific private pointer kbase_device::platform_context can be accessed (and possibly initialized) in here. + */ + mali_bool(*platform_init_func) (struct kbase_device *kbdev); + /** + * Function pointer for platform specific termination or NULL if no termination function is required. + * This function will be called \em after any other callbacks listed in the kbase_attribute struct (such as + * Power Management callbacks). + * The platform specific private pointer kbase_device::platform_context can be accessed (and possibly terminated) in here. + */ + void (*platform_term_func) (struct kbase_device *kbdev); + +} kbase_platform_funcs_conf; + +/* + * @brief Specifies the callbacks for power management + * + * By default no callbacks will be made and the GPU must not be powered off. + */ +typedef struct kbase_pm_callback_conf { + /** Callback for when the GPU is idle and the power to it can be switched off. + * + * The system integrator can decide whether to either do nothing, just switch off + * the clocks to the GPU, or to completely power down the GPU. + * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the + * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + */ + void (*power_off_callback) (struct kbase_device *kbdev); + + /** Callback for when the GPU is about to become active and power must be supplied. + * + * This function must not return until the GPU is powered and clocked sufficiently for register access to + * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. 
+ * If the GPU state has been lost then this function must return 1, otherwise it should return 0. + * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the + * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * + * The return value of the first call to this function is ignored. + * + * @return 1 if the GPU state may have been lost, 0 otherwise. + */ + int (*power_on_callback) (struct kbase_device *kbdev); + + /** Callback for when the system is requesting a suspend and GPU power + * must be switched off. + * + * Note that if this callback is present, then this may be called + * without a preceding call to power_off_callback. Therefore this + * callback must be able to take any action that might otherwise happen + * in power_off_callback. + * + * The platform specific private pointer kbase_device::platform_context + * can be accessed and modified in here. It is the platform \em + * callbacks responsibility to initialize and terminate this pointer if + * used (see @ref kbase_platform_funcs_conf). + */ + void (*power_suspend_callback) (struct kbase_device *kbdev); + + /** Callback for when the system is resuming from a suspend and GPU + * power must be switched on. + * + * Note that if this callback is present, then this may be called + * without a following call to power_on_callback. Therefore this + * callback must be able to take any action that might otherwise happen + * in power_on_callback. + * + * The platform specific private pointer kbase_device::platform_context + * can be accessed and modified in here. It is the platform \em + * callbacks responsibility to initialize and terminate this pointer if + * used (see @ref kbase_platform_funcs_conf). + */ + void (*power_resume_callback) (struct kbase_device *kbdev); + + /** Callback for handling runtime power management initialization. + * + * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback + * will become active from calls made to the OS from within this function. + * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. + * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. + * + * @return MALI_ERROR_NONE on success, else mali_error erro code. + */ + mali_error(*power_runtime_init_callback) (struct kbase_device *kbdev); + + /** Callback for handling runtime power management termination. + * + * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback + * should no longer be called by the OS on completion of this function. + * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. + */ + void (*power_runtime_term_callback) (struct kbase_device *kbdev); + + /** Callback for runtime power-off power management callback + * + * For linux this callback will be called by the kernel runtime_suspend callback. + * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. + * + * @return 0 on success, else OS error code. + */ + void (*power_runtime_off_callback) (struct kbase_device *kbdev); + + /** Callback for runtime power-on power management callback + * + * For linux this callback will be called by the kernel runtime_resume callback. + * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. 
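(Editor's illustration, not part of the original patch.) A minimal sketch of how a platform integration might populate this structure: only the basic power_off/power_on pair is wired up, the optional suspend/resume and runtime callbacks are left NULL, and my_platform_clock_on()/my_platform_clock_off() are hypothetical platform helpers that merely gate the GPU clock.

extern void my_platform_clock_on(void);		/* hypothetical platform helper */
extern void my_platform_clock_off(void);	/* hypothetical platform helper */

static void my_pm_power_off(struct kbase_device *kbdev)
{
	/* GPU is idle: gate its clock but keep power (and register state). */
	my_platform_clock_off();
}

static int my_pm_power_on(struct kbase_device *kbdev)
{
	my_platform_clock_on();
	/* The clock was only gated, never cut, so GPU state was not lost. */
	return 0;
}

static kbase_pm_callback_conf my_pm_callbacks = {
	.power_off_callback = my_pm_power_off,
	.power_on_callback = my_pm_power_on,
	.power_suspend_callback = NULL,
	.power_resume_callback = NULL,
	.power_runtime_init_callback = NULL,
	.power_runtime_term_callback = NULL,
	.power_runtime_off_callback = NULL,
	.power_runtime_on_callback = NULL
};

A pointer to such a structure is what the KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS attribute carries, cast to uintptr_t (see the attribute-list sketch further below).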
+ */ + int (*power_runtime_on_callback) (struct kbase_device *kbdev); + +} kbase_pm_callback_conf; + +/** + * Type of the function pointer for KBASE_CONFIG_ATTR_CPU_SPEED_FUNC. + * + * @param clock_speed [out] Once called this will contain the current CPU clock speed in MHz. + * This is mainly used to implement OpenCL's clGetDeviceInfo(). + * + * @return 0 on success, 1 on error. + */ +typedef int (*kbase_cpuprops_clock_speed_function) (u32 *clock_speed); + +/** + * Type of the function pointer for KBASE_CONFIG_ATTR_GPU_SPEED_FUNC. + * + * @param clock_speed [out] Once called this will contain the current GPU clock speed in MHz. + * If the system timer is not available then this function is required + * for the OpenCL queue profiling to return correct timing information. + * + * @return 0 on success, 1 on error. When an error is returned the caller assumes a current + * GPU speed as specified by KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX. + */ +typedef int (*kbase_gpuprops_clock_speed_function) (u32 *clock_speed); + +#ifdef CONFIG_OF +typedef struct kbase_platform_config { + const kbase_attribute *attributes; + u32 midgard_type; +} kbase_platform_config; +#else + +/* + * @brief Specifies start and end of I/O memory region. + */ +typedef struct kbase_io_memory_region { + u64 start; + u64 end; +} kbase_io_memory_region; + +/* + * @brief Specifies I/O related resources like IRQs and memory region for I/O operations. + */ +typedef struct kbase_io_resources { + u32 job_irq_number; + u32 mmu_irq_number; + u32 gpu_irq_number; + kbase_io_memory_region io_memory_region; +} kbase_io_resources; + +typedef struct kbase_platform_config { + const kbase_attribute *attributes; + const kbase_io_resources *io_resources; + u32 midgard_type; +} kbase_platform_config; + +#endif /* CONFIG_OF */ + +/** + * @brief Return character string associated with the given midgard type. + * + * @param[in] midgard_type - ID of midgard type + * + * @return Pointer to NULL-terminated character array associated with the given midgard type + */ +const char *kbasep_midgard_type_to_string(u32 midgard_type); + +/** + * @brief Gets the next config attribute with the specified ID from the array of attributes. + * + * Function gets the next attribute with specified attribute id within specified array. If no such attribute is found, + * NULL is returned. + * + * @param[in] attributes Array of attributes in which lookup is performed + * @param[in] attribute_id ID of attribute + * + * @return Pointer to the first attribute matching id or NULL if none is found. + */ +const kbase_attribute *kbasep_get_next_attribute(const kbase_attribute *attributes, int attribute_id); + +/** + * @brief Gets the value of a single config attribute. + * + * Function gets the value of attribute specified as parameter. If no such attribute is found in the array of + * attributes, default value is used. + * + * @param[in] kbdev Kbase device pointer + * @param[in] attributes Array of attributes in which lookup is performed + * @param[in] attribute_id ID of attribute + * + * @return Value of attribute with the given id + */ +uintptr_t kbasep_get_config_value(struct kbase_device *kbdev, const kbase_attribute *attributes, int attribute_id); + +/** + * @brief Validates configuration attributes + * + * Function checks validity of given configuration attributes. It will fail on any attribute with unknown id, attribute + * with invalid value or attribute list that is not correctly terminated. 
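(Editor's illustration, not part of the original patch.) A sketch of the kind of correctly terminated attribute list, and the kbase_platform_config that points at it, that this validation and the kbasep_get_config_value()/kbasep_get_next_attribute() helpers operate on. The frequency values are invented for the example, and my_pm_callbacks refers to the power-management sketch above.

static kbase_attribute my_config_attributes[] = {
	{ KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX, 500000 },	/* 500 MHz - invented value */
	{ KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MIN, 100000 },	/* 100 MHz - invented value */
	{ KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS, (uintptr_t)&my_pm_callbacks },
	{ KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS, 3000 },
	{ KBASE_CONFIG_ATTR_END, 0 }	/* mandatory terminator */
};

static kbase_platform_config my_platform_config = {
	.attributes = my_config_attributes
	/* .midgard_type (and .io_resources when CONFIG_OF is not set) would be
	 * filled in per platform. */
};

kbase_get_platform_config() would then return &my_platform_config for such a platform.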
It will also fail if + * KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MIN or KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX are not specified. + * + * @param[in] kbdev Kbase device pointer + * @param[in] attributes Array of attributes to validate + * + * @return MALI_TRUE if no errors have been found in the config. MALI_FALSE otherwise. + */ +mali_bool kbasep_validate_configuration_attributes(struct kbase_device *kbdev, const kbase_attribute *attributes); + +/** + * @brief Gets the pointer to platform config. + * + * @return Pointer to the platform config + */ +kbase_platform_config *kbase_get_platform_config(void); + +/** + * @brief Gets the count of attributes in array + * + * Function gets the count of attributes in array. Note that end of list indicator is also included. + * + * @param[in] attributes Array of attributes + * + * @return Number of attributes in the array including end of list indicator. + */ +int kbasep_get_config_attribute_count(const kbase_attribute *attributes); + +/** + * @brief Platform specific call to initialize hardware + * + * Function calls a platform defined routine if specified in the configuration attributes. + * The routine can initialize any hardware and context state that is required for the GPU block to function. + * + * @param[in] kbdev Kbase device pointer + * + * @return MALI_TRUE if no errors have been found in the config. MALI_FALSE otherwise. + */ +mali_bool kbasep_platform_device_init(struct kbase_device *kbdev); + +/** + * @brief Platform specific call to terminate hardware + * + * Function calls a platform defined routine if specified in the configuration attributes. + * The routine can destroy any platform specific context state and shut down any hardware functionality that are + * outside of the Power Management callbacks. + * + * @param[in] kbdev Kbase device pointer + * + */ +void kbasep_platform_device_term(struct kbase_device *kbdev); + + /** @} *//* end group kbase_config */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + +#endif /* _KBASE_CONFIG_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h new file mode 100755 index 00000000000..7c735815b6d --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h @@ -0,0 +1,202 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_config_defaults.h + * + * Default values for configuration settings + * + */ + +#ifndef _KBASE_CONFIG_DEFAULTS_H_ +#define _KBASE_CONFIG_DEFAULTS_H_ + +/* Default irq throttle time. This is the default desired minimum time in + * between two consecutive interrupts from the gpu. The irq throttle gpu + * register is set after this value. */ +#define DEFAULT_IRQ_THROTTLE_TIME_US 20 + +/*** Begin Scheduling defaults ***/ + +/** + * Default scheduling tick granuality, in nanoseconds + */ +/* 50ms */ +#define DEFAULT_JS_SCHEDULING_TICK_NS 50000000u + +/** + * Default minimum number of scheduling ticks before jobs are soft-stopped. 
+ * + * This defines the time-slice for a job (which may be different from that of + * a context) + */ +/* Between 0.1 and 0.15s before soft-stop */ +#define DEFAULT_JS_SOFT_STOP_TICKS 2 + +/** + * Default minimum number of scheduling ticks before CL jobs are soft-stopped. + */ +/* Between 0.05 and 0.1s before soft-stop */ +#define DEFAULT_JS_SOFT_STOP_TICKS_CL 1 + +/** + * Default minimum number of scheduling ticks before jobs are hard-stopped + */ +/* 1.2s before hard-stop, for a certain GLES2 test at 128x128 (bound by + * combined vertex+tiler job) + */ +#define DEFAULT_JS_HARD_STOP_TICKS_SS_HW_ISSUE_8408 24 +/* Between 0.2 and 0.25s before hard-stop */ +#define DEFAULT_JS_HARD_STOP_TICKS_SS 4 + +/** + * Default minimum number of scheduling ticks before CL jobs are hard-stopped. + */ +/* Between 0.1 and 0.15s before hard-stop */ +#define DEFAULT_JS_HARD_STOP_TICKS_CL 2 + +/** + * Default minimum number of scheduling ticks before jobs are hard-stopped + * during dumping + */ +/* 60s @ 50ms tick */ +#define DEFAULT_JS_HARD_STOP_TICKS_NSS 1200 + +/** + * Default minimum number of scheduling ticks before the GPU is reset + * to clear a "stuck" job + */ +/* 1.8s before resetting GPU, for a certain GLES2 test at 128x128 (bound by + * combined vertex+tiler job) + */ +#define DEFAULT_JS_RESET_TICKS_SS_HW_ISSUE_8408 36 +/* 0.3-0.35s before GPU is reset */ +#define DEFAULT_JS_RESET_TICKS_SS 6 + +/** + * Default minimum number of scheduling ticks before the GPU is reset + * to clear a "stuck" CL job. + */ +/* 0.2-0.25s before GPU is reset */ +#define DEFAULT_JS_RESET_TICKS_CL 4 + +/** + * Default minimum number of scheduling ticks before the GPU is reset + * to clear a "stuck" job during dumping. + */ +/* 60.1s @ 100ms tick */ +#define DEFAULT_JS_RESET_TICKS_NSS 1202 + +/** + * Number of milliseconds given for other jobs on the GPU to be + * soft-stopped when the GPU needs to be reset. + */ +#define DEFAULT_JS_RESET_TIMEOUT_MS 3000 + +/** + * Default timeslice that a context is scheduled in for, in nanoseconds. + * + * When a context has used up this amount of time across its jobs, it is + * scheduled out to let another run. + * + * @note the resolution is nanoseconds (ns) here, because that's the format + * often used by the OS. + */ +/* 0.05s - at 20fps a ctx does at least 1 frame before being scheduled out. + * At 40fps, 2 frames, etc + */ +#define DEFAULT_JS_CTX_TIMESLICE_NS 50000000 + +/** + * Default initial runtime of a context for CFS, in ticks. + * + * This value is relative to that of the least-run context, and defines where + * in the CFS queue a new context is added. + */ +#define DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES 1 + +/** + * Default minimum runtime value of a context for CFS, in ticks. + * + * This value is relative to that of the least-run context. This prevents + * "stored-up timeslices" DoS attacks. + */ +#define DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES 2 + +/** + * Default setting for whether to prefer security or performance. + * + * Currently affects only r0p0-15dev0 HW and earlier. + */ +#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE MALI_FALSE + +/** + * Default setting for read Address ID limiting on AXI. + */ +#define DEFAULT_ARID_LIMIT KBASE_AID_32 + +/** + * Default setting for write Address ID limiting on AXI. + */ +#define DEFAULT_AWID_LIMIT KBASE_AID_32 + +/** + * Default setting for using alternative hardware counters. 
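(Editor's note.) The per-value time comments above all follow from the 50 ms default tick: a limit of N ticks takes effect somewhere between N and N+1 tick periods after the job starts, matching the "somewhere between instant and one tick later" wording used for the zero-tick case in mali_kbase_config.h. Collected in one place:

/*
 * Worked example with DEFAULT_JS_SCHEDULING_TICK_NS = 50000000 (50 ms):
 *
 *   DEFAULT_JS_SOFT_STOP_TICKS    = 2 -> soft-stop between 0.10 s and 0.15 s
 *   DEFAULT_JS_HARD_STOP_TICKS_SS = 4 -> hard-stop between 0.20 s and 0.25 s
 *   DEFAULT_JS_RESET_TICKS_SS     = 6 -> GPU reset between 0.30 s and 0.35 s
 */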
+ */ +#define DEFAULT_ALTERNATIVE_HWC MALI_FALSE + +/*** End Scheduling defaults ***/ + +/*** Begin Power Manager defaults */ + +/* Milliseconds */ +#define DEFAULT_PM_DVFS_FREQ 500 + +/** + * Default poweroff tick granuality, in nanoseconds + */ +/* 400us */ +#define DEFAULT_PM_GPU_POWEROFF_TICK_NS 400000 + +/** + * Default number of poweroff ticks before shader cores are powered off + */ +/* 400-800us */ +#define DEFAULT_PM_POWEROFF_TICK_SHADER 2 + +/** + * Default number of poweroff ticks before GPU is powered off + */ +#define DEFAULT_PM_POWEROFF_TICK_GPU 2 /* 400-800us */ + +/*** End Power Manager defaults ***/ + +/** + * Default UMP device mapping. A UMP_DEVICE__SHIFT value which + * defines which UMP device this GPU should be mapped to. + */ +#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT + +/** + * Default value for KBASE_CONFIG_ATTR_CPU_SPEED_FUNC. + * Points to @ref kbase_cpuprops_get_default_clock_speed. + */ +#define DEFAULT_CPU_SPEED_FUNC \ + ((uintptr_t)kbase_cpuprops_get_default_clock_speed) + +#endif /* _KBASE_CONFIG_DEFAULTS_H_ */ + diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c new file mode 100755 index 00000000000..c7083b4dcd0 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_context.c @@ -0,0 +1,257 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_context.c + * Base kernel context APIs + */ + +#include +#include + +#define MEMPOOL_PAGES 16384 + + +/** + * @brief Create a kernel base context. + * + * Allocate and init a kernel base context. 
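(Editor's illustration, not part of the original patch.) The caller-side contract of this function, as exercised later in this patch by kbase_open() and kbase_release(), is simply create, check for NULL, and eventually destroy:

	kbase_context *kctx = kbase_create_context(kbdev);

	if (!kctx)
		return -ENOMEM;	/* allocation or sub-module init failed */
	/* ... use the context ... */
	kbase_destroy_context(kctx);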
+ */ +kbase_context *kbase_create_context(kbase_device *kbdev) +{ + kbase_context *kctx; + mali_error mali_err; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + /* zero-inited as lot of code assume it's zero'ed out on create */ + kctx = vzalloc(sizeof(*kctx)); + + if (!kctx) + goto out; + + kctx->kbdev = kbdev; + kctx->as_nr = KBASEP_AS_NR_INVALID; +#ifdef CONFIG_MALI_TRACE_TIMELINE + kctx->timeline.owner_tgid = task_tgid_nr(current); +#endif + atomic_set(&kctx->setup_complete, 0); + atomic_set(&kctx->setup_in_progress, 0); + kctx->keep_gpu_powered = MALI_FALSE; + spin_lock_init(&kctx->mm_update_lock); + kctx->process_mm = NULL; + atomic_set(&kctx->nonmapped_pages, 0); + + if (MALI_ERROR_NONE != kbase_mem_allocator_init(&kctx->osalloc, MEMPOOL_PAGES)) + goto free_kctx; + + kctx->pgd_allocator = &kctx->osalloc; + atomic_set(&kctx->used_pages, 0); + + if (kbase_jd_init(kctx)) + goto free_allocator; + + mali_err = kbasep_js_kctx_init(kctx); + if (MALI_ERROR_NONE != mali_err) + goto free_jd; /* safe to call kbasep_js_kctx_term in this case */ + + mali_err = kbase_event_init(kctx); + if (MALI_ERROR_NONE != mali_err) + goto free_jd; + + mutex_init(&kctx->reg_lock); + + INIT_LIST_HEAD(&kctx->waiting_soft_jobs); +#ifdef CONFIG_KDS + INIT_LIST_HEAD(&kctx->waiting_kds_resource); +#endif + + mali_err = kbase_mmu_init(kctx); + if (MALI_ERROR_NONE != mali_err) + goto free_event; + + kctx->pgd = kbase_mmu_alloc_pgd(kctx); + if (!kctx->pgd) + goto free_mmu; + + if (MALI_ERROR_NONE != kbase_mem_allocator_alloc(&kctx->osalloc, 1, &kctx->aliasing_sink_page)) + goto no_sink_page; + + kctx->tgid = current->tgid; + kctx->pid = current->pid; + init_waitqueue_head(&kctx->event_queue); + + kctx->cookies = KBASE_COOKIE_MASK; + + /* Make sure page 0 is not used... */ + if (kbase_region_tracker_init(kctx)) + goto no_region_tracker; +#ifdef CONFIG_GPU_TRACEPOINTS + atomic_set(&kctx->jctx.work_id, 0); +#endif +#ifdef CONFIG_MALI_TRACE_TIMELINE + atomic_set(&kctx->timeline.jd_atoms_in_flight, 0); +#endif + + return kctx; + +no_region_tracker: +no_sink_page: + kbase_mem_allocator_free(&kctx->osalloc, 1, &kctx->aliasing_sink_page, 0); + kbase_mmu_free_pgd(kctx); +free_mmu: + kbase_mmu_term(kctx); +free_event: + kbase_event_cleanup(kctx); +free_jd: + /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ + kbasep_js_kctx_term(kctx); + kbase_jd_exit(kctx); +free_allocator: + kbase_mem_allocator_term(&kctx->osalloc); +free_kctx: + vfree(kctx); +out: + return NULL; + +} +KBASE_EXPORT_SYMBOL(kbase_create_context) + +static void kbase_reg_pending_dtor(struct kbase_va_region *reg) +{ + KBASE_LOG(2, reg->kctx->kbdev->dev, "Freeing pending unmapped region\n"); + kbase_mem_phy_alloc_put(reg->alloc); + kfree(reg); +} + +/** + * @brief Destroy a kernel base context. + * + * Destroy a kernel base context. Calls kbase_destroy_os_context() to + * free OS specific structures. Will release all outstanding regions. + */ +void kbase_destroy_context(kbase_context *kctx) +{ + kbase_device *kbdev; + int pages; + unsigned long pending_regions_to_clean; + + KBASE_DEBUG_ASSERT(NULL != kctx); + + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(NULL != kbdev); + + KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u); + + /* Ensure the core is powered up for the destroy process */ + /* A suspend won't happen here, because we're in a syscall from a userspace + * thread. 
*/ + kbase_pm_context_active(kbdev); + + if (kbdev->hwcnt.kctx == kctx) { + /* disable the use of the hw counters if the app didn't use the API correctly or crashed */ + KBASE_TRACE_ADD(kbdev, CORE_CTX_HWINSTR_TERM, kctx, NULL, 0u, 0u); + dev_warn(kbdev->dev, "The privileged process asking for instrumentation forgot to disable it " "before exiting. Will end instrumentation for them"); + kbase_instr_hwcnt_disable(kctx); + } + + kbase_jd_zap_context(kctx); + kbase_event_cleanup(kctx); + + kbase_gpu_vm_lock(kctx); + + /* MMU is disabled as part of scheduling out the context */ + kbase_mmu_free_pgd(kctx); + + /* drop the aliasing sink page now that it can't be mapped anymore */ + kbase_mem_allocator_free(&kctx->osalloc, 1, &kctx->aliasing_sink_page, 0); + + /* free pending region setups */ + pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK; + while (pending_regions_to_clean) { + unsigned int cookie = __ffs(pending_regions_to_clean); + BUG_ON(!kctx->pending_regions[cookie]); + + kbase_reg_pending_dtor(kctx->pending_regions[cookie]); + + kctx->pending_regions[cookie] = NULL; + pending_regions_to_clean &= ~(1UL << cookie); + } + + kbase_region_tracker_term(kctx); + kbase_gpu_vm_unlock(kctx); + + /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ + kbasep_js_kctx_term(kctx); + + kbase_jd_exit(kctx); + + kbase_pm_context_idle(kbdev); + + kbase_mmu_term(kctx); + + pages = atomic_read(&kctx->used_pages); + if (pages != 0) + dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); + + if (kctx->keep_gpu_powered) { + atomic_dec(&kbdev->keep_gpu_powered_count); + kbase_pm_context_idle(kbdev); + } + + kbase_mem_allocator_term(&kctx->osalloc); + WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); + vfree(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_destroy_context) + +/** + * Set creation flags on a context + */ +mali_error kbase_context_set_create_flags(kbase_context *kctx, u32 flags) +{ + mali_error err = MALI_ERROR_NONE; + kbasep_js_kctx_info *js_kctx_info; + KBASE_DEBUG_ASSERT(NULL != kctx); + + js_kctx_info = &kctx->jctx.sched_info; + + /* Validate flags */ + if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) { + err = MALI_ERROR_FUNCTION_FAILED; + goto out; + } + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + /* Translate the flags */ + if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) + js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED); + + if ((flags & BASE_CONTEXT_HINT_ONLY_COMPUTE) != 0) + js_kctx_info->ctx.flags |= (u32) KBASE_CTX_FLAG_HINT_ONLY_COMPUTE; + + /* Latch the initial attributes into the Job Scheduler */ + kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx); + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + out: + return err; +} +KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags) diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c new file mode 100755 index 00000000000..8ee5f965931 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -0,0 +1,3057 @@ + +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_core_linux.c + * Base kernel driver init. + */ + +#include +#include +#include +#include +#include +#ifdef CONFIG_MALI_NO_MALI +#include "mali_kbase_model_linux.h" +#endif /* CONFIG_MALI_NO_MALI */ + +#ifdef CONFIG_KDS +#include +#include +#include +#endif /* CONFIG_KDS */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* is_compat_task */ +#include +#include +#ifdef CONFIG_SYNC +#include +#endif /* CONFIG_SYNC */ + +/* + * This file is included since when we support device tree we don't + * use the platform fake code for registering the kbase config attributes. + */ +#ifdef CONFIG_OF +#include +#endif + +#ifdef CONFIG_MACH_MANTA +#include +#endif + +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 + +struct kbase_irq_table { + u32 tag; + irq_handler_t handler; +}; +#if MALI_UNIT_TEST +kbase_exported_test_data shared_kernel_test_data; +EXPORT_SYMBOL(shared_kernel_test_data); +#endif /* MALI_UNIT_TEST */ + +#define KBASE_DRV_NAME "mali" + +static const char kbase_drv_name[] = KBASE_DRV_NAME; + +static int kbase_dev_nr; + +static DEFINE_SEMAPHORE(kbase_dev_list_lock); +static LIST_HEAD(kbase_dev_list); + +KBASE_EXPORT_TEST_API(kbase_dev_list_lock) +KBASE_EXPORT_TEST_API(kbase_dev_list) +#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" +static INLINE void __compile_time_asserts(void) +{ + CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE); +} + +#ifdef CONFIG_KDS + +typedef struct kbasep_kds_resource_set_file_data { + struct kds_resource_set *lock; +} kbasep_kds_resource_set_file_data; + +static int kds_resource_release(struct inode *inode, struct file *file); + +static const struct file_operations kds_resource_fops = { + .release = kds_resource_release +}; + +typedef struct kbase_kds_resource_list_data { + struct kds_resource **kds_resources; + unsigned long *kds_access_bitmap; + int num_elems; +} kbase_kds_resource_list_data; + +static int kds_resource_release(struct inode *inode, struct file *file) +{ + struct kbasep_kds_resource_set_file_data *data; + + data = (struct kbasep_kds_resource_set_file_data *)file->private_data; + if (NULL != data) { + if (NULL != data->lock) + kds_resource_set_release(&data->lock); + + kfree(data); + } + return 0; +} + +mali_error kbasep_kds_allocate_resource_list_data(kbase_context *kctx, base_external_resource *ext_res, int num_elems, kbase_kds_resource_list_data *resources_list) +{ + base_external_resource *res = ext_res; + int res_id; + + /* assume we have to wait for all */ + + KBASE_DEBUG_ASSERT(0 != num_elems); + resources_list->kds_resources = kmalloc(sizeof(struct kds_resource *) * num_elems, GFP_KERNEL); + + if (NULL == resources_list->kds_resources) + return MALI_ERROR_OUT_OF_MEMORY; + + KBASE_DEBUG_ASSERT(0 != num_elems); + resources_list->kds_access_bitmap = kzalloc(sizeof(unsigned long) * ((num_elems + BITS_PER_LONG - 1) / BITS_PER_LONG), GFP_KERNEL); + + if (NULL == resources_list->kds_access_bitmap) { + kfree(resources_list->kds_access_bitmap); + return MALI_ERROR_OUT_OF_MEMORY; + } + + for (res_id = 0; res_id < num_elems; res_id++, res++) { + int exclusive; + kbase_va_region *reg; + struct kds_resource *kds_res = NULL; + + 
exclusive = res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE; + reg = kbase_region_tracker_find_region_enclosing_address(kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); + + /* did we find a matching region object? */ + if (NULL == reg) + break; + + /* no need to check reg->alloc as only regions with an alloc has + * a size, and kbase_region_tracker_find_region_enclosing_address + * only returns regions with size > 0 */ + switch (reg->alloc->type) { +#if defined(CONFIG_UMP) && defined(CONFIG_KDS) + case KBASE_MEM_TYPE_IMPORTED_UMP: + kds_res = ump_dd_kds_resource_get(reg->alloc->imported.ump_handle); + break; +#endif /* defined(CONFIG_UMP) && defined(CONFIG_KDS) */ + default: + break; + } + + /* no kds resource for the region ? */ + if (!kds_res) + break; + + resources_list->kds_resources[res_id] = kds_res; + + if (exclusive) + set_bit(res_id, resources_list->kds_access_bitmap); + } + + /* did the loop run to completion? */ + if (res_id == num_elems) + return MALI_ERROR_NONE; + + /* Clean up as the resource list is not valid. */ + kfree(resources_list->kds_resources); + kfree(resources_list->kds_access_bitmap); + + return MALI_ERROR_FUNCTION_FAILED; +} + +mali_bool kbasep_validate_kbase_pointer(kbase_pointer *p) +{ +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + if (p->compat_value == 0) + return MALI_FALSE; + } else { +#endif /* CONFIG_COMPAT */ + if (NULL == p->value) + return MALI_FALSE; +#ifdef CONFIG_COMPAT + } +#endif /* CONFIG_COMPAT */ + return MALI_TRUE; +} + +mali_error kbase_external_buffer_lock(kbase_context *kctx, kbase_uk_ext_buff_kds_data *args, u32 args_size) +{ + base_external_resource *ext_res_copy; + size_t ext_resource_size; + mali_error return_error = MALI_ERROR_FUNCTION_FAILED; + int fd; + + if (args_size != sizeof(kbase_uk_ext_buff_kds_data)) + return MALI_ERROR_FUNCTION_FAILED; + + /* Check user space has provided valid data */ + if (!kbasep_validate_kbase_pointer(&args->external_resource) || !kbasep_validate_kbase_pointer(&args->file_descriptor) || (0 == args->num_res) || (args->num_res > KBASE_MAXIMUM_EXT_RESOURCES)) + return MALI_ERROR_FUNCTION_FAILED; + + ext_resource_size = sizeof(base_external_resource) * args->num_res; + + KBASE_DEBUG_ASSERT(0 != ext_resource_size); + ext_res_copy = kmalloc(ext_resource_size, GFP_KERNEL); + + if (NULL != ext_res_copy) { + base_external_resource *__user ext_res_user; + int *__user file_descriptor_user; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + ext_res_user = compat_ptr(args->external_resource.compat_value); + file_descriptor_user = compat_ptr(args->file_descriptor.compat_value); + } else { +#endif /* CONFIG_COMPAT */ + ext_res_user = args->external_resource.value; + file_descriptor_user = args->file_descriptor.value; +#ifdef CONFIG_COMPAT + } +#endif /* CONFIG_COMPAT */ + + /* Copy the external resources to lock from user space */ + if (0 == copy_from_user(ext_res_copy, ext_res_user, ext_resource_size)) { + kbasep_kds_resource_set_file_data *fdata; + + /* Allocate data to be stored in the file */ + fdata = kmalloc(sizeof(kbasep_kds_resource_set_file_data), GFP_KERNEL); + + if (NULL != fdata) { + kbase_kds_resource_list_data resource_list_data; + /* Parse given elements and create resource and access lists */ + return_error = kbasep_kds_allocate_resource_list_data(kctx, ext_res_copy, args->num_res, &resource_list_data); + if (MALI_ERROR_NONE == return_error) { + long err; + + fdata->lock = NULL; + + fd = anon_inode_getfd("kds_ext", &kds_resource_fops, fdata, 0); + + err = copy_to_user(file_descriptor_user, 
&fd, sizeof(fd)); + + /* If the file descriptor was valid and we successfully copied it to user space, then we + * can try and lock the requested kds resources. + */ + if ((fd >= 0) && (0 == err)) { + struct kds_resource_set *lock; + + lock = kds_waitall(args->num_res, resource_list_data.kds_access_bitmap, resource_list_data.kds_resources, KDS_WAIT_BLOCKING); + + if (IS_ERR_OR_NULL(lock)) { + return_error = MALI_ERROR_FUNCTION_FAILED; + } else { + return_error = MALI_ERROR_NONE; + fdata->lock = lock; + } + } else { + return_error = MALI_ERROR_FUNCTION_FAILED; + } + + kfree(resource_list_data.kds_resources); + kfree(resource_list_data.kds_access_bitmap); + } + + if (MALI_ERROR_NONE != return_error) { + /* If the file was opened successfully then close it which will clean up + * the file data, otherwise we clean up the file data ourself. */ + if (fd >= 0) + sys_close(fd); + else + kfree(fdata); + } + } else { + return_error = MALI_ERROR_OUT_OF_MEMORY; + } + } + kfree(ext_res_copy); + } + return return_error; +} +#endif /* CONFIG_KDS */ + +static mali_error kbase_dispatch(kbase_context *kctx, void * const args, u32 args_size) +{ + struct kbase_device *kbdev; + uk_header *ukh = args; + u32 id; + + KBASE_DEBUG_ASSERT(ukh != NULL); + + kbdev = kctx->kbdev; + id = ukh->id; + ukh->ret = MALI_ERROR_NONE; /* Be optimistic */ + + if (UKP_FUNC_ID_CHECK_VERSION == id) { + if (args_size == sizeof(uku_version_check_args)) { + uku_version_check_args *version_check = (uku_version_check_args *)args; + + version_check->major = BASE_UK_VERSION_MAJOR; + version_check->minor = BASE_UK_VERSION_MINOR; + + ukh->ret = MALI_ERROR_NONE; + } else { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + } + return MALI_ERROR_NONE; + } + + + if (!atomic_read(&kctx->setup_complete)) { + /* setup pending, try to signal that we'll do the setup */ + if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1)) { + /* setup was already in progress, err this call */ + return MALI_ERROR_FUNCTION_FAILED; + } + + /* we're the one doing setup */ + + /* is it the only call we accept? 
*/ + if (id == KBASE_FUNC_SET_FLAGS) { + kbase_uk_set_flags *kbase_set_flags = (kbase_uk_set_flags *) args; + + if (sizeof(*kbase_set_flags) != args_size) { + /* not matching the expected call, stay stuck in setup mode */ + goto bad_size; + } + + if (MALI_ERROR_NONE != kbase_context_set_create_flags(kctx, kbase_set_flags->create_flags)) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + /* bad flags, will stay stuck in setup mode */ + return MALI_ERROR_NONE; + } else { + /* we've done the setup, all OK */ + atomic_set(&kctx->setup_complete, 1); + return MALI_ERROR_NONE; + } + } else { + /* unexpected call, will stay stuck in setup mode */ + return MALI_ERROR_FUNCTION_FAILED; + } + } + + /* setup complete, perform normal operation */ + switch (id) { + case KBASE_FUNC_MEM_ALLOC: + { + kbase_uk_mem_alloc *mem = args; + struct kbase_va_region *reg; + + if (sizeof(*mem) != args_size) + goto bad_size; + + reg = kbase_mem_alloc(kctx, mem->va_pages, mem->commit_pages, mem->extent, &mem->flags, &mem->gpu_va, &mem->va_alignment); + if (!reg) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + case KBASE_FUNC_MEM_IMPORT: + { + kbase_uk_mem_import *mem_import = args; + int *__user phandle; + int handle; + + if (sizeof(*mem_import) != args_size) + goto bad_size; +#ifdef CONFIG_64BIT + if (is_compat_task()) + phandle = compat_ptr(mem_import->phandle.compat_value); + else +#endif + phandle = mem_import->phandle.value; + + switch (mem_import->type) { + case BASE_MEM_IMPORT_TYPE_UMP: + get_user(handle, phandle); + break; + case BASE_MEM_IMPORT_TYPE_UMM: + get_user(handle, phandle); + break; + default: + goto bad_type; + break; + } + + if (kbase_mem_import(kctx, mem_import->type, handle, &mem_import->gpu_va, &mem_import->va_pages, &mem_import->flags)) { +bad_type: + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + } + break; + } + case KBASE_FUNC_MEM_ALIAS: { + kbase_uk_mem_alias *alias = args; + struct base_mem_aliasing_info *__user user_ai; + struct base_mem_aliasing_info *ai; + + if (sizeof(*alias) != args_size) + goto bad_size; + + if (alias->nents > 4) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + +#ifdef CONFIG_64BIT + if (is_compat_task()) + user_ai = compat_ptr(alias->ai.compat_value); + else +#endif + user_ai = alias->ai.value; + + ai = kmalloc(GFP_KERNEL, sizeof(*ai) * alias->nents); + if (!ai) { + ukh->ret = MALI_ERROR_OUT_OF_MEMORY; + break; + } + + if (copy_from_user(ai, user_ai, + sizeof(*ai) * alias->nents)) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + goto copy_failed; + } + + alias->gpu_va = kbase_mem_alias(kctx, &alias->flags, + alias->stride, + alias->nents, ai, + &alias->va_pages); + if (!alias->gpu_va) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + goto no_alias; + } +no_alias: +copy_failed: + kfree(ai); + break; + } + case KBASE_FUNC_MEM_COMMIT: + { + kbase_uk_mem_commit *commit = args; + + if (sizeof(*commit) != args_size) + goto bad_size; + + if (commit->gpu_addr & ~PAGE_MASK) { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_COMMIT: commit->gpu_addr: passed parameter is invalid"); + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + if (kbase_mem_commit(kctx, commit->gpu_addr, commit->pages, (base_backing_threshold_status*)&commit->result_subcode)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + case KBASE_FUNC_MEM_QUERY: + { + kbase_uk_mem_query *query = args; + if (sizeof(*query) != args_size) + goto bad_size; + + if (query->gpu_addr & ~PAGE_MASK) { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->gpu_addr: passed parameter is 
invalid"); + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + if (query->query != KBASE_MEM_QUERY_COMMIT_SIZE && + query->query != KBASE_MEM_QUERY_VA_SIZE && + query->query != KBASE_MEM_QUERY_FLAGS) { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->query = %lld unknown", (unsigned long long)query->query); + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + ukh->ret = kbase_mem_query(kctx, query->gpu_addr, query->query, &query->value); + break; + } + break; + + case KBASE_FUNC_MEM_FLAGS_CHANGE: + { + kbase_uk_mem_flags_change * fc = args; + if (sizeof(*fc) != args_size) + goto bad_size; + + if ((fc->gpu_va & ~PAGE_MASK) && (fc->gpu_va >= PAGE_SIZE)) { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FLAGS_CHANGE: mem->gpu_va: passed parameter is invalid"); + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + if (kbase_mem_flags_change(kctx, fc->gpu_va, fc->flags, fc->mask)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + + break; + } + case KBASE_FUNC_MEM_FREE: + { + kbase_uk_mem_free *mem = args; + + if (sizeof(*mem) != args_size) + goto bad_size; + + if ((mem->gpu_addr & ~PAGE_MASK) && (mem->gpu_addr >= PAGE_SIZE)) { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FREE: mem->gpu_addr: passed parameter is invalid"); + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + if (kbase_mem_free(kctx, mem->gpu_addr)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + case KBASE_FUNC_JOB_SUBMIT: + { + kbase_uk_job_submit *job = args; + + if (sizeof(*job) != args_size) + goto bad_size; + + if (MALI_ERROR_NONE != kbase_jd_submit(kctx, job)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + case KBASE_FUNC_SYNC: + { + kbase_uk_sync_now *sn = args; + + if (sizeof(*sn) != args_size) + goto bad_size; + + if (sn->sset.basep_sset.mem_handle & ~PAGE_MASK) { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid"); + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + if (MALI_ERROR_NONE != kbase_sync_now(kctx, &sn->sset)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + case KBASE_FUNC_POST_TERM: + { + kbase_event_close(kctx); + break; + } + + case KBASE_FUNC_HWCNT_SETUP: + { + kbase_uk_hwcnt_setup *setup = args; + + if (sizeof(*setup) != args_size) + goto bad_size; + + if (MALI_ERROR_NONE != kbase_instr_hwcnt_setup(kctx, setup)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + case KBASE_FUNC_HWCNT_DUMP: + { + /* args ignored */ + if (MALI_ERROR_NONE != kbase_instr_hwcnt_dump(kctx)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + case KBASE_FUNC_HWCNT_CLEAR: + { + /* args ignored */ + if (MALI_ERROR_NONE != kbase_instr_hwcnt_clear(kctx)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + case KBASE_FUNC_CPU_PROPS_REG_DUMP: + { + kbase_uk_cpuprops *setup = args; + + if (sizeof(*setup) != args_size) + goto bad_size; + + if (MALI_ERROR_NONE != kbase_cpuprops_uk_get_props(kctx, setup)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + case KBASE_FUNC_GPU_PROPS_REG_DUMP: + { + kbase_uk_gpuprops *setup = args; + + if (sizeof(*setup) != args_size) + goto bad_size; + + if (MALI_ERROR_NONE != kbase_gpuprops_uk_get_props(kctx, setup)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + case KBASE_FUNC_FIND_CPU_OFFSET: + { + kbase_uk_find_cpu_offset *find = args; + + if (sizeof(*find) != args_size) + goto bad_size; + + if (find->gpu_addr & ~PAGE_MASK) { + dev_warn(kbdev->dev, + "kbase_dispatch case KBASE_FUNC_FIND_CPU_OFFSET:" 
+ "find->gpu_addr: passed parameter is invalid"); + goto out_bad; + } + + if (find->size > SIZE_MAX || find->cpu_addr > ULONG_MAX) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + else { + mali_error err; + + err = kbasep_find_enclosing_cpu_mapping_offset( + kctx, + find->gpu_addr, + (uintptr_t) find->cpu_addr, + (size_t) find->size, + &find->offset); + + if (err != MALI_ERROR_NONE) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + } + break; + } + case KBASE_FUNC_GET_VERSION: + { + kbase_uk_get_ddk_version *get_version = (kbase_uk_get_ddk_version *) args; + + if (sizeof(*get_version) != args_size) + goto bad_size; + + /* version buffer size check is made in compile time assert */ + memcpy(get_version->version_buffer, KERNEL_SIDE_DDK_VERSION_STRING, sizeof(KERNEL_SIDE_DDK_VERSION_STRING)); + get_version->version_string_size = sizeof(KERNEL_SIDE_DDK_VERSION_STRING); + break; + } + + case KBASE_FUNC_STREAM_CREATE: + { +#ifdef CONFIG_SYNC + kbase_uk_stream_create *screate = (kbase_uk_stream_create *) args; + + if (sizeof(*screate) != args_size) + goto bad_size; + + if (strnlen(screate->name, sizeof(screate->name)) >= sizeof(screate->name)) { + /* not NULL terminated */ + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + ukh->ret = kbase_stream_create(screate->name, &screate->fd); +#else /* CONFIG_SYNC */ + ukh->ret = MALI_ERROR_FUNCTION_FAILED; +#endif /* CONFIG_SYNC */ + break; + } + case KBASE_FUNC_FENCE_VALIDATE: + { +#ifdef CONFIG_SYNC + kbase_uk_fence_validate *fence_validate = (kbase_uk_fence_validate *) args; + if (sizeof(*fence_validate) != args_size) + goto bad_size; + + ukh->ret = kbase_fence_validate(fence_validate->fd); +#endif /* CONFIG_SYNC */ + break; + } + + case KBASE_FUNC_EXT_BUFFER_LOCK: + { +#ifdef CONFIG_KDS + ukh->ret = kbase_external_buffer_lock(kctx, (kbase_uk_ext_buff_kds_data *) args, args_size); +#endif /* CONFIG_KDS */ + break; + } + + case KBASE_FUNC_SET_TEST_DATA: + { +#if MALI_UNIT_TEST + kbase_uk_set_test_data *set_data = args; + + shared_kernel_test_data = set_data->test_data; + shared_kernel_test_data.kctx.value = kctx; + shared_kernel_test_data.mm.value = (void *)current->mm; + ukh->ret = MALI_ERROR_NONE; +#endif /* MALI_UNIT_TEST */ + break; + } + + case KBASE_FUNC_INJECT_ERROR: + { +#ifdef CONFIG_MALI_ERROR_INJECT + unsigned long flags; + kbase_error_params params = ((kbase_uk_error_params *) args)->params; + /*mutex lock */ + spin_lock_irqsave(&kbdev->reg_op_lock, flags); + ukh->ret = job_atom_inject_error(¶ms); + spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); + /*mutex unlock */ +#endif /* CONFIG_MALI_ERROR_INJECT */ + break; + } + + case KBASE_FUNC_MODEL_CONTROL: + { +#ifdef CONFIG_MALI_NO_MALI + unsigned long flags; + kbase_model_control_params params = ((kbase_uk_model_control_params *) args)->params; + /*mutex lock */ + spin_lock_irqsave(&kbdev->reg_op_lock, flags); + ukh->ret = midg_model_control(kbdev->model, ¶ms); + spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); + /*mutex unlock */ +#endif /* CONFIG_MALI_NO_MALI */ + break; + } + + case KBASE_FUNC_KEEP_GPU_POWERED: + { + kbase_uk_keep_gpu_powered *kgp = (kbase_uk_keep_gpu_powered *) args; + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread. 
+ * + * Nevertheless, we'd get the wrong pm_context_active/idle counting + * here if a suspend did happen, so let's assert it won't: */ + KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + + if (kgp->enabled && !kctx->keep_gpu_powered) { + kbase_pm_context_active(kbdev); + atomic_inc(&kbdev->keep_gpu_powered_count); + kctx->keep_gpu_powered = MALI_TRUE; + } else if (!kgp->enabled && kctx->keep_gpu_powered) { + atomic_dec(&kbdev->keep_gpu_powered_count); + kbase_pm_context_idle(kbdev); + kctx->keep_gpu_powered = MALI_FALSE; + } + + break; + } + + case KBASE_FUNC_GET_PROFILING_CONTROLS : + { + struct kbase_uk_profiling_controls *controls = \ + (struct kbase_uk_profiling_controls *)args; + u32 i; + + if (sizeof(*controls) != args_size) + goto bad_size; + + for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++) { + controls->profiling_controls[i] = kbase_get_profiling_control(kbdev, i); + } + + break; + } + + /* used only for testing purposes; these controls are to be set by gator through gator API */ + case KBASE_FUNC_SET_PROFILING_CONTROLS : + { + struct kbase_uk_profiling_controls *controls = \ + (struct kbase_uk_profiling_controls *)args; + u32 i; + + if (sizeof(*controls) != args_size) + goto bad_size; + + for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++) + { + _mali_profiling_control(i, controls->profiling_controls[i]); + } + + break; + } + + default: + dev_err(kbdev->dev, "unknown ioctl %u", id); + goto out_bad; + } + + return MALI_ERROR_NONE; + + bad_size: + dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id); + out_bad: + return MALI_ERROR_FUNCTION_FAILED; +} + +static struct kbase_device *to_kbase_device(struct device *dev) +{ + return dev_get_drvdata(dev); +} + +/* + * API to acquire device list semaphore and + * return pointer to the device list head + */ +const struct list_head *kbase_dev_list_get(void) +{ + down(&kbase_dev_list_lock); + return &kbase_dev_list; +} + +/* API to release the device list semaphore */ +void kbase_dev_list_put(const struct list_head *dev_list) +{ + up(&kbase_dev_list_lock); +} + +/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */ +struct kbase_device *kbase_find_device(int minor) +{ + struct kbase_device *kbdev = NULL; + struct list_head *entry; + + down(&kbase_dev_list_lock); + list_for_each(entry, &kbase_dev_list) { + struct kbase_device *tmp; + + tmp = list_entry(entry, struct kbase_device, entry); + if (tmp->mdev.minor == minor || minor == -1) { + kbdev = tmp; + get_device(kbdev->dev); + break; + } + } + up(&kbase_dev_list_lock); + + return kbdev; +} +EXPORT_SYMBOL(kbase_find_device); + +void kbase_release_device(struct kbase_device *kbdev) +{ + put_device(kbdev->dev); +} +EXPORT_SYMBOL(kbase_release_device); + +static int kbase_open(struct inode *inode, struct file *filp) +{ + struct kbase_device *kbdev = NULL; + kbase_context *kctx; + int ret = 0; + + kbdev = kbase_find_device(iminor(inode)); + + if (!kbdev) + return -ENODEV; + + kctx = kbase_create_context(kbdev); + if (!kctx) { + ret = -ENOMEM; + goto out; + } + + init_waitqueue_head(&kctx->event_queue); + filp->private_data = kctx; + + KBASE_LOG(1, kbdev->dev, "created base context\n"); + + { + kbasep_kctx_list_element *element; + + element = kzalloc(sizeof(kbasep_kctx_list_element), GFP_KERNEL); + if (element) { + mutex_lock(&kbdev->kctx_list_lock); + element->kctx = kctx; + list_add(&element->link, &kbdev->kctx_list); + mutex_unlock(&kbdev->kctx_list_lock); + } else { + /* we don't treat this as 
a fail - just warn about it */ + dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n"); + } + } + return 0; + + out: + kbase_release_device(kbdev); + return ret; +} + +static int kbase_release(struct inode *inode, struct file *filp) +{ + kbase_context *kctx = filp->private_data; + struct kbase_device *kbdev = kctx->kbdev; + kbasep_kctx_list_element *element, *tmp; + mali_bool found_element = MALI_FALSE; + + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { + if (element->kctx == kctx) { + list_del(&element->link); + kfree(element); + found_element = MALI_TRUE; + } + } + mutex_unlock(&kbdev->kctx_list_lock); + if (!found_element) + dev_warn(kbdev->dev, "kctx not in kctx_list\n"); + + filp->private_data = NULL; + kbase_destroy_context(kctx); + + KBASE_LOG(1, kbdev->dev, "deleted base context\n"); + kbase_release_device(kbdev); + return 0; +} + +#define CALL_MAX_SIZE 536 + +static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + u64 msg[(CALL_MAX_SIZE + 7) >> 3] = { 0xdeadbeefdeadbeefull }; /* alignment fixup */ + u32 size = _IOC_SIZE(cmd); + kbase_context *kctx = filp->private_data; + + if (size > CALL_MAX_SIZE) + return -ENOTTY; + + if (0 != copy_from_user(&msg, (void *)arg, size)) { + dev_err(kctx->kbdev->dev, "failed to copy ioctl argument into kernel space\n"); + return -EFAULT; + } + + if (MALI_ERROR_NONE != kbase_dispatch(kctx, &msg, size)) + return -EFAULT; + + if (0 != copy_to_user((void *)arg, &msg, size)) { + dev_err(kctx->kbdev->dev, "failed to copy results of UK call back to user space\n"); + return -EFAULT; + } + return 0; +} + +static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) +{ + kbase_context *kctx = filp->private_data; + base_jd_event_v2 uevent; + int out_count = 0; + + if (count < sizeof(uevent)) + return -ENOBUFS; + + do { + while (kbase_event_dequeue(kctx, &uevent)) { + if (out_count > 0) + goto out; + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(kctx->event_queue, kbase_event_pending(kctx))) + return -ERESTARTSYS; + } + if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { + if (out_count == 0) + return -EPIPE; + goto out; + } + + if (copy_to_user(buf, &uevent, sizeof(uevent))) + return -EFAULT; + + buf += sizeof(uevent); + out_count++; + count -= sizeof(uevent); + } while (count >= sizeof(uevent)); + + out: + return out_count * sizeof(uevent); +} + +static unsigned int kbase_poll(struct file *filp, poll_table *wait) +{ + kbase_context *kctx = filp->private_data; + + poll_wait(filp, &kctx->event_queue, wait); + if (kbase_event_pending(kctx)) + return POLLIN | POLLRDNORM; + + return 0; +} + +void kbase_event_wakeup(kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); + + wake_up_interruptible(&kctx->event_queue); +} + +KBASE_EXPORT_TEST_API(kbase_event_wakeup) + +int kbase_check_flags(int flags) +{ + /* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always + * closes the file descriptor in a child process. 
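+ * The check_flags file operation is invoked by the VFS when user space attempts to change the descriptor's flags (for example via fcntl(F_SETFL)), so returning -EINVAL here prevents O_CLOEXEC from being cleared once the device has been opened.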
+ */ + if (0 == (flags & O_CLOEXEC)) + return -EINVAL; + + return 0; +} + +static const struct file_operations kbase_fops = { + .owner = THIS_MODULE, + .open = kbase_open, + .release = kbase_release, + .read = kbase_read, + .poll = kbase_poll, + .unlocked_ioctl = kbase_ioctl, + .compat_ioctl = kbase_ioctl, + .mmap = kbase_mmap, + .check_flags = kbase_check_flags, +}; + +#ifndef CONFIG_MALI_NO_MALI +void kbase_os_reg_write(kbase_device *kbdev, u16 offset, u32 value) +{ + writel(value, kbdev->reg + offset); +} + +u32 kbase_os_reg_read(kbase_device *kbdev, u16 offset) +{ + return readl(kbdev->reg + offset); +} + +static void *kbase_tag(void *ptr, u32 tag) +{ + return (void *)(((uintptr_t) ptr) | tag); +} + +static void *kbase_untag(void *ptr) +{ + return (void *)(((uintptr_t) ptr) & ~3); +} + +static irqreturn_t kbase_job_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags); + + if (!kbdev->pm.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.driver_ready_for_irqs) + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val ); +#endif /* CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + KBASE_LOG(3, kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_job_done(kbdev, val); + + return IRQ_HANDLED; +} + +KBASE_EXPORT_TEST_API(kbase_job_irq_handler); + +static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags); + + if (!kbdev->pm.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.driver_ready_for_irqs) + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val ); +#endif /* CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + KBASE_LOG(3, kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_mmu_interrupt(kbdev, val); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags); + + if (!kbdev->pm.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.driver_ready_for_irqs) + KBASE_LOG(3, kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val ); +#endif /* CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + KBASE_LOG(3, kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_gpu_interrupt(kbdev, val); + + return IRQ_HANDLED; +} + +static irq_handler_t kbase_handler_table[] = 
{ + [JOB_IRQ_TAG] = kbase_job_irq_handler, + [MMU_IRQ_TAG] = kbase_mmu_irq_handler, + [GPU_IRQ_TAG] = kbase_gpu_irq_handler, +}; + +#ifdef CONFIG_MALI_DEBUG +#define JOB_IRQ_HANDLER JOB_IRQ_TAG +#define MMU_IRQ_HANDLER MMU_IRQ_TAG +#define GPU_IRQ_HANDLER GPU_IRQ_TAG + +/** + * @brief Registers given interrupt handler for requested interrupt type + * Case irq handler is not specified default handler shall be registered + * + * @param[in] kbdev - Device for which the handler is to be registered + * @param[in] custom_handler - Handler to be registered + * @param[in] irq_type - Interrupt type + * @return MALI_ERROR_NONE case success, MALI_ERROR_FUNCTION_FAILED otherwise + */ +static mali_error kbase_set_custom_irq_handler(kbase_device *kbdev, irq_handler_t custom_handler, int irq_type) +{ + mali_error result = MALI_ERROR_NONE; + irq_handler_t requested_irq_handler = NULL; + KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) && (GPU_IRQ_HANDLER >= irq_type)); + + /* Release previous handler */ + if (kbdev->irqs[irq_type].irq) + free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); + + requested_irq_handler = (NULL != custom_handler) ? custom_handler : kbase_handler_table[irq_type]; + + if (0 != request_irq(kbdev->irqs[irq_type].irq, requested_irq_handler, kbdev->irqs[irq_type].flags | IRQF_SHARED, dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) { + result = MALI_ERROR_FUNCTION_FAILED; + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", kbdev->irqs[irq_type].irq, irq_type); +#ifdef CONFIG_SPARSE_IRQ + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); +#endif /* CONFIG_SPARSE_IRQ */ + } + + return result; +} + +KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler) + +/* test correct interrupt assigment and reception by cpu */ +typedef struct kbasep_irq_test { + struct hrtimer timer; + wait_queue_head_t wait; + int triggered; + u32 timeout; +} kbasep_irq_test; + +static kbasep_irq_test kbasep_irq_test_data; + +#define IRQ_TEST_TIMEOUT 500 + +static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags); + + if (!kbdev->pm.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); + + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + KBASE_LOG(3, kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbasep_irq_test_data.triggered = 1; + wake_up(&kbasep_irq_test_data.wait); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags); + + if (!kbdev->pm.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); + + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + KBASE_LOG(3, kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbasep_irq_test_data.triggered = 1; + 
wake_up(&kbasep_irq_test_data.wait); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL); + + return IRQ_HANDLED; +} + +static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) +{ + kbasep_irq_test *test_data = container_of(timer, kbasep_irq_test, timer); + + test_data->timeout = 1; + test_data->triggered = 1; + wake_up(&test_data->wait); + return HRTIMER_NORESTART; +} + +static mali_error kbasep_common_test_interrupt(kbase_device * const kbdev, u32 tag) +{ + mali_error err = MALI_ERROR_NONE; + irq_handler_t test_handler; + + u32 old_mask_val; + u16 mask_offset; + u16 rawstat_offset; + + switch (tag) { + case JOB_IRQ_TAG: + test_handler = kbase_job_irq_test_handler; + rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT); + mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK); + break; + case MMU_IRQ_TAG: + test_handler = kbase_mmu_irq_test_handler; + rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); + mask_offset = MMU_REG(MMU_IRQ_MASK); + break; + case GPU_IRQ_TAG: + /* already tested by pm_driver - bail out */ + default: + return MALI_ERROR_NONE; + } + + /* store old mask */ + old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL); + /* mask interrupts */ + kbase_reg_write(kbdev, mask_offset, 0x0, NULL); + + if (kbdev->irqs[tag].irq) { + /* release original handler and install test handler */ + if (MALI_ERROR_NONE != kbase_set_custom_irq_handler(kbdev, test_handler, tag)) { + err = MALI_ERROR_FUNCTION_FAILED; + } else { + kbasep_irq_test_data.timeout = 0; + hrtimer_init(&kbasep_irq_test_data.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kbasep_irq_test_data.timer.function = kbasep_test_interrupt_timeout; + + /* trigger interrupt */ + kbase_reg_write(kbdev, mask_offset, 0x1, NULL); + kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL); + + hrtimer_start(&kbasep_irq_test_data.timer, HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), HRTIMER_MODE_REL); + + wait_event(kbasep_irq_test_data.wait, kbasep_irq_test_data.triggered != 0); + + if (kbasep_irq_test_data.timeout != 0) { + dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", kbdev->irqs[tag].irq, tag); + err = MALI_ERROR_FUNCTION_FAILED; + } else { + KBASE_LOG(2, kbdev->dev, "Interrupt %d (index %d) reached CPU.\n", kbdev->irqs[tag].irq, tag); + } + + hrtimer_cancel(&kbasep_irq_test_data.timer); + kbasep_irq_test_data.triggered = 0; + + /* mask interrupts */ + kbase_reg_write(kbdev, mask_offset, 0x0, NULL); + + /* release test handler */ + free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); + } + + /* restore original interrupt */ + if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], kbdev->irqs[tag].flags | IRQF_SHARED, dev_name(kbdev->dev), kbase_tag(kbdev, tag))) { + dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", kbdev->irqs[tag].irq, tag); + err = MALI_ERROR_FUNCTION_FAILED; + } + } + /* restore old mask */ + kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL); + + return err; +} + +static mali_error kbasep_common_test_interrupt_handlers(kbase_device * const kbdev) +{ + mali_error err; + + init_waitqueue_head(&kbasep_irq_test_data.wait); + kbasep_irq_test_data.triggered = 0; + + /* A suspend won't happen during startup/insmod */ + kbase_pm_context_active(kbdev); + + err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG); + if (MALI_ERROR_NONE != err) { + dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. 
Check interrupt assignments.\n"); + goto out; + } + + err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG); + if (MALI_ERROR_NONE != err) { + dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n"); + goto out; + } + + dev_err(kbdev->dev, "Interrupts are correctly assigned.\n"); + + out: + kbase_pm_context_idle(kbdev); + + return err; + +} +#endif /* CONFIG_MALI_DEBUG */ + +static int kbase_install_interrupts(kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + int err; + u32 i; + + for (i = 0; i < nr; i++) { + err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], kbdev->irqs[i].flags | IRQF_SHARED, dev_name(kbdev->dev), kbase_tag(kbdev, i)); + if (err) { + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", kbdev->irqs[i].irq, i); +#ifdef CONFIG_SPARSE_IRQ + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); +#endif /* CONFIG_SPARSE_IRQ */ + goto release; + } + } + + return 0; + + release: + while (i-- > 0) + free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + + return err; +} + +static void kbase_release_interrupts(kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + u32 i; + + for (i = 0; i < nr; i++) { + if (kbdev->irqs[i].irq) + free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + } +} + +void kbase_synchronize_irqs(kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + u32 i; + + for (i = 0; i < nr; i++) { + if (kbdev->irqs[i].irq) + synchronize_irq(kbdev->irqs[i].irq); + } +} + +#endif /* CONFIG_MALI_NO_MALI */ + + +/** Show callback for the @c power_policy sysfs file. + * + * This function is called to get the contents of the @c power_policy sysfs + * file. This is a list of the available policies with the currently active one + * surrounded by square brackets. + * + * @param dev The device this sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The output buffer for the sysfs file contents + * + * @return The number of bytes output to @c buf. + */ +static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf) +{ + struct kbase_device *kbdev; + const struct kbase_pm_policy *current_policy; + const struct kbase_pm_policy *const *policy_list; + int policy_count; + int i; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + current_policy = kbase_pm_get_policy(kbdev); + + policy_count = kbase_pm_list_policies(&policy_list); + + for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { + if (policy_list[i] == current_policy) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); + else + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); + } + + if (ret < PAGE_SIZE - 1) { + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + } else { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** Store callback for the @c power_policy sysfs file. + * + * This function is called when the @c power_policy sysfs file is written to. + * It matches the requested policy against the available policies and if a + * matching policy is found calls @ref kbase_pm_set_policy to change the + * policy. 
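+ * + * For example, writing "coarse_demand" to this file selects the coarse demand policy, provided that name appears in the list reported when the file is read (i.e. the policy is built into the driver).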
+ * + * @param dev The device with sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The value written to the sysfs file + * @param count The number of bytes written to the sysfs file + * + * @return @c count if the function succeeded. An error code on failure. + */ +static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + const struct kbase_pm_policy *new_policy = NULL; + const struct kbase_pm_policy *const *policy_list; + int policy_count; + int i; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + policy_count = kbase_pm_list_policies(&policy_list); + + for (i = 0; i < policy_count; i++) { + if (sysfs_streq(policy_list[i]->name, buf)) { + new_policy = policy_list[i]; + break; + } + } + + if (!new_policy) { + dev_err(dev, "power_policy: policy not found\n"); + return -EINVAL; + } + + kbase_pm_set_policy(kbdev, new_policy); + + return count; +} + +/** The sysfs file @c power_policy. + * + * This is used for obtaining information about the available policies, + * determining which policy is currently active, and changing the active + * policy. + */ +DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy); + +/** Show callback for the @c core_availability_policy sysfs file. + * + * This function is called to get the contents of the @c core_availability_policy + * sysfs file. This is a list of the available policies with the currently + * active one surrounded by square brackets. + * + * @param dev The device this sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The output buffer for the sysfs file contents + * + * @return The number of bytes output to @c buf. + */ +static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char *const buf) +{ + struct kbase_device *kbdev; + const struct kbase_pm_ca_policy *current_policy; + const struct kbase_pm_ca_policy *const *policy_list; + int policy_count; + int i; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + current_policy = kbase_pm_ca_get_policy(kbdev); + + policy_count = kbase_pm_ca_list_policies(&policy_list); + + for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { + if (policy_list[i] == current_policy) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); + else + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); + } + + if (ret < PAGE_SIZE - 1) { + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + } else { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** Store callback for the @c core_availability_policy sysfs file. + * + * This function is called when the @c core_availability_policy sysfs file is + * written to. It matches the requested policy against the available policies + * and if a matching policy is found calls @ref kbase_pm_set_policy to change + * the policy. + * + * @param dev The device with sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The value written to the sysfs file + * @param count The number of bytes written to the sysfs file + * + * @return @c count if the function succeeded. An error code on failure. 
+ */ +static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + const struct kbase_pm_ca_policy *new_policy = NULL; + const struct kbase_pm_ca_policy *const *policy_list; + int policy_count; + int i; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + policy_count = kbase_pm_ca_list_policies(&policy_list); + + for (i = 0; i < policy_count; i++) { + if (sysfs_streq(policy_list[i]->name, buf)) { + new_policy = policy_list[i]; + break; + } + } + + if (!new_policy) { + dev_err(dev, "core_availability_policy: policy not found\n"); + return -EINVAL; + } + + kbase_pm_ca_set_policy(kbdev, new_policy); + + return count; +} + +/** The sysfs file @c core_availability_policy + * + * This is used for obtaining information about the available policies, + * determining which policy is currently active, and changing the active + * policy. + */ +DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy); + +/** Show callback for the @c core_mask sysfs file. + * + * This function is called to get the contents of the @c core_mask sysfs + * file. + * + * @param dev The device this sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The output buffer for the sysfs file contents + * + * @return The number of bytes output to @c buf. + */ +static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char *const buf) +{ + struct kbase_device *kbdev; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask : 0x%llX\n", kbdev->pm.debug_core_mask); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Available core mask : 0x%llX\n", kbdev->shader_present_bitmap); + + return ret; +} + +/** Store callback for the @c core_mask sysfs file. + * + * This function is called when the @c core_mask sysfs file is written to. + * + * @param dev The device with sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The value written to the sysfs file + * @param count The number of bytes written to the sysfs file + * + * @return @c count if the function succeeded. An error code on failure. + */ +static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + u64 new_core_mask; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + new_core_mask = simple_strtoull(buf, NULL, 16); + + if ((new_core_mask & kbdev->shader_present_bitmap) != new_core_mask || + !(new_core_mask & kbdev->gpu_props.props.coherency_info.group[0].core_mask)) { + dev_err(dev, "power_policy: invalid core specification\n"); + return -EINVAL; + } + + if (kbdev->pm.debug_core_mask != new_core_mask) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + kbdev->pm.debug_core_mask = new_core_mask; + kbase_pm_update_cores_state_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + } + + return count; +} + +/** The sysfs file @c core_mask. + * + * This is used to restrict shader core availability for debugging purposes. + * Reading it will show the current core mask and the mask of cores available. + * Writing to it will set the current core mask. 
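+ * The value is parsed as hexadecimal and must be a subset of the cores present in hardware, and it must include at least one core from the first coherency group; for example "echo 3 > core_mask" (illustrative value) restricts scheduling to the first two shader cores.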
+ */ +DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); + + +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS +/* Import the external affinity mask variables */ +extern u64 mali_js0_affinity_mask; +extern u64 mali_js1_affinity_mask; +extern u64 mali_js2_affinity_mask; + +/** + * Structure containing a single shader affinity split configuration. + */ +typedef struct { + char const * tag; + char const * human_readable; + u64 js0_mask; + u64 js1_mask; + u64 js2_mask; +} sc_split_config; + +/** + * Array of available shader affinity split configurations. + */ +static sc_split_config const sc_split_configs[] = +{ + /* All must be the first config (default). */ + { + "all", "All cores", + 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL + }, + { + "mp1", "MP1 shader core", + 0x1, 0x1, 0x1 + }, + { + "mp2", "MP2 shader core", + 0x3, 0x3, 0x3 + }, + { + "mp4", "MP4 shader core", + 0xF, 0xF, 0xF + }, + { + "mp1_vf", "MP1 vertex + MP1 fragment shader core", + 0x2, 0x1, 0xFFFFFFFFFFFFFFFFULL + }, + { + "mp2_vf", "MP2 vertex + MP2 fragment shader core", + 0xA, 0x5, 0xFFFFFFFFFFFFFFFFULL + }, + /* This must be the last config. */ + { + NULL, NULL, + 0x0, 0x0, 0x0 + }, +}; + +/* Pointer to the currently active shader split configuration. */ +static sc_split_config const * current_sc_split_config = &sc_split_configs[0]; + +/** Show callback for the @c sc_split sysfs file + * + * Returns the current shader core affinity policy. + */ +static ssize_t show_split(struct device *dev, struct device_attribute *attr, char * const buf) +{ + ssize_t ret; + /* We know we are given a buffer which is PAGE_SIZE long. Our strings are all guaranteed + * to be shorter than that at this time so no length check needed. */ + ret = scnprintf(buf, PAGE_SIZE, "Current sc_split: '%s'\n", current_sc_split_config->tag ); + return ret; +} + +/** Store callback for the @c sc_split sysfs file. + * + * This function is called when the @c sc_split sysfs file is written to + * It modifies the system shader core affinity configuration to allow + * system profiling with different hardware configurations. + * + * @param dev The device with sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The value written to the sysfs file + * @param count The number of bytes written to the sysfs file + * + * @return @c count if the function succeeded. An error code on failure. + */ +static ssize_t set_split(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + sc_split_config const * config = &sc_split_configs[0]; + + /* Try to match: loop until we hit the last "NULL" entry */ + while( config->tag ) + { + if (sysfs_streq(config->tag, buf)) + { + current_sc_split_config = config; + mali_js0_affinity_mask = config->js0_mask; + mali_js1_affinity_mask = config->js1_mask; + mali_js2_affinity_mask = config->js2_mask; + KBASE_LOG(2, dev, "Setting sc_split: '%s'\n", config->tag); + return count; + } + config++; + } + + /* No match found in config list */ + dev_err(dev, "sc_split: invalid value\n"); + dev_err(dev, " Possible settings: mp[1|2|4], mp[1|2]_vf\n"); + return -ENOENT; +} + +/** The sysfs file @c sc_split + * + * This is used for configuring/querying the current shader core work affinity + * configuration. + */ +DEVICE_ATTR(sc_split, S_IRUGO|S_IWUSR, show_split, set_split); +#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ + + +#if MALI_CUSTOMER_RELEASE == 0 +/** Store callback for the @c js_timeouts sysfs file. 
+ * + * This function is called when the @c js_timeouts sysfs + * file is written to. The file contains eight values separated by whitespace. The values + * are basically the same as the KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + * KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL, KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL, KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + * KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL, + * KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS configuration values (in that order), with the + * difference that the js_timeouts values are expressed in MILLISECONDS. + * + * The js_timeouts sysfs file allows the current values in + * use by the job scheduler to be overridden. Note that a value needs to + * be other than 0 for it to override the current job scheduler value. + * + * @param dev The device this sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The value written to the sysfs file + * @param count The number of bytes written to the sysfs file + * + * @return @c count if the function succeeded. An error code on failure. + */ +static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int items; + unsigned long js_soft_stop_ms; + unsigned long js_soft_stop_ms_cl; + unsigned long js_hard_stop_ms_ss; + unsigned long js_hard_stop_ms_cl; + unsigned long js_hard_stop_ms_nss; + unsigned long js_reset_ms_ss; + unsigned long js_reset_ms_cl; + unsigned long js_reset_ms_nss; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + items = sscanf(buf, "%lu %lu %lu %lu %lu %lu %lu %lu", &js_soft_stop_ms, &js_soft_stop_ms_cl, &js_hard_stop_ms_ss, &js_hard_stop_ms_cl, &js_hard_stop_ms_nss, &js_reset_ms_ss, &js_reset_ms_cl, &js_reset_ms_nss); + if (items == 8) { + u64 ticks; + + ticks = js_soft_stop_ms * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_tick_ns); + kbdev->js_soft_stop_ticks = ticks; + + ticks = js_soft_stop_ms_cl * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_tick_ns); + kbdev->js_soft_stop_ticks_cl = ticks; + + ticks = js_hard_stop_ms_ss * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_tick_ns); + kbdev->js_hard_stop_ticks_ss = ticks; + + ticks = js_hard_stop_ms_cl * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_tick_ns); + kbdev->js_hard_stop_ticks_cl = ticks; + + ticks = js_hard_stop_ms_nss * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_tick_ns); + kbdev->js_hard_stop_ticks_nss = ticks; + + ticks = js_reset_ms_ss * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_tick_ns); + kbdev->js_reset_ticks_ss = ticks; + + ticks = js_reset_ms_cl * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_tick_ns); + kbdev->js_reset_ticks_cl = ticks; + + ticks = js_reset_ms_nss * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_tick_ns); + kbdev->js_reset_ticks_nss = ticks; + + KBASE_LOG(2, kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_soft_stop_ticks, js_soft_stop_ms); + KBASE_LOG(2, kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_soft_stop_ticks_cl, js_soft_stop_ms_cl); + KBASE_LOG(2, kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_hard_stop_ticks_ss, js_hard_stop_ms_ss); + KBASE_LOG(2, kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_hard_stop_ticks_cl, js_hard_stop_ms_cl); + KBASE_LOG(2, kbdev->dev,
"Overriding KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_hard_stop_ticks_nss, js_hard_stop_ms_nss); + KBASE_LOG(2, kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_reset_ticks_ss, js_reset_ms_ss); + KBASE_LOG(2, kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_reset_ticks_cl, js_reset_ms_cl); + KBASE_LOG(2, kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_reset_ticks_nss, js_reset_ms_nss); + + return count; + } else { + dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\nUse format " " \n"); + return -EINVAL; + } +} + +/** Show callback for the @c js_timeouts sysfs file. + * + * This function is called to get the contents of the @c js_timeouts sysfs + * file. It returns the last set values written to the js_timeouts sysfs file. + * If the file didn't get written yet, the values will be 0. + * + * @param dev The device this sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The output buffer for the sysfs file contents + * + * @return The number of bytes output to @c buf. + */ +static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + u64 ms; + unsigned long js_soft_stop_ms; + unsigned long js_soft_stop_ms_cl; + unsigned long js_hard_stop_ms_ss; + unsigned long js_hard_stop_ms_cl; + unsigned long js_hard_stop_ms_nss; + unsigned long js_reset_ms_ss; + unsigned long js_reset_ms_cl; + unsigned long js_reset_ms_nss; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ms = (u64) kbdev->js_soft_stop_ticks * kbdev->js_data.scheduling_tick_ns; + do_div(ms, 1000000UL); + js_soft_stop_ms = (unsigned long)ms; + + ms = (u64) kbdev->js_soft_stop_ticks_cl * kbdev->js_data.scheduling_tick_ns; + do_div(ms, 1000000UL); + js_soft_stop_ms_cl = (unsigned long)ms; + + ms = (u64) kbdev->js_hard_stop_ticks_ss * kbdev->js_data.scheduling_tick_ns; + do_div(ms, 1000000UL); + js_hard_stop_ms_ss = (unsigned long)ms; + + ms = (u64) kbdev->js_hard_stop_ticks_cl * kbdev->js_data.scheduling_tick_ns; + do_div(ms, 1000000UL); + js_hard_stop_ms_cl = (unsigned long)ms; + + ms = (u64) kbdev->js_hard_stop_ticks_nss * kbdev->js_data.scheduling_tick_ns; + do_div(ms, 1000000UL); + js_hard_stop_ms_nss = (unsigned long)ms; + + ms = (u64) kbdev->js_reset_ticks_ss * kbdev->js_data.scheduling_tick_ns; + do_div(ms, 1000000UL); + js_reset_ms_ss = (unsigned long)ms; + + ms = (u64) kbdev->js_reset_ticks_cl * kbdev->js_data.scheduling_tick_ns; + do_div(ms, 1000000UL); + js_reset_ms_cl = (unsigned long)ms; + + ms = (u64) kbdev->js_reset_ticks_nss * kbdev->js_data.scheduling_tick_ns; + do_div(ms, 1000000UL); + js_reset_ms_nss = (unsigned long)ms; + + ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", js_soft_stop_ms, js_soft_stop_ms_cl, js_hard_stop_ms_ss, js_hard_stop_ms_cl, js_hard_stop_ms_nss, js_reset_ms_ss, js_reset_ms_cl, js_reset_ms_nss); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** The sysfs file @c js_timeouts. 
+ * + * This is used to override the current job scheduler values for + * KBASE_CONFIG_ATTR_JS_STOP_STOP_TICKS_SS + * KBASE_CONFIG_ATTR_JS_STOP_STOP_TICKS_CL + * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS + * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL + * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS + * KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS + * KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL + * KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS. + */ +DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts); + + + +/** Store callback for the @c force_replay sysfs file. + * + * @param dev The device with sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The value written to the sysfs file + * @param count The number of bytes written to the sysfs file + * + * @return @c count if the function succeeded. An error code on failure. + */ +static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + if (!strncmp("limit=", buf, MIN(6, count))) { + int force_replay_limit; + int items = sscanf(buf, "limit=%u", &force_replay_limit); + + if (items == 1) { + kbdev->force_replay_random = MALI_FALSE; + kbdev->force_replay_limit = force_replay_limit; + kbdev->force_replay_count = 0; + + return count; + } + } else if (!strncmp("random_limit", buf, MIN(12, count))) { + kbdev->force_replay_random = MALI_TRUE; + kbdev->force_replay_count = 0; + + return count; + } else if (!strncmp("norandom_limit", buf, MIN(14, count))) { + kbdev->force_replay_random = MALI_FALSE; + kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED; + kbdev->force_replay_count = 0; + + return count; + } else if (!strncmp("core_req=", buf, MIN(9, count))) { + unsigned int core_req; + int items = sscanf(buf, "core_req=%x", &core_req); + + if (items == 1) { + kbdev->force_replay_core_req = (base_jd_core_req)core_req; + + return count; + } + } + dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=, random_limit, norandom_limit, core_req=\n"); + return -EINVAL; +} + +/** Show callback for the @c force_replay sysfs file. + * + * This function is called to get the contents of the @c force_replay sysfs + * file. It returns the last set value written to the force_replay sysfs file. + * If the file didn't get written yet, the values will be 0. + * + * @param dev The device this sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The output buffer for the sysfs file contents + * + * @return The number of bytes output to @c buf. + */ +static ssize_t show_force_replay(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + if (kbdev->force_replay_random) + ret = scnprintf(buf, PAGE_SIZE, + "limit=0\nrandom_limit\ncore_req=%x\n", + kbdev->force_replay_core_req); + else + ret = scnprintf(buf, PAGE_SIZE, + "limit=%u\nnorandom_limit\ncore_req=%x\n", + kbdev->force_replay_limit, + kbdev->force_replay_core_req); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** The sysfs file @c force_replay. 
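+ * + * Reading it reports the currently selected replay-forcing configuration; writing accepts the settings parsed above: limit=<n>, random_limit, norandom_limit and core_req=<hex>.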
+ * + */ +DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay, set_force_replay); +#endif /* MALI_CUSTOMER_RELEASE == 0 */ + +#ifdef CONFIG_MALI_DEBUG +static ssize_t set_js_softstop_always(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int items; + int softstop_always; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + items = sscanf(buf, "%d", &softstop_always); + if ((items == 1) && ((softstop_always == 0) || (softstop_always == 1))) { + kbdev->js_data.softstop_always = (mali_bool) softstop_always; + + KBASE_LOG(2, kbdev->dev, "Support for softstop on a single context: %s\n", (kbdev->js_data.softstop_always == MALI_FALSE) ? "Disabled" : "Enabled"); + return count; + } else { + dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\nUse format " "\n"); + return -EINVAL; + } +} + +static ssize_t show_js_softstop_always(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** + * By default, soft-stops are disabled when only a single context is present. The ability to + * enable soft-stop when only a single context is present can be used for debug and unit-testing purposes. + * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.) + */ +DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always); +#endif /* CONFIG_MALI_DEBUG */ + +#ifdef CONFIG_MALI_DEBUG +typedef void (kbasep_debug_command_func) (kbase_device *); + +typedef enum { + KBASEP_DEBUG_COMMAND_DUMPTRACE, + + /* This must be the last enum */ + KBASEP_DEBUG_COMMAND_COUNT +} kbasep_debug_command_code; + +typedef struct kbasep_debug_command { + char *str; + kbasep_debug_command_func *func; +} kbasep_debug_command; + +/** Debug commands supported by the driver */ +static const kbasep_debug_command debug_commands[] = { + { + .str = "dumptrace", + .func = &kbasep_trace_dump, + } +}; + +/** Show callback for the @c debug_command sysfs file. + * + * This function is called to get the contents of the @c debug_command sysfs + * file. This is a list of the available debug commands, separated by newlines. + * + * @param dev The device this sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The output buffer for the sysfs file contents + * + * @return The number of bytes output to @c buf. + */ +static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char *const buf) +{ + struct kbase_device *kbdev; + int i; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** Store callback for the @c debug_command sysfs file. + * + * This function is called when the @c debug_command sysfs file is written to. 
+ * It matches the requested command against the available commands, and if + * a matching command is found calls the associated function from + * @ref debug_commands to issue the command. + * + * @param dev The device with sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The value written to the sysfs file + * @param count The number of bytes written to the sysfs file + * + * @return @c count if the function succeeded. An error code on failure. + */ +static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int i; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) { + if (sysfs_streq(debug_commands[i].str, buf)) { + debug_commands[i].func(kbdev); + return count; + } + } + + /* Debug Command not found */ + dev_err(dev, "debug_command: command not known\n"); + return -EINVAL; +} + +/** The sysfs file @c debug_command. + * + * This is used to issue general debug commands to the device driver. + * Reading it will produce a list of debug commands, separated by newlines. + * Writing to it with one of those commands will issue said command. + */ +DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug); +#endif /* CONFIG_MALI_DEBUG */ + +#ifdef CONFIG_MALI_NO_MALI +static int kbase_common_reg_map(kbase_device *kbdev) +{ + return 0; +} +static void kbase_common_reg_unmap(kbase_device * const kbdev) +{ + return; +} +#else /* CONFIG_MALI_NO_MALI */ +static int kbase_common_reg_map(kbase_device *kbdev) +{ + int err = -ENOMEM; + + kbdev->reg_res = request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev)); + if (!kbdev->reg_res) { + dev_err(kbdev->dev, "Register window unavailable\n"); + err = -EIO; + goto out_region; + } + + kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size); + if (!kbdev->reg) { + dev_err(kbdev->dev, "Can't remap register window\n"); + err = -EINVAL; + goto out_ioremap; + } + + return 0; + + out_ioremap: + release_resource(kbdev->reg_res); + kfree(kbdev->reg_res); + out_region: + return err; +} + +static void kbase_common_reg_unmap(kbase_device * const kbdev) +{ + iounmap(kbdev->reg); + release_resource(kbdev->reg_res); + kfree(kbdev->reg_res); +} +#endif /* CONFIG_MALI_NO_MALI */ + + +static int kbase_common_device_init(kbase_device *kbdev) +{ + int err = -ENOMEM; + mali_error mali_err; + enum { + inited_mem = (1u << 0), + inited_job_slot = (1u << 1), + inited_pm = (1u << 2), + inited_js = (1u << 3), + inited_irqs = (1u << 4), + inited_debug = (1u << 5), + inited_js_softstop = (1u << 6), +#if MALI_CUSTOMER_RELEASE == 0 + inited_js_timeouts = (1u << 7), + inited_force_replay = (1u << 13), +#endif /* MALI_CUSTOMER_RELEASE == 0 */ + inited_pm_runtime_init = (1u << 8), +#ifdef CONFIG_DEBUG_FS + inited_gpu_memory = (1u << 9), +#endif /* CONFIG_DEBUG_FS */ +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS + inited_sc_split = (1u << 11), +#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ +#ifdef CONFIG_MALI_TRACE_TIMELINE + inited_timeline = (1u << 12), +#endif /* CONFIG_MALI_TRACE_LINE */ + }; + + int inited = 0; + + dev_set_drvdata(kbdev->dev, kbdev); + + kbdev->mdev.minor = MISC_DYNAMIC_MINOR; + kbdev->mdev.name = kbdev->devname; + kbdev->mdev.fops = &kbase_fops; + kbdev->mdev.parent = get_device(kbdev->dev); + + scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, kbase_dev_nr++); + + if (misc_register(&kbdev->mdev)) { + dev_err(kbdev->dev, "Couldn't register 
misc dev %s\n", kbdev->devname); + err = -EINVAL; + goto out_misc; + } + + if (device_create_file(kbdev->dev, &dev_attr_power_policy)) { + dev_err(kbdev->dev, "Couldn't create power_policy sysfs file\n"); + goto out_file; + } + + if (device_create_file(kbdev->dev, &dev_attr_core_availability_policy)) { + dev_err(kbdev->dev, "Couldn't create core_availability_policy sysfs file\n"); + goto out_file_core_availability_policy; + } + + if (device_create_file(kbdev->dev, &dev_attr_core_mask)) { + dev_err(kbdev->dev, "Couldn't create core_mask sysfs file\n"); + goto out_file_core_mask; + } + + down(&kbase_dev_list_lock); + list_add(&kbdev->entry, &kbase_dev_list); + up(&kbase_dev_list_lock); + dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); + + mali_err = kbase_pm_init(kbdev); + if (MALI_ERROR_NONE != mali_err) + goto out_partial; + + inited |= inited_pm; + + if (kbdev->pm.callback_power_runtime_init) { + mali_err = kbdev->pm.callback_power_runtime_init(kbdev); + if (MALI_ERROR_NONE != mali_err) + goto out_partial; + + inited |= inited_pm_runtime_init; + } + + mali_err = kbase_mem_init(kbdev); + if (MALI_ERROR_NONE != mali_err) + goto out_partial; + + inited |= inited_mem; + + mali_err = kbase_job_slot_init(kbdev); + if (MALI_ERROR_NONE != mali_err) + goto out_partial; + + inited |= inited_job_slot; + + mali_err = kbasep_js_devdata_init(kbdev); + if (MALI_ERROR_NONE != mali_err) + goto out_partial; + + inited |= inited_js; + + err = kbase_install_interrupts(kbdev); + if (err) + goto out_partial; + + inited |= inited_irqs; + +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS + if (device_create_file(kbdev->dev, &dev_attr_sc_split)) + { + dev_err(kbdev->dev, "Couldn't create sc_split sysfs file\n"); + goto out_partial; + } + + inited |= inited_sc_split; +#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ + +#ifdef CONFIG_DEBUG_FS + if (kbasep_gpu_memory_debugfs_init(kbdev)) { + dev_err(kbdev->dev, "Couldn't create gpu_memory debugfs file\n"); + goto out_partial; + } + inited |= inited_gpu_memory; +#endif /* CONFIG_DEBUG_FS */ + +#ifdef CONFIG_MALI_DEBUG + + if (device_create_file(kbdev->dev, &dev_attr_debug_command)) { + dev_err(kbdev->dev, "Couldn't create debug_command sysfs file\n"); + goto out_partial; + } + inited |= inited_debug; + + if (device_create_file(kbdev->dev, &dev_attr_js_softstop_always)) { + dev_err(kbdev->dev, "Couldn't create js_softstop_always sysfs file\n"); + goto out_partial; + } + inited |= inited_js_softstop; +#endif /* CONFIG_MALI_DEBUG */ + +#if MALI_CUSTOMER_RELEASE == 0 + if (device_create_file(kbdev->dev, &dev_attr_js_timeouts)) { + dev_err(kbdev->dev, "Couldn't create js_timeouts sysfs file\n"); + goto out_partial; + } + inited |= inited_js_timeouts; + + if (device_create_file(kbdev->dev, &dev_attr_force_replay)) { + dev_err(kbdev->dev, "Couldn't create force_replay sysfs file\n"); + goto out_partial; + } + inited |= inited_force_replay; +#endif /* MALI_CUSTOMER_RELEASE */ + +#ifdef CONFIG_MALI_TRACE_TIMELINE + if (kbasep_trace_timeline_debugfs_init(kbdev)) { + dev_err(kbdev->dev, "Couldn't create mali_timeline_defs debugfs file\n"); + goto out_partial; + } + inited |= inited_timeline; +#endif /* CONFIG_MALI_TRACE_TIMELINE */ + + mali_err = kbase_pm_powerup(kbdev); + if (MALI_ERROR_NONE == mali_err) { +#ifdef CONFIG_MALI_DEBUG +#ifndef CONFIG_MALI_NO_MALI + if (MALI_ERROR_NONE != kbasep_common_test_interrupt_handlers(kbdev)) { + dev_err(kbdev->dev, "Interrupt assigment check failed.\n"); + err = -EINVAL; + goto out_partial; + } +#endif /* 
CONFIG_MALI_NO_MALI */ +#endif /* CONFIG_MALI_DEBUG */ + + /* intialise the kctx list */ + mutex_init(&kbdev->kctx_list_lock); + INIT_LIST_HEAD(&kbdev->kctx_list); + return 0; + } + + out_partial: +#ifdef CONFIG_MALI_TRACE_TIMELINE + if (inited & inited_timeline) + kbasep_trace_timeline_debugfs_term(kbdev); +#endif /* CONFIG_MALI_TRACE_TIMELINE */ +#if MALI_CUSTOMER_RELEASE == 0 + if (inited & inited_force_replay) + device_remove_file(kbdev->dev, &dev_attr_force_replay); + if (inited & inited_js_timeouts) + device_remove_file(kbdev->dev, &dev_attr_js_timeouts); +#endif /* MALI_CUSTOMER_RELEASE */ +#ifdef CONFIG_MALI_DEBUG + if (inited & inited_js_softstop) + device_remove_file(kbdev->dev, &dev_attr_js_softstop_always); + + if (inited & inited_debug) + device_remove_file(kbdev->dev, &dev_attr_debug_command); + +#endif /* CONFIG_MALI_DEBUG */ + +#ifdef CONFIG_DEBUG_FS + if (inited & inited_gpu_memory) + kbasep_gpu_memory_debugfs_term(kbdev); +#endif /* CONFIG_DEBUG_FS */ + +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS + if (inited & inited_sc_split) + { + device_remove_file(kbdev->dev, &dev_attr_sc_split); + } +#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ + + if (inited & inited_js) + kbasep_js_devdata_halt(kbdev); + + if (inited & inited_job_slot) + kbase_job_slot_halt(kbdev); + + if (inited & inited_mem) + kbase_mem_halt(kbdev); + + if (inited & inited_pm) + kbase_pm_halt(kbdev); + + if (inited & inited_irqs) + kbase_release_interrupts(kbdev); + + if (inited & inited_js) + kbasep_js_devdata_term(kbdev); + + if (inited & inited_job_slot) + kbase_job_slot_term(kbdev); + + if (inited & inited_mem) + kbase_mem_term(kbdev); + + if (inited & inited_pm_runtime_init) { + if (kbdev->pm.callback_power_runtime_term) + kbdev->pm.callback_power_runtime_term(kbdev); + } + + if (inited & inited_pm) + kbase_pm_term(kbdev); + + down(&kbase_dev_list_lock); + list_del(&kbdev->entry); + up(&kbase_dev_list_lock); + + device_remove_file(kbdev->dev, &dev_attr_core_mask); + out_file_core_mask: + device_remove_file(kbdev->dev, &dev_attr_core_availability_policy); + out_file_core_availability_policy: + device_remove_file(kbdev->dev, &dev_attr_power_policy); + out_file: + misc_deregister(&kbdev->mdev); + out_misc: + put_device(kbdev->dev); + return err; +} + +static int kbase_platform_device_probe(struct platform_device *pdev) +{ + struct kbase_device *kbdev; + struct resource *reg_res; + kbase_attribute *platform_data; + int err; + int i; + struct mali_base_gpu_core_props *core_props; +#ifdef CONFIG_MALI_NO_MALI + mali_error mali_err; +#endif /* CONFIG_MALI_NO_MALI */ +#ifdef CONFIG_OF + kbase_platform_config *config; + int attribute_count; + +#ifdef CONFIG_MALI_PLATFORM_FAKE + config = kbase_get_platform_config(); + attribute_count = kbasep_get_config_attribute_count(config->attributes); + + err = platform_device_add_data(pdev, config->attributes, + attribute_count * sizeof(config->attributes[0])); + if (err) + return err; +#endif /* CONFIG_MALI_PLATFORM_FAKE */ +#endif /* CONFIG_OF */ + + kbdev = kbase_device_alloc(); + if (!kbdev) { + dev_err(&pdev->dev, "Can't allocate device\n"); + err = -ENOMEM; + goto out; + } +#ifdef CONFIG_MALI_NO_MALI + mali_err = midg_device_create(kbdev); + if (MALI_ERROR_NONE != mali_err) { + dev_err(&pdev->dev, "Can't initialize dummy model\n"); + err = -ENOMEM; + goto out_midg; + } +#endif /* CONFIG_MALI_NO_MALI */ + + kbdev->dev = &pdev->dev; + platform_data = (kbase_attribute *) kbdev->dev->platform_data; + + if (NULL == platform_data) { + dev_err(kbdev->dev, "Platform data not 
specified\n"); + err = -ENOENT; + goto out_free_dev; + } + + if (MALI_TRUE != kbasep_validate_configuration_attributes(kbdev, platform_data)) { + dev_err(kbdev->dev, "Configuration attributes failed to validate\n"); + err = -EINVAL; + goto out_free_dev; + } + kbdev->config_attributes = platform_data; + + /* 3 IRQ resources */ + for (i = 0; i < 3; i++) { + struct resource *irq_res; + int irqtag; + + irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); + if (!irq_res) { + dev_err(kbdev->dev, "No IRQ resource at index %d\n", i); + err = -ENOENT; + goto out_free_dev; + } + +#ifdef CONFIG_OF + if (!strcmp(irq_res->name, "JOB")) + irqtag = JOB_IRQ_TAG; + else if (!strcmp(irq_res->name, "MMU")) + irqtag = MMU_IRQ_TAG; + else if (!strcmp(irq_res->name, "GPU")) + irqtag = GPU_IRQ_TAG; + else { + dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", + irq_res->name); + err = -EINVAL; + goto out_free_dev; + } +#else + irqtag = i; +#endif /* CONFIG_OF */ + kbdev->irqs[irqtag].irq = irq_res->start; + kbdev->irqs[irqtag].flags = (irq_res->flags & IRQF_TRIGGER_MASK); + } + + /* the first memory resource is the physical address of the GPU registers */ + reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!reg_res) { + dev_err(kbdev->dev, "Invalid register resource\n"); + err = -ENOENT; + goto out_free_dev; + } + + kbdev->reg_start = reg_res->start; + kbdev->reg_size = resource_size(reg_res); + + err = kbase_common_reg_map(kbdev); + if (err) + goto out_free_dev; + +#ifdef CONFIG_DEBUG_FS + kbdev->mali_debugfs_directory = debugfs_create_dir("mali", NULL); + if (NULL == kbdev->mali_debugfs_directory) { + dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n"); + goto out_reg_unmap; + } +#endif /* CONFIG_DEBUG_FS */ + + if (MALI_ERROR_NONE != kbase_device_init(kbdev)) { + dev_err(kbdev->dev, "Can't initialize device\n"); + err = -ENOMEM; + goto out_debugfs_remove; + } + + /* obtain min/max configured gpu frequencies */ + core_props = &(kbdev->gpu_props.props.core_props); + core_props->gpu_freq_khz_min = kbasep_get_config_value(kbdev, platform_data, KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MIN); + core_props->gpu_freq_khz_max = kbasep_get_config_value(kbdev, platform_data, KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX); + kbdev->gpu_props.irq_throttle_time_us = kbasep_get_config_value(kbdev, platform_data, KBASE_CONFIG_ATTR_GPU_IRQ_THROTTLE_TIME_US); + + err = kbase_common_device_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Failed kbase_common_device_init\n"); + goto out_term_dev; + } + return 0; + +out_term_dev: + kbase_device_term(kbdev); +out_debugfs_remove: +#ifdef CONFIG_DEBUG_FS + debugfs_remove(kbdev->mali_debugfs_directory); +out_reg_unmap: +#endif /* CONFIG_DEBUG_FS */ + kbase_common_reg_unmap(kbdev); +out_free_dev: +#ifdef CONFIG_MALI_NO_MALI + midg_device_destroy(kbdev); +out_midg: +#endif /* CONFIG_MALI_NO_MALI */ + kbase_device_free(kbdev); +out: + return err; +} + +static int kbase_common_device_remove(struct kbase_device *kbdev) +{ + if (kbdev->pm.callback_power_runtime_term) + kbdev->pm.callback_power_runtime_term(kbdev); + + /* Remove the sys power policy file */ + device_remove_file(kbdev->dev, &dev_attr_power_policy); + device_remove_file(kbdev->dev, &dev_attr_core_availability_policy); + device_remove_file(kbdev->dev, &dev_attr_core_mask); + +#ifdef CONFIG_MALI_TRACE_TIMELINE + kbasep_trace_timeline_debugfs_term(kbdev); +#endif /* CONFIG_MALI_TRACE_TIMELINE */ + +#ifdef CONFIG_MALI_DEBUG + device_remove_file(kbdev->dev, &dev_attr_js_softstop_always); + device_remove_file(kbdev->dev, 
&dev_attr_debug_command); +#endif /* CONFIG_MALI_DEBUG */ +#if MALI_CUSTOMER_RELEASE == 0 + device_remove_file(kbdev->dev, &dev_attr_js_timeouts); +#endif /* MALI_CUSTOMER_RELEASE */ +#ifdef CONFIG_DEBUG_FS + kbasep_gpu_memory_debugfs_term(kbdev); +#endif + +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS + device_remove_file(kbdev->dev, &dev_attr_sc_split); +#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ + + kbasep_js_devdata_halt(kbdev); + kbase_job_slot_halt(kbdev); + kbase_mem_halt(kbdev); + kbase_pm_halt(kbdev); + + kbase_release_interrupts(kbdev); + + kbasep_js_devdata_term(kbdev); + kbase_job_slot_term(kbdev); + kbase_mem_term(kbdev); + kbase_pm_term(kbdev); + + down(&kbase_dev_list_lock); + list_del(&kbdev->entry); + up(&kbase_dev_list_lock); + + misc_deregister(&kbdev->mdev); + put_device(kbdev->dev); + kbase_common_reg_unmap(kbdev); + kbase_device_term(kbdev); +#ifdef CONFIG_DEBUG_FS + debugfs_remove(kbdev->mali_debugfs_directory); +#endif /* CONFIG_DEBUG_FS */ +#ifdef CONFIG_MALI_NO_MALI + midg_device_destroy(kbdev); +#endif /* CONFIG_MALI_NO_MALI */ + kbase_device_free(kbdev); + + return 0; +} + +static int kbase_platform_device_remove(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + + if (!kbdev) + return -ENODEV; + + return kbase_common_device_remove(kbdev); +} + +/** Suspend callback from the OS. + * + * This is called by Linux when the device should suspend. + * + * @param dev The device to suspend + * + * @return A standard Linux error code + */ +static int kbase_device_suspend(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + kbase_pm_suspend(kbdev); + return 0; +} + +/** Resume callback from the OS. + * + * This is called by Linux when the device should resume from suspension. + * + * @param dev The device to resume + * + * @return A standard Linux error code + */ +static int kbase_device_resume(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + kbase_pm_resume(kbdev); + return 0; +} + +/** Runtime suspend callback from the OS. + * + * This is called by Linux when the device should prepare for a condition in which it will + * not be able to communicate with the CPU(s) and RAM due to power management. + * + * @param dev The device to suspend + * + * @return A standard Linux error code + */ +#ifdef CONFIG_PM_RUNTIME +static int kbase_device_runtime_suspend(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + if (kbdev->pm.callback_power_runtime_off) { + kbdev->pm.callback_power_runtime_off(kbdev); + KBASE_LOG(1, dev, "runtime suspend\n"); + } + return 0; +} +#endif /* CONFIG_PM_RUNTIME */ + +/** Runtime resume callback from the OS. + * + * This is called by Linux when the device should go into a fully active state. + * + * @param dev The device to suspend + * + * @return A standard Linux error code + */ + +#ifdef CONFIG_PM_RUNTIME +int kbase_device_runtime_resume(struct device *dev) +{ + int ret = 0; + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + if (kbdev->pm.callback_power_runtime_on) { + ret = kbdev->pm.callback_power_runtime_on(kbdev); + KBASE_LOG(1, dev, "runtime resume\n"); + } + return ret; +} +#endif /* CONFIG_PM_RUNTIME */ + +/** Runtime idle callback from the OS. 
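+ * (The callback returns a non-zero value so that the runtime PM core does not suspend the device from here; the GPU's power state is driven by the driver's own power management policies instead.)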
+ * + * This is called by Linux when the device appears to be inactive and it might be + * placed into a low power state + * + * @param dev The device to suspend + * + * @return A standard Linux error code + */ + +#ifdef CONFIG_PM_RUNTIME +static int kbase_device_runtime_idle(struct device *dev) +{ + /* Avoid pm_runtime_suspend being called */ + return 1; +} +#endif /* CONFIG_PM_RUNTIME */ + +/** The power management operations for the platform driver. + */ +static const struct dev_pm_ops kbase_pm_ops = { + .suspend = kbase_device_suspend, + .resume = kbase_device_resume, +#ifdef CONFIG_PM_RUNTIME + .runtime_suspend = kbase_device_runtime_suspend, + .runtime_resume = kbase_device_runtime_resume, + .runtime_idle = kbase_device_runtime_idle, +#endif /* CONFIG_PM_RUNTIME */ +}; + +#ifdef CONFIG_OF +static const struct of_device_id kbase_dt_ids[] = { + { .compatible = "arm,malit6xx" }, + { .compatible = "arm,mali-midgard" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, kbase_dt_ids); +#endif + +static struct platform_driver kbase_platform_driver = { + .probe = kbase_platform_device_probe, + .remove = kbase_platform_device_remove, + .driver = { + .name = kbase_drv_name, + .owner = THIS_MODULE, + .pm = &kbase_pm_ops, + .of_match_table = of_match_ptr(kbase_dt_ids), + }, +}; + +/* + * The driver will not provide a shortcut to create the Mali platform device + * anymore when using Device Tree. + */ +#ifdef CONFIG_OF +module_platform_driver(kbase_platform_driver); +#else /* CONFIG_MALI_PLATFORM_FAKE */ + +extern int kbase_platform_early_init(void); + +#ifdef CONFIG_MALI_PLATFORM_FAKE +extern int kbase_platform_fake_register(void); +extern void kbase_platform_fake_unregister(void); +#endif + +static int __init kbase_driver_init(void) +{ + int ret; + + ret = kbase_platform_early_init(); + if (ret) + return ret; + +#ifdef CONFIG_MALI_PLATFORM_FAKE + ret = kbase_platform_fake_register(); + if (ret) + return ret; +#endif + ret = platform_driver_register(&kbase_platform_driver); +#ifdef CONFIG_MALI_PLATFORM_FAKE + if (ret) + kbase_platform_fake_unregister(); +#endif + + return ret; +} + +static void __exit kbase_driver_exit(void) +{ + platform_driver_unregister(&kbase_platform_driver); +#ifdef CONFIG_MALI_PLATFORM_FAKE + kbase_platform_fake_unregister(); +#endif +} + +module_init(kbase_driver_init); +module_exit(kbase_driver_exit); + +#endif /* CONFIG_OF */ + +MODULE_LICENSE("GPL"); +MODULE_VERSION(MALI_RELEASE_NAME); + +/* Module parameter to control log level */ +int mali_debug_level = 0; +module_param(mali_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */ +MODULE_PARM_DESC(mali_debug_level, "Higher number, more dmesg output"); + +#ifdef CONFIG_MALI_GATOR_SUPPORT +/* Create the trace points (otherwise we just get code to call a tracepoint) */ +#define CREATE_TRACE_POINTS +#include "mali_linux_trace.h" + +void kbase_trace_mali_pm_status(u32 event, u64 value) +{ + trace_mali_pm_status(event, value); +} + +void kbase_trace_mali_pm_power_off(u32 event, u64 value) +{ + trace_mali_pm_power_off(event, value); +} + +void kbase_trace_mali_pm_power_on(u32 event, u64 value) +{ + trace_mali_pm_power_on(event, value); +} + +void kbase_trace_mali_job_slots_event(u32 event, const kbase_context *kctx, u8 atom_id) +{ + trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? 
kctx->pid : 0), atom_id); +} + +void kbase_trace_mali_page_fault_insert_pages(int event, u32 value) +{ + trace_mali_page_fault_insert_pages(event, value); +} + +void kbase_trace_mali_mmu_as_in_use(int event) +{ + trace_mali_mmu_as_in_use(event); +} + +void kbase_trace_mali_mmu_as_released(int event) +{ + trace_mali_mmu_as_released(event); +} + +void kbase_trace_mali_total_alloc_pages_change(long long int event) +{ + trace_mali_total_alloc_pages_change(event); +} +#endif /* CONFIG_MALI_GATOR_SUPPORT */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c new file mode 100755 index 00000000000..b37d22ae72b --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c @@ -0,0 +1,124 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_cpuprops.c + * Base kernel property query APIs + */ + +#include "mali_kbase.h" +#include "mali_kbase_cpuprops.h" +#include "mali_kbase_uku.h" +#include +#include +#include +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) +#include +#endif + +#define KBASE_DEFAULT_CPU_NUM 0 + +#define L1_DCACHE_LINE_SIZE_LOG2 L1_CACHE_SHIFT + +/** + * @brief Macros used to extract cpu id info + * @see Doc's for Main ID register + */ +#define KBASE_CPUPROPS_ID_GET_REV(cpuid) ( (cpuid) & 0x0F ) /* [3:0] Revision */ +#define KBASE_CPUPROPS_ID_GET_PART_NR(cpuid)( ((cpuid) >> 4) & 0xFFF ) /* [15:4] Part number */ +#define KBASE_CPUPROPS_ID_GET_ARCH(cpuid) ( ((cpuid) >> 16) & 0x0F ) /* [19:16] Architecture */ +#define KBASE_CPUPROPS_ID_GET_VARIANT(cpuid)( ((cpuid) >> 20) & 0x0F ) /* [23:20] Variant */ +#define KBASE_CPUPROPS_ID_GET_CODE(cpuid) ( ((cpuid) >> 24) & 0xFF ) /* [31:23] ASCII code of implementer trademark */ + +/*Below value sourced from OSK*/ +#define L1_DCACHE_SIZE ((u32)0x00008000) + + +/** + * @brief Retrieves detailed CPU info from given cpu_val ( ID reg ) + * + * @param kbase_props CPU props to be filled-in with cpu id info + * + */ +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) +static void kbasep_cpuprops_uk_get_cpu_id_info(kbase_uk_cpuprops * const kbase_props) +{ + kbase_props->props.cpu_id.id = read_cpuid_id(); + + kbase_props->props.cpu_id.valid = 1; + kbase_props->props.cpu_id.rev = KBASE_CPUPROPS_ID_GET_REV(kbase_props->props.cpu_id.id); + kbase_props->props.cpu_id.part = KBASE_CPUPROPS_ID_GET_PART_NR(kbase_props->props.cpu_id.id); + kbase_props->props.cpu_id.arch = KBASE_CPUPROPS_ID_GET_ARCH(kbase_props->props.cpu_id.id); + kbase_props->props.cpu_id.variant = KBASE_CPUPROPS_ID_GET_VARIANT(kbase_props->props.cpu_id.id); + kbase_props->props.cpu_id.implementer = KBASE_CPUPROPS_ID_GET_CODE(kbase_props->props.cpu_id.id); +} +#else +static void kbasep_cpuprops_uk_get_cpu_id_info(kbase_uk_cpuprops * const kbase_props) +{ + kbase_props->props.cpu_id.id = 0; + kbase_props->props.cpu_id.valid = 0; + kbase_props->props.cpu_id.rev = 0; + kbase_props->props.cpu_id.part = 0; + kbase_props->props.cpu_id.arch = 0; + kbase_props->props.cpu_id.variant = 0; + kbase_props->props.cpu_id.implementer = 'N'; +} +#endif + +int 
kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed) +{ + KBASE_DEBUG_ASSERT(NULL != clock_speed); + + *clock_speed = 100; + return 0; +} + +mali_error kbase_cpuprops_uk_get_props(kbase_context *kctx, kbase_uk_cpuprops * const kbase_props) +{ + unsigned int max_cpu_freq; + + kbase_props->props.cpu_l1_dcache_line_size_log2 = L1_DCACHE_LINE_SIZE_LOG2; + kbase_props->props.cpu_l1_dcache_size = L1_DCACHE_SIZE; + kbase_props->props.cpu_flags = BASE_CPU_PROPERTY_FLAG_LITTLE_ENDIAN; + + kbase_props->props.nr_cores = NR_CPUS; + kbase_props->props.cpu_page_size_log2 = PAGE_SHIFT; + kbase_props->props.available_memory_size = totalram_pages << PAGE_SHIFT; + + kbasep_cpuprops_uk_get_cpu_id_info(kbase_props); + + /* check if kernel supports dynamic frequency scaling */ + max_cpu_freq = cpufreq_quick_get_max( KBASE_DEFAULT_CPU_NUM ); + if ( max_cpu_freq != 0 ) + { + /* convert from kHz to mHz */ + kbase_props->props.max_cpu_clock_speed_mhz = max_cpu_freq / 1000 ; + } + else + { + /* fallback if CONFIG_CPU_FREQ turned off */ + int result; + kbase_cpuprops_clock_speed_function kbase_cpuprops_uk_get_clock_speed; + + kbase_cpuprops_uk_get_clock_speed = (kbase_cpuprops_clock_speed_function) kbasep_get_config_value(kctx->kbdev, kctx->kbdev->config_attributes, KBASE_CONFIG_ATTR_CPU_SPEED_FUNC); + result = kbase_cpuprops_uk_get_clock_speed(&kbase_props->props.max_cpu_clock_speed_mhz); + if (result != 0) + return MALI_ERROR_FUNCTION_FAILED; + } + + return MALI_ERROR_NONE; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h new file mode 100755 index 00000000000..0f669b706dd --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h @@ -0,0 +1,56 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_cpuprops.h + * Base kernel property query APIs + */ + +#ifndef _KBASE_CPUPROPS_H_ +#define _KBASE_CPUPROPS_H_ + +#include + +/* Forward declarations */ +struct kbase_uk_cpuprops; + +/** + * @brief Default implementation of @ref KBASE_CONFIG_ATTR_CPU_SPEED_FUNC. + * + * This function sets clock_speed to 100, so will be an underestimate for + * any real system. + * + * See @ref kbase_cpuprops_clock_speed_function for details on the parameters + * and return value. + */ +int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed); + +/** + * @brief Provides CPU properties data. + * + * Fill the kbase_uk_cpuprops with values from CPU configuration. + * + * @param kctx The kbase context + * @param kbase_props A copy of the kbase_uk_cpuprops structure from userspace + * + * @return MALI_ERROR_NONE on success. Any other value indicates failure. 
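
As an aside on the Main ID register decoding above: the KBASE_CPUPROPS_ID_GET_* macros simply slice fixed bit fields out of the 32-bit CPU ID value. The small user-space sketch below shows the same field layout on an arbitrary example value; it is an illustration only, and the sample MIDR constant is made up rather than read from hardware.

#include <stdio.h>
#include <stdint.h>

/* Same field layout as the KBASE_CPUPROPS_ID_GET_* macros above. */
#define ID_GET_REV(id)         ((id) & 0x0F)           /* [3:0]   revision */
#define ID_GET_PART_NR(id)     (((id) >> 4) & 0xFFF)   /* [15:4]  part number */
#define ID_GET_ARCH(id)        (((id) >> 16) & 0x0F)   /* [19:16] architecture */
#define ID_GET_VARIANT(id)     (((id) >> 20) & 0x0F)   /* [23:20] variant */
#define ID_GET_IMPLEMENTER(id) (((id) >> 24) & 0xFF)   /* [31:24] implementer code */

int main(void)
{
	uint32_t midr = 0x410FC0F0u; /* made-up example value, not read from hardware */

	printf("implementer 0x%02x ('%c'), variant %u, arch %u, part 0x%03x, rev %u\n",
	       ID_GET_IMPLEMENTER(midr), (char)ID_GET_IMPLEMENTER(midr),
	       ID_GET_VARIANT(midr), ID_GET_ARCH(midr),
	       (unsigned)ID_GET_PART_NR(midr), ID_GET_REV(midr));
	return 0;
}
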
+ */ +mali_error kbase_cpuprops_uk_get_props(kbase_context *kctx, struct kbase_uk_cpuprops * const kbase_props); + +#endif /*_KBASE_CPUPROPS_H_*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.c b/drivers/gpu/arm/midgard/mali_kbase_debug.c new file mode 100755 index 00000000000..247ca40bb6c --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_debug.c @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include + +kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = { + NULL, + NULL +}; + +void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param) +{ + kbasep_debug_assert_registered_cb.func = func; + kbasep_debug_assert_registered_cb.param = param; +} + +void kbasep_debug_assert_call_hook(void) +{ + if (kbasep_debug_assert_registered_cb.func != NULL) + kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param); +} +KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook); + diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.h b/drivers/gpu/arm/midgard/mali_kbase_debug.h new file mode 100755 index 00000000000..54edc4b1179 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_debug.h @@ -0,0 +1,188 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _KBASE_DEBUG_H +#define _KBASE_DEBUG_H + +#include + +extern int mali_debug_level; +/** + * @def KBASEP_LOG(level, ...) + * @brief Logs a debug message using dev_dbg(). + * + * Logs a debug message using dev_dbg if the debug level specified for the + * message is lower or equal than the current debug level. Use higher + * numbers to log messages with increasing verbosity. + * + * The current debug level is controlled by the module parameter + * 'mali_debug_level' which is 0 by default. + * + * No special meaning is assigned to debug levels but the recommendation is + * 0 = driver init/exit messages + * 1 = function entry/exit messages + * 2 = function detailed messages + * 3 = irq/callback messages + * 4 = register read/write messages + * + * @param level debug level for the message + * @param ... Arguments you would normally pass to dev_dbg() + */ +#define KBASE_LOG(level, ...) if ((level) <= mali_debug_level) dev_dbg(__VA_ARGS__) + +/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */ +#define KBASE_DEBUG_SKIP_TRACE 0 + +/** @brief If different from 0, the trace will only contain the file and line. */ +#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0 + +/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. 
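
A quick illustration of the logging scheme documented above: KBASE_LOG() drops any message whose level is above the current mali_debug_level. The user-space sketch below mimics that gating with fprintf() standing in for dev_dbg(); the names debug_level and LOG are invented for the example. Wrapping the macro body in do { } while (0), as done here, also avoids the dangling-else hazard that a bare if-form macro is exposed to.

#include <stdio.h>

static int debug_level = 1; /* plays the role of the mali_debug_level module parameter */

/* Print only if the message level is at or below the current debug level. */
#define LOG(level, ...) \
	do { \
		if ((level) <= debug_level) \
			fprintf(stderr, __VA_ARGS__); \
	} while (0)

int main(void)
{
	LOG(0, "driver init/exit message - shown\n");
	LOG(1, "function entry/exit message - shown at level 1\n");
	LOG(4, "register access message - dropped unless debug_level >= 4\n");
	return 0;
}
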
*/ +#ifndef KBASE_DEBUG_DISABLE_ASSERTS +#ifdef CONFIG_MALI_DEBUG +#define KBASE_DEBUG_DISABLE_ASSERTS 0 +#else +#define KBASE_DEBUG_DISABLE_ASSERTS 1 +#endif +#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ + +/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */ +typedef void (kbase_debug_assert_hook) (void *); + +typedef struct kbasep_debug_assert_cb { + kbase_debug_assert_hook *func; + void *param; +} kbasep_debug_assert_cb; + +/** + * @def KBASEP_DEBUG_PRINT_TRACE + * @brief Private macro containing the format of the trace to display before every message + * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME + */ +#if KBASE_DEBUG_SKIP_TRACE == 0 +#define KBASEP_DEBUG_PRINT_TRACE \ + "In file: " __FILE__ " line: " CSTD_STR2(__LINE__) +#if KBASE_DEBUG_SKIP_FUNCTION_NAME == 0 +#define KBASEP_DEBUG_PRINT_FUNCTION CSTD_FUNC +#else +#define KBASEP_DEBUG_PRINT_FUNCTION "" +#endif +#else +#define KBASEP_DEBUG_PRINT_TRACE "" +#endif + +/** + * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) + * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event. + * @param trace location in the code from where the message is printed + * @param function function from where the message is printed + * @param ... Format string followed by format arguments. + * @note function parameter cannot be concatenated with other strings + */ +/* Select the correct system output function*/ +#ifdef CONFIG_MALI_DEBUG +#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\ + do { \ + pr_err("Mali: %s function:%s ", trace, function);\ + pr_err(__VA_ARGS__);\ + pr_err("\n");\ + } while (MALI_FALSE) +#else +#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP() +#endif + +#ifdef CONFIG_MALI_DEBUG +#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook(); +#else +#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP(); +#endif + +/** + * @def KBASE_DEBUG_ASSERT(expr) + * @brief Calls @see KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false + * + * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1 + * + * @param expr Boolean expression + */ +#define KBASE_DEBUG_ASSERT(expr) \ + KBASE_DEBUG_ASSERT_MSG(expr, #expr) + +#if KBASE_DEBUG_DISABLE_ASSERTS +#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP() +#else + /** + * @def KBASE_DEBUG_ASSERT_MSG(expr, ...) + * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false + * + * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1 + * + * @param expr Boolean expression + * @param ... Message to display when @a expr is false, as a format string followed by format arguments. + */ +#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \ + do { \ + if (MALI_FALSE == (expr)) { \ + KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\ + KBASE_CALL_ASSERT_HOOK();\ + BUG();\ + } \ + } while (MALI_FALSE) +#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ + +/** + * @def KBASE_DEBUG_CODE( X ) + * @brief Executes the code inside the macro only in debug mode + * + * @param X Code to compile only in debug mode. + */ +#ifdef CONFIG_MALI_DEBUG +#define KBASE_DEBUG_CODE(X) X +#else +#define KBASE_DEBUG_CODE(X) CSTD_NOP() +#endif /* CONFIG_MALI_DEBUG */ + +/** @} */ + +/** + * @brief Register a function to call on ASSERT + * + * Such functions will \b only be called during Debug mode, and for debugging + * features \b only. Do not rely on them to be called in general use. 
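
The assert machinery above combines two pieces: a macro that prints and then BUG()s, and a single registered hook (function pointer plus opaque parameter) that is called first so the owner can dump state such as the trace buffer. The sketch below reproduces that shape in user space; register_hook(), dump_state() and abort()-instead-of-BUG() are stand-ins invented for the example.

#include <stdio.h>
#include <stdlib.h>

typedef void (assert_hook)(void *);

static struct {
	assert_hook *func;
	void *param;
} registered_cb = { NULL, NULL };

static void register_hook(assert_hook *func, void *param)
{
	registered_cb.func = func;
	registered_cb.param = param;
}

static void call_hook(void)
{
	if (registered_cb.func != NULL)
		registered_cb.func(registered_cb.param);
}

#define ASSERT_MSG(expr, msg) \
	do { \
		if (!(expr)) { \
			fprintf(stderr, "assert failed: %s (%s)\n", #expr, msg); \
			call_hook(); /* give the owner a chance to dump state */ \
			abort();     /* BUG() in the kernel version */ \
		} \
	} while (0)

static void dump_state(void *param)
{
	fprintf(stderr, "dumping trace for %s\n", (const char *)param);
}

int main(void)
{
	register_hook(dump_state, "mali0");
	ASSERT_MSG(1 + 1 == 2, "passes silently");
	ASSERT_MSG(0, "deliberate failure so the hook fires");
	return 0;
}
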
+ * + * To disable the hook, supply NULL to \a func. + * + * @note This function is not thread-safe, and should only be used to + * register/deregister once in the module's lifetime. + * + * @param[in] func the function to call when an assert is triggered. + * @param[in] param the parameter to pass to \a func when calling it + */ +void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param); + +/** + * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook() + * + * @note This function is not thread-safe with respect to multiple threads + * registering functions and parameters with + * kbase_debug_assert_register_hook(). Otherwise, thread safety is the + * responsibility of the registered hook. + */ +void kbasep_debug_assert_call_hook(void); + +#endif /* _KBASE_DEBUG_H */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h new file mode 100755 index 00000000000..835e3edb819 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h @@ -0,0 +1,873 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_defs.h + * + * Defintions (types, defines, etcs) common to Kbase. They are placed here to + * allow the hierarchy of header files to work. + */ + +#ifndef _KBASE_DEFS_H_ +#define _KBASE_DEFS_H_ + +#include +#include +#include +#include + + +#include +#include +#include + +#ifdef CONFIG_KDS +#include +#endif /* CONFIG_KDS */ + +#ifdef CONFIG_SYNC +#include +#endif /* CONFIG_SYNC */ + +/** Enable SW tracing when set */ +#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE +#define KBASE_TRACE_ENABLE 1 +#endif + +#ifndef KBASE_TRACE_ENABLE +#ifdef CONFIG_MALI_DEBUG +#define KBASE_TRACE_ENABLE 1 +#else +#define KBASE_TRACE_ENABLE 0 +#endif /* CONFIG_MALI_DEBUG */ +#endif /* KBASE_TRACE_ENABLE */ + +/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */ +#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1 + +/** + * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware. + * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU + * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware + * before resetting. + */ +#define ZAP_TIMEOUT 1000 + +/** Number of milliseconds before we time out on a GPU soft/hard reset */ +#define RESET_TIMEOUT 500 + +/** + * Prevent soft-stops from occuring in scheduling situations + * + * This is not due to HW issues, but when scheduling is desired to be more predictable. + * + * Therefore, soft stop may still be disabled due to HW issues. + * + * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context. 
+ * + * @note if not in use, define this value to 0 instead of \#undef'ing it + */ +#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0 + +/** + * Prevent hard-stops from occuring in scheduling situations + * + * This is not due to HW issues, but when scheduling is desired to be more predictable. + * + * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context. + * + * @note if not in use, define this value to 0 instead of \#undef'ing it + */ +#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0 + +/* Forward declarations+defintions */ +typedef struct kbase_context kbase_context; +typedef struct kbase_jd_atom kbasep_jd_atom; +typedef struct kbase_device kbase_device; + +/** + * The maximum number of Job Slots to support in the Hardware. + * + * You can optimize this down if your target devices will only ever support a + * small number of job slots. + */ +#define BASE_JM_MAX_NR_SLOTS 16 + +/** + * The maximum number of Address Spaces to support in the Hardware. + * + * You can optimize this down if your target devices will only ever support a + * small number of Address Spaces + */ +#define BASE_MAX_NR_AS 16 + +/* mmu */ +#define ENTRY_IS_ATE 1ULL +#define ENTRY_IS_INVAL 2ULL +#define ENTRY_IS_PTE 3ULL + +#define MIDGARD_MMU_VA_BITS 48 + +#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ +#define ENTRY_RD_BIT (1ULL << 6) +#define ENTRY_WR_BIT (1ULL << 7) +#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ +#define ENTRY_ACCESS_BIT (1ULL << 10) +#define ENTRY_NX_BIT (1ULL << 54) + +#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT) + +#if MIDGARD_MMU_VA_BITS > 39 +#define MIDGARD_MMU_TOPLEVEL 0 +#else +#define MIDGARD_MMU_TOPLEVEL 1 +#endif + +#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW) +#define GROWABLE_FLAGS_MASK (GROWABLE_FLAGS_REQUIRED | KBASE_REG_FREE) + +/** setting in kbase_context::as_nr that indicates it's invalid */ +#define KBASEP_AS_NR_INVALID (-1) + +#define KBASE_LOCK_REGION_MAX_SIZE (63) +#define KBASE_LOCK_REGION_MIN_SIZE (11) + +#define KBASE_TRACE_SIZE_LOG2 8 /* 256 entries */ +#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2) +#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1) + +#include "mali_kbase_js_defs.h" + +#define KBASEP_FORCE_REPLAY_DISABLED 0 + +/* Maximum force replay limit when randomization is enabled */ +#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16 + +/** + * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which + * handles retaining cores for power management and affinity management. + * + * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack + * where lots of atoms could be submitted before powerup, and each has an + * affinity chosen that causes other atoms to have an affinity + * violation. Whilst the affinity was not causing violations at the time it + * was chosen, it could cause violations thereafter. For example, 1000 jobs + * could have had their affinity chosen during the powerup time, so any of + * those 1000 jobs could cause an affinity violation later on. + * + * The attack would otherwise occur because other atoms/contexts have to wait for: + * -# the currently running atoms (which are causing the violation) to + * finish + * -# and, the atoms that had their affinity chosen during powerup to + * finish. These are run preferrentially because they don't cause a + * violation, but instead continue to cause the violation in others. 
+ * -# or, the attacker is scheduled out (which might not happen for just 2 + * contexts) + * + * By re-choosing the affinity (which is designed to avoid violations at the + * time it's chosen), we break condition (2) of the wait, which minimizes the + * problem to just waiting for current jobs to finish (which can be bounded if + * the Job Scheduling Policy has a timer). + */ +typedef enum { + /** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */ + KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED, + /** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */ + KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES, + /** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */ + KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY, + /** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */ + KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS, + /** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */ + KBASE_ATOM_COREREF_STATE_READY +} kbase_atom_coreref_state; + +typedef enum { + /** Atom is not used */ + KBASE_JD_ATOM_STATE_UNUSED, + /** Atom is queued in JD */ + KBASE_JD_ATOM_STATE_QUEUED, + /** Atom has been given to JS (is runnable/running) */ + KBASE_JD_ATOM_STATE_IN_JS, + /** Atom has been completed, but not yet handed back to userspace */ + KBASE_JD_ATOM_STATE_COMPLETED +} kbase_jd_atom_state; + +/** Atom has been previously soft-stoppped */ +#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1) +/** Atom has been previously retried to execute */ +#define KBASE_KATOM_FLAGS_RERUN (1<<2) +#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) + +typedef struct kbase_jd_atom kbase_jd_atom; + +struct kbase_ext_res +{ + mali_addr64 gpu_address; + struct kbase_mem_phy_alloc * alloc; +}; + +struct kbase_jd_atom { + struct work_struct work; + ktime_t start_timestamp; + + base_jd_udata udata; + kbase_context *kctx; + + struct list_head dep_head[2]; + struct list_head dep_item[2]; + struct kbase_jd_atom *dep_atom[2]; + + u16 nr_extres; + struct kbase_ext_res * extres; + + u32 device_nr; + u64 affinity; + u64 jc; + kbase_atom_coreref_state coreref_state; +#ifdef CONFIG_KDS + struct list_head node; + struct kds_resource_set *kds_rset; + mali_bool kds_dep_satisfied; +#endif /* CONFIG_KDS */ +#ifdef CONFIG_SYNC + struct sync_fence *fence; + struct sync_fence_waiter sync_waiter; +#endif /* CONFIG_SYNC */ + + /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */ + base_jd_event_code event_code; + base_jd_core_req core_req; /**< core requirements */ + /** Job Slot to retry submitting to if submission from IRQ handler failed + * + * NOTE: see if this can be unified into the another member e.g. the event */ + int retry_submit_on_slot; + + kbasep_js_policy_job_info sched_info; + /* atom priority scaled to nice range with +20 offset 0..39 */ + int nice_prio; + + int poking; /* BASE_HW_ISSUE_8316 */ + + wait_queue_head_t completed; + kbase_jd_atom_state status; +#ifdef CONFIG_GPU_TRACEPOINTS + int work_id; +#endif + /* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */ + int slot_nr; + + u32 atom_flags; + + /* Number of times this atom has been retried. Used by replay soft job. 
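
For the MMU definitions earlier in this header (ENTRY_IS_ATE/ENTRY_IS_PTE, ENTRY_RD_BIT and friends): a translation entry is a 64-bit word whose low bits encode the entry type and whose remaining flag bits encode permissions. The sketch below composes and decodes one such word. It assumes the type field occupies the low two bits (which is what the three ENTRY_IS_* values suggest), and the physical address used is made up.

#include <stdio.h>
#include <stdint.h>

#define ENTRY_IS_ATE      1ULL
#define ENTRY_IS_INVAL    2ULL
#define ENTRY_IS_PTE      3ULL
#define ENTRY_TYPE_MASK   3ULL               /* assumption: type lives in bits 1:0 */

#define ENTRY_RD_BIT      (1ULL << 6)
#define ENTRY_WR_BIT      (1ULL << 7)
#define ENTRY_ACCESS_BIT  (1ULL << 10)
#define ENTRY_NX_BIT      (1ULL << 54)

int main(void)
{
	uint64_t phys = 0x80001000ULL;           /* illustrative, 4 KiB aligned */
	uint64_t ate  = phys | ENTRY_ACCESS_BIT | ENTRY_RD_BIT | ENTRY_WR_BIT | ENTRY_IS_ATE;

	printf("entry 0x%016llx: type %llu, readable %d, writable %d, executable %d\n",
	       (unsigned long long)ate,
	       (unsigned long long)(ate & ENTRY_TYPE_MASK),
	       (ate & ENTRY_RD_BIT) != 0,
	       (ate & ENTRY_WR_BIT) != 0,
	       (ate & ENTRY_NX_BIT) == 0);
	return 0;
}
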
+ */ + int retry_count; +}; + +/* + * Theory of operations: + * + * Atom objects are statically allocated within the context structure. + * + * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set. + */ + +#define KBASE_JD_DEP_QUEUE_SIZE 256 + +typedef struct kbase_jd_context { + struct mutex lock; + kbasep_js_kctx_info sched_info; + kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; + + /** Tracks all job-dispatch jobs. This includes those not tracked by + * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ + u32 job_nr; + + /** Waitq that reflects whether there are no jobs (including SW-only + * dependency jobs). This is set when no jobs are present on the ctx, + * and clear when there are jobs. + * + * @note: Job Dispatcher knows about more jobs than the Job Scheduler: + * the Job Scheduler is unaware of jobs that are blocked on dependencies, + * and SW-only dependency jobs. + * + * This waitq can be waited upon to find out when the context jobs are all + * done/cancelled (including those that might've been blocked on + * dependencies) - and so, whether it can be terminated. However, it should + * only be terminated once it is neither present in the policy-queue (see + * kbasep_js_policy_try_evict_ctx() ) nor the run-pool (see + * kbasep_js_kctx_info::ctx::is_scheduled). + * + * Since the waitq is only set under kbase_jd_context::lock, + * the waiter should also briefly obtain and drop kbase_jd_context::lock to + * guarentee that the setter has completed its work on the kbase_context + * + * This must be updated atomically with: + * - kbase_jd_context::job_nr */ + wait_queue_head_t zero_jobs_wait; + + /** Job Done workqueue. */ + struct workqueue_struct *job_done_wq; + + spinlock_t tb_lock; + u32 *tb; + size_t tb_wrap_offset; + +#ifdef CONFIG_KDS + struct kds_callback kds_cb; +#endif /* CONFIG_KDS */ +#ifdef CONFIG_GPU_TRACEPOINTS + atomic_t work_id; +#endif +} kbase_jd_context; + +typedef struct kbase_jm_slot { + /* The number of slots must be a power of two */ +#define BASE_JM_SUBMIT_SLOTS 16 +#define BASE_JM_SUBMIT_SLOTS_MASK (BASE_JM_SUBMIT_SLOTS - 1) + + struct kbase_jd_atom *submitted[BASE_JM_SUBMIT_SLOTS]; + + kbase_context *last_context; + + u8 submitted_head; + u8 submitted_nr; + u8 job_chain_flag; + +} kbase_jm_slot; + +typedef enum kbase_midgard_type { + KBASE_MALI_T601, + KBASE_MALI_T604, + KBASE_MALI_T608, + KBASE_MALI_COUNT +} kbase_midgard_type; + +typedef struct kbase_device_info { + kbase_midgard_type dev_type; + u32 features; +} kbase_device_info; + +/** Poking state for BASE_HW_ISSUE_8316 */ +enum { + KBASE_AS_POKE_STATE_IN_FLIGHT = 1<<0, + KBASE_AS_POKE_STATE_KILLING_POKE = 1<<1 +}; + +/** Poking state for BASE_HW_ISSUE_8316 */ +typedef u32 kbase_as_poke_state; + +/** + * Important: Our code makes assumptions that a kbase_as structure is always at + * kbase_device->as[number]. This is used to recover the containing + * kbase_device from a kbase_as structure. + * + * Therefore, kbase_as structures must not be allocated anywhere else. 
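
kbase_jm_slot above is a small ring buffer: BASE_JM_SUBMIT_SLOTS must be a power of two so that wrap-around can be done by masking with BASE_JM_SUBMIT_SLOTS_MASK instead of a modulo. The simplified sketch below shows only that indexing scheme, storing plain integers instead of atom pointers and ignoring the scheduler's locking entirely.

#include <stdio.h>
#include <stdint.h>

#define SLOTS      16           /* must be a power of two */
#define SLOTS_MASK (SLOTS - 1)

struct ring {
	int     entries[SLOTS];
	uint8_t head;            /* index of the oldest entry */
	uint8_t nr;              /* number of valid entries */
};

static void push(struct ring *r, int v)
{
	r->entries[(r->head + r->nr) & SLOTS_MASK] = v;
	r->nr++;
}

static int pop(struct ring *r)
{
	int v = r->entries[r->head];

	r->head = (r->head + 1) & SLOTS_MASK;
	r->nr--;
	return v;
}

int main(void)
{
	struct ring r = { {0}, 0, 0 };
	int i;

	for (i = 0; i < 20; i++) {  /* deliberately wraps past index 15 */
		push(&r, i);
		printf("popped %d\n", pop(&r));
	}
	return 0;
}
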
+ */ +typedef struct kbase_as { + int number; + + struct workqueue_struct *pf_wq; + struct work_struct work_pagefault; + struct work_struct work_busfault; + mali_addr64 fault_addr; + u32 fault_status; + struct mutex transaction_mutex; + + /* BASE_HW_ISSUE_8316 */ + struct workqueue_struct *poke_wq; + struct work_struct poke_work; + /** Protected by kbasep_js_device_data::runpool_irq::lock */ + int poke_refcount; + /** Protected by kbasep_js_device_data::runpool_irq::lock */ + kbase_as_poke_state poke_state; + struct hrtimer poke_timer; +} kbase_as; + +/** + * Instrumentation State Machine States + */ +typedef enum { + /** State where instrumentation is not active */ + KBASE_INSTR_STATE_DISABLED = 0, + /** State machine is active and ready for a command. */ + KBASE_INSTR_STATE_IDLE, + /** Hardware is currently dumping a frame. */ + KBASE_INSTR_STATE_DUMPING, + /** We've requested a clean to occur on a workqueue */ + KBASE_INSTR_STATE_REQUEST_CLEAN, + /** Hardware is currently cleaning and invalidating caches. */ + KBASE_INSTR_STATE_CLEANING, + /** Cache clean completed, and either a) a dump is complete, or + * b) instrumentation can now be setup. */ + KBASE_INSTR_STATE_CLEANED, + /** kbasep_reset_timeout_worker() has started (but not compelted) a + * reset. This generally indicates the current action should be aborted, and + * kbasep_reset_timeout_worker() will handle the cleanup */ + KBASE_INSTR_STATE_RESETTING, + /** An error has occured during DUMPING (page fault). */ + KBASE_INSTR_STATE_FAULT +} kbase_instr_state; + +typedef struct kbasep_mem_device { + atomic_t used_pages; /* Tracks usage of OS shared memory. Updated + when OS memory is allocated/freed. */ + +} kbasep_mem_device; + + + +#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X + +typedef enum { + /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE + * THIS MUST BE USED AT THE START OF THE ENUM */ +#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X) +#include "mali_kbase_trace_defs.h" +#undef KBASE_TRACE_CODE_MAKE_CODE + /* Comma on its own, to extend the list */ + , + /* Must be the last in the enum */ + KBASE_TRACE_CODE_COUNT +} kbase_trace_code; + +#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0) +#define KBASE_TRACE_FLAG_JOBSLOT (((u8)1) << 1) + +typedef struct kbase_trace { + struct timespec timestamp; + u32 thread_id; + u32 cpu; + void *ctx; + mali_bool katom; + int atom_number; + u64 atom_udata[2]; + u64 gpu_addr; + unsigned long info_val; + u8 code; + u8 jobslot; + u8 refcount; + u8 flags; +} kbase_trace; + +/** Event IDs for the power management framework. + * + * Any of these events might be missed, so they should not be relied upon to + * find the precise state of the GPU at a particular time in the + * trace. Overall, we should get a high percentage of these events for + * statisical purposes, and so a few missing should not be a problem */ +typedef enum kbase_timeline_pm_event { + /* helper for tests */ + KBASEP_TIMELINE_PM_EVENT_FIRST, + + /** Event reserved for backwards compatibility with 'init' events */ + KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST, + + /** The power state of the device has changed. + * + * Specifically, the device has reached a desired or available state. + */ + KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED, + + /** The GPU is becoming active. + * + * This event is sent when the first context is about to use the GPU. + */ + KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE, + + /** The GPU is becoming idle. 
+ * + * This event is sent when the last context has finished using the GPU. + */ + KBASE_TIMELINE_PM_EVENT_GPU_IDLE, + + /** Event reserved for backwards compatibility with 'policy_change' + * events */ + KBASE_TIMELINE_PM_EVENT_RESERVED_4, + + /** Event reserved for backwards compatibility with 'system_suspend' + * events */ + KBASE_TIMELINE_PM_EVENT_RESERVED_5, + + /** Event reserved for backwards compatibility with 'system_resume' + * events */ + KBASE_TIMELINE_PM_EVENT_RESERVED_6, + + /** The job scheduler is requesting to power up/down cores. + * + * This event is sent when: + * - powered down cores are needed to complete a job + * - powered up cores are not needed anymore + */ + KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE, + + KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE, +} kbase_timeline_pm_event; + +#ifdef CONFIG_MALI_TRACE_TIMELINE +typedef struct kbase_trace_kctx_timeline { + atomic_t jd_atoms_in_flight; + u32 owner_tgid; +} kbase_trace_kctx_timeline; + +typedef struct kbase_trace_kbdev_timeline { + /** DebugFS entry */ + struct dentry *dentry; + + /* Note: strictly speaking, not needed, because it's in sync with + * kbase_device::jm_slots[]::submitted_nr + * + * But it's kept as an example of how to add global timeline tracking + * information + * + * The caller must hold kbasep_js_device_data::runpool_irq::lock when + * accessing this */ + u8 slot_atoms_submitted[BASE_JM_SUBMIT_SLOTS]; + + /* Last UID for each PM event */ + atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1]; + /* Counter for generating PM event UIDs */ + atomic_t pm_event_uid_counter; + /* + * L2 transition state - MALI_TRUE indicates that the transition is ongoing + * Expected to be protected by pm.power_change_lock */ + mali_bool l2_transitioning; +} kbase_trace_kbdev_timeline; +#endif /* CONFIG_MALI_TRACE_TIMELINE */ + + +typedef struct kbasep_kctx_list_element { + struct list_head link; + kbase_context *kctx; +} kbasep_kctx_list_element; + +#define DEVNAME_SIZE 16 + +struct kbase_device { + /** jm_slots is protected by kbasep_js_device_data::runpool_irq::lock */ + kbase_jm_slot jm_slots[BASE_JM_MAX_NR_SLOTS]; + s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS]; + + struct list_head entry; + struct device *dev; + struct miscdevice mdev; + u64 reg_start; + size_t reg_size; + void __iomem *reg; + struct resource *reg_res; + struct { + int irq; + int flags; + } irqs[3]; + char devname[DEVNAME_SIZE]; + +#ifdef CONFIG_MALI_NO_MALI + void *model; + struct kmem_cache *irq_slab; + struct workqueue_struct *irq_workq; + atomic_t serving_job_irq; + atomic_t serving_gpu_irq; + atomic_t serving_mmu_irq; + spinlock_t reg_op_lock; +#endif /* CONFIG_MALI_NO_MALI */ + + kbase_pm_device_data pm; + kbasep_js_device_data js_data; + kbasep_mem_device memdev; + + kbase_as as[BASE_MAX_NR_AS]; + + spinlock_t mmu_mask_change; + + kbase_gpu_props gpu_props; + + /** List of SW workarounds for HW issues */ + unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; + /** List of features available */ + unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; + + /* Cached present bitmaps - these are the same as the corresponding hardware registers */ + u64 shader_present_bitmap; + u64 tiler_present_bitmap; + u64 l2_present_bitmap; + u64 l3_present_bitmap; + + /* Bitmaps of cores that are currently in use (running jobs). + * These should be kept up to date by the job scheduler. 
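
hw_issues_mask and hw_features_mask above use the usual kernel idiom of a bitmap stored in an array of unsigned long, sized by rounding the number of bits up to whole words. The sketch below shows that sizing plus open-coded set/test helpers; in kernel code this would normally be DECLARE_BITMAP()/set_bit()/test_bit(), and ISSUE_END together with the issue numbers used here are invented for the example.

#include <stdio.h>
#include <string.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))
#define ISSUE_END     100   /* hypothetical count of known issues */

static unsigned long issues[(ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];

static void issue_set(unsigned int nr)
{
	issues[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

static int issue_test(unsigned int nr)
{
	return (issues[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
}

int main(void)
{
	memset(issues, 0, sizeof(issues));
	issue_set(3);
	issue_set(70);  /* lands in the second word on a 64-bit build */
	printf("issue 3: %d, issue 70: %d, issue 5: %d\n",
	       issue_test(3), issue_test(70), issue_test(5));
	return 0;
}
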
+ * + * pm.power_change_lock should be held when accessing these members. + * + * kbase_pm_check_transitions_nolock() should be called when bits are + * cleared to update the power management system and allow transitions to + * occur. */ + u64 shader_inuse_bitmap; + + /* Refcount for cores in use */ + u32 shader_inuse_cnt[64]; + + /* Bitmaps of cores the JS needs for jobs ready to run */ + u64 shader_needed_bitmap; + + /* Refcount for cores needed */ + u32 shader_needed_cnt[64]; + + u32 tiler_inuse_cnt; + + u32 tiler_needed_cnt; + + /* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */ + u32 l2_users_count; + + /* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be + * submitted to these cores. These are updated by the power management code. The job scheduler should avoid + * submitting new jobs to any cores that are not marked as available. + * + * pm.power_change_lock should be held when accessing these members. + */ + u64 shader_available_bitmap; + u64 tiler_available_bitmap; + u64 l2_available_bitmap; + + u64 shader_ready_bitmap; + u64 shader_transitioning_bitmap; + + s8 nr_hw_address_spaces; /**< Number of address spaces in the GPU (constant after driver initialisation) */ + s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */ + + /* Structure used for instrumentation and HW counters dumping */ + struct { + /* The lock should be used when accessing any of the following members */ + spinlock_t lock; + + kbase_context *kctx; + u64 addr; + wait_queue_head_t wait; + int triggered; + kbase_instr_state state; + wait_queue_head_t cache_clean_wait; + struct workqueue_struct *cache_clean_wq; + struct work_struct cache_clean_work; + + kbase_context *suspended_kctx; + kbase_uk_hwcnt_setup suspended_state; + } hwcnt; + + /* Set when we're about to reset the GPU */ + atomic_t reset_gpu; +#define KBASE_RESET_GPU_NOT_PENDING 0 /* The GPU reset isn't pending */ +#define KBASE_RESET_GPU_PREPARED 1 /* kbase_prepare_to_reset_gpu has been called */ +#define KBASE_RESET_GPU_COMMITTED 2 /* kbase_reset_gpu has been called - the reset will now definitely happen + * within the timeout period */ +#define KBASE_RESET_GPU_HAPPENING 3 /* The GPU reset process is currently occuring (timeout has expired or + * kbasep_try_reset_gpu_early was called) */ + + /* Work queue and work item for performing the reset in */ + struct workqueue_struct *reset_workq; + struct work_struct reset_work; + wait_queue_head_t reset_wait; + struct hrtimer reset_timer; + + /*value to be written to the irq_throttle register each time an irq is served */ + atomic_t irq_throttle_cycles; + + const kbase_attribute *config_attributes; + +#if KBASE_TRACE_ENABLE != 0 + spinlock_t trace_lock; + u16 trace_first_out; + u16 trace_next_in; + kbase_trace *trace_rbuf; +#endif + +#if MALI_CUSTOMER_RELEASE == 0 + /* This is used to override the current job scheduler values for + * KBASE_CONFIG_ATTR_JS_STOP_STOP_TICKS_SS + * KBASE_CONFIG_ATTR_JS_STOP_STOP_TICKS_CL + * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS + * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL + * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS + * KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS + * KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL + * KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS. + * + * These values are set via the js_timeouts sysfs file. 
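
The shader core bookkeeping above pairs a bitmap with a per-core reference count: shader_inuse_cnt[] says how many users each core has, and shader_inuse_bitmap mirrors "count is non-zero" so it can be compared against the hardware's ready/available bitmaps cheaply. The sketch below shows just that pairing; locking (pm.power_change_lock in the driver) and the needed/available variants are left out.

#include <stdio.h>
#include <stdint.h>

static uint64_t inuse_bitmap;
static uint32_t inuse_cnt[64];

static void core_retain(unsigned int core)
{
	if (inuse_cnt[core]++ == 0)
		inuse_bitmap |= (uint64_t)1 << core;
}

static void core_release(unsigned int core)
{
	if (--inuse_cnt[core] == 0)
		inuse_bitmap &= ~((uint64_t)1 << core);
}

int main(void)
{
	core_retain(0);
	core_retain(0);           /* second user of core 0 */
	core_retain(3);
	core_release(0);          /* core 0 still has one user left */
	printf("in-use bitmap: 0x%llx\n", (unsigned long long)inuse_bitmap); /* 0x9 */
	return 0;
}
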
+ */ + u32 js_soft_stop_ticks; + u32 js_soft_stop_ticks_cl; + u32 js_hard_stop_ticks_ss; + u32 js_hard_stop_ticks_cl; + u32 js_hard_stop_ticks_nss; + u32 js_reset_ticks_ss; + u32 js_reset_ticks_cl; + u32 js_reset_ticks_nss; +#endif + + struct mutex cacheclean_lock; + + /* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */ + void *platform_context; + + /** Count of contexts keeping the GPU powered */ + atomic_t keep_gpu_powered_count; + + /* List of kbase_contexts created */ + struct list_head kctx_list; + struct mutex kctx_list_lock; + +#ifdef CONFIG_MALI_MIDGARD_RT_PM + struct delayed_work runtime_pm_workqueue; +#endif + +#ifdef CONFIG_MALI_TRACE_TIMELINE + kbase_trace_kbdev_timeline timeline; +#endif + +#ifdef CONFIG_DEBUG_FS + /* directory for debugfs entries */ + struct dentry *mali_debugfs_directory; + /* debugfs entry for gpu_memory */ + struct dentry *gpu_memory_dentry; + /* debugfs entry for trace */ + struct dentry *trace_dentry; +#endif /* CONFIG_DEBUG_FS */ + + /* fbdump profiling controls set by gator */ + u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX]; + + +#if MALI_CUSTOMER_RELEASE == 0 + /* Number of jobs that are run before a job is forced to fail and + * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced + * failures. */ + int force_replay_limit; + /* Count of jobs between forced failures. Incremented on each job. A + * job is forced to fail once this is greater than or equal to + * force_replay_limit. */ + int force_replay_count; + /* Core requirement for jobs to be failed and replayed. May be zero. */ + base_jd_core_req force_replay_core_req; + /* MALI_TRUE if force_replay_limit should be randomized. The random + * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT. + */ + mali_bool force_replay_random; +#endif +}; + +struct kbase_context { + kbase_device *kbdev; + phys_addr_t pgd; + struct list_head event_list; + struct mutex event_mutex; + mali_bool event_closed; + struct workqueue_struct *event_workq; + + u64 mem_attrs; + + atomic_t setup_complete; + atomic_t setup_in_progress; + + mali_bool keep_gpu_powered; + + u64 *mmu_teardown_pages; + + phys_addr_t aliasing_sink_page; + + struct mutex reg_lock; /* To be converted to a rwlock? */ + struct rb_root reg_rbtree; /* Red-Black tree of GPU regions (live regions) */ + + unsigned long cookies; + struct kbase_va_region *pending_regions[BITS_PER_LONG]; + + wait_queue_head_t event_queue; + pid_t tgid; + pid_t pid; + + kbase_jd_context jctx; + atomic_t used_pages; + atomic_t nonmapped_pages; + + kbase_mem_allocator osalloc; + kbase_mem_allocator * pgd_allocator; + + struct list_head waiting_soft_jobs; +#ifdef CONFIG_KDS + struct list_head waiting_kds_resource; +#endif + /** This is effectively part of the Run Pool, because it only has a valid + * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in + * + * The kbasep_js_device_data::runpool_irq::lock must be held whilst accessing + * this. + * + * If the context relating to this as_nr is required, you must use + * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear + * whilst you're using it. 
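
kbase_context above keeps an unsigned long "cookies" bitmap next to pending_regions[BITS_PER_LONG], which suggests a slot allocator: a bit per pending-region slot, with allocation being "find a bit, flip it, use its index". The sketch below shows that pattern under the assumption that a set bit means a free slot; the helper names and the gcc/clang __builtin_ctzl() call are choices made for the example, not taken from the driver.

#include <stdio.h>

#define NR_SLOTS (8 * sizeof(unsigned long))

static unsigned long free_slots = ~0UL;     /* assumption: set bit == free slot */
static const char *pending[NR_SLOTS];

static int slot_alloc(const char *what)
{
	int nr;

	if (free_slots == 0)
		return -1;                  /* nothing free */
	nr = __builtin_ctzl(free_slots);    /* lowest set bit (gcc/clang builtin) */
	free_slots &= ~(1UL << nr);
	pending[nr] = what;
	return nr;
}

static void slot_free(int nr)
{
	pending[nr] = NULL;
	free_slots |= 1UL << nr;
}

int main(void)
{
	int a = slot_alloc("region A");
	int b = slot_alloc("region B");

	printf("slot %d -> %s, slot %d -> %s\n", a, pending[a], b, pending[b]);
	slot_free(a);
	slot_free(b);
	return 0;
}
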
Alternatively, just hold the kbasep_js_device_data::runpool_irq::lock + * to ensure the context doesn't disappear (but this has restrictions on what other locks + * you can take whilst doing this) */ + int as_nr; + + /* NOTE: + * + * Flags are in jctx.sched_info.ctx.flags + * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex + * + * All other flags must be added there */ + spinlock_t mm_update_lock; + struct mm_struct * process_mm; + +#ifdef CONFIG_MALI_TRACE_TIMELINE + kbase_trace_kctx_timeline timeline; +#endif +}; + +typedef enum kbase_reg_access_type { + REG_READ, + REG_WRITE +} kbase_reg_access_type; + +typedef enum kbase_share_attr_bits { + /* (1ULL << 8) bit is reserved */ + SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */ + SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */ +} kbase_share_attr_bits; + +/* Conversion helpers for setting up high resolution timers */ +#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U)) +#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) + +/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */ +#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000 +/* Maximum number of loops polling the GPU for an AS flush to complete before we assume the GPU has hung */ +#define KBASE_AS_FLUSH_MAX_LOOPS 100000 + +/* Return values from kbase_replay_process */ + +/* Replay job has completed */ +#define MALI_REPLAY_STATUS_COMPLETE 0 +/* Replay job is replaying and will continue once replayed jobs have completed. + */ +#define MALI_REPLAY_STATUS_REPLAYING 1 +#define MALI_REPLAY_STATUS_MASK 0xff +/* Caller must call kbasep_js_try_schedule_head_ctx */ +#define MALI_REPLAY_FLAG_JS_RESCHED 0x100 + +/* Maximum number of times a job can be replayed */ +#define BASEP_JD_REPLAY_LIMIT 15 + +#endif /* _KBASE_DEFS_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c new file mode 100755 index 00000000000..c321ebf8c01 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_device.c @@ -0,0 +1,774 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_device.c + * Base kernel device APIs + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +/* NOTE: Magic - 0x45435254 (TRCE in ASCII). + * Supports tracing feature provided in the base module. + * Please keep it in sync with the value of base module. 
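
The MALI_REPLAY_* values above pack two things into one return code: a status in the low byte (masked with MALI_REPLAY_STATUS_MASK) and a "caller must reschedule" flag at bit 8. The sketch below just shows that packing and unpacking; the sample return value is invented.

#include <stdio.h>

#define REPLAY_STATUS_COMPLETE   0
#define REPLAY_STATUS_REPLAYING  1
#define REPLAY_STATUS_MASK       0xff
#define REPLAY_FLAG_JS_RESCHED   0x100

int main(void)
{
	int ret = REPLAY_STATUS_REPLAYING | REPLAY_FLAG_JS_RESCHED; /* example value */

	if ((ret & REPLAY_STATUS_MASK) == REPLAY_STATUS_REPLAYING)
		printf("replay still in progress\n");
	if (ret & REPLAY_FLAG_JS_RESCHED)
		printf("caller should kick the job scheduler\n");
	return 0;
}
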
+ */ +#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254 + +#if defined(CONFIG_MALI_PLATFORM_VEXPRESS) || defined(CONFIG_MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ) +#ifdef CONFIG_MALI_PLATFORM_FAKE +extern kbase_attribute config_attributes_hw_issue_8408[]; +#endif /* CONFIG_MALI_PLATFORM_FAKE */ +#endif /* CONFIG_MALI_PLATFORM_VEXPRESS || CONFIG_MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ */ + +#if KBASE_TRACE_ENABLE != 0 +STATIC CONST char *kbasep_trace_code_string[] = { + /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE + * THIS MUST BE USED AT THE START OF THE ARRAY */ +#define KBASE_TRACE_CODE_MAKE_CODE(X) # X +#include "mali_kbase_trace_defs.h" +#undef KBASE_TRACE_CODE_MAKE_CODE +}; +#endif + +#define DEBUG_MESSAGE_SIZE 256 + +STATIC mali_error kbasep_trace_init(kbase_device *kbdev); +STATIC void kbasep_trace_term(kbase_device *kbdev); +STATIC void kbasep_trace_hook_wrapper(void *param); +#if KBASE_TRACE_ENABLE != 0 +STATIC void kbasep_trace_debugfs_init(kbase_device *kbdev); +#endif + +void kbasep_as_do_poke(struct work_struct *work); +enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *data); +void kbasep_reset_timeout_worker(struct work_struct *data); + +kbase_device *kbase_device_alloc(void) +{ + return kzalloc(sizeof(kbase_device), GFP_KERNEL); +} + +mali_error kbase_device_init(kbase_device * const kbdev) +{ + int i; /* i used after the for loop, don't reuse ! */ + + spin_lock_init(&kbdev->mmu_mask_change); + + /* Initialize platform specific context */ + if (MALI_FALSE == kbasep_platform_device_init(kbdev)) + goto fail; + + /* Ensure we can access the GPU registers */ + kbase_pm_register_access_enable(kbdev); + + /* Find out GPU properties based on the GPU feature registers */ + kbase_gpuprops_set(kbdev); + + /* Get the list of workarounds for issues on the current HW (identified by the GPU_ID register) */ + if (MALI_ERROR_NONE != kbase_hw_set_issues_mask(kbdev)) { + kbase_pm_register_access_disable(kbdev); + goto free_platform; + } + + /* Set the list of features available on the current HW (identified by the GPU_ID register) */ + kbase_hw_set_features_mask(kbdev); + + kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces; + + /* We're done accessing the GPU registers for now. 
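
kbasep_trace_code_string[] above is generated with the same include-the-list-twice trick that built the kbase_trace_code enum earlier in mali_kbase_defs.h: mali_kbase_trace_defs.h is pulled in once with KBASE_TRACE_CODE_MAKE_CODE expanding to an enumerator and once with it expanding to a string, so the two stay in sync automatically. The self-contained sketch below shows the same X-macro idea with the list held in a macro instead of a separate header; the three codes listed are placeholders.

#include <stdio.h>

#define TRACE_CODE_LIST(X) \
	X(CORE_GPU_IRQ) \
	X(JM_SUBMIT) \
	X(PM_CORES_POWERED)

#define MAKE_ENUM(name)   TRACE_CODE_##name,
#define MAKE_STRING(name) #name,

enum trace_code {
	TRACE_CODE_LIST(MAKE_ENUM)
	TRACE_CODE_COUNT
};

static const char *trace_code_string[] = {
	TRACE_CODE_LIST(MAKE_STRING)
};

int main(void)
{
	int i;

	for (i = 0; i < TRACE_CODE_COUNT; i++)
		printf("%d -> %s\n", i, trace_code_string[i]);
	return 0;
}
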
*/ + kbase_pm_register_access_disable(kbdev); + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + const char format[] = "mali_mmu%d"; + char name[sizeof(format)]; + const char poke_format[] = "mali_mmu%d_poker"; /* BASE_HW_ISSUE_8316 */ + char poke_name[sizeof(poke_format)]; /* BASE_HW_ISSUE_8316 */ + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { + if (0 > snprintf(poke_name, sizeof(poke_name), poke_format, i)) + goto free_workqs; + } + + if (0 > snprintf(name, sizeof(name), format, i)) + goto free_workqs; + + kbdev->as[i].number = i; + kbdev->as[i].fault_addr = 0ULL; + + kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1); + if (NULL == kbdev->as[i].pf_wq) + goto free_workqs; + + mutex_init(&kbdev->as[i].transaction_mutex); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { + struct hrtimer *poking_timer = &kbdev->as[i].poke_timer; + + kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1); + if (NULL == kbdev->as[i].poke_wq) { + destroy_workqueue(kbdev->as[i].pf_wq); + goto free_workqs; + } + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&kbdev->as[i].poke_work)); + INIT_WORK(&kbdev->as[i].poke_work, kbasep_as_do_poke); + + hrtimer_init(poking_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + + poking_timer->function = kbasep_as_poke_timer_callback; + + kbdev->as[i].poke_refcount = 0; + kbdev->as[i].poke_state = 0u; + } + } + /* don't change i after this point */ + + spin_lock_init(&kbdev->hwcnt.lock); + + kbdev->hwcnt.state = KBASE_INSTR_STATE_DISABLED; + init_waitqueue_head(&kbdev->reset_wait); + init_waitqueue_head(&kbdev->hwcnt.wait); + init_waitqueue_head(&kbdev->hwcnt.cache_clean_wait); + INIT_WORK(&kbdev->hwcnt.cache_clean_work, kbasep_cache_clean_worker); + kbdev->hwcnt.triggered = 0; + + kbdev->hwcnt.cache_clean_wq = alloc_workqueue("Mali cache cleaning workqueue", + 0, 1); + if (NULL == kbdev->hwcnt.cache_clean_wq) + goto free_workqs; + + kbdev->reset_workq = alloc_workqueue("Mali reset workqueue", 0, 1); + if (NULL == kbdev->reset_workq) + goto free_cache_clean_workq; + + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&kbdev->reset_work)); + INIT_WORK(&kbdev->reset_work, kbasep_reset_timeout_worker); + + hrtimer_init(&kbdev->reset_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kbdev->reset_timer.function = kbasep_reset_timer_callback; + + if (kbasep_trace_init(kbdev) != MALI_ERROR_NONE) + goto free_reset_workq; + + mutex_init(&kbdev->cacheclean_lock); + atomic_set(&kbdev->keep_gpu_powered_count, 0); + +#ifdef CONFIG_MALI_TRACE_TIMELINE + for (i = 0; i < BASE_JM_SUBMIT_SLOTS; ++i) + kbdev->timeline.slot_atoms_submitted[i] = 0; + + for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i) + atomic_set(&kbdev->timeline.pm_event_uid[i], 0); +#endif /* CONFIG_MALI_TRACE_TIMELINE */ + + /* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */ + for (i = 0; i < FBDUMP_CONTROL_MAX; i++) + kbdev->kbase_profiling_controls[i] = 0; + + kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev); + +#if defined(CONFIG_MALI_PLATFORM_VEXPRESS) || defined(CONFIG_MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ) +#ifdef CONFIG_MALI_PLATFORM_FAKE + /* BASE_HW_ISSUE_8408 requires a configuration with different timeouts for + * the vexpress platform */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) + kbdev->config_attributes = config_attributes_hw_issue_8408; +#endif /* CONFIG_MALI_PLATFORM_FAKE */ +#endif /* CONFIG_MALI_PLATFORM_VEXPRESS || CONFIG_MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ */ + + return MALI_ERROR_NONE; + + free_reset_workq: + 
destroy_workqueue(kbdev->reset_workq); + free_cache_clean_workq: + destroy_workqueue(kbdev->hwcnt.cache_clean_wq); + free_workqs: + while (i > 0) { + i--; + destroy_workqueue(kbdev->as[i].pf_wq); + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) + destroy_workqueue(kbdev->as[i].poke_wq); + } + free_platform: + kbasep_platform_device_term(kbdev); + fail: + return MALI_ERROR_FUNCTION_FAILED; +} + +void kbase_device_term(kbase_device *kbdev) +{ + int i; + + KBASE_DEBUG_ASSERT(kbdev); + +#if KBASE_TRACE_ENABLE != 0 + kbase_debug_assert_register_hook(NULL, NULL); +#endif + + kbasep_trace_term(kbdev); + + destroy_workqueue(kbdev->reset_workq); + destroy_workqueue(kbdev->hwcnt.cache_clean_wq); + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + destroy_workqueue(kbdev->as[i].pf_wq); + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) + destroy_workqueue(kbdev->as[i].poke_wq); + } + + kbasep_platform_device_term(kbdev); +} + +void kbase_device_free(kbase_device *kbdev) +{ + kfree(kbdev); +} + +void kbase_device_trace_buffer_install(kbase_context *kctx, u32 *tb, size_t size) +{ + unsigned long flags; + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(tb); + + /* set up the header */ + /* magic number in the first 4 bytes */ + tb[0] = TRACE_BUFFER_HEADER_SPECIAL; + /* Store (write offset = 0, wrap counter = 0, transaction active = no) + * write offset 0 means never written. + * Offsets 1 to (wrap_offset - 1) used to store values when trace started + */ + tb[1] = 0; + + /* install trace buffer */ + spin_lock_irqsave(&kctx->jctx.tb_lock, flags); + kctx->jctx.tb_wrap_offset = size / 8; + kctx->jctx.tb = tb; + spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); +} + +void kbase_device_trace_buffer_uninstall(kbase_context *kctx) +{ + unsigned long flags; + KBASE_DEBUG_ASSERT(kctx); + spin_lock_irqsave(&kctx->jctx.tb_lock, flags); + kctx->jctx.tb = NULL; + kctx->jctx.tb_wrap_offset = 0; + spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); +} + +void kbase_device_trace_register_access(kbase_context *kctx, kbase_reg_access_type type, u16 reg_offset, u32 reg_value) +{ + unsigned long flags; + spin_lock_irqsave(&kctx->jctx.tb_lock, flags); + if (kctx->jctx.tb) { + u16 wrap_count; + u16 write_offset; + u32 *tb = kctx->jctx.tb; + u32 header_word; + + header_word = tb[1]; + KBASE_DEBUG_ASSERT(0 == (header_word & 0x1)); + + wrap_count = (header_word >> 1) & 0x7FFF; + write_offset = (header_word >> 16) & 0xFFFF; + + /* mark as transaction in progress */ + tb[1] |= 0x1; + mb(); + + /* calculate new offset */ + write_offset++; + if (write_offset == kctx->jctx.tb_wrap_offset) { + /* wrap */ + write_offset = 1; + wrap_count++; + wrap_count &= 0x7FFF; /* 15bit wrap counter */ + } + + /* store the trace entry at the selected offset */ + tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 
0x1 : 0x0); + tb[write_offset * 2 + 1] = reg_value; + mb(); + + /* new header word */ + header_word = (write_offset << 16) | (wrap_count << 1) | 0x0; /* transaction complete */ + tb[1] = header_word; + } + spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); +} + +void kbase_reg_write(kbase_device *kbdev, u16 offset, u32 value, kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kbdev->pm.gpu_powered); + KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + KBASE_LOG(4, kbdev->dev, "w: reg %04x val %08x", offset, value); + kbase_os_reg_write(kbdev, offset, value); + if (kctx && kctx->jctx.tb) + kbase_device_trace_register_access(kctx, REG_WRITE, offset, value); +} + +KBASE_EXPORT_TEST_API(kbase_reg_write) + +u32 kbase_reg_read(kbase_device *kbdev, u16 offset, kbase_context *kctx) +{ + u32 val; + KBASE_DEBUG_ASSERT(kbdev->pm.gpu_powered); + KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + val = kbase_os_reg_read(kbdev, offset); + KBASE_LOG(4, kbdev->dev, "r: reg %04x val %08x", offset, val); + if (kctx && kctx->jctx.tb) + kbase_device_trace_register_access(kctx, REG_READ, offset, val); + return val; +} + +KBASE_EXPORT_TEST_API(kbase_reg_read) + +void kbase_report_gpu_fault(kbase_device *kbdev, int multiple) +{ + u32 status; + u64 address; + + status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL); + address = (u64) kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32; + address |= kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL); + + dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", status & 0xFF, kbase_exception_name(status), address); + if (multiple) + dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); +} + +void kbase_gpu_interrupt(kbase_device *kbdev, u32 val) +{ + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val); + if (val & GPU_FAULT) + kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS); + + if (val & RESET_COMPLETED) + kbase_pm_reset_done(kbdev); + + if (val & PRFCNT_SAMPLE_COMPLETED) + kbase_instr_hwcnt_sample_done(kbdev); + + if (val & CLEAN_CACHES_COMPLETED) + kbase_clean_caches_done(kbdev); + + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL); + + /* kbase_pm_check_transitions must be called after the IRQ has been cleared. This is because it might trigger + * further power transitions and we don't want to miss the interrupt raised to notify us that these further + * transitions have finished. 
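
The register-access trace in kbase_device_trace_register_access() above keeps its state in a single 32-bit header word at tb[1]: bit 0 marks a transaction in progress, bits 15:1 hold the 15-bit wrap counter and bits 31:16 the current write offset. The sketch below packs and unpacks that word with arbitrary sample values.

#include <stdio.h>
#include <stdint.h>

static uint32_t pack_header(uint16_t write_offset, uint16_t wrap_count, int active)
{
	return ((uint32_t)write_offset << 16) |
	       ((uint32_t)(wrap_count & 0x7FFF) << 1) |
	       (active ? 0x1u : 0x0u);
}

int main(void)
{
	uint32_t header = pack_header(42, 3, 0);  /* sample values */

	printf("header 0x%08x: write offset %u, wrap count %u, active %u\n",
	       header,
	       (header >> 16) & 0xFFFF,
	       (header >> 1) & 0x7FFF,
	       header & 0x1);
	return 0;
}
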
+ */ + if (val & POWER_CHANGED_ALL) { + mali_bool cores_are_available; + unsigned long flags; + + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END); + + if (cores_are_available) { + /* Fast-path Job Scheduling on PM IRQ */ + int js; + /* Log timelining information that a change in state has completed */ + kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + /* A simplified check to ensure the last context hasn't exited + * after dropping the PM lock whilst doing a PM IRQ: any bits set + * in 'submit_allowed' indicate that we have a context in the + * runpool (which can't leave whilst we hold this lock). It is + * sometimes zero even when we have a context in the runpool, but + * that's no problem because we'll be unable to submit jobs + * anyway */ + if (kbdev->js_data.runpool_irq.submit_allowed) + for (js = 0; js < kbdev->gpu_props.num_job_slots; ++js) { + mali_bool needs_retry; + s8 submitted_count = 0; + needs_retry = kbasep_js_try_run_next_job_on_slot_irq_nolock(kbdev, js, &submitted_count); + /* Don't need to retry outside of IRQ context - this can + * only happen if we submitted too many in one IRQ, such + * that they were completing faster than we could + * submit. In this case, a job IRQ will fire to cause more + * work to be submitted in some way */ + CSTD_UNUSED(needs_retry); + } + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + } + } + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val); +} + +/* + * Device trace functions + */ +#if KBASE_TRACE_ENABLE != 0 + +STATIC mali_error kbasep_trace_init(kbase_device *kbdev) +{ + void *rbuf; + + rbuf = kmalloc(sizeof(kbase_trace) * KBASE_TRACE_SIZE, GFP_KERNEL); + + if (!rbuf) + return MALI_ERROR_FUNCTION_FAILED; + + kbdev->trace_rbuf = rbuf; + spin_lock_init(&kbdev->trace_lock); + kbasep_trace_debugfs_init(kbdev); + return MALI_ERROR_NONE; +} + +STATIC void kbasep_trace_term(kbase_device *kbdev) +{ + debugfs_remove(kbdev->trace_dentry); + kbdev->trace_dentry= NULL; + kfree(kbdev->trace_rbuf); +} + +void kbasep_trace_format_msg(kbase_trace *trace_msg, char *buffer, int len) +{ + s32 written = 0; + + /* Initial part of message */ + written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0); + + if (trace_msg->katom != MALI_FALSE) { + written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0); + } + + written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0); + + /* NOTE: Could add function callbacks to handle different message types */ + /* Jobslot present */ + if ((trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT) != MALI_FALSE) + written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0); + + written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0); + + /* Refcount present */ + if ((trace_msg->flags & 
KBASE_TRACE_FLAG_REFCOUNT) != MALI_FALSE) + written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0); + + written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0); + + /* Rest of message */ + written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0); + +} + +void kbasep_trace_dump_msg(kbase_device *kbdev, kbase_trace *trace_msg) +{ + char buffer[DEBUG_MESSAGE_SIZE]; + + kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE); + KBASE_LOG(1, kbdev->dev, "%s", buffer); +} + +void kbasep_trace_add(kbase_device *kbdev, kbase_trace_code code, void *ctx, kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val) +{ + unsigned long irqflags; + kbase_trace *trace_msg; + + spin_lock_irqsave(&kbdev->trace_lock, irqflags); + + trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in]; + + /* Fill the message */ + trace_msg->thread_id = task_pid_nr(current); + trace_msg->cpu = task_cpu(current); + + getnstimeofday(&trace_msg->timestamp); + + trace_msg->code = code; + trace_msg->ctx = ctx; + + if (NULL == katom) { + trace_msg->katom = MALI_FALSE; + } else { + trace_msg->katom = MALI_TRUE; + trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom); + trace_msg->atom_udata[0] = katom->udata.blob[0]; + trace_msg->atom_udata[1] = katom->udata.blob[1]; + } + + trace_msg->gpu_addr = gpu_addr; + trace_msg->jobslot = jobslot; + trace_msg->refcount = MIN((unsigned int)refcount, 0xFF); + trace_msg->info_val = info_val; + trace_msg->flags = flags; + + /* Update the ringbuffer indices */ + kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK; + if (kbdev->trace_next_in == kbdev->trace_first_out) + kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK; + + /* Done */ + + spin_unlock_irqrestore(&kbdev->trace_lock, irqflags); +} + +void kbasep_trace_clear(kbase_device *kbdev) +{ + unsigned long flags; + spin_lock_irqsave(&kbdev->trace_lock, flags); + kbdev->trace_first_out = kbdev->trace_next_in; + spin_unlock_irqrestore(&kbdev->trace_lock, flags); +} + +void kbasep_trace_dump(kbase_device *kbdev) +{ + unsigned long flags; + u32 start; + u32 end; + + KBASE_LOG(1, kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val"); + spin_lock_irqsave(&kbdev->trace_lock, flags); + start = kbdev->trace_first_out; + end = kbdev->trace_next_in; + + while (start != end) { + kbase_trace *trace_msg = &kbdev->trace_rbuf[start]; + kbasep_trace_dump_msg(kbdev, trace_msg); + + start = (start + 1) & KBASE_TRACE_MASK; + } + KBASE_LOG(1, kbdev->dev, "TRACE_END"); + + spin_unlock_irqrestore(&kbdev->trace_lock, flags); + + KBASE_TRACE_CLEAR(kbdev); +} + +STATIC void kbasep_trace_hook_wrapper(void *param) +{ + kbase_device *kbdev = (kbase_device *) param; + kbasep_trace_dump(kbdev); +} + +#ifdef CONFIG_DEBUG_FS +struct trace_seq_state { + kbase_trace trace_buf[KBASE_TRACE_SIZE]; + u32 start; + u32 end; +}; + +void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos) +{ + struct trace_seq_state *state = s->private; + int i; + + if (*pos > KBASE_TRACE_SIZE) + return NULL; + i = state->start + *pos; + if ((state->end >= state->start && i >= state->end) || + i >= state->end + KBASE_TRACE_SIZE) + return NULL; + + i &= KBASE_TRACE_MASK; + + return &state->trace_buf[i]; +} + +void kbasep_trace_seq_stop(struct seq_file *s, void *data) +{ +} + +void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos) +{ + struct 
trace_seq_state *state = s->private; + int i; + + (*pos)++; + + i = (state->start + *pos) & KBASE_TRACE_MASK; + if (i == state->end) + return NULL; + + return &state->trace_buf[i]; +} + +int kbasep_trace_seq_show(struct seq_file *s, void *data) +{ + kbase_trace *trace_msg = data; + char buffer[DEBUG_MESSAGE_SIZE]; + + kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE); + seq_printf(s, "%s\n", buffer); + return 0; +} + +static const struct seq_operations kbasep_trace_seq_ops = { + .start = kbasep_trace_seq_start, + .next = kbasep_trace_seq_next, + .stop = kbasep_trace_seq_stop, + .show = kbasep_trace_seq_show, +}; + +static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file) +{ + kbase_device *kbdev = inode->i_private; + unsigned long flags; + + struct trace_seq_state *state; + + state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state)); + if (!state) + return -ENOMEM; + + spin_lock_irqsave(&kbdev->trace_lock, flags); + state->start = kbdev->trace_first_out; + state->end = kbdev->trace_next_in; + memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf)); + spin_unlock_irqrestore(&kbdev->trace_lock, flags); + + return 0; +} + +static const struct file_operations kbasep_trace_debugfs_fops = { + .open = kbasep_trace_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +STATIC void kbasep_trace_debugfs_init(kbase_device *kbdev) +{ + kbdev->trace_dentry = debugfs_create_file("mali_trace", S_IRUGO, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_trace_debugfs_fops); +} +#else +STATIC void kbasep_trace_debugfs_init(kbase_device *kbdev) +{ + +} +#endif /* CONFIG_DEBUG_FS */ + +#else /* KBASE_TRACE_ENABLE != 0 */ +STATIC mali_error kbasep_trace_init(kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); + return MALI_ERROR_NONE; +} + +STATIC void kbasep_trace_term(kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +STATIC void kbasep_trace_hook_wrapper(void *param) +{ + CSTD_UNUSED(param); +} + +void kbasep_trace_add(kbase_device *kbdev, kbase_trace_code code, void *ctx, kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val) +{ + CSTD_UNUSED(kbdev); + CSTD_UNUSED(code); + CSTD_UNUSED(ctx); + CSTD_UNUSED(katom); + CSTD_UNUSED(gpu_addr); + CSTD_UNUSED(flags); + CSTD_UNUSED(refcount); + CSTD_UNUSED(jobslot); + CSTD_UNUSED(info_val); +} + +void kbasep_trace_clear(kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +void kbasep_trace_dump(kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} +#endif /* KBASE_TRACE_ENABLE != 0 */ + +void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value) +{ + switch (control) { + case FBDUMP_CONTROL_ENABLE: + /* fall through */ + case FBDUMP_CONTROL_RATE: + /* fall through */ + case SW_COUNTER_ENABLE: + /* fall through */ + case FBDUMP_CONTROL_RESIZE_FACTOR: + kbdev->kbase_profiling_controls[control] = value; + break; + default: + dev_err(kbdev->dev, "Profiling control %d not found\n", control); + break; + } +} + +u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control) +{ + u32 ret_value = 0; + + switch (control) { + case FBDUMP_CONTROL_ENABLE: + /* fall through */ + case FBDUMP_CONTROL_RATE: + /* fall through */ + case SW_COUNTER_ENABLE: + /* fall through */ + case FBDUMP_CONTROL_RESIZE_FACTOR: + ret_value = kbdev->kbase_profiling_controls[control]; + break; + default: + dev_err(kbdev->dev, "Profiling control %d not found\n", control); + break; + } + + return ret_value; +} + +/* + * Called by gator 
to control the production of + * profiling information at runtime + * */ + +void _mali_profiling_control(u32 action, u32 value) +{ + struct kbase_device *kbdev = NULL; + + /* find the first i.e. call with -1 */ + kbdev = kbase_find_device(-1); + + if (NULL != kbdev) { + kbase_set_profiling_control(kbdev, action, value); + } +} + +KBASE_EXPORT_SYMBOL(_mali_profiling_control); diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c new file mode 100755 index 00000000000..acbccaabf6e --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_event.c @@ -0,0 +1,185 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include +#include + +STATIC base_jd_udata kbase_event_process(kbase_context *kctx, kbase_jd_atom *katom) +{ + base_jd_udata data; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); + + data = katom->udata; + + KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight)); + + mutex_lock(&kctx->jctx.lock); + katom->status = KBASE_JD_ATOM_STATE_UNUSED; + mutex_unlock(&kctx->jctx.lock); + + wake_up(&katom->completed); + + return data; +} + +int kbase_event_pending(kbase_context *ctx) +{ + int ret; + + KBASE_DEBUG_ASSERT(ctx); + + mutex_lock(&ctx->event_mutex); + ret = (!list_empty(&ctx->event_list)) || (MALI_TRUE == ctx->event_closed); + mutex_unlock(&ctx->event_mutex); + + return ret; +} + +KBASE_EXPORT_TEST_API(kbase_event_pending) + +int kbase_event_dequeue(kbase_context *ctx, base_jd_event_v2 *uevent) +{ + kbase_jd_atom *atom; + + KBASE_DEBUG_ASSERT(ctx); + + mutex_lock(&ctx->event_mutex); + + if (list_empty(&ctx->event_list)) { + if (ctx->event_closed) { + /* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */ + mutex_unlock(&ctx->event_mutex); + uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED; + memset(&uevent->udata, 0, sizeof(uevent->udata)); + KBASE_LOG(2, ctx->kbdev->dev, + "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n", + BASE_JD_EVENT_DRV_TERMINATED); + return 0; + } else { + mutex_unlock(&ctx->event_mutex); + return -1; + } + } + + /* normal event processing */ + atom = list_entry(ctx->event_list.next, kbase_jd_atom, dep_item[0]); + list_del(ctx->event_list.next); + + mutex_unlock(&ctx->event_mutex); + + KBASE_LOG(2, ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); + uevent->event_code = atom->event_code; + uevent->atom_number = (atom - ctx->jctx.atoms); + uevent->udata = kbase_event_process(ctx, atom); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_event_dequeue) + +static void kbase_event_post_worker(struct work_struct *data) +{ + kbase_jd_atom *atom = CONTAINER_OF(data, kbase_jd_atom, work); + kbase_context *ctx = atom->kctx; + + if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) + kbase_jd_free_external_resources(atom); + + if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { + if (atom->event_code == BASE_JD_EVENT_DONE) { + /* Don't report the event */ + kbase_event_process(ctx, 
atom); + return; + } + } + + if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) { + /* Don't report the event */ + kbase_event_process(ctx, atom); + return; + } + + mutex_lock(&ctx->event_mutex); + list_add_tail(&atom->dep_item[0], &ctx->event_list); + mutex_unlock(&ctx->event_mutex); + + kbase_event_wakeup(ctx); +} + +void kbase_event_post(kbase_context *ctx, kbase_jd_atom *atom) +{ + KBASE_DEBUG_ASSERT(ctx); + KBASE_DEBUG_ASSERT(ctx->event_workq); + KBASE_DEBUG_ASSERT(atom); + + INIT_WORK(&atom->work, kbase_event_post_worker); + queue_work(ctx->event_workq, &atom->work); +} + +KBASE_EXPORT_TEST_API(kbase_event_post) + +void kbase_event_close(kbase_context *kctx) +{ + mutex_lock(&kctx->event_mutex); + kctx->event_closed = MALI_TRUE; + mutex_unlock(&kctx->event_mutex); + kbase_event_wakeup(kctx); +} + +mali_error kbase_event_init(kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); + + INIT_LIST_HEAD(&kctx->event_list); + mutex_init(&kctx->event_mutex); + kctx->event_closed = MALI_FALSE; + kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); + + if (NULL == kctx->event_workq) + return MALI_ERROR_FUNCTION_FAILED; + + return MALI_ERROR_NONE; +} + +KBASE_EXPORT_TEST_API(kbase_event_init) + +void kbase_event_cleanup(kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(kctx->event_workq); + + flush_workqueue(kctx->event_workq); + destroy_workqueue(kctx->event_workq); + + /* We use kbase_event_dequeue to remove the remaining events as that + * deals with all the cleanup needed for the atoms. + * + * Note: use of kctx->event_list without a lock is safe because this must be the last + * thread using it (because we're about to terminate the lock) + */ + while (!list_empty(&kctx->event_list)) { + base_jd_event_v2 event; + kbase_event_dequeue(kctx, &event); + } +} + +KBASE_EXPORT_TEST_API(kbase_event_cleanup) diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator.h b/drivers/gpu/arm/midgard/mali_kbase_gator.h new file mode 100755 index 00000000000..3c4ffed6873 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gator.h @@ -0,0 +1,44 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* NB taken from gator */ +/* + * List of possible actions to be controlled by Streamline. + * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting. + * We cannot use the enums in mali_uk_types.h because they are unknown inside gator. 
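Taken together, kbase_event_post(), kbase_event_dequeue() and kbase_event_close() above form a simple mutex-protected completion queue: finished atoms are appended to kctx->event_list from the event workqueue, a reader drains them one at a time, and once the queue has been closed a synthetic BASE_JD_EVENT_DRV_TERMINATED event tells the reader that nothing more will arrive. The sketch below shows the drain loop a caller could build on the return-value convention visible above (0 = event returned, negative = queue empty and still open); it is illustrative only and assumes the driver's own headers for the kbase_context and base_jd_event_v2 types.

/* Illustrative drain loop built on kbase_event_dequeue()'s return convention. */
static void drain_all_events(kbase_context *kctx)
{
	base_jd_event_v2 event;

	for (;;) {
		if (kbase_event_dequeue(kctx, &event) != 0)
			break;  /* queue empty and not yet closed */
		if (event.event_code == BASE_JD_EVENT_DRV_TERMINATED)
			break;  /* queue closed; no further events will be posted */
		/* handle event.atom_number / event.udata here */
	}
}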
+ */ +#ifndef _KBASE_GATOR_H_ +#define _KBASE_GATOR_H_ + +#ifdef CONFIG_MALI_GATOR_SUPPORT +#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16)) +#define GATOR_JOB_SLOT_START 1 +#define GATOR_JOB_SLOT_STOP 2 +#define GATOR_JOB_SLOT_SOFT_STOPPED 3 + +void kbase_trace_mali_job_slots_event(u32 event, const kbase_context *kctx, u8 atom_id); +void kbase_trace_mali_pm_status(u32 event, u64 value); +void kbase_trace_mali_pm_power_off(u32 event, u64 value); +void kbase_trace_mali_pm_power_on(u32 event, u64 value); +void kbase_trace_mali_page_fault_insert_pages(int event, u32 value); +void kbase_trace_mali_mmu_as_in_use(int event); +void kbase_trace_mali_mmu_as_released(int event); +void kbase_trace_mali_total_alloc_pages_change(long long int event); + +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + +#endif /* _KBASE_GATOR_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c new file mode 100755 index 00000000000..e2948b1b501 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c @@ -0,0 +1,100 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include + +/** Show callback for the @c gpu_memory debugfs file. + * + * This function is called to get the contents of the @c gpu_memory debugfs + * file. This is a report of current gpu memory usage. 
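GATOR_MAKE_EVENT above packs an event type into bits 24-31 and an event number into bits 16-23 of a single u32, which is how the kbase_trace_mali_* hooks identify, for instance, which job slot an event refers to. The concrete call sites live in gator rather than in this file, so the values below are purely illustrative:

/* Illustrative encodings only; the macro itself is defined above. */
u32 ev_start_js0 = GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, 0); /* 0x01000000 */
u32 ev_stop_js2  = GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP, 2);  /* 0x02020000 */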
+ * + * @param sfile The debugfs entry + * @param data Data associated with the entry + * + * @return 0 if successfully prints data in debugfs entry file + * -1 if it encountered an error + */ + +static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) +{ + ssize_t ret = 0; + struct list_head *entry; + const struct list_head *kbdev_list; + kbdev_list = kbase_dev_list_get(); + list_for_each(entry, kbdev_list) { + struct kbase_device *kbdev = NULL; + kbasep_kctx_list_element *element; + + kbdev = list_entry(entry, struct kbase_device, entry); + /* output the total memory usage and cap for this device */ + ret = seq_printf(sfile, "%-16s %10u\n", \ + kbdev->devname, \ + atomic_read(&(kbdev->memdev.used_pages))); + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry(element, &kbdev->kctx_list, link) { + /* output the memory usage and cap for each kctx + * opened on this device */ + ret = seq_printf(sfile, " %s-0x%p %10u\n", \ + "kctx", + element->kctx, \ + atomic_read(&(element->kctx->used_pages))); + } + mutex_unlock(&kbdev->kctx_list_lock); + } + kbase_dev_list_put(kbdev_list); + return ret; +} + +/* + * File operations related to debugfs entry for gpu_memory + */ +STATIC int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_gpu_memory_seq_show , NULL); +} + +static const struct file_operations kbasep_gpu_memory_debugfs_fops = { + .open = kbasep_gpu_memory_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +/* + * Initialize debugfs entry for gpu_memory + */ +mali_error kbasep_gpu_memory_debugfs_init(kbase_device *kbdev) +{ + kbdev->gpu_memory_dentry = debugfs_create_file("gpu_memory", \ + S_IRUGO, \ + kbdev->mali_debugfs_directory, \ + NULL, \ + &kbasep_gpu_memory_debugfs_fops); + if (IS_ERR(kbdev->gpu_memory_dentry)) + return MALI_ERROR_FUNCTION_FAILED; + + return MALI_ERROR_NONE; +} + +/* + * Terminate debugfs entry for gpu_memory + */ +void kbasep_gpu_memory_debugfs_term(kbase_device *kbdev) +{ + debugfs_remove(kbdev->gpu_memory_dentry); +} + diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h new file mode 100755 index 00000000000..b53a9ffdedf --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h @@ -0,0 +1,43 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_gpu_memory_debugfs.h + * Header file for gpu_memory entry in debugfs + * + */ + +#ifndef _KBASE_GPU_MEMORY_H +#define _KBASE_GPU_MEMORY_H + +#include +#include +#include + +/** + * @brief Initialize gpu_memory debugfs entry + */ +mali_error kbasep_gpu_memory_debugfs_init(kbase_device *kbdev); + +/** + * @brief Terminate gpu_memory debugfs entry + */ +void kbasep_gpu_memory_debugfs_term(kbase_device *kbdev); + +#endif /*_KBASE_GPU_MEMORY_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c new file mode 100755 index 00000000000..0fbefea11af --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c @@ -0,0 +1,336 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_gpuprops.c + * Base kernel property query APIs + */ + +#include +#include +#include + +/** + * @brief Extracts bits from a 32-bit bitfield. + * @hideinitializer + * + * @param[in] value The value from which to extract bits. + * @param[in] offset The first bit to extract (0 being the LSB). + * @param[in] size The number of bits to extract. + * @return Bits [@a offset, @a offset + @a size) from @a value. + * + * @pre offset + size <= 32. + */ +/* from mali_cdsb.h */ +#define KBASE_UBFX32(value, offset, size) \ + (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) + +mali_error kbase_gpuprops_uk_get_props(kbase_context *kctx, kbase_uk_gpuprops * const kbase_props) +{ + kbase_gpuprops_clock_speed_function get_gpu_speed_mhz; + u32 gpu_speed_mhz; + int rc = 1; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != kbase_props); + + /* Current GPU speed is requested from the system integrator via the KBASE_CONFIG_ATTR_GPU_SPEED_FUNC function. + * If that function fails, or the function is not provided by the system integrator, we report the maximum + * GPU speed as specified by KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX. 
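KBASE_UBFX32 above is the only bit-extraction helper used when the raw register values are unpacked later in this file, so it is worth seeing once with concrete numbers. The sketch below pulls out the same GPU_ID fields that kbase_gpuprops_calculate_props() decodes further down (version status in bits 0-3, minor revision in bits 4-11, major revision in bits 12-15, product ID in bits 16-31); the sample gpu_id value is made up for illustration.

/* Worked example with an arbitrary GPU_ID value (made up). */
u32 gpu_id  = 0x07501021;

u32 status  = KBASE_UBFX32(gpu_id,  0U,  4);  /* = 0x1    */
u32 minor   = KBASE_UBFX32(gpu_id,  4U,  8);  /* = 0x02   */
u32 major   = KBASE_UBFX32(gpu_id, 12U,  4);  /* = 0x1    */
u32 product = KBASE_UBFX32(gpu_id, 16U, 16);  /* = 0x0750 */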
+ */ + get_gpu_speed_mhz = (kbase_gpuprops_clock_speed_function) kbasep_get_config_value(kctx->kbdev, kctx->kbdev->config_attributes, KBASE_CONFIG_ATTR_GPU_SPEED_FUNC); + if (get_gpu_speed_mhz != NULL) { + rc = get_gpu_speed_mhz(&gpu_speed_mhz); +#ifdef CONFIG_MALI_DEBUG + /* Issue a warning message when the reported GPU speed falls outside the min/max range */ + if (rc == 0) { + u32 gpu_speed_khz = gpu_speed_mhz * 1000; + if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min || gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max) + dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n", (unsigned long)gpu_speed_khz, (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min, (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max); + } +#endif /* CONFIG_MALI_DEBUG */ + } + if (rc != 0) + gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000; + + kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz; + + memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props)); + + return MALI_ERROR_NONE; +} + +STATIC void kbase_gpuprops_dump_registers(kbase_device *kbdev, kbase_gpuprops_regdump *regdump) +{ + int i; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(NULL != regdump); + + /* Fill regdump with the content of the relevant registers */ + regdump->gpu_id = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); + + regdump->l2_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES)); + regdump->l3_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L3_FEATURES)); + regdump->tiler_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(TILER_FEATURES)); + regdump->mem_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(MEM_FEATURES)); + regdump->mmu_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(MMU_FEATURES)); + regdump->as_present = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(AS_PRESENT)); + regdump->js_present = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(JS_PRESENT)); + + for (i = 0; i < MIDG_MAX_JOB_SLOTS; i++) + regdump->js_features[i] = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(JS_FEATURES_REG(i))); + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + regdump->texture_features[i] = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i))); + + regdump->thread_max_threads = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(THREAD_MAX_THREADS)); + regdump->thread_max_workgroup_size = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE)); + regdump->thread_max_barrier_size = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE)); + regdump->thread_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(THREAD_FEATURES)); + + regdump->shader_present_lo = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_LO)); + regdump->shader_present_hi = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_HI)); + + regdump->tiler_present_lo = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(TILER_PRESENT_LO)); + regdump->tiler_present_hi = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(TILER_PRESENT_HI)); + + regdump->l2_present_lo = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L2_PRESENT_LO)); + regdump->l2_present_hi = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L2_PRESENT_HI)); + + regdump->l3_present_lo = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L3_PRESENT_LO)); + regdump->l3_present_hi = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L3_PRESENT_HI)); +} + +STATIC void 
kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props) +{ + struct mali_base_gpu_coherent_group *current_group; + u64 group_present; + u64 group_mask; + u64 first_set, first_set_prev; + u32 num_groups = 0; + + KBASE_DEBUG_ASSERT(NULL != props); + + props->coherency_info.coherency = props->raw_props.mem_features; + props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present); + + if (props->coherency_info.coherency & GROUPS_L3_COHERENT) { + /* Group is l3 coherent */ + group_present = props->raw_props.l3_present; + } else if (props->coherency_info.coherency & GROUPS_L2_COHERENT) { + /* Group is l2 coherent */ + group_present = props->raw_props.l2_present; + } else { + /* Group is l1 coherent */ + group_present = props->raw_props.shader_present; + } + + /* + * The coherent group mask can be computed from the l2/l3 present + * register. + * + * For the coherent group n: + * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1) + * where first_set is group_present with only its nth set-bit kept + * (i.e. the position from where a new group starts). + * + * For instance if the groups are l2 coherent and l2_present=0x0..01111: + * The first mask is: + * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1) + * = (0x0..010 - 1) & ~(0x0..01 - 1) + * = 0x0..00f + * The second mask is: + * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1) + * = (0x0..100 - 1) & ~(0x0..010 - 1) + * = 0x0..0f0 + * And so on until all the bits from group_present have been cleared + * (i.e. there is no group left). + */ + + current_group = props->coherency_info.group; + first_set = group_present & ~(group_present - 1); + + while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) { + group_present -= first_set; /* Clear the current group bit */ + first_set_prev = first_set; + + first_set = group_present & ~(group_present - 1); + group_mask = (first_set - 1) & ~(first_set_prev - 1); + + /* Populate the coherent_group structure for each group */ + current_group->core_mask = group_mask & props->raw_props.shader_present; + current_group->num_cores = hweight64(current_group->core_mask); + + num_groups++; + current_group++; + } + + if (group_present != 0) + pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS); + + props->coherency_info.num_groups = num_groups; +} + +/** + * @brief Get the GPU configuration + * + * Fill the base_gpu_props structure with values from the GPU configuration registers. 
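The mask construction in kbase_gpuprops_construct_coherent_groups() above is easiest to sanity-check outside the driver. The sketch below re-runs the same first_set/group_mask recurrence as plain user-space C; the function name is made up and the BASE_MAX_COHERENT_GROUPS cap is omitted. With group_present = 0x11 and shader_present = 0xFF it yields the two four-core masks 0x0f and 0xf0, consistent with the worked example in the comment above.

#include <stdint.h>
#include <stdio.h>

/* Stand-alone re-run of the group_mask recurrence used above (illustrative). */
static void show_coherent_groups(uint64_t group_present, uint64_t shader_present)
{
	uint64_t first_set = group_present & ~(group_present - 1);
	unsigned int n = 0;

	while (group_present != 0) {
		uint64_t first_set_prev, group_mask;

		group_present -= first_set;  /* clear the current group bit */
		first_set_prev = first_set;
		first_set = group_present & ~(group_present - 1);
		group_mask = (first_set - 1) & ~(first_set_prev - 1);

		printf("group %u: core_mask 0x%llx\n", n++,
		       (unsigned long long)(group_mask & shader_present));
	}
}

/* show_coherent_groups(0x11, 0xFF) prints 0xf then 0xf0,
 * i.e. two L2-coherent groups of four shader cores each. */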
+ * Only the raw properties are filled in this function + * + * @param gpu_props The base_gpu_props structure + * @param kbdev The kbase_device structure for the device + */ +static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, kbase_device *kbdev) +{ + kbase_gpuprops_regdump regdump; + int i; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(NULL != gpu_props); + + /* Dump relevant registers */ + kbase_gpuprops_dump_registers(kbdev, ®dump); + + gpu_props->raw_props.gpu_id = regdump.gpu_id; + gpu_props->raw_props.tiler_features = regdump.tiler_features; + gpu_props->raw_props.mem_features = regdump.mem_features; + gpu_props->raw_props.mmu_features = regdump.mmu_features; + gpu_props->raw_props.l2_features = regdump.l2_features; + gpu_props->raw_props.l3_features = regdump.l3_features; + + gpu_props->raw_props.as_present = regdump.as_present; + gpu_props->raw_props.js_present = regdump.js_present; + gpu_props->raw_props.shader_present = ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo; + gpu_props->raw_props.tiler_present = ((u64) regdump.tiler_present_hi << 32) + regdump.tiler_present_lo; + gpu_props->raw_props.l2_present = ((u64) regdump.l2_present_hi << 32) + regdump.l2_present_lo; + gpu_props->raw_props.l3_present = ((u64) regdump.l3_present_hi << 32) + regdump.l3_present_lo; + + for (i = 0; i < MIDG_MAX_JOB_SLOTS; i++) + gpu_props->raw_props.js_features[i] = regdump.js_features[i]; + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + gpu_props->raw_props.texture_features[i] = regdump.texture_features[i]; + + gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size; + gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads; + gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; + gpu_props->raw_props.thread_features = regdump.thread_features; +} + +/** + * @brief Calculate the derived properties + * + * Fill the base_gpu_props structure with values derived from the GPU configuration registers + * + * @param gpu_props The base_gpu_props structure + * @param kbdev The kbase_device structure for the device + */ +static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, kbase_device *kbdev) +{ + int i; + + /* Populate the base_gpu_props structure */ + gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4); + gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8); + gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4); + gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16); + gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2; + gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT; + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i]; + + gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8); + gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); + gpu_props->l2_props.num_l2_slices = 1; + if (gpu_props->core_props.product_id == GPU_ID_PI_T76X) { + gpu_props->l2_props.num_l2_slices = KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; + } + + gpu_props->l3_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l3_features, 0U, 8); + gpu_props->l3_props.log2_cache_size = 
KBASE_UBFX32(gpu_props->raw_props.l3_features, 16U, 8); + + gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6); + gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4); + + if (gpu_props->raw_props.thread_max_threads == 0) + gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT; + else + gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads; + + if (gpu_props->raw_props.thread_max_workgroup_size == 0) + gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT; + else + gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size; + + if (gpu_props->raw_props.thread_max_barrier_size == 0) + gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT; + else + gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size; + + gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16); + gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8); + gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6); + gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2); + + /* If values are not specified, then use defaults */ + if (gpu_props->thread_props.max_registers == 0) { + gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT; + gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; + gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; + } + /* Initialize the coherent_group structure for each group */ + kbase_gpuprops_construct_coherent_groups(gpu_props); +} + +void kbase_gpuprops_set(kbase_device *kbdev) +{ + kbase_gpu_props *gpu_props; + struct midg_raw_gpu_props *raw; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + gpu_props = &kbdev->gpu_props; + raw = &gpu_props->props.raw_props; + + /* Initialize the base_gpu_props structure from the hardware */ + kbase_gpuprops_get_props(&gpu_props->props, kbdev); + + /* Populate the derived properties */ + kbase_gpuprops_calculate_props(&gpu_props->props, kbdev); + + /* Populate kbase-only fields */ + gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8); + gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8); + + gpu_props->l3_props.associativity = KBASE_UBFX32(raw->l3_features, 8U, 8); + gpu_props->l3_props.external_bus_width = KBASE_UBFX32(raw->l3_features, 24U, 8); + + gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1); + gpu_props->mem.supergroup = KBASE_UBFX32(raw->mem_features, 1U, 1); + + gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8); + gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); + + gpu_props->num_cores = hweight64(raw->shader_present); + gpu_props->num_core_groups = hweight64(raw->l2_present); + gpu_props->num_supergroups = hweight64(raw->l3_present); + gpu_props->num_address_spaces = hweight32(raw->as_present); + gpu_props->num_job_slots = hweight32(raw->js_present); +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h new file mode 100755 index 00000000000..835c87fe89d --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h @@ -0,0 +1,54 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_gpuprops.h + * Base kernel property query APIs + */ + +#ifndef _KBASE_GPUPROPS_H_ +#define _KBASE_GPUPROPS_H_ + +#include "mali_kbase_gpuprops_types.h" + +/* Forward definition - see mali_kbase.h */ +struct kbase_device; + +/** + * @brief Set up Kbase GPU properties. + * + * Set up Kbase GPU properties with information from the GPU registers + * + * @param kbdev The kbase_device structure for the device + */ +void kbase_gpuprops_set(struct kbase_device *kbdev); + +/** + * @brief Provide GPU properties to userside through UKU call. + * + * Fill the kbase_uk_gpuprops with values from GPU configuration registers. + * + * @param kctx The kbase_context structure + * @param kbase_props A copy of the kbase_uk_gpuprops structure from userspace + * + * @return MALI_ERROR_NONE on success. Any other value indicates failure. + */ +mali_error kbase_gpuprops_uk_get_props(kbase_context *kctx, kbase_uk_gpuprops * const kbase_props); + +#endif /* _KBASE_GPUPROPS_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h new file mode 100755 index 00000000000..8793e0aff66 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h @@ -0,0 +1,103 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_gpuprops_types.h + * Base kernel property query APIs + */ + +#ifndef _KBASE_GPUPROPS_TYPES_H_ +#define _KBASE_GPUPROPS_TYPES_H_ + +#include "mali_base_kernel.h" + +#define KBASE_GPU_SPEED_MHZ 123 +#define KBASE_GPU_PC_SIZE_LOG2 24U + +typedef struct kbase_gpuprops_regdump { + u32 gpu_id; + u32 l2_features; + u32 l3_features; + u32 tiler_features; + u32 mem_features; + u32 mmu_features; + u32 as_present; + u32 js_present; + + u32 js_features[MIDG_MAX_JOB_SLOTS]; + + u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + + u32 shader_present_lo; + u32 shader_present_hi; + + u32 tiler_present_lo; + u32 tiler_present_hi; + + u32 l2_present_lo; + u32 l2_present_hi; + + u32 l3_present_lo; + u32 l3_present_hi; + + u32 thread_max_threads; + u32 thread_max_workgroup_size; + u32 thread_max_barrier_size; + u32 thread_features; +} kbase_gpuprops_regdump; + +typedef struct kbase_gpu_cache_props { + u8 associativity; + u8 external_bus_width; +} kbase_gpu_cache_props; + +typedef struct kbase_gpu_mem_props { + u8 core_group; + u8 supergroup; +} kbase_gpu_mem_props; + +typedef struct kbase_gpu_mmu_props { + u8 va_bits; + u8 pa_bits; +} kbase_gpu_mmu_props; + +typedef struct mali_kbase_gpu_props { + /* kernel-only properties */ + u8 num_cores; + u8 num_core_groups; + u8 num_supergroups; + u8 num_address_spaces; + u8 num_job_slots; + + kbase_gpu_cache_props l2_props; + kbase_gpu_cache_props l3_props; + + kbase_gpu_mem_props mem; + kbase_gpu_mmu_props mmu; + + /** + * Implementation specific irq throttle value (us), should be adjusted during integration. + */ + int irq_throttle_time_us; + + /* Properties shared with userspace */ + base_gpu_props props; +} kbase_gpu_props; + +#endif /* _KBASE_GPUPROPS_TYPES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c new file mode 100755 index 00000000000..1fe4100a484 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c @@ -0,0 +1,149 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file + * Run-time work-arounds helpers + */ + +#include +#include +#include "mali_kbase.h" +#include "mali_kbase_hw.h" + +void kbase_hw_set_features_mask(kbase_device *kbdev) +{ + const base_hw_feature *features; + u32 gpu_id; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + + switch (gpu_id) { + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1): + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1): + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9): + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1): + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1): + case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0): + features = base_hw_features_t76x; + break; + default: + features = base_hw_features_generic; + break; + } + + for (; *features != BASE_HW_FEATURE_END; features++) + set_bit(*features, &kbdev->hw_features_mask[0]); +} + +mali_error kbase_hw_set_issues_mask(kbase_device *kbdev) +{ + const base_hw_issue *issues; + u32 gpu_id; + u32 impl_tech; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; + + if (impl_tech != IMPLEMENTATION_MODEL) { + switch (gpu_id) { + case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0): + issues = base_hw_issues_t60x_r0p0_15dev0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC): + issues = base_hw_issues_t60x_r0p0_eac; + break; + case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0): + issues = base_hw_issues_t60x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0): + issues = base_hw_issues_t62x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1): + issues = base_hw_issues_t62x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T67X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T67X, 1, 0, 1): + issues = base_hw_issues_t67x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 0): + issues = base_hw_issues_t76x_r0p0_beta; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1): + issues = base_hw_issues_t76x_r0p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1): + issues = base_hw_issues_t76x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9): + /* TODO: MIDBASE-3084 - confirm hw issue list */ + issues = base_hw_issues_t76x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1): + issues = base_hw_issues_t76x_r0p2; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1): + /* TODO: MIDBASE-3086 - confirm hw issue list */ + issues = base_hw_issues_t76x_r0p2; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0): + issues = base_hw_issues_t76x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1): + case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2): + issues = base_hw_issues_t72x_r0p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0): + issues = base_hw_issues_t72x_r1p0; + break; + default: + dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); + return MALI_ERROR_FUNCTION_FAILED; + } + } else { + /* Software model */ + switch (gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT) { + case GPU_ID_PI_T60X: + case GPU_ID_PI_T62X: + case GPU_ID_PI_T67X: + issues = base_hw_issues_model_t6xx; + break; + case GPU_ID_PI_T72X: + issues = base_hw_issues_model_t72x; + break; + case GPU_ID_PI_T76X: + issues = base_hw_issues_model_t7xx; + break; + + default: + dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); + return MALI_ERROR_FUNCTION_FAILED; + } + } + + dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> 
GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT); + + for (; *issues != BASE_HW_ISSUE_END; issues++) + set_bit(*issues, &kbdev->hw_issues_mask[0]); + + return MALI_ERROR_NONE; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.h b/drivers/gpu/arm/midgard/mali_kbase_hw.h new file mode 100755 index 00000000000..4501af74f1c --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hw.h @@ -0,0 +1,52 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file + * Run-time work-arounds helpers + */ + +#ifndef _KBASE_HW_H_ +#define _KBASE_HW_H_ + +#include "mali_kbase_defs.h" + +/** + * @brief Tell whether a work-around should be enabled + */ +#define kbase_hw_has_issue(kbdev, issue)\ + test_bit(issue, &(kbdev)->hw_issues_mask[0]) + +/** + * @brief Tell whether a feature is supported + */ +#define kbase_hw_has_feature(kbdev, feature)\ + test_bit(feature, &(kbdev)->hw_features_mask[0]) + +/** + * @brief Set the HW issues mask depending on the GPU ID + */ +mali_error kbase_hw_set_issues_mask(kbase_device *kbdev); + +/** + * @brief Set the features mask depending on the GPU ID + */ +void kbase_hw_set_features_mask(kbase_device *kbdev); + +#endif /* _KBASE_HW_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.c b/drivers/gpu/arm/midgard/mali_kbase_instr.c new file mode 100755 index 00000000000..1e87476b3f3 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_instr.c @@ -0,0 +1,618 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_instr.c + * Base kernel instrumentation APIs. 
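kbase_hw_set_issues_mask() together with the kbase_hw_has_issue()/kbase_hw_has_feature() test_bit wrappers above is the only interface the rest of the driver uses to gate hardware workarounds, as the BASE_HW_ISSUE_8316 and BASE_HW_ISSUE_8186 call sites elsewhere in this patch show. A minimal hypothetical call site is sketched below; only kbase_hw_has_issue() and the issue identifier are real, the helper itself is made up.

/* Hypothetical helper; only kbase_hw_has_issue() and BASE_HW_ISSUE_8186 are real. */
static mali_bool example_needs_tiler_workaround(kbase_device *kbdev)
{
	/* PRLAM-8186: the tiler counter enable must be programmed after
	 * PRFCNT_CONFIG, as kbase_instr_hwcnt_enable_internal() does below. */
	return kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186) ? MALI_TRUE : MALI_FALSE;
}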
+ */ + +#include +#include + +/** + * @brief Issue Cache Clean & Invalidate command to hardware + */ +static void kbasep_instr_hwcnt_cacheclean(kbase_device *kbdev) +{ + unsigned long flags; + unsigned long pm_flags; + u32 irq_mask; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* Wait for any reset to complete */ + while (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) { + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.cache_clean_wait, + kbdev->hwcnt.state != KBASE_INSTR_STATE_RESETTING); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + KBASE_DEBUG_ASSERT(kbdev->hwcnt.state == KBASE_INSTR_STATE_REQUEST_CLEAN); + + /* Enable interrupt */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | CLEAN_CACHES_COMPLETED, NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + + /* clean&invalidate the caches so we're sure the mmu tables for the dump buffer is valid */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CLEAN_INV_CACHES, NULL); + kbdev->hwcnt.state = KBASE_INSTR_STATE_CLEANING; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} + +STATIC mali_error kbase_instr_hwcnt_enable_internal(kbase_device *kbdev, kbase_context *kctx, kbase_uk_hwcnt_setup *setup) +{ + unsigned long flags, pm_flags; + mali_error err = MALI_ERROR_FUNCTION_FAILED; + kbasep_js_device_data *js_devdata; + u32 irq_mask; + int ret; + u64 shader_cores_needed; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(NULL != setup); + KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx); + + shader_cores_needed = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER); + + js_devdata = &kbdev->js_data; + + /* alignment failure */ + if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1))) + goto out_err; + + /* Override core availability policy to ensure all cores are available */ + kbase_pm_ca_instr_enable(kbdev); + + /* Mark the context as active so the GPU is kept turned on */ + /* A suspend won't happen here, because we're in a syscall from a userspace + * thread. */ + kbase_pm_context_active(kbdev); + + /* Request the cores early on synchronously - we'll release them on any errors + * (e.g. 
instrumentation already active) */ + kbase_pm_request_cores_sync(kbdev, MALI_TRUE, shader_cores_needed); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) { + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + + if (kbdev->hwcnt.state != KBASE_INSTR_STATE_DISABLED) { + /* Instrumentation is already enabled */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out_unrequest_cores; + } + + /* Enable interrupt */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | PRFCNT_SAMPLE_COMPLETED, NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + + /* In use, this context is the owner */ + kbdev->hwcnt.kctx = kctx; + /* Remember the dump address so we can reprogram it later */ + kbdev->hwcnt.addr = setup->dump_buffer; + /* Remember all the settings for suspend/resume */ + if (&kbdev->hwcnt.suspended_state != setup) + memcpy(&kbdev->hwcnt.suspended_state, setup, sizeof(kbdev->hwcnt.suspended_state)); + + /* Request the clean */ + kbdev->hwcnt.state = KBASE_INSTR_STATE_REQUEST_CLEAN; + kbdev->hwcnt.triggered = 0; + /* Clean&invalidate the caches so we're sure the mmu tables for the dump buffer is valid */ + ret = queue_work(kbdev->hwcnt.cache_clean_wq, &kbdev->hwcnt.cache_clean_work); + KBASE_DEBUG_ASSERT(ret); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Wait for cacheclean to complete */ + wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0); + + KBASE_DEBUG_ASSERT(kbdev->hwcnt.state == KBASE_INSTR_STATE_IDLE); + + /* Schedule the context in */ + kbasep_js_schedule_privileged_ctx(kbdev, kctx); + + /* Configure */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_OFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), setup->dump_buffer & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), setup->dump_buffer >> 32, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), setup->jm_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), setup->shader_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_L3_CACHE_EN), setup->l3_cache_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), setup->mmu_l2_bm, kctx); + /* Due to PRLAM-8186 we need to disable the Tiler before we enable the HW counter dump. 
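Earlier in kbase_instr_hwcnt_enable_internal() the dump buffer address is rejected unless it is 2048-byte aligned, using the usual power-of-two trick of masking with (size - 1). A short worked example with made-up GPU virtual addresses:

/* 2048 - 1 == 0x7FF; a zero result means the address is suitably aligned. */
u64 ok_addr  = 0x10000ULL;  /* 0x10000 & 0x7FF == 0      -> accepted */
u64 bad_addr = 0x10400ULL;  /* 0x10400 & 0x7FF == 0x400  -> setup fails */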
*/ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0, kctx); + else + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), setup->tiler_bm, kctx); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_MANUAL, kctx); + + /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), setup->tiler_bm, kctx); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) { + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + + kbdev->hwcnt.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.triggered = 1; + wake_up(&kbdev->hwcnt.wait); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + err = MALI_ERROR_NONE; + + KBASE_LOG(1, kbdev->dev, "HW counters dumping set-up for context %p", kctx); + return err; + out_unrequest_cores: + kbase_pm_unrequest_cores(kbdev, MALI_TRUE, shader_cores_needed); + kbase_pm_context_idle(kbdev); + out_err: + return err; +} + +/** + * @brief Enable HW counters collection + * + * Note: will wait for a cache clean to complete + */ +mali_error kbase_instr_hwcnt_enable(kbase_context *kctx, kbase_uk_hwcnt_setup *setup) +{ + kbase_device *kbdev; + mali_bool access_allowed; + kbdev = kctx->kbdev; + + KBASE_DEBUG_ASSERT(NULL != kctx); + /* Determine if the calling task has access to this capability */ + access_allowed = kbase_security_has_capability(kctx, KBASE_SEC_INSTR_HW_COUNTERS_COLLECT, KBASE_SEC_FLAG_NOAUDIT); + if (MALI_FALSE == access_allowed) + return MALI_ERROR_FUNCTION_FAILED; + + return kbase_instr_hwcnt_enable_internal(kbdev, kctx, setup); +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_enable) + +/** + * @brief Disable HW counters collection + * + * Note: might sleep, waiting for an ongoing dump to complete + */ +mali_error kbase_instr_hwcnt_disable(kbase_context *kctx) +{ + unsigned long flags, pm_flags; + mali_error err = MALI_ERROR_FUNCTION_FAILED; + u32 irq_mask; + kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(NULL != kctx); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(NULL != kbdev); + + while (1) { + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.state == KBASE_INSTR_STATE_DISABLED) { + /* Instrumentation is not enabled */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out; + } + + if (kbdev->hwcnt.kctx != kctx) { + /* Instrumentation has been setup for another context */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out; + } + + if (kbdev->hwcnt.state == KBASE_INSTR_STATE_IDLE) + break; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Ongoing dump/setup - wait for its completion */ + wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0); + + } + + kbdev->hwcnt.state = KBASE_INSTR_STATE_DISABLED; + kbdev->hwcnt.triggered = 0; + + /* Disable interrupt */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + + /* Disable the counters */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx); + + 
kbdev->hwcnt.kctx = NULL; + kbdev->hwcnt.addr = 0ULL; + + kbase_pm_ca_instr_disable(kbdev); + + kbase_pm_unrequest_cores(kbdev, MALI_TRUE, kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER)); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Release the context. This had its own Power Manager Active reference */ + kbasep_js_release_privileged_ctx(kbdev, kctx); + + /* Also release our Power Manager Active reference */ + kbase_pm_context_idle(kbdev); + + KBASE_LOG(1, kbdev->dev, "HW counters dumping disabled for context %p", kctx); + + err = MALI_ERROR_NONE; + + out: + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_disable) + +/** + * @brief Configure HW counters collection + */ +mali_error kbase_instr_hwcnt_setup(kbase_context *kctx, kbase_uk_hwcnt_setup *setup) +{ + mali_error err = MALI_ERROR_FUNCTION_FAILED; + kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(NULL != kctx); + + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(NULL != kbdev); + + if (NULL == setup) { + /* Bad parameter - abort */ + goto out; + } + + if (setup->dump_buffer != 0ULL) { + /* Enable HW counters */ + err = kbase_instr_hwcnt_enable(kctx, setup); + } else { + /* Disable HW counters */ + err = kbase_instr_hwcnt_disable(kctx); + } + + out: + return err; +} + +/** + * @brief Issue Dump command to hardware + * + * Notes: + * - does not sleep + */ +mali_error kbase_instr_hwcnt_dump_irq(kbase_context *kctx) +{ + unsigned long flags; + mali_error err = MALI_ERROR_FUNCTION_FAILED; + kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(NULL != kctx); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(NULL != kbdev); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.kctx != kctx) { + /* The instrumentation has been setup for another context */ + goto unlock; + } + + if (kbdev->hwcnt.state != KBASE_INSTR_STATE_IDLE) { + /* HW counters are disabled or another dump is ongoing, or we're resetting */ + goto unlock; + } + + kbdev->hwcnt.triggered = 0; + + /* Mark that we're dumping - the PF handler can signal that we faulted */ + kbdev->hwcnt.state = KBASE_INSTR_STATE_DUMPING; + + /* Reconfigure the dump address */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kbdev->hwcnt.addr & 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kbdev->hwcnt.addr >> 32, NULL); + + /* Start dumping */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL, kbdev->hwcnt.addr, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_PRFCNT_SAMPLE, kctx); + + KBASE_LOG(1, kbdev->dev, "HW counters dumping done for context %p", kctx); + + err = MALI_ERROR_NONE; + + unlock: + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_irq) + +/** + * @brief Tell whether the HW counters dump has completed + * + * Notes: + * - does not sleep + * - success will be set to MALI_TRUE if the dump succeeded or + * MALI_FALSE on failure + */ +mali_bool kbase_instr_hwcnt_dump_complete(kbase_context *kctx, mali_bool * const success) +{ + unsigned long flags; + mali_bool complete = MALI_FALSE; + kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(NULL != kctx); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(NULL != success); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.state == KBASE_INSTR_STATE_IDLE) { + *success = MALI_TRUE; + complete = MALI_TRUE; + } else if (kbdev->hwcnt.state == KBASE_INSTR_STATE_FAULT) { + *success = MALI_FALSE; + complete = MALI_TRUE; + kbdev->hwcnt.state = 
KBASE_INSTR_STATE_IDLE; + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + return complete; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete) + +/** + * @brief Issue Dump command to hardware and wait for completion + */ +mali_error kbase_instr_hwcnt_dump(kbase_context *kctx) +{ + unsigned long flags; + mali_error err = MALI_ERROR_FUNCTION_FAILED; + kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(NULL != kctx); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(NULL != kbdev); + + err = kbase_instr_hwcnt_dump_irq(kctx); + if (MALI_ERROR_NONE != err) { + /* Can't dump HW counters */ + goto out; + } + + /* Wait for dump & cacheclean to complete */ + wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) { + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + + if (kbdev->hwcnt.state == KBASE_INSTR_STATE_FAULT) { + err = MALI_ERROR_FUNCTION_FAILED; + kbdev->hwcnt.state = KBASE_INSTR_STATE_IDLE; + } else { + /* Dump done */ + KBASE_DEBUG_ASSERT(kbdev->hwcnt.state == KBASE_INSTR_STATE_IDLE); + err = MALI_ERROR_NONE; + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + out: + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump) + +/** + * @brief Clear the HW counters + */ +mali_error kbase_instr_hwcnt_clear(kbase_context *kctx) +{ + unsigned long flags; + mali_error err = MALI_ERROR_FUNCTION_FAILED; + kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(NULL != kctx); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(NULL != kbdev); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) { + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + + /* Check it's the context previously set up and we're not already dumping */ + if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.state != KBASE_INSTR_STATE_IDLE) + goto out; + + /* Clear the counters */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_PRFCNT_CLEAR, kctx); + + err = MALI_ERROR_NONE; + + out: + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear) + +/** + * Workqueue for handling cache cleaning + */ +void kbasep_cache_clean_worker(struct work_struct *data) +{ + kbase_device *kbdev; + unsigned long flags; + + kbdev = container_of(data, kbase_device, hwcnt.cache_clean_work); + + mutex_lock(&kbdev->cacheclean_lock); + kbasep_instr_hwcnt_cacheclean(kbdev); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* Wait for our condition, and any reset to complete */ + while (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING + || kbdev->hwcnt.state == KBASE_INSTR_STATE_CLEANING) { + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.cache_clean_wait, + (kbdev->hwcnt.state != KBASE_INSTR_STATE_RESETTING + && kbdev->hwcnt.state != KBASE_INSTR_STATE_CLEANING)); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + KBASE_DEBUG_ASSERT(kbdev->hwcnt.state == KBASE_INSTR_STATE_CLEANED); + + /* All finished and idle */ + kbdev->hwcnt.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.triggered = 1; + wake_up(&kbdev->hwcnt.wait); + + 
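+ /* The wake_up() above releases threads sleeping on hwcnt.wait, e.g. in kbase_instr_hwcnt_dump(); drop the locks in reverse acquisition order */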
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + mutex_unlock(&kbdev->cacheclean_lock); +} + +/** + * @brief Dump complete interrupt received + */ +void kbase_instr_hwcnt_sample_done(kbase_device *kbdev) +{ + unsigned long flags; + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.state == KBASE_INSTR_STATE_FAULT) { + kbdev->hwcnt.triggered = 1; + wake_up(&kbdev->hwcnt.wait); + } else if (kbdev->hwcnt.state == KBASE_INSTR_STATE_DUMPING) { + int ret; + /* Always clean and invalidate the cache after a successful dump */ + kbdev->hwcnt.state = KBASE_INSTR_STATE_REQUEST_CLEAN; + ret = queue_work(kbdev->hwcnt.cache_clean_wq, &kbdev->hwcnt.cache_clean_work); + KBASE_DEBUG_ASSERT(ret); + } + /* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a reset, + * and the instrumentation state hasn't been restored yet - + * kbasep_reset_timeout_worker() will do the rest of the work */ + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} + +/** + * @brief Cache clean interrupt received + */ +void kbase_clean_caches_done(kbase_device *kbdev) +{ + u32 irq_mask; + + if (kbdev->hwcnt.state != KBASE_INSTR_STATE_DISABLED) { + unsigned long flags; + unsigned long pm_flags; + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* Disable interrupt */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~CLEAN_CACHES_COMPLETED, NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + + /* Wakeup... */ + if (kbdev->hwcnt.state == KBASE_INSTR_STATE_CLEANING) { + /* Only wake if we weren't resetting */ + kbdev->hwcnt.state = KBASE_INSTR_STATE_CLEANED; + wake_up(&kbdev->hwcnt.cache_clean_wait); + } + /* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a reset, + * and the instrumentation state hasn't been restored yet - + * kbasep_reset_timeout_worker() will do the rest of the work */ + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + } +} + + +/* Disable instrumentation and wait for any existing dump to complete + * It's assumed that there's only one privileged context + * Safe to do this without lock when doing an OS suspend, because it only + * changes in response to user-space IOCTLs */ +void kbase_instr_hwcnt_suspend(kbase_device *kbdev) +{ + kbase_context *kctx; + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(!kbdev->hwcnt.suspended_kctx); + + kctx = kbdev->hwcnt.kctx; + kbdev->hwcnt.suspended_kctx = kctx; + + /* Relevant state was saved into hwcnt.suspended_state when enabling the + * counters */ + + if (kctx) + { + KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_PRIVILEGED); + kbase_instr_hwcnt_disable(kctx); + } +} + +void kbase_instr_hwcnt_resume(kbase_device *kbdev) +{ + kbase_context *kctx; + KBASE_DEBUG_ASSERT(kbdev); + + kctx = kbdev->hwcnt.suspended_kctx; + kbdev->hwcnt.suspended_kctx = NULL; + + if (kctx) + { + mali_error err; + err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &kbdev->hwcnt.suspended_state); + WARN(err != MALI_ERROR_NONE, + "Failed to restore instrumented hardware counters on resume\n"); + } +} + diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c new file mode 100755 index 00000000000..0f877d152ef --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c @@ -0,0 +1,1598 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#if defined(CONFIG_DMA_SHARED_BUFFER) +#include +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ +#ifdef CONFIG_COMPAT +#include +#endif +#include +#include +#include +#include +#ifdef CONFIG_UMP +#include +#endif /* CONFIG_UMP */ +#include + +#define beenthere(kctx,f, a...) KBASE_LOG(1, kctx->kbdev->dev, "%s:" f, __func__, ##a) + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) +/* random32 was renamed to prandom_u32 in 3.8 */ +#define prandom_u32 random32 +#endif + +/* + * This is the kernel side of the API. Only entry points are: + * - kbase_jd_submit(): Called from userspace to submit a single bag + * - kbase_jd_done(): Called from interrupt context to track the + * completion of a job. + * Callouts: + * - to the job manager (enqueue a job) + * - to the event subsystem (signals the completion/failure of bag/job-chains). + */ + +static void *get_compat_pointer(const kbase_pointer *p) +{ +#ifdef CONFIG_COMPAT + if (is_compat_task()) + return compat_ptr(p->compat_value); + else +#endif + return p->value; +} + +/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs + * + * Returns whether the JS needs a reschedule. + * + * Note that the caller must also check the atom status and + * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock + */ +static int jd_run_atom(kbase_jd_atom *katom) +{ + kbase_context *kctx = katom->kctx; + KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); + + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { + /* Dependency only atom */ + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + return 0; + } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + /* Soft-job */ + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) + == BASE_JD_REQ_SOFT_REPLAY) { + int status = kbase_replay_process(katom); + + if ((status & MALI_REPLAY_STATUS_MASK) + == MALI_REPLAY_STATUS_REPLAYING) + return status & MALI_REPLAY_FLAG_JS_RESCHED; + else + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + return 0; + } else if (kbase_process_soft_job(katom) == 0) { + kbase_finish_soft_job(katom); + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + } else { + /* The job has not completed */ + list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); + } + return 0; + } + + katom->status = KBASE_JD_ATOM_STATE_IN_JS; + /* Queue an action about whether we should try scheduling a context */ + return kbasep_js_add_job(kctx, katom); +} + +#ifdef CONFIG_KDS + +/* Add the katom to the kds waiting list. + * Atoms must be added to the waiting list after a successful call to kds_async_waitall. + * The caller must hold the kbase_jd_context.lock */ + +static void kbase_jd_kds_waiters_add(kbase_jd_atom *katom) +{ + kbase_context *kctx; + KBASE_DEBUG_ASSERT(katom); + + kctx = katom->kctx; + + list_add_tail(&katom->node, &kctx->waiting_kds_resource); +} + +/* Remove the katom from the kds waiting list. + * Atoms must be removed from the waiting list before a call to kds_resource_set_release_sync. 
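+ * kbase_jd_free_external_resources() observes this ordering: it removes the atom under the jctx lock before releasing the resource set.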
+ * The supplied katom must first have been added to the list with a call to kbase_jd_kds_waiters_add. + * The caller must hold the kbase_jd_context.lock */ + +static void kbase_jd_kds_waiters_remove(kbase_jd_atom *katom) +{ + KBASE_DEBUG_ASSERT(katom); + list_del(&katom->node); +} + +static void kds_dep_clear(void *callback_parameter, void *callback_extra_parameter) +{ + kbase_jd_atom *katom; + kbase_jd_context *ctx; + kbase_device *kbdev; + + katom = (kbase_jd_atom *) callback_parameter; + KBASE_DEBUG_ASSERT(katom); + ctx = &katom->kctx->jctx; + kbdev = katom->kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev); + + mutex_lock(&ctx->lock); + + /* KDS resource has already been satisfied (e.g. due to zapping) */ + if (katom->kds_dep_satisfied) + goto out; + + /* This atom's KDS dependency has now been met */ + katom->kds_dep_satisfied = MALI_TRUE; + + /* Check whether the atom's other dependencies were already met */ + if (!katom->dep_atom[0] && !katom->dep_atom[1]) { + /* katom dep complete, attempt to run it */ + mali_bool resched = MALI_FALSE; + resched = jd_run_atom(katom); + + if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { + /* The atom has already finished */ + resched |= jd_done_nolock(katom); + } + + if (resched) + kbasep_js_try_schedule_head_ctx(kbdev); + } + out: + mutex_unlock(&ctx->lock); +} + +void kbase_cancel_kds_wait_job(kbase_jd_atom *katom) +{ + KBASE_DEBUG_ASSERT(katom); + + /* Prevent job_done_nolock from being called twice on an atom when + * there is a race between job completion and cancellation */ + + if ( katom->status == KBASE_JD_ATOM_STATE_QUEUED ) { + /* Wait was cancelled - zap the atom */ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + if (jd_done_nolock(katom)) { + kbasep_js_try_schedule_head_ctx( katom->kctx->kbdev ); + } + } +} +#endif /* CONFIG_KDS */ + +#ifdef CONFIG_DMA_SHARED_BUFFER +static mali_error kbase_jd_umm_map(kbase_context *kctx, struct kbase_va_region *reg) +{ + struct sg_table *sgt; + struct scatterlist *s; + int i; + phys_addr_t *pa; + mali_error err; + size_t count = 0; + + KBASE_DEBUG_ASSERT(reg->alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM); + KBASE_DEBUG_ASSERT(NULL == reg->alloc->imported.umm.sgt); + sgt = dma_buf_map_attachment(reg->alloc->imported.umm.dma_attachment, DMA_BIDIRECTIONAL); + + if (IS_ERR_OR_NULL(sgt)) + return MALI_ERROR_FUNCTION_FAILED; + + /* save for later */ + reg->alloc->imported.umm.sgt = sgt; + + pa = kbase_get_phy_pages(reg); + KBASE_DEBUG_ASSERT(pa); + + for_each_sg(sgt->sgl, s, sgt->nents, i) { + int j; + size_t pages = PFN_UP(sg_dma_len(s)); + + WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1), + "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n", + sg_dma_len(s)); + + WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), + "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", + (unsigned long long) sg_dma_address(s)); + + for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++) + *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT); + WARN_ONCE(j < pages, + "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", + reg->alloc->imported.umm.dma_buf->size); + } + + if (WARN_ONCE(count < reg->nr_pages, + "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", + reg->alloc->imported.umm.dma_buf->size)) { + err = MALI_ERROR_FUNCTION_FAILED; + goto out; + } + + /* Update nents as we now have pages to map */ + reg->alloc->nents = count; + + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, kbase_get_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); + +out: + if 
(MALI_ERROR_NONE != err) { + dma_buf_unmap_attachment(reg->alloc->imported.umm.dma_attachment, reg->alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + reg->alloc->imported.umm.sgt = NULL; + } + + return err; +} + +static void kbase_jd_umm_unmap(kbase_context *kctx, struct kbase_va_region *reg, int mmu_update) +{ + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(reg); + KBASE_DEBUG_ASSERT(reg->alloc->imported.umm.dma_attachment); + KBASE_DEBUG_ASSERT(reg->alloc->imported.umm.sgt); + if (mmu_update) + kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg)); + dma_buf_unmap_attachment(reg->alloc->imported.umm.dma_attachment, reg->alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + reg->alloc->imported.umm.sgt = NULL; + reg->alloc->nents = 0; +} +#endif /* CONFIG_DMA_SHARED_BUFFER */ + +void kbase_jd_free_external_resources(kbase_jd_atom *katom) +{ +#ifdef CONFIG_KDS + if (katom->kds_rset) { + kbase_jd_context * jctx = &katom->kctx->jctx; + + /* + * As the atom is no longer waiting, remove it from + * the waiting list. + */ + + mutex_lock(&jctx->lock); + kbase_jd_kds_waiters_remove( katom ); + mutex_unlock(&jctx->lock); + + /* Release the kds resource or cancel if zapping */ + kds_resource_set_release_sync(&katom->kds_rset); + } +#endif /* CONFIG_KDS */ +} + +static void kbase_jd_post_external_resources(kbase_jd_atom *katom) +{ + KBASE_DEBUG_ASSERT(katom); + KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); + +#ifdef CONFIG_KDS + /* Prevent the KDS resource from triggering the atom in case of zapping */ + if (katom->kds_rset) + katom->kds_dep_satisfied = MALI_TRUE; +#endif /* CONFIG_KDS */ + +#if defined(CONFIG_DMA_SHARED_BUFFER) || defined(CONFIG_MALI_DEBUG) + /* Lock also used in debug mode just for lock order checking */ + kbase_gpu_vm_lock(katom->kctx); +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) || defined(CONFIG_MALI_DEBUG) */ + /* only roll back if extres is non-NULL */ + if (katom->extres) { + u32 res_no; + res_no = katom->nr_extres; + while (res_no-- > 0) { +#ifdef CONFIG_DMA_SHARED_BUFFER + if (katom->extres[res_no].alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + kbase_va_region *reg; + int mmu_update = 0; + reg = kbase_region_tracker_find_region_base_address(katom->kctx, katom->extres[res_no].gpu_address); + if (reg && reg->alloc == katom->extres[res_no].alloc) + mmu_update = 1; + if (1 == katom->extres[res_no].alloc->imported.umm.current_mapping_usage_count--) + kbase_jd_umm_unmap(katom->kctx, reg, mmu_update); + } +#endif /* CONFIG_DMA_SHARED_BUFFER */ + kbase_mem_phy_alloc_put(katom->extres[res_no].alloc); + } + kfree(katom->extres); + katom->extres = NULL; + } +#if defined(CONFIG_DMA_SHARED_BUFFER) || defined(CONFIG_MALI_DEBUG) + /* Lock also used in debug mode just for lock order checking */ + kbase_gpu_vm_unlock(katom->kctx); +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) || defined(CONFIG_MALI_DEBUG) */ +} + +#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS) +static void add_kds_resource(struct kds_resource *kds_res, struct kds_resource **kds_resources, u32 *kds_res_count, unsigned long *kds_access_bitmap, mali_bool exclusive) +{ + u32 i; + + for (i = 0; i < *kds_res_count; i++) { + /* Duplicate resource, ignore */ + if (kds_resources[i] == kds_res) + return; + } + + kds_resources[*kds_res_count] = kds_res; + if (exclusive) + set_bit(*kds_res_count, kds_access_bitmap); + (*kds_res_count)++; +} +#endif + +/* + * Set up external resources needed by this job. 
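+ * Each entry in the atom's extres list is resolved to a memory region, a reference is taken on the backing
+ * allocation, dma-buf imports are mapped on first use and any KDS resources are gathered so the atom can wait on them.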
+ * + * jctx.lock must be held when this is called. + */ + +static mali_error kbase_jd_pre_external_resources(kbase_jd_atom *katom, const base_jd_atom_v2 *user_atom) +{ + mali_error err_ret_val = MALI_ERROR_FUNCTION_FAILED; + u32 res_no; +#ifdef CONFIG_KDS + u32 kds_res_count = 0; + struct kds_resource **kds_resources = NULL; + unsigned long *kds_access_bitmap = NULL; +#endif /* CONFIG_KDS */ + struct base_external_resource * input_extres; + + KBASE_DEBUG_ASSERT(katom); + KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); + + /* no resources encoded, early out */ + if (!katom->nr_extres) + return MALI_ERROR_FUNCTION_FAILED; + + katom->extres = kmalloc(sizeof(*katom->extres) * katom->nr_extres, GFP_KERNEL); + if (NULL == katom->extres) { + err_ret_val = MALI_ERROR_OUT_OF_MEMORY; + goto early_err_out; + } + + /* copy user buffer to the end of our real buffer. + * Make sure the struct sizes haven't changed in a way + * we don't support */ + BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres)); + input_extres = (struct base_external_resource*)(((unsigned char *)katom->extres) + (sizeof(*katom->extres) - sizeof(*input_extres)) * katom->nr_extres); + + if (copy_from_user(input_extres, get_compat_pointer(&user_atom->extres_list), sizeof(*input_extres) * katom->nr_extres) != 0) { + err_ret_val = MALI_ERROR_FUNCTION_FAILED; + goto early_err_out; + } +#ifdef CONFIG_KDS + /* assume we have to wait for all */ + KBASE_DEBUG_ASSERT(0 != katom->nr_extres); + kds_resources = kmalloc(sizeof(struct kds_resource *) * katom->nr_extres, GFP_KERNEL); + + if (NULL == kds_resources) { + err_ret_val = MALI_ERROR_OUT_OF_MEMORY; + goto early_err_out; + } + + KBASE_DEBUG_ASSERT(0 != katom->nr_extres); + kds_access_bitmap = kzalloc(sizeof(unsigned long) * ((katom->nr_extres + BITS_PER_LONG - 1) / BITS_PER_LONG), GFP_KERNEL); + + if (NULL == kds_access_bitmap) { + err_ret_val = MALI_ERROR_OUT_OF_MEMORY; + goto early_err_out; + } +#endif /* CONFIG_KDS */ + +#if defined(CONFIG_DMA_SHARED_BUFFER) || defined(CONFIG_MALI_DEBUG) + /* need to keep the GPU VM locked while we set up UMM buffers */ + /* Lock also used in debug mode just for lock order checking */ + kbase_gpu_vm_lock(katom->kctx); +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) || defined(CONFIG_MALI_DEBUG) */ + + for (res_no = 0; res_no < katom->nr_extres; res_no++) { + base_external_resource *res; + kbase_va_region *reg; + + res = &input_extres[res_no]; + reg = kbase_region_tracker_find_region_enclosing_address(katom->kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); + /* did we find a matching region object? 
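+ * A missing region, or one still flagged KBASE_REG_FREE, cannot back an external resource, so the whole atom is rolled back.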
*/ + if (NULL == reg || (reg->flags & KBASE_REG_FREE)) { + /* roll back */ + goto failed_loop; + } + + /* decide what needs to happen for this resource */ + switch (reg->alloc->type) { + case BASE_TMEM_IMPORT_TYPE_UMP: + { +#if defined(CONFIG_KDS) && defined(CONFIG_UMP) + struct kds_resource *kds_res; + kds_res = ump_dd_kds_resource_get(reg->alloc->imported.ump_handle); + if (kds_res) + add_kds_resource(kds_res, kds_resources, &kds_res_count, kds_access_bitmap, res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE); +#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */ + break; + } +#ifdef CONFIG_DMA_SHARED_BUFFER + case BASE_TMEM_IMPORT_TYPE_UMM: + { +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + struct kds_resource *kds_res; + kds_res = get_dma_buf_kds_resource(reg->alloc->imported.umm.dma_buf); + if (kds_res) + add_kds_resource(kds_res, kds_resources, &kds_res_count, kds_access_bitmap, res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE); +#endif + reg->alloc->imported.umm.current_mapping_usage_count++; + if (1 == reg->alloc->imported.umm.current_mapping_usage_count) { + /* use a local variable to not pollute err_ret_val + * with a potential success value as some other gotos depend + * on the default error code stored in err_ret_val */ + mali_error tmp; + tmp = kbase_jd_umm_map(katom->kctx, reg); + if (MALI_ERROR_NONE != tmp) { + /* failed to map this buffer, roll back */ + err_ret_val = tmp; + reg->alloc->imported.umm.current_mapping_usage_count--; + goto failed_loop; + } + } + break; + } +#endif + default: + goto failed_loop; + } + + /* finish with updating out array with the data we found */ + /* NOTE: It is important that this is the last thing we do (or + * at least not before the first write) as we overwrite elements + * as we loop and could be overwriting ourself, so no writes + * until the last read for an element. 
+ * */ + katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ + katom->extres[res_no].alloc = kbase_mem_phy_alloc_get(reg->alloc); + } + /* successfully parsed the extres array */ +#if defined(CONFIG_DMA_SHARED_BUFFER) || defined(CONFIG_MALI_DEBUG) + /* drop the vm lock before we call into kds */ + /* Lock also used in debug mode just for lock order checking */ + kbase_gpu_vm_unlock(katom->kctx); +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) || defined(CONFIG_MALI_DEBUG) */ + +#ifdef CONFIG_KDS + if (kds_res_count) { + int wait_failed; + /* We have resources to wait for with kds */ + katom->kds_dep_satisfied = MALI_FALSE; + + wait_failed = kds_async_waitall(&katom->kds_rset, + &katom->kctx->jctx.kds_cb, + katom, + NULL, + kds_res_count, + kds_access_bitmap, + kds_resources); + if (wait_failed) { + goto failed_kds_setup; + } else { + kbase_jd_kds_waiters_add( katom ); + } + } else { + /* Nothing to wait for, so kds dep met */ + katom->kds_dep_satisfied = MALI_TRUE; + } + kfree(kds_resources); + kfree(kds_access_bitmap); +#endif /* CONFIG_KDS */ + + /* all done OK */ + return MALI_ERROR_NONE; + +/* error handling section */ + +#ifdef CONFIG_KDS + failed_kds_setup: + +#if defined(CONFIG_DMA_SHARED_BUFFER) || defined(CONFIG_MALI_DEBUG) + /* lock before we unmap */ + /* Lock also used in debug mode just for lock order checking */ + kbase_gpu_vm_lock(katom->kctx); +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) || defined(CONFIG_MALI_DEBUG) */ +#endif /* CONFIG_KDS */ + + failed_loop: + /* undo the loop work */ + while (res_no-- > 0) { +#ifdef CONFIG_DMA_SHARED_BUFFER + if (katom->extres[res_no].alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + struct kbase_va_region * reg; + int mmu_update = 0; + reg = kbase_region_tracker_find_region_base_address(katom->kctx, katom->extres[res_no].gpu_address); + if (reg && reg->alloc == katom->extres[res_no].alloc && reg->alloc->type) + mmu_update = 1; + katom->extres[res_no].alloc->imported.umm.current_mapping_usage_count--; + if (0 == reg->alloc->imported.umm.current_mapping_usage_count) + kbase_jd_umm_unmap(katom->kctx, reg, mmu_update); + } +#endif /* CONFIG_DMA_SHARED_BUFFER */ + kbase_mem_phy_alloc_put(katom->extres[res_no].alloc); + } +#if defined(CONFIG_DMA_SHARED_BUFFER) || defined(CONFIG_MALI_DEBUG) + /* Lock also used in debug mode just for lock order checking */ + kbase_gpu_vm_unlock(katom->kctx); +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) || defined(CONFIG_MALI_DEBUG) */ + + early_err_out: + kfree(katom->extres); + katom->extres = NULL; +#ifdef CONFIG_KDS + kfree(kds_resources); + kfree(kds_access_bitmap); +#endif /* CONFIG_KDS */ + return err_ret_val; +} + +STATIC INLINE void jd_resolve_dep(struct list_head *out_list, kbase_jd_atom *katom, u8 d) +{ + u8 other_d = !d; + + while (!list_empty(&katom->dep_head[d])) { + kbase_jd_atom *dep_atom = list_entry(katom->dep_head[d].next, kbase_jd_atom, dep_item[d]); + list_del(katom->dep_head[d].next); + + dep_atom->dep_atom[d] = NULL; + if (katom->event_code != BASE_JD_EVENT_DONE) { + /* Atom failed, so remove the other dependencies and immediately fail the atom */ + if (dep_atom->dep_atom[other_d]) { + list_del(&dep_atom->dep_item[other_d]); + dep_atom->dep_atom[other_d] = NULL; + } +#ifdef CONFIG_KDS + if (!dep_atom->kds_dep_satisfied) { + /* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and + * do nothing. 
If the callback doesn't happen then kbase_jd_post_external_resources will clean up + */ + dep_atom->kds_dep_satisfied = MALI_TRUE; + } +#endif + + dep_atom->event_code = katom->event_code; + KBASE_DEBUG_ASSERT(dep_atom->status != KBASE_JD_ATOM_STATE_UNUSED); + dep_atom->status = KBASE_JD_ATOM_STATE_COMPLETED; + + list_add_tail(&dep_atom->dep_item[0], out_list); + } else if (!dep_atom->dep_atom[other_d]) { +#ifdef CONFIG_KDS + if (dep_atom->kds_dep_satisfied) +#endif + list_add_tail(&dep_atom->dep_item[0], out_list); + } + } +} + +KBASE_EXPORT_TEST_API(jd_resolve_dep) + +#if MALI_CUSTOMER_RELEASE == 0 +static void jd_force_failure(kbase_device *kbdev, kbase_jd_atom *katom) +{ + kbdev->force_replay_count++; + + if (kbdev->force_replay_count >= kbdev->force_replay_limit) { + kbdev->force_replay_count = 0; + katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT; + + if (kbdev->force_replay_random) + kbdev->force_replay_limit = + (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1; + + dev_info(kbdev->dev, "force_replay : promoting to error\n"); + } +} + +/** Test to see if atom should be forced to fail. + * + * This function will check if an atom has a replay job as a dependent. If so + * then it will be considered for forced failure. */ +static void jd_check_force_failure(kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + kbase_device *kbdev = kctx->kbdev; + int i; + if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) || + (katom->core_req & BASEP_JD_REQ_EVENT_NEVER)) + return; + for (i = 1; i < BASE_JD_ATOM_COUNT; i++) { + if (kctx->jctx.atoms[i].dep_atom[0] == katom || + kctx->jctx.atoms[i].dep_atom[1] == katom) { + kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i]; + + if ((dep_atom->core_req & BASEP_JD_REQ_ATOM_TYPE) == + BASE_JD_REQ_SOFT_REPLAY && + (dep_atom->core_req & kbdev->force_replay_core_req) + == kbdev->force_replay_core_req) { + jd_force_failure(kbdev, katom); + return; + } + } + } +} +#endif + +static mali_bool jd_replay(kbase_jd_atom *katom) +{ + int status = kbase_replay_process(katom); + + if ((status & MALI_REPLAY_STATUS_MASK) == + MALI_REPLAY_STATUS_REPLAYING) { + if (status & MALI_REPLAY_FLAG_JS_RESCHED) + return MALI_TRUE; + } + return MALI_FALSE; +} + +/* + * Perform the necessary handling of an atom that has finished running + * on the GPU. + * + * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller + * is responsible for calling kbase_finish_soft_job *before* calling this function. + * + * The caller must hold the kbase_jd_context.lock. + */ +mali_bool jd_done_nolock(kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + kbase_device *kbdev = kctx->kbdev; + struct list_head completed_jobs; + struct list_head runnable_jobs; + mali_bool need_to_try_schedule_context = MALI_FALSE; + int i; + + INIT_LIST_HEAD(&completed_jobs); + INIT_LIST_HEAD(&runnable_jobs); + + KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); + +#if MALI_CUSTOMER_RELEASE == 0 + jd_check_force_failure(katom); +#endif + + + /* This is needed in case an atom is failed due to being invalid, this + * can happen *before* the jobs that the atom depends on have completed */ + for (i = 0; i < 2; i++) { + if (katom->dep_atom[i]) { + list_del(&katom->dep_item[i]); + katom->dep_atom[i] = NULL; + } + } + + /* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with + * BASE_JD_EVENT_TILE_RANGE_FAULT. 
+ * + * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the + * error code to BASE_JD_EVENT_DONE + */ + + if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) && + katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) { + if ( ( katom->core_req & BASE_JD_REQ_FS ) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) ) { + /* Promote the failure to job done */ + katom->event_code = BASE_JD_EVENT_DONE; + katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED); + } + } + + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + list_add_tail(&katom->dep_item[0], &completed_jobs); + + while (!list_empty(&completed_jobs)) { + katom = list_entry(completed_jobs.prev, kbase_jd_atom, dep_item[0]); + list_del(completed_jobs.prev); + + KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); + + for (i = 0; i < 2; i++) + jd_resolve_dep(&runnable_jobs, katom, i); + + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) + kbase_jd_post_external_resources(katom); + + while (!list_empty(&runnable_jobs)) { + kbase_jd_atom *node = list_entry(runnable_jobs.prev, kbase_jd_atom, dep_item[0]); + list_del(runnable_jobs.prev); + + KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); + + if (katom->event_code == BASE_JD_EVENT_DONE) { + need_to_try_schedule_context |= jd_run_atom(node); + } else { + node->event_code = katom->event_code; + node->status = KBASE_JD_ATOM_STATE_COMPLETED; + + if ((node->core_req & BASEP_JD_REQ_ATOM_TYPE) + == BASE_JD_REQ_SOFT_REPLAY) { + need_to_try_schedule_context |= + jd_replay(node); + } else if (node->core_req & + BASE_JD_REQ_SOFT_JOB) { + kbase_finish_soft_job(node); + } + } + + if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) + list_add_tail(&node->dep_item[0], &completed_jobs); + } + + kbase_event_post(kctx, katom); + + /* Decrement and check the TOTAL number of jobs. This includes + * those not tracked by the scheduler: 'not ready to run' and + * 'dependency-only' jobs. 
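+ * kbase_jd_zap_context() sleeps on zero_jobs_wait until this count reaches zero.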
*/ + if (--kctx->jctx.job_nr == 0) + wake_up(&kctx->jctx.zero_jobs_wait); /* All events are safely queued now, and we can signal any waiter + * that we've got no more jobs (so we can be safely terminated) */ + } + + return need_to_try_schedule_context; +} + +KBASE_EXPORT_TEST_API(jd_done_nolock) + +#ifdef CONFIG_GPU_TRACEPOINTS +enum { + CORE_REQ_DEP_ONLY, + CORE_REQ_SOFT, + CORE_REQ_COMPUTE, + CORE_REQ_FRAGMENT, + CORE_REQ_VERTEX, + CORE_REQ_TILER, + CORE_REQ_FRAGMENT_VERTEX, + CORE_REQ_FRAGMENT_VERTEX_TILER, + CORE_REQ_FRAGMENT_TILER, + CORE_REQ_VERTEX_TILER, + CORE_REQ_UNKNOWN +}; +static const char * const core_req_strings[] = { + "Dependency Only Job", + "Soft Job", + "Compute Shader Job", + "Fragment Shader Job", + "Vertex/Geometry Shader Job", + "Tiler Job", + "Fragment Shader + Vertex/Geometry Shader Job", + "Fragment Shader + Vertex/Geometry Shader Job + Tiler Job", + "Fragment Shader + Tiler Job", + "Vertex/Geometry Shader Job + Tiler Job", + "Unknown Job" +}; +static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) +{ + if (core_req & BASE_JD_REQ_SOFT_JOB) + return core_req_strings[CORE_REQ_SOFT]; + if (core_req & BASE_JD_REQ_ONLY_COMPUTE) + return core_req_strings[CORE_REQ_COMPUTE]; + switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) { + case BASE_JD_REQ_DEP: + return core_req_strings[CORE_REQ_DEP_ONLY]; + case BASE_JD_REQ_FS: + return core_req_strings[CORE_REQ_FRAGMENT]; + case BASE_JD_REQ_CS: + return core_req_strings[CORE_REQ_VERTEX]; + case BASE_JD_REQ_T: + return core_req_strings[CORE_REQ_TILER]; + case (BASE_JD_REQ_FS | BASE_JD_REQ_CS): + return core_req_strings[CORE_REQ_FRAGMENT_VERTEX]; + case (BASE_JD_REQ_FS | BASE_JD_REQ_T): + return core_req_strings[CORE_REQ_FRAGMENT_TILER]; + case (BASE_JD_REQ_CS | BASE_JD_REQ_T): + return core_req_strings[CORE_REQ_VERTEX_TILER]; + case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T): + return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER]; + } + return core_req_strings[CORE_REQ_UNKNOWN]; +} +#endif + +mali_bool jd_submit_atom(kbase_context *kctx, + const base_jd_atom_v2 *user_atom, + kbase_jd_atom *katom) +{ + kbase_jd_context *jctx = &kctx->jctx; + base_jd_core_req core_req; + int queued = 0; + int i; + mali_bool ret; + + /* Update the TOTAL number of jobs. This includes those not tracked by + * the scheduler: 'not ready to run' and 'dependency-only' jobs. 
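+ * The matching decrement is made in jd_done_nolock() when the atom completes.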
*/ + jctx->job_nr++; + + core_req = user_atom->core_req; + + katom->udata = user_atom->udata; + katom->kctx = kctx; + katom->nr_extres = user_atom->nr_extres; + katom->extres = NULL; + katom->device_nr = user_atom->device_nr; + katom->affinity = 0; + katom->jc = user_atom->jc; + katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; + katom->core_req = core_req; + katom->nice_prio = user_atom->prio; + katom->atom_flags = 0; + katom->retry_count = 0; +#ifdef CONFIG_KDS + /* Start by assuming that the KDS dependencies are satisfied, + * kbase_jd_pre_external_resources will correct this if there are dependencies */ + katom->kds_dep_satisfied = MALI_TRUE; + katom->kds_rset = NULL; +#endif /* CONFIG_KDS */ + + /* Add dependencies */ + for (i = 0; i < 2; i++) { + int dep_atom_number = user_atom->pre_dep[i]; + katom->dep_atom[i] = NULL; + if (dep_atom_number) { + kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number]; + + if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED || dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) { + if (dep_atom->event_code != BASE_JD_EVENT_DONE) { + if (i == 1 && katom->dep_atom[0]) { + /* Remove the previous dependency */ + list_del(&katom->dep_item[0]); + katom->dep_atom[0] = NULL; + } + /* Atom has completed, propagate the error code if any */ + katom->event_code = dep_atom->event_code; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + if ((katom->core_req & + BASEP_JD_REQ_ATOM_TYPE) + == BASE_JD_REQ_SOFT_REPLAY) { + int status = + kbase_replay_process(katom); + + if ((status & + MALI_REPLAY_STATUS_MASK) + == MALI_REPLAY_STATUS_REPLAYING) { + ret = (status & + MALI_REPLAY_FLAG_JS_RESCHED); + goto out; + } + } + ret = jd_done_nolock(katom); + goto out; + } + } else { + /* Atom is in progress, add this atom to the list */ + list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]); + katom->dep_atom[i] = dep_atom; + queued = 1; + } + } + } + + /* These must occur after the above loop to ensure that an atom that + * depends on a previous atom with the same number behaves as expected */ + katom->event_code = BASE_JD_EVENT_DONE; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + + /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ + if (0 == katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) + { + dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL"); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + ret = jd_done_nolock(katom); + goto out; + } + + /* + * If the priority is increased we need to check the caller has security caps to do this, if + * priority is decreased then this is ok as the result will have no negative impact on other + * processes running. 
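+ * The fixed-point scaling below maps the signed -128..127 priority onto the 0..39 NICE range: -128 maps to 0, 0 maps to 20 and +127 maps to 39.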
+ */ + if (0 > katom->nice_prio) { + mali_bool access_allowed; + access_allowed = kbase_security_has_capability(kctx, KBASE_SEC_MODIFY_PRIORITY, KBASE_SEC_FLAG_NOAUDIT); + if (!access_allowed) { + /* For unprivileged processes - a negative priority is interpreted as zero */ + katom->nice_prio = 0; + } + } + + /* Scale priority range to use NICE range */ + if (katom->nice_prio) { + /* Remove sign for calculation */ + int nice_priority = katom->nice_prio + 128; + /* Fixed point maths to scale from ..255 to 0..39 (NICE range with +20 offset) */ + katom->nice_prio = (((20 << 16) / 128) * nice_priority) >> 16; + } + + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { + /* handle what we need to do to access the external resources */ + if (MALI_ERROR_NONE != kbase_jd_pre_external_resources(katom, user_atom)) { + /* setup failed (no access, bad resource, unknown resource types, etc.) */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + ret = jd_done_nolock(katom); + goto out; + } + } + + /* Initialize the jobscheduler policy for this atom. Function will + * return error if the atom is malformed. + * + * Soft-jobs never enter the job scheduler but have their own initialize method. + * + * If either fail then we immediately complete the atom with an error. + */ + if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) { + kbasep_js_policy *js_policy = &(kctx->kbdev->js_data.policy); + if (MALI_ERROR_NONE != kbasep_js_policy_init_job(js_policy, kctx, katom)) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + ret = jd_done_nolock(katom); + goto out; + } + } else { + /* Soft-job */ + if (MALI_ERROR_NONE != kbase_prepare_soft_job(katom)) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + ret = jd_done_nolock(katom); + goto out; + } + } + +#ifdef CONFIG_GPU_TRACEPOINTS + katom->work_id = atomic_inc_return(&jctx->work_id); + trace_gpu_job_enqueue((u32)kctx, katom->work_id, kbasep_map_core_reqs_to_string(katom->core_req)); +#endif + + if (queued) { + ret = MALI_FALSE; + goto out; + } +#ifdef CONFIG_KDS + if (!katom->kds_dep_satisfied) { + /* Queue atom due to KDS dependency */ + ret = MALI_FALSE; + goto out; + } +#endif /* CONFIG_KDS */ + + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) + == BASE_JD_REQ_SOFT_REPLAY) { + int status = kbase_replay_process(katom); + + if ((status & MALI_REPLAY_STATUS_MASK) + == MALI_REPLAY_STATUS_REPLAYING) + ret = status & MALI_REPLAY_FLAG_JS_RESCHED; + else + ret = jd_done_nolock(katom); + + goto out; + } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + if (kbase_process_soft_job(katom) == 0) { + kbase_finish_soft_job(katom); + ret = jd_done_nolock(katom); + goto out; + } + /* The job has not yet completed */ + list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); + ret = MALI_FALSE; + } else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + katom->status = KBASE_JD_ATOM_STATE_IN_JS; + ret = kbasep_js_add_job(kctx, katom); + } else { + /* This is a pure dependency. 
Resolve it immediately */ + ret = jd_done_nolock(katom); + } + + out: + return ret; +} + +mali_error kbase_jd_submit(kbase_context *kctx, const kbase_uk_job_submit *submit_data) +{ + kbase_jd_context *jctx = &kctx->jctx; + mali_error err = MALI_ERROR_NONE; + int i; + mali_bool need_to_try_schedule_context = MALI_FALSE; + kbase_device *kbdev; + void *user_addr; + + /* + * kbase_jd_submit isn't expected to fail and so all errors with the jobs + * are reported by immediately falling them (through event system) + */ + kbdev = kctx->kbdev; + + beenthere(kctx,"%s", "Enter"); + + if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != 0) { + dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it"); + return MALI_ERROR_FUNCTION_FAILED; + } + + if (submit_data->stride != sizeof(base_jd_atom_v2)) { + dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel"); + return MALI_ERROR_FUNCTION_FAILED; + } + + user_addr = get_compat_pointer(&submit_data->addr); + + KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight)); + + for (i = 0; i < submit_data->nr_atoms; i++) { + base_jd_atom_v2 user_atom; + kbase_jd_atom *katom; + + if (copy_from_user(&user_atom, user_addr, sizeof(user_atom)) != 0) { + err = MALI_ERROR_FUNCTION_FAILED; + KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(submit_data->nr_atoms - i, &kctx->timeline.jd_atoms_in_flight)); + break; + } + + user_addr = (void *)((uintptr_t) user_addr + submit_data->stride); + + mutex_lock(&jctx->lock); + katom = &jctx->atoms[user_atom.atom_number]; + + while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) { + /* Atom number is already in use, wait for the atom to + * complete + */ + mutex_unlock(&jctx->lock); + + /* This thread will wait for the atom to complete. Due + * to thread scheduling we are not sure that the other + * thread that owns the atom will also schedule the + * context, so we force the scheduler to be active and + * hence eventually schedule this context at some point + * later. 
+ */ + kbasep_js_try_schedule_head_ctx(kctx->kbdev); + if (wait_event_killable(katom->completed, + katom->status == KBASE_JD_ATOM_STATE_UNUSED)) { + /* We're being killed so the result code + * doesn't really matter + */ + return MALI_ERROR_NONE; + } + mutex_lock(&jctx->lock); + } + + need_to_try_schedule_context |= + jd_submit_atom(kctx, &user_atom, katom); + mutex_unlock(&jctx->lock); + } + + if (need_to_try_schedule_context) + kbasep_js_try_schedule_head_ctx(kbdev); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_jd_submit) + +static void kbasep_jd_cacheclean(kbase_device *kbdev) +{ + /* Limit the number of loops to avoid a hang if the interrupt is missed */ + u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + + mutex_lock(&kbdev->cacheclean_lock); + + /* use GPU_COMMAND completion solution */ + /* clean & invalidate the caches */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CLEAN_INV_CACHES, NULL); + + /* wait for cache flush to complete before continuing */ + while (--max_loops && (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & CLEAN_CACHES_COMPLETED) == 0) + ; + + /* clear the CLEAN_CACHES_COMPLETED irq */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, CLEAN_CACHES_COMPLETED); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), CLEAN_CACHES_COMPLETED, NULL); + KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.state != KBASE_INSTR_STATE_CLEANING, + "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang."); + + mutex_unlock(&kbdev->cacheclean_lock); +} + +/** + * This function: + * - requeues the job from the runpool (if it was soft-stopped/removed from NEXT registers) + * - removes it from the system if it finished/failed/was cancelled. + * - resolves dependencies to add dependent jobs to the context, potentially starting them if necessary (which may add more references to the context) + * - releases the reference to the context from the no-longer-running job. + * - Handles retrying submission outside of IRQ context if it failed from within IRQ context. + */ +static void jd_done_worker(struct work_struct *data) +{ + kbase_jd_atom *katom = container_of(data, kbase_jd_atom, work); + kbase_jd_context *jctx; + kbase_context *kctx; + kbasep_js_kctx_info *js_kctx_info; + kbasep_js_policy *js_policy; + kbase_device *kbdev; + kbasep_js_device_data *js_devdata; + u64 cache_jc = katom->jc; + kbasep_js_atom_retained_state katom_retained_state; + + /* Soft jobs should never reach this function */ + KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + + kctx = katom->kctx; + jctx = &kctx->jctx; + kbdev = kctx->kbdev; + js_kctx_info = &kctx->jctx.sched_info; + + js_devdata = &kbdev->js_data; + js_policy = &kbdev->js_data.policy; + + KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); + /* + * Begin transaction on JD context and JS context + */ + mutex_lock(&jctx->lock); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + /* This worker only gets called on contexts that are scheduled *in*. This is + * because it only happens in response to an IRQ from a job that was + * running. 
+ */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != MALI_FALSE); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) && katom->event_code != BASE_JD_EVENT_DONE && !(katom->event_code & BASE_JD_SW_EVENT)) + kbasep_jd_cacheclean(kbdev); /* cache flush when jobs complete with non-done codes */ + else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) { + if (kbdev->gpu_props.num_core_groups > 1 && + !(katom->affinity & kbdev->gpu_props.props.coherency_info.group[0].core_mask) && + (katom->affinity & kbdev->gpu_props.props.coherency_info.group[1].core_mask)) { + KBASE_LOG(2, kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n"); + kbasep_jd_cacheclean(kbdev); + } + } + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) && + (katom->core_req & BASE_JD_REQ_FS) && + katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT && + (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) && + !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)){ + KBASE_LOG(2, kbdev->dev, + "Soft-stopped fragment shader job got a TILE_RANGE_FAULT." \ + "Possible HW issue, trying SW workaround\n" ); + if (kbasep_10969_workaround_clamp_coordinates(katom)){ + /* The job had a TILE_RANGE_FAULT after was soft-stopped. + * Due to an HW issue we try to execute the job + * again. + */ + KBASE_LOG(2, kbdev->dev, "Clamping has been executed, try to rerun the job\n" ); + katom->event_code = BASE_JD_EVENT_STOPPED; + katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN; + + /* The atom will be requeued, but requeing does not submit more + * jobs. If this was the last job, we must also ensure that more + * jobs will be run on slot 0 - this is a Fragment job. */ + kbasep_js_set_job_retry_submit_slot(katom, 0); + } + } + + /* If job was rejected due to BASE_JD_EVENT_PM_EVENT but was not + * specifically targeting core group 1, then re-submit targeting core + * group 0 */ + if (katom->event_code == BASE_JD_EVENT_PM_EVENT && !(katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) { + katom->event_code = BASE_JD_EVENT_STOPPED; + /* Don't need to worry about any previously set retry-slot - it's + * impossible for it to have been set previously, because we guarantee + * kbase_jd_done() was called with done_code==0 on this atom */ + kbasep_js_set_job_retry_submit_slot(katom, 1); + } + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) + kbase_as_poking_timer_release_atom(kbdev, kctx, katom); + + /* Release cores this job was using (this might power down unused cores, and + * cause extra latency if a job submitted here - such as depenedent jobs - + * would use those cores) */ + kbasep_js_job_check_deref_cores(kbdev, katom); + + /* Retain state before the katom disappears */ + kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); + + if (!kbasep_js_has_atom_finished(&katom_retained_state)) { + unsigned long flags; + /* Requeue the atom on soft-stop / removed from NEXT registers */ + KBASE_LOG(2, kbdev->dev, "JS: Soft Stopped/Removed from next on Ctx %p; Requeuing", kctx); + + mutex_lock(&js_devdata->runpool_mutex); + kbasep_js_clear_job_retry_submit(katom); + + KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, katom)); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbasep_js_policy_enqueue_job(js_policy, katom); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + /* A STOPPED/REMOVED job must cause a re-submit to happen, in case it + * was the last job left. Crucially, work items on work queues can run + * out of order e.g. 
on different CPUs, so being able to submit from + * the IRQ handler is not a good indication that we don't need to run + * jobs; the submitted job could be processed on the work-queue + * *before* the stopped job, even though it was submitted after. */ + { + int tmp; + KBASE_DEBUG_ASSERT(kbasep_js_get_atom_retry_submit_slot(&katom_retained_state, &tmp) != MALI_FALSE); + CSTD_UNUSED(tmp); + } + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + } else { + /* Remove the job from the system for all other reasons */ + mali_bool need_to_try_schedule_context; + + kbasep_js_remove_job(kbdev, kctx, katom); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */ + + need_to_try_schedule_context = jd_done_nolock(katom); + + /* This ctx is already scheduled in, so return value guarenteed FALSE */ + KBASE_DEBUG_ASSERT(need_to_try_schedule_context == MALI_FALSE); + } + /* katom may have been freed now, do not use! */ + + /* + * Transaction complete + */ + mutex_unlock(&jctx->lock); + + /* Job is now no longer running, so can now safely release the context + * reference, and handle any actions that were logged against the atom's retained state */ + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); + + KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); +} + +/** + * Work queue job cancel function + * Only called as part of 'Zapping' a context (which occurs on termination) + * Operates serially with the jd_done_worker() on the work queue. + * + * This can only be called on contexts that aren't scheduled. + * + * @note We don't need to release most of the resources that would occur on + * kbase_jd_done() or jd_done_worker(), because the atoms here must not be + * running (by virtue of only being called on contexts that aren't + * scheduled). The only resources that are an exception to this are: + * - those held by kbasep_js_job_check_ref_cores(), because these resources are + * held for non-running atoms as well as running atoms. + */ +static void jd_cancel_worker(struct work_struct *data) +{ + kbase_jd_atom *katom = container_of(data, kbase_jd_atom, work); + kbase_jd_context *jctx; + kbase_context *kctx; + kbasep_js_kctx_info *js_kctx_info; + mali_bool need_to_try_schedule_context; + kbase_device *kbdev; + + /* Soft jobs should never reach this function */ + KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + + kctx = katom->kctx; + kbdev = kctx->kbdev; + jctx = &kctx->jctx; + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); + + /* This only gets called on contexts that are scheduled out. Hence, we must + * make sure we don't de-ref the number of running jobs (there aren't + * any), nor must we try to schedule out the context (it's already + * scheduled out). + */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled == MALI_FALSE); + + /* Release cores this job was using (this might power down unused cores) */ + kbasep_js_job_check_deref_cores(kctx->kbdev, katom); + + /* Scheduler: Remove the job from the system */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + kbasep_js_remove_cancelled_job(kbdev, kctx, katom); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&jctx->lock); + + need_to_try_schedule_context = jd_done_nolock(katom); + /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to + * schedule the context. 
There's also no need for the jsctx_mutex to have been taken + * around this too. */ + KBASE_DEBUG_ASSERT(need_to_try_schedule_context == MALI_FALSE); + + /* katom may have been freed now, do not use! */ + mutex_unlock(&jctx->lock); + +} + +/** + * @brief Complete a job that has been removed from the Hardware + * + * This must be used whenever a job has been removed from the Hardware, e.g.: + * - An IRQ indicates that the job finished (for both error and 'done' codes) + * - The job was evicted from the JSn_HEAD_NEXT registers during a Soft/Hard stop. + * + * Some work is carried out immediately, and the rest is deferred onto a workqueue + * + * This can be called safely from atomic context. + * + * The caller must hold kbasep_js_device_data::runpool_irq::lock + * + */ +void kbase_jd_done(kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, + kbasep_js_atom_done_code done_code) +{ + kbase_context *kctx; + kbase_device *kbdev; + KBASE_DEBUG_ASSERT(katom); + kctx = katom->kctx; + KBASE_DEBUG_ASSERT(kctx); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev); + + if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + + kbase_timeline_job_slot_done(kbdev, kctx, katom, slot_nr, done_code); + + KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0); + + kbasep_js_job_done_slot_irq(katom, slot_nr, end_timestamp, done_code); + + katom->slot_nr = slot_nr; + + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, jd_done_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + +KBASE_EXPORT_TEST_API(kbase_jd_done) + +void kbase_jd_cancel(kbase_device *kbdev, kbase_jd_atom *katom) +{ + kbase_context *kctx; + kbasep_js_kctx_info *js_kctx_info; + KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(NULL != katom); + kctx = katom->kctx; + KBASE_DEBUG_ASSERT(NULL != kctx); + + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); + + /* This should only be done from a context that is not scheduled */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled == MALI_FALSE); + + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, jd_cancel_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + +typedef struct zap_reset_data { + /* The stages are: + * 1. The timer has never been called + * 2. The zap has timed out, all slots are soft-stopped - the GPU reset will happen. + * The GPU has been reset when kbdev->reset_waitq is signalled + * + * (-1 - The timer has been cancelled) + */ + int stage; + kbase_device *kbdev; + struct hrtimer timer; + spinlock_t lock; +} zap_reset_data; + +static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer) +{ + zap_reset_data *reset_data = container_of(timer, zap_reset_data, timer); + kbase_device *kbdev = reset_data->kbdev; + unsigned long flags; + + spin_lock_irqsave(&reset_data->lock, flags); + + if (reset_data->stage == -1) + goto out; + + if (kbase_prepare_to_reset_gpu(kbdev)) { + dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. 
process exit)\n", ZAP_TIMEOUT); + kbase_reset_gpu(kbdev); + } + + reset_data->stage = 2; + + out: + spin_unlock_irqrestore(&reset_data->lock, flags); + + return HRTIMER_NORESTART; +} + +void kbase_jd_zap_context(kbase_context *kctx) +{ + kbase_jd_atom *katom; + struct list_head *entry; + kbase_device *kbdev; + zap_reset_data reset_data; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kctx); + + kbdev = kctx->kbdev; + + KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u); + kbase_job_zap_context(kctx); + + mutex_lock(&kctx->jctx.lock); + + /* + * While holding the kbase_jd_context lock clean up jobs which are known to kbase but are + * queued outside the job scheduler. + */ + + list_for_each( entry, &kctx->waiting_soft_jobs) { + katom = list_entry(entry, kbase_jd_atom, dep_item[0]); + kbase_cancel_soft_job(katom); + } + /* kctx->waiting_soft_jobs is not valid after this point */ + +#ifdef CONFIG_KDS + + /* For each job waiting on a kds resource, cancel the wait and force the job to + * complete early, this is done so that we don't leave jobs outstanding waiting + * on kds resources which may never be released when contexts are zapped, resulting + * in a hang. + * + * Note that we can safely iterate over the list as the kbase_jd_context lock is held, + * this prevents items being removed when calling job_done_nolock in kbase_cancel_kds_wait_job. + */ + + list_for_each( entry, &kctx->waiting_kds_resource) { + katom = list_entry(entry, kbase_jd_atom, node); + + kbase_cancel_kds_wait_job(katom); + } +#endif + + mutex_unlock(&kctx->jctx.lock); + + hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + reset_data.timer.function = zap_timeout_callback; + + spin_lock_init(&reset_data.lock); + + reset_data.kbdev = kbdev; + reset_data.stage = 1; + + hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT), HRTIMER_MODE_REL); + + /* Wait for all jobs to finish, and for the context to be not-scheduled + * (due to kbase_job_zap_context(), we also guarentee it's not in the JS + * policy queue either */ + wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0); + wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, kctx->jctx.sched_info.ctx.is_scheduled == MALI_FALSE); + + spin_lock_irqsave(&reset_data.lock, flags); + if (reset_data.stage == 1) { + /* The timer hasn't run yet - so cancel it */ + reset_data.stage = -1; + } + spin_unlock_irqrestore(&reset_data.lock, flags); + + hrtimer_cancel(&reset_data.timer); + + if (reset_data.stage == 2) { + /* The reset has already started. 
+ * Wait for the reset to complete + */ + wait_event(kbdev->reset_wait, atomic_read(&kbdev->reset_gpu) == KBASE_RESET_GPU_NOT_PENDING); + } + destroy_hrtimer_on_stack(&reset_data.timer); + + KBASE_LOG(1, kbdev->dev, "Zap: Finished Context %p", kctx); + + /* Ensure that the signallers of the waitqs have finished */ + mutex_lock(&kctx->jctx.lock); + mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&kctx->jctx.lock); +} + +KBASE_EXPORT_TEST_API(kbase_jd_zap_context) + +mali_error kbase_jd_init(kbase_context *kctx) +{ + int i; + mali_error mali_err = MALI_ERROR_NONE; +#ifdef CONFIG_KDS + int err; +#endif /* CONFIG_KDS */ + + KBASE_DEBUG_ASSERT(kctx); + + kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", 0, 1); + if (NULL == kctx->jctx.job_done_wq) { + mali_err = MALI_ERROR_OUT_OF_MEMORY; + goto out1; + } + + for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { + init_waitqueue_head(&kctx->jctx.atoms[i].completed); + + INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]); + INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]); + + /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */ + kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID; + kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; + } + + mutex_init(&kctx->jctx.lock); + + init_waitqueue_head(&kctx->jctx.zero_jobs_wait); + + spin_lock_init(&kctx->jctx.tb_lock); + +#ifdef CONFIG_KDS + err = kds_callback_init(&kctx->jctx.kds_cb, 0, kds_dep_clear); + if (0 != err) { + mali_err = MALI_ERROR_FUNCTION_FAILED; + goto out2; + } +#endif /* CONFIG_KDS */ + + kctx->jctx.job_nr = 0; + + return MALI_ERROR_NONE; + +#ifdef CONFIG_KDS + out2: + destroy_workqueue(kctx->jctx.job_done_wq); +#endif /* CONFIG_KDS */ + out1: + return mali_err; +} + +KBASE_EXPORT_TEST_API(kbase_jd_init) + +void kbase_jd_exit(kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); + +#ifdef CONFIG_KDS + kds_callback_term(&kctx->jctx.kds_cb); +#endif /* CONFIG_KDS */ + /* Work queue is emptied by this */ + destroy_workqueue(kctx->jctx.job_done_wq); +} + +KBASE_EXPORT_TEST_API(kbase_jd_exit) diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.c b/drivers/gpu/arm/midgard/mali_kbase_jm.c new file mode 100755 index 00000000000..c85f63ed05a --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_jm.c @@ -0,0 +1,1400 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_jm.c + * Base kernel job manager APIs + */ + +#include +#include +#include +#include +#include + +#include "mali_kbase_jm.h" + +#define beenthere(kctx, f, a...) 
KBASE_LOG(1, kctx->kbdev->dev, "%s:" f, __func__, ##a) + +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS +u64 mali_js0_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; +u64 mali_js1_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; +u64 mali_js2_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; +#endif + + +static void kbasep_try_reset_gpu_early(kbase_device *kbdev); + +#ifdef CONFIG_GPU_TRACEPOINTS +static char *kbasep_make_job_slot_string(int js, char *js_string) +{ + sprintf(js_string, "job_slot_%i", js); + return js_string; +} +#endif + +static void kbase_job_hw_submit(kbase_device *kbdev, kbase_jd_atom *katom, int js) +{ + kbase_context *kctx; + u32 cfg; + u64 jc_head = katom->jc; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(katom); + + kctx = katom->kctx; + + /* Command register must be available */ + KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); + /* Affinity is not violating */ + kbase_js_debug_log_current_affinities(kbdev); + KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js, katom->affinity)); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), jc_head & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), jc_head >> 32, kctx); + +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS + { + u64 mask; + u32 value; + + if( 0 == js ) + { + mask = mali_js0_affinity_mask; + } + else if( 1 == js ) + { + mask = mali_js1_affinity_mask; + } + else + { + mask = mali_js2_affinity_mask; + } + + value = katom->affinity & (mask & 0xFFFFFFFF); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_LO), value, kctx); + + value = (katom->affinity >> 32) & ((mask>>32) & 0xFFFFFFFF); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_HI), value, kctx); + } +#else + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_LO), katom->affinity & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_HI), katom->affinity >> 32, kctx); +#endif + + /* start MMU, medium priority, cache clean/flush on end, clean/flush on start */ + cfg = kctx->as_nr | JSn_CONFIG_END_FLUSH_CLEAN_INVALIDATE | JSn_CONFIG_START_MMU | JSn_CONFIG_START_FLUSH_CLEAN_INVALIDATE | JSn_CONFIG_THREAD_PRI(8); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + if (!kbdev->jm_slots[js].job_chain_flag) { + cfg |= JSn_CONFIG_JOB_CHAIN_FLAG; + katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN; + kbdev->jm_slots[js].job_chain_flag = MALI_TRUE; + } else { + katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN; + kbdev->jm_slots[js].job_chain_flag = MALI_FALSE; + } + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_CONFIG_NEXT), cfg, kctx); + + /* Write an approximate start timestamp. + * It's approximate because there might be a job in the HEAD register. In + * such cases, we'll try to make a better approximation in the IRQ handler + * (up to the KBASE_JS_IRQ_THROTTLE_TIME_US). */ + katom->start_timestamp = ktime_get(); + + /* GO ! 
*/ + KBASE_LOG(2, kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx", katom, kctx, js, jc_head, katom->affinity); + + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, (u32) katom->affinity); + +#ifdef CONFIG_MALI_GATOR_SUPPORT + kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), kctx, kbase_jd_atom_id(kctx, katom)); +#endif /* CONFIG_MALI_GATOR_SUPPORT */ +#ifdef CONFIG_GPU_TRACEPOINTS + if (kbasep_jm_nr_jobs_submitted(&kbdev->jm_slots[js]) == 1) + { + /* If this is the only job on the slot, trace it as starting */ + char js_string[16]; + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string), ktime_to_ns(katom->start_timestamp), (u32)katom->kctx, 0, katom->work_id); + kbdev->jm_slots[js].last_context = katom->kctx; + } +#endif + kbase_timeline_job_slot_submit(kbdev, kctx, katom, js); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), JSn_COMMAND_START, katom->kctx); +} + +void kbase_job_submit_nolock(kbase_device *kbdev, kbase_jd_atom *katom, int js) +{ + kbase_jm_slot *jm_slots; + + KBASE_DEBUG_ASSERT(kbdev); + + jm_slots = kbdev->jm_slots; + + /* + * We can have: + * - one job already done (pending interrupt), + * - one running, + * - one ready to be run. + * Hence a maximum of 3 inflight jobs. We have a 4 job + * queue, which I hope will be enough... + */ + kbasep_jm_enqueue_submit_slot(&jm_slots[js], katom); + kbase_job_hw_submit(kbdev, katom, js); +} + +void kbase_job_done_slot(kbase_device *kbdev, int s, u32 completion_code, u64 job_tail, ktime_t *end_timestamp) +{ + kbase_jm_slot *slot; + kbase_jd_atom *katom; + mali_addr64 jc_head; + kbase_context *kctx; + + KBASE_DEBUG_ASSERT(kbdev); + + if (completion_code != BASE_JD_EVENT_DONE && completion_code != BASE_JD_EVENT_STOPPED) + dev_err(kbdev->dev, "t6xx: GPU fault 0x%02lx from job slot %d\n", (unsigned long)completion_code, s); + + /* IMPORTANT: this function must only contain work necessary to complete a + * job from a Real IRQ (and not 'fake' completion, e.g. from + * Soft-stop). 
For general work that must happen no matter how the job was + * removed from the hardware, place it in kbase_jd_done() */ + + slot = &kbdev->jm_slots[s]; + katom = kbasep_jm_dequeue_submit_slot(slot); + + /* If the katom completed is because it's a dummy job for HW workarounds, then take no further action */ + if (kbasep_jm_is_dummy_workaround_job(kbdev, katom)) { + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, NULL, NULL, 0, s, completion_code); + return; + } + + jc_head = katom->jc; + kctx = katom->kctx; + + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, jc_head, s, completion_code); + + if (completion_code != BASE_JD_EVENT_DONE && completion_code != BASE_JD_EVENT_STOPPED) { + +#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0 + KBASE_TRACE_DUMP(kbdev); +#endif + } + if (job_tail != 0) { + mali_bool was_updated = (job_tail != jc_head); + /* Some of the job has been executed, so we update the job chain address to where we should resume from */ + katom->jc = job_tail; + if (was_updated) + KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, kctx, katom, job_tail, s); + } + + /* Only update the event code for jobs that weren't cancelled */ + if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) + katom->event_code = (base_jd_event_code) completion_code; + + kbase_device_trace_register_access(kctx, REG_WRITE, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 1 << s); + + /* Complete the job, and start new ones + * + * Also defer remaining work onto the workqueue: + * - Re-queue Soft-stopped jobs + * - For any other jobs, queue the job back into the dependency system + * - Schedule out the parent context if necessary, and schedule a new one in. + */ +#ifdef CONFIG_GPU_TRACEPOINTS + if (kbasep_jm_nr_jobs_submitted(slot) != 0) { + kbase_jd_atom *katom; + char js_string[16]; + katom = kbasep_jm_peek_idx_submit_slot(slot, 0); /* The atom in the HEAD */ + trace_gpu_sched_switch(kbasep_make_job_slot_string(s, js_string), ktime_to_ns(*end_timestamp), (u32)katom->kctx, 0, katom->work_id); + slot->last_context = katom->kctx; + } else { + char js_string[16]; + trace_gpu_sched_switch(kbasep_make_job_slot_string(s, js_string), ktime_to_ns(ktime_get()), 0, 0, 0); + slot->last_context = 0; + } +#endif + kbase_jd_done(katom, s, end_timestamp, KBASE_JS_ATOM_DONE_START_NEW_ATOMS); +} + +/** + * Update the start_timestamp of the job currently in the HEAD, based on the + * fact that we got an IRQ for the previous set of completed jobs. 
+ * + * The estimate also takes into account the KBASE_JS_IRQ_THROTTLE_TIME_US and + * the time the job was submitted, to work out the best estimate (which might + * still result in an over-estimate to the calculated time spent) + */ +STATIC void kbasep_job_slot_update_head_start_timestamp(kbase_device *kbdev, kbase_jm_slot *slot, ktime_t end_timestamp) +{ + KBASE_DEBUG_ASSERT(slot); + + if (kbasep_jm_nr_jobs_submitted(slot) > 0) { + kbase_jd_atom *katom; + ktime_t new_timestamp; + ktime_t timestamp_diff; + katom = kbasep_jm_peek_idx_submit_slot(slot, 0); /* The atom in the HEAD */ + + KBASE_DEBUG_ASSERT(katom != NULL); + + if (kbasep_jm_is_dummy_workaround_job(kbdev, katom) != MALI_FALSE) { + /* Don't access the members of HW workaround 'dummy' jobs */ + return; + } + + /* Account for any IRQ Throttle time - makes an overestimate of the time spent by the job */ + new_timestamp = ktime_sub_ns(end_timestamp, KBASE_JS_IRQ_THROTTLE_TIME_US * 1000); + timestamp_diff = ktime_sub(new_timestamp, katom->start_timestamp); + if (ktime_to_ns(timestamp_diff) >= 0) { + /* Only update the timestamp if it's a better estimate than what's currently stored. + * This is because our estimate that accounts for the throttle time may be too much + * of an overestimate */ + katom->start_timestamp = new_timestamp; + } + } +} + +void kbase_job_done(kbase_device *kbdev, u32 done) +{ + unsigned long flags; + int i; + u32 count = 0; + ktime_t end_timestamp = ktime_get(); + kbasep_js_device_data *js_devdata; + + KBASE_DEBUG_ASSERT(kbdev); + js_devdata = &kbdev->js_data; + + KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done); + + memset(&kbdev->slot_submit_count_irq[0], 0, sizeof(kbdev->slot_submit_count_irq)); + + /* write irq throttle register, this will prevent irqs from occurring until + * the given number of gpu clock cycles have passed */ + { + int irq_throttle_cycles = atomic_read(&kbdev->irq_throttle_cycles); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), irq_throttle_cycles, NULL); + } + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + while (done) { + kbase_jm_slot *slot; + u32 failed = done >> 16; + + /* treat failed slots as finished slots */ + u32 finished = (done & 0xFFFF) | failed; + + /* Note: This is inherently unfair, as we always check + * for lower numbered interrupts before the higher + * numbered ones.*/ + i = ffs(finished) - 1; + KBASE_DEBUG_ASSERT(i >= 0); + + slot = &kbdev->jm_slots[i]; + + do { + int nr_done; + u32 active; + u32 completion_code = BASE_JD_EVENT_DONE; /* assume OK */ + u64 job_tail = 0; + + if (failed & (1u << i)) { + /* read out the job slot status code if the job slot reported failure */ + completion_code = kbase_reg_read(kbdev, JOB_SLOT_REG(i, JSn_STATUS), NULL); + + switch (completion_code) { + case BASE_JD_EVENT_STOPPED: +#ifdef CONFIG_MALI_GATOR_SUPPORT + kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_SOFT_STOPPED, i), NULL, 0); +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + /* Soft-stopped job - read the value of JS_TAIL so that the job chain can be resumed */ + job_tail = (u64) kbase_reg_read(kbdev, JOB_SLOT_REG(i, JSn_TAIL_LO), NULL) | ((u64) kbase_reg_read(kbdev, JOB_SLOT_REG(i, JSn_TAIL_HI), NULL) << 32); + break; + case BASE_JD_EVENT_NOT_STARTED: + /* PRLAM-10673 can cause a TERMINATED job to come back as NOT_STARTED, but the error interrupt helps us detect it */ + completion_code = BASE_JD_EVENT_TERMINATED; + /* fall throught */ + default: + dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", i, 
completion_code, kbase_exception_name(completion_code)); + } + } + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), done & ((1 << i) | (1 << (i + 16))), NULL); + active = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL); + + if (((active >> i) & 1) == 0 && (((done >> (i + 16)) & 1) == 0)) { + /* There is a potential race we must work around: + * + * 1. A job slot has a job in both current and next registers + * 2. The job in current completes successfully, the IRQ handler reads RAWSTAT + * and calls this function with the relevant bit set in "done" + * 3. The job in the next registers becomes the current job on the GPU + * 4. Sometime before the JOB_IRQ_CLEAR line above the job on the GPU _fails_ + * 5. The IRQ_CLEAR clears the done bit but not the failed bit. This atomically sets + * JOB_IRQ_JS_STATE. However since both jobs have now completed the relevant bits + * for the slot are set to 0. + * + * If we now did nothing then we'd incorrectly assume that _both_ jobs had completed + * successfully (since we haven't yet observed the fail bit being set in RAWSTAT). + * + * So at this point if there are no active jobs left we check to see if RAWSTAT has a failure + * bit set for the job slot. If it does we know that there has been a new failure that we + * didn't previously know about, so we make sure that we record this in active (but we wait + * for the next loop to deal with it). + * + * If we were handling a job failure (i.e. done has the relevant high bit set) then we know that + * the value read back from JOB_IRQ_JS_STATE is the correct number of remaining jobs because + * the failed job will have prevented any futher jobs from starting execution. + */ + u32 rawstat = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL); + + if ((rawstat >> (i + 16)) & 1) { + /* There is a failed job that we've missed - add it back to active */ + active |= (1u << i); + } + } + + KBASE_LOG(2, kbdev->dev, "Job ended with status 0x%08X\n", completion_code); + + nr_done = kbasep_jm_nr_jobs_submitted(slot); + nr_done -= (active >> i) & 1; + nr_done -= (active >> (i + 16)) & 1; + + if (nr_done <= 0) { + dev_warn(kbdev->dev, "Spurious interrupt on slot %d", i); + goto spurious; + } + + count += nr_done; + + while (nr_done) { + if (nr_done == 1) { + kbase_job_done_slot(kbdev, i, completion_code, job_tail, &end_timestamp); + } else { + /* More than one job has completed. Since this is not the last job being reported this time it + * must have passed. This is because the hardware will not allow further jobs in a job slot to + * complete until the faile job is cleared from the IRQ status. 
+ */ + kbase_job_done_slot(kbdev, i, BASE_JD_EVENT_DONE, 0, &end_timestamp); + } + nr_done--; + } + + spurious: + done = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) { + /* Workaround for missing interrupt caused by PRLAM-10883 */ + if (((active >> i) & 1) && (0 == kbase_reg_read(kbdev, JOB_SLOT_REG(i, JSn_STATUS), NULL))) { + /* Force job slot to be processed again */ + done |= (1u << i); + } + } + + failed = done >> 16; + finished = (done & 0xFFFF) | failed; + } while (finished & (1 << i)); + + kbasep_job_slot_update_head_start_timestamp(kbdev, slot, end_timestamp); + } + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + if (atomic_read(&kbdev->reset_gpu) == KBASE_RESET_GPU_COMMITTED) { + /* If we're trying to reset the GPU then we might be able to do it early + * (without waiting for a timeout) because some jobs have completed + */ + kbasep_try_reset_gpu_early(kbdev); + } + + KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count); +} +KBASE_EXPORT_TEST_API(kbase_job_done) + +static mali_bool kbasep_soft_stop_allowed(kbase_device *kbdev, u16 core_reqs) +{ + mali_bool soft_stops_allowed = MALI_TRUE; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { + if ((core_reqs & BASE_JD_REQ_T) != 0) + soft_stops_allowed = MALI_FALSE; + } + return soft_stops_allowed; +} + +static mali_bool kbasep_hard_stop_allowed(kbase_device *kbdev, u16 core_reqs) +{ + mali_bool hard_stops_allowed = MALI_TRUE; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) { + if ((core_reqs & BASE_JD_REQ_T) != 0) + hard_stops_allowed = MALI_FALSE; + } + return hard_stops_allowed; +} + +static void kbasep_job_slot_soft_or_hard_stop_do_action(kbase_device *kbdev, int js, u32 action, u16 core_reqs, kbase_jd_atom * target_katom ) +{ + kbase_context *kctx = target_katom->kctx; +#if KBASE_TRACE_ENABLE + u32 status_reg_before; + u64 job_in_head_before; + u32 status_reg_after; + + /* Check the head pointer */ + job_in_head_before = ((u64) kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_LO), NULL)) + | (((u64) kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_HI), NULL)) << 32); + status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_STATUS), NULL); +#endif + + if (action == JSn_COMMAND_SOFT_STOP) { + mali_bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev, core_reqs); + if (!soft_stop_allowed) { +#ifdef CONFIG_MALI_DEBUG + KBASE_LOG(2, kbdev->dev, "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X", (unsigned int)core_reqs); +#endif /* CONFIG_MALI_DEBUG */ + return; + } + + /* We are about to issue a soft stop, so mark the atom as having been soft stopped */ + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; + } + + if (action == JSn_COMMAND_HARD_STOP) { + mali_bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev, core_reqs); + if (!hard_stop_allowed) { + /* Jobs can be hard-stopped for the following reasons: + * * CFS decides the job has been running too long (and soft-stop has not occurred). + * In this case the GPU will be reset by CFS if the job remains on the GPU. + * + * * The context is destroyed, kbase_jd_zap_context will attempt to hard-stop the job. However + * it also has a watchdog which will cause the GPU to be reset if the job remains on the GPU. + * + * * An (unhandled) MMU fault occurred. As long as BASE_HW_ISSUE_8245 is defined then + * the GPU will be reset. 
+ * + * All three cases result in the GPU being reset if the hard-stop fails, + * so it is safe to just return and ignore the hard-stop request. + */ + dev_warn(kbdev->dev, "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", (unsigned int)core_reqs); + return; + } + } + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316) && action == JSn_COMMAND_SOFT_STOP) { + int i; + kbase_jm_slot *slot; + slot = &kbdev->jm_slots[js]; + + for (i = 0; i < kbasep_jm_nr_jobs_submitted(slot); i++) { + kbase_jd_atom *katom; + + katom = kbasep_jm_peek_idx_submit_slot(slot, i); + + KBASE_DEBUG_ASSERT(katom); + + if (kbasep_jm_is_dummy_workaround_job(kbdev, katom) != MALI_FALSE) { + /* Don't access the members of HW workaround 'dummy' jobs + * + * This assumes that such jobs can't cause HW_ISSUE_8316, and could only be blocked + * by other jobs causing HW_ISSUE_8316 (which will get poked/or eventually get killed) */ + continue; + } + + /* For HW_ISSUE_8316, only 'bad' jobs attacking the system can + * cause this issue: normally, all memory should be allocated in + * multiples of 4 pages, and growable memory should be changed size + * in multiples of 4 pages. + * + * Whilst such 'bad' jobs can be cleared by a GPU reset, the + * locking up of a uTLB entry caused by the bad job could also + * stall other ASs, meaning that other ASs' jobs don't complete in + * the 'grace' period before the reset. We don't want to lose other + * ASs' jobs when they would normally complete fine, so we must + * 'poke' the MMU regularly to help other ASs complete */ + kbase_as_poking_timer_retain_atom(kbdev, katom->kctx, katom); + } + } + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + if (action == JSn_COMMAND_SOFT_STOP) + action = (target_katom->atom_flags & KBASE_KATOM_FLAGS_JOBCHAIN) ? + JSn_COMMAND_SOFT_STOP_1: + JSn_COMMAND_SOFT_STOP_0; + else + action = (target_katom->atom_flags & KBASE_KATOM_FLAGS_JOBCHAIN) ? 
+ JSn_COMMAND_HARD_STOP_1: + JSn_COMMAND_HARD_STOP_0; + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_COMMAND), action, kctx); + +#if KBASE_TRACE_ENABLE + status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_STATUS), NULL); + if (status_reg_after == BASE_JD_EVENT_ACTIVE) { + kbase_jm_slot *slot; + kbase_jd_atom *head; + kbase_context *head_kctx; + + slot = &kbdev->jm_slots[js]; + head = kbasep_jm_peek_idx_submit_slot(slot, slot->submitted_nr - 1); + head_kctx = head->kctx; + + /* We don't need to check kbasep_jm_is_dummy_workaround_job( head ) here: + * - Members are not indirected through + * - The members will all be zero anyway + */ + if (status_reg_before == BASE_JD_EVENT_ACTIVE) + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, job_in_head_before, js); + else + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); + + switch(action) { + case JSn_COMMAND_SOFT_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx, head, head->jc, js); + break; + case JSn_COMMAND_SOFT_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, js); + break; + case JSn_COMMAND_SOFT_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, js); + break; + case JSn_COMMAND_HARD_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx, head, head->jc, js); + break; + case JSn_COMMAND_HARD_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, js); + break; + case JSn_COMMAND_HARD_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js); + break; + default: + BUG(); + break; + } + } else { + if (status_reg_before == BASE_JD_EVENT_ACTIVE) + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, job_in_head_before, js); + else + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); + + switch(action) { + case JSn_COMMAND_SOFT_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, js); + break; + case JSn_COMMAND_SOFT_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, 0, js); + break; + case JSn_COMMAND_SOFT_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, 0, js); + break; + case JSn_COMMAND_HARD_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, js); + break; + case JSn_COMMAND_HARD_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, 0, js); + break; + case JSn_COMMAND_HARD_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js); + break; + default: + BUG(); + break; + } + } +#endif +} + +/* Helper macros used by kbasep_job_slot_soft_or_hard_stop */ +#define JM_SLOT_MAX_JOB_SUBMIT_REGS 2 +#define JM_JOB_IS_CURRENT_JOB_INDEX(n) (1 == n) /* Index of the last job to process */ +#define JM_JOB_IS_NEXT_JOB_INDEX(n) (2 == n) /* Index of the prior to last job to process */ + +/** Soft or hard-stop a slot + * + * This function safely ensures that the correct job is either hard or soft-stopped. + * It deals with evicting jobs from the next registers where appropriate. + * + * This does not attempt to stop or evict jobs that are 'dummy' jobs for HW workarounds. 
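+ *
+ * In outline (a sketch of the flow implemented below): a job still waiting in
+ * the NEXT registers is first evicted by writing JSn_COMMAND_NOP to
+ * JSn_COMMAND_NEXT and, if the eviction succeeded, requeued via kbase_jd_done()
+ * with KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT; only the job already pointed to by
+ * the HEAD registers is then soft/hard-stopped. On hardware with
+ * BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION the eviction is not required, because
+ * the stop command can be targeted at a specific job chain.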
+ * + * @param kbdev The kbase device + * @param kctx The context to soft/hard-stop job(s) from (or NULL is all jobs should be targeted) + * @param js The slot that the job(s) are on + * @param target_katom The atom that should be targeted (or NULL if all jobs from the context should be targeted) + * @param action The action to perform, either JSn_COMMAND_HARD_STOP or JSn_COMMAND_SOFT_STOP + */ +static void kbasep_job_slot_soft_or_hard_stop(kbase_device *kbdev, kbase_context *kctx, int js, kbase_jd_atom *target_katom, u32 action) +{ + kbase_jd_atom *katom; + u8 i; + u8 jobs_submitted; + kbase_jm_slot *slot; + u16 core_reqs; + kbasep_js_device_data *js_devdata; + mali_bool can_safely_stop = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION); + + KBASE_DEBUG_ASSERT(action == JSn_COMMAND_HARD_STOP || action == JSn_COMMAND_SOFT_STOP); + KBASE_DEBUG_ASSERT(kbdev); + js_devdata = &kbdev->js_data; + + slot = &kbdev->jm_slots[js]; + KBASE_DEBUG_ASSERT(slot); + lockdep_assert_held(&js_devdata->runpool_irq.lock); + + jobs_submitted = kbasep_jm_nr_jobs_submitted(slot); + + KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, 1); + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SLOT_SOFT_OR_HARD_STOP, kctx, NULL, 0u, js, jobs_submitted); + + if (jobs_submitted > JM_SLOT_MAX_JOB_SUBMIT_REGS) + i = jobs_submitted - JM_SLOT_MAX_JOB_SUBMIT_REGS; + else + i = 0; + + /* Loop through all jobs that have been submitted to the slot and haven't completed */ + for (; i < jobs_submitted; i++) { + katom = kbasep_jm_peek_idx_submit_slot(slot, i); + + if (kctx && katom->kctx != kctx) + continue; + + if (target_katom && katom != target_katom) + continue; + + if (kbasep_jm_is_dummy_workaround_job(kbdev, katom)) + continue; + + core_reqs = katom->core_req; + + if (JM_JOB_IS_CURRENT_JOB_INDEX(jobs_submitted - i)) { + /* The last job in the slot, check if there is a job in the next register */ + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), NULL) == 0) + kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, action, core_reqs, katom); + else { + /* The job is in the next registers */ + beenthere(kctx, "clearing job from next registers on slot %d", js); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), JSn_COMMAND_NOP, NULL); + /* Check to see if we did remove a job from the next registers */ + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), NULL) != 0 || kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), NULL) != 0) { + /* The job was successfully cleared from the next registers, requeue it */ + kbase_jd_atom *dequeued_katom = kbasep_jm_dequeue_tail_submit_slot(slot); + KBASE_DEBUG_ASSERT(dequeued_katom == katom); + jobs_submitted--; + + /* Set the next registers to NULL */ + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), 0, NULL); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), 0, NULL); + + /* As the job is removed from the next registers we undo the associated + * update to the job_chain_flag for the job slot. 
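+ * (job_chain_flag tracks which of the two disambiguation job-chain tags the
+ * next submission to this slot will be given - see kbase_job_hw_submit() - so
+ * evicting a queued job toggles it back, as done just below.)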
*/ + if (can_safely_stop) + slot->job_chain_flag = !slot->job_chain_flag; + + KBASE_TRACE_ADD_SLOT(kbdev, JM_SLOT_EVICT, dequeued_katom->kctx, dequeued_katom, dequeued_katom->jc, js); + + /* Complete the job, indicate it took no time, but don't submit any more at this point */ + kbase_jd_done(dequeued_katom, js, NULL, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT); + } else { + /* The job transitioned into the current registers before we managed to evict it, + * in this case we fall back to soft/hard-stopping the job */ + beenthere(kctx, "missed job in next register, soft/hard-stopping slot %d", js); + kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, action, core_reqs, katom); + } + } + } else if (JM_JOB_IS_NEXT_JOB_INDEX(jobs_submitted - i)) { + /* There's a job after this one, check to see if that job is in the next registers. + * If so, we need to pay attention to not accidentally stop that one when issuing + * the command to stop the one pointed to by the head registers (as the one in the head + * may finish in the meantime and the one in the next moves to the head). Either the hardware + * has support for this using job chain disambiguation or we need to evict the job + * from the next registers first to ensure we can safely stop the one pointed to by + * the head registers. */ + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), NULL) != 0) { + kbase_jd_atom *check_next_atom; + /* It is - we should remove that job and soft/hard-stop the slot */ + + /* Only proceed when the next job isn't a HW workaround 'dummy' job + * + * This can't be an ASSERT due to MMU fault code: + * - This first hard-stops the job that caused the fault + * - Under HW Issue 8245, it will then reset the GPU + * - This causes a Soft-stop to occur on all slots + * - By the time of the soft-stop, we may (depending on timing) still have: + * - The original job in HEAD, if it's not finished the hard-stop + * - The dummy workaround job in NEXT + * + * Other cases could be coded in future that cause back-to-back Soft/Hard + * stops with dummy workaround jobs in place, e.g. MMU handler code and Job + * Scheduler watchdog timer running in parallel.
+ * + * Note, the index i+1 is valid to peek from: i == jobs_submitted-2, therefore + * i+1 == jobs_submitted-1 */ + check_next_atom = kbasep_jm_peek_idx_submit_slot(slot, i + 1); + if (kbasep_jm_is_dummy_workaround_job(kbdev, check_next_atom) != MALI_FALSE) + continue; + + if (!can_safely_stop) { + beenthere(kctx, "clearing job from next registers on slot %d", js); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), JSn_COMMAND_NOP, NULL); + + /* Check to see if we did remove a job from the next registers */ + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), NULL) != 0 || kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), NULL) != 0) { + /* We did remove a job from the next registers, requeue it */ + kbase_jd_atom *dequeued_katom = kbasep_jm_dequeue_tail_submit_slot(slot); + KBASE_DEBUG_ASSERT(dequeued_katom != NULL); + jobs_submitted--; + + /* Set the next registers to NULL */ + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), 0, NULL); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), 0, NULL); + + KBASE_TRACE_ADD_SLOT(kbdev, JM_SLOT_EVICT, dequeued_katom->kctx, dequeued_katom, dequeued_katom->jc, js); + + /* Complete the job, indicate it took no time, but don't submit any more at this point */ + kbase_jd_done(dequeued_katom, js, NULL, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT); + } else { + /* We missed the job, that means the job we're interested in left the hardware before + * we managed to do anything, so we can proceed to the next job */ + continue; + } + } + + /* Next is now free, so we can soft/hard-stop the slot */ + beenthere(kctx, "soft/hard-stopped slot %d (there was a job in next which was successfully cleared)\n", js); + kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, action, core_reqs, katom); + } + /* If there was no job in the next registers, then the job we were + * interested in has finished, so we need not take any action + */ + } + } + + KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, 0); +} + +void kbase_job_kill_jobs_from_context(kbase_context *kctx) +{ + unsigned long flags; + kbase_device *kbdev; + kbasep_js_device_data *js_devdata; + int i; + + KBASE_DEBUG_ASSERT(kctx != NULL); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + + /* Cancel any remaining running jobs for this kctx */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + /* Invalidate all jobs in context, to prevent re-submitting */ + for (i = 0; i < BASE_JD_ATOM_COUNT; i++) + kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_CANCELLED; + + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_hardstop(kctx, i, NULL); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); +} + +void kbase_job_zap_context(kbase_context *kctx) +{ + kbase_device *kbdev; + kbasep_js_device_data *js_devdata; + kbasep_js_kctx_info *js_kctx_info; + int i; + mali_bool evict_success; + + KBASE_DEBUG_ASSERT(kctx != NULL); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + /* + * Critical assumption: No more submission is possible outside of the + * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs) + * whilst the kbase_context is terminating. 
+ */ + + /* First, atomically do the following: + * - mark the context as dying + * - try to evict it from the policy queue */ + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + js_kctx_info->ctx.is_dying = MALI_TRUE; + + KBASE_LOG(1, kbdev->dev, "Zap: Try Evict Ctx %p", kctx); + mutex_lock(&js_devdata->queue_mutex); + evict_success = kbasep_js_policy_try_evict_ctx(&js_devdata->policy, kctx); + mutex_unlock(&js_devdata->queue_mutex); + + /* + * At this point we know: + * - If eviction succeeded, it was in the policy queue, but now no longer is + * - We must cancel the jobs here. No Power Manager active reference to + * release. + * - This happens asynchronously - kbase_jd_zap_context() will wait for + * those jobs to be killed. + * - If eviction failed, then it wasn't in the policy queue. It is one of + * the following: + * - a. it didn't have any jobs, and so is not in the Policy Queue or the + * Run Pool (not scheduled) + * - Hence, no more work required to cancel jobs. No Power Manager active + * reference to release. + * - b. it was in the middle of a scheduling transaction (and thus must + * have at least 1 job). This can happen from a syscall or a kernel thread. + * We still hold the jsctx_mutex, and so the thread must be waiting inside + * kbasep_js_try_schedule_head_ctx(), before checking whether the runpool + * is full. That thread will continue after we drop the mutex, and will + * notice the context is dying. It will rollback the transaction, killing + * all jobs at the same time. kbase_jd_zap_context() will wait for those + * jobs to be killed. + * - Hence, no more work required to cancel jobs, or to release the Power + * Manager active reference. + * - c. it is scheduled, and may or may not be running jobs + * - We must cause it to leave the runpool by stopping it from submitting + * any more jobs. When it finally does leave, + * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs + * (because it is dying), release the Power Manager active reference, and + * will not requeue the context in the policy queue. kbase_jd_zap_context() + * will wait for those jobs to be killed. + * - Hence, work required just to make it leave the runpool. Cancelling + * jobs and releasing the Power manager active reference will be handled + * when it leaves the runpool. + */ + + if (evict_success != MALI_FALSE || js_kctx_info->ctx.is_scheduled == MALI_FALSE) { + /* The following events require us to kill off remaining jobs and + * update PM book-keeping: + * - we evicted it correctly (it must have jobs to be in the Policy Queue) + * + * These events need no action, but take this path anyway: + * - Case a: it didn't have any jobs, and was never in the Queue + * - Case b: scheduling transaction will be partially rolled-back (this + * already cancels the jobs) + */ + + KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, js_kctx_info->ctx.is_scheduled); + + KBASE_LOG(2, kbdev->dev, "Zap: Ctx %p evict_success=%d, scheduled=%d", kctx, evict_success, js_kctx_info->ctx.is_scheduled); + + if (evict_success != MALI_FALSE) { + /* Only cancel jobs when we evicted from the policy queue. No Power + * Manager active reference was held. 
+ * + * Having is_dying set ensures that this kills, and doesn't requeue */ + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, MALI_FALSE); + } + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + } else { + unsigned long flags; + mali_bool was_retained; + /* Case c: didn't evict, but it is scheduled - it's in the Run Pool */ + KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, js_kctx_info->ctx.is_scheduled); + KBASE_LOG(2, kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); + + /* Disable the ctx from submitting any more jobs */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbasep_js_clear_submit_allowed(js_devdata, kctx); + + /* Retain and (later) release the context whilst it is is now disallowed from submitting + * jobs - ensures that someone somewhere will be removing the context later on */ + was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + + /* Since it's scheduled and we have the jsctx_mutex, it must be retained successfully */ + KBASE_DEBUG_ASSERT(was_retained != MALI_FALSE); + + KBASE_LOG(2, kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx); + /* Cancel any remaining running jobs for this kctx - if any. Submit is disallowed + * which takes effect immediately, so no more new jobs will appear after we do this. */ + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_hardstop(kctx, i, NULL); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + KBASE_LOG(2, kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", kctx); + kbasep_js_runpool_release_ctx(kbdev, kctx); + } + KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u); + + /* After this, you must wait on both the kbase_jd_context::zero_jobs_wait + * and the kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the + * jobs to be destroyed, and the context to be de-scheduled (if it was on + * the runpool). + * + * kbase_jd_zap_context() will do this. */ +} +KBASE_EXPORT_TEST_API(kbase_job_zap_context) + +mali_error kbase_job_slot_init(kbase_device *kbdev) +{ + int i; + KBASE_DEBUG_ASSERT(kbdev); + + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbasep_jm_init_submit_slot(&kbdev->jm_slots[i]); + + return MALI_ERROR_NONE; +} +KBASE_EXPORT_TEST_API(kbase_job_slot_init) + +void kbase_job_slot_halt(kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +void kbase_job_slot_term(kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev); +} +KBASE_EXPORT_TEST_API(kbase_job_slot_term) + +/** + * Soft-stop the specified job slot + * + * The job slot lock must be held when calling this function. + * The job slot must not already be in the process of being soft-stopped. + * + * Where possible any job in the next register is evicted before the soft-stop. + * + * @param kbdev The kbase device + * @param js The job slot to soft-stop + * @param target_katom The job that should be soft-stopped (or NULL for any job) + */ +void kbase_job_slot_softstop(kbase_device *kbdev, int js, kbase_jd_atom *target_katom) +{ + kbasep_job_slot_soft_or_hard_stop(kbdev, NULL, js, target_katom, JSn_COMMAND_SOFT_STOP); +} + +/** + * Hard-stop the specified job slot + * + * The job slot lock must be held when calling this function. 
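+ *
+ * Note that on hardware affected by BASE_HW_ISSUE_8401, BASE_HW_ISSUE_9510, or
+ * BASE_HW_ISSUE_T76X_3542 (the latter for AFBC fragment jobs), the hard-stop is
+ * followed by a GPU soft-reset (see the code below) so that the stopped jobs
+ * are guaranteed to leave the GPU.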
+ * + * @param kctx The kbase context that contains the job(s) that should + * be hard-stopped + * @param js The job slot to hard-stop + * @param target_katom The job that should be hard-stopped (or NULL for all + * jobs from the context) + */ +void kbase_job_slot_hardstop(kbase_context *kctx, int js, + kbase_jd_atom *target_katom) +{ + kbase_device *kbdev = kctx->kbdev; + + kbasep_job_slot_soft_or_hard_stop(kbdev, kctx, js, target_katom, + JSn_COMMAND_HARD_STOP); + if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) || + kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) || + (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_T76X_3542) && + (target_katom == NULL || target_katom->core_req & BASE_JD_REQ_FS_AFBC))) { + /* MIDBASE-2916 if a fragment job with AFBC encoding is + * hardstopped, ensure to do a soft reset also in order to + * clear the GPU status. + * Workaround for HW issue 8401 has an issue,so after + * hard-stopping just reset the GPU. This will ensure that the + * jobs leave the GPU.*/ + if (kbase_prepare_to_reset_gpu_locked(kbdev)) { + dev_err(kbdev->dev, "Issueing GPU\ + soft-reset after hard stopping due to hardware issue"); + kbase_reset_gpu_locked(kbdev); + } + } +} + + +void kbase_debug_dump_registers(kbase_device *kbdev) +{ + int i; + dev_err(kbdev->dev, "Register state:"); + dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL)); + dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x JOB_IRQ_THROTTLE=0x%08x", + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), NULL)); + for (i = 0; i < 3; i++) { + dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", + i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JSn_STATUS), + NULL), + i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JSn_HEAD_LO), + NULL)); + } + dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL)); + dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL), + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL)); + dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL)); + dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL)); +} + +void kbasep_reset_timeout_worker(struct work_struct *data) +{ + unsigned long flags; + kbase_device *kbdev; + int i; + ktime_t end_timestamp = ktime_get(); + kbasep_js_device_data *js_devdata; + kbase_uk_hwcnt_setup hwcnt_setup = { {0} }; + kbase_instr_state bckp_state; + + KBASE_DEBUG_ASSERT(data); + + kbdev = container_of(data, kbase_device, reset_work); + + KBASE_DEBUG_ASSERT(kbdev); + js_devdata = &kbdev->js_data; + + KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); + + /* Make sure the timer has completed - this cannot be done from interrupt context, + * so this cannot be done within kbasep_try_reset_gpu_early. 
*/ + hrtimer_cancel(&kbdev->reset_timer); + + if (kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + /* This would re-activate the GPU. Since it's already idle, there's no + * need to reset it */ + atomic_set(&kbdev->reset_gpu, KBASE_RESET_GPU_NOT_PENDING); + wake_up(&kbdev->reset_wait); + return; + } + + mutex_lock(&kbdev->pm.lock); + /* We hold the pm lock, so there ought to be a current policy */ + KBASE_DEBUG_ASSERT(kbdev->pm.pm_current_policy); + + /* All slot have been soft-stopped and we've waited SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point + * we assume that anything that is still left on the GPU is stuck there and we'll kill it when we reset the GPU */ + + dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", RESET_TIMEOUT); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) { /*the same interrupt handler preempted itself */ + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + /* Save the HW counters setup */ + if (kbdev->hwcnt.kctx != NULL) { + kbase_context *kctx = kbdev->hwcnt.kctx; + hwcnt_setup.dump_buffer = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kctx) & 0xffffffff; + hwcnt_setup.dump_buffer |= (mali_addr64) kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kctx) << 32; + hwcnt_setup.jm_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), kctx); + hwcnt_setup.shader_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), kctx); + hwcnt_setup.tiler_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), kctx); + hwcnt_setup.l3_cache_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_L3_CACHE_EN), kctx); + hwcnt_setup.mmu_l2_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), kctx); + } + + /* Output the state of some interesting registers to help in the + * debugging of GPU resets */ + kbase_debug_dump_registers(kbdev); + + bckp_state = kbdev->hwcnt.state; + kbdev->hwcnt.state = KBASE_INSTR_STATE_RESETTING; + kbdev->hwcnt.triggered = 0; + /* Disable IRQ to avoid IRQ handlers to kick in after releaseing the spinlock; + * this also clears any outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure that any IRQ handlers have finished */ + kbase_synchronize_irqs(kbdev); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Reset the GPU */ + kbase_pm_init_hw(kbdev, MALI_TRUE); + /* IRQs were re-enabled by kbase_pm_init_hw, and GPU is still powered */ + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* Restore the HW counters setup */ + if (kbdev->hwcnt.kctx != NULL) { + kbase_context *kctx = kbdev->hwcnt.kctx; + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_OFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), hwcnt_setup.dump_buffer >> 32, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), hwcnt_setup.jm_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), hwcnt_setup.shader_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_L3_CACHE_EN), hwcnt_setup.l3_cache_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), hwcnt_setup.mmu_l2_bm, kctx); + + /* Due to PRLAM-8186 we need to disable the Tiler before we 
enable the HW counter dump. */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0, kctx); + else + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), hwcnt_setup.tiler_bm, kctx); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_MANUAL, kctx); + + /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), hwcnt_setup.tiler_bm, kctx); + } + kbdev->hwcnt.state = bckp_state; + switch(kbdev->hwcnt.state) { + /* Cases for waking kbasep_cache_clean_worker worker */ + case KBASE_INSTR_STATE_CLEANED: + /* Cache-clean IRQ occurred, but we reset: + * Wake up in case the waiter saw RESETTING */ + case KBASE_INSTR_STATE_REQUEST_CLEAN: + /* After a clean was requested, but before the regs were written: + * Wake up in case the waiter saw RESETTING */ + wake_up(&kbdev->hwcnt.cache_clean_wait); + break; + case KBASE_INSTR_STATE_CLEANING: + /* Either: + * 1) We've not got the Cache-clean IRQ yet: it was lost, or: + * 2) We got it whilst resetting: it was voluntarily lost + * + * So, move to the next state and wake up: */ + kbdev->hwcnt.state = KBASE_INSTR_STATE_CLEANED; + wake_up(&kbdev->hwcnt.cache_clean_wait); + break; + + /* Cases for waking anyone else */ + case KBASE_INSTR_STATE_DUMPING: + /* If dumping, abort the dump, because we may have lost the IRQ */ + kbdev->hwcnt.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.triggered = 1; + wake_up(&kbdev->hwcnt.wait); + break; + case KBASE_INSTR_STATE_DISABLED: + case KBASE_INSTR_STATE_IDLE: + case KBASE_INSTR_STATE_FAULT: + /* Every other reason: wake up in that state */ + kbdev->hwcnt.triggered = 1; + wake_up(&kbdev->hwcnt.wait); + break; + + /* Unhandled cases */ + case KBASE_INSTR_STATE_RESETTING: + default: + BUG(); + break; + } + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Complete any jobs that were still on the GPU */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { + int nr_done; + kbase_jm_slot *slot = &kbdev->jm_slots[i]; + + nr_done = kbasep_jm_nr_jobs_submitted(slot); + while (nr_done) { + dev_err(kbdev->dev, "Job stuck in slot %d on the GPU was cancelled", i); + kbase_job_done_slot(kbdev, i, BASE_JD_EVENT_JOB_CANCELLED, 0, &end_timestamp); + nr_done--; + } + } + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + mutex_lock(&js_devdata->runpool_mutex); + + /* Reprogram the GPU's MMU */ + for (i = 0; i < BASE_MAX_NR_AS; i++) { + if (js_devdata->runpool_irq.per_as_data[i].kctx) { + kbase_as *as = &kbdev->as[i]; + mutex_lock(&as->transaction_mutex); + kbase_mmu_update(js_devdata->runpool_irq.per_as_data[i].kctx); + mutex_unlock(&as->transaction_mutex); + } + } + + atomic_set(&kbdev->reset_gpu, KBASE_RESET_GPU_NOT_PENDING); + wake_up(&kbdev->reset_wait); + dev_err(kbdev->dev, "Reset complete"); + + /* Find out what cores are required now */ + kbase_pm_update_cores_state(kbdev); + + /* Synchronously request and wait for those cores, because if + * instrumentation is enabled it would need them immediately.
*/ + kbase_pm_check_transitions_sync(kbdev); + + /* Try submitting some jobs to restart processing */ + if (js_devdata->nr_user_contexts_running > 0) { + KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0); + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbasep_js_try_run_next_job_nolock(kbdev); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + } + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&kbdev->pm.lock); + + kbase_pm_context_idle(kbdev); + KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); +} + +enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) +{ + kbase_device *kbdev = container_of(timer, kbase_device, reset_timer); + + KBASE_DEBUG_ASSERT(kbdev); + + /* Reset still pending? */ + if (atomic_cmpxchg(&kbdev->reset_gpu, KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == KBASE_RESET_GPU_COMMITTED) + queue_work(kbdev->reset_workq, &kbdev->reset_work); + + return HRTIMER_NORESTART; +} + +/* + * If all jobs are evicted from the GPU then we can reset the GPU + * immediately instead of waiting for the timeout to elapse + */ + +static void kbasep_try_reset_gpu_early_locked(kbase_device *kbdev) +{ + int i; + int pending_jobs = 0; + + KBASE_DEBUG_ASSERT(kbdev); + + /* Count the number of jobs */ + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { + kbase_jm_slot *slot = &kbdev->jm_slots[i]; + pending_jobs += kbasep_jm_nr_jobs_submitted(slot); + } + + if (pending_jobs > 0) { + /* There are still jobs on the GPU - wait */ + return; + } + + /* Check that the reset has been committed to (i.e. kbase_reset_gpu has been called), and that no other + * thread beat this thread to starting the reset */ + if (atomic_cmpxchg(&kbdev->reset_gpu, KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != KBASE_RESET_GPU_COMMITTED) { + /* Reset has already occurred */ + return; + } + queue_work(kbdev->reset_workq, &kbdev->reset_work); +} + +static void kbasep_try_reset_gpu_early(kbase_device *kbdev) +{ + unsigned long flags; + kbasep_js_device_data *js_devdata; + + js_devdata = &kbdev->js_data; + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbasep_try_reset_gpu_early_locked(kbdev); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); +} + +/* + * Prepare for resetting the GPU. + * This function just soft-stops all the slots to ensure that as many jobs as possible are saved. + * + * The function returns a boolean which should be interpreted as follows: + * - MALI_TRUE - Prepared for reset, kbase_reset_gpu should be called. + * - MALI_FALSE - Another thread is performing a reset, kbase_reset_gpu should not be called. 
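+ *
+ * A typical caller sequence is sketched below; zap_timeout_callback() and (for
+ * the _locked variants) kbase_job_slot_hardstop() follow this pattern:
+ *
+ *   if (kbase_prepare_to_reset_gpu(kbdev))
+ *       kbase_reset_gpu(kbdev);
+ *
+ * and then wait on kbdev->reset_wait until kbdev->reset_gpu reads
+ * KBASE_RESET_GPU_NOT_PENDING, as kbase_jd_zap_context() does.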
+ * + * @return See description + */ +mali_bool kbase_prepare_to_reset_gpu_locked(kbase_device *kbdev) +{ + int i; + + KBASE_DEBUG_ASSERT(kbdev); + + if (atomic_cmpxchg(&kbdev->reset_gpu, KBASE_RESET_GPU_NOT_PENDING, KBASE_RESET_GPU_PREPARED) != KBASE_RESET_GPU_NOT_PENDING) { + /* Some other thread is already resetting the GPU */ + return MALI_FALSE; + } + + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_softstop(kbdev, i, NULL); + + return MALI_TRUE; +} + +mali_bool kbase_prepare_to_reset_gpu(kbase_device *kbdev) +{ + unsigned long flags; + mali_bool ret; + kbasep_js_device_data *js_devdata; + + js_devdata = &kbdev->js_data; + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + ret = kbase_prepare_to_reset_gpu_locked(kbdev); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return ret; +} +KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu) + +/* + * This function should be called after kbase_prepare_to_reset_gpu iff it returns MALI_TRUE. + * It should never be called without a corresponding call to kbase_prepare_to_reset_gpu. + * + * After this function is called (or not called if kbase_prepare_to_reset_gpu returned MALI_FALSE), + * the caller should wait for kbdev->reset_wait to be signalled to know when the reset has completed. + */ +void kbase_reset_gpu(kbase_device *kbdev) +{ + u32 timeout_ms; + + KBASE_DEBUG_ASSERT(kbdev); + + /* Note this is an assert/atomic_set because it is a software issue for a race to be occurring here */ + KBASE_DEBUG_ASSERT(atomic_read(&kbdev->reset_gpu) == KBASE_RESET_GPU_PREPARED); + atomic_set(&kbdev->reset_gpu, KBASE_RESET_GPU_COMMITTED); + + timeout_ms = kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS); + dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n", timeout_ms); + hrtimer_start(&kbdev->reset_timer, HR_TIMER_DELAY_MSEC(timeout_ms), HRTIMER_MODE_REL); + + /* Try resetting early */ + kbasep_try_reset_gpu_early(kbdev); +} +KBASE_EXPORT_TEST_API(kbase_reset_gpu) + +void kbase_reset_gpu_locked(kbase_device *kbdev) +{ + u32 timeout_ms; + + KBASE_DEBUG_ASSERT(kbdev); + + /* Note this is an assert/atomic_set because it is a software issue for a race to be occurring here */ + KBASE_DEBUG_ASSERT(atomic_read(&kbdev->reset_gpu) == KBASE_RESET_GPU_PREPARED); + atomic_set(&kbdev->reset_gpu, KBASE_RESET_GPU_COMMITTED); + + timeout_ms = kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS); + dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n", timeout_ms); + hrtimer_start(&kbdev->reset_timer, HR_TIMER_DELAY_MSEC(timeout_ms), HRTIMER_MODE_REL); + + /* Try resetting early */ + kbasep_try_reset_gpu_early_locked(kbdev); +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.h b/drivers/gpu/arm/midgard/mali_kbase_jm.h new file mode 100755 index 00000000000..9d004d91a49 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_jm.h @@ -0,0 +1,199 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence.
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_jm.h + * Job Manager Low-level APIs. + */ + +#ifndef _KBASE_JM_H_ +#define _KBASE_JM_H_ + +#include +#include +#include + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_jm Job Manager Low-level APIs + * @{ + * + */ + +static INLINE int kbasep_jm_is_js_free(kbase_device *kbdev, int js, kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(0 <= js && js < kbdev->gpu_props.num_job_slots); + + return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), kctx); +} + +/** + * This checks that: + * - there is enough space in the GPU's buffers (JSn_NEXT and JSn_HEAD registers) to accomodate the job. + * - there is enough space to track the job in a our Submit Slots. Note that we have to maintain space to + * requeue one job in case the next registers on the hardware need to be cleared. + */ +static INLINE mali_bool kbasep_jm_is_submit_slots_free(kbase_device *kbdev, int js, kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(0 <= js && js < kbdev->gpu_props.num_job_slots); + + if (atomic_read(&kbdev->reset_gpu) != KBASE_RESET_GPU_NOT_PENDING) { + /* The GPU is being reset - so prevent submission */ + return MALI_FALSE; + } + + return (mali_bool) (kbasep_jm_is_js_free(kbdev, js, kctx) + && kbdev->jm_slots[js].submitted_nr < (BASE_JM_SUBMIT_SLOTS - 2)); +} + +/** + * Initialize a submit slot + */ +static INLINE void kbasep_jm_init_submit_slot(kbase_jm_slot *slot) +{ + slot->submitted_nr = 0; + slot->submitted_head = 0; +} + +/** + * Find the atom at the idx'th element in the queue without removing it, starting at the head with idx==0. + */ +static INLINE kbase_jd_atom *kbasep_jm_peek_idx_submit_slot(kbase_jm_slot *slot, u8 idx) +{ + u8 pos; + kbase_jd_atom *katom; + + KBASE_DEBUG_ASSERT(idx < BASE_JM_SUBMIT_SLOTS); + + pos = (slot->submitted_head + idx) & BASE_JM_SUBMIT_SLOTS_MASK; + katom = slot->submitted[pos]; + + return katom; +} + +/** + * Pop front of the submitted + */ +static INLINE kbase_jd_atom *kbasep_jm_dequeue_submit_slot(kbase_jm_slot *slot) +{ + u8 pos; + kbase_jd_atom *katom; + + pos = slot->submitted_head & BASE_JM_SUBMIT_SLOTS_MASK; + katom = slot->submitted[pos]; + slot->submitted[pos] = NULL; /* Just to catch bugs... 
*/ + KBASE_DEBUG_ASSERT(katom); + + /* rotate the buffers */ + slot->submitted_head = (slot->submitted_head + 1) & BASE_JM_SUBMIT_SLOTS_MASK; + slot->submitted_nr--; + + KBASE_LOG(2, katom->kctx->kbdev->dev, "katom %p new head %u", (void *)katom, (unsigned int)slot->submitted_head); + + return katom; +} + +/* Pop back of the submitted queue (unsubmit a job) + */ +static INLINE kbase_jd_atom *kbasep_jm_dequeue_tail_submit_slot(kbase_jm_slot *slot) +{ + u8 pos; + + slot->submitted_nr--; + + pos = (slot->submitted_head + slot->submitted_nr) & BASE_JM_SUBMIT_SLOTS_MASK; + + return slot->submitted[pos]; +} + +static INLINE u8 kbasep_jm_nr_jobs_submitted(kbase_jm_slot *slot) +{ + return slot->submitted_nr; +} + +/** + * Push back of the submitted + */ +static INLINE void kbasep_jm_enqueue_submit_slot(kbase_jm_slot *slot, kbase_jd_atom *katom) +{ + u8 nr; + u8 pos; + nr = slot->submitted_nr++; + KBASE_DEBUG_ASSERT(nr < BASE_JM_SUBMIT_SLOTS); + + pos = (slot->submitted_head + nr) & BASE_JM_SUBMIT_SLOTS_MASK; + slot->submitted[pos] = katom; +} + +/** + * @brief Query whether a job peeked/dequeued from the submit slots is a + * 'dummy' job that is used for hardware workaround purposes. + * + * Any time a job is peeked/dequeued from the submit slots, this should be + * queried on that job. + * + * If a \a atom is indicated as being a dummy job, then you must not attempt + * to use \a atom. This is because its members will not necessarily be + * initialized, and so could lead to a fault if they were used. + * + * @param[in] kbdev kbase device pointer + * @param[in] atom The atom to query + * + * @return MALI_TRUE if \a atom is for a dummy job, in which case you must not + * attempt to use it. + * @return MALI_FALSE otherwise, and \a atom is safe to use. + */ +static INLINE mali_bool kbasep_jm_is_dummy_workaround_job(kbase_device *kbdev, kbase_jd_atom *atom) +{ + /* Query the set of workaround jobs here */ + /* none exists today */ + return MALI_FALSE; +} + +/** + * @brief Submit a job to a certain job-slot + * + * The caller must check kbasep_jm_is_submit_slots_free() != MALI_FALSE before calling this. + * + * The following locking conditions are made on the caller: + * - it must hold the kbasep_js_device_data::runpoool_irq::lock + */ +void kbase_job_submit_nolock(kbase_device *kbdev, kbase_jd_atom *katom, int js); + +/** + * @brief Complete the head job on a particular job-slot + */ +void kbase_job_done_slot(kbase_device *kbdev, int s, u32 completion_code, u64 job_tail, ktime_t *end_timestamp); + + /** @} *//* end group kbase_jm */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + +#endif /* _KBASE_JM_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c new file mode 100755 index 00000000000..85b82d86c82 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -0,0 +1,2144 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
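The submit-slot helpers above treat each job slot as a small power-of-two ring buffer: submitted_head indexes the oldest job, submitted_nr counts the entries, enqueue writes at (head + nr) & mask, dequeuing the head advances the index, and dequeuing the tail simply drops the count (used when a job has to be unsubmitted from the NEXT register). A stand-alone sketch of the same ring; the capacity and the int job handles are placeholders rather than the driver's BASE_JM_SUBMIT_SLOTS and kbase_jd_atom:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SLOT_CAPACITY 16u                     /* must be a power of two */
#define SLOT_MASK     (SLOT_CAPACITY - 1u)

struct submit_slot {
	int submitted[SLOT_CAPACITY];         /* job handles; plain ints here */
	uint8_t submitted_head;               /* index of the oldest entry */
	uint8_t submitted_nr;                 /* number of valid entries */
};

static void slot_enqueue(struct submit_slot *s, int job)
{
	assert(s->submitted_nr < SLOT_CAPACITY);
	s->submitted[(s->submitted_head + s->submitted_nr) & SLOT_MASK] = job;
	s->submitted_nr++;
}

/* Pop the oldest job: the one the hardware will complete first. */
static int slot_dequeue_head(struct submit_slot *s)
{
	int job;

	assert(s->submitted_nr > 0);
	job = s->submitted[s->submitted_head & SLOT_MASK];
	s->submitted_head = (s->submitted_head + 1) & SLOT_MASK;
	s->submitted_nr--;
	return job;
}

/* Pop the newest job: used when it was evicted from the NEXT register. */
static int slot_dequeue_tail(struct submit_slot *s)
{
	assert(s->submitted_nr > 0);
	s->submitted_nr--;
	return s->submitted[(s->submitted_head + s->submitted_nr) & SLOT_MASK];
}

int main(void)
{
	struct submit_slot s = { .submitted_head = 0, .submitted_nr = 0 };
	int tail, head;

	slot_enqueue(&s, 1);
	slot_enqueue(&s, 2);
	tail = slot_dequeue_tail(&s);   /* 2: newest, "unsubmitted" */
	head = slot_dequeue_head(&s);   /* 1: oldest */
	printf("tail=%d head=%d\n", tail, head);
	return 0;
}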
+ * + */ + + + + + +/* + * Job Scheduler Implementation + */ +#include +#include +#include +#include +#include + +#include "mali_kbase_jm.h" +#include + +/* + * Private types + */ + +/** Bitpattern indicating the result of releasing a context */ +enum { + /** The context was descheduled - caller should try scheduling in a new one + * to keep the runpool full */ + KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0), +}; + +typedef u32 kbasep_js_release_result; + +/* + * Private function prototypes + */ +STATIC INLINE void kbasep_js_deref_permon_check_and_disable_cycle_counter(kbase_device *kbdev, kbase_jd_atom *katom); + +STATIC INLINE void kbasep_js_ref_permon_check_and_enable_cycle_counter(kbase_device *kbdev, kbase_jd_atom *katom); + +STATIC kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(kbase_device *kbdev, kbase_context *kctx, kbasep_js_atom_retained_state *katom_retained_state); + +/** Helper for trace subcodes */ +#if KBASE_TRACE_ENABLE != 0 +STATIC int kbasep_js_trace_get_refcnt(kbase_device *kbdev, kbase_context *kctx) +{ + unsigned long flags; + kbasep_js_device_data *js_devdata; + int as_nr; + int refcnt = 0; + + js_devdata = &kbdev->js_data; + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + as_nr = kctx->as_nr; + if (as_nr != KBASEP_AS_NR_INVALID) { + kbasep_js_per_as_data *js_per_as_data; + js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; + + refcnt = js_per_as_data->as_busy_refcount; + } + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return refcnt; +} +#else /* KBASE_TRACE_ENABLE != 0 */ +STATIC int kbasep_js_trace_get_refcnt(kbase_device *kbdev, kbase_context *kctx) +{ + CSTD_UNUSED(kbdev); + CSTD_UNUSED(kctx); + return 0; +} +#endif /* KBASE_TRACE_ENABLE != 0 */ + +/* + * Private types + */ +enum { + JS_DEVDATA_INIT_NONE = 0, + JS_DEVDATA_INIT_CONSTANTS = (1 << 0), + JS_DEVDATA_INIT_POLICY = (1 << 1), + JS_DEVDATA_INIT_ALL = ((1 << 2) - 1) +}; + +enum { + JS_KCTX_INIT_NONE = 0, + JS_KCTX_INIT_CONSTANTS = (1 << 0), + JS_KCTX_INIT_POLICY = (1 << 1), + JS_KCTX_INIT_ALL = ((1 << 2) - 1) +}; + +/* + * Private functions + */ + +/** + * Check if the job had performance monitoring enabled and decrement the count. If no jobs require + * performance monitoring, then the cycle counters will be disabled in the GPU. + * + * No locks need to be held - locking is handled further down + * + * This function does not sleep. + */ + +STATIC INLINE void kbasep_js_deref_permon_check_and_disable_cycle_counter(kbase_device *kbdev, kbase_jd_atom *katom) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + + if (katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter(kbdev); +} + +/** + * Check if the job has performance monitoring enabled and keep a count of it. If at least one + * job requires performance monitoring, then the cycle counters will be enabled in the GPU. + * + * No locks need to be held - locking is handled further down + * + * This function does not sleep. + */ + +STATIC INLINE void kbasep_js_ref_permon_check_and_enable_cycle_counter(kbase_device *kbdev, kbase_jd_atom *katom) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + + if (katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_request_gpu_cycle_counter(kbdev); +} + +/* + * The following locking conditions are made on the caller: + * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. 
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex + */ +STATIC INLINE void runpool_inc_context_count(kbase_device *kbdev, kbase_context *kctx) +{ + kbasep_js_device_data *js_devdata; + kbasep_js_kctx_info *js_kctx_info; + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex)); + BUG_ON(!mutex_is_locked(&js_devdata->runpool_mutex)); + + /* Track total contexts */ + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX); + ++(js_devdata->nr_all_contexts_running); + + if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) { + /* Track contexts that can submit jobs */ + KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < S8_MAX); + ++(js_devdata->nr_user_contexts_running); + } +} + +/* + * The following locking conditions are made on the caller: + * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. + * - The caller must hold the kbasep_js_device_data::runpool_mutex + */ +STATIC INLINE void runpool_dec_context_count(kbase_device *kbdev, kbase_context *kctx) +{ + kbasep_js_device_data *js_devdata; + kbasep_js_kctx_info *js_kctx_info; + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex)); + BUG_ON(!mutex_is_locked(&js_devdata->runpool_mutex)); + + /* Track total contexts */ + --(js_devdata->nr_all_contexts_running); + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0); + + if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) { + /* Track contexts that can submit jobs */ + --(js_devdata->nr_user_contexts_running); + KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0); + } +} + +/** + * @brief check whether the runpool is full for a specified context + * + * If kctx == NULL, then this makes the least restrictive check on the + * runpool. A specific context that is supplied immediately after could fail + * the check, even under the same conditions. + * + * Therefore, once a context is obtained you \b must re-check it with this + * function, since the return value could change to MALI_FALSE. + * + * The following locking conditions are made on the caller: + * - In all cases, the caller must hold kbasep_js_device_data::runpool_mutex + * - When kctx != NULL the caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. + * - When kctx == NULL, then the caller need not hold any jsctx_mutex locks (but it doesn't do any harm to do so). + */ +STATIC mali_bool check_is_runpool_full(kbase_device *kbdev, kbase_context *kctx) +{ + kbasep_js_device_data *js_devdata; + mali_bool is_runpool_full; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_devdata = &kbdev->js_data; + BUG_ON(!mutex_is_locked(&js_devdata->runpool_mutex)); + + /* Regardless of whether a context is submitting or not, can't have more than there + * are HW address spaces */ + is_runpool_full = (mali_bool) (js_devdata->nr_all_contexts_running >= kbdev->nr_hw_address_spaces); + + if (kctx != NULL && (kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) { + BUG_ON(!mutex_is_locked(&kctx->jctx.sched_info.ctx.jsctx_mutex)); + /* Contexts that submit might use less of the address spaces available, due to HW + * workarounds. 
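Together with the number of hardware and user-submittable address spaces, the two counters maintained above are all that check_is_runpool_full() needs: the runpool is full once every hardware address space holds a context, and, for a submitting context, also once every submittable address space is taken. A small sketch of that bookkeeping with plain ints in place of the driver's locked structures (names illustrative):

#include <stdbool.h>
#include <stdio.h>

struct runpool {
	int nr_all_contexts_running;   /* every scheduled context */
	int nr_user_contexts_running;  /* contexts allowed to submit jobs */
	int nr_hw_address_spaces;      /* address spaces the GPU provides */
	int nr_user_address_spaces;    /* spaces usable by submitting contexts */
};

static void runpool_add(struct runpool *rp, bool can_submit)
{
	rp->nr_all_contexts_running++;
	if (can_submit)
		rp->nr_user_contexts_running++;
}

static void runpool_remove(struct runpool *rp, bool can_submit)
{
	rp->nr_all_contexts_running--;
	if (can_submit)
		rp->nr_user_contexts_running--;
}

/* 'can_submit' describes the context we would like to schedule in next. */
static bool runpool_is_full(const struct runpool *rp, bool can_submit)
{
	bool full = rp->nr_all_contexts_running >= rp->nr_hw_address_spaces;

	if (can_submit)
		full |= rp->nr_user_contexts_running >= rp->nr_user_address_spaces;
	return full;
}

int main(void)
{
	/* e.g. the HW-issue-8987 workaround: only one submittable AS */
	struct runpool rp = { 0, 0, 4, 1 };

	runpool_add(&rp, true);
	printf("full for another submitter: %d\n", runpool_is_full(&rp, true));   /* 1 */
	printf("full for a non-submitter:   %d\n", runpool_is_full(&rp, false));  /* 0 */
	runpool_remove(&rp, true);
	return 0;
}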
In which case, the runpool is also full when the number of + * submitting contexts exceeds the number of submittable address spaces. + * + * Both checks must be made: can have nr_user_address_spaces == nr_hw_address spaces, + * and at the same time can have nr_user_contexts_running < nr_all_contexts_running. */ + is_runpool_full |= (mali_bool) (js_devdata->nr_user_contexts_running >= kbdev->nr_user_address_spaces); + } + + return is_runpool_full; +} + +STATIC base_jd_core_req core_reqs_from_jsn_features(u16 features) /* JS_FEATURE register value */ +{ + base_jd_core_req core_req = 0u; + + if ((features & JSn_FEATURE_SET_VALUE_JOB) != 0) + core_req |= BASE_JD_REQ_V; + + if ((features & JSn_FEATURE_CACHE_FLUSH_JOB) != 0) + core_req |= BASE_JD_REQ_CF; + + if ((features & JSn_FEATURE_COMPUTE_JOB) != 0) + core_req |= BASE_JD_REQ_CS; + + if ((features & JSn_FEATURE_TILER_JOB) != 0) + core_req |= BASE_JD_REQ_T; + + if ((features & JSn_FEATURE_FRAGMENT_JOB) != 0) + core_req |= BASE_JD_REQ_FS; + + return core_req; +} + +/** + * Picks and reserves an address space. + * + * When this function returns, the address space returned is reserved and + * cannot be picked for another context until it is released. + * + * The caller must ensure there \b is a free address space before calling this. + * + * The following locking conditions are made on the caller: + * - it must hold kbasep_js_device_data::runpool_mutex + * + * @return a non-NULL pointer to a kbase_as that is not in use by any other context + */ +STATIC kbase_as *pick_free_addr_space(kbase_device *kbdev) +{ + kbasep_js_device_data *js_devdata; + kbase_as *current_as; + long ffs_result; + js_devdata = &kbdev->js_data; + + lockdep_assert_held(&js_devdata->runpool_mutex); + + /* Find the free address space */ + ffs_result = ffs(js_devdata->as_free) - 1; + + /* ASSERT that we should've found a free one */ + KBASE_DEBUG_ASSERT(0 <= ffs_result && ffs_result < kbdev->nr_hw_address_spaces); + /* Ensure no-one else picks this one */ + js_devdata->as_free &= ~((u16) (1u << ffs_result)); + + current_as = &kbdev->as[ffs_result]; + + return current_as; +} + +/** + * Release an address space, making it available for being picked again. + * + * The following locking conditions are made on the caller: + * - it must hold kbasep_js_device_data::runpool_mutex + */ +STATIC INLINE void release_addr_space(kbase_device *kbdev, int kctx_as_nr) +{ + kbasep_js_device_data *js_devdata; + u16 as_bit = (1u << kctx_as_nr); + + js_devdata = &kbdev->js_data; + lockdep_assert_held(&js_devdata->runpool_mutex); + + /* The address space must not already be free */ + KBASE_DEBUG_ASSERT(!(js_devdata->as_free & as_bit)); + + js_devdata->as_free |= as_bit; +} + +/** + * Assign an Address Space (AS) to a context, and add the context to the Policy. 
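pick_free_addr_space() and release_addr_space() above manage address spaces as a plain bitmask: bit n set means AS n is free, find-first-set picks the lowest free one and clears its bit, and releasing sets the bit again. A self-contained sketch of the same allocator, using the GCC/Clang __builtin_ffs() in place of the kernel's ffs() and an assumed AS count:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NR_ADDRESS_SPACES 8

/* Bit n set => address space n is free. Initially all free. */
static uint16_t as_free = (1u << NR_ADDRESS_SPACES) - 1;

/* The caller must already know at least one AS is free (the driver checks
 * that the runpool is not full first). Marks the AS used and returns it. */
static int pick_free_addr_space(void)
{
	int as_nr = __builtin_ffs(as_free) - 1;

	assert(as_nr >= 0 && as_nr < NR_ADDRESS_SPACES);
	as_free &= (uint16_t)~(1u << as_nr);
	return as_nr;
}

static void release_addr_space(int as_nr)
{
	assert(!(as_free & (1u << as_nr)));   /* must not already be free */
	as_free |= (uint16_t)(1u << as_nr);
}

int main(void)
{
	int a = pick_free_addr_space();   /* 0 */
	int b = pick_free_addr_space();   /* 1 */

	release_addr_space(a);
	printf("picked %d and %d, next pick would be %d\n",
	       a, b, __builtin_ffs(as_free) - 1);   /* next pick: 0 again */
	return 0;
}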
+ * + * This includes: + * - setting up the global runpool_irq structure and the context on the AS + * - Activating the MMU on the AS + * - Allowing jobs to be submitted on the AS + * + * Locking conditions: + * - Caller must hold the kbasep_js_kctx_info::jsctx_mutex + * - Caller must hold the kbasep_js_device_data::runpool_mutex + * - Caller must hold AS transaction mutex + * - Caller must hold Runpool IRQ lock + */ +STATIC void assign_and_activate_kctx_addr_space(kbase_device *kbdev, kbase_context *kctx, kbase_as *current_as) +{ + kbasep_js_device_data *js_devdata; + kbasep_js_per_as_data *js_per_as_data; + int as_nr; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(current_as != NULL); + + js_devdata = &kbdev->js_data; + as_nr = current_as->number; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(¤t_as->transaction_mutex); + lockdep_assert_held(&js_devdata->runpool_irq.lock); + + js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; + + /* Attribute handling */ + kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx); + + /* Assign addr space */ + kctx->as_nr = as_nr; +#ifdef CONFIG_MALI_GATOR_SUPPORT + kbase_trace_mali_mmu_as_in_use(kctx->as_nr); +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + /* Activate this address space on the MMU */ + kbase_mmu_update(kctx); + + /* Allow it to run jobs */ + kbasep_js_set_submit_allowed(js_devdata, kctx); + + /* Book-keeping */ + js_per_as_data->kctx = kctx; + js_per_as_data->as_busy_refcount = 0; + + /* Lastly, add the context to the policy's runpool - this really allows it to run jobs */ + kbasep_js_policy_runpool_add_ctx(&js_devdata->policy, kctx); + +} + +void kbasep_js_try_run_next_job_nolock(kbase_device *kbdev) +{ + kbasep_js_device_data *js_devdata; + int js; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&js_devdata->runpool_irq.lock); + + /* It's cheap and simple to retest this here - otherwise we burden the + * caller with it. In some cases, we do this higher up to optimize out the + * spinlock. 
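kbase_js_try_run_jobs(), defined just below, shows the locking convention this file relies on: the sleepable runpool_mutex is taken first for scheduling decisions, and the runpool_irq spinlock is only held around the short section that touches state shared with the IRQ handlers, and only when user contexts are actually present. A rough user-space approximation with pthreads; a real IRQ-safe spinlock has no direct equivalent here, and all names are illustrative:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t runpool_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_spinlock_t runpool_irq_lock;
static int nr_user_contexts_running;

/* Stand-in for kbasep_js_try_run_next_job_nolock(): caller holds both locks. */
static void try_run_next_job_nolock(void)
{
	printf("submitting with %d user context(s) present\n",
	       nr_user_contexts_running);
}

static void try_run_jobs(void)
{
	pthread_mutex_lock(&runpool_mutex);
	if (nr_user_contexts_running != 0) {
		/* Only touch IRQ-visible state inside the spinlock, and only
		 * when contexts exist; otherwise the GPU may be powered off. */
		pthread_spin_lock(&runpool_irq_lock);
		try_run_next_job_nolock();
		pthread_spin_unlock(&runpool_irq_lock);
	}
	pthread_mutex_unlock(&runpool_mutex);
}

int main(void)
{
	pthread_spin_init(&runpool_irq_lock, PTHREAD_PROCESS_PRIVATE);
	try_run_jobs();                 /* no contexts: does nothing */
	nr_user_contexts_running = 1;
	try_run_jobs();                 /* submits */
	pthread_spin_destroy(&runpool_irq_lock);
	return 0;
}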
*/ + if (js_devdata->nr_user_contexts_running == 0) + return; /* No contexts present - the GPU might be powered off, so just return */ + + for (js = 0; js < kbdev->gpu_props.num_job_slots; ++js) + kbasep_js_try_run_next_job_on_slot_nolock(kbdev, js); +} + +/** Hold the kbasep_js_device_data::runpool_irq::lock for this */ +mali_bool kbasep_js_runpool_retain_ctx_nolock(kbase_device *kbdev, kbase_context *kctx) +{ + kbasep_js_device_data *js_devdata; + kbasep_js_per_as_data *js_per_as_data; + mali_bool result = MALI_FALSE; + int as_nr; + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_devdata = &kbdev->js_data; + + as_nr = kctx->as_nr; + if (as_nr != KBASEP_AS_NR_INVALID) { + int new_refcnt; + + KBASE_DEBUG_ASSERT(as_nr >= 0); + js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; + + KBASE_DEBUG_ASSERT(js_per_as_data->kctx != NULL); + + new_refcnt = ++(js_per_as_data->as_busy_refcount); + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx, NULL, 0u, new_refcnt); + result = MALI_TRUE; + } + + return result; +} + +/* + * Functions private to KBase ('Protected' functions) + */ +void kbase_js_try_run_jobs(kbase_device *kbdev) +{ + kbasep_js_device_data *js_devdata; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + + mutex_lock(&js_devdata->runpool_mutex); + if (js_devdata->nr_user_contexts_running != 0) { + /* Only try running jobs when we have contexts present, otherwise the GPU might be powered off. */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + kbasep_js_try_run_next_job_nolock(kbdev); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + } + mutex_unlock(&js_devdata->runpool_mutex); +} + +void kbase_js_try_run_jobs_on_slot(kbase_device *kbdev, int js) +{ + unsigned long flags; + kbasep_js_device_data *js_devdata; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + + mutex_lock(&js_devdata->runpool_mutex); + if (js_devdata->nr_user_contexts_running != 0) { + /* Only try running jobs when we have contexts present, otherwise the GPU might be powered off. */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + kbasep_js_try_run_next_job_on_slot_nolock(kbdev, js); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + } + mutex_unlock(&js_devdata->runpool_mutex); +} + +mali_error kbasep_js_devdata_init(kbase_device * const kbdev) +{ + kbasep_js_device_data *js_devdata; + mali_error err; + int i; + u16 as_present; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_devdata = &kbdev->js_data; + + KBASE_DEBUG_ASSERT(js_devdata->init_status == JS_DEVDATA_INIT_NONE); + + /* These two must be recalculated if nr_hw_address_spaces changes (e.g. 
for HW workarounds) */ + as_present = (1U << kbdev->nr_hw_address_spaces) - 1; + kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces; + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) { + mali_bool use_workaround_for_security; + use_workaround_for_security = (mali_bool) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_SECURE_BUT_LOSS_OF_PERFORMANCE); + if (use_workaround_for_security != MALI_FALSE) { + KBASE_LOG(2, kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only"); + kbdev->nr_user_address_spaces = 1; + } + } +#ifdef CONFIG_MALI_DEBUG + /* Soft-stop will be disabled on a single context by default unless softstop_always is set */ + js_devdata->softstop_always = MALI_FALSE; +#endif /* CONFIG_MALI_DEBUG */ + js_devdata->nr_all_contexts_running = 0; + js_devdata->nr_user_contexts_running = 0; + js_devdata->as_free = as_present; /* All ASs initially free */ + js_devdata->runpool_irq.submit_allowed = 0u; /* No ctx allowed to submit */ + memset(js_devdata->runpool_irq.ctx_attr_ref_count, 0, sizeof(js_devdata->runpool_irq.ctx_attr_ref_count)); + memset(js_devdata->runpool_irq.slot_affinities, 0, sizeof(js_devdata->runpool_irq.slot_affinities)); + js_devdata->runpool_irq.slots_blocked_on_affinity = 0u; + memset(js_devdata->runpool_irq.slot_affinity_refcount, 0, sizeof(js_devdata->runpool_irq.slot_affinity_refcount)); + INIT_LIST_HEAD(&js_devdata->suspended_soft_jobs_list); + + /* Config attributes */ + js_devdata->scheduling_tick_ns = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS); + js_devdata->soft_stop_ticks = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS); + js_devdata->soft_stop_ticks_cl = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL); + js_devdata->hard_stop_ticks_ss = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS); + js_devdata->hard_stop_ticks_cl = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL); + js_devdata->hard_stop_ticks_nss = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS); + js_devdata->gpu_reset_ticks_ss = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS); + js_devdata->gpu_reset_ticks_cl = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL); + js_devdata->gpu_reset_ticks_nss = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS); + js_devdata->ctx_timeslice_ns = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS); + js_devdata->cfs_ctx_runtime_init_slices = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_INIT_SLICES); + js_devdata->cfs_ctx_runtime_min_slices = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_MIN_SLICES); + + KBASE_LOG(2, kbdev->dev, "JS Config Attribs: "); + KBASE_LOG(2, kbdev->dev, "\tscheduling_tick_ns:%u", js_devdata->scheduling_tick_ns); + KBASE_LOG(2, kbdev->dev, "\tsoft_stop_ticks:%u", js_devdata->soft_stop_ticks); + KBASE_LOG(2, kbdev->dev, "\tsoft_stop_ticks_cl:%u", js_devdata->soft_stop_ticks_cl); + KBASE_LOG(2, kbdev->dev, 
"\thard_stop_ticks_ss:%u", js_devdata->hard_stop_ticks_ss); + KBASE_LOG(2, kbdev->dev, "\thard_stop_ticks_cl:%u", js_devdata->hard_stop_ticks_cl); + KBASE_LOG(2, kbdev->dev, "\thard_stop_ticks_nss:%u", js_devdata->hard_stop_ticks_nss); + KBASE_LOG(2, kbdev->dev, "\tgpu_reset_ticks_ss:%u", js_devdata->gpu_reset_ticks_ss); + KBASE_LOG(2, kbdev->dev, "\tgpu_reset_ticks_cl:%u", js_devdata->gpu_reset_ticks_cl); + KBASE_LOG(2, kbdev->dev, "\tgpu_reset_ticks_nss:%u", js_devdata->gpu_reset_ticks_nss); + KBASE_LOG(2, kbdev->dev, "\tctx_timeslice_ns:%u", js_devdata->ctx_timeslice_ns); + KBASE_LOG(2, kbdev->dev, "\tcfs_ctx_runtime_init_slices:%u", js_devdata->cfs_ctx_runtime_init_slices); + KBASE_LOG(2, kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u", js_devdata->cfs_ctx_runtime_min_slices); + +#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS != 0 + KBASE_LOG(2, kbdev->dev, "Job Scheduling Policy Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.", js_devdata->soft_stop_ticks, js_devdata->scheduling_tick_ns); +#endif +#if KBASE_DISABLE_SCHEDULING_HARD_STOPS != 0 + KBASE_LOG(2, kbdev->dev, "Job Scheduling Policy Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_nss==%u at %uns per tick. Other hard-stops may still occur.", js_devdata->hard_stop_ticks_ss, js_devdata->hard_stop_ticks_nss, js_devdata->scheduling_tick_ns); +#endif +#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS != 0 && KBASE_DISABLE_SCHEDULING_HARD_STOPS != 0 + KBASE_LOG(2, kbdev->dev, "Note: The JS policy's tick timer (if coded) will still be run, but do nothing."); +#endif + + /* setup the number of irq throttle cycles base on given time */ + { + int irq_throttle_time_us = kbdev->gpu_props.irq_throttle_time_us; + int irq_throttle_cycles = kbasep_js_convert_us_to_gpu_ticks_max_freq(kbdev, irq_throttle_time_us); + atomic_set(&kbdev->irq_throttle_cycles, irq_throttle_cycles); + } + + /* Clear the AS data, including setting NULL pointers */ + memset(&js_devdata->runpool_irq.per_as_data[0], 0, sizeof(js_devdata->runpool_irq.per_as_data)); + + for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) + js_devdata->js_reqs[i] = core_reqs_from_jsn_features(kbdev->gpu_props.props.raw_props.js_features[i]); + + js_devdata->init_status |= JS_DEVDATA_INIT_CONSTANTS; + + /* On error, we could continue on: providing none of the below resources + * rely on the ones above */ + + mutex_init(&js_devdata->runpool_mutex); + mutex_init(&js_devdata->queue_mutex); + spin_lock_init(&js_devdata->runpool_irq.lock); + + err = kbasep_js_policy_init(kbdev); + if (err == MALI_ERROR_NONE) + js_devdata->init_status |= JS_DEVDATA_INIT_POLICY; + + /* On error, do no cleanup; this will be handled by the caller(s), since + * we've designed this resource to be safe to terminate on init-fail */ + if (js_devdata->init_status != JS_DEVDATA_INIT_ALL) + return MALI_ERROR_FUNCTION_FAILED; + + return MALI_ERROR_NONE; +} + +void kbasep_js_devdata_halt(kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +void kbasep_js_devdata_term(kbase_device *kbdev) +{ + kbasep_js_device_data *js_devdata; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_devdata = &kbdev->js_data; + + if ((js_devdata->init_status & JS_DEVDATA_INIT_CONSTANTS)) { + s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, }; + /* The caller must de-register all contexts before calling this */ + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0); + KBASE_DEBUG_ASSERT(memcmp(js_devdata->runpool_irq.ctx_attr_ref_count, 
zero_ctx_attr_ref_count, sizeof(js_devdata->runpool_irq.ctx_attr_ref_count)) == 0); + CSTD_UNUSED(zero_ctx_attr_ref_count); + } + if ((js_devdata->init_status & JS_DEVDATA_INIT_POLICY)) + kbasep_js_policy_term(&js_devdata->policy); + + js_devdata->init_status = JS_DEVDATA_INIT_NONE; +} + +mali_error kbasep_js_kctx_init(kbase_context * const kctx) +{ + kbase_device *kbdev; + kbasep_js_kctx_info *js_kctx_info; + mali_error err; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_kctx_info = &kctx->jctx.sched_info; + KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE); + + js_kctx_info->ctx.nr_jobs = 0; + js_kctx_info->ctx.is_scheduled = MALI_FALSE; + js_kctx_info->ctx.is_dying = MALI_FALSE; + memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); + + /* Initially, the context is disabled from submission until the create flags are set */ + js_kctx_info->ctx.flags = KBASE_CTX_FLAG_SUBMIT_DISABLED; + + js_kctx_info->init_status |= JS_KCTX_INIT_CONSTANTS; + + /* On error, we could continue on: providing none of the below resources + * rely on the ones above */ + mutex_init(&js_kctx_info->ctx.jsctx_mutex); + + init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait); + + err = kbasep_js_policy_init_ctx(kbdev, kctx); + if (err == MALI_ERROR_NONE) + js_kctx_info->init_status |= JS_KCTX_INIT_POLICY; + + /* On error, do no cleanup; this will be handled by the caller(s), since + * we've designed this resource to be safe to terminate on init-fail */ + if (js_kctx_info->init_status != JS_KCTX_INIT_ALL) + return MALI_ERROR_FUNCTION_FAILED; + + return MALI_ERROR_NONE; +} + +void kbasep_js_kctx_term(kbase_context *kctx) +{ + kbase_device *kbdev; + kbasep_js_kctx_info *js_kctx_info; + kbasep_js_policy *js_policy; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_policy = &kbdev->js_data.policy; + js_kctx_info = &kctx->jctx.sched_info; + + if ((js_kctx_info->init_status & JS_KCTX_INIT_CONSTANTS)) { + /* The caller must de-register all jobs before calling this */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled == MALI_FALSE); + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0); + } + + if ((js_kctx_info->init_status & JS_KCTX_INIT_POLICY)) + kbasep_js_policy_term_ctx(js_policy, kctx); + + js_kctx_info->init_status = JS_KCTX_INIT_NONE; +} + +/* Evict jobs from the NEXT registers + * + * The caller must hold: + * - kbasep_js_kctx_info::ctx::jsctx_mutex + * - kbasep_js_device_data::runpool_mutex + */ +STATIC void kbasep_js_runpool_evict_next_jobs(kbase_device *kbdev, kbase_context *kctx) +{ + unsigned long flags; + int js; + kbasep_js_device_data *js_devdata; + + js_devdata = &kbdev->js_data; + + BUG_ON(!mutex_is_locked(&kctx->jctx.sched_info.ctx.jsctx_mutex)); + BUG_ON(!mutex_is_locked(&js_devdata->runpool_mutex)); + + /* Prevent contexts in the runpool from submitting jobs */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + /* There's no need to prevent contexts in the runpool from submitting jobs, + * because we complete this operation by the time we release the + * runpool_irq.lock */ + + /* Evict jobs from the NEXT registers */ + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + kbase_jm_slot *slot; + kbase_jd_atom *tail; + + if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), NULL)) { + /* No job in the NEXT register */ + continue; + } + + slot = &kbdev->jm_slots[js]; + tail = 
kbasep_jm_peek_idx_submit_slot(slot, slot->submitted_nr - 1); + + KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, 1); + /* Clearing job from next registers */ + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), JSn_COMMAND_NOP, NULL); + + /* Check to see if we did remove a job from the next registers */ + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), NULL) != 0 || kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), NULL) != 0) { + /* The job was successfully cleared from the next registers, requeue it */ + kbase_jd_atom *dequeued_katom = kbasep_jm_dequeue_tail_submit_slot(slot); + KBASE_DEBUG_ASSERT(dequeued_katom == tail); + + /* Set the next registers to NULL */ + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), 0, NULL); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), 0, NULL); + + KBASE_TRACE_ADD_SLOT(kbdev, JM_SLOT_EVICT, dequeued_katom->kctx, dequeued_katom, dequeued_katom->jc, js); + + /* Complete the job, indicate that it took no time, and don't start + * new atoms */ + kbase_jd_done(dequeued_katom, js, NULL, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT); + } + KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, 0); + } + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); +} + +/** + * Fast start a higher priority job + * If the runpool is full, the lower priority contexts with no running jobs + * will be evicted from the runpool + * + * If \a kctx_new is NULL, the first context with no running jobs will be evicted + * + * The following locking conditions are made on the caller: + * - The caller must \b not hold \a kctx_new's + * kbasep_js_kctx_info::ctx::jsctx_mutex, or that mutex of any ctx in the + * runpool. This is because \a kctx_new's jsctx_mutex and one of the other + * scheduled ctx's jsctx_mutex will be obtained internally. + * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this will be + * obtained internally) + * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used + * internally). + */ +STATIC void kbasep_js_runpool_attempt_fast_start_ctx(kbase_device *kbdev, kbase_context *kctx_new) +{ + unsigned long flags; + kbasep_js_device_data *js_devdata; + kbasep_js_kctx_info *js_kctx_new; + kbasep_js_policy *js_policy; + kbasep_js_per_as_data *js_per_as_data; + int evict_as_nr; + kbasep_js_atom_retained_state katom_retained_state; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_devdata = &kbdev->js_data; + js_policy = &kbdev->js_data.policy; + + if (kctx_new != NULL) { + js_kctx_new = &kctx_new->jctx.sched_info; + mutex_lock(&js_kctx_new->ctx.jsctx_mutex); + } else { + js_kctx_new = NULL; + CSTD_UNUSED(js_kctx_new); + } + + /* Setup a dummy katom_retained_state */ + kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); + + mutex_lock(&js_devdata->runpool_mutex); + + /* If the runpool is full and either there is no specified context or the specified context is not dying, then + attempt to fast start the specified context or evict the first context with no running jobs. 
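The selection loop that follows walks every address space looking for a context that is scheduled in but currently idle (as_busy_refcount == 0) and of lower priority than the incoming one; the first such victim has submission disabled and is released so the new context can take its place. A simplified sketch of that victim selection, with numeric priorities (lower number means higher priority, as in the driver's comment) standing in for the policy callback and the per-AS data:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define NR_ADDRESS_SPACES 4

struct ctx {
	int priority;          /* lower number == higher priority */
	int as_busy_refcount;  /* jobs currently referencing this AS */
};

/* NULL means the address space is empty. */
static struct ctx *per_as_ctx[NR_ADDRESS_SPACES];

/* Pick an idle, lower-priority context to evict for 'incoming'
 * (or any idle context when incoming == NULL). Returns its AS or -1. */
static int pick_eviction_victim(const struct ctx *incoming)
{
	for (int as = 0; as < NR_ADDRESS_SPACES; as++) {
		struct ctx *victim = per_as_ctx[as];

		if (victim == NULL || victim->as_busy_refcount != 0)
			continue;   /* empty, or still running jobs */
		if (incoming == NULL || victim->priority > incoming->priority)
			return as;  /* idle and lower priority: evict */
	}
	return -1;
}

int main(void)
{
	struct ctx busy_hi  = { .priority = 0, .as_busy_refcount = 2 };
	struct ctx idle_lo  = { .priority = 5, .as_busy_refcount = 0 };
	struct ctx incoming = { .priority = 1, .as_busy_refcount = 0 };

	per_as_ctx[0] = &busy_hi;
	per_as_ctx[1] = &idle_lo;
	printf("evict AS %d\n", pick_eviction_victim(&incoming));  /* 1 */
	return 0;
}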
*/ + if (check_is_runpool_full(kbdev, kctx_new) && + (!js_kctx_new || (js_kctx_new && !js_kctx_new->ctx.is_dying))) { + /* No free address spaces - attempt to evict non-running lower priority context */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + for (evict_as_nr = 0; evict_as_nr < kbdev->nr_hw_address_spaces; evict_as_nr++) { + kbase_context *kctx_evict; + js_per_as_data = &js_devdata->runpool_irq.per_as_data[evict_as_nr]; + kctx_evict = js_per_as_data->kctx; + + /* Look for the AS which is not currently running */ + if (0 == js_per_as_data->as_busy_refcount && kctx_evict != NULL) { + /* Now compare the scheduled priority we are considering evicting with the new ctx priority + * and take into consideration if the scheduled priority is a realtime policy or not. + * Note that the lower the number, the higher the priority + */ + if ((kctx_new == NULL) || kbasep_js_policy_ctx_has_priority(js_policy, kctx_evict, kctx_new)) { + mali_bool retain_result; + kbasep_js_release_result release_result; + KBASE_TRACE_ADD(kbdev, JS_FAST_START_EVICTS_CTX, kctx_evict, NULL, 0u, (uintptr_t)kctx_new); + + /* Retain the ctx to work on it - this shouldn't be able to fail */ + retain_result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx_evict); + KBASE_DEBUG_ASSERT(retain_result != MALI_FALSE); + CSTD_UNUSED(retain_result); + + /* This will cause the context to be scheduled out on the next runpool_release_ctx(), + * and also stop its refcount increasing */ + kbasep_js_clear_submit_allowed(js_devdata, kctx_evict); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&js_devdata->runpool_mutex); + if (kctx_new != NULL) + mutex_unlock(&js_kctx_new->ctx.jsctx_mutex); + + /* Stop working on the target context, start working on the kctx_evict context */ + + mutex_lock(&kctx_evict->jctx.sched_info.ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx_evict, &katom_retained_state); + mutex_unlock(&js_devdata->runpool_mutex); + /* Only requeue if actually descheduled, which is more robust in case + * something else retains it (e.g. 
two high priority contexts racing + * to evict the same lower priority context) */ + if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx_evict, MALI_TRUE); + + mutex_unlock(&kctx_evict->jctx.sched_info.ctx.jsctx_mutex); + + /* release_result isn't propogated further: + * - the caller will be scheduling in a context anyway + * - which will also cause new jobs to run */ + + /* ctx fast start has taken place */ + return; + } + } + } + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + } + + /* ctx fast start has not taken place */ + mutex_unlock(&js_devdata->runpool_mutex); + if (kctx_new != NULL) + mutex_unlock(&js_kctx_new->ctx.jsctx_mutex); +} + +mali_bool kbasep_js_add_job(kbase_context *kctx, kbase_jd_atom *atom) +{ + unsigned long flags; + kbasep_js_kctx_info *js_kctx_info; + kbase_device *kbdev; + kbasep_js_device_data *js_devdata; + kbasep_js_policy *js_policy; + + mali_bool policy_queue_updated = MALI_FALSE; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(atom != NULL); + lockdep_assert_held(&kctx->jctx.lock); + + kbdev = kctx->kbdev; + js_devdata = &kbdev->js_data; + js_policy = &kbdev->js_data.policy; + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom)); + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + /* Policy-specific initialization of atoms (which cannot fail). Anything that + * could've failed must've been done at kbasep_jd_policy_init_job() time. */ + kbasep_js_policy_register_job(js_policy, kctx, atom); + + /* + * Begin Runpool transaction + */ + mutex_lock(&js_devdata->runpool_mutex); + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, kbasep_js_trace_get_refcnt(kbdev, kctx)); + + /* Refcount ctx.nr_jobs */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); + ++(js_kctx_info->ctx.nr_jobs); + + /* Setup any scheduling information */ + kbasep_js_clear_job_retry_submit(atom); + + /* Lock for state available during IRQ */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + /* Context Attribute Refcounting */ + kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom); + + /* Enqueue the job in the policy, causing it to be scheduled if the + * parent context gets scheduled */ + kbasep_js_policy_enqueue_job(js_policy, atom); + + if (js_kctx_info->ctx.is_scheduled != MALI_FALSE) { + /* Handle an already running context - try to run the new job, in case it + * matches requirements that aren't matched by any other job in the Run + * Pool */ + kbasep_js_try_run_next_job_nolock(kbdev); + } + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&js_devdata->runpool_mutex); + /* End runpool transaction */ + + if (js_kctx_info->ctx.is_scheduled == MALI_FALSE) { + if (js_kctx_info->ctx.is_dying) { + /* A job got added while/after kbase_job_zap_context() was called + * on a non-scheduled context (e.g. KDS dependency resolved). Kill + * that job by killing the context. 
*/ + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, MALI_FALSE); + } else if (js_kctx_info->ctx.nr_jobs == 1) { + /* Handle Refcount going from 0 to 1: schedule the context on the Policy Queue */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled == MALI_FALSE); + KBASE_LOG(1, kbdev->dev, "JS: Enqueue Context %p", kctx); + + mutex_lock(&js_devdata->queue_mutex); + kbasep_js_policy_enqueue_ctx(js_policy, kctx); + mutex_unlock(&js_devdata->queue_mutex); + + /* Policy Queue was updated - caller must try to schedule the head context + * We also try to encourage a fast-start from here. */ + policy_queue_updated = MALI_TRUE; + } + } + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + /* If the runpool is full and this job has a higher priority than the + * non-running job in the runpool - evict it so this higher priority job + * starts faster. Fast-starting requires the jsctx_mutex to be dropped, + * because it works on multiple ctxs + * + * Note: If the context is being killed with kbase_job_zap_context(), then + * kctx can't disappear after the jsctx_mutex was dropped. This is because + * the caller holds kctx->jctx.lock */ + if (policy_queue_updated) + kbasep_js_runpool_attempt_fast_start_ctx(kbdev, kctx); + + return policy_queue_updated; +} + +void kbasep_js_remove_job(kbase_device *kbdev, kbase_context *kctx, kbase_jd_atom *atom) +{ + kbasep_js_kctx_info *js_kctx_info; + kbasep_js_device_data *js_devdata; + kbasep_js_policy *js_policy; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(atom != NULL); + + js_devdata = &kbdev->js_data; + js_policy = &kbdev->js_data.policy; + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc, kbasep_js_trace_get_refcnt(kbdev, kctx)); + + /* De-refcount ctx.nr_jobs */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); + --(js_kctx_info->ctx.nr_jobs); + + /* De-register the job from the system */ + kbasep_js_policy_deregister_job(js_policy, kctx, atom); +} + +void kbasep_js_remove_cancelled_job(kbase_device *kbdev, kbase_context *kctx, kbase_jd_atom *katom) +{ + unsigned long flags; + kbasep_js_atom_retained_state katom_retained_state; + kbasep_js_device_data *js_devdata; + mali_bool attr_state_changed; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + + js_devdata = &kbdev->js_data; + + kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); + kbasep_js_remove_job(kbdev, kctx, katom); + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + /* The atom has 'finished' (will not be re-run), so no need to call + * kbasep_js_has_atom_finished(). + * + * This is because it returns MALI_FALSE for soft-stopped atoms, but we + * want to override that, because we're cancelling an atom regardless of + * whether it was soft-stopped or not */ + attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, &katom_retained_state); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + if (attr_state_changed != MALI_FALSE) { + /* A change in runpool ctx attributes might mean we can run more jobs + * than before. 
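kbasep_js_add_job() and kbasep_js_remove_job() above boil down to a per-context job count: the transition from 0 to 1 jobs is what places an unscheduled context on the policy queue and tells the caller the queue head may have changed, while later additions just feed the already-scheduled context. A compact sketch of that counting rule; the enqueue stub and flags are illustrative, and the dying-context path is reduced to a comment:

#include <stdbool.h>
#include <stdio.h>

struct context {
	unsigned int nr_jobs;
	bool is_scheduled;
	bool is_dying;
};

static void enqueue_ctx_on_policy_queue(struct context *kctx)
{
	(void)kctx;
	puts("context placed on the policy queue");
}

/* Returns true when the policy queue was updated, i.e. the caller should try
 * to schedule the head context (the fast-start attempt in the driver). */
static bool add_job(struct context *kctx)
{
	kctx->nr_jobs++;

	if (kctx->is_scheduled || kctx->is_dying)
		return false;       /* already running, or being torn down */

	if (kctx->nr_jobs == 1) {
		enqueue_ctx_on_policy_queue(kctx);
		return true;        /* 0 -> 1 jobs: queue head changed */
	}
	return false;
}

static void remove_job(struct context *kctx)
{
	kctx->nr_jobs--;            /* caller guarantees nr_jobs > 0 */
}

int main(void)
{
	struct context kctx = { 0, false, false };

	printf("first add updated queue: %d\n", add_job(&kctx));   /* 1 */
	printf("second add updated queue: %d\n", add_job(&kctx));  /* 0 */
	remove_job(&kctx);
	return 0;
}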
*/ + kbase_js_try_run_jobs(kbdev); + } +} + +mali_bool kbasep_js_runpool_retain_ctx(kbase_device *kbdev, kbase_context *kctx) +{ + unsigned long flags; + kbasep_js_device_data *js_devdata; + mali_bool result; + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + + /* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0, + kbasep_js_trace_get_refcnt(kbdev, kctx)); */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return result; +} + +kbase_context *kbasep_js_runpool_lookup_ctx(kbase_device *kbdev, int as_nr) +{ + unsigned long flags; + kbasep_js_device_data *js_devdata; + kbase_context *found_kctx = NULL; + kbasep_js_per_as_data *js_per_as_data; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); + js_devdata = &kbdev->js_data; + js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + found_kctx = js_per_as_data->kctx; + + if (found_kctx != NULL) + ++(js_per_as_data->as_busy_refcount); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return found_kctx; +} + +/** + * @brief Try running more jobs after releasing a context and/or atom + * + * This collates a set of actions that must happen whilst + * kbasep_js_device_data::runpool_irq::lock is held. + * + * This includes running more jobs when: + * - The previously released kctx caused a ctx attribute change + * - The released atom caused a ctx attribute change + * - Slots were previously blocked due to affinity restrictions + * - Submission during IRQ handling failed + */ +STATIC void kbasep_js_run_jobs_after_ctx_and_atom_release(kbase_device *kbdev, kbase_context *kctx, kbasep_js_atom_retained_state *katom_retained_state, mali_bool runpool_ctx_attr_change) +{ + kbasep_js_device_data *js_devdata; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(katom_retained_state != NULL); + js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&js_devdata->runpool_irq.lock); + + if (js_devdata->nr_user_contexts_running != 0) { + mali_bool retry_submit; + int retry_jobslot; + + retry_submit = kbasep_js_get_atom_retry_submit_slot(katom_retained_state, &retry_jobslot); + + if (runpool_ctx_attr_change != MALI_FALSE) { + /* A change in runpool ctx attributes might mean we can run more jobs + * than before */ + kbasep_js_try_run_next_job_nolock(kbdev); + + /* A retry submit on all slots has now happened, so don't need to do it again */ + retry_submit = MALI_FALSE; + } + + /* Submit on any slots that might've had atoms blocked by the affinity of + * a completed atom. 
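kbasep_js_runpool_lookup_ctx() above follows one rule: while the runpool_irq lock is held, read the context bound to the address space and bump its busy refcount in the same critical section, so the context cannot be descheduled before the matching release. A user-space sketch of that lookup-and-retain step; a pthread mutex stands in for the IRQ spinlock, and the release shown here is only the refcount drop, not the driver's full release path:

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

#define NR_ADDRESS_SPACES 4

struct context {
	int as_busy_refcount;
};

static struct context *per_as_ctx[NR_ADDRESS_SPACES];
static pthread_mutex_t runpool_irq_lock = PTHREAD_MUTEX_INITIALIZER;

/* Look up the context on an address space and retain it in one critical
 * section, so it cannot vanish between the lookup and the refcount bump. */
static struct context *lookup_and_retain_ctx(int as_nr)
{
	struct context *kctx;

	pthread_mutex_lock(&runpool_irq_lock);
	kctx = per_as_ctx[as_nr];
	if (kctx != NULL)
		kctx->as_busy_refcount++;
	pthread_mutex_unlock(&runpool_irq_lock);

	return kctx;
}

/* The driver's real release (kbasep_js_runpool_release_ctx_internal) does
 * much more; this just drops the reference taken above. */
static void release_ctx(int as_nr)
{
	pthread_mutex_lock(&runpool_irq_lock);
	if (per_as_ctx[as_nr] != NULL)
		per_as_ctx[as_nr]->as_busy_refcount--;
	pthread_mutex_unlock(&runpool_irq_lock);
}

int main(void)
{
	struct context kctx = { 0 };

	per_as_ctx[2] = &kctx;
	if (lookup_and_retain_ctx(2) != NULL)
		printf("refcount now %d\n", kctx.as_busy_refcount);  /* 1 */
	release_ctx(2);
	return 0;
}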
+ * + * If no atom has recently completed, then this is harmelss */ + kbase_js_affinity_submit_to_blocked_slots(kbdev); + + /* If the IRQ handler failed to get a job from the policy, try again from + * outside the IRQ handler + * NOTE: We may've already cleared retry_submit from submitting above */ + if (retry_submit != MALI_FALSE) { + KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, kctx, NULL, 0u, retry_jobslot); + kbasep_js_try_run_next_job_on_slot_nolock(kbdev, retry_jobslot); + } + } +} + +/** + * Internal function to release the reference on a ctx and an atom's "retained + * state", only taking the runpool and as transaction mutexes + * + * This also starts more jobs running in the case of an ctx-attribute state change + * + * This does none of the followup actions for scheduling: + * - It does not schedule in a new context + * - It does not requeue or handle dying contexts + * + * For those tasks, just call kbasep_js_runpool_release_ctx() instead + * + * Requires: + * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr + * - Context has a non-zero refcount + * - Caller holds js_kctx_info->ctx.jsctx_mutex + * - Caller holds js_devdata->runpool_mutex + */ +STATIC kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(kbase_device *kbdev, kbase_context *kctx, kbasep_js_atom_retained_state *katom_retained_state) +{ + unsigned long flags; + kbasep_js_device_data *js_devdata; + kbasep_js_kctx_info *js_kctx_info; + kbasep_js_policy *js_policy; + kbasep_js_per_as_data *js_per_as_data; + + kbasep_js_release_result release_result = 0u; + mali_bool runpool_ctx_attr_change = MALI_FALSE; + int kctx_as_nr; + kbase_as *current_as; + int new_ref_count; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + js_policy = &kbdev->js_data.policy; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != MALI_FALSE); + + /* kctx->as_nr and js_per_as_data are only read from here. The caller's + * js_ctx_mutex provides a barrier that ensures they are up-to-date. + * + * They will not change whilst we're reading them, because the refcount + * is non-zero (and we ASSERT on that last fact). + */ + kctx_as_nr = kctx->as_nr; + KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID); + js_per_as_data = &js_devdata->runpool_irq.per_as_data[kctx_as_nr]; + KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0); + + /* + * Transaction begins on AS and runpool_irq + * + * Assert about out calling contract + */ + current_as = &kbdev->as[kctx_as_nr]; + mutex_lock(¤t_as->transaction_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr); + KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0); + + /* Update refcount */ + new_ref_count = --(js_per_as_data->as_busy_refcount); + + /* Release the atom if it finished (i.e. 
wasn't soft-stopped) */ + if (kbasep_js_has_atom_finished(katom_retained_state) != MALI_FALSE) + runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, katom_retained_state); + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u, new_ref_count); + + if (new_ref_count == 1 && kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_PRIVILEGED + && !kbase_pm_is_suspending(kbdev) ) { + /* Context is kept scheduled into an address space even when there are no jobs, in this case we have + * to handle the situation where all jobs have been evicted from the GPU and submission is disabled. + * + * At this point we re-enable submission to allow further jobs to be executed + */ + kbasep_js_set_submit_allowed(js_devdata, kctx); + } + + /* Make a set of checks to see if the context should be scheduled out */ + if (new_ref_count == 0 && (kctx->jctx.sched_info.ctx.nr_jobs == 0 || kbasep_js_is_submit_allowed(js_devdata, kctx) == MALI_FALSE)) { + /* Last reference, and we've been told to remove this context from the Run Pool */ + KBASE_LOG(2, kbdev->dev, "JS: RunPool Remove Context %p because as_busy_refcount=%d, jobs=%d, allowed=%d", kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, kbasep_js_is_submit_allowed(js_devdata, kctx)); + + kbasep_js_policy_runpool_remove_ctx(js_policy, kctx); + + /* Stop any more refcounts occuring on the context */ + js_per_as_data->kctx = NULL; + + /* Ensure we prevent the context from submitting any new jobs + * e.g. from kbasep_js_try_run_next_job_on_slot_irq_nolock() */ + kbasep_js_clear_submit_allowed(js_devdata, kctx); + + /* Disable the MMU on the affected address space, and indicate it's invalid */ + kbase_mmu_disable(kctx); + +#ifdef CONFIG_MALI_GATOR_SUPPORT + kbase_trace_mali_mmu_as_released(kctx->as_nr); +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + + kctx->as_nr = KBASEP_AS_NR_INVALID; + + /* Ctx Attribute handling + * + * Releasing atoms attributes must either happen before this, or after + * 'is_scheduled' is changed, otherwise we double-decount the attributes*/ + runpool_ctx_attr_change |= kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); + + /* Early update of context count, to optimize the + * kbasep_js_run_jobs_after_ctx_and_atom_release() call */ + runpool_dec_context_count(kbdev, kctx); + + /* Releasing the context and katom retained state can allow more jobs to run */ + kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, katom_retained_state, runpool_ctx_attr_change); + + /* + * Transaction ends on AS and runpool_irq: + * + * By this point, the AS-related data is now clear and ready for re-use. + * + * Since releases only occur once for each previous successful retain, and no more + * retains are allowed on this context, no other thread will be operating in this + * code whilst we are + */ + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(¤t_as->transaction_mutex); + + /* Free up the address space */ + release_addr_space(kbdev, kctx_as_nr); + /* Note: Don't reuse kctx_as_nr now */ + + /* Synchronize with any policy timers */ + kbasep_js_policy_runpool_timers_sync(js_policy); + + /* update book-keeping info */ + js_kctx_info->ctx.is_scheduled = MALI_FALSE; + /* Signal any waiter that the context is not scheduled, so is safe for + * termination - once the jsctx_mutex is also dropped, and jobs have + * finished. 
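The deschedule decision above comes down to two facts computed under the lock: the new busy refcount, and whether the context still has work it is allowed to submit. Only when the last reference drops and there is nothing runnable left is the context unbound, after which the WAS_DESCHEDULED flag is reported back to the caller. A condensed sketch of just that decision; the structure and helpers are illustrative, and the MMU, address-space and policy teardown are reduced to comments:

#include <stdbool.h>
#include <stdio.h>

#define RELEASE_RESULT_WAS_DESCHEDULED (1u << 0)

struct context {
	int as_busy_refcount;
	unsigned int nr_jobs;
	bool submit_allowed;
	bool is_scheduled;
};

/* Drop one busy reference; deschedule the context when it was the last one
 * and the context has nothing left that it is allowed to run. */
static unsigned int release_ctx(struct context *kctx)
{
	unsigned int result = 0;
	int new_ref_count = --kctx->as_busy_refcount;

	if (new_ref_count == 0 &&
	    (kctx->nr_jobs == 0 || !kctx->submit_allowed)) {
		kctx->submit_allowed = false;  /* block further submission */
		kctx->is_scheduled = false;    /* in the driver: MMU disabled,
						* AS freed, waiters woken */
		result |= RELEASE_RESULT_WAS_DESCHEDULED;
	}
	return result;
}

int main(void)
{
	struct context kctx = {
		.as_busy_refcount = 1, .nr_jobs = 0,
		.submit_allowed = true, .is_scheduled = true,
	};

	if (release_ctx(&kctx) & RELEASE_RESULT_WAS_DESCHEDULED)
		puts("context descheduled: try scheduling the queue head");
	return 0;
}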
*/ + wake_up(&js_kctx_info->ctx.is_scheduled_wait); + + /* Queue an action to occur after we've dropped the lock */ + release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED; + + } else { + kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, katom_retained_state, runpool_ctx_attr_change); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(¤t_as->transaction_mutex); + } + + return release_result; +} + +void kbasep_js_runpool_requeue_or_kill_ctx(kbase_device *kbdev, kbase_context *kctx, mali_bool has_pm_ref) +{ + kbasep_js_device_data *js_devdata; + kbasep_js_policy *js_policy; + kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_policy = &kbdev->js_data.policy; + js_devdata = &kbdev->js_data; + + /* This is called if and only if you've you've detached the context from + * the Runpool or the Policy Queue, and not added it back to the Runpool */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled == MALI_FALSE); + + if (js_kctx_info->ctx.is_dying != MALI_FALSE) { + /* Dying: don't requeue, but kill all jobs on the context. This happens + * asynchronously */ + KBASE_LOG(2, kbdev->dev, "JS: ** Killing Context %p on RunPool Remove **", kctx); + kbasep_js_policy_foreach_ctx_job(js_policy, kctx, &kbase_jd_cancel, MALI_TRUE); + } else if (js_kctx_info->ctx.nr_jobs > 0) { + /* Not dying, has jobs: de-ref core counts from each job before addding + * back to the queue */ + kbasep_js_policy_foreach_ctx_job(js_policy, kctx, &kbasep_js_job_check_deref_cores, MALI_FALSE); + + KBASE_LOG(1, kbdev->dev, "JS: Requeue Context %p", kctx); + mutex_lock(&js_devdata->queue_mutex); + kbasep_js_policy_enqueue_ctx(js_policy, kctx); + mutex_unlock(&js_devdata->queue_mutex); + } else { + /* Not dying, no jobs: don't add back to the queue */ + KBASE_LOG(1, kbdev->dev, "JS: Idling Context %p (not requeued)", kctx); + } + + if (has_pm_ref) { + /* In all cases where we had a pm active refcount, release it */ + kbase_pm_context_idle(kbdev); + } +} + +void kbasep_js_runpool_release_ctx_and_katom_retained_state(kbase_device *kbdev, kbase_context *kctx, kbasep_js_atom_retained_state *katom_retained_state) +{ + kbasep_js_device_data *js_devdata; + kbasep_js_kctx_info *js_kctx_info; + kbasep_js_release_result release_result; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, katom_retained_state); + + /* Drop the runpool mutex to allow requeing kctx */ + mutex_unlock(&js_devdata->runpool_mutex); + if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, MALI_TRUE); + + /* Drop the jsctx_mutex to allow scheduling in a new context */ + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) { + /* We've freed up an address space, so let's try to schedule in another + * context + * + * Note: if there's a context to schedule in, then it also tries to run + * another job, in case the new context has jobs satisfying requirements + * that no other context/job in the runpool does */ + kbasep_js_try_schedule_head_ctx(kbdev); + } +} + +void kbasep_js_runpool_release_ctx(kbase_device *kbdev, kbase_context *kctx) +{ + 
kbasep_js_atom_retained_state katom_retained_state; + + kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); + + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); +} + +/** Variant of kbasep_js_runpool_release_ctx() that doesn't call into + * kbasep_js_try_schedule_head_ctx() */ +STATIC void kbasep_js_runpool_release_ctx_no_schedule(kbase_device *kbdev, kbase_context *kctx) +{ + kbasep_js_device_data *js_devdata; + kbasep_js_kctx_info *js_kctx_info; + kbasep_js_release_result release_result; + kbasep_js_atom_retained_state katom_retained_state_struct; + kbasep_js_atom_retained_state *katom_retained_state = &katom_retained_state_struct; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + kbasep_js_atom_retained_state_init_invalid(katom_retained_state); + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, katom_retained_state); + + /* Drop the runpool mutex to allow requeing kctx */ + mutex_unlock(&js_devdata->runpool_mutex); + if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, MALI_TRUE); + + /* Drop the jsctx_mutex to allow scheduling in a new context */ + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + /* NOTE: could return release_result if the caller would like to know + * whether it should schedule a new context, but currently no callers do */ +} + + +/** + * @brief Handle retaining cores for power management and affinity management, + * ensuring that cores are powered up and won't violate affinity restrictions. + * + * This function enters at the following @ref kbase_atom_coreref_state states: + * + * - NO_CORES_REQUESTED, + * - WAITING_FOR_REQUESTED_CORES, + * - RECHECK_AFFINITY, + * + * The transitions are as folows: + * - NO_CORES_REQUESTED -> WAITING_FOR_REQUESTED_CORES + * - WAITING_FOR_REQUESTED_CORES -> ( WAITING_FOR_REQUESTED_CORES or RECHECK_AFFINITY ) + * - RECHECK_AFFINITY -> ( WAITING_FOR_REQUESTED_CORES or CHECK_AFFINITY_VIOLATIONS ) + * - CHECK_AFFINITY_VIOLATIONS -> ( RECHECK_AFFINITY or READY ) + * + * The caller must hold: + * - kbasep_js_device_data::runpool_irq::lock + * + * @return MALI_FALSE when the function makes a transition to the same or lower state, indicating + * that the cores are not ready. + * @return MALI_TRUE once READY state is reached, indicating that the cores are 'ready' and won't + * violate affinity restrictions. + * + */ +STATIC mali_bool kbasep_js_job_check_ref_cores(kbase_device *kbdev, int js, kbase_jd_atom *katom) +{ + /* The most recently checked affinity. Having this at this scope allows us + * to guarantee that we've checked the affinity in this function call. */ + u64 recently_chosen_affinity = 0; + mali_bool chosen_affinity = MALI_FALSE; + mali_bool retry; + + do { + retry = MALI_FALSE; + + /* NOTE: The following uses a number of FALLTHROUGHs to optimize the + * calls to this function. 
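The body of kbasep_js_job_check_ref_cores() is written as a fall-through switch inside a retry loop: each case either advances katom->coreref_state and falls into the next case, breaks out with no transition so the caller can try again later, or steps back to an earlier state and sets retry. A stripped-down sketch of that control structure; the power-management and affinity queries are stubbed out, and the real RECHECK_AFFINITY case can also fall back to the waiting state, which is omitted here:

#include <stdbool.h>
#include <stdio.h>

enum coreref_state {
	STATE_NO_CORES_REQUESTED,
	STATE_WAITING_FOR_REQUESTED_CORES,
	STATE_RECHECK_AFFINITY,
	STATE_CHECK_AFFINITY_VIOLATIONS,
	STATE_READY,
};

/* Stubs standing in for the power-management and affinity queries. */
static bool affinity_changed(void)       { return false; }
static bool cores_powered_up(void)       { return true; }
static bool affinity_would_violate(void) { return false; }

static bool check_ref_cores(enum coreref_state *state)
{
	bool retry;

	do {
		retry = false;

		switch (*state) {
		case STATE_NO_CORES_REQUESTED:
			/* request the cores, record the chosen affinity ... */
			*state = STATE_WAITING_FOR_REQUESTED_CORES;
			/* fall through */
		case STATE_WAITING_FOR_REQUESTED_CORES:
			if (affinity_changed()) {
				/* affinity no longer valid: drop back and retry */
				*state = STATE_NO_CORES_REQUESTED;
				retry = true;
				break;
			}
			if (!cores_powered_up())
				break;   /* no transition: poll again later */
			*state = STATE_RECHECK_AFFINITY;
			/* fall through */
		case STATE_RECHECK_AFFINITY:
			*state = STATE_CHECK_AFFINITY_VIOLATIONS;
			/* fall through */
		case STATE_CHECK_AFFINITY_VIOLATIONS:
			if (affinity_would_violate()) {
				*state = STATE_RECHECK_AFFINITY;  /* step back; caller retries later */
				break;
			}
			*state = STATE_READY;
			break;
		default:
			break;
		}
	} while (retry);

	return *state == STATE_READY;
}

int main(void)
{
	enum coreref_state state = STATE_NO_CORES_REQUESTED;

	printf("cores ready: %d\n", check_ref_cores(&state));  /* prints 1 */
	return 0;
}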
Ending of the function is indicated by BREAK OUT */ + switch (katom->coreref_state) { + /* State when job is first attempted to be run */ + case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: + KBASE_DEBUG_ASSERT(katom->affinity == 0); + + /* Compute affinity */ + if (MALI_FALSE == kbase_js_choose_affinity(&recently_chosen_affinity, kbdev, katom, js)) { + /* No cores are currently available */ + /* *** BREAK OUT: No state transition *** */ + break; + } + + chosen_affinity = MALI_TRUE; + + /* Request the cores */ + kbase_pm_request_cores(kbdev, katom->core_req & BASE_JD_REQ_T, recently_chosen_affinity); + + katom->affinity = recently_chosen_affinity; + + /* Proceed to next state */ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: + { + kbase_pm_cores_ready cores_ready; + KBASE_DEBUG_ASSERT(katom->affinity != 0 || (katom->core_req & BASE_JD_REQ_T)); + + cores_ready = kbase_pm_register_inuse_cores(kbdev, katom->core_req & BASE_JD_REQ_T, katom->affinity); + if (cores_ready == KBASE_NEW_AFFINITY) { + /* Affinity no longer valid - return to previous state */ + kbasep_js_job_check_deref_cores(kbdev, katom); + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_CORE_REF_REGISTER_INUSE_FAILED, katom->kctx, katom, katom->jc, js, (u32) katom->affinity); + /* *** BREAK OUT: Return to previous state, retry *** */ + retry = MALI_TRUE; + break; + } + if (cores_ready == KBASE_CORES_NOT_READY) { + /* Stay in this state and return, to retry at this state later */ + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_CORE_REF_REGISTER_INUSE_FAILED, katom->kctx, katom, katom->jc, js, (u32) katom->affinity); + /* *** BREAK OUT: No state transition *** */ + break; + } + /* Proceed to next state */ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; + } + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: + KBASE_DEBUG_ASSERT(katom->affinity != 0 || (katom->core_req & BASE_JD_REQ_T)); + + /* Optimize out choosing the affinity twice in the same function call */ + if (chosen_affinity == MALI_FALSE) { + /* See if the affinity changed since a previous call. 
*/ + if (MALI_FALSE == kbase_js_choose_affinity(&recently_chosen_affinity, kbdev, katom, js)) { + /* No cores are currently available */ + kbasep_js_job_check_deref_cores(kbdev, katom); + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_CORE_REF_REQUEST_ON_RECHECK_FAILED, katom->kctx, katom, katom->jc, js, (u32) recently_chosen_affinity); + /* *** BREAK OUT: Transition to lower state *** */ + break; + } + chosen_affinity = MALI_TRUE; + } + + /* Now see if this requires a different set of cores */ + if (recently_chosen_affinity != katom->affinity) { + kbase_pm_cores_ready cores_ready; + + kbase_pm_request_cores(kbdev, katom->core_req & BASE_JD_REQ_T, recently_chosen_affinity); + + /* Register new cores whilst we still hold the old ones, to minimize power transitions */ + cores_ready = kbase_pm_register_inuse_cores(kbdev, katom->core_req & BASE_JD_REQ_T, recently_chosen_affinity); + kbasep_js_job_check_deref_cores(kbdev, katom); + + /* Fixup the state that was reduced by deref_cores: */ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; + katom->affinity = recently_chosen_affinity; + if (cores_ready == KBASE_NEW_AFFINITY) { + /* Affinity no longer valid - return to previous state */ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; + kbasep_js_job_check_deref_cores(kbdev, katom); + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_CORE_REF_REGISTER_INUSE_FAILED, katom->kctx, katom, katom->jc, js, (u32) katom->affinity); + /* *** BREAK OUT: Return to previous state, retry *** */ + retry = MALI_TRUE; + break; + } + /* Now might be waiting for powerup again, with a new affinity */ + if (cores_ready == KBASE_CORES_NOT_READY) { + /* Return to previous state */ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_CORE_REF_REGISTER_ON_RECHECK_FAILED, katom->kctx, katom, katom->jc, js, (u32) katom->affinity); + /* *** BREAK OUT: Transition to lower state *** */ + break; + } + } + /* Proceed to next state */ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS: + KBASE_DEBUG_ASSERT(katom->affinity != 0 || (katom->core_req & BASE_JD_REQ_T)); + KBASE_DEBUG_ASSERT(katom->affinity == recently_chosen_affinity); + + /* Note: this is where the caller must've taken the runpool_irq.lock */ + + /* Check for affinity violations - if there are any, then we just ask + * the caller to requeue and try again later */ + if (kbase_js_affinity_would_violate(kbdev, js, katom->affinity) != MALI_FALSE) { + /* Cause a re-attempt to submit from this slot on the next job complete */ + kbase_js_affinity_slot_blocked_an_atom(kbdev, js); + /* Return to previous state */ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; + /* *** BREAK OUT: Transition to lower state *** */ + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_CORE_REF_AFFINITY_WOULD_VIOLATE, katom->kctx, katom, katom->jc, js, (u32) katom->affinity); + break; + } + + /* No affinity violations would result, so the cores are ready */ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY; + /* *** BREAK OUT: Cores Ready *** */ + break; + + default: + KBASE_DEBUG_ASSERT_MSG(MALI_FALSE, "Unhandled kbase_atom_coreref_state %d", katom->coreref_state); + break; + } + } while (retry != MALI_FALSE); + + return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY); +} + +void kbasep_js_job_check_deref_cores(kbase_device *kbdev, struct kbase_jd_atom 
*katom) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + + switch (katom->coreref_state) { + case KBASE_ATOM_COREREF_STATE_READY: + /* State where atom was submitted to the HW - just proceed to power-down */ + KBASE_DEBUG_ASSERT(katom->affinity != 0 || (katom->core_req & BASE_JD_REQ_T)); + + /* *** FALLTHROUGH *** */ + + case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: + /* State where cores were registered */ + KBASE_DEBUG_ASSERT(katom->affinity != 0 || (katom->core_req & BASE_JD_REQ_T)); + kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T, katom->affinity); + + /* Note: We do not clear the state for kbase_js_affinity_slot_blocked_an_atom(). + * That is handled after finishing the job. This might be slightly + * suboptimal for some corner cases, but is otherwise not a problem + * (and resolves itself after the next job completes). */ + + break; + + case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: + /* State where cores were requested, but not registered */ + KBASE_DEBUG_ASSERT(katom->affinity != 0 || (katom->core_req & BASE_JD_REQ_T)); + kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T, katom->affinity); + break; + + case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: + /* Initial state - nothing required */ + KBASE_DEBUG_ASSERT(katom->affinity == 0); + break; + + default: + KBASE_DEBUG_ASSERT_MSG(MALI_FALSE, "Unhandled coreref_state: %d", katom->coreref_state); + break; + } + + katom->affinity = 0; + katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; +} + +/* + * Note: this function is quite similar to kbasep_js_try_run_next_job_on_slot() + */ +mali_bool kbasep_js_try_run_next_job_on_slot_irq_nolock(kbase_device *kbdev, int js, s8 *submit_count) +{ + kbasep_js_device_data *js_devdata; + mali_bool cores_ready; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_devdata = &kbdev->js_data; + + /* The caller of this function may not be aware of Ctx Attribute state changes so we + * must recheck if the given slot is still valid. Otherwise do not try to run. 
+ */ + if (kbase_js_can_run_job_on_slot_no_lock(kbdev, js)) { + /* Keep submitting while there's space to run a job on this job-slot, + * and there are jobs to get that match its requirements (see 'break' + * statement below) */ + while (*submit_count < KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ && kbasep_jm_is_submit_slots_free(kbdev, js, NULL) != MALI_FALSE) { + kbase_jd_atom *dequeued_atom; + mali_bool has_job = MALI_FALSE; + + /* Dequeue a job that matches the requirements */ + has_job = kbasep_js_policy_dequeue_job(kbdev, js, &dequeued_atom); + + if (has_job != MALI_FALSE) { + /* NOTE: since the runpool_irq lock is currently held and acts across + * all address spaces, any context whose busy refcount has reached + * zero won't yet be scheduled out whilst we're trying to run jobs + * from it */ + kbase_context *parent_ctx = dequeued_atom->kctx; + mali_bool retain_success; + + /* Retain/power up the cores it needs, check if cores are ready */ + cores_ready = kbasep_js_job_check_ref_cores(kbdev, js, dequeued_atom); + + if (cores_ready != MALI_TRUE && dequeued_atom->event_code != BASE_JD_EVENT_PM_EVENT) { + /* The job can't be submitted until the cores are ready, requeue the job */ + kbasep_js_policy_enqueue_job(&kbdev->js_data.policy, dequeued_atom); + break; + } + + /* ASSERT that the Policy picked a job from an allowed context */ + KBASE_DEBUG_ASSERT(kbasep_js_is_submit_allowed(js_devdata, parent_ctx)); + + /* Retain the context to stop it from being scheduled out + * This is released when the job finishes */ + retain_success = kbasep_js_runpool_retain_ctx_nolock(kbdev, parent_ctx); + KBASE_DEBUG_ASSERT(retain_success != MALI_FALSE); + CSTD_UNUSED(retain_success); + + /* Retain the affinity on the slot */ + kbase_js_affinity_retain_slot_cores(kbdev, js, dequeued_atom->affinity); + + /* Check if this job needs the cycle counter enabled before submission */ + kbasep_js_ref_permon_check_and_enable_cycle_counter(kbdev, dequeued_atom); + + if (dequeued_atom->event_code == BASE_JD_EVENT_PM_EVENT) { + dev_warn(kbdev->dev, "Rejecting atom due to BASE_JD_EVENT_PM_EVENT\n"); + /* The job has failed due to the specified core group being unavailable */ + kbase_jd_done(dequeued_atom, js, NULL, 0); + } else { + /* Submit the job */ + kbase_job_submit_nolock(kbdev, dequeued_atom, js); + + ++(*submit_count); + } + } else { + /* No more jobs - stop submitting for this slot */ + break; + } + } + } + + /* Indicate whether a retry in submission should be tried on a different + * dequeue function. These are the reasons why it *must* happen: + * - the KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ threshold was reached + * and new scheduling must be performed outside of IRQ mode. + * + * Failure to indicate this correctly could stop further jobs being processed. + * + * However, we do not _need_ to indicate a retry for the following: + * - kbasep_js_policy_dequeue_job() couldn't get a job. In which case, + * there's no point re-trying outside of IRQ, because the result will be + * the same until job dependencies are resolved, or user-space provides + * more jobs. In both those cases, we try to run jobs anyway, so + * processing does not stop. + * - kbasep_jm_is_submit_slots_free() was MALI_FALSE, indicating jobs were + * already running. When those jobs complete, that will still cause events + * that cause us to resume job submission. + * - kbase_js_can_run_job_on_slot_no_lock() was MALI_FALSE - this is for + * Ctx Attribute handling. 
That _can_ change outside of IRQ context, but + * is handled explicitly by kbasep_js_runpool_release_ctx_and_katom_retained_state(). + */ + return (mali_bool) (*submit_count >= KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ); +} + +void kbasep_js_try_run_next_job_on_slot_nolock(kbase_device *kbdev, int js) +{ + kbasep_js_device_data *js_devdata; + mali_bool has_job; + mali_bool cores_ready; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_devdata = &kbdev->js_data; + + KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running > 0); + + /* Keep submitting while there's space to run a job on this job-slot, + * and there are jobs to get that match its requirements (see 'break' + * statement below) */ + if (kbasep_jm_is_submit_slots_free(kbdev, js, NULL) != MALI_FALSE) { + /* The caller of this function may not be aware of Ctx Attribute state changes so we + * must recheck if the given slot is still valid. Otherwise do not try to run. + */ + if (kbase_js_can_run_job_on_slot_no_lock(kbdev, js)) { + do { + kbase_jd_atom *dequeued_atom; + + /* Dequeue a job that matches the requirements */ + has_job = kbasep_js_policy_dequeue_job(kbdev, js, &dequeued_atom); + + if (has_job != MALI_FALSE) { + /* NOTE: since the runpool_irq lock is currently held and acts across + * all address spaces, any context whose busy refcount has reached + * zero won't yet be scheduled out whilst we're trying to run jobs + * from it */ + kbase_context *parent_ctx = dequeued_atom->kctx; + mali_bool retain_success; + + /* Retain/power up the cores it needs, check if cores are ready */ + cores_ready = kbasep_js_job_check_ref_cores(kbdev, js, dequeued_atom); + + if (cores_ready != MALI_TRUE && dequeued_atom->event_code != BASE_JD_EVENT_PM_EVENT) { + /* The job can't be submitted until the cores are ready, requeue the job */ + kbasep_js_policy_enqueue_job(&kbdev->js_data.policy, dequeued_atom); + break; + } + /* ASSERT that the Policy picked a job from an allowed context */ + KBASE_DEBUG_ASSERT(kbasep_js_is_submit_allowed(js_devdata, parent_ctx)); + + /* Retain the context to stop it from being scheduled out + * This is released when the job finishes */ + retain_success = kbasep_js_runpool_retain_ctx_nolock(kbdev, parent_ctx); + KBASE_DEBUG_ASSERT(retain_success != MALI_FALSE); + CSTD_UNUSED(retain_success); + + /* Retain the affinity on the slot */ + kbase_js_affinity_retain_slot_cores(kbdev, js, dequeued_atom->affinity); + + /* Check if this job needs the cycle counter enabled before submission */ + kbasep_js_ref_permon_check_and_enable_cycle_counter(kbdev, dequeued_atom); + + if (dequeued_atom->event_code == BASE_JD_EVENT_PM_EVENT) { + dev_warn(kbdev->dev, "Rejecting atom due to BASE_JD_EVENT_PM_EVENT\n"); + /* The job has failed due to the specified core group being unavailable */ + kbase_jd_done(dequeued_atom, js, NULL, 0); + } else { + /* Submit the job */ + kbase_job_submit_nolock(kbdev, dequeued_atom, js); + } + } + + } while (kbasep_jm_is_submit_slots_free(kbdev, js, NULL) != MALI_FALSE && has_job != MALI_FALSE); + } + } +} + +void kbasep_js_try_schedule_head_ctx(kbase_device *kbdev) +{ + kbasep_js_device_data *js_devdata; + mali_bool has_kctx; + kbase_context *head_kctx; + kbasep_js_kctx_info *js_kctx_info; + mali_bool is_runpool_full; + kbase_as *new_address_space; + unsigned long flags; + mali_bool head_kctx_suspended = MALI_FALSE; + int pm_active_err; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_devdata = &kbdev->js_data; + + /* We *don't* make a speculative check on whether we can fit a context in the + * runpool, because 
most of our use-cases assume 2 or fewer contexts, and + * so we will usually have enough address spaces free. + * + * In any case, the check will be done later on once we have a context */ + + /* Grab the context off head of queue - if there is one */ + mutex_lock(&js_devdata->queue_mutex); + has_kctx = kbasep_js_policy_dequeue_head_ctx(&js_devdata->policy, &head_kctx); + mutex_unlock(&js_devdata->queue_mutex); + + if (has_kctx == MALI_FALSE) { + /* No ctxs to run - nothing to do */ + return; + } + js_kctx_info = &head_kctx->jctx.sched_info; + + KBASE_LOG(1, kbdev->dev, "JS: Dequeue Context %p", head_kctx); + + pm_active_err = kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE); + + /* + * Atomic transaction on the Context and Run Pool begins + */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + /* Check to see if we shouldn't add the context to run Run Pool: + * - it can't take the specified context, and so is 'full'. This may be + * 'full' even when there are addres spaces available, since some contexts + * are allowed in whereas others may not due to HW workarounds + * - A suspend is taking place + * - The context is dying due to kbase_job_zap_context() */ + is_runpool_full = check_is_runpool_full(kbdev, head_kctx); + if (is_runpool_full || pm_active_err || js_kctx_info->ctx.is_dying) { + /* Roll back the transaction so far and return */ + mutex_unlock(&js_devdata->runpool_mutex); + + /* Note: If a Power Management active reference was taken, it's released by + * this: */ + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, head_kctx, !pm_active_err); + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + return; + } + + /* From the point on, the Power Management active reference is released + * only if kbasep_js_runpool_release_ctx() causes the context to be removed + * from the runpool */ + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, head_kctx, NULL, 0u, kbasep_js_trace_get_refcnt(kbdev, head_kctx)); + +#if MALI_CUSTOMER_RELEASE == 0 + if (js_devdata->nr_user_contexts_running == 0) { + /* Only when there are no other contexts submitting jobs: + * Latch in run-time job scheduler timeouts that were set through js_timeouts sysfs file */ + if (kbdev->js_soft_stop_ticks != 0) + js_devdata->soft_stop_ticks = kbdev->js_soft_stop_ticks; + + if (kbdev->js_soft_stop_ticks_cl != 0) + js_devdata->soft_stop_ticks_cl = kbdev->js_soft_stop_ticks_cl; + + if (kbdev->js_hard_stop_ticks_ss != 0) + js_devdata->hard_stop_ticks_ss = kbdev->js_hard_stop_ticks_ss; + + if (kbdev->js_hard_stop_ticks_cl != 0) + js_devdata->hard_stop_ticks_cl = kbdev->js_hard_stop_ticks_cl; + + if (kbdev->js_hard_stop_ticks_nss != 0) + js_devdata->hard_stop_ticks_nss = kbdev->js_hard_stop_ticks_nss; + + if (kbdev->js_reset_ticks_ss != 0) + js_devdata->gpu_reset_ticks_ss = kbdev->js_reset_ticks_ss; + + if (kbdev->js_reset_ticks_cl != 0) + js_devdata->gpu_reset_ticks_cl = kbdev->js_reset_ticks_cl; + + if (kbdev->js_reset_ticks_nss != 0) + js_devdata->gpu_reset_ticks_nss = kbdev->js_reset_ticks_nss; + } +#endif + + runpool_inc_context_count(kbdev, head_kctx); + /* Cause any future waiter-on-termination to wait until the context is + * descheduled */ + js_kctx_info->ctx.is_scheduled = MALI_TRUE; + wake_up(&js_kctx_info->ctx.is_scheduled_wait); + + /* Pick the free address space (guaranteed free by check_is_runpool_full() ) */ + new_address_space = pick_free_addr_space(kbdev); + + /* Lock the address space whilst working on it */ + 
mutex_lock(&new_address_space->transaction_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + /* Do all the necessaries to assign the address space (inc. update book-keeping info) + * Add the context to the Run Pool, and allow it to run jobs */ + assign_and_activate_kctx_addr_space(kbdev, head_kctx, new_address_space); + + /* NOTE: If Linux allows, then we can drop the new_address_space->transaction mutex here */ + + if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) { + /* We need to retain it to keep the corresponding address space */ + kbasep_js_runpool_retain_ctx_nolock(kbdev, head_kctx); + } + + /* Re-check for suspending: a suspend could've occurred after we + * pm_context_active'd, and all the contexts could've been removed from the + * runpool before we took this lock. In this case, we don't want to allow + * this context to run jobs, we just want it out immediately. + * + * The DMB required to read the suspend flag was issued recently as part of + * the runpool_irq locking. If a suspend occurs *after* that lock was taken + * (i.e. this condition doesn't execute), then the kbasep_js_suspend() code + * will cleanup this context instead (by virtue of it being called strictly + * after the suspend flag is set, and will wait for this lock to drop) */ + if (kbase_pm_is_suspending(kbdev)) { + /* Cause it to leave at some later point */ + mali_bool retained; + retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, head_kctx); + KBASE_DEBUG_ASSERT(retained); + kbasep_js_clear_submit_allowed(js_devdata, head_kctx); + head_kctx_suspended = MALI_TRUE; + } + + /* Try to run the next job, in case this context has jobs that match the + * job slot requirements, but none of the other currently running contexts + * do */ + kbasep_js_try_run_next_job_nolock(kbdev); + + /* Transaction complete */ + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&new_address_space->transaction_mutex); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + /* Note: after this point, the context could potentially get scheduled out immediately */ + + if (head_kctx_suspended) { + /* Finishing forcing out the context due to a suspend. Use a variant of + * kbasep_js_runpool_release_ctx() that doesn't schedule a new context, + * to prevent a risk of recursion back into this function */ + kbasep_js_runpool_release_ctx_no_schedule(kbdev, head_kctx); + } + return; +} + +void kbasep_js_schedule_privileged_ctx(kbase_device *kbdev, kbase_context *kctx) +{ + kbasep_js_kctx_info *js_kctx_info; + kbasep_js_device_data *js_devdata; + mali_bool is_scheduled; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + /* This must never be attempted whilst suspending - i.e. 
it should only + * happen in response to a syscall from a user-space thread */ + BUG_ON(kbase_pm_is_suspending(kbdev)); + + kbase_pm_request_l2_caches(kbdev); + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + /* Mark the context as privileged */ + js_kctx_info->ctx.flags |= KBASE_CTX_FLAG_PRIVILEGED; + + is_scheduled = js_kctx_info->ctx.is_scheduled; + if (is_scheduled == MALI_FALSE) { + mali_bool is_runpool_full; + + /* Add the context to the runpool */ + mutex_lock(&js_devdata->queue_mutex); + kbasep_js_policy_enqueue_ctx(&js_devdata->policy, kctx); + mutex_unlock(&js_devdata->queue_mutex); + + mutex_lock(&js_devdata->runpool_mutex); + { + is_runpool_full = check_is_runpool_full(kbdev, kctx); + if (is_runpool_full != MALI_FALSE) { + /* Evict jobs from the NEXT registers to free an AS asap */ + kbasep_js_runpool_evict_next_jobs(kbdev, kctx); + } + } + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + /* Fast-starting requires the jsctx_mutex to be dropped, because it works on multiple ctxs */ + + if (is_runpool_full != MALI_FALSE) { + /* Evict non-running contexts from the runpool */ + kbasep_js_runpool_attempt_fast_start_ctx(kbdev, NULL); + } + /* Try to schedule the context in */ + kbasep_js_try_schedule_head_ctx(kbdev); + + /* Wait for the context to be scheduled in */ + wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, kctx->jctx.sched_info.ctx.is_scheduled == MALI_TRUE); + } else { + /* Already scheduled in - We need to retain it to keep the corresponding address space */ + kbasep_js_runpool_retain_ctx(kbdev, kctx); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + } +} + +void kbasep_js_release_privileged_ctx(kbase_device *kbdev, kbase_context *kctx) +{ + kbasep_js_kctx_info *js_kctx_info; + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + + /* We don't need to use the address space anymore */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + kbase_pm_release_l2_caches(kbdev); + + /* Release the context - it will be scheduled out if there is no pending job */ + kbasep_js_runpool_release_ctx(kbdev, kctx); +} + +void kbasep_js_job_done_slot_irq(kbase_jd_atom *katom, int slot_nr, + ktime_t *end_timestamp, + kbasep_js_atom_done_code done_code) +{ + kbase_device *kbdev; + kbasep_js_policy *js_policy; + kbasep_js_device_data *js_devdata; + mali_bool submit_retry_needed = MALI_TRUE; /* If we don't start jobs here, start them from the workqueue */ + ktime_t tick_diff; + u64 microseconds_spent = 0u; + kbase_context *parent_ctx; + + KBASE_DEBUG_ASSERT(katom); + parent_ctx = katom->kctx; + KBASE_DEBUG_ASSERT(parent_ctx); + kbdev = parent_ctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev); + + js_devdata = &kbdev->js_data; + js_policy = &kbdev->js_data.policy; + + lockdep_assert_held(&js_devdata->runpool_irq.lock); + + /* + * Release resources before submitting new jobs (bounds the refcount of + * the resource to BASE_JM_SUBMIT_SLOTS) + */ +#ifdef CONFIG_MALI_GATOR_SUPPORT + kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP, slot_nr), NULL, 0); +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + + /* Check if submitted jobs no longer require the cycle counter to be enabled */ + kbasep_js_deref_permon_check_and_disable_cycle_counter(kbdev, katom); + + /* Release the affinity from the slot - must happen before next submission to this slot */ + kbase_js_affinity_release_slot_cores(kbdev, slot_nr, katom->affinity); + 
kbase_js_debug_log_current_affinities(kbdev); + /* Calculate the job's time used */ + if (end_timestamp != NULL) { + /* Only calculating it for jobs that really run on the HW (e.g. removed + * from next jobs never actually ran, so really did take zero time) */ + tick_diff = ktime_sub(*end_timestamp, katom->start_timestamp); + + microseconds_spent = ktime_to_ns(tick_diff); + do_div(microseconds_spent, 1000); + + /* Round up time spent to the minimum timer resolution */ + if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US) + microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US; + } + + /* Log the result of the job (completion status, and time spent). */ + kbasep_js_policy_log_job_result(js_policy, katom, microseconds_spent); + /* Determine whether the parent context's timeslice is up */ + if (kbasep_js_policy_should_remove_ctx(js_policy, parent_ctx) != MALI_FALSE) + kbasep_js_clear_submit_allowed(js_devdata, parent_ctx); + + if (done_code & KBASE_JS_ATOM_DONE_START_NEW_ATOMS) { + /* Submit a new job (if there is one) to help keep the GPU's HEAD and NEXT registers full */ + KBASE_TRACE_ADD_SLOT(kbdev, JS_JOB_DONE_TRY_RUN_NEXT_JOB, parent_ctx, katom, katom->jc, slot_nr); + + submit_retry_needed = kbasep_js_try_run_next_job_on_slot_irq_nolock(kbdev, slot_nr, &kbdev->slot_submit_count_irq[slot_nr]); + } + + if (submit_retry_needed != MALI_FALSE || katom->event_code == BASE_JD_EVENT_STOPPED) { + /* The extra condition on STOPPED jobs is needed because they may be + * the only job present, but they won't get re-run until the JD work + * queue activates. Crucially, work queues can run items out of order + * e.g. on different CPUs, so being able to submit from the IRQ handler + * is not a good indication that we don't need to run jobs; the + * submitted job could be processed on the work-queue *before* the + * stopped job, even though it was submitted after. + * + * Therefore, we must try to run it, otherwise it might not get run at + * all after this. */ + + KBASE_TRACE_ADD_SLOT(kbdev, JS_JOB_DONE_RETRY_NEEDED, parent_ctx, katom, katom->jc, slot_nr); + kbasep_js_set_job_retry_submit_slot(katom, slot_nr); + } +} + +void kbasep_js_suspend(kbase_device *kbdev) +{ + unsigned long flags; + kbasep_js_device_data *js_devdata; + int i; + u16 retained = 0u; + int nr_privileged_ctx = 0; + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev)); + js_devdata = &kbdev->js_data; + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + /* Prevent all contexts from submitting */ + js_devdata->runpool_irq.submit_allowed = 0; + + /* Retain each of the contexts, so we can cause it to leave even if it had + * no refcount to begin with */ + for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) { + kbasep_js_per_as_data *js_per_as_data = &js_devdata->runpool_irq.per_as_data[i]; + kbase_context *kctx = js_per_as_data->kctx; + retained = retained << 1; + + if (kctx) { + ++(js_per_as_data->as_busy_refcount); + retained |= 1u; + /* We can only cope with up to 1 privileged context - the + * instrumented context. It'll be suspended by disabling + * instrumentation */ + if (kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) + KBASE_DEBUG_ASSERT(++nr_privileged_ctx == 1); + } + } + CSTD_UNUSED(nr_privileged_ctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + /* De-ref the previous retain to ensure each context gets pulled out + * sometime later. 
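 * For illustration: with BASE_MAX_NR_AS == 4 and contexts resident only in
 * address spaces 0 and 2, the retain loop above leaves retained == 0b0101,
 * i.e. bit i records whether the context in address space i was retained.
 * The loop below walks that mask from bit 0 upwards, shifting it right once
 * per address space, and releases exactly the contexts that were retained.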
*/ + for (i = 0; + i < BASE_MAX_NR_AS; + ++i, retained = retained >> 1) { + kbasep_js_per_as_data *js_per_as_data = &js_devdata->runpool_irq.per_as_data[i]; + kbase_context *kctx = js_per_as_data->kctx; + + if (retained & 1u) + kbasep_js_runpool_release_ctx(kbdev,kctx); + } + + /* Caller must wait for all Power Manager active references to be dropped */ +} + +void kbasep_js_resume(kbase_device *kbdev) +{ + kbasep_js_device_data *js_devdata; + int i; + KBASE_DEBUG_ASSERT(kbdev); + js_devdata = &kbdev->js_data; + + KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + + /* Schedule in as many contexts as address spaces. This also starts atoms. */ + for (i = 0 ; i < kbdev->nr_hw_address_spaces; ++i) + { + kbasep_js_try_schedule_head_ctx(kbdev); + } + /* JS Resume complete */ +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h new file mode 100755 index 00000000000..ec20c9d45c7 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_js.h @@ -0,0 +1,930 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_js.h + * Job Scheduler APIs. + */ + +#ifndef _KBASE_JS_H_ +#define _KBASE_JS_H_ + +#include + +#include "mali_kbase_js_defs.h" +#include "mali_kbase_js_policy.h" +#include "mali_kbase_defs.h" +#include "mali_kbase_debug.h" + +#include "mali_kbase_js_ctx_attr.h" + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_js Job Scheduler Internal APIs + * @{ + * + * These APIs are Internal to KBase and are available for use by the + * @ref kbase_js_policy "Job Scheduler Policy APIs" + */ + +/** + * @brief Initialize the Job Scheduler + * + * The kbasep_js_device_data sub-structure of \a kbdev must be zero + * initialized before passing to the kbasep_js_devdata_init() function. This is + * to give efficient error path code. + */ +mali_error kbasep_js_devdata_init(kbase_device * const kbdev); + +/** + * @brief Halt the Job Scheduler. + * + * It is safe to call this on \a kbdev even if it the kbasep_js_device_data + * sub-structure was never initialized/failed initialization, to give efficient + * error-path code. + * + * For this to work, the kbasep_js_device_data sub-structure of \a kbdev must + * be zero initialized before passing to the kbasep_js_devdata_init() + * function. This is to give efficient error path code. + * + * It is a Programming Error to call this whilst there are still kbase_context + * structures registered with this scheduler. + * + */ +void kbasep_js_devdata_halt(kbase_device *kbdev); + +/** + * @brief Terminate the Job Scheduler + * + * It is safe to call this on \a kbdev even if it the kbasep_js_device_data + * sub-structure was never initialized/failed initialization, to give efficient + * error-path code. + * + * For this to work, the kbasep_js_device_data sub-structure of \a kbdev must + * be zero initialized before passing to the kbasep_js_devdata_init() + * function. This is to give efficient error path code. 
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The kbase_context must be zero initialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+mali_error kbasep_js_kctx_init(kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the kbase_context must be zero initialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the job is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guaranteed not to update the Policy Queue. And so, the caller is
+ * guaranteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is MALI_FALSE.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return MALI_TRUE indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return MALI_FALSE indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+mali_bool kbasep_js_add_job(kbase_context *kctx, kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
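 * A minimal caller-side sketch for kbasep_js_add_job() above (illustrative
 * only - the surrounding dispatch code is assumed, not taken from this
 * driver; none of the listed locks are held at this point):
 *
 *     if (kbasep_js_add_job(kctx, katom) != MALI_FALSE)
 *         kbasep_js_try_schedule_head_ctx(kbdev);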
+ * + * Completely removing a job requires several calls: + * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of + * the atom + * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler + * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the + * remaining state held as part of the job having been run. + * + * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions. + * + * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions. + * + * It is a programming error to call this when: + * - \a atom is not a job belonging to kctx. + * - \a atom has already been removed from the Job Scheduler. + * - \a atom is still in the runpool: + * - it has not been removed with kbasep_js_policy_dequeue_job() + * - or, it has not been removed with kbasep_js_policy_dequeue_job_irq() + * + * Do not use this for removing jobs being killed by kbase_jd_cancel() - use + * kbasep_js_remove_cancelled_job() instead. + * + * The following locking conditions are made on the caller: + * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * + */ +void kbasep_js_remove_job(kbase_device *kbdev, kbase_context *kctx, kbase_jd_atom *atom); + +/** + * @brief Completely remove a job chain from the Job Scheduler, in the case + * where the job chain was cancelled. + * + * This is a variant of kbasep_js_remove_job() that takes care of removing all + * of the retained state too. This is generally useful for cancelled atoms, + * which need not be handled in an optimal way. + * + * It is a programming error to call this when: + * - \a atom is not a job belonging to kctx. + * - \a atom has already been removed from the Job Scheduler. + * - \a atom is still in the runpool: + * - it is not being killed with kbasep_jd_cancel() + * - or, it has not been removed with kbasep_js_policy_dequeue_job() + * - or, it has not been removed with kbasep_js_policy_dequeue_job_irq() + * + * The following locking conditions are made on the caller: + * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, (as this will be + * obtained internally) + * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be + * obtained internally) + */ +void kbasep_js_remove_cancelled_job(kbase_device *kbdev, kbase_context *kctx, kbase_jd_atom *katom); + +/** + * @brief Refcount a context as being busy, preventing it from being scheduled + * out. + * + * @note This function can safely be called from IRQ context. + * + * The following locking conditions are made on the caller: + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. + * + * @return value != MALI_FALSE if the retain succeeded, and the context will not be scheduled out. + * @return MALI_FALSE if the retain failed (because the context is being/has been scheduled out). + */ +mali_bool kbasep_js_runpool_retain_ctx(kbase_device *kbdev, kbase_context *kctx); + +/** + * @brief Refcount a context as being busy, preventing it from being scheduled + * out. + * + * @note This function can safely be called from IRQ context. 
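 * As a sketch of the removal sequence described for kbasep_js_remove_job()
 * above (illustrative only; the per-atom completion step between the calls
 * is elided and assumed to be handled elsewhere, e.g. by jd_done_nolock()):
 *
 *     kbasep_js_atom_retained_state katom_retained_state;
 *
 *     kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
 *     kbasep_js_remove_job(kbdev, kctx, katom);
 *     kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
 *                                                            &katom_retained_state);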
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return value != MALI_FALSE if the retain succeeded, and the context will not be scheduled out.
+ * @return MALI_FALSE if the retain failed (because the context is being/has been scheduled out).
+ */
+mali_bool kbasep_js_runpool_retain_ctx_nolock(kbase_device *kbdev, kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that it stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return a valid kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+kbase_context *kbasep_js_runpool_lookup_ctx(kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handle the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(kbase_device *kbdev, kbase_context *kctx, mali_bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero, the context \em might be scheduled out
+ * (depending on whether the Scheduling Policy has deemed it so, or if it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then the following actions will be
+ * taken as part of descheduling the context:
+ * - For the context being descheduled:
+ * - If the context is in the process of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ * - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ * - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ * - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
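 * A usage sketch of the lookup/release pairing above (illustrative only;
 * do_something_with() is a hypothetical placeholder for work done while the
 * context is guaranteed to stay scheduled in):
 *
 *     kbase_context *kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_nr);
 *
 *     if (kctx != NULL) {
 *         do_something_with(kctx);
 *         kbasep_js_runpool_release_ctx(kbdev, kctx);
 *     }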
+ * + * Whilst the context is being descheduled, this also handles actions that + * cause more atoms to be run: + * - Attempt submitting atoms when the Context Attributes on the Runpool have + * changed. This is because the context being scheduled out could mean that + * there are more opportunities to run atoms. + * - Attempt submitting to a slot that was previously blocked due to affinity + * restrictions. This is usually only necessary when releasing a context + * happens as part of completing a previous job, but is harmless nonetheless. + * - Attempt scheduling in a new context (if one is available), and if necessary, + * running a job from that new context. + * + * Unlike retaining a context in the runpool, this function \b cannot be called + * from IRQ context. + * + * It is a programming error to call this on a \a kctx that is not currently + * scheduled, or that already has a zero refcount. + * + * The following locking conditions are made on the caller: + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. + * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally) + * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be + * obtained internally) + * + */ +void kbasep_js_runpool_release_ctx(kbase_device *kbdev, kbase_context *kctx); + +/** + * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional + * actions from completing an atom. + * + * This is usually called as part of completing an atom and releasing the + * refcount on the context held by the atom. + * + * Therefore, the extra actions carried out are part of handling actions queued + * on a completed atom, namely: + * - Releasing the atom's context attributes + * - Retrying the submission on a particular slot, because we couldn't submit + * on that slot from an IRQ handler. + * + * The locking conditions of this function are the same as those for + * kbasep_js_runpool_release_ctx() + */ +void kbasep_js_runpool_release_ctx_and_katom_retained_state(kbase_device *kbdev, kbase_context *kctx, kbasep_js_atom_retained_state *katom_retained_state); + +/** + * @brief Try to submit the next job on a \b particular slot whilst in IRQ + * context, and whilst the caller already holds the runpool IRQ spinlock. + * + * \a *submit_count will be checked against + * KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ to see whether too many jobs have + * been submitted. This is to prevent the IRQ handler looping over lots of GPU + * NULL jobs, which may complete whilst the IRQ handler is still processing. \a + * submit_count itself should point to kbase_device::slot_submit_count_irq[ \a js ], + * which is initialized to zero on entry to the IRQ handler. + * + * The following locks must be held by the caller: + * - kbasep_js_device_data::runpool_irq::lock + * + * @return truthful (i.e. != MALI_FALSE) if too many jobs were submitted from + * IRQ. Therefore, this indicates that submission should be retried from a + * work-queue, by using + * kbasep_js_try_run_next_job_on_slot_nolock()/kbase_js_try_run_jobs_on_slot(). + * @return MALI_FALSE if submission had no problems: the GPU is either already + * full of jobs in the HEAD and NEXT registers, or we were able to get enough + * jobs from the Run Pool to fill the GPU's HEAD and NEXT registers. 
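 * For example, an IRQ-side caller might use it as follows (sketch; only the
 * call pattern is taken from the description above, the surrounding handler
 * code is an assumption - compare kbasep_js_job_done_slot_irq() earlier in
 * this patch):
 *
 *     mali_bool retry_needed;
 *
 *     retry_needed = kbasep_js_try_run_next_job_on_slot_irq_nolock(kbdev, js,
 *                     &kbdev->slot_submit_count_irq[js]);
 *     if (retry_needed != MALI_FALSE)
 *         kbasep_js_set_job_retry_submit_slot(katom, js);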
+ */ +mali_bool kbasep_js_try_run_next_job_on_slot_irq_nolock(kbase_device *kbdev, int js, s8 *submit_count); + +/** + * @brief Try to submit the next job on a particular slot, outside of IRQ context + * + * This obtains the Job Slot lock for the duration of the call only. + * + * Unlike kbasep_js_try_run_next_job_on_slot_irq_nolock(), there is no limit on + * submission, because eventually IRQ_THROTTLE will kick in to prevent us + * getting stuck in a loop of submitting GPU NULL jobs. This is because the IRQ + * handler will be delayed, and so this function will eventually fill up the + * space in our software 'submitted' slot (kbase_jm_slot::submitted). + * + * In addition, there's no return value - we'll run the maintenence functions + * on the Policy's Run Pool, but if there's nothing there after that, then the + * Run Pool is truely empty, and so no more action need be taken. + * + * The following locking conditions are made on the caller: + * - it must hold kbasep_js_device_data::runpool_mutex + * - it must hold kbasep_js_device_data::runpool_irq::lock + * + * This must only be called whilst the GPU is powered - for example, when + * kbdev->jsdata.nr_user_contexts_running > 0. + * + * @note The caller \em might be holding one of the + * kbasep_js_kctx_info::ctx::jsctx_mutex locks. + * + */ +void kbasep_js_try_run_next_job_on_slot_nolock(kbase_device *kbdev, int js); + +/** + * @brief Try to submit the next job for each slot in the system, outside of IRQ context + * + * This will internally call kbasep_js_try_run_next_job_on_slot_nolock(), so similar + * locking conditions on the caller are required. + * + * The following locking conditions are made on the caller: + * - it must hold kbasep_js_device_data::runpool_mutex + * - it must hold kbasep_js_device_data::runpool_irq::lock + * + * @note The caller \em might be holding one of the + * kbasep_js_kctx_info::ctx::jsctx_mutex locks. + * + */ +void kbasep_js_try_run_next_job_nolock(kbase_device *kbdev); + +/** + * @brief Try to schedule the next context onto the Run Pool + * + * This checks whether there's space in the Run Pool to accommodate a new + * context. If so, it attempts to dequeue a context from the Policy Queue, and + * submit this to the Run Pool. + * + * If the scheduling succeeds, then it also makes a call to + * kbasep_js_try_run_next_job_nolock(), in case the new context has jobs + * matching the job slot requirements, but no other currently scheduled context + * has such jobs. + * + * Whilst attempting to obtain a context from the policy queue, or add a + * context to the runpool, this function takes a Power Manager active + * reference. If for any reason a context cannot be added to the runpool, any + * reference obtained is released once the context is safely back in the policy + * queue. If no context was available on the policy queue, any reference + * obtained is released too. + * + * Only if the context gets placed in the runpool does the Power Manager active + * reference stay held (and is effectively now owned by the + * context/runpool). It is only released once the context is removed + * completely, or added back to the policy queue + * (e.g. kbasep_js_runpool_release_ctx(), + * kbasep_js_runpool_requeue_or_kill_ctx(), etc) + * + * If any of these actions fail (Run Pool Full, Policy Queue empty, can't get + * PM active reference due to a suspend, etc) then any actions taken are rolled + * back and the function just returns normally. 
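 * The locking contract for the ..._nolock() submission helpers above can be
 * pictured as follows (sketch; flags and js are local variables assumed to
 * exist in the caller):
 *
 *     mutex_lock(&kbdev->js_data.runpool_mutex);
 *     spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
 *     kbasep_js_try_run_next_job_on_slot_nolock(kbdev, js);
 *     spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
 *     mutex_unlock(&kbdev->js_data.runpool_mutex);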
+ * + * The following locking conditions are made on the caller: + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. + * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally) + * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). + * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will + * be used internally. + * + */ +void kbasep_js_try_schedule_head_ctx(kbase_device *kbdev); + +/** + * @brief Schedule in a privileged context + * + * This schedules a context in regardless of the context priority. + * If the runpool is full, a context will be forced out of the runpool and the function will wait + * for the new context to be scheduled in. + * The context will be kept scheduled in (and the corresponding address space reserved) until + * kbasep_js_release_privileged_ctx is called). + * + * The following locking conditions are made on the caller: + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. + * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally) + * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). + * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will + * be used internally. + * + */ +void kbasep_js_schedule_privileged_ctx(kbase_device *kbdev, kbase_context *kctx); + +/** + * @brief Release a privileged context, allowing it to be scheduled out. + * + * See kbasep_js_runpool_release_ctx for potential side effects. + * + * The following locking conditions are made on the caller: + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. + * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally) + * + */ +void kbasep_js_release_privileged_ctx(kbase_device *kbdev, kbase_context *kctx); + +/** + * @brief Handle the Job Scheduler component for the IRQ of a job finishing + * + * This does the following: + * -# Releases resources held by the atom + * -# if \a end_timestamp != NULL, updates the runpool's notion of time spent by a running ctx + * -# determines whether a context should be marked for scheduling out + * -# examines done_code to determine whether to submit the next job on the slot + * (picking from all ctxs in the runpool) + * + * In addition, if submission didn't happen (the submit-from-IRQ function + * failed or done_code didn't specify to start new jobs), then this sets a + * message on katom that submission needs to be retried from the worker thread. + * + * Normally, the time calculated from end_timestamp is rounded up to the + * minimum time precision. Therefore, to ensure the job is recorded as not + * spending any time, then set end_timestamp to NULL. For example, this is necessary when + * evicting jobs from JSn_HEAD_NEXT (because they didn't actually run). 
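 * A sketch of the privileged-context pairing above (illustrative only; the
 * work done while the context is held resident - e.g. instrumentation setup -
 * is an assumption):
 *
 *     kbasep_js_schedule_privileged_ctx(kbdev, kctx);
 *     ... kctx and its address space stay resident here ...
 *     kbasep_js_release_privileged_ctx(kbdev, kctx);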
+ *
+ * NOTE: It's possible to move the steps (2) and (3) (including calculating the job's time
+ * used) into the worker (outside of IRQ context), but this may allow a context
+ * to use up to twice as much timeslice as is allowed by the policy. For
+ * policies that order by time spent, this is not a problem for overall
+ * 'fairness', but can still increase latency between contexts.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_device_data::runpool_irq::lock
+ */
+void kbasep_js_job_done_slot_irq(kbase_jd_atom *katom, int slot_nr,
+ ktime_t *end_timestamp,
+ kbasep_js_atom_done_code done_code);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_try_run_jobs(kbase_device *kbdev);
+
+/**
+ * @brief Try to submit the next job on a specific slot
+ *
+ * The following locking conditions are made on the caller:
+ *
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this
+ * will be obtained internally)
+ *
+ */
+void kbase_js_try_run_jobs_on_slot(kbase_device *kbdev, int js);
+
+/**
+ * @brief Handle releasing cores for power management and affinity management,
+ * ensuring that cores are powered down and affinity tracking is updated.
+ *
+ * This must only be called on an atom that is not currently running, and has
+ * not been re-queued onto the context (and so does not need locking).
+ *
+ * This function enters at the following @ref kbase_atom_coreref_state states:
+ * - NO_CORES_REQUESTED
+ * - WAITING_FOR_REQUESTED_CORES
+ * - RECHECK_AFFINITY
+ * - READY
+ *
+ * It transitions the above states back to NO_CORES_REQUESTED by the end of the
+ * function call (possibly via intermediate states).
+ *
+ * No locks need be held by the caller, since this takes the necessary Power
+ * Management locks itself. The runpool_irq.lock is not taken (the work that
+ * requires it is handled by kbase_js_affinity_submit_to_blocked_slots() ).
+ *
+ * @note The corresponding kbasep_js_job_check_ref_cores() is private to the
+ * Job Scheduler, and is called automatically when running the next job.
+ */
+void kbasep_js_job_check_deref_cores(kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Management active refcounts to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduling Policy's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time whenever the Job
+ * Scheduling Policy implements Job Timeouts (such as those done by CFS).
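 * A sketch of how the suspend/resume pair fits together (illustrative; the
 * wait step is the one described above, its exact implementation is an
 * assumption):
 *
 *     kbasep_js_suspend(kbdev);
 *     ... wait for all Power Management active references to drop,
 *         then power down; later, on the Resume event: ...
 *     kbasep_js_resume(kbdev);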
+ */ +void kbasep_js_suspend(kbase_device *kbdev); + +/** + * @brief Resume the Job Scheduler after a Power Management Resume event. + * + * This restores the actions from kbasep_js_suspend(): + * - Schedules contexts back into the runpool + * - Resumes running atoms on the GPU + */ +void kbasep_js_resume(kbase_device *kbdev); + + +/* + * Helpers follow + */ + +/** + * @brief Check that a context is allowed to submit jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, and wrap up + * the long repeated line of code. + * + * As with any mali_bool, never test the return value with MALI_TRUE. + * + * The caller must hold kbasep_js_device_data::runpool_irq::lock. + */ +static INLINE mali_bool kbasep_js_is_submit_allowed(kbasep_js_device_data *js_devdata, kbase_context *kctx) +{ + u16 test_bit; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled != MALI_FALSE); + + test_bit = (u16) (1u << kctx->as_nr); + + return (mali_bool) (js_devdata->runpool_irq.submit_allowed & test_bit); +} + +/** + * @brief Allow a context to submit jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, and wrap up + * the long repeated line of code. + * + * The caller must hold kbasep_js_device_data::runpool_irq::lock. + */ +static INLINE void kbasep_js_set_submit_allowed(kbasep_js_device_data *js_devdata, kbase_context *kctx) +{ + u16 set_bit; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled != MALI_FALSE); + + set_bit = (u16) (1u << kctx->as_nr); + + KBASE_LOG(3, kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed |= set_bit; +} + +/** + * @brief Prevent a context from submitting more jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, and wrap up + * the long repeated line of code. + * + * The caller must hold kbasep_js_device_data::runpool_irq::lock. + */ +static INLINE void kbasep_js_clear_submit_allowed(kbasep_js_device_data *js_devdata, kbase_context *kctx) +{ + u16 clear_bit; + u16 clear_mask; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled != MALI_FALSE); + + clear_bit = (u16) (1u << kctx->as_nr); + clear_mask = ~clear_bit; + + KBASE_LOG(3, kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed &= clear_mask; +} + +/** + * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom + */ +static INLINE void kbasep_js_clear_job_retry_submit(kbase_jd_atom *atom) +{ + atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID; +} + +/** + * Mark a slot as requiring resubmission by carrying that information on a + * completing atom. + * + * @note This can ASSERT in debug builds if the submit slot has been set to + * something other than the current value for @a js. This is because you might + * be unintentionally stopping more jobs being submitted on the old submit + * slot, and that might cause a scheduling-hang. 
+ * + * @note If you can guarantee that the atoms for the original slot will be + * submitted on some other slot, then call kbasep_js_clear_job_retry_submit() + * first to silence the ASSERT. + */ +static INLINE void kbasep_js_set_job_retry_submit_slot(kbase_jd_atom *atom, int js) +{ + KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS); + KBASE_DEBUG_ASSERT(atom->retry_submit_on_slot == KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID + || atom->retry_submit_on_slot == js); + + atom->retry_submit_on_slot = js; +} + +/** + * Create an initial 'invalid' atom retained state, that requires no + * atom-related work to be done on releasing with + * kbasep_js_runpool_release_ctx_and_katom_retained_state() + */ +static INLINE void kbasep_js_atom_retained_state_init_invalid(kbasep_js_atom_retained_state *retained_state) +{ + retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; + retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; + retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID; +} + +/** + * Copy atom state that can be made available after jd_done_nolock() is called + * on that atom. + */ +static INLINE void kbasep_js_atom_retained_state_copy(kbasep_js_atom_retained_state *retained_state, const kbase_jd_atom *katom) +{ + retained_state->event_code = katom->event_code; + retained_state->core_req = katom->core_req; + retained_state->retry_submit_on_slot = katom->retry_submit_on_slot; +} + +/** + * @brief Determine whether an atom has finished (given its retained state), + * and so should be given back to userspace/removed from the system. + * + * Reasons for an atom not finishing include: + * - Being soft-stopped (and so, the atom should be resubmitted sometime later) + * + * @param[in] katom_retained_state the retained state of the atom to check + * @return MALI_FALSE if the atom has not finished + * @return !=MALI_FALSE if the atom has finished + */ +static INLINE mali_bool kbasep_js_has_atom_finished(const kbasep_js_atom_retained_state *katom_retained_state) +{ + return (mali_bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT); +} + +/** + * @brief Determine whether a kbasep_js_atom_retained_state is valid + * + * An invalid kbasep_js_atom_retained_state is allowed, and indicates that the + * code should just ignore it. + * + * @param[in] katom_retained_state the atom's retained state to check + * @return MALI_FALSE if the retained state is invalid, and can be ignored + * @return !=MALI_FALSE if the retained state is valid + */ +static INLINE mali_bool kbasep_js_atom_retained_state_is_valid(const kbasep_js_atom_retained_state *katom_retained_state) +{ + return (mali_bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); +} + +static INLINE mali_bool kbasep_js_get_atom_retry_submit_slot(const kbasep_js_atom_retained_state *katom_retained_state, int *res) +{ + int js = katom_retained_state->retry_submit_on_slot; + *res = js; + return (mali_bool) (js >= 0); +} + +#if KBASE_DEBUG_DISABLE_ASSERTS == 0 +/** + * Debug Check the refcount of a context. Only use within ASSERTs + * + * Obtains kbasep_js_device_data::runpool_irq::lock + * + * @return negative value if the context is not scheduled in + * @return current refcount of the context if it is scheduled in. The refcount + * is not guarenteed to be kept constant. 
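+ *
+ * Illustrative use inside an assertion (a documentation-only sketch; any real
+ * uses live elsewhere in the Job Scheduler):
+ * @code
+ *   KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0);
+ * @endcode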
+ */
+static INLINE int kbasep_js_debug_check_ctx_refcount(kbase_device *kbdev, kbase_context *kctx)
+{
+        unsigned long flags;
+        kbasep_js_device_data *js_devdata;
+        int result = -1;
+        int as_nr;
+
+        KBASE_DEBUG_ASSERT(kbdev != NULL);
+        KBASE_DEBUG_ASSERT(kctx != NULL);
+        js_devdata = &kbdev->js_data;
+
+        spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+        as_nr = kctx->as_nr;
+        if (as_nr != KBASEP_AS_NR_INVALID)
+                result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount;
+
+        spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+        return result;
+}
+#endif /* KBASE_DEBUG_DISABLE_ASSERTS == 0 */
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guaranteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be taken internally.
+ *
+ * @return a valid kbase_context on success, with a refcount that is guaranteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static INLINE kbase_context *kbasep_js_runpool_lookup_ctx_noretain(kbase_device *kbdev, int as_nr)
+{
+        unsigned long flags;
+        kbasep_js_device_data *js_devdata;
+        kbase_context *found_kctx;
+        kbasep_js_per_as_data *js_per_as_data;
+
+        KBASE_DEBUG_ASSERT(kbdev != NULL);
+        KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+        js_devdata = &kbdev->js_data;
+        js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+        spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+        found_kctx = js_per_as_data->kctx;
+        KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0);
+
+        spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+        return found_kctx;
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: when you need the number of cycles to guarantee you won't wait for
+ * longer than 'us' time (you might have a shorter wait).
+ */
+static INLINE u32 kbasep_js_convert_us_to_gpu_ticks_min_freq(kbase_device *kbdev, u32 us)
+{
+        u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+        KBASE_DEBUG_ASSERT(0 != gpu_freq);
+        return us * (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need the number of cycles to guarantee you'll wait at least
+ * 'us' amount of time (but you might wait longer).
+ */
+static INLINE u32 kbasep_js_convert_us_to_gpu_ticks_max_freq(kbase_device *kbdev, u32 us)
+{
+        u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+        KBASE_DEBUG_ASSERT(0 != gpu_freq);
+        return us * (u32) (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current + * frequency is not valid due to DVFS). + * e.g.: When you need to know the worst-case wait that 'ticks' cycles will + * take (you guarantee that you won't wait any longer than this, but it may + * be shorter). + */ +static INLINE u32 kbasep_js_convert_gpu_ticks_to_us_min_freq(kbase_device *kbdev, u32 ticks) +{ + u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min; + KBASE_DEBUG_ASSERT(0 != gpu_freq); + return ticks / gpu_freq * 1000; +} + +/** + * This will provide a conversion from ticks of the gpu clock to time (us) + * based on the maximum available gpu frequency. + * This is usually good to compute best/worst case (where the use of current + * frequency is not valid due to DVFS). + * e.g.: When you need to know the best-case wait for 'tick' cycles (you + * guarantee to be waiting for at least this long, but it may be longer). + */ +static INLINE u32 kbasep_js_convert_gpu_ticks_to_us_max_freq(kbase_device *kbdev, u32 ticks) +{ + u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max; + KBASE_DEBUG_ASSERT(0 != gpu_freq); + return ticks / gpu_freq * 1000; +} + + /** @} *//* end group kbase_js */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + +#endif /* _KBASE_JS_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/mali_kbase_js_affinity.c new file mode 100755 index 00000000000..6ded87dfe85 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_js_affinity.c @@ -0,0 +1,382 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_js_affinity.c + * Base kernel affinity manager APIs + */ + +#include +#include "mali_kbase_js_affinity.h" + + +STATIC INLINE mali_bool affinity_job_uses_high_cores(kbase_device *kbdev, kbase_jd_atom *katom) +{ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) { + kbase_context *kctx; + kbase_context_flags ctx_flags; + + kctx = katom->kctx; + ctx_flags = kctx->jctx.sched_info.ctx.flags; + + /* In this HW Workaround, compute-only jobs/contexts use the high cores + * during a core-split, all other contexts use the low cores. */ + return (mali_bool) ((katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) != 0 || (ctx_flags & KBASE_CTX_FLAG_HINT_ONLY_COMPUTE) != 0); + } + return MALI_FALSE; +} + +/** + * @brief Decide whether a split in core affinity is required across job slots + * + * The following locking conditions are made on the caller: + * - it must hold kbasep_js_device_data::runpool_irq::lock + * + * @param kbdev The kbase device structure of the device + * @return MALI_FALSE if a core split is not required + * @return != MALI_FALSE if a core split is required. 
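+ *
+ * For example (an illustrative case only): with BASE_HW_ISSUE_8987 present, a
+ * runpool holding one compute-only context and one non-compute context
+ * requires a split, whereas a runpool holding only compute contexts does not.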
+ */
+STATIC INLINE mali_bool kbase_affinity_requires_split(kbase_device *kbdev)
+{
+        KBASE_DEBUG_ASSERT(kbdev != NULL);
+        lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+        if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+                s8 nr_compute_ctxs = kbasep_js_ctx_attr_count_on_runpool(kbdev, KBASEP_JS_CTX_ATTR_COMPUTE);
+                s8 nr_noncompute_ctxs = kbasep_js_ctx_attr_count_on_runpool(kbdev, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+                /* In this case, a mix of Compute+Non-Compute determines whether a
+                 * core-split is required, to ensure jobs with different numbers of RMUs
+                 * don't use the same cores.
+                 *
+                 * When it's entirely compute, or entirely non-compute, then no split is
+                 * required.
+                 *
+                 * A context can be both Compute and Non-compute, in which case this will
+                 * correctly decide that a core-split is required. */
+
+                return (mali_bool) (nr_compute_ctxs > 0 && nr_noncompute_ctxs > 0);
+        }
+        return MALI_FALSE;
+}
+
+mali_bool kbase_js_can_run_job_on_slot_no_lock(kbase_device *kbdev, int js)
+{
+        /*
+         * Here are the reasons for using job slot 2:
+         * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+         * - In the absence of the above, then:
+         *   - Atoms with BASE_JD_REQ_COHERENT_GROUP
+         *   - But, only when there aren't contexts with
+         *     KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+         *     all cores on slot 1 could be blocked by those using a coherent group
+         *     on slot 2
+         *   - And, only when you actually have 2 or more coregroups - if you only
+         *     have 1 coregroup, then having jobs for slot 2 implies they'd also be
+         *     for slot 1, meaning you'll get interference from them. Jobs able to
+         *     run on slot 2 could also block jobs that can only run on slot 1
+         *     (tiler jobs)
+         */
+        if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+                return MALI_TRUE;
+
+        if (js != 2)
+                return MALI_TRUE;
+
+        /* Only deal with js==2 now: */
+        if (kbdev->gpu_props.num_core_groups > 1) {
+                /* Only use slot 2 in the 2+ coregroup case */
+                if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) == MALI_FALSE) {
+                        /* ...But only when we *don't* have atoms that run on all cores */
+
+                        /* No specific check for BASE_JD_REQ_COHERENT_GROUP atoms - the policy will sort that out */
+                        return MALI_TRUE;
+                }
+        }
+
+        /* The above checks failing means we shouldn't use slot 2 */
+        return MALI_FALSE;
+}
+
+/*
+ * Since a deeper rework of what the job scheduler, power manager and
+ * affinity manager will implement has already been decided on, this
+ * function is just an intermediate step that assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ *   and high cores in a core split will be evenly distributed.
+ * - Odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - This function is frequently called and can be optimized,
+ *   (see notes in loops), but as the functionality will likely
+ *   be modified, optimization has not been addressed.
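+ *
+ * Illustrative outcome under the assumptions above (example figures only, not
+ * a statement about any particular GPU): with 8 shader cores present in a
+ * single core group and all of them available, n_high_cores is 4, giving
+ * high_bitmap == 0xF0 and low_bitmap == 0x0F; compute-only work is then given
+ * the high cores and all other work the low cores.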
+*/ +mali_bool kbase_js_choose_affinity(u64 * const affinity, kbase_device *kbdev, kbase_jd_atom *katom, int js) +{ + base_jd_core_req core_req = katom->core_req; + unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; + u64 core_availability_mask; + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + core_availability_mask = kbase_pm_ca_get_core_mask(kbdev); + + /* + * If no cores are currently available (core availability policy is + * transitioning) then fail. + */ + if (0 == core_availability_mask) + { + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + *affinity = 0; + return MALI_FALSE; + } + + KBASE_DEBUG_ASSERT(js >= 0); + + if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == BASE_JD_REQ_T) + { + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + /* Tiler only job, bit 0 needed to enable tiler but no shader cores required */ + *affinity = 1; + return MALI_TRUE; + } + + if (1 == kbdev->gpu_props.num_cores) { + /* trivial case only one core, nothing to do */ + *affinity = core_availability_mask; + } else if (kbase_affinity_requires_split(kbdev) == MALI_FALSE) { + if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { + if (js == 0 || num_core_groups == 1) { + /* js[0] and single-core-group systems just get the first core group */ + *affinity = kbdev->gpu_props.props.coherency_info.group[0].core_mask & core_availability_mask; + } else { + /* js[1], js[2] use core groups 0, 1 for dual-core-group systems */ + u32 core_group_idx = ((u32) js) - 1; + KBASE_DEBUG_ASSERT(core_group_idx < num_core_groups); + *affinity = kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask & core_availability_mask; + + /* If the job is specifically targeting core group 1 and the core + * availability policy is keeping that core group off, then fail */ + if (*affinity == 0 && core_group_idx == 1 && kbdev->pm.cg1_disabled == MALI_TRUE) + katom->event_code = BASE_JD_EVENT_PM_EVENT; + } + } else { + /* All cores are available when no core split is required */ + *affinity = core_availability_mask; + } + } else { + /* Core split required - divide cores in two non-overlapping groups */ + u64 low_bitmap, high_bitmap; + int n_high_cores = kbdev->gpu_props.num_cores >> 1; + KBASE_DEBUG_ASSERT(1 == num_core_groups); + KBASE_DEBUG_ASSERT(0 != n_high_cores); + + /* compute the reserved high cores bitmap */ + high_bitmap = ~0; + /* note: this can take a while, optimization desirable */ + while (n_high_cores != hweight32(high_bitmap & kbdev->shader_present_bitmap)) + high_bitmap = high_bitmap << 1; + + high_bitmap &= core_availability_mask; + low_bitmap = core_availability_mask ^ high_bitmap; + + if (affinity_job_uses_high_cores(kbdev, katom)) + *affinity = high_bitmap; + else + *affinity = low_bitmap; + } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + /* + * If no cores are currently available in the desired core group(s) + * (core availability policy is transitioning) then fail. + */ + if (*affinity == 0) + return MALI_FALSE; + + /* Enable core 0 if tiler required */ + if (core_req & BASE_JD_REQ_T) + *affinity = *affinity | 1; + + return MALI_TRUE; +} + +STATIC INLINE mali_bool kbase_js_affinity_is_violating(kbase_device *kbdev, u64 *affinities) +{ + /* This implementation checks whether the two slots involved in Generic thread creation + * have intersecting affinity. 
This is due to micro-architectural issues where a job in + * slot A targetting cores used by slot B could prevent the job in slot B from making + * progress until the job in slot A has completed. + * + * @note It just so happens that this restriction also allows + * BASE_HW_ISSUE_8987 to be worked around by placing on job slot 2 the + * atoms from ctxs with KBASE_CTX_FLAG_HINT_ONLY_COMPUTE flag set + */ + u64 affinity_set_left; + u64 affinity_set_right; + u64 intersection; + KBASE_DEBUG_ASSERT(affinities != NULL); + + affinity_set_left = affinities[1]; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) { + /* The left set also includes those on the Fragment slot when + * we are using the HW workaround for BASE_HW_ISSUE_8987 */ + affinity_set_left |= affinities[0]; + } + + affinity_set_right = affinities[2]; + + /* A violation occurs when any bit in the left_set is also in the right_set */ + intersection = affinity_set_left & affinity_set_right; + + return (mali_bool) (intersection != (u64) 0u); +} + +mali_bool kbase_js_affinity_would_violate(kbase_device *kbdev, int js, u64 affinity) +{ + kbasep_js_device_data *js_devdata; + u64 new_affinities[BASE_JM_MAX_NR_SLOTS]; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); + js_devdata = &kbdev->js_data; + + memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities, sizeof(js_devdata->runpool_irq.slot_affinities)); + + new_affinities[js] |= affinity; + + return kbase_js_affinity_is_violating(kbdev, new_affinities); +} + +void kbase_js_affinity_retain_slot_cores(kbase_device *kbdev, int js, u64 affinity) +{ + kbasep_js_device_data *js_devdata; + u64 cores; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); + js_devdata = &kbdev->js_data; + + KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity) == MALI_FALSE); + + cores = affinity; + while (cores) { + int bitnum = fls64(cores) - 1; + u64 bit = 1ULL << bitnum; + s8 cnt; + + KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] < BASE_JM_SUBMIT_SLOTS); + + cnt = ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); + + if (cnt == 1) + js_devdata->runpool_irq.slot_affinities[js] |= bit; + + cores &= ~bit; + } + +} + +void kbase_js_affinity_release_slot_cores(kbase_device *kbdev, int js, u64 affinity) +{ + kbasep_js_device_data *js_devdata; + u64 cores; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); + js_devdata = &kbdev->js_data; + + cores = affinity; + while (cores) { + int bitnum = fls64(cores) - 1; + u64 bit = 1ULL << bitnum; + s8 cnt; + + KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0); + + cnt = --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); + + if (0 == cnt) + js_devdata->runpool_irq.slot_affinities[js] &= ~bit; + + cores &= ~bit; + } + +} + +void kbase_js_affinity_slot_blocked_an_atom(kbase_device *kbdev, int js) +{ + kbasep_js_device_data *js_devdata; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); + js_devdata = &kbdev->js_data; + + js_devdata->runpool_irq.slots_blocked_on_affinity |= 1u << js; +} + +void kbase_js_affinity_submit_to_blocked_slots(kbase_device *kbdev) +{ + kbasep_js_device_data *js_devdata; + u16 slots; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + + KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running != 0); + + /* Must take a copy because submitting jobs will update this member. 
*/ + slots = js_devdata->runpool_irq.slots_blocked_on_affinity; + + while (slots) { + int bitnum = fls(slots) - 1; + u16 bit = 1u << bitnum; + slots &= ~bit; + + KBASE_TRACE_ADD_SLOT(kbdev, JS_AFFINITY_SUBMIT_TO_BLOCKED, NULL, NULL, 0u, bitnum); + + /* must update this before we submit, incase it's set again */ + js_devdata->runpool_irq.slots_blocked_on_affinity &= ~bit; + + kbasep_js_try_run_next_job_on_slot_nolock(kbdev, bitnum); + + /* Don't re-read slots_blocked_on_affinity after this - it could loop for a long time */ + } +} + +#if KBASE_TRACE_ENABLE != 0 +void kbase_js_debug_log_current_affinities(kbase_device *kbdev) +{ + kbasep_js_device_data *js_devdata; + int slot_nr; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + + for (slot_nr = 0; slot_nr < 3; ++slot_nr) + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL, NULL, 0u, slot_nr, (u32) js_devdata->runpool_irq.slot_affinities[slot_nr]); +} +#endif /* KBASE_TRACE_ENABLE != 0 */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/mali_kbase_js_affinity.h new file mode 100755 index 00000000000..38de8b31a5c --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_js_affinity.h @@ -0,0 +1,157 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_js_affinity.h + * Affinity Manager internal APIs. + */ + +#ifndef _KBASE_JS_AFFINITY_H_ +#define _KBASE_JS_AFFINITY_H_ + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_js_affinity Affinity Manager internal APIs. + * @{ + * + */ + +/** + * @brief Decide whether it is possible to submit a job to a particular job slot in the current status + * + * Will check if submitting to the given job slot is allowed in the current + * status. For example using job slot 2 while in soft-stoppable state and only + * having 1 coregroup is not allowed by the policy. This function should be + * called prior to submitting a job to a slot to make sure policy rules are not + * violated. + * + * The following locking conditions are made on the caller: + * - it must hold kbasep_js_device_data::runpool_irq::lock + * + * @param kbdev The kbase device structure of the device + * @param js Job slot number to check for allowance + */ +mali_bool kbase_js_can_run_job_on_slot_no_lock(kbase_device *kbdev, int js); + +/** + * @brief Compute affinity for a given job. + * + * Currently assumes an all-on/all-off power management policy. + * Also assumes there is at least one core with tiler available. + * + * Returns MALI_TRUE if a valid affinity was chosen, MALI_FALSE if + * no cores were available. 
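+ *
+ * Illustrative call pattern (a documentation-only sketch; the real call sites
+ * are in the job submission path):
+ * @code
+ *   u64 affinity;
+ *
+ *   if (kbase_js_choose_affinity(&affinity, kbdev, katom, js) != MALI_FALSE &&
+ *       kbase_js_affinity_would_violate(kbdev, js, affinity) == MALI_FALSE)
+ *           kbase_js_affinity_retain_slot_cores(kbdev, js, affinity);
+ * @endcode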
+ *
+ * @param[out] affinity Affinity bitmap computed
+ * @param kbdev The kbase device structure of the device
+ * @param katom Job chain of which affinity is going to be found
+ * @param js Slot the job chain is being submitted to
+ */
+mali_bool kbase_js_choose_affinity(u64 * const affinity, kbase_device *kbdev, kbase_jd_atom *katom, int js);
+
+/**
+ * @brief Determine whether a proposed \a affinity on job slot \a js would
+ * cause a violation of affinity restrictions.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+mali_bool kbase_js_affinity_would_violate(kbase_device *kbdev, int js, u64 affinity);
+
+/**
+ * @brief Affinity tracking: retain cores used by a slot
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_affinity_retain_slot_cores(kbase_device *kbdev, int js, u64 affinity);
+
+/**
+ * @brief Affinity tracking: release cores used by a slot
+ *
+ * Cores \b must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_affinity_release_slot_cores(kbase_device *kbdev, int js, u64 affinity);
+
+/**
+ * @brief Register a slot as blocking atoms due to affinity violations
+ *
+ * Once a slot has been registered, we must check after every atom completion
+ * (including those on different slots) to see if the slot can be
+ * unblocked. This is done by calling
+ * kbase_js_affinity_submit_to_blocked_slots(), which will also deregister the
+ * slot if it no longer blocks atoms due to affinity violations.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_affinity_slot_blocked_an_atom(kbase_device *kbdev, int js);
+
+/**
+ * @brief Submit to job slots that have registered that an atom was blocked on
+ * the slot previously due to affinity violations.
+ *
+ * This submits to all slots registered by
+ * kbase_js_affinity_slot_blocked_an_atom(). If submission succeeded, then the
+ * slot is deregistered as having blocked atoms due to affinity
+ * violations. Otherwise it stays registered, and the next atom to complete
+ * must attempt to submit to the blocked slots again.
+ *
+ * This must only be called whilst the GPU is powered - for example, when
+ * kbdev->js_data.nr_user_contexts_running > 0.
+ * + * The following locking conditions are made on the caller: + * - it must hold kbasep_js_device_data::runpool_mutex + * - it must hold kbasep_js_device_data::runpool_irq::lock + */ +void kbase_js_affinity_submit_to_blocked_slots(kbase_device *kbdev); + +/** + * @brief Output to the Trace log the current tracked affinities on all slots + */ +#if KBASE_TRACE_ENABLE != 0 +void kbase_js_debug_log_current_affinities(kbase_device *kbdev); +#else /* KBASE_TRACE_ENABLE != 0 */ +static INLINE void kbase_js_debug_log_current_affinities(kbase_device *kbdev) +{ +} +#endif /* KBASE_TRACE_ENABLE != 0 */ + + /** @} *//* end group kbase_js_affinity */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + + +#endif /* _KBASE_JS_AFFINITY_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c new file mode 100755 index 00000000000..6e051b18a63 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c @@ -0,0 +1,309 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +#include + +/* + * Private functions follow + */ + +/** + * @brief Check whether a ctx has a certain attribute, and if so, retain that + * attribute on the runpool. + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx is scheduled on the runpool + * + * @return MALI_TRUE indicates a change in ctx attributes state of the runpool. + * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. + * @return MALI_FALSE indicates no change in ctx attributes state of the runpool. + */ +STATIC mali_bool kbasep_js_ctx_attr_runpool_retain_attr(kbase_device *kbdev, kbase_context *kctx, kbasep_js_ctx_attr attribute) +{ + kbasep_js_device_data *js_devdata; + kbasep_js_kctx_info *js_kctx_info; + mali_bool runpool_state_changed = MALI_FALSE; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex)); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != MALI_FALSE); + + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != MALI_FALSE) { + KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX); + ++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); + + if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) { + /* First refcount indicates a state change */ + runpool_state_changed = MALI_TRUE; + KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute); + } + } + + return runpool_state_changed; +} + +/** + * @brief Check whether a ctx has a certain attribute, and if so, release that + * attribute on the runpool. 
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return MALI_TRUE indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return MALI_FALSE indicates no change in ctx attributes state of the runpool.
+ */
+STATIC mali_bool kbasep_js_ctx_attr_runpool_release_attr(kbase_device *kbdev, kbase_context *kctx, kbasep_js_ctx_attr attribute)
+{
+        kbasep_js_device_data *js_devdata;
+        kbasep_js_kctx_info *js_kctx_info;
+        mali_bool runpool_state_changed = MALI_FALSE;
+
+        KBASE_DEBUG_ASSERT(kbdev != NULL);
+        KBASE_DEBUG_ASSERT(kctx != NULL);
+        KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+        js_devdata = &kbdev->js_data;
+        js_kctx_info = &kctx->jctx.sched_info;
+
+        BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+        lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+        KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != MALI_FALSE);
+
+        if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != MALI_FALSE) {
+                KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+                --(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+                if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+                        /* Last de-refcount indicates a state change */
+                        runpool_state_changed = MALI_TRUE;
+                        KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+                }
+        }
+
+        return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return MALI_TRUE indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return MALI_FALSE indicates no change in ctx attributes state of the runpool.
+ */
+STATIC mali_bool kbasep_js_ctx_attr_ctx_retain_attr(kbase_device *kbdev, kbase_context *kctx, kbasep_js_ctx_attr attribute)
+{
+        kbasep_js_kctx_info *js_kctx_info;
+        mali_bool runpool_state_changed = MALI_FALSE;
+
+        KBASE_DEBUG_ASSERT(kbdev != NULL);
+        KBASE_DEBUG_ASSERT(kctx != NULL);
+        KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+        js_kctx_info = &kctx->jctx.sched_info;
+
+        BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+        KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+        ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+        if (js_kctx_info->ctx.is_scheduled != MALI_FALSE && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+                lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+                /* Only ref-count the attribute on the runpool the first time this context sees this attribute */
+                KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+                runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+        }
+
+        return runpool_state_changed;
+}
+
+/**
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return MALI_TRUE indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously. + * @return MALI_FALSE indicates no change in ctx attributes state of the runpool. + */ +STATIC mali_bool kbasep_js_ctx_attr_ctx_release_attr(kbase_device *kbdev, kbase_context *kctx, kbasep_js_ctx_attr attribute) +{ + kbasep_js_kctx_info *js_kctx_info; + mali_bool runpool_state_changed = MALI_FALSE; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_kctx_info = &kctx->jctx.sched_info; + + BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex)); + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0); + + if (js_kctx_info->ctx.is_scheduled != MALI_FALSE && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */ + runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute); + KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute); + } + + /* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */ + --(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); + + return runpool_state_changed; +} + +/* + * More commonly used public functions + */ + +void kbasep_js_ctx_attr_set_initial_attrs(kbase_device *kbdev, kbase_context *kctx) +{ + kbasep_js_kctx_info *js_kctx_info; + mali_bool runpool_state_changed = MALI_FALSE; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + + if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != MALI_FALSE) { + /* This context never submits, so don't track any scheduling attributes */ + return; + } + + /* Transfer attributes held in the context flags for contexts that have submit enabled */ + + if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_HINT_ONLY_COMPUTE) != MALI_FALSE) { + /* Compute context */ + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); + } + /* NOTE: Whether this is a non-compute context depends on the jobs being + * run, e.g. it might be submitting jobs with BASE_JD_REQ_ONLY_COMPUTE */ + + /* ... More attributes can be added here ... 
*/ + + /* The context should not have been scheduled yet, so ASSERT if this caused + * runpool state changes (note that other threads *can't* affect the value + * of runpool_state_changed, due to how it's calculated) */ + KBASE_DEBUG_ASSERT(runpool_state_changed == MALI_FALSE); + CSTD_UNUSED(runpool_state_changed); +} + +void kbasep_js_ctx_attr_runpool_retain_ctx(kbase_device *kbdev, kbase_context *kctx) +{ + mali_bool runpool_state_changed; + int i; + + /* Retain any existing attributes */ + for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (kbasep_js_ctx_attr) i) != MALI_FALSE) { + /* The context is being scheduled in, so update the runpool with the new attributes */ + runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (kbasep_js_ctx_attr) i); + + /* We don't need to know about state changed, because retaining a + * context occurs on scheduling it, and that itself will also try + * to run new atoms */ + CSTD_UNUSED(runpool_state_changed); + } + } +} + +mali_bool kbasep_js_ctx_attr_runpool_release_ctx(kbase_device *kbdev, kbase_context *kctx) +{ + mali_bool runpool_state_changed = MALI_FALSE; + int i; + + /* Release any existing attributes */ + for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (kbasep_js_ctx_attr) i) != MALI_FALSE) { + /* The context is being scheduled out, so update the runpool on the removed attributes */ + runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (kbasep_js_ctx_attr) i); + } + } + + return runpool_state_changed; +} + +void kbasep_js_ctx_attr_ctx_retain_atom(kbase_device *kbdev, kbase_context *kctx, kbase_jd_atom *katom) +{ + mali_bool runpool_state_changed = MALI_FALSE; + base_jd_core_req core_req; + + KBASE_DEBUG_ASSERT(katom); + core_req = katom->core_req; + + if (core_req & BASE_JD_REQ_ONLY_COMPUTE) + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); + else + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); + + if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { + /* Atom that can run on slot1 or slot2, and can use all cores */ + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); + } + + /* We don't need to know about state changed, because retaining an + * atom occurs on adding it, and that itself will also try to run + * new atoms */ + CSTD_UNUSED(runpool_state_changed); +} + +mali_bool kbasep_js_ctx_attr_ctx_release_atom(kbase_device *kbdev, kbase_context *kctx, kbasep_js_atom_retained_state *katom_retained_state) +{ + mali_bool runpool_state_changed = MALI_FALSE; + base_jd_core_req core_req; + + KBASE_DEBUG_ASSERT(katom_retained_state); + core_req = katom_retained_state->core_req; + + /* No-op for invalid atoms */ + if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == MALI_FALSE) + return MALI_FALSE; + + if (core_req & BASE_JD_REQ_ONLY_COMPUTE) + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); + else + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); + + if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | 
BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { + /* Atom that can run on slot1 or slot2, and can use all cores */ + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); + } + + return runpool_state_changed; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h new file mode 100755 index 00000000000..6e722297bb8 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h @@ -0,0 +1,158 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_js_ctx_attr.h + * Job Scheduler Context Attribute APIs + */ + +#ifndef _KBASE_JS_CTX_ATTR_H_ +#define _KBASE_JS_CTX_ATTR_H_ + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_js + * @{ + */ + +/** + * Set the initial attributes of a context (when context create flags are set) + * + * Requires: + * - Hold the jsctx_mutex + */ +void kbasep_js_ctx_attr_set_initial_attrs(kbase_device *kbdev, kbase_context *kctx); + +/** + * Retain all attributes of a context + * + * This occurs on scheduling in the context on the runpool (but after + * is_scheduled is set) + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx->is_scheduled is true + */ +void kbasep_js_ctx_attr_runpool_retain_ctx(kbase_device *kbdev, kbase_context *kctx); + +/** + * Release all attributes of a context + * + * This occurs on scheduling out the context from the runpool (but before + * is_scheduled is cleared) + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx->is_scheduled is true + * + * @return MALI_TRUE indicates a change in ctx attributes state of the runpool. + * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. + * @return MALI_FALSE indicates no change in ctx attributes state of the runpool. + */ +mali_bool kbasep_js_ctx_attr_runpool_release_ctx(kbase_device *kbdev, kbase_context *kctx); + +/** + * Retain all attributes of an atom + * + * This occurs on adding an atom to a context + * + * Requires: + * - jsctx mutex + * - If the context is scheduled, then runpool_irq spinlock must also be held + */ +void kbasep_js_ctx_attr_ctx_retain_atom(kbase_device *kbdev, kbase_context *kctx, kbase_jd_atom *katom); + +/** + * Release all attributes of an atom, given its retained state. + * + * This occurs after (permanently) removing an atom from a context + * + * Requires: + * - jsctx mutex + * - If the context is scheduled, then runpool_irq spinlock must also be held + * + * This is a no-op when \a katom_retained_state is invalid. + * + * @return MALI_TRUE indicates a change in ctx attributes state of the runpool. + * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. 
+ * @return MALI_FALSE indicates no change in ctx attributes state of the runpool. + */ +mali_bool kbasep_js_ctx_attr_ctx_release_atom(kbase_device *kbdev, kbase_context *kctx, kbasep_js_atom_retained_state *katom_retained_state); + +/** + * Requires: + * - runpool_irq spinlock + */ +static INLINE s8 kbasep_js_ctx_attr_count_on_runpool(kbase_device *kbdev, kbasep_js_ctx_attr attribute) +{ + kbasep_js_device_data *js_devdata; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_devdata = &kbdev->js_data; + + return js_devdata->runpool_irq.ctx_attr_ref_count[attribute]; +} + +/** + * Requires: + * - runpool_irq spinlock + */ +static INLINE mali_bool kbasep_js_ctx_attr_is_attr_on_runpool(kbase_device *kbdev, kbasep_js_ctx_attr attribute) +{ + /* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */ + return (mali_bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute); +} + +/** + * Requires: + * - jsctx mutex + */ +static INLINE mali_bool kbasep_js_ctx_attr_is_attr_on_ctx(kbase_context *kctx, kbasep_js_ctx_attr attribute) +{ + kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_kctx_info = &kctx->jctx.sched_info; + + /* In general, attributes are 'on' when they have a refcount (which should never be < 0) */ + return (mali_bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]); +} + + /** @} *//* end group kbase_js */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + +#endif /* _KBASE_JS_DEFS_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h new file mode 100755 index 00000000000..695bf466c79 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h @@ -0,0 +1,479 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_js.h + * Job Scheduler Type Definitions + */ + +#ifndef _KBASE_JS_DEFS_H_ +#define _KBASE_JS_DEFS_H_ + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_js + * @{ + */ +/* Forward decls */ +struct kbase_device; +struct kbase_jd_atom; + + +/* Types used by the policies must go here */ +enum { + /** Context will not submit any jobs */ + KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0), + + /** Set if the context uses an address space and should be kept scheduled in */ + KBASE_CTX_FLAG_PRIVILEGED = (1u << 1), + + /** Kernel-side equivalent of BASE_CONTEXT_HINT_ONLY_COMPUTE. 
Non-mutable after creation flags set */ + KBASE_CTX_FLAG_HINT_ONLY_COMPUTE = (1u << 2) + + /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */ +}; + +typedef u32 kbase_context_flags; + +typedef struct kbasep_atom_req { + base_jd_core_req core_req; + kbase_context_flags ctx_req; + u32 device_nr; +} kbasep_atom_req; + +#include "mali_kbase_js_policy_cfs.h" + +/* Wrapper Interface - doxygen is elsewhere */ +typedef union kbasep_js_policy { +#ifdef KBASE_JS_POLICY_AVAILABLE_FCFS + kbasep_js_policy_fcfs fcfs; +#endif +#ifdef KBASE_JS_POLICY_AVAILABLE_CFS + kbasep_js_policy_cfs cfs; +#endif +} kbasep_js_policy; + +/* Wrapper Interface - doxygen is elsewhere */ +typedef union kbasep_js_policy_ctx_info { +#ifdef KBASE_JS_POLICY_AVAILABLE_FCFS + kbasep_js_policy_fcfs_ctx fcfs; +#endif +#ifdef KBASE_JS_POLICY_AVAILABLE_CFS + kbasep_js_policy_cfs_ctx cfs; +#endif +} kbasep_js_policy_ctx_info; + +/* Wrapper Interface - doxygen is elsewhere */ +typedef union kbasep_js_policy_job_info { +#ifdef KBASE_JS_POLICY_AVAILABLE_FCFS + kbasep_js_policy_fcfs_job fcfs; +#endif +#ifdef KBASE_JS_POLICY_AVAILABLE_CFS + kbasep_js_policy_cfs_job cfs; +#endif +} kbasep_js_policy_job_info; + + +/** Callback function run on all of a context's jobs registered with the Job + * Scheduler */ +typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom); + +/** + * @brief Maximum number of jobs that can be submitted to a job slot whilst + * inside the IRQ handler. + * + * This is important because GPU NULL jobs can complete whilst the IRQ handler + * is running. Otherwise, it potentially allows an unlimited number of GPU NULL + * jobs to be submitted inside the IRQ handler, which increases IRQ latency. + */ +#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2 + +/** + * @brief the IRQ_THROTTLE time in microseconds + * + * This will be converted via the GPU's clock frequency into a cycle-count. + * + * @note we can make an estimate of the GPU's frequency by periodically + * sampling its CYCLE_COUNT register + */ +#define KBASE_JS_IRQ_THROTTLE_TIME_US 20 + +/** + * @brief Context attributes + * + * Each context attribute can be thought of as a boolean value that caches some + * state information about either the runpool, or the context: + * - In the case of the runpool, it is a cache of "Do any contexts owned by + * the runpool have attribute X?" + * - In the case of a context, it is a cache of "Do any atoms owned by the + * context have attribute X?" + * + * The boolean value of the context attributes often affect scheduling + * decisions, such as affinities to use and job slots to use. + * + * To accomodate changes of state in the context, each attribute is refcounted + * in the context, and in the runpool for all running contexts. Specifically: + * - The runpool holds a refcount of how many contexts in the runpool have this + * attribute. + * - The context holds a refcount of how many atoms have this attribute. + * + * Examples of use: + * - Finding out when there are a mix of @ref BASE_CONTEXT_HINT_ONLY_COMPUTE + * and ! @ref BASE_CONTEXT_HINT_ONLY_COMPUTE contexts in the runpool + */ +typedef enum { + /** Attribute indicating a context that contains Compute jobs. That is, + * @ref BASE_CONTEXT_HINT_ONLY_COMPUTE is \b set and/or the context has jobs of type + * @ref BASE_JD_REQ_ONLY_COMPUTE + * + * @note A context can be both 'Compute' and 'Non Compute' if it contains + * both types of jobs. 
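+ *
+ * For example (an illustrative case only), a context submitting both fragment
+ * atoms and BASE_JD_REQ_ONLY_COMPUTE atoms holds both this attribute and
+ * KBASEP_JS_CTX_ATTR_NON_COMPUTE.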
+ */ + KBASEP_JS_CTX_ATTR_COMPUTE, + + /** Attribute indicating a context that contains Non-Compute jobs. That is, + * the context has some jobs that are \b not of type @ref + * BASE_JD_REQ_ONLY_COMPUTE. The context usually has + * BASE_CONTEXT_HINT_COMPUTE \b clear, but this depends on the HW + * workarounds in use in the Job Scheduling Policy. + * + * @note A context can be both 'Compute' and 'Non Compute' if it contains + * both types of jobs. + */ + KBASEP_JS_CTX_ATTR_NON_COMPUTE, + + /** Attribute indicating that a context contains compute-job atoms that + * aren't restricted to a coherent group, and can run on all cores. + * + * Specifically, this is when the atom's \a core_req satisfy: + * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2 + * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups + * + * Such atoms could be blocked from running if one of the coherent groups + * is being used by another job slot, so tracking this context attribute + * allows us to prevent such situations. + * + * @note This doesn't take into account the 1-coregroup case, where all + * compute atoms would effectively be able to run on 'all cores', but + * contexts will still not always get marked with this attribute. Instead, + * it is the caller's responsibility to take into account the number of + * coregroups when interpreting this attribute. + * + * @note Whilst Tiler atoms are normally combined with + * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without + * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy + * enough to handle anyway. + */ + KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, + + /** Must be the last in the enum */ + KBASEP_JS_CTX_ATTR_COUNT +} kbasep_js_ctx_attr; + +enum { + /** Bit indicating that new atom should be started because this atom completed */ + KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0), + /** Bit indicating that the atom was evicted from the JSn_NEXT registers */ + KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1) +}; + +/** Combination of KBASE_JS_ATOM_DONE_<...> bits */ +typedef u32 kbasep_js_atom_done_code; + +/** + * Data used by the scheduler that is unique for each Address Space. + * + * This is used in IRQ context and kbasep_js_device_data::runpoool_irq::lock + * must be held whilst accessing this data (inculding reads and atomic + * decisions based on the read). + */ +typedef struct kbasep_js_per_as_data { + /** + * Ref count of whether this AS is busy, and must not be scheduled out + * + * When jobs are running this is always positive. However, it can still be + * positive when no jobs are running. If all you need is a heuristic to + * tell you whether jobs might be running, this should be sufficient. + */ + int as_busy_refcount; + + /** Pointer to the current context on this address space, or NULL for no context */ + kbase_context *kctx; +} kbasep_js_per_as_data; + +/** + * @brief KBase Device Data Job Scheduler sub-structure + * + * This encapsulates the current context of the Job Scheduler on a particular + * device. This context is global to the device, and is not tied to any + * particular kbase_context running on the device. + * + * nr_contexts_running and as_free are optimized for packing together (by making + * them smaller types than u32). The operations on them should rarely involve + * masking. 
The use of signed types for arithmetic indicates to the compiler that + * the value will not rollover (which would be undefined behavior), and so under + * the Total License model, it is free to make optimizations based on that (i.e. + * to remove masking). + */ +typedef struct kbasep_js_device_data { + /** Sub-structure to collect together Job Scheduling data used in IRQ context */ + struct runpool_irq { + /** + * Lock for accessing Job Scheduling data used in IRQ context + * + * This lock must be held whenever this data is accessed (read, or + * write). Even for read-only access, memory barriers would be needed. + * In any case, it is likely that decisions based on only reading must + * also be atomic with respect to data held here and elsewhere in the + * Job Scheduler. + * + * This lock must also be held for accessing: + * - kbase_context::as_nr + * - kbase_device::jm_slots + * - Parts of the kbasep_js_policy, dependent on the policy (refer to + * the policy in question for more information) + * - Parts of kbasep_js_policy_ctx_info, dependent on the policy (refer to + * the policy in question for more information) + */ + spinlock_t lock; + + /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs. + * When bit 'N' is set in this, it indicates whether the context bound to address space + * 'N' (per_as_data[N].kctx) is allowed to submit jobs. + * + * It is placed here because it's much more memory efficient than having a mali_bool8 in + * kbasep_js_per_as_data to store this flag */ + u16 submit_allowed; + + /** Context Attributes: + * Each is large enough to hold a refcount of the number of contexts + * that can fit into the runpool. This is currently BASE_MAX_NR_AS + * + * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store + * the refcount. Hence, it's not worthwhile reducing this to + * bit-manipulation on u32s to save space (where in contrast, 4 bit + * sub-fields would be easy to do and would save space). + * + * Whilst this must not become negative, the sign bit is used for: + * - error detection in debug builds + * - Optimization: it is undefined for a signed int to overflow, and so + * the compiler can optimize for that never happening (thus, no masking + * is required on updating the variable) */ + s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; + + /** Data that is unique for each AS */ + kbasep_js_per_as_data per_as_data[BASE_MAX_NR_AS]; + + /* + * Affinity management and tracking + */ + /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates + * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */ + u64 slot_affinities[BASE_JM_MAX_NR_SLOTS]; + /** Bitvector indicating which slots \em might have atoms blocked on + * them because otherwise they'd violate affinity restrictions */ + u16 slots_blocked_on_affinity; + /** Refcount for each core owned by each slot. Used to generate the + * slot_affinities array of bitvectors + * + * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, + * because it is refcounted only when a job is definitely about to be + * submitted to a slot, and is de-refcounted immediately after a job + * finishes */ + s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; + } runpool_irq; + + /** + * Run Pool mutex, for managing contexts within the runpool. 
+ * Unless otherwise specified, you must hold this lock whilst accessing any + * members that follow + * + * In addition, this is used to access: + * - the kbasep_js_kctx_info::runpool substructure + */ + struct mutex runpool_mutex; + + /** + * Queue Lock, used to access the Policy's queue of contexts independently + * of the Run Pool. + * + * Of course, you don't need the Run Pool lock to access this. + */ + struct mutex queue_mutex; + + u16 as_free; /**< Bitpattern of free Address Spaces */ + + /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */ + s8 nr_user_contexts_running; + /** Number of currently scheduled contexts (including ones that are not submitting jobs) */ + s8 nr_all_contexts_running; + + /** + * Policy-specific information. + * + * Refer to the structure defined by the current policy to determine which + * locks must be held when accessing this. + */ + kbasep_js_policy policy; + + /** Core Requirements to match up with base_js_atom's core_req memeber + * @note This is a write-once member, and so no locking is required to read */ + base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS]; + + u32 scheduling_tick_ns; /**< Value for KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS */ + u32 soft_stop_ticks; /**< Value for KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS */ + u32 soft_stop_ticks_cl; /**< Value for KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL */ + u32 hard_stop_ticks_ss; /**< Value for KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS */ + u32 hard_stop_ticks_cl; /**< Value for KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL */ + u32 hard_stop_ticks_nss; /**< Value for KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS */ + u32 gpu_reset_ticks_ss; /**< Value for KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS */ + u32 gpu_reset_ticks_cl; /**< Value for KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL */ + u32 gpu_reset_ticks_nss; /**< Value for KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS */ + u32 ctx_timeslice_ns; /**< Value for KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS */ + u32 cfs_ctx_runtime_init_slices; /**< Value for KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_INIT_SLICES */ + u32 cfs_ctx_runtime_min_slices; /**< Value for KBASE_CONFIG_ATTR_JS_CFS_CTX_RUNTIME_MIN_SLICES */ + + /** List of suspended soft jobs */ + struct list_head suspended_soft_jobs_list; + +#ifdef CONFIG_MALI_DEBUG + /* Support soft-stop on a single context */ + mali_bool softstop_always; +#endif /* CONFIG_MALI_DEBUG */ + /** The initalized-flag is placed at the end, to avoid cache-pollution (we should + * only be using this during init/term paths). + * @note This is a write-once member, and so no locking is required to read */ + int init_status; +} kbasep_js_device_data; + +/** + * @brief KBase Context Job Scheduling information structure + * + * This is a substructure in the kbase_context that encapsulates all the + * scheduling information. + */ +typedef struct kbasep_js_kctx_info { + /** + * Runpool substructure. This must only be accessed whilst the Run Pool + * mutex ( kbasep_js_device_data::runpool_mutex ) is held. + * + * In addition, the kbasep_js_device_data::runpool_irq::lock may need to be + * held for certain sub-members. + * + * @note some of the members could be moved into kbasep_js_device_data for + * improved d-cache/tlb efficiency. + */ + struct { + kbasep_js_policy_ctx_info policy_ctx; /**< Policy-specific context */ + } runpool; + + /** + * Job Scheduler Context information sub-structure. 
These members are + * accessed regardless of whether the context is: + * - In the Policy's Run Pool + * - In the Policy's Queue + * - Not queued nor in the Run Pool. + * + * You must obtain the jsctx_mutex before accessing any other members of + * this substructure. + * + * You may not access any of these members from IRQ context. + */ + struct { + struct mutex jsctx_mutex; /**< Job Scheduler Context lock */ + + /** Number of jobs ready to run - does \em not include the jobs waiting in + * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr + * for such jobs*/ + u32 nr_jobs; + + /** Context Attributes: + * Each is large enough to hold a refcount of the number of atoms on + * the context. **/ + u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; + + kbase_context_flags flags; + /* NOTE: Unify the following flags into kbase_context_flags */ + /** + * Is the context scheduled on the Run Pool? + * + * This is only ever updated whilst the jsctx_mutex is held. + */ + mali_bool is_scheduled; + /** + * Wait queue to wait for is_scheduled state changes. + * */ + wait_queue_head_t is_scheduled_wait; + + mali_bool is_dying; /**< Is the context in the process of being evicted? */ + } ctx; + + /* The initalized-flag is placed at the end, to avoid cache-pollution (we should + * only be using this during init/term paths) */ + int init_status; +} kbasep_js_kctx_info; + +/** Subset of atom state that can be available after jd_done_nolock() is called + * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(), + * because the original atom could disappear. */ +typedef struct kbasep_js_atom_retained_state { + /** Event code - to determine whether the atom has finished */ + base_jd_event_code event_code; + /** core requirements */ + base_jd_core_req core_req; + /** Job Slot to retry submitting to if submission from IRQ handler failed */ + int retry_submit_on_slot; + +} kbasep_js_atom_retained_state; + +/** + * Value signifying 'no retry on a slot required' for: + * - kbase_js_atom_retained_state::retry_submit_on_slot + * - kbase_jd_atom::retry_submit_on_slot + */ +#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1) + +/** + * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state. + * + * @see kbase_atom_retained_state_is_valid() + */ +#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP + +/** + * @brief The JS timer resolution, in microseconds + * + * Any non-zero difference in time will be at least this size. + */ +#define KBASEP_JS_TICK_RESOLUTION_US 1 + +#endif /* _KBASE_JS_DEFS_H_ */ + + /** @} *//* end group kbase_js */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h new file mode 100755 index 00000000000..f746f1d9146 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h @@ -0,0 +1,767 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_js_policy.h + * Job Scheduler Policy APIs. 
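The retained-state structure above exists so that a small copy of an atom's state can outlive the atom itself. Below is a standalone sketch of that copy-before-it-can-vanish pattern with purely illustrative types; it is not kbasep_js_atom_retained_state_copy() itself:

#include <stdio.h>
#include <stdlib.h>

struct atom {                          /* stand-in for a kbase_jd_atom */
	int event_code;
	int core_req;
	int retry_submit_on_slot;
	/* ...many other fields that disappear with the atom... */
};

struct atom_retained_state {           /* only what is needed afterwards */
	int event_code;
	int core_req;
	int retry_submit_on_slot;
};

static void retained_state_copy(struct atom_retained_state *dst,
				const struct atom *src)
{
	dst->event_code = src->event_code;
	dst->core_req = src->core_req;
	dst->retry_submit_on_slot = src->retry_submit_on_slot;
}

int main(void)
{
	struct atom *a = malloc(sizeof(*a));
	struct atom_retained_state saved;

	if (!a)
		return 1;
	a->event_code = 0;
	a->core_req = 1;
	a->retry_submit_on_slot = -1;

	retained_state_copy(&saved, a);
	free(a);                       /* the original atom is now gone... */

	/* ...but the retained copy can still drive the remaining bookkeeping */
	printf("retry slot: %d\n", saved.retry_submit_on_slot);
	return 0;
}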
+ */ + +#ifndef _KBASE_JS_POLICY_H_ +#define _KBASE_JS_POLICY_H_ + +/** + * @page page_kbase_js_policy Job Scheduling Policies + * The Job Scheduling system is described in the following: + * - @subpage page_kbase_js_policy_overview + * - @subpage page_kbase_js_policy_operation + * + * The API details are as follows: + * - @ref kbase_jm + * - @ref kbase_js + * - @ref kbase_js_policy + */ + +/** + * @page page_kbase_js_policy_overview Overview of the Policy System + * + * The Job Scheduler Policy manages: + * - The assigning of KBase Contexts to GPU Address Spaces (\em ASs) + * - The choosing of Job Chains (\em Jobs) from a KBase context, to run on the + * GPU's Job Slots (\em JSs). + * - The amount of \em time a context is assigned to (scheduled on) an + * Address Space + * - The amount of \em time a Job spends running on the GPU + * + * The Policy implements this management via 2 components: + * - A Policy Queue, which manages a set of contexts that are ready to run, + * but not currently running. + * - A Policy Run Pool, which manages the currently running contexts (one per Address + * Space) and the jobs to run on the Job Slots. + * + * Each Graphics Process in the system has at least one KBase Context. Therefore, + * the Policy Queue can be seen as a queue of Processes waiting to run Jobs on + * the GPU. + * + * + * @dotfile policy_overview.dot "Diagram showing a very simplified overview of the Policy System. IRQ handling, soft/hard-stopping, contexts re-entering the system and Policy details are omitted" + * + * The main operations on the queue are: + * - Enqueuing a Context to it + * - Dequeuing a Context from it, to run it. + * - Note: requeuing a context is much the same as enqueuing a context, but + * occurs when a context is scheduled out of the system to allow other contexts + * to run. + * + * These operations have much the same meaning for the Run Pool - Jobs are + * dequeued to run on a Jobslot, and requeued when they are scheduled out of + * the GPU. + * + * @note This is an over-simplification of the Policy APIs - there are more + * operations than 'Enqueue'/'Dequeue', and a Dequeue from the Policy Queue + * takes at least two function calls: one to Dequeue from the Queue, one to add + * to the Run Pool. + * + * As indicated on the diagram, Jobs permanently leave the scheduling system + * when they are completed, otherwise they get dequeued/requeued until this + * happens. Similarly, Contexts leave the scheduling system when their jobs + * have all completed. However, Contexts may later return to the scheduling + * system (not shown on the diagram) if more Bags of Jobs are submitted to + * them. + */ + +/** + * @page page_kbase_js_policy_operation Policy Operation + * + * We describe the actions that the Job Scheduler Core takes on the Policy in + * the following cases: + * - The IRQ Path + * - The Job Submission Path + * - The High Priority Job Submission Path + * + * This shows how the Policy APIs will be used by the Job Scheduler core. + * + * The following diagram shows an example Policy that contains a Low Priority + * queue, and a Real-time (High Priority) Queue. The RT queue is examined + * before the LowP one on dequeuing from the head. The Low Priority Queue is + * ordered by time, and the RT queue is ordered by RT-priority, and then by + * time. In addition, it shows that the Job Scheduler Core will start a + * Soft-Stop Timer (SS-Timer) when it dequeue's and submits a job. 
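For the example policy described above (a Real-time queue consulted before the Low Priority queue), the dequeue-from-head step might look roughly like the sketch below. The types and field names are illustrative and this is not the CFS policy's actual implementation:

#include <linux/list.h>

struct example_policy {
	struct list_head ctx_rt_queue_head;   /* Real-time (High Priority) contexts */
	struct list_head ctx_queue_head;      /* Low Priority contexts */
};

struct example_ctx {
	struct list_head queue_link;          /* links the context into one queue */
};

/* Return the next context to schedule, or NULL if both queues are empty. */
static struct example_ctx *example_dequeue_head_ctx(struct example_policy *pol)
{
	struct list_head *head;
	struct example_ctx *kctx;

	if (!list_empty(&pol->ctx_rt_queue_head))
		head = &pol->ctx_rt_queue_head;   /* RT queue examined first */
	else if (!list_empty(&pol->ctx_queue_head))
		head = &pol->ctx_queue_head;
	else
		return NULL;

	kctx = list_first_entry(head, struct example_ctx, queue_link);
	list_del(&kctx->queue_link);
	return kctx;
}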
The + * Soft-Stop time is set by a global configuration value, and must be a value + * appropriate for the policy. For example, this could include "don't run a + * soft-stop timer" for a First-Come-First-Served (FCFS) policy. + * + * + * @dotfile policy_operation_diagram.dot "Diagram showing the objects managed by an Example Policy, and the operations made upon these objects by the Job Scheduler Core." + * + * @section sec_kbase_js_policy_operation_prio Dealing with Priority + * + * Priority applies both to a context as a whole, and to the jobs within a + * context. The jobs specify a priority in the base_jd_atom::prio member, which + * is relative to that of the context. A positive setting indicates a reduction + * in priority, whereas a negative setting indicates a boost in priority. Of + * course, the boost in priority should only be honoured when the originating + * process has sufficient privileges, and should be ignored for unprivileged + * processes. The meaning of the combined priority value is up to the policy + * itself, and could be a logarithmic scale instead of a linear scale (e.g. the + * policy could arrange that an increase/decrease in priority by 1 results in an + * increase/decrease in \em proportion of time spent scheduled in by 25%, an + * effective change in timeslice by 11%). + * + * It is up to the policy whether a boost in priority boosts the priority of + * the entire context (e.g. to such an extent where it may pre-empt other + * running contexts). If it chooses to do this, the Policy must make sure that + * only the high-priority jobs are run, and that the context is scheduled out + * once only low priority jobs remain. This ensures that the low priority jobs + * within the context do not gain from the priority boost, yet they still get + * scheduled correctly with respect to other low priority contexts. + * + * + * @section sec_kbase_js_policy_operation_irq IRQ Path + * + * The following happens on the IRQ path from the Job Scheduler Core: + * - Note the slot that completed (for later) + * - Log the time spent by the job (and implicitly, the time spent by the + * context) + * - call kbasep_js_policy_log_job_result() in the context of the irq + * handler. + * - This must happen regardless of whether the job completed successfully or + * not (otherwise the context gets away with DoS'ing the system with faulty jobs) + * - What was the result of the job? + * - If Completed: job is just removed from the system + * - If Hard-stop or failure: job is removed from the system + * - If Soft-stop: queue the book-keeping work onto a work-queue: have a + * work-queue call kbasep_js_policy_enqueue_job() + * - Check the timeslice used by the owning context + * - call kbasep_js_policy_should_remove_ctx() in the context of the irq + * handler. + * - If this returns true, clear the "allowed" flag. + * - Check the ctx's flags for "allowed", "has jobs to run" and "is running + * jobs" + * - And so, should the context stay scheduled in? + * - If No, push onto a work-queue the work of scheduling out the old context, + * and getting a new one.
That is: + * - kbasep_js_policy_runpool_remove_ctx() on old_ctx + * - kbasep_js_policy_enqueue_ctx() on old_ctx + * - kbasep_js_policy_dequeue_head_ctx() to get new_ctx + * - kbasep_js_policy_runpool_add_ctx() on new_ctx + * - (all of this work is deferred on a work-queue to keep the IRQ handler quick) + * - If there is space in the completed job slots' HEAD/NEXT registers, run the next job: + * - kbasep_js_policy_dequeue_job() in the context of the irq + * handler with core_req set to that of the completing slot + * - if this returned MALI_TRUE, submit the job to the completed slot. + * - This is repeated until kbasep_js_policy_dequeue_job() returns + * MALI_FALSE, or the job slot has a job queued on both the HEAD and NEXT registers. + * - If kbasep_js_policy_dequeue_job() returned false, submit some work to + * the work-queue to retry from outside of IRQ context (calling + * kbasep_js_policy_dequeue_job() from a work-queue). + * + * Since the IRQ handler submits new jobs \em and re-checks the IRQ_RAWSTAT, + * this sequence could loop a large number of times: this could happen if + * the jobs submitted completed on the GPU very quickly (in a few cycles), such + * as GPU NULL jobs. Then, the HEAD/NEXT registers will always be free to take + * more jobs, causing us to loop until we run out of jobs. + * + * To mitigate this, we must limit the number of jobs submitted per slot during + * the IRQ handler - for example, no more than 2 jobs per slot per IRQ should + * be sufficient (to fill up the HEAD + NEXT registers in normal cases). For + * Mali-T600 with 3 job slots, this means that up to 6 jobs could be submitted per + * slot. Note that IRQ Throttling can make this situation commonplace: 6 jobs + * could complete but the IRQ for each of them is delayed by the throttling. By + * the time you get the IRQ, all 6 jobs could've completed, meaning you can + * submit jobs to fill all 6 HEAD+NEXT registers again. + * + * @note As much work is deferred as possible, which includes the scheduling + * out of a context and scheduling in a new context. However, we can still make + * starting a single high-priorty context quick despite this: + * - On Mali-T600 family, there is one more AS than JSs. + * - This means we can very quickly schedule out one AS, no matter what the + * situation (because there will always be one AS that's not currently running + * on the job slot - it can only have a job in the NEXT register). + * - Even with this scheduling out, fair-share can still be guaranteed e.g. by + * a timeline-based Completely Fair Scheduler. + * - When our high-priority context comes in, we can do this quick-scheduling + * out immediately, and then schedule in the high-priority context without having to block. + * - This all assumes that the context to schedule out is of lower + * priority. Otherwise, we will have to block waiting for some other low + * priority context to finish its jobs. Note that it's likely (but not + * impossible) that the high-priority context \b is running jobs, by virtue of + * it being high priority. + * - Therefore, we can give a high liklihood that on Mali-T600 at least one + * high-priority context can be started very quickly. For the general case, we + * can guarantee starting (no. ASs) - (no. JSs) high priority contexts + * quickly. In any case, there is a high likelihood that we're able to start + * more than one high priority context quickly. 
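The following is a sketch of the per-IRQ submission cap suggested above; every type and helper here (example_device, slot_is_full(), example_policy_dequeue_job(), submit_to_slot()) is hypothetical and only stands in for the real HEAD/NEXT bookkeeping:

#define MAX_JOBS_PER_SLOT_PER_IRQ 2   /* enough to refill HEAD + NEXT */

struct example_device;
struct example_atom;
int slot_is_full(struct example_device *dev, int slot);
int example_policy_dequeue_job(struct example_device *dev, int slot,
			       struct example_atom **katom_ptr);
void submit_to_slot(struct example_device *dev, int slot,
		    struct example_atom *katom);

/* Called from the IRQ handler after a job on 'slot' completes. */
static void submit_from_irq(struct example_device *dev, int slot)
{
	int submitted;

	for (submitted = 0; submitted < MAX_JOBS_PER_SLOT_PER_IRQ; submitted++) {
		struct example_atom *katom;

		if (slot_is_full(dev, slot))      /* HEAD and NEXT both occupied */
			break;
		if (!example_policy_dequeue_job(dev, slot, &katom))
			break;                    /* nothing runnable: retry from a workqueue */
		submit_to_slot(dev, slot, katom);
	}
}

The cap keeps the handler bounded even when very short jobs would otherwise let it loop indefinitely.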
+ * + * In terms of the functions used in the IRQ handler directly, these are the + * performance considerations: + * - kbase_js_policy_log_job_result(): + * - This is just adding to a 64-bit value (possibly even a 32-bit value if we + * only store the time the job's recently spent - see below on 'priority weighting') + * - For priority weighting, a divide operation ('div') could happen, but + * this can happen in a deferred context (outside of IRQ) when scheduling out + * the ctx; as per our Engineering Specification, the contexts of different + * priority still stay scheduled in for the same timeslice, but higher priority + * ones scheduled back in more often. + * - That is, the weighted and unweighted times must be stored separately, and + * the weighted time is only updated \em outside of IRQ context. + * - Of course, this divide is more likely to be a 'multiply by inverse of the + * weight', assuming that the weight (priority) doesn't change. + * - kbasep_js_policy_should_remove_ctx(): + * - This is usually just a comparison of the stored time value against some + * maximum value. + * + * @note all deferred work can be wrapped up into one call - we usually need to + * indicate that a job/bag is done outside of IRQ context anyway. + * + * + * + * @section sec_kbase_js_policy_operation_submit Submission path + * + * Start with a Context with no jobs present, and assume equal priority of all + * contexts in the system. The following work all happens outside of IRQ + * Context: + * - As soon as a job is made 'ready to run', then it must be registered with the Job + * Scheduler Policy: + * - 'Ready to run' means they've satisfied their dependencies in the + * Kernel-side Job Dispatch system. + * - Call kbasep_js_policy_enqueue_job() + * - This indicates that the job should be scheduled (it is ready to run). + * - As soon as a ctx changes from having 0 jobs 'ready to run' to >0 jobs + * 'ready to run', we enqueue the context on the policy queue: + * - Call kbasep_js_policy_enqueue_ctx() + * - This indicates that the \em ctx should be scheduled (it is ready to run) + * + * Next, we need to handle adding a context to the Run Pool - if it's sensible + * to do so. This can happen due to two reasons: + * -# A context is enqueued as above, and there are ASs free for it to run on + * (e.g. it is the first context to be run, in which case it can be added to + * the Run Pool immediately after enqueuing on the Policy Queue) + * -# A previous IRQ caused another ctx to be scheduled out, requiring that the + * context at the head of the queue be scheduled in. Such steps would happen in + * a work queue (work deferred from the IRQ context). + * + * In both cases, we'd handle it as follows: + * - Get the context at the Head of the Policy Queue: + * - Call kbasep_js_policy_dequeue_head_ctx() + * - Assign the Context an Address Space (Assert that there will be one free, + * given the above two reasons) + * - Add this context to the Run Pool: + * - Call kbasep_js_policy_runpool_add_ctx() + * - Now see if a job should be run: + * - Mostly, this will be done in the IRQ handler at the completion of a + * previous job.
+ * - However, there are two cases where this cannot be done: a) The first job + * enqueued to the system (there is no previous IRQ to act upon) b) When jobs + * are submitted at a low enough rate to not fill up all Job Slots (or, not to + * fill both the 'HEAD' and 'NEXT' registers in the job-slots) + * - Hence, on each ctx and job submission we should try to see if we + * can run a job: + * - For each job slot that has free space (in NEXT or HEAD+NEXT registers): + * - Call kbasep_js_policy_dequeue_job() with core_req set to that of the + * slot + * - if we got one, submit it to the job slot. + * - This is repeated until kbasep_js_policy_dequeue_job() returns + * MALI_FALSE, or the job slot has a job queued on both the HEAD and NEXT registers. + * + * The above case shows that we should attempt to run jobs in cases where a) a ctx + * has been added to the Run Pool, and b) new jobs have been added to a context + * in the Run Pool: + * - In the latter case, the context is in the runpool because it's got a job + * ready to run, or is already running a job + * - We could just wait until the IRQ handler fires, but for certain types of + * jobs this can take comparatively a long time to complete, e.g. GLES FS jobs + * generally take much longer to run that GLES CS jobs, which are vertex shader + * jobs. + * - Therefore, when a new job appears in the ctx, we must check the job-slots + * to see if they're free, and run the jobs as before. + * + * + * + * @section sec_kbase_js_policy_operation_submit_hipri Submission path for High Priority Contexts + * + * For High Priority Contexts on Mali-T600, we can make sure that at least 1 of + * them can be scheduled in immediately to start high prioriy jobs. In general, + * (no. ASs) - (no JSs) high priority contexts may be started immediately. The + * following describes how this happens: + * + * Similar to the previous section, consider what happens with a high-priority + * context (a context with a priority higher than that of any in the Run Pool) + * that starts out with no jobs: + * - A job becomes ready to run on the context, and so we enqueue the context + * on the Policy's Queue. + * - However, we'd like to schedule in this context immediately, instead of + * waiting for one of the Run Pool contexts' timeslice to expire + * - The policy's Enqueue function must detect this (because it is the policy + * that embodies the concept of priority), and take appropriate action + * - That is, kbasep_js_policy_enqueue_ctx() should check the Policy's Run + * Pool to see if a lower priority context should be scheduled out, and then + * schedule in the High Priority context. + * - For Mali-T600, we can always pick a context to schedule out immediately + * (because there are more ASs than JSs), and so scheduling out a victim context + * and scheduling in the high priority context can happen immediately. + * - If a policy implements fair-sharing, then this can still ensure the + * victim later on gets a fair share of the GPU. + * - As a note, consider whether the victim can be of equal/higher priority + * than the incoming context: + * - Usually, higher priority contexts will be the ones currently running + * jobs, and so the context with the lowest priority is usually not running + * jobs. + * - This makes it likely that the victim context is low priority, but + * it's not impossible for it to be a high priority one: + * - Suppose 3 high priority contexts are submitting only FS jobs, and one low + * priority context submitting CS jobs. 
Then, the context not running jobs will + * be one of the hi priority contexts (because only 2 FS jobs can be + * queued/running on the GPU HW for Mali-T600). + * - The problem can be mitigated by extra action, but it's questionable + * whether we need to: we already have a high likelihood that there's at least + * one high priority context - that should be good enough. + * - And so, this method makes sure that at least one high priority context + * can be started very quickly, but more than one high priority contexts could be + * delayed (up to one timeslice). + * - To improve this, use a GPU with a higher number of Address Spaces vs Job + * Slots. + * - At this point, let's assume this high priority context has been scheduled + * in immediately. The next step is to ensure it can start some jobs quickly. + * - It must do this by Soft-Stopping jobs on any of the Job Slots that it can + * submit to. + * - The rest of the logic for starting the jobs is taken care of by the IRQ + * handler. All the policy needs to do is ensure that + * kbasep_js_policy_dequeue_job() will return the jobs from the high priority + * context. + * + * @note in SS state, we currently only use 2 job-slots (even for T608, but + * this might change in future). In this case, it's always possible to schedule + * out 2 ASs quickly (their jobs won't be in the HEAD registers). At the same + * time, this maximizes usage of the job-slots (only 2 are in use), because you + * can guarantee starting of the jobs from the High Priority contexts immediately too. + * + * + * + * @section sec_kbase_js_policy_operation_notes Notes + * + * - In this design, a separate 'init' is needed from dequeue/requeue, so that + * information can be retained between the dequeue/requeue calls. For example, + * the total time spent for a context/job could be logged between + * dequeue/requeuing, to implement Fair Sharing. In this case, 'init' just + * initializes that information to some known state. + * + * + * + */ + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_js_policy Job Scheduler Policy APIs + * @{ + * + * Refer to @ref page_kbase_js_policy for an overview and detailed operation of + * the Job Scheduler Policy and its use from the Job Scheduler Core. + */ + +/** + * @brief Job Scheduler Policy structure + */ +union kbasep_js_policy; + +/** + * @brief Initialize the Job Scheduler Policy + */ +mali_error kbasep_js_policy_init(kbase_device *kbdev); + +/** + * @brief Terminate the Job Scheduler Policy + */ +void kbasep_js_policy_term(kbasep_js_policy *js_policy); + +/** + * @addtogroup kbase_js_policy_ctx Job Scheduler Policy, Context Management API + * @{ + * + * Refer to @ref page_kbase_js_policy for an overview and detailed operation of + * the Job Scheduler Policy and its use from the Job Scheduler Core. + */ + +/** + * @brief Job Scheduler Policy Ctx Info structure + * + * This structure is embedded in the kbase_context structure. It is used to: + * - track information needed for the policy to schedule the context (e.g. time + * used, OS priority etc.) + * - link together kbase_contexts into a queue, so that a kbase_context can be + * obtained as the container of the policy ctx info. This allows the API to + * return what "the next context" should be. 
+ * - obtain other information already stored in the kbase_context for + * scheduling purposes (e.g process ID to get the priority of the originating + * process) + */ +union kbasep_js_policy_ctx_info; + +/** + * @brief Initialize a ctx for use with the Job Scheduler Policy + * + * This effectively initializes the kbasep_js_policy_ctx_info structure within + * the kbase_context (itself located within the kctx->jctx.sched_info structure). + */ +mali_error kbasep_js_policy_init_ctx(kbase_device *kbdev, kbase_context *kctx); + +/** + * @brief Terminate resources associated with using a ctx in the Job Scheduler + * Policy. + */ +void kbasep_js_policy_term_ctx(kbasep_js_policy *js_policy, kbase_context *kctx); + +/** + * @brief Enqueue a context onto the Job Scheduler Policy Queue + * + * If the context enqueued has a priority higher than any in the Run Pool, then + * it is the Policy's responsibility to decide whether to schedule out a low + * priority context from the Run Pool to allow the high priority context to be + * scheduled in. + * + * If the context has the privileged flag set, it will always be kept at the + * head of the queue. + * + * The caller will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. + * The caller will be holding kbasep_js_device_data::queue_mutex. + */ +void kbasep_js_policy_enqueue_ctx(kbasep_js_policy *js_policy, kbase_context *kctx); + +/** + * @brief Dequeue a context from the Head of the Job Scheduler Policy Queue + * + * The caller will be holding kbasep_js_device_data::queue_mutex. + * + * @return MALI_TRUE if a context was available, and *kctx_ptr points to + * the kctx dequeued. + * @return MALI_FALSE if no contexts were available. + */ +mali_bool kbasep_js_policy_dequeue_head_ctx(kbasep_js_policy *js_policy, kbase_context ** const kctx_ptr); + +/** + * @brief Evict a context from the Job Scheduler Policy Queue + * + * This is only called as part of destroying a kbase_context. + * + * There are many reasons why this might fail during the lifetime of a + * context. For example, the context is in the process of being scheduled. In + * that case a thread doing the scheduling might have a pointer to it, but the + * context is neither in the Policy Queue, nor is it in the Run + * Pool. Crucially, neither the Policy Queue, Run Pool, or the Context itself + * are locked. + * + * Hence to find out where in the system the context is, it is important to do + * more than just check the kbasep_js_kctx_info::ctx::is_scheduled member. + * + * The caller will be holding kbasep_js_device_data::queue_mutex. + * + * @return MALI_TRUE if the context was evicted from the Policy Queue + * @return MALI_FALSE if the context was not found in the Policy Queue + */ +mali_bool kbasep_js_policy_try_evict_ctx(kbasep_js_policy *js_policy, kbase_context *kctx); + +/** + * @brief Call a function on all jobs belonging to a non-queued, non-running + * context, optionally detaching the jobs from the context as it goes. + * + * At the time of the call, the context is guarenteed to be not-currently + * scheduled on the Run Pool (is_scheduled == MALI_FALSE), and not present in + * the Policy Queue. 
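The "container" linkage described for the ctx-info structure above relies on the usual container_of()/list_entry() pattern; here is a small illustrative sketch, with hypothetical struct names rather than the driver's own:

#include <linux/kernel.h>   /* container_of() */
#include <linux/list.h>
#include <linux/types.h>

struct example_policy_ctx_info {
	struct list_head queue_link;   /* node linked into the Policy Queue */
	u64 runtime_us;
};

struct example_context {
	/* ...other per-context state... */
	struct example_policy_ctx_info policy_info;
};

/* Given a queue node, recover the context that embeds the policy ctx info. */
static struct example_context *ctx_from_policy_node(struct list_head *node)
{
	struct example_policy_ctx_info *info =
		list_entry(node, struct example_policy_ctx_info, queue_link);

	return container_of(info, struct example_context, policy_info);
}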
This is because one of the following functions was used + * recently on the context: + * - kbasep_js_policy_evict_ctx() + * - kbasep_js_policy_runpool_remove_ctx() + * + * In both cases, no subsequent call was made on the context to any of: + * - kbasep_js_policy_runpool_add_ctx() + * - kbasep_js_policy_enqueue_ctx() + * + * Due to the locks that might be held at the time of the call, the callback + * may need to defer work on a workqueue to complete its actions (e.g. when + * cancelling jobs) + * + * \a detach_jobs must only be set when cancelling jobs (which occurs as part + * of context destruction). + * + * The locking conditions on the caller are as follows: + * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. + */ +void kbasep_js_policy_foreach_ctx_job(kbasep_js_policy *js_policy, kbase_context *kctx, + kbasep_js_policy_ctx_job_cb callback, mali_bool detach_jobs); + +/** + * @brief Add a context to the Job Scheduler Policy's Run Pool + * + * If the context enqueued has a priority higher than any in the Run Pool, then + * it is the Policy's responsibility to decide whether to schedule out low + * priority jobs that are currently running on the GPU. + * + * The number of contexts present in the Run Pool will never be more than the + * number of Address Spaces. + * + * The following guarentees are made about the state of the system when this + * is called: + * - kctx->as_nr member is valid + * - the context has its submit_allowed flag set + * - kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is valid + * - The refcount of the context is guarenteed to be zero. + * - kbasep_js_kctx_info::ctx::is_scheduled will be MALI_TRUE. + * + * The locking conditions on the caller are as follows: + * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. + * - it will be holding kbasep_js_device_data::runpool_mutex. + * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock) + * + * Due to a spinlock being held, this function must not call any APIs that sleep. + */ +void kbasep_js_policy_runpool_add_ctx(kbasep_js_policy *js_policy, kbase_context *kctx); + +/** + * @brief Remove a context from the Job Scheduler Policy's Run Pool + * + * The kctx->as_nr member is valid and the context has its submit_allowed flag + * set when this is called. The state of + * kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is also + * valid. The refcount of the context is guarenteed to be zero. + * + * The locking conditions on the caller are as follows: + * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. + * - it will be holding kbasep_js_device_data::runpool_mutex. + * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock) + * + * Due to a spinlock being held, this function must not call any APIs that sleep. + */ +void kbasep_js_policy_runpool_remove_ctx(kbasep_js_policy *js_policy, kbase_context *kctx); + +/** + * @brief Indicate whether a context should be removed from the Run Pool + * (should be scheduled out). + * + * The kbasep_js_device_data::runpool_irq::lock will be held by the caller. + * + * @note This API is called from IRQ context. + */ +mali_bool kbasep_js_policy_should_remove_ctx(kbasep_js_policy *js_policy, kbase_context *kctx); + +/** + * @brief Synchronize with any timers acting upon the runpool + * + * The policy should check whether any timers it owns should be running. If + * they should not, the policy must cancel such timers and ensure they are not + * re-run by the time this function finishes. 
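A sketch of one way to honour the timer-synchronization requirement above using the kernel hrtimer API follows; the surrounding structure and flag are illustrative rather than the CFS policy's actual fields:

#include <linux/hrtimer.h>
#include <linux/types.h>

struct example_policy_timers {
	struct hrtimer scheduling_timer;
	bool timer_running;
};

static void example_timers_sync(struct example_policy_timers *t,
				bool runpool_has_ctxs)
{
	if (!runpool_has_ctxs) {
		/* hrtimer_cancel() also waits for a concurrently-running callback,
		 * so nothing can fire after this returns */
		hrtimer_cancel(&t->scheduling_timer);
		t->timer_running = false;
	}
}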
+ * + * In particular, the timers must not be running when there are no more contexts + * on the runpool, because the GPU could be powered off soon after this call. + * + * The locking conditions on the caller are as follows: + * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. + * - it will be holding kbasep_js_device_data::runpool_mutex. + */ +void kbasep_js_policy_runpool_timers_sync(kbasep_js_policy *js_policy); + + +/** + * @brief Indicate whether a new context has an higher priority than the current context. + * + * + * The caller has the following conditions on locking: + * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held for \a new_ctx + * + * This function must not sleep, because an IRQ spinlock might be held whilst + * this is called. + * + * @note There is nothing to stop the priority of \a current_ctx changing + * during or immediately after this function is called (because its jsctx_mutex + * cannot be held). Therefore, this function should only be seen as a heuristic + * guide as to whether \a new_ctx is higher priority than \a current_ctx + */ +mali_bool kbasep_js_policy_ctx_has_priority(kbasep_js_policy *js_policy, kbase_context *current_ctx, kbase_context *new_ctx); + + /** @} *//* end group kbase_js_policy_ctx */ + +/** + * @addtogroup kbase_js_policy_job Job Scheduler Policy, Job Chain Management API + * @{ + * + * Refer to @ref page_kbase_js_policy for an overview and detailed operation of + * the Job Scheduler Policy and its use from the Job Scheduler Core. + */ + +/** + * @brief Job Scheduler Policy Job Info structure + * + * This structure is embedded in the kbase_jd_atom structure. It is used to: + * - track information needed for the policy to schedule the job (e.g. time + * used, OS priority etc.) + * - link together jobs into a queue/buffer, so that a kbase_jd_atom can be + * obtained as the container of the policy job info. This allows the API to + * return what "the next job" should be. + * - obtain other information already stored in the kbase_context for + * scheduling purposes (e.g user-side relative priority) + */ +union kbasep_js_policy_job_info; + +/** + * @brief Initialize a job for use with the Job Scheduler Policy + * + * This function initializes the kbasep_js_policy_job_info structure within the + * kbase_jd_atom. It will only initialize/allocate resources that are specific + * to the job. + * + * That is, this function makes \b no attempt to: + * - initialize any context/policy-wide information + * - enqueue the job on the policy. + * + * At some later point, the following functions must be called on the job, in this order: + * - kbasep_js_policy_register_job() to register the job and initialize policy/context wide data. + * - kbasep_js_policy_enqueue_job() to enqueue the job + * + * A job must only ever be initialized on the Policy once, and must be + * terminated on the Policy before the job is freed. + * + * The caller will not be holding any locks, and so this function will not + * modify any information in \a kctx or \a js_policy. + * + * @return MALI_ERROR_NONE if initialization was correct. + */ +mali_error kbasep_js_policy_init_job(const kbasep_js_policy *js_policy, const kbase_context *kctx, kbase_jd_atom *katom); + +/** + * @brief Register context/policy-wide information for a job on the Job Scheduler Policy. + * + * Registers the job with the policy. This is used to track the job before it + * has been enqueued/requeued by kbasep_js_policy_enqueue_job(). 
Specifically, + * it is used to update information under a lock that could not be updated at + * kbasep_js_policy_init_job() time (such as context/policy-wide data). + * + * @note This function will not fail, and hence does not allocate any + * resources. Any failures that could occur on registration will be caught + * during kbasep_js_policy_init_job() instead. + * + * A job must only ever be registered on the Policy once, and must be + * deregistered on the Policy on completion (whether or not that completion was + * success/failure). + * + * The caller has the following conditions on locking: + * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held. + */ +void kbasep_js_policy_register_job(kbasep_js_policy *js_policy, kbase_context *kctx, kbase_jd_atom *katom); + +/** + * @brief De-register context/policy-wide information for a job on the Job Scheduler Policy. + * + * This must be used before terminating the resources associated with using a + * job in the Job Scheduler Policy. This function does not itself terminate any + * resources, at most it just updates information in the policy and context. + * + * The caller has the following conditions on locking: + * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held. + */ +void kbasep_js_policy_deregister_job(kbasep_js_policy *js_policy, kbase_context *kctx, kbase_jd_atom *katom); + +/** + * @brief Dequeue a Job for a job slot from the Job Scheduler Policy Run Pool + * + * The job returned by the policy will match at least one of the bits in the + * job slot's core requirements (but it may match more than one, or all @ref + * base_jd_core_req bits supported by the job slot). + * + * In addition, the requirements of the job returned will be a subset of those + * requested - the job returned will not have requirements that \a job_slot_idx + * cannot satisfy. + * + * The caller will submit the job to the GPU as soon as the GPU's NEXT register + * for the corresponding slot is empty. Of course, the GPU will then only run + * this new job when the currently executing job (in the jobslot's HEAD + * register) has completed. + * + * @return MALI_TRUE if a job was available, and *katom_ptr points to + * the atom dequeued. + * @return MALI_FALSE if no jobs were available among all ctxs in the Run Pool. + * + * @note base_jd_core_req is currently a u8 - beware of type conversion. + * + * The caller has the following conditions on locking: + * - kbasep_js_device_data::runpool_irq::lock will be held. + * - kbasep_js_device_data::runpool_mutex will be held. + * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held + */ +mali_bool kbasep_js_policy_dequeue_job(kbase_device *kbdev, int job_slot_idx, kbase_jd_atom ** const katom_ptr); + +/** + * @brief Requeue a Job back into the Job Scheduler Policy Run Pool + * + * This will be used to enqueue a job after its creation and also to requeue + * a job into the Run Pool that was previously dequeued (running). It notifies + * the policy that the job should be run again at some point later. + * + * The caller has the following conditions on locking: + * - kbasep_js_device_data::runpool_irq::lock (a spinlock) will be held. + * - kbasep_js_device_data::runpool_mutex will be held. + * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held. + */ +void kbasep_js_policy_enqueue_job(kbasep_js_policy *js_policy, kbase_jd_atom *katom); + +/** + * @brief Log the result of a job: the time spent on a job/context, and whether + * the job failed or not.
+ * + * Since a kbase_jd_atom contains a pointer to the kbase_context owning it, + * then this can also be used to log time on either/both the job and the + * containing context. + * + * The completion state of the job can be found by examining \a katom->event.event_code + * + * If the Job failed and the policy is implementing fair-sharing, then the + * policy must penalize the failing job/context: + * - At the very least, it should penalize the time taken by the amount of + * time spent processing the IRQ in SW. This because a job in the NEXT slot + * waiting to run will be delayed until the failing job has had the IRQ + * cleared. + * - \b Optionally, the policy could apply other penalties. For example, based + * on a threshold of a number of failing jobs, after which a large penalty is + * applied. + * + * The kbasep_js_device_data::runpool_mutex will be held by the caller. + * + * @note This API is called from IRQ context. + * + * The caller has the following conditions on locking: + * - kbasep_js_device_data::runpool_irq::lock will be held. + * + * @param js_policy job scheduler policy + * @param katom job dispatch atom + * @param time_spent_us the time spent by the job, in microseconds (10^-6 seconds). + */ +void kbasep_js_policy_log_job_result(kbasep_js_policy *js_policy, kbase_jd_atom *katom, u64 time_spent_us); + + /** @} *//* end group kbase_js_policy_job */ + + /** @} *//* end group kbase_js_policy */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + +#endif /* _KBASE_JS_POLICY_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c new file mode 100755 index 00000000000..78c0fd290b5 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c @@ -0,0 +1,1449 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Job Scheduler: Completely Fair Policy Implementation + */ + +#include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) +#include +#endif + +/** + * Define for when dumping is enabled. + * This should not be based on the instrumentation level as whether dumping is enabled for a particular level is down to the integrator. + * However this is being used for now as otherwise the cinstr headers would be needed. 
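The CFS implementation that follows weights elapsed time by a per-context priority using fixed-point arithmetic (see priority_weight() below). Here is a standalone illustration of that pattern; the numbers are examples only, not the driver's weight table:

#include <stdio.h>

#define WEIGHT_FIXEDPOINT_SHIFT 10            /* weights expressed in 1/1024ths */
#define WEIGHT_0_VAL (1u << WEIGHT_FIXEDPOINT_SHIFT)

static unsigned long long weight_time_us(unsigned long long time_us,
					 unsigned int weight)
{
	unsigned long long weighted = (time_us * weight) >> WEIGHT_FIXEDPOINT_SHIFT;

	/* make sure accounted time always advances, as the real code does */
	if (weighted == 0 && time_us != 0)
		weighted = 1;
	return weighted;
}

int main(void)
{
	/* a weight of 1.25x (1280/1024) penalises a lower-priority context */
	printf("%llu\n", weight_time_us(4000, WEIGHT_0_VAL + 256));  /* prints 5000 */
	return 0;
}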
+ */ +#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL) + +/** Fixed point constants used for runtime weight calculations */ +#define WEIGHT_FIXEDPOINT_SHIFT 10 +#define WEIGHT_TABLE_SIZE 40 +#define WEIGHT_0_NICE (WEIGHT_TABLE_SIZE/2) +#define WEIGHT_0_VAL (1 << WEIGHT_FIXEDPOINT_SHIFT) + +#define LOOKUP_VARIANT_MASK ((1u<process_priority + ctx_info->bag_priority; + + /* Adjust runtime_us using priority weight if required */ + if (priority != 0 && time_us != 0) { + int clamped_priority; + + /* Clamp values to min..max weights */ + if (priority > PROCESS_PRIORITY_MAX) + clamped_priority = PROCESS_PRIORITY_MAX; + else if (priority < PROCESS_PRIORITY_MIN) + clamped_priority = PROCESS_PRIORITY_MIN; + else + clamped_priority = priority; + + /* Fixed point multiplication */ + time_delta_us = (time_us * weight_of_priority[WEIGHT_0_NICE + clamped_priority]); + /* Remove fraction */ + time_delta_us = time_delta_us >> WEIGHT_FIXEDPOINT_SHIFT; + /* Make sure the time always increases */ + if (0 == time_delta_us) + time_delta_us++; + } else { + time_delta_us = time_us; + } + + return time_delta_us; +} + +#if KBASE_TRACE_ENABLE != 0 +STATIC int kbasep_js_policy_trace_get_refcnt_nolock(kbase_device *kbdev, kbase_context *kctx) +{ + kbasep_js_device_data *js_devdata; + int as_nr; + int refcnt = 0; + + js_devdata = &kbdev->js_data; + + as_nr = kctx->as_nr; + if (as_nr != KBASEP_AS_NR_INVALID) { + kbasep_js_per_as_data *js_per_as_data; + js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; + + refcnt = js_per_as_data->as_busy_refcount; + } + + return refcnt; +} + +STATIC INLINE int kbasep_js_policy_trace_get_refcnt(kbase_device *kbdev, kbase_context *kctx) +{ + unsigned long flags; + kbasep_js_device_data *js_devdata; + int refcnt = 0; + + js_devdata = &kbdev->js_data; + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + refcnt = kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return refcnt; +} +#else /* KBASE_TRACE_ENABLE != 0 */ +STATIC int kbasep_js_policy_trace_get_refcnt_nolock(kbase_device *kbdev, kbase_context *kctx) +{ + CSTD_UNUSED(kbdev); + CSTD_UNUSED(kctx); + return 0; +} + +STATIC INLINE int kbasep_js_policy_trace_get_refcnt(kbase_device *kbdev, kbase_context *kctx) +{ + CSTD_UNUSED(kbdev); + CSTD_UNUSED(kctx); + return 0; +} +#endif /* KBASE_TRACE_ENABLE != 0 */ + +#ifdef CONFIG_MALI_DEBUG +STATIC void kbasep_js_debug_check(kbasep_js_policy_cfs *policy_info, kbase_context *kctx, kbasep_js_check check_flag) +{ + /* This function uses the ternary operator and non-explicit comparisons, + * because it makes for much shorter, easier to read code */ + + if (check_flag & KBASEP_JS_CHECKFLAG_QUEUED) { + mali_bool is_queued; + mali_bool expect_queued; + is_queued = (kbasep_list_member_of(&policy_info->ctx_queue_head, &kctx->jctx.sched_info.runpool.policy_ctx.cfs.list)) ? MALI_TRUE : MALI_FALSE; + + if (!is_queued) + is_queued = (kbasep_list_member_of(&policy_info->ctx_rt_queue_head, &kctx->jctx.sched_info.runpool.policy_ctx.cfs.list)) ? MALI_TRUE : MALI_FALSE; + + expect_queued = (check_flag & KBASEP_JS_CHECKFLAG_IS_QUEUED) ? MALI_TRUE : MALI_FALSE; + + KBASE_DEBUG_ASSERT_MSG(expect_queued == is_queued, "Expected context %p to be %s but it was %s\n", kctx, (expect_queued) ? "queued" : "not queued", (is_queued) ? 
"queued" : "not queued"); + + } + + if (check_flag & KBASEP_JS_CHECKFLAG_SCHEDULED) { + mali_bool is_scheduled; + mali_bool expect_scheduled; + is_scheduled = (kbasep_list_member_of(&policy_info->scheduled_ctxs_head, &kctx->jctx.sched_info.runpool.policy_ctx.cfs.list)) ? MALI_TRUE : MALI_FALSE; + + expect_scheduled = (check_flag & KBASEP_JS_CHECKFLAG_IS_SCHEDULED) ? MALI_TRUE : MALI_FALSE; + KBASE_DEBUG_ASSERT_MSG(expect_scheduled == is_scheduled, "Expected context %p to be %s but it was %s\n", kctx, (expect_scheduled) ? "scheduled" : "not scheduled", (is_scheduled) ? "scheduled" : "not scheduled"); + + } + +} +#else /* CONFIG_MALI_DEBUG */ +STATIC void kbasep_js_debug_check(kbasep_js_policy_cfs *policy_info, kbase_context *kctx, kbasep_js_check check_flag) +{ + CSTD_UNUSED(policy_info); + CSTD_UNUSED(kctx); + CSTD_UNUSED(check_flag); + return; +} +#endif /* CONFIG_MALI_DEBUG */ + +STATIC INLINE void set_slot_to_variant_lookup(u32 *bit_array, u32 slot_idx, u32 variants_supported) +{ + u32 overall_bit_idx = slot_idx * KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS; + u32 word_idx = overall_bit_idx / 32; + u32 bit_idx = overall_bit_idx % 32; + + KBASE_DEBUG_ASSERT(slot_idx < BASE_JM_MAX_NR_SLOTS); + KBASE_DEBUG_ASSERT((variants_supported & ~LOOKUP_VARIANT_MASK) == 0); + + bit_array[word_idx] |= variants_supported << bit_idx; +} + +STATIC INLINE u32 get_slot_to_variant_lookup(u32 *bit_array, u32 slot_idx) +{ + u32 overall_bit_idx = slot_idx * KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS; + u32 word_idx = overall_bit_idx / 32; + u32 bit_idx = overall_bit_idx % 32; + + u32 res; + + KBASE_DEBUG_ASSERT(slot_idx < BASE_JM_MAX_NR_SLOTS); + + res = bit_array[word_idx] >> bit_idx; + res &= LOOKUP_VARIANT_MASK; + + return res; +} + +/* Check the core_req_variants: make sure that every job slot is satisifed by + * one of the variants. This checks that cached_variant_idx_init will produce a + * valid result for jobs that make maximum use of the job slots. + * + * @note The checks are limited to the job slots - this does not check that + * every context requirement is covered (because some are intentionally not + * supported, such as KBASE_CTX_FLAG_SUBMIT_DISABLED) */ +#ifdef CONFIG_MALI_DEBUG +STATIC void debug_check_core_req_variants(kbase_device *kbdev, kbasep_js_policy_cfs *policy_info) +{ + kbasep_js_device_data *js_devdata; + u32 i; + int j; + + js_devdata = &kbdev->js_data; + + for (j = 0; j < kbdev->gpu_props.num_job_slots; ++j) { + base_jd_core_req job_core_req; + mali_bool found = MALI_FALSE; + + job_core_req = js_devdata->js_reqs[j]; + for (i = 0; i < policy_info->num_core_req_variants; ++i) { + base_jd_core_req var_core_req; + var_core_req = policy_info->core_req_variants[i].core_req; + + if ((var_core_req & job_core_req) == job_core_req) { + found = MALI_TRUE; + break; + } + } + + /* Early-out on any failure */ + KBASE_DEBUG_ASSERT_MSG(found != MALI_FALSE, "Job slot %d features 0x%x not matched by core_req_variants. 
" "Rework core_req_variants and vairants_supported_<...>_state[] to match\n", j, job_core_req); + } +} +#endif + +STATIC void build_core_req_variants(kbase_device *kbdev, kbasep_js_policy_cfs *policy_info) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(policy_info != NULL); + CSTD_UNUSED(kbdev); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) { + KBASE_DEBUG_ASSERT(NUM_CORE_REQ_VARIANTS_8987 <= KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS); + + /* Assume a static set of variants */ + memcpy(policy_info->core_req_variants, core_req_variants_8987, sizeof(core_req_variants_8987)); + + policy_info->num_core_req_variants = NUM_CORE_REQ_VARIANTS_8987; + } else { + KBASE_DEBUG_ASSERT(NUM_CORE_REQ_VARIANTS <= KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS); + + /* Assume a static set of variants */ + memcpy(policy_info->core_req_variants, core_req_variants, sizeof(core_req_variants)); + + policy_info->num_core_req_variants = NUM_CORE_REQ_VARIANTS; + } + + KBASE_DEBUG_CODE(debug_check_core_req_variants(kbdev, policy_info)); +} + +STATIC void build_slot_lookups(kbase_device *kbdev, kbasep_js_policy_cfs *policy_info) +{ + u8 i; + const u32 *variants_supported_ss_for_this_hw = variants_supported_ss_state; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(policy_info != NULL); + + KBASE_DEBUG_ASSERT(kbdev->gpu_props.num_job_slots <= NELEMS(variants_supported_ss_state)); + KBASE_DEBUG_ASSERT(kbdev->gpu_props.num_job_slots <= NELEMS(variants_supported_ss_allcore_state)); + KBASE_DEBUG_ASSERT(kbdev->gpu_props.num_job_slots <= NELEMS(variants_supported_ss_state_8987)); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) + variants_supported_ss_for_this_hw = variants_supported_ss_state_8987; + + /* Given the static set of variants, provide a static set of lookups */ + for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { + set_slot_to_variant_lookup(policy_info->slot_to_variant_lookup_ss_state, i, variants_supported_ss_for_this_hw[i]); + + set_slot_to_variant_lookup(policy_info->slot_to_variant_lookup_ss_allcore_state, i, variants_supported_ss_allcore_state[i]); + } + +} + +STATIC mali_error cached_variant_idx_init(const kbasep_js_policy_cfs *policy_info, const kbase_context *kctx, kbase_jd_atom *atom) +{ + kbasep_js_policy_cfs_job *job_info; + u32 i; + base_jd_core_req job_core_req; + u32 job_device_nr; + kbase_context_flags ctx_flags; + const kbasep_js_kctx_info *js_kctx_info; + const kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(policy_info != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(atom != NULL); + + kbdev = container_of(policy_info, const kbase_device, js_data.policy.cfs); + job_info = &atom->sched_info.cfs; + job_core_req = atom->core_req; + job_device_nr = atom->device_nr; + js_kctx_info = &kctx->jctx.sched_info; + ctx_flags = js_kctx_info->ctx.flags; + + /* Initial check for atoms targetting a specific coregroup */ + if ((job_core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) != MALI_FALSE && job_device_nr >= kbdev->gpu_props.num_core_groups) { + /* device_nr exceeds the number of coregroups - not allowed by + * @ref base_jd_atom API contract */ + return MALI_ERROR_FUNCTION_FAILED; + } + + /* Pick a core_req variant that matches us. 
Since they're ordered by least + * restrictive first, it picks the least restrictive variant */ + for (i = 0; i < policy_info->num_core_req_variants; ++i) { + base_jd_core_req var_core_req; + kbase_context_flags var_ctx_req; + u32 var_device_nr; + var_core_req = policy_info->core_req_variants[i].core_req; + var_ctx_req = policy_info->core_req_variants[i].ctx_req; + var_device_nr = policy_info->core_req_variants[i].device_nr; + + if ((var_core_req & job_core_req) == job_core_req && (var_ctx_req & ctx_flags) == ctx_flags && ((var_core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) == MALI_FALSE || var_device_nr == job_device_nr)) { + job_info->cached_variant_idx = i; + return MALI_ERROR_NONE; + } + } + + /* Could not find a matching requirement, this should only be caused by an + * attempt to attack the driver. */ + return MALI_ERROR_FUNCTION_FAILED; +} + +STATIC mali_bool dequeue_job(kbase_device *kbdev, + kbase_context *kctx, + u32 variants_supported, + kbase_jd_atom ** const katom_ptr, + int job_slot_idx) +{ + kbasep_js_device_data *js_devdata; + kbasep_js_policy_cfs *policy_info; + kbasep_js_policy_cfs_ctx *ctx_info; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(katom_ptr != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + policy_info = &js_devdata->policy.cfs; + ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs; + + /* Only submit jobs from contexts that are allowed */ + if (kbasep_js_is_submit_allowed(js_devdata, kctx) != MALI_FALSE) { + /* Check each variant in turn */ + while (variants_supported != 0) { + long variant_idx; + struct list_head *job_list; + variant_idx = ffs(variants_supported) - 1; + job_list = &ctx_info->job_list_head[variant_idx]; + + if (!list_empty(job_list)) { + /* Found a context with a matching job */ + { + kbase_jd_atom *front_atom = list_entry(job_list->next, kbase_jd_atom, sched_info.cfs.list); + KBASE_TRACE_ADD_SLOT(kbdev, JS_POLICY_DEQUEUE_JOB, front_atom->kctx, front_atom, front_atom->jc, job_slot_idx); + } + *katom_ptr = list_entry(job_list->next, kbase_jd_atom, sched_info.cfs.list); + list_del(job_list->next); + + (*katom_ptr)->sched_info.cfs.ticks = 0; + + /* Put this context at the back of the Run Pool */ + list_del(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list); + list_add_tail(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list, &policy_info->scheduled_ctxs_head); + + return MALI_TRUE; + } + + variants_supported &= ~(1u << variant_idx); + } + /* All variants checked by here */ + } + + /* The context does not have a matching job */ + + return MALI_FALSE; +} + +/** + * Hold the runpool_irq spinlock for this + */ +STATIC INLINE mali_bool timer_callback_should_run(kbase_device *kbdev) +{ + kbasep_js_device_data *js_devdata; + s8 nr_running_ctxs; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + + /* nr_user_contexts_running is updated with the runpool_mutex. 
However, the + * locking in the caller gives us a barrier that ensures nr_user_contexts is + * up-to-date for reading */ + nr_running_ctxs = js_devdata->nr_user_contexts_running; + +#ifdef CONFIG_MALI_DEBUG + if (js_devdata->softstop_always && nr_running_ctxs > 0) { + /* Debug support for allowing soft-stop on a single context */ + return MALI_TRUE; + } +#endif /* CONFIG_MALI_DEBUG */ + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) { + /* Timeouts would have to be 4x longer (due to micro-architectural design) + * to support OpenCL conformance tests, so only run the timer when there's: + * - 2 or more CL contexts + * - 1 or more GLES contexts + * + * NOTE: We will treat a context that has both Compute and Non-Compute jobs + * will be treated as an OpenCL context (hence, we don't check + * KBASEP_JS_CTX_ATTR_NON_COMPUTE). + */ + { + s8 nr_compute_ctxs = kbasep_js_ctx_attr_count_on_runpool(kbdev, KBASEP_JS_CTX_ATTR_COMPUTE); + s8 nr_noncompute_ctxs = nr_running_ctxs - nr_compute_ctxs; + + return (mali_bool) (nr_compute_ctxs >= 2 || nr_noncompute_ctxs > 0); + } + } else { + /* Run the timer callback whenever you have at least 1 context */ + return (mali_bool) (nr_running_ctxs > 0); + } +} + +static enum hrtimer_restart timer_callback(struct hrtimer *timer) +{ + unsigned long flags; + kbase_device *kbdev; + kbasep_js_device_data *js_devdata; + kbasep_js_policy_cfs *policy_info; + int s; + mali_bool reset_needed = MALI_FALSE; + + KBASE_DEBUG_ASSERT(timer != NULL); + + policy_info = container_of(timer, kbasep_js_policy_cfs, scheduling_timer); + kbdev = container_of(policy_info, kbase_device, js_data.policy.cfs); + js_devdata = &kbdev->js_data; + + /* Loop through the slots */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { + kbase_jm_slot *slot = &kbdev->jm_slots[s]; + kbase_jd_atom *atom = NULL; + + if (kbasep_jm_nr_jobs_submitted(slot) > 0) { + atom = kbasep_jm_peek_idx_submit_slot(slot, 0); + KBASE_DEBUG_ASSERT(atom != NULL); + + if (kbasep_jm_is_dummy_workaround_job(kbdev, atom) != MALI_FALSE) { + /* Prevent further use of the atom - never cause a soft-stop, hard-stop, or a GPU reset due to it. */ + atom = NULL; + } + } + + if (atom != NULL) { + /* The current version of the model doesn't support Soft-Stop */ + if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { + u32 ticks = atom->sched_info.cfs.ticks++; + +#if !CINSTR_DUMPING_ENABLED + u32 soft_stop_ticks, hard_stop_ticks, gpu_reset_ticks; + if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + soft_stop_ticks = js_devdata->soft_stop_ticks_cl; + hard_stop_ticks = js_devdata->hard_stop_ticks_cl; + gpu_reset_ticks = js_devdata->gpu_reset_ticks_cl; + } else { + soft_stop_ticks = js_devdata->soft_stop_ticks; + hard_stop_ticks = js_devdata->hard_stop_ticks_ss; + gpu_reset_ticks = js_devdata->gpu_reset_ticks_ss; + } + + /* Job is Soft-Stoppable */ + if (ticks == soft_stop_ticks) { + /* Job has been scheduled for at least js_devdata->soft_stop_ticks ticks. + * Soft stop the slot so we can run other jobs. + */ + KBASE_LOG(1, kbdev->dev, "Soft-stop"); + +#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS == 0 + kbase_job_slot_softstop(kbdev, s, atom); +#endif + } else if (ticks == hard_stop_ticks) { + /* Job has been scheduled for at least js_devdata->hard_stop_ticks_ss ticks. + * It should have been soft-stopped by now. Hard stop the slot. 
+ */ +#if KBASE_DISABLE_SCHEDULING_HARD_STOPS == 0 + dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", (unsigned long)ticks, (unsigned long)(js_devdata->scheduling_tick_ns / 1000000u)); + kbase_job_slot_hardstop(atom->kctx, s, atom); +#endif + } else if (ticks == gpu_reset_ticks) { + /* Job has been scheduled for at least js_devdata->gpu_reset_ticks_ss ticks. + * It should have left the GPU by now. Signal that the GPU needs to be reset. + */ + reset_needed = MALI_TRUE; + } +#else /* !CINSTR_DUMPING_ENABLED */ + /* NOTE: During CINSTR_DUMPING_ENABLED, we use the alternate timeouts, which + * makes the hard-stop and GPU reset timeout much longer. We also ensure that + * we don't soft-stop at all. */ + if (ticks == js_devdata->soft_stop_ticks) { + /* Job has been scheduled for at least js_devdata->soft_stop_ticks. + * We do not soft-stop during CINSTR_DUMPING_ENABLED, however. + */ + KBASE_LOG(1, kbdev->dev, "Soft-stop"); + } else if (ticks == js_devdata->hard_stop_ticks_nss) { + /* Job has been scheduled for at least js_devdata->hard_stop_ticks_nss ticks. + * Hard stop the slot. + */ +#if KBASE_DISABLE_SCHEDULING_HARD_STOPS == 0 + dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", (unsigned long)ticks, (unsigned long)(js_devdata->scheduling_tick_ns / 1000000u)); + kbase_job_slot_hardstop(atom->kctx, s, atom); +#endif + } else if (ticks == js_devdata->gpu_reset_ticks_nss) { + /* Job has been scheduled for at least js_devdata->gpu_reset_ticks_nss ticks. + * It should have left the GPU by now. Signal that the GPU needs to be reset. + */ + reset_needed = MALI_TRUE; + } +#endif /* !CINSTR_DUMPING_ENABLED */ + } + } + } + + if (reset_needed) { + dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS/NSS timeout hit). 
Issueing GPU soft-reset to resolve."); + + if (kbase_prepare_to_reset_gpu_locked(kbdev)) + kbase_reset_gpu_locked(kbdev); + } + + /* the timer is re-issued if there is contexts in the run-pool */ + + if (timer_callback_should_run(kbdev) != MALI_FALSE) { + hrtimer_start(&policy_info->scheduling_timer, HR_TIMER_DELAY_NSEC(js_devdata->scheduling_tick_ns), HRTIMER_MODE_REL); + } else { + KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_END, NULL, NULL, 0u, 0u); + /* timer_running state is updated by kbasep_js_policy_runpool_timers_sync() */ + } + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return HRTIMER_NORESTART; +} + +/* + * Non-private functions + */ + +mali_error kbasep_js_policy_init(kbase_device *kbdev) +{ + kbasep_js_device_data *js_devdata; + kbasep_js_policy_cfs *policy_info; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + policy_info = &js_devdata->policy.cfs; + + INIT_LIST_HEAD(&policy_info->ctx_queue_head); + INIT_LIST_HEAD(&policy_info->scheduled_ctxs_head); + INIT_LIST_HEAD(&policy_info->ctx_rt_queue_head); + + atomic64_set(&policy_info->least_runtime_us, KBASEP_JS_RUNTIME_EMPTY); + atomic64_set(&policy_info->rt_least_runtime_us, KBASEP_JS_RUNTIME_EMPTY); + + hrtimer_init(&policy_info->scheduling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + policy_info->scheduling_timer.function = timer_callback; + + policy_info->timer_running = MALI_FALSE; + policy_info->head_runtime_us = 0; + + /* Build up the core_req variants */ + build_core_req_variants(kbdev, policy_info); + /* Build the slot to variant lookups */ + build_slot_lookups(kbdev, policy_info); + + return MALI_ERROR_NONE; +} + +void kbasep_js_policy_term(kbasep_js_policy *js_policy) +{ + kbasep_js_policy_cfs *policy_info; + KBASE_DEBUG_ASSERT(js_policy != NULL); + policy_info = &js_policy->cfs; + + /* ASSERT that there are no contexts queued */ + KBASE_DEBUG_ASSERT(list_empty(&policy_info->ctx_queue_head)); + KBASE_DEBUG_ASSERT(KBASEP_JS_RUNTIME_EMPTY == atomic64_read(&policy_info->least_runtime_us)); + + /* ASSERT that there are no contexts scheduled */ + KBASE_DEBUG_ASSERT(list_empty(&policy_info->scheduled_ctxs_head)); + + /* ASSERT that there are no contexts queued */ + KBASE_DEBUG_ASSERT(list_empty(&policy_info->ctx_rt_queue_head)); + KBASE_DEBUG_ASSERT(KBASEP_JS_RUNTIME_EMPTY == atomic64_read(&policy_info->rt_least_runtime_us)); + + hrtimer_cancel(&policy_info->scheduling_timer); +} + +mali_error kbasep_js_policy_init_ctx(kbase_device *kbdev, kbase_context *kctx) +{ + kbasep_js_device_data *js_devdata; + kbasep_js_policy_cfs_ctx *ctx_info; + kbasep_js_policy_cfs *policy_info; + u32 i; + int policy; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + policy_info = &kbdev->js_data.policy.cfs; + ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs; + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_INIT_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx)); + + for (i = 0; i < policy_info->num_core_req_variants; ++i) + INIT_LIST_HEAD(&ctx_info->job_list_head[i]); + + policy = current->policy; + if (policy == SCHED_FIFO || policy == SCHED_RR) { + ctx_info->process_rt_policy = MALI_TRUE; + ctx_info->process_priority = (((MAX_RT_PRIO - 1) - current->rt_priority) / 5) - 20; + } else { + ctx_info->process_rt_policy = MALI_FALSE; + ctx_info->process_priority = (current->static_prio - MAX_RT_PRIO) - 20; + } + + ctx_info->bag_total_priority = 0; + ctx_info->bag_total_nr_atoms = 0; + + /* Initial runtime (relative to 
least-run context runtime) + * + * This uses the Policy Queue's most up-to-date head_runtime_us by using the + * queue mutex to issue memory barriers - also ensure future updates to + * head_runtime_us occur strictly after this context is initialized */ + mutex_lock(&js_devdata->queue_mutex); + + /* No need to hold the the runpool_irq.lock here, because we're initializing + * the value, and the context is definitely not being updated in the + * runpool at this point. The queue_mutex ensures the memory barrier. */ + ctx_info->runtime_us = policy_info->head_runtime_us + priority_weight(ctx_info, (u64) js_devdata->cfs_ctx_runtime_init_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u)); + + mutex_unlock(&js_devdata->queue_mutex); + + return MALI_ERROR_NONE; +} + +void kbasep_js_policy_term_ctx(kbasep_js_policy *js_policy, kbase_context *kctx) +{ + kbasep_js_policy_cfs_ctx *ctx_info; + kbasep_js_policy_cfs *policy_info; + u32 i; + + KBASE_DEBUG_ASSERT(js_policy != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + policy_info = &js_policy->cfs; + ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs; + + { + kbase_device *kbdev = container_of(js_policy, kbase_device, js_data.policy); + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_TERM_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx)); + } + + /* ASSERT that no jobs are present */ + for (i = 0; i < policy_info->num_core_req_variants; ++i) + KBASE_DEBUG_ASSERT(list_empty(&ctx_info->job_list_head[i])); + + /* No work to do */ +} + +/* + * Context Management + */ + +void kbasep_js_policy_enqueue_ctx(kbasep_js_policy *js_policy, kbase_context *kctx) +{ + kbasep_js_policy_cfs *policy_info; + kbasep_js_policy_cfs_ctx *ctx_info; + kbase_context *head_ctx; + kbase_context *list_kctx = NULL; + kbasep_js_device_data *js_devdata; + struct list_head *queue_head; + struct list_head *pos; + kbase_device *kbdev; + atomic64_t *least_runtime_us; + u64 head_runtime; + + KBASE_DEBUG_ASSERT(js_policy != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + policy_info = &js_policy->cfs; + ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs; + kbdev = container_of(js_policy, kbase_device, js_data.policy); + js_devdata = &kbdev->js_data; + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_ENQUEUE_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx)); + + /* ASSERT about scheduled-ness/queued-ness */ + kbasep_js_debug_check(policy_info, kctx, KBASEP_JS_CHECK_NOTQUEUED); + + /* Clamp the runtime to prevent DoS attacks through "stored-up" runtime */ + if (policy_info->head_runtime_us > ctx_info->runtime_us + (u64) js_devdata->cfs_ctx_runtime_min_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u)) { + /* No need to hold the the runpool_irq.lock here, because we're essentially + * initializing the value, and the context is definitely not being updated in the + * runpool at this point. The queue_mutex held by the caller ensures the memory + * barrier. 
*/ + ctx_info->runtime_us = policy_info->head_runtime_us - (u64) js_devdata->cfs_ctx_runtime_min_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u); + } + + /* Find the position where the context should be enqueued */ + if (ctx_info->process_rt_policy) { + queue_head = &policy_info->ctx_rt_queue_head; + least_runtime_us = &policy_info->rt_least_runtime_us; + } else { + queue_head = &policy_info->ctx_queue_head; + least_runtime_us = &policy_info->least_runtime_us; + } + + if (list_empty(queue_head)) { + list_add_tail(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list, queue_head); + } else { + list_for_each(pos, queue_head) { + kbasep_js_policy_cfs_ctx *list_ctx_info; + + list_kctx = list_entry(pos, kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list); + list_ctx_info = &list_kctx->jctx.sched_info.runpool.policy_ctx.cfs; + + if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) + break; + + if ((list_ctx_info->runtime_us > ctx_info->runtime_us) && ((list_kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) == 0)) + break; + } + + /* Add the context to the queue */ + list_add_tail(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list, &list_kctx->jctx.sched_info.runpool.policy_ctx.cfs.list); + } + + /* Ensure least_runtime_us is up to date*/ + head_ctx = list_entry(queue_head->next, kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list); + head_runtime = head_ctx->jctx.sched_info.runpool.policy_ctx.cfs.runtime_us; + atomic64_set(least_runtime_us, head_runtime); +} + +mali_bool kbasep_js_policy_dequeue_head_ctx(kbasep_js_policy *js_policy, kbase_context ** const kctx_ptr) +{ + kbasep_js_policy_cfs *policy_info; + kbase_context *head_ctx; + struct list_head *queue_head; + atomic64_t *least_runtime_us; + kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(js_policy != NULL); + KBASE_DEBUG_ASSERT(kctx_ptr != NULL); + + policy_info = &js_policy->cfs; + kbdev = container_of(js_policy, kbase_device, js_data.policy); + + /* attempt to dequeue from the 'realttime' queue first */ + if (list_empty(&policy_info->ctx_rt_queue_head)) { + if (list_empty(&policy_info->ctx_queue_head)) { + /* Nothing to dequeue */ + return MALI_FALSE; + } else { + queue_head = &policy_info->ctx_queue_head; + least_runtime_us = &policy_info->least_runtime_us; + } + } else { + queue_head = &policy_info->ctx_rt_queue_head; + least_runtime_us = &policy_info->rt_least_runtime_us; + } + + /* Contexts are dequeued from the front of the queue */ + *kctx_ptr = list_entry(queue_head->next, kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list); + /* If dequeuing will empty the list, then set least_runtime_us prior to deletion */ + if (queue_head->next->next == queue_head) + atomic64_set(least_runtime_us, KBASEP_JS_RUNTIME_EMPTY); + list_del(queue_head->next); + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_DEQUEUE_HEAD_CTX, *kctx_ptr, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, *kctx_ptr)); + + /* Update the head runtime */ + if (!list_empty(queue_head)) { + u64 head_runtime; + + head_ctx = list_entry(queue_head->next, kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list); + + /* No need to hold the the runpool_irq.lock here for reading - the + * context is definitely not being updated in the runpool at this + * point. The queue_mutex held by the caller ensures the memory barrier. 
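The enqueue path above keeps each queue sorted by accumulated weighted runtime, least-run first, with privileged contexts placed ahead of everything else. The position selection reduces approximately to the following standalone sketch (types and helper are invented for illustration; the driver's linked-list edge cases are glossed over):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct fake_ctx {
	uint64_t runtime_us;   /* weighted time the context has already had */
	bool     privileged;   /* privileged contexts jump the queue */
};

/* Return the index before which 'new_ctx' should be inserted so the queue
 * stays sorted by runtime_us, least-run first. */
static size_t queue_insert_pos(const struct fake_ctx *q, size_t n,
                               const struct fake_ctx *new_ctx)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (new_ctx->privileged)
			break;   /* a privileged context goes straight to the front */
		if (q[i].runtime_us > new_ctx->runtime_us && !q[i].privileged)
			break;   /* insert before the first longer-running, non-privileged context */
	}
	return i;    /* i == n means append at the back */
}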
*/ + head_runtime = head_ctx->jctx.sched_info.runpool.policy_ctx.cfs.runtime_us; + + if (head_runtime > policy_info->head_runtime_us) + policy_info->head_runtime_us = head_runtime; + + atomic64_set(least_runtime_us, head_runtime); + } + + return MALI_TRUE; +} + +mali_bool kbasep_js_policy_try_evict_ctx(kbasep_js_policy *js_policy, kbase_context *kctx) +{ + kbasep_js_policy_cfs_ctx *ctx_info; + kbasep_js_policy_cfs *policy_info; + mali_bool is_present; + struct list_head *queue_head; + atomic64_t *least_runtime_us; + struct list_head *qhead; + kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(js_policy != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + policy_info = &js_policy->cfs; + ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs; + kbdev = container_of(js_policy, kbase_device, js_data.policy); + + if (ctx_info->process_rt_policy) { + queue_head = &policy_info->ctx_rt_queue_head; + least_runtime_us = &policy_info->rt_least_runtime_us; + } else { + queue_head = &policy_info->ctx_queue_head; + least_runtime_us = &policy_info->least_runtime_us; + } + + qhead = queue_head; + + is_present = kbasep_list_member_of(qhead, &kctx->jctx.sched_info.runpool.policy_ctx.cfs.list); + + KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, JS_POLICY_TRY_EVICT_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx), is_present); + + if (is_present != MALI_FALSE) { + kbase_context *head_ctx; + qhead = queue_head; + + /* If dequeuing will empty the list, then set least_runtime_us prior to deletion */ + if (queue_head->next->next == queue_head) + atomic64_set(least_runtime_us, KBASEP_JS_RUNTIME_EMPTY); + + /* Remove the context */ + list_del(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list); + + qhead = queue_head; + /* Update the head runtime */ + if (!list_empty(qhead)) { + u64 head_runtime; + + head_ctx = list_entry(qhead->next, kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list); + + /* No need to hold the the runpool_irq.lock here for reading - the + * context is definitely not being updated in the runpool at this + * point. The queue_mutex held by the caller ensures the memory barrier. 
*/ + head_runtime = head_ctx->jctx.sched_info.runpool.policy_ctx.cfs.runtime_us; + + if (head_runtime > policy_info->head_runtime_us) + policy_info->head_runtime_us = head_runtime; + + atomic64_set(least_runtime_us, head_runtime); + } + } + + return is_present; +} + +void kbasep_js_policy_foreach_ctx_job(kbasep_js_policy *js_policy, kbase_context *kctx, + kbasep_js_policy_ctx_job_cb callback, mali_bool detach_jobs) +{ + kbasep_js_policy_cfs *policy_info; + kbasep_js_policy_cfs_ctx *ctx_info; + kbase_device *kbdev; + u32 i; + + KBASE_DEBUG_ASSERT(js_policy != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + kbdev = container_of(js_policy, kbase_device, js_data.policy); + policy_info = &js_policy->cfs; + ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs; + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx)); + + /* Invoke callback on jobs on each variant in turn */ + for (i = 0; i < policy_info->num_core_req_variants; ++i) { + struct list_head *job_list; + struct kbase_jd_atom *atom; + struct kbase_jd_atom *tmp_iter; + job_list = &ctx_info->job_list_head[i]; + /* Invoke callback on all kbase_jd_atoms in this list, optionally + * removing them from the list */ + list_for_each_entry_safe(atom, tmp_iter, job_list, sched_info.cfs.list) { + if (detach_jobs) + list_del(&atom->sched_info.cfs.list); + callback(kbdev, atom); + } + } + +} + +void kbasep_js_policy_runpool_add_ctx(kbasep_js_policy *js_policy, kbase_context *kctx) +{ + kbasep_js_policy_cfs *policy_info; + kbasep_js_device_data *js_devdata; + kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(js_policy != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + policy_info = &js_policy->cfs; + js_devdata = container_of(js_policy, kbasep_js_device_data, policy); + + kbdev = kctx->kbdev; + + { + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_RUNPOOL_ADD_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx)); + } + + /* ASSERT about scheduled-ness/queued-ness */ + kbasep_js_debug_check(policy_info, kctx, KBASEP_JS_CHECK_NOTSCHEDULED); + + /* All enqueued contexts go to the back of the runpool */ + list_add_tail(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list, &policy_info->scheduled_ctxs_head); + + if (timer_callback_should_run(kbdev) != MALI_FALSE && policy_info->timer_running == MALI_FALSE) { + hrtimer_start(&policy_info->scheduling_timer, HR_TIMER_DELAY_NSEC(js_devdata->scheduling_tick_ns), HRTIMER_MODE_REL); + + KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); + policy_info->timer_running = MALI_TRUE; + } +} + +void kbasep_js_policy_runpool_remove_ctx(kbasep_js_policy *js_policy, kbase_context *kctx) +{ + kbasep_js_policy_cfs *policy_info; + + KBASE_DEBUG_ASSERT(js_policy != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + policy_info = &js_policy->cfs; + + { + kbase_device *kbdev = container_of(js_policy, kbase_device, js_data.policy); + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_RUNPOOL_REMOVE_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx)); + } + + /* ASSERT about scheduled-ness/queued-ness */ + kbasep_js_debug_check(policy_info, kctx, KBASEP_JS_CHECK_SCHEDULED); + + /* No searching or significant list maintenance required to remove this context */ + list_del(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list); + +} + +mali_bool kbasep_js_policy_should_remove_ctx(kbasep_js_policy *js_policy, kbase_context *kctx) +{ + kbasep_js_policy_cfs_ctx *ctx_info; + kbasep_js_policy_cfs *policy_info; + kbasep_js_device_data 
*js_devdata; + u64 least_runtime_us; + + KBASE_DEBUG_ASSERT(js_policy != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + policy_info = &js_policy->cfs; + ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs; + js_devdata = container_of(js_policy, kbasep_js_device_data, policy); + + if (ctx_info->process_rt_policy) + least_runtime_us = atomic64_read(&policy_info->rt_least_runtime_us); + else + least_runtime_us = atomic64_read(&policy_info->least_runtime_us); + + if (KBASEP_JS_RUNTIME_EMPTY == least_runtime_us) { + /* Queue is empty */ + return MALI_FALSE; + } + + if ((least_runtime_us + priority_weight(ctx_info, (u64) (js_devdata->ctx_timeslice_ns / 1000u))) + < ctx_info->runtime_us) { + /* The context is scheduled out if it's not the least-run context anymore. + * The "real" head runtime is used instead of the cached runtime so the current + * context is not scheduled out when there is less contexts than address spaces. + */ + return MALI_TRUE; + } + + return MALI_FALSE; +} + +void kbasep_js_policy_runpool_timers_sync(kbasep_js_policy *js_policy) +{ + kbasep_js_policy_cfs *policy_info; + kbase_device *kbdev; + kbasep_js_device_data *js_devdata; + + KBASE_DEBUG_ASSERT(js_policy != NULL); + + policy_info = &js_policy->cfs; + kbdev = container_of(js_policy, kbase_device, js_data.policy); + js_devdata = &kbdev->js_data; + + if (!timer_callback_should_run(kbdev)) { + unsigned long flags; + + /* If the timer is running now, synchronize with it by + * locking/unlocking its spinlock, to ensure it's not using an old value + * from timer_callback_should_run() */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + /* From now on, return value of timer_callback_should_run() will also + * cause the timer to not requeue itself. Its return value cannot + * change, because it depends on variables updated with the + * runpool_mutex held, which the caller of this must also hold */ + hrtimer_cancel(&policy_info->scheduling_timer); + + policy_info->timer_running = MALI_FALSE; + } +} + +/* + * Job Chain Management + */ + +mali_error kbasep_js_policy_init_job(const kbasep_js_policy *js_policy, const kbase_context *kctx, kbase_jd_atom *katom) +{ + const kbasep_js_policy_cfs *policy_info; + + KBASE_DEBUG_ASSERT(js_policy != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + policy_info = &js_policy->cfs; + + /* Determine the job's index into the job list head, will return error if the + * atom is malformed and so is reported. 
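The eviction test above gives a scheduled context one weighted timeslice of grace beyond the least-run queued context before it is scheduled out. Reduced to a standalone sketch (weighting is simplified; in the driver priority_weight() applies a nice-based multiplier to the timeslice):

#include <stdbool.h>
#include <stdint.h>

#define RUNTIME_EMPTY ((uint64_t)-1)

/* 'least_queued_us': runtime of the least-run context still waiting in the
 * queue (RUNTIME_EMPTY if the queue is empty).
 * 'my_runtime_us':   weighted runtime of the currently scheduled context.
 * 'timeslice_us':    configured timeslice, already weighted for priority. */
static bool should_schedule_out(uint64_t least_queued_us,
                                uint64_t my_runtime_us,
                                uint64_t timeslice_us)
{
	if (least_queued_us == RUNTIME_EMPTY)
		return false;   /* nobody is waiting, keep running */
	return least_queued_us + timeslice_us < my_runtime_us;
}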
*/ + return cached_variant_idx_init(policy_info, kctx, katom); +} + +void kbasep_js_policy_register_job(kbasep_js_policy *js_policy, kbase_context *kctx, kbase_jd_atom *katom) +{ + kbasep_js_policy_cfs_ctx *ctx_info; + + KBASE_DEBUG_ASSERT(js_policy != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs; + + /* Adjust context priority to include the new job */ + ctx_info->bag_total_nr_atoms++; + ctx_info->bag_total_priority += katom->nice_prio; + + /* Get average priority and convert to NICE range -20..19 */ + if (ctx_info->bag_total_nr_atoms) + ctx_info->bag_priority = (ctx_info->bag_total_priority / ctx_info->bag_total_nr_atoms) - 20; +} + +void kbasep_js_policy_deregister_job(kbasep_js_policy *js_policy, kbase_context *kctx, kbase_jd_atom *katom) +{ + kbasep_js_policy_cfs_ctx *ctx_info; + + KBASE_DEBUG_ASSERT(js_policy != NULL); + CSTD_UNUSED(js_policy); + KBASE_DEBUG_ASSERT(katom != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs; + + /* Adjust context priority to no longer include removed job */ + KBASE_DEBUG_ASSERT(ctx_info->bag_total_nr_atoms > 0); + ctx_info->bag_total_nr_atoms--; + ctx_info->bag_total_priority -= katom->nice_prio; + KBASE_DEBUG_ASSERT(ctx_info->bag_total_priority >= 0); + + /* Get average priority and convert to NICE range -20..19 */ + if (ctx_info->bag_total_nr_atoms) + ctx_info->bag_priority = (ctx_info->bag_total_priority / ctx_info->bag_total_nr_atoms) - 20; +} +KBASE_EXPORT_TEST_API(kbasep_js_policy_deregister_job) + +mali_bool kbasep_js_policy_dequeue_job(kbase_device *kbdev, + int job_slot_idx, + kbase_jd_atom ** const katom_ptr) +{ + kbasep_js_device_data *js_devdata; + kbasep_js_policy_cfs *policy_info; + kbase_context *kctx; + u32 variants_supported; + struct list_head *pos; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(katom_ptr != NULL); + KBASE_DEBUG_ASSERT(job_slot_idx < BASE_JM_MAX_NR_SLOTS); + + js_devdata = &kbdev->js_data; + policy_info = &js_devdata->policy.cfs; + + /* Get the variants for this slot */ + if (kbdev->gpu_props.num_core_groups > 1 && kbasep_js_ctx_attr_is_attr_on_runpool(kbdev, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) != MALI_FALSE) { + /* SS-allcore state, and there's more than one coregroup */ + variants_supported = get_slot_to_variant_lookup(policy_info->slot_to_variant_lookup_ss_allcore_state, job_slot_idx); + } else { + /* SS-state */ + variants_supported = get_slot_to_variant_lookup(policy_info->slot_to_variant_lookup_ss_state, job_slot_idx); + } + + /* First pass through the runpool we consider the realtime priority jobs */ + list_for_each(pos, &policy_info->scheduled_ctxs_head) { + kctx = list_entry(pos, kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list); + if (kctx->jctx.sched_info.runpool.policy_ctx.cfs.process_rt_policy) { + if (dequeue_job(kbdev, kctx, variants_supported, katom_ptr, job_slot_idx)) { + /* Realtime policy job matched */ + return MALI_TRUE; + } + } + } + + /* Second pass through the runpool we consider the non-realtime priority jobs */ + list_for_each(pos, &policy_info->scheduled_ctxs_head) { + kctx = list_entry(pos, kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list); + if (kctx->jctx.sched_info.runpool.policy_ctx.cfs.process_rt_policy == MALI_FALSE) { + if (dequeue_job(kbdev, kctx, variants_supported, katom_ptr, job_slot_idx)) { + /* Non-realtime policy job matched */ + return MALI_TRUE; + } + } + } + + /* By this point, no contexts 
had a matching job */ + return MALI_FALSE; +} + +void kbasep_js_policy_enqueue_job(kbasep_js_policy *js_policy, kbase_jd_atom *katom) +{ + kbasep_js_policy_cfs_job *job_info; + kbasep_js_policy_cfs_ctx *ctx_info; + kbase_context *parent_ctx; + + KBASE_DEBUG_ASSERT(js_policy != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + parent_ctx = katom->kctx; + KBASE_DEBUG_ASSERT(parent_ctx != NULL); + + job_info = &katom->sched_info.cfs; + ctx_info = &parent_ctx->jctx.sched_info.runpool.policy_ctx.cfs; + + { + kbase_device *kbdev = container_of(js_policy, kbase_device, js_data.policy); + KBASE_TRACE_ADD(kbdev, JS_POLICY_ENQUEUE_JOB, katom->kctx, katom, katom->jc, 0); + } + list_add_tail(&katom->sched_info.cfs.list, &ctx_info->job_list_head[job_info->cached_variant_idx]); +} + +void kbasep_js_policy_log_job_result(kbasep_js_policy *js_policy, kbase_jd_atom *katom, u64 time_spent_us) +{ + kbasep_js_policy_cfs_ctx *ctx_info; + kbase_context *parent_ctx; + KBASE_DEBUG_ASSERT(js_policy != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + CSTD_UNUSED(js_policy); + + parent_ctx = katom->kctx; + KBASE_DEBUG_ASSERT(parent_ctx != NULL); + + ctx_info = &parent_ctx->jctx.sched_info.runpool.policy_ctx.cfs; + + ctx_info->runtime_us += priority_weight(ctx_info, time_spent_us); +} + +mali_bool kbasep_js_policy_ctx_has_priority(kbasep_js_policy *js_policy, kbase_context *current_ctx, kbase_context *new_ctx) +{ + kbasep_js_policy_cfs_ctx *current_ctx_info; + kbasep_js_policy_cfs_ctx *new_ctx_info; + + KBASE_DEBUG_ASSERT(current_ctx != NULL); + KBASE_DEBUG_ASSERT(new_ctx != NULL); + CSTD_UNUSED(js_policy); + + current_ctx_info = &current_ctx->jctx.sched_info.runpool.policy_ctx.cfs; + new_ctx_info = &new_ctx->jctx.sched_info.runpool.policy_ctx.cfs; + + if ((current_ctx_info->process_rt_policy == MALI_FALSE) && (new_ctx_info->process_rt_policy == MALI_TRUE)) + return MALI_TRUE; + + if ((current_ctx_info->process_rt_policy == new_ctx_info->process_rt_policy) && (current_ctx_info->bag_priority > new_ctx_info->bag_priority)) + return MALI_TRUE; + + return MALI_FALSE; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h new file mode 100755 index 00000000000..9c4f3c66bb6 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h @@ -0,0 +1,167 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_js_policy_cfs.h + * Completely Fair Job Scheduler Policy structure definitions + */ + +#ifndef _KBASE_JS_POLICY_CFS_H_ +#define _KBASE_JS_POLICY_CFS_H_ + +#define KBASE_JS_POLICY_AVAILABLE_CFS + +/** @addtogroup base_api + * @{ */ +/** @addtogroup base_kbase_api + * @{ */ +/** @addtogroup kbase_js_policy + * @{ */ + +/** + * Internally, this policy keeps a few internal queues for different variants + * of core requirements, which are used to decide how to schedule onto the + * different job slots. 
+ * + * Must be a power of 2 to keep the lookup math simple + */ +#define KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS_LOG2 3 +#define KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS (1u << KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS_LOG2) + +/** Bits needed in the lookup to support all slots */ +#define KBASEP_JS_VARIANT_LOOKUP_BITS_NEEDED (BASE_JM_MAX_NR_SLOTS * KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS) +/** Number of u32s needed in the lookup array to support all slots */ +#define KBASEP_JS_VARIANT_LOOKUP_WORDS_NEEDED ((KBASEP_JS_VARIANT_LOOKUP_BITS_NEEDED + 31) / 32) + +#define KBASEP_JS_RUNTIME_EMPTY ((u64)-1) + +typedef struct kbasep_js_policy_cfs { + /** List of all contexts in the context queue. Hold + * kbasep_js_device_data::queue_mutex whilst accessing. */ + struct list_head ctx_queue_head; + + /** List of all contexts in the realtime (priority) context queue */ + struct list_head ctx_rt_queue_head; + + /** List of scheduled contexts. Hold kbasep_jd_device_data::runpool_irq::lock + * whilst accessing, which is a spinlock */ + struct list_head scheduled_ctxs_head; + + /** Number of valid elements in the core_req_variants member, and the + * kbasep_js_policy_rr_ctx::job_list_head array */ + u32 num_core_req_variants; + + /** Variants of the core requirements */ + kbasep_atom_req core_req_variants[KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS]; + + /* Lookups per job slot against which core_req_variants match it */ + u32 slot_to_variant_lookup_ss_state[KBASEP_JS_VARIANT_LOOKUP_WORDS_NEEDED]; + u32 slot_to_variant_lookup_ss_allcore_state[KBASEP_JS_VARIANT_LOOKUP_WORDS_NEEDED]; + + /* The timer tick used for rescheduling jobs */ + struct hrtimer scheduling_timer; + + /* Is the timer running? + * + * The kbasep_js_device_data::runpool_mutex must be held whilst modifying this. */ + mali_bool timer_running; + + /* Number of us the least-run context has been running for + * + * The kbasep_js_device_data::queue_mutex must be held whilst updating this + * Reads are possible without this mutex, but an older value might be read + * if no memory barriers are issued beforehand. */ + u64 head_runtime_us; + + /* Number of us the least-run context in the context queue has been running for. + * -1 if context queue is empty. */ + atomic64_t least_runtime_us; + + /* Number of us the least-run context in the realtime (priority) context queue + * has been running for. -1 if realtime context queue is empty. */ + atomic64_t rt_least_runtime_us; +} kbasep_js_policy_cfs; + +/** + * This policy contains a single linked list of all contexts. + */ +typedef struct kbasep_js_policy_cfs_ctx { + /** Link implementing the Policy's Queue, and Currently Scheduled list */ + struct list_head list; + + /** Job lists for use when in the Run Pool - only using + * kbasep_js_policy_fcfs::num_unique_slots of them. We still need to track + * the jobs when we're not in the runpool, so this member is accessed from + * outside the policy queue (for the first job), inside the policy queue, + * and inside the runpool. + * + * If the context is in the runpool, then this must only be accessed with + * kbasep_js_device_data::runpool_irq::lock held + * + * Jobs are still added to this list even when the context is not in the + * runpool. In that case, the kbasep_js_kctx_info::ctx::jsctx_mutex must be + * held before accessing this. */ + struct list_head job_list_head[KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS]; + + /** Number of us this context has been running for + * + * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held + * whilst updating this. 
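The sizing macros above imply that each job slot owns an 8-bit mask of supported core-requirement variants, packed into an array of u32 words. The accessor itself is defined elsewhere in the .c file, so the sketch below is only a plausible reading of that packing based on the macro arithmetic, not the driver's actual helper:

#include <stdint.h>

#define VARIANTS_LOG2  3
#define SLOT_BITS      (1u << VARIANTS_LOG2)   /* 8 variants => 8 bits per slot */

/* Extract the per-slot bitmask of supported variants from a packed u32
 * lookup array: slot N is assumed to own bits [N*8, N*8+7]. */
static uint32_t slot_variant_mask(const uint32_t *lookup, unsigned int slot)
{
	unsigned int bit = slot * SLOT_BITS;

	return (lookup[bit / 32] >> (bit % 32)) & ((1u << SLOT_BITS) - 1);
}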
Initializing will occur on context init and + * context enqueue (which can only occur in one thread at a time), but + * multi-thread access only occurs while the context is in the runpool. + * + * Reads are possible without this spinlock, but an older value might be read + * if no memory barriers are issued beforehand */ + u64 runtime_us; + + /* Calling process policy scheme is a realtime scheduler and will use the priority queue + * Non-mutable after ctx init */ + mali_bool process_rt_policy; + /* Calling process NICE priority */ + int process_priority; + /* Average NICE priority of all atoms in bag: + * Hold the kbasep_js_kctx_info::ctx::jsctx_mutex when accessing */ + int bag_priority; + /* Total NICE priority of all atoms in bag + * Hold the kbasep_js_kctx_info::ctx::jsctx_mutex when accessing */ + int bag_total_priority; + /* Total number of atoms in the bag + * Hold the kbasep_js_kctx_info::ctx::jsctx_mutex when accessing */ + int bag_total_nr_atoms; + +} kbasep_js_policy_cfs_ctx; + +/** + * In this policy, each Job is part of at most one of the per_corereq lists + */ +typedef struct kbasep_js_policy_cfs_job { + struct list_head list; /**< Link implementing the Run Pool list/Jobs owned by the ctx */ + u32 cached_variant_idx; /**< Cached index of the list this should be entered into on re-queue */ + + /** Number of ticks that this job has been executing for + * + * To access this, the kbasep_js_device_data::runpool_irq::lock must be held */ + u32 ticks; +} kbasep_js_policy_cfs_job; + + /** @} *//* end group kbase_js_policy */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + +#endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_linux.h b/drivers/gpu/arm/midgard/mali_kbase_linux.h new file mode 100755 index 00000000000..0d024f27976 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_linux.h @@ -0,0 +1,47 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_linux.h + * Base kernel APIs, Linux implementation. + */ + +#ifndef _KBASE_LINUX_H_ +#define _KBASE_LINUX_H_ + +/* All things that are needed for the Linux port. */ +#include +#include +#include +#include +#include + +#if defined(MALI_KERNEL_TEST_API) +#if (1 == MALI_KERNEL_TEST_API) +#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func); +#else +#define KBASE_EXPORT_TEST_API(func) +#endif +#else +#define KBASE_EXPORT_TEST_API(func) +#endif + +#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func); + +#endif /* _KBASE_LINUX_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c new file mode 100755 index 00000000000..c913c14708d --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -0,0 +1,1287 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_mem.c + * Base kernel memory APIs + */ +#ifdef CONFIG_DMA_SHARED_BUFFER +#include +#endif /* CONFIG_DMA_SHARED_BUFFER */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +/** + * @brief Check the zone compatibility of two regions. + */ +STATIC int kbase_region_tracker_match_zone(struct kbase_va_region *reg1, struct kbase_va_region *reg2) +{ + return ((reg1->flags & KBASE_REG_ZONE_MASK) == (reg2->flags & KBASE_REG_ZONE_MASK)); +} + +KBASE_EXPORT_TEST_API(kbase_region_tracker_match_zone) + +/* This function inserts a region into the tree. */ +static void kbase_region_tracker_insert(struct kbase_context *kctx, struct kbase_va_region *new_reg) +{ + u64 start_pfn = new_reg->start_pfn; + struct rb_node **link = &(kctx->reg_rbtree.rb_node); + struct rb_node *parent = NULL; + + /* Find the right place in the tree using tree search */ + while (*link) { + struct kbase_va_region *old_reg; + + parent = *link; + old_reg = rb_entry(parent, struct kbase_va_region, rblink); + + /* RBTree requires no duplicate entries. */ + KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn); + + if (old_reg->start_pfn > start_pfn) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + /* Put the new node there, and rebalance tree */ + rb_link_node(&(new_reg->rblink), parent, link); + rb_insert_color(&(new_reg->rblink), &(kctx->reg_rbtree)); +} + +/* Find allocated region enclosing range. */ +struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range(kbase_context *kctx, u64 start_pfn, size_t nr_pages) +{ + struct rb_node *rbnode; + struct kbase_va_region *reg; + u64 end_pfn = start_pfn + nr_pages; + + rbnode = kctx->reg_rbtree.rb_node; + + while (rbnode) { + u64 tmp_start_pfn, tmp_end_pfn; + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + tmp_start_pfn = reg->start_pfn; + tmp_end_pfn = reg->start_pfn + kbase_reg_current_backed_size(reg); + + /* If start is lower than this, go left. */ + if (start_pfn < tmp_start_pfn) + rbnode = rbnode->rb_left; + /* If end is higher than this, then go right. */ + else if (end_pfn > tmp_end_pfn) + rbnode = rbnode->rb_right; + else /* Enclosing */ + return reg; + } + + return NULL; +} + +/* Find allocated region enclosing free range. */ +struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(kbase_context *kctx, u64 start_pfn, size_t nr_pages) +{ + struct rb_node *rbnode; + struct kbase_va_region *reg; + u64 end_pfn = start_pfn + nr_pages; + + rbnode = kctx->reg_rbtree.rb_node; + while (rbnode) { + u64 tmp_start_pfn, tmp_end_pfn; + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + tmp_start_pfn = reg->start_pfn; + tmp_end_pfn = reg->start_pfn + reg->nr_pages; + + /* If start is lower than this, go left. */ + if (start_pfn < tmp_start_pfn) + rbnode = rbnode->rb_left; + /* If end is higher than this, then go right. */ + else if (end_pfn > tmp_end_pfn) + rbnode = rbnode->rb_right; + else /* Enclosing */ + return reg; + } + + return NULL; +} + +/* Find region enclosing given address. 
*/ +kbase_va_region *kbase_region_tracker_find_region_enclosing_address(kbase_context *kctx, mali_addr64 gpu_addr) +{ + struct rb_node *rbnode; + struct kbase_va_region *reg; + u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + + KBASE_DEBUG_ASSERT(NULL != kctx); + + rbnode = kctx->reg_rbtree.rb_node; + while (rbnode) { + u64 tmp_start_pfn, tmp_end_pfn; + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + tmp_start_pfn = reg->start_pfn; + tmp_end_pfn = reg->start_pfn + reg->nr_pages; + + /* If start is lower than this, go left. */ + if (gpu_pfn < tmp_start_pfn) + rbnode = rbnode->rb_left; + /* If end is higher than this, then go right. */ + else if (gpu_pfn >= tmp_end_pfn) + rbnode = rbnode->rb_right; + else /* Enclosing */ + return reg; + } + + return NULL; +} + +KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address) + +/* Find region with given base address */ +kbase_va_region *kbase_region_tracker_find_region_base_address(kbase_context *kctx, mali_addr64 gpu_addr) +{ + u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + struct rb_node *rbnode; + struct kbase_va_region *reg; + + KBASE_DEBUG_ASSERT(NULL != kctx); + + rbnode = kctx->reg_rbtree.rb_node; + while (rbnode) { + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + if (reg->start_pfn > gpu_pfn) + rbnode = rbnode->rb_left; + else if (reg->start_pfn < gpu_pfn) + rbnode = rbnode->rb_right; + else if (gpu_pfn == reg->start_pfn) + return reg; + else + rbnode = NULL; + } + + return NULL; +} + +KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address) + +/* Find region meeting given requirements */ +static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align) +{ + struct rb_node *rbnode; + struct kbase_va_region *reg; + + /* Note that this search is a linear search, as we do not have a target + address in mind, so does not benefit from the rbtree search */ + rbnode = rb_first(&(kctx->reg_rbtree)); + while (rbnode) { + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + if ((reg->nr_pages >= nr_pages) && (reg->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(reg, reg_reqs)) { + + /* Check alignment */ + u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1); + if ((start_pfn >= reg->start_pfn) && (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) && ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) + return reg; + } + rbnode = rb_next(rbnode); + } + + return NULL; +} + +/** + * @brief Remove a region object from the global list. + * + * The region reg is removed, possibly by merging with other free and + * compatible adjacent regions. It must be called with the context + * region lock held. The associated memory is not released (see + * kbase_free_alloced_region). Internal use only. 
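Both the requirement search above and the allocation path further down rely on the standard power-of-two round-up idiom for the start PFN. In isolation, with a usage example:

#include <assert.h>
#include <stdint.h>

/* Round 'pfn' up to the next multiple of 'align'; 'align' must be a power
 * of two, as the driver asserts before using this idiom. */
static uint64_t pfn_align_up(uint64_t pfn, uint64_t align)
{
	assert(align && (align & (align - 1)) == 0);
	return (pfn + align - 1) & ~(align - 1);
}

/* e.g. pfn_align_up(5, 4) == 8, pfn_align_up(8, 4) == 8 */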
+ */ +STATIC mali_error kbase_remove_va_region(kbase_context *kctx, struct kbase_va_region *reg) +{ + struct rb_node *rbprev; + struct kbase_va_region *prev = NULL; + struct rb_node *rbnext; + struct kbase_va_region *next = NULL; + + int merged_front = 0; + int merged_back = 0; + mali_error err = MALI_ERROR_NONE; + + /* Try to merge with the previous block first */ + rbprev = rb_prev(&(reg->rblink)); + if (rbprev) { + prev = rb_entry(rbprev, struct kbase_va_region, rblink); + if ((prev->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(prev, reg)) { + /* We're compatible with the previous VMA, merge with it */ + prev->nr_pages += reg->nr_pages; + rb_erase(&(reg->rblink), &kctx->reg_rbtree); + reg = prev; + merged_front = 1; + } + } + + /* Try to merge with the next block second */ + /* Note we do the lookup here as the tree may have been rebalanced. */ + rbnext = rb_next(&(reg->rblink)); + if (rbnext) { + /* We're compatible with the next VMA, merge with it */ + next = rb_entry(rbnext, struct kbase_va_region, rblink); + if ((next->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(next, reg)) { + next->start_pfn = reg->start_pfn; + next->nr_pages += reg->nr_pages; + rb_erase(&(reg->rblink), &kctx->reg_rbtree); + merged_back = 1; + if (merged_front) { + /* We already merged with prev, free it */ + kbase_free_alloced_region(reg); + } + } + } + + /* If we failed to merge then we need to add a new block */ + if (!(merged_front || merged_back)) { + /* + * We didn't merge anything. Add a new free + * placeholder and remove the original one. + */ + struct kbase_va_region *free_reg; + + free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK); + if (!free_reg) { + err = MALI_ERROR_OUT_OF_MEMORY; + goto out; + } + + rb_replace_node(&(reg->rblink), &(free_reg->rblink), &(kctx->reg_rbtree)); + } + + out: + return err; +} + +KBASE_EXPORT_TEST_API(kbase_remove_va_region) + +/** + * @brief Insert a VA region to the list, replacing the current at_reg. + */ +static mali_error kbase_insert_va_region_nolock(kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages) +{ + mali_error err = MALI_ERROR_NONE; + + /* Must be a free region */ + KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0); + /* start_pfn should be contained within at_reg */ + KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages)); + /* at least nr_pages from start_pfn should be contained within at_reg */ + KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages); + + new_reg->start_pfn = start_pfn; + new_reg->nr_pages = nr_pages; + + /* Regions are a whole use, so swap and delete old one. */ + if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) { + rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), &(kctx->reg_rbtree)); + kbase_free_alloced_region(at_reg); + } + /* New region replaces the start of the old one, so insert before. */ + else if (at_reg->start_pfn == start_pfn) { + at_reg->start_pfn += nr_pages; + KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages); + at_reg->nr_pages -= nr_pages; + + kbase_region_tracker_insert(kctx, new_reg); + } + /* New region replaces the end of the old one, so insert after. 
*/ + else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) { + at_reg->nr_pages -= nr_pages; + + kbase_region_tracker_insert(kctx, new_reg); + } + /* New region splits the old one, so insert and create new */ + else { + struct kbase_va_region *new_front_reg = kbase_alloc_free_region(kctx, at_reg->start_pfn, start_pfn - at_reg->start_pfn, at_reg->flags & KBASE_REG_ZONE_MASK); + if (new_front_reg) { + at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; + at_reg->start_pfn = start_pfn + nr_pages; + + kbase_region_tracker_insert(kctx, new_front_reg); + kbase_region_tracker_insert(kctx, new_reg); + } else { + err = MALI_ERROR_OUT_OF_MEMORY; + } + } + + return err; +} + +/** + * @brief Add a VA region to the list. + */ +mali_error kbase_add_va_region(kbase_context *kctx, struct kbase_va_region *reg, mali_addr64 addr, size_t nr_pages, size_t align) +{ + struct kbase_va_region *tmp; + u64 gpu_pfn = addr >> PAGE_SHIFT; + mali_error err = MALI_ERROR_NONE; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != reg); + + if (!align) + align = 1; + + /* must be a power of 2 */ + KBASE_DEBUG_ASSERT((align & (align - 1)) == 0); + KBASE_DEBUG_ASSERT(nr_pages > 0); + + /* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */ + if (gpu_pfn) { + struct device *dev = kctx->kbdev->dev; + KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1))); + + tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages); + if (!tmp) { + dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages); + err = MALI_ERROR_OUT_OF_GPU_MEMORY; + goto exit; + } + + if ((!kbase_region_tracker_match_zone(tmp, reg)) || (!(tmp->flags & KBASE_REG_FREE))) { + dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK); + dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages); + dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align); + err = MALI_ERROR_OUT_OF_GPU_MEMORY; + goto exit; + } + + err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages); + if (err) { + dev_warn(dev, "Failed to insert va region"); + err = MALI_ERROR_OUT_OF_GPU_MEMORY; + goto exit; + } + + goto exit; + } + + /* Path 2: Map any free address which meets the requirements. */ + { + u64 start_pfn; + tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg, nr_pages, align); + if (!tmp) { + err = MALI_ERROR_OUT_OF_GPU_MEMORY; + goto exit; + } + start_pfn = (tmp->start_pfn + align - 1) & ~(align - 1); + err = kbase_insert_va_region_nolock(kctx, reg, tmp, start_pfn, nr_pages); + } + + exit: + return err; +} + +KBASE_EXPORT_TEST_API(kbase_add_va_region) + +/** + * @brief Initialize the internal region tracker data structure. 
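kbase_insert_va_region_nolock above distinguishes four ways a new region can sit inside a free placeholder: exact replacement, trimming the front, trimming the back, or splitting the placeholder in two. The classification alone, as an editorial sketch (enum and function invented for illustration):

#include <stddef.h>
#include <stdint.h>

enum va_insert_case {
	VA_REPLACE_WHOLE,   /* new region uses the whole placeholder */
	VA_TRIM_FRONT,      /* new region at the start: shrink placeholder from the front */
	VA_TRIM_BACK,       /* new region at the end: shrink placeholder from the back */
	VA_SPLIT            /* new region in the middle: placeholder splits in two */
};

static enum va_insert_case classify_insert(uint64_t at_start, size_t at_pages,
                                           uint64_t new_start, size_t new_pages)
{
	if (at_start == new_start && at_pages == new_pages)
		return VA_REPLACE_WHOLE;
	if (at_start == new_start)
		return VA_TRIM_FRONT;
	if (at_start + at_pages == new_start + new_pages)
		return VA_TRIM_BACK;
	return VA_SPLIT;
}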
+ */ +static void kbase_region_tracker_ds_init(kbase_context *kctx, struct kbase_va_region *same_va_reg, struct kbase_va_region *exec_reg, struct kbase_va_region *custom_va_reg) +{ + kctx->reg_rbtree = RB_ROOT; + kbase_region_tracker_insert(kctx, same_va_reg); + + /* exec and custom_va_reg doesn't always exist */ + if (exec_reg && custom_va_reg) { + kbase_region_tracker_insert(kctx, exec_reg); + kbase_region_tracker_insert(kctx, custom_va_reg); + } +} + +void kbase_region_tracker_term(kbase_context *kctx) +{ + struct rb_node *rbnode; + struct kbase_va_region *reg; + do { + rbnode = rb_first(&(kctx->reg_rbtree)); + if (rbnode) { + rb_erase(rbnode, &(kctx->reg_rbtree)); + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + kbase_free_alloced_region(reg); + } + } while (rbnode); +} + +/** + * Initialize the region tracker data structure. + */ +mali_error kbase_region_tracker_init(kbase_context *kctx) +{ + struct kbase_va_region *same_va_reg; + struct kbase_va_region *exec_reg = NULL; + struct kbase_va_region *custom_va_reg = NULL; + size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE; + u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; + u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; + +#if defined(CONFIG_ARM64) + same_va_bits = VA_BITS; +#elif defined(CONFIG_X86_64) + same_va_bits = 47; +#elif defined(CONFIG_64BIT) +#error Unsupported 64-bit architecture +#endif + +#ifdef CONFIG_64BIT + if (is_compat_task()) + same_va_bits = 32; +#endif + + if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) + return MALI_ERROR_FUNCTION_FAILED; + + /* all have SAME_VA */ + same_va_reg = kbase_alloc_free_region(kctx, 1, (1ULL << (same_va_bits - PAGE_SHIFT)) - 2, KBASE_REG_ZONE_SAME_VA); + if (!same_va_reg) + return MALI_ERROR_OUT_OF_MEMORY; + +#ifdef CONFIG_64BIT + /* only 32-bit clients have the other two zones */ + if (is_compat_task()) { +#endif + if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { + kbase_free_alloced_region(same_va_reg); + return MALI_ERROR_FUNCTION_FAILED; + } + /* If the current size of TMEM is out of range of the + * virtual address space addressable by the MMU then + * we should shrink it to fit + */ + if( (KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit ) + custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; + + exec_reg = kbase_alloc_free_region(kctx, KBASE_REG_ZONE_EXEC_BASE, KBASE_REG_ZONE_EXEC_SIZE, KBASE_REG_ZONE_EXEC); + if (!exec_reg) { + kbase_free_alloced_region(same_va_reg); + return MALI_ERROR_OUT_OF_MEMORY; + } + + custom_va_reg = kbase_alloc_free_region(kctx, KBASE_REG_ZONE_CUSTOM_VA_BASE, custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); + if (!custom_va_reg) { + kbase_free_alloced_region(same_va_reg); + kbase_free_alloced_region(exec_reg); + return MALI_ERROR_OUT_OF_MEMORY; + } +#ifdef CONFIG_64BIT + } +#endif + + kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg); + + return MALI_ERROR_NONE; +} + +mali_error kbase_mem_init(struct kbase_device *kbdev) +{ + kbasep_mem_device *memdev; + KBASE_DEBUG_ASSERT(kbdev); + + memdev = &kbdev->memdev; + + /* Initialize memory usage */ + atomic_set(&memdev->used_pages, 0); + + /* nothing to do, zero-inited when kbase_device was created */ + return MALI_ERROR_NONE; +} + +void kbase_mem_halt(kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +void kbase_mem_term(kbase_device *kbdev) +{ + kbasep_mem_device *memdev; + int pages; + + KBASE_DEBUG_ASSERT(kbdev); + + memdev = &kbdev->memdev; + + pages = atomic_read(&memdev->used_pages); + 
if (pages != 0) + dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); +} + +KBASE_EXPORT_TEST_API(kbase_mem_term) + +/** + * @brief Wait for GPU write flush - only in use for BASE_HW_ISSUE_6367 + * + * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush its write buffer. + * @note If GPU resets occur then the counters are reset to zero, the delay may not be as expected. + */ +#ifndef CONFIG_MALI_NO_MALI +void kbase_wait_write_flush(kbase_context *kctx) +{ + u32 base_count = 0; + /* A suspend won't happen here, because we're in a syscall from a userspace thread */ + kbase_pm_context_active(kctx->kbdev); + kbase_pm_request_gpu_cycle_counter(kctx->kbdev); + while (MALI_TRUE) { + u32 new_count; + new_count = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL); + /* First time around, just store the count. */ + if (base_count == 0) { + base_count = new_count; + continue; + } + + /* No need to handle wrapping, unsigned maths works for this. */ + if ((new_count - base_count) > 1000) + break; + } + kbase_pm_release_gpu_cycle_counter(kctx->kbdev); + kbase_pm_context_idle(kctx->kbdev); +} +#endif /* CONFIG_MALI_NO_MALI */ + + + +/** + * @brief Allocate a free region object. + * + * The allocated object is not part of any list yet, and is flagged as + * KBASE_REG_FREE. No mapping is allocated yet. + * + * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC + * + */ +struct kbase_va_region *kbase_alloc_free_region(kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone) +{ + struct kbase_va_region *new_reg; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + /* zone argument should only contain zone related region flags */ + KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0); + KBASE_DEBUG_ASSERT(nr_pages > 0); + KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (UINT64_MAX / PAGE_SIZE)); /* 64-bit address range is the max */ + + new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL); + + if (!new_reg) { + dev_warn(kctx->kbdev->dev, "kzalloc failed"); + return NULL; + } + + new_reg->alloc = NULL; /* no alloc bound yet */ + new_reg->kctx = kctx; + new_reg->flags = zone | KBASE_REG_FREE; + + new_reg->flags |= KBASE_REG_GROWABLE; + + /* Set up default MEMATTR usage */ + new_reg->flags |= KBASE_REG_MEMATTR_INDEX(ASn_MEMATTR_INDEX_DEFAULT); + + new_reg->start_pfn = start_pfn; + new_reg->nr_pages = nr_pages; + + return new_reg; +} + +KBASE_EXPORT_TEST_API(kbase_alloc_free_region) + +/** + * @brief Free a region object. + * + * The described region must be freed of any mapping. + * + * If the region is not flagged as KBASE_REG_FREE, the region's + * alloc object will be released. + * It is a bug if no alloc object exists for non-free regions. + * + */ +void kbase_free_alloced_region(struct kbase_va_region *reg) +{ + KBASE_DEBUG_ASSERT(NULL != reg); + if (!(reg->flags & KBASE_REG_FREE)) { + kbase_mem_phy_alloc_put(reg->alloc); + KBASE_DEBUG_CODE( + /* To detect use-after-free in debug builds */ + reg->flags |= KBASE_REG_FREE); + } + kfree(reg); +} + +KBASE_EXPORT_TEST_API(kbase_free_alloced_region) + +void kbase_mmu_update(kbase_context *kctx) +{ + /* Use GPU implementation-defined caching policy. */ + u64 mem_attrs; + u32 pgd_high; + + KBASE_DEBUG_ASSERT(NULL != kctx); + mem_attrs = kctx->mem_attrs; + /* ASSERT that the context has a valid as_nr, which is only the case + * when it's scheduled in. 
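The write-flush wait above depends on unsigned 32-bit subtraction being wrap-safe when comparing two cycle-counter samples. The idiom in isolation (names invented; this is not the driver's helper):

#include <stdbool.h>
#include <stdint.h>

/* True once at least 'min_cycles' GPU cycles have elapsed since 'base'.
 * Because both samples are u32, the subtraction stays correct even if the
 * counter wrapped between the reads, provided the real gap is below 2^32. */
static bool cycles_elapsed(uint32_t base, uint32_t now, uint32_t min_cycles)
{
	return (uint32_t)(now - base) > min_cycles;
}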
+ * + * as_nr won't change because the caller has the runpool_irq lock */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + pgd_high = sizeof(kctx->pgd) > 4 ? (kctx->pgd >> 32) : 0; + + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_TRANSTAB_LO), + (kctx->pgd & ASn_TRANSTAB_ADDR_SPACE_MASK) | + ASn_TRANSTAB_READ_INNER | ASn_TRANSTAB_ADRMODE_TABLE, + kctx); + + /* Need to use a conditional expression to avoid + * "right shift count >= width of type" error when using an if statement + * - although the size_of condition is evaluated at compile time the + * unused branch is not removed until after it is type-checked and the + * error produced. + */ + pgd_high = sizeof(kctx->pgd) > 4 ? (kctx->pgd >> 32) : 0; + + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_TRANSTAB_HI), + pgd_high, kctx); + + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_MEMATTR_LO), + mem_attrs & 0xFFFFFFFFUL, kctx); + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_MEMATTR_HI), + (mem_attrs >> 32) & 0xFFFFFFFFUL, kctx); + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), + ASn_COMMAND_UPDATE, kctx); +} + +KBASE_EXPORT_TEST_API(kbase_mmu_update) + +void kbase_mmu_disable(kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(NULL != kctx); + /* ASSERT that the context has a valid as_nr, which is only the case + * when it's scheduled in. + * + * as_nr won't change because the caller has the runpool_irq lock */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_TRANSTAB_LO), 0, kctx); + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_TRANSTAB_HI), 0, kctx); + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_UPDATE, kctx); +} + +KBASE_EXPORT_TEST_API(kbase_mmu_disable) + +mali_error kbase_gpu_mmap(kbase_context *kctx, struct kbase_va_region *reg, mali_addr64 addr, size_t nr_pages, size_t align) +{ + mali_error err; + size_t i = 0; + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != reg); + + err = kbase_add_va_region(kctx, reg, addr, nr_pages, align); + if (MALI_ERROR_NONE != err) + return err; + + if (reg->alloc->type == KBASE_MEM_TYPE_ALIAS) { + u64 stride; + stride = reg->alloc->imported.alias.stride; + KBASE_DEBUG_ASSERT(reg->alloc->imported.alias.aliased); + for (i = 0; i < reg->alloc->imported.alias.nents; i++) { + if (reg->alloc->imported.alias.aliased[i].alloc) { + err = kbase_mmu_insert_pages(kctx, + reg->start_pfn + (i * stride), + reg->alloc->imported.alias.aliased[i].alloc->pages + reg->alloc->imported.alias.aliased[i].offset, + reg->alloc->imported.alias.aliased[i].length, + reg->flags); + if (MALI_ERROR_NONE != err) + goto bad_insert; + + kbase_mem_phy_alloc_gpu_mapped(reg->alloc->imported.alias.aliased[i].alloc); + } else { + err = kbase_mmu_insert_single_page(kctx, + reg->start_pfn + i * stride, + kctx->aliasing_sink_page, + reg->alloc->imported.alias.aliased[i].length, + (reg->flags & ~KBASE_REG_MEMATTR_MASK) | KBASE_REG_MEMATTR_INDEX(ASn_MEMATTR_INDEX_WRITE_ALLOC) + ); + if (MALI_ERROR_NONE != err) + goto bad_insert; + } + } + } else { + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, + kbase_get_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags); + if (MALI_ERROR_NONE != err) + goto bad_insert; + kbase_mem_phy_alloc_gpu_mapped(reg->alloc); + } + + return err; + +bad_insert: + if (reg->alloc->type == KBASE_MEM_TYPE_ALIAS) { + u64 stride; + stride = reg->alloc->imported.alias.stride; + 
KBASE_DEBUG_ASSERT(reg->alloc->imported.alias.aliased); + while (i--) + if (reg->alloc->imported.alias.aliased[i].alloc) { + kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->alloc->imported.alias.aliased[i].length); + kbase_mem_phy_alloc_gpu_unmapped(reg->alloc->imported.alias.aliased[i].alloc); + } + } + + kbase_remove_va_region(kctx, reg); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_gpu_mmap) + +mali_error kbase_gpu_munmap(kbase_context *kctx, struct kbase_va_region *reg) +{ + mali_error err; + + if (reg->start_pfn == 0) + return MALI_ERROR_NONE; + + if (reg->alloc && reg->alloc->type == KBASE_MEM_TYPE_ALIAS) { + size_t i; + err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages); + KBASE_DEBUG_ASSERT(reg->alloc->imported.alias.aliased); + for (i = 0; i < reg->alloc->imported.alias.nents; i++) + if (reg->alloc->imported.alias.aliased[i].alloc) + kbase_mem_phy_alloc_gpu_unmapped(reg->alloc->imported.alias.aliased[i].alloc); + } else { + err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg)); + kbase_mem_phy_alloc_gpu_unmapped(reg->alloc); + } + + if (MALI_ERROR_NONE != err) + return err; + + err = kbase_remove_va_region(kctx, reg); + return err; +} + +STATIC struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping_of_region(const struct kbase_va_region *reg, unsigned long uaddr, size_t size) +{ + struct kbase_cpu_mapping *map; + struct list_head *pos; + + KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(reg->alloc); + + if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */ + return NULL; + + list_for_each(pos, &reg->alloc->mappings) { + map = list_entry(pos, kbase_cpu_mapping, mappings_list); + if (map->vm_start <= uaddr && map->vm_end >= uaddr + size) + return map; + } + + return NULL; +} + +KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_of_region) + +mali_error kbasep_find_enclosing_cpu_mapping_offset(kbase_context *kctx, + mali_addr64 gpu_addr, + unsigned long uaddr, + size_t size, + mali_size64 *offset) +{ + struct kbase_cpu_mapping *map = NULL; + const struct kbase_va_region *reg; + mali_error err = MALI_ERROR_FUNCTION_FAILED; + KBASE_DEBUG_ASSERT(kctx != NULL); + + kbase_gpu_vm_lock(kctx); + + reg = kbase_region_tracker_find_region_enclosing_address(kctx, + gpu_addr); + if (reg) { + map = kbasep_find_enclosing_cpu_mapping_of_region(reg, uaddr, + size); + if (map) { + *offset = (uaddr - PTR_TO_U64(map->vm_start)) + + (map->page_off << PAGE_SHIFT); + err = MALI_ERROR_NONE; + } + } + + kbase_gpu_vm_unlock(kctx); + + return err; +} + +KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset) + +static mali_error kbase_do_syncset(kbase_context *kctx, struct base_syncset *set, kbase_sync_kmem_fn sync_fn) +{ + mali_error err = MALI_ERROR_NONE; + struct basep_syncset *sset = &set->basep_sset; + struct kbase_va_region *reg; + struct kbase_cpu_mapping *map; + unsigned long start; + size_t size; + phys_addr_t base_phy_addr = 0; + phys_addr_t *pa; + u64 page_off, page_count; + u64 i; + unsigned int offset_within_page; + void *base_virt_addr = 0; + size_t area_size = 0; + + kbase_os_mem_map_lock(kctx); + + kbase_gpu_vm_lock(kctx); + + /* find the region where the virtual address is contained */ + reg = kbase_region_tracker_find_region_enclosing_address(kctx, sset->mem_handle); + if (!reg) { + dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX", sset->mem_handle); + err = MALI_ERROR_FUNCTION_FAILED; + goto out_unlock; + } + + if (!(reg->flags & KBASE_REG_CPU_CACHED)) + goto 
out_unlock; + + start = (uintptr_t)sset->user_addr; + size = (size_t)sset->size; + + map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size); + if (!map) { + dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", start, sset->mem_handle); + err = MALI_ERROR_FUNCTION_FAILED; + goto out_unlock; + } + + offset_within_page = start & (PAGE_SIZE - 1); + page_off = map->page_off + ((start - map->vm_start) >> PAGE_SHIFT); + page_count = ((size + offset_within_page + (PAGE_SIZE - 1)) & PAGE_MASK) >> PAGE_SHIFT; + pa = kbase_get_phy_pages(reg); + + pagefault_disable(); + + for (i = 0; i < page_count; i++) { + u32 offset = start & (PAGE_SIZE - 1); + phys_addr_t paddr = pa[page_off + i] + offset; + size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); + u8 tmp; + + if (copy_from_user(&tmp, (void*)(uintptr_t)start, 1)) { + /* Not accessible */ + err = MALI_ERROR_FUNCTION_FAILED; + goto out_enable_pagefaults; + } + + if (paddr == base_phy_addr + area_size && start == ((uintptr_t) base_virt_addr + area_size)) { + area_size += sz; + } else if (area_size > 0) { + sync_fn(base_phy_addr, base_virt_addr, area_size); + area_size = 0; + } + + if (area_size == 0) { + base_phy_addr = paddr; + base_virt_addr = (void *)(uintptr_t)start; + area_size = sz; + } + + start += sz; + size -= sz; + } + + if (area_size > 0) + sync_fn(base_phy_addr, base_virt_addr, area_size); + + KBASE_DEBUG_ASSERT(size == 0); + +out_enable_pagefaults: + pagefault_enable(); +out_unlock: + kbase_gpu_vm_unlock(kctx); + kbase_os_mem_map_unlock(kctx); + return err; +} + +mali_error kbase_sync_now(kbase_context *kctx, struct base_syncset *syncset) +{ + mali_error err = MALI_ERROR_FUNCTION_FAILED; + struct basep_syncset *sset; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != syncset); + + sset = &syncset->basep_sset; + + switch (sset->type) { + case BASE_SYNCSET_OP_MSYNC: + err = kbase_do_syncset(kctx, syncset, kbase_sync_to_memory); + break; + + case BASE_SYNCSET_OP_CSYNC: + err = kbase_do_syncset(kctx, syncset, kbase_sync_to_cpu); + break; + + default: + dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type); + break; + } + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_sync_now) + +/* vm lock must be held */ +mali_error kbase_mem_free_region(kbase_context *kctx, kbase_va_region *reg) +{ + mali_error err; + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != reg); + BUG_ON(!mutex_is_locked(&kctx->reg_lock)); + err = kbase_gpu_munmap(kctx, reg); + if (err) { + dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n"); + goto out; + } + + if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { + /* Wait for GPU to flush write buffer before freeing physical pages */ + kbase_wait_write_flush(kctx); + } + + /* This will also free the physical pages */ + kbase_free_alloced_region(reg); + + out: + return err; +} + +KBASE_EXPORT_TEST_API(kbase_mem_free_region) + +/** + * @brief Free the region from the GPU and unregister it. + * + * This function implements the free operation on a memory segment. + * It will loudly fail if called with outstanding mappings. 
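+ *
+ * The region list lock is taken internally (via kbase_gpu_vm_lock()), so
+ * the caller must not already hold it.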
+ */ +mali_error kbase_mem_free(kbase_context *kctx, mali_addr64 gpu_addr) +{ + mali_error err = MALI_ERROR_NONE; + struct kbase_va_region *reg; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + if (0 == gpu_addr) { + dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n"); + return MALI_ERROR_FUNCTION_FAILED; + } + kbase_gpu_vm_lock(kctx); + + if (gpu_addr >= BASE_MEM_COOKIE_BASE && + gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { + int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE); + reg = kctx->pending_regions[cookie]; + if (!reg) { + err = MALI_ERROR_FUNCTION_FAILED; + goto out_unlock; + } + + /* ask to unlink the cookie as we'll free it */ + + kctx->pending_regions[cookie] = NULL; + kctx->cookies |= (1UL << cookie); + + kbase_free_alloced_region(reg); + } else { + /* A real GPU va */ + + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (!reg) { + dev_warn(kctx->kbdev->dev, + "kbase_mem_free called with nonexistent gpu_addr 0x%llX", + gpu_addr); + err = MALI_ERROR_FUNCTION_FAILED; + goto out_unlock; + } + + if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) { + /* SAME_VA must be freed through munmap */ + dev_warn(kctx->kbdev->dev, + "%s called on SAME_VA memory 0x%llX", __func__, gpu_addr); + err = MALI_ERROR_FUNCTION_FAILED; + goto out_unlock; + } + + err = kbase_mem_free_region(kctx, reg); + } + + out_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +} + +KBASE_EXPORT_TEST_API(kbase_mem_free) + +void kbase_update_region_flags(struct kbase_va_region *reg, unsigned long flags) +{ + KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT((flags & ~((1 << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); + + reg->flags |= kbase_cache_enabled(flags, reg->nr_pages); + /* all memory is now growable */ + reg->flags |= KBASE_REG_GROWABLE; + + if (flags & BASE_MEM_GROW_ON_GPF) + reg->flags |= KBASE_REG_PF_GROW; + + if (flags & BASE_MEM_PROT_CPU_WR) + reg->flags |= KBASE_REG_CPU_WR; + + if (flags & BASE_MEM_PROT_CPU_RD) + reg->flags |= KBASE_REG_CPU_RD; + + if (flags & BASE_MEM_PROT_GPU_WR) + reg->flags |= KBASE_REG_GPU_WR; + + if (flags & BASE_MEM_PROT_GPU_RD) + reg->flags |= KBASE_REG_GPU_RD; + + if (0 == (flags & BASE_MEM_PROT_GPU_EX)) + reg->flags |= KBASE_REG_GPU_NX; + + if (flags & BASE_MEM_COHERENT_LOCAL) + reg->flags |= KBASE_REG_SHARE_IN; + else if (flags & BASE_MEM_COHERENT_SYSTEM) + reg->flags |= KBASE_REG_SHARE_BOTH; + +} +KBASE_EXPORT_TEST_API(kbase_update_region_flags) + +int kbase_alloc_phy_pages_helper( + struct kbase_mem_phy_alloc *alloc, + size_t nr_pages_requested) +{ + KBASE_DEBUG_ASSERT(alloc); + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); + KBASE_DEBUG_ASSERT(alloc->imported.kctx); + + if (nr_pages_requested == 0) + goto done; /*nothing to do*/ + + kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->used_pages); + kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages); + + if (MALI_ERROR_NONE != kbase_mem_allocator_alloc(&alloc->imported.kctx->osalloc, nr_pages_requested, alloc->pages + alloc->nents)) + goto no_alloc; + + alloc->nents += nr_pages_requested; + + kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested); +done: + return 0; + +no_alloc: + kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages); + kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages); + + return -ENOMEM; +} + +int 
kbase_free_phy_pages_helper( + struct kbase_mem_phy_alloc *alloc, + size_t nr_pages_to_free) +{ + mali_bool syncback; + phys_addr_t *start_free; + KBASE_DEBUG_ASSERT(alloc); + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); + KBASE_DEBUG_ASSERT(alloc->imported.kctx); + KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); + + /* early out if nothing to do */ + if (0 == nr_pages_to_free) + return 0; + + start_free = alloc->pages + alloc->nents - nr_pages_to_free; + + syncback = alloc->accessed_cached ? MALI_TRUE : MALI_FALSE; + + kbase_mem_allocator_free(&alloc->imported.kctx->osalloc, + nr_pages_to_free, + start_free, + syncback); + + alloc->nents -= nr_pages_to_free; + kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_to_free); + kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->used_pages); + kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->kbdev->memdev.used_pages); + + return 0; +} + +void kbase_mem_kref_free(struct kref *kref) +{ + struct kbase_mem_phy_alloc *alloc; + alloc = container_of(kref, struct kbase_mem_phy_alloc, kref); + + switch (alloc->type) { + case KBASE_MEM_TYPE_NATIVE: { + KBASE_DEBUG_ASSERT(alloc->imported.kctx); + kbase_free_phy_pages_helper(alloc, alloc->nents); + break; + } + case KBASE_MEM_TYPE_ALIAS: { + /* just call put on the underlying phy allocs */ + size_t i; + struct kbase_aliased *aliased; + aliased = alloc->imported.alias.aliased; + if (aliased) { + for (i = 0; i < alloc->imported.alias.nents; i++) + if (aliased[i].alloc) + kbase_mem_phy_alloc_put(aliased[i].alloc); + vfree(aliased); + } + break; + } + case KBASE_MEM_TYPE_RAW: + /* raw pages, external cleanup */ + break; + #ifdef CONFIG_UMP + case KBASE_MEM_TYPE_IMPORTED_UMP: + ump_dd_release(alloc->imported.ump_handle); + break; +#endif +#ifdef CONFIG_DMA_SHARED_BUFFER + case KBASE_MEM_TYPE_IMPORTED_UMM: + dma_buf_detach(alloc->imported.umm.dma_buf, + alloc->imported.umm.dma_attachment); + dma_buf_put(alloc->imported.umm.dma_buf); + break; +#endif + case KBASE_MEM_TYPE_TB:{ + void *tb; + tb = alloc->imported.kctx->jctx.tb; + kbase_device_trace_buffer_uninstall(alloc->imported.kctx); + vfree(tb); + break; + } + default: + WARN(1, "Unexecpted free of type %d\n", alloc->type); + break; + } + vfree(alloc); +} + +KBASE_EXPORT_TEST_API(kbase_mem_kref_free); + +int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size) +{ + KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(vsize > 0); + + /* validate user provided arguments */ + if (size > vsize || vsize > reg->nr_pages) + goto out_term; + + /* Prevent vsize*sizeof from wrapping around. + * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail. + */ + if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->alloc->pages))) + goto out_term; + + KBASE_DEBUG_ASSERT(0 != vsize); + + if (MALI_ERROR_NONE != kbase_alloc_phy_pages_helper(reg->alloc, size)) + goto out_term; + + return 0; + + out_term: + return -1; +} + +KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages) + +mali_bool kbase_check_alloc_flags(unsigned long flags) +{ + /* Only known flags should be set. 
*/ + if (flags & ~((1 << BASE_MEM_FLAGS_NR_BITS) - 1)) + return MALI_FALSE; + + /* At least one flag should be set */ + if (flags == 0) + return MALI_FALSE; + + /* Either the GPU or CPU must be reading from the allocated memory */ + if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0) + return MALI_FALSE; + + /* Either the GPU or CPU must be writing to the allocated memory */ + if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0) + return MALI_FALSE; + + /* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */ + if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) + return MALI_FALSE; + + /* GPU should have at least read or write access otherwise there is no + reason for allocating. */ + if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) + return MALI_FALSE; + + return MALI_TRUE; +} + +/** + * @brief Acquire the per-context region list lock + */ +void kbase_gpu_vm_lock(kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx != NULL); + mutex_lock(&kctx->reg_lock); +} + +KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock) + +/** + * @brief Release the per-context region list lock + */ +void kbase_gpu_vm_unlock(kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx != NULL); + mutex_unlock(&kctx->reg_lock); +} + +KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock) diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h new file mode 100755 index 00000000000..8cbde3210a4 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -0,0 +1,616 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_mem.h + * Base kernel memory APIs + */ + +#ifndef _KBASE_MEM_H_ +#define _KBASE_MEM_H_ + +#ifndef _KBASE_H_ +#error "Don't include this file directly, use mali_kbase.h instead" +#endif + +#include +#include + +#ifdef CONFIG_UMP +#include +#endif /* CONFIG_UMP */ +#include "mali_base_kernel.h" +#include +#include "mali_kbase_pm.h" +#include "mali_kbase_defs.h" +#ifdef CONFIG_MALI_GATOR_SUPPORT +#include "mali_kbase_gator.h" +#endif /*CONFIG_MALI_GATOR_SUPPORT*/ + +/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ + +/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages. +The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and +page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table +updates and generates duplicate page faults as the page table information used by the MMU is not valid. 
*/ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */ + +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */ + +/* This must always be a power of 2 */ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2) +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) +/** + * A CPU mapping + */ +typedef struct kbase_cpu_mapping { + struct list_head mappings_list; + struct kbase_mem_phy_alloc *alloc; + struct kbase_context *kctx; + struct kbase_va_region *region; + pgoff_t page_off; + int count; + + unsigned long vm_start; + unsigned long vm_end; +} kbase_cpu_mapping; + +enum kbase_memory_type { + KBASE_MEM_TYPE_NATIVE, + KBASE_MEM_TYPE_IMPORTED_UMP, + KBASE_MEM_TYPE_IMPORTED_UMM, + KBASE_MEM_TYPE_ALIAS, + KBASE_MEM_TYPE_TB, + KBASE_MEM_TYPE_RAW +}; + +/* internal structure, mirroring base_mem_aliasing_info, + * but with alloc instead of a gpu va (handle) */ +struct kbase_aliased { + struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */ + u64 offset; /* in pages */ + u64 length; /* in pages */ +}; + +/* physical pages tracking object. + * Set up to track N pages. + * N not stored here, the creator holds that info. + * This object only tracks how many elements are actually valid (present). + * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not + * shared with another region or client. CPU mappings are OK to exist when changing, as + * long as the tracked mappings objects are updated as part of the change. + */ +struct kbase_mem_phy_alloc +{ + struct kref kref; /* number of users of this alloc */ + atomic_t gpu_mappings; + size_t nents; /* 0..N */ + phys_addr_t * pages; /* N elements, only 0..nents are valid */ + + /* kbase_cpu_mappings */ + struct list_head mappings; + + /* type of buffer */ + enum kbase_memory_type type; + + int accessed_cached; + + /* member in union valid based on @a type */ + union { +#ifdef CONFIG_UMP + ump_dd_handle ump_handle; +#endif /* CONFIG_UMP */ +#if defined(CONFIG_DMA_SHARED_BUFFER) + struct { + struct dma_buf *dma_buf; + struct dma_buf_attachment *dma_attachment; + unsigned int current_mapping_usage_count; + struct sg_table *sgt; + } umm; +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ + struct { + mali_size64 stride; + size_t nents; + struct kbase_aliased *aliased; + } alias; + /* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */ + struct kbase_context *kctx; + } imported; +}; + +static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) +{ + KBASE_DEBUG_ASSERT(alloc); + /* we only track mappings of NATIVE buffers */ + if (alloc->type == KBASE_MEM_TYPE_NATIVE) + atomic_inc(&alloc->gpu_mappings); +} + +static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc) +{ + KBASE_DEBUG_ASSERT(alloc); + /* we only track mappings of NATIVE buffers */ + if (alloc->type == KBASE_MEM_TYPE_NATIVE) + if (0 > atomic_dec_return(&alloc->gpu_mappings)) { + pr_err("Mismatched %s:\n", __func__); + dump_stack(); + } +} + +void kbase_mem_kref_free(struct kref * kref); + +mali_error kbase_mem_init(kbase_device * kbdev); +void kbase_mem_halt(kbase_device * kbdev); +void kbase_mem_term(kbase_device * kbdev); + +static inline struct kbase_mem_phy_alloc * 
kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc * alloc) +{ + kref_get(&alloc->kref); + return alloc; +} + +static inline struct kbase_mem_phy_alloc * kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc * alloc) +{ + kref_put(&alloc->kref, kbase_mem_kref_free); + return NULL; +} + +/** + * A GPU memory region, and attributes for CPU mappings. + */ +typedef struct kbase_va_region { + struct rb_node rblink; + struct list_head link; + + kbase_context *kctx; /* Backlink to base context */ + + u64 start_pfn; /* The PFN in GPU space */ + size_t nr_pages; + +/* Free region */ +#define KBASE_REG_FREE (1ul << 0) +/* CPU write access */ +#define KBASE_REG_CPU_WR (1ul << 1) +/* GPU write access */ +#define KBASE_REG_GPU_WR (1ul << 2) +/* No eXecute flag */ +#define KBASE_REG_GPU_NX (1ul << 3) +/* Is CPU cached? */ +#define KBASE_REG_CPU_CACHED (1ul << 4) +/* Is GPU cached? */ +#define KBASE_REG_GPU_CACHED (1ul << 5) + +#define KBASE_REG_GROWABLE (1ul << 6) +/* Can grow on pf? */ +#define KBASE_REG_PF_GROW (1ul << 7) + +/* VA managed by us */ +#define KBASE_REG_CUSTOM_VA (1ul << 8) + +/* inner shareable coherency */ +#define KBASE_REG_SHARE_IN (1ul << 9) +/* inner & outer shareable coherency */ +#define KBASE_REG_SHARE_BOTH (1ul << 10) + +/* Space for 4 different zones */ +#define KBASE_REG_ZONE_MASK (3ul << 11) +#define KBASE_REG_ZONE(x) (((x) & 3) << 11) + +/* GPU read access */ +#define KBASE_REG_GPU_RD (1ul<<13) +/* CPU read access */ +#define KBASE_REG_CPU_RD (1ul<<14) + +/* Aligned for GPU EX in SAME_VA */ +#define KBASE_REG_ALIGNED (1ul<<15) + +/* Index of chosen MEMATTR for this region (0..7) */ +#define KBASE_REG_MEMATTR_MASK (7ul << 16) +#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16) +#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16) + +#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) + +/* only used with 32-bit clients */ +/* + * On a 32bit platform, custom VA should be wired from (4GB + shader region) + * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface + * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference). + * So we put the default limit to the maximum possible on Linux and shrink + * it down, if required by the GPU, during initialization. 
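+ *
+ * Concretely, the defines below place the EXEC zone at GPU VA 4GB (16MB
+ * long) and the CUSTOM_VA zone from 4GB + 16MB up to the 2^44 byte limit.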
+ */ +#define KBASE_REG_ZONE_EXEC KBASE_REG_ZONE(1) /* Dedicated 16MB region for shader code */ +#define KBASE_REG_ZONE_EXEC_BASE ((1ULL << 32) >> PAGE_SHIFT) +#define KBASE_REG_ZONE_EXEC_SIZE ((16ULL * 1024 * 1024) >> PAGE_SHIFT) + +#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(2) +#define KBASE_REG_ZONE_CUSTOM_VA_BASE (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */ +#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) +/* end 32-bit clients only */ + + unsigned long flags; + + size_t extent; /* nr of pages alloc'd on PF */ + + struct kbase_mem_phy_alloc * alloc; /* the one alloc object we mmap to the GPU and CPU when mapping this region */ + + /* non-NULL if this memory object is a kds_resource */ + struct kds_resource *kds_res; + +} kbase_va_region; + +/* Common functions */ +static INLINE phys_addr_t *kbase_get_phy_pages(struct kbase_va_region *reg) +{ + KBASE_DEBUG_ASSERT(reg); + KBASE_DEBUG_ASSERT(reg->alloc); + + return reg->alloc->pages; +} + +static INLINE size_t kbase_reg_current_backed_size(struct kbase_va_region * reg) +{ + KBASE_DEBUG_ASSERT(reg); + /* if no alloc object the backed size naturally is 0 */ + if (reg->alloc) + return reg->alloc->nents; + else + return 0; +} + +static INLINE struct kbase_mem_phy_alloc * kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type) +{ + struct kbase_mem_phy_alloc * alloc; + const size_t extra_pages = (sizeof(*alloc) + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + + /* Prevent nr_pages*sizeof + sizeof(*alloc) from wrapping around. */ + if (nr_pages > (((size_t) -1 / sizeof(*alloc->pages))) - extra_pages) + return ERR_PTR(-ENOMEM); + + alloc = vzalloc(sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages); + if (!alloc) + return ERR_PTR(-ENOMEM); + + kref_init(&alloc->kref); + atomic_set(&alloc->gpu_mappings, 0); + alloc->nents = 0; + alloc->pages = (void*)(alloc + 1); + INIT_LIST_HEAD(&alloc->mappings); + alloc->type = type; + + return alloc; +} + +static INLINE int kbase_reg_prepare_native(struct kbase_va_region * reg, struct kbase_context * kctx) +{ + KBASE_DEBUG_ASSERT(reg); + KBASE_DEBUG_ASSERT(!reg->alloc); + KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE); + + reg->alloc = kbase_alloc_create(reg->nr_pages, KBASE_MEM_TYPE_NATIVE); + if (IS_ERR(reg->alloc)) + return PTR_ERR(reg->alloc); + else if (!reg->alloc) + return -ENOMEM; + reg->alloc->imported.kctx = kctx; + reg->flags &= ~KBASE_REG_FREE; + return 0; +} + +static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages) +{ + int new_val = atomic_add_return(num_pages, used_pages); +#ifdef CONFIG_MALI_GATOR_SUPPORT + kbase_trace_mali_total_alloc_pages_change((long long int)new_val); +#endif + return new_val; +} + +static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages) +{ + int new_val = atomic_sub_return(num_pages, used_pages); +#ifdef CONFIG_MALI_GATOR_SUPPORT + kbase_trace_mali_total_alloc_pages_change((long long int)new_val); +#endif + return new_val; +} + +/** + * @brief Initialize an OS based memory allocator. + * + * Initializes a allocator. + * Must be called before any allocation is attempted. + * \a kbase_mem_allocator_alloc and \a kbase_mem_allocator_free is used + * to allocate and free memory. + * \a kbase_mem_allocator_term must be called to clean up the allocator. + * All memory obtained via \a kbase_mem_allocator_alloc must have been + * \a kbase_mem_allocator_free before \a kbase_mem_allocator_term is called. 
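+ *
+ * A minimal usage sketch (hypothetical caller, error handling trimmed):
+ *
+ *   kbase_mem_allocator allocator;
+ *   phys_addr_t pages[16];
+ *
+ *   if (MALI_ERROR_NONE == kbase_mem_allocator_init(&allocator, 256)) {
+ *           if (MALI_ERROR_NONE == kbase_mem_allocator_alloc(&allocator, 16, pages))
+ *                   kbase_mem_allocator_free(&allocator, 16, pages, MALI_FALSE);
+ *           kbase_mem_allocator_term(&allocator);
+ *   }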
+ * + * @param allocator Allocator object to initialize + * @param max_size Maximum number of pages to keep on the freelist. + * @return MALI_ERROR_NONE on success, an error code indicating what failed on error. + */ +mali_error kbase_mem_allocator_init(kbase_mem_allocator * allocator, unsigned int max_size); + +/** + * @brief Allocate memory via an OS based memory allocator. + * + * @param[in] allocator Allocator to obtain the memory from + * @param nr_pages Number of pages to allocate + * @param[out] pages Pointer to an array where the physical address of the allocated pages will be stored + * @return MALI_ERROR_NONE if the pages were allocated, an error code indicating what failed on error + */ +mali_error kbase_mem_allocator_alloc(kbase_mem_allocator * allocator, size_t nr_pages, phys_addr_t *pages); + +/** + * @brief Free memory obtained for an OS based memory allocator. + * + * @param[in] allocator Allocator to free the memory back to + * @param nr_pages Number of pages to free + * @param[in] pages Pointer to an array holding the physical address of the paghes to free. + * @param[in] sync_back MALI_TRUE case the memory should be synced back + */ +void kbase_mem_allocator_free(kbase_mem_allocator * allocator, size_t nr_pages, phys_addr_t *pages, mali_bool sync_back); + +/** + * @brief Terminate an OS based memory allocator. + * + * Frees all cached allocations and clean up internal state. + * All allocate pages must have been \a kbase_mem_allocator_free before + * this function is called. + * + * @param[in] allocator Allocator to terminate + */ +void kbase_mem_allocator_term(kbase_mem_allocator * allocator); + + + +mali_error kbase_region_tracker_init(kbase_context *kctx); +void kbase_region_tracker_term(kbase_context *kctx); + +struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range(kbase_context *kctx, u64 start_pgoff, size_t nr_pages); + +struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(kbase_context *kctx, mali_addr64 gpu_addr); + +/** + * @brief Check that a pointer is actually a valid region. + * + * Must be called with context lock held. 
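+ * ("context lock" here means kctx->reg_lock, acquired via kbase_gpu_vm_lock()).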
+ */ +struct kbase_va_region *kbase_region_tracker_find_region_base_address(kbase_context *kctx, mali_addr64 gpu_addr); + +struct kbase_va_region *kbase_alloc_free_region(kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone); +void kbase_free_alloced_region(struct kbase_va_region *reg); +mali_error kbase_add_va_region(kbase_context *kctx, struct kbase_va_region *reg, mali_addr64 addr, size_t nr_pages, size_t align); + +mali_error kbase_gpu_mmap(kbase_context *kctx, struct kbase_va_region *reg, mali_addr64 addr, size_t nr_pages, size_t align); +mali_bool kbase_check_alloc_flags(unsigned long flags); +void kbase_update_region_flags(struct kbase_va_region *reg, unsigned long flags); + +void kbase_gpu_vm_lock(kbase_context *kctx); +void kbase_gpu_vm_unlock(kbase_context *kctx); + +int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); + +mali_error kbase_mmu_init(kbase_context *kctx); +void kbase_mmu_term(kbase_context *kctx); + +phys_addr_t kbase_mmu_alloc_pgd(kbase_context *kctx); +void kbase_mmu_free_pgd(kbase_context *kctx); +mali_error kbase_mmu_insert_pages(kbase_context *kctx, u64 vpfn, + phys_addr_t *phys, size_t nr, + unsigned long flags); +mali_error kbase_mmu_insert_single_page(kbase_context *kctx, u64 vpfn, + phys_addr_t phys, size_t nr, + unsigned long flags); + +mali_error kbase_mmu_teardown_pages(kbase_context *kctx, u64 vpfn, size_t nr); +mali_error kbase_mmu_update_pages(kbase_context* kctx, u64 vpfn, phys_addr_t* phys, size_t nr, unsigned long flags); + +/** + * @brief Register region and map it on the GPU. + * + * Call kbase_add_va_region() and map the region on the GPU. + */ +mali_error kbase_gpu_mmap(kbase_context *kctx, struct kbase_va_region *reg, mali_addr64 addr, size_t nr_pages, size_t align); + +/** + * @brief Remove the region from the GPU and unregister it. + * + * Must be called with context lock held. + */ +mali_error kbase_gpu_munmap(kbase_context *kctx, struct kbase_va_region *reg); + +/** + * The caller has the following locking conditions: + * - It must hold kbase_as::transaction_mutex on kctx's address space + * - It must hold the kbasep_js_device_data::runpool_irq::lock + */ +void kbase_mmu_update(kbase_context *kctx); + +/** + * The caller has the following locking conditions: + * - It must hold kbase_as::transaction_mutex on kctx's address space + * - It must hold the kbasep_js_device_data::runpool_irq::lock + */ +void kbase_mmu_disable(kbase_context *kctx); + +void kbase_mmu_interrupt(kbase_device *kbdev, u32 irq_stat); + +/** Dump the MMU tables to a buffer + * + * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the + * buffer is too small then the return value will be NULL. + * + * The GPU vm lock must be held when calling this function. + * + * The buffer returned should be freed with @ref vfree when it is no longer required. + * + * @param[in] kctx The kbase context to dump + * @param[in] nr_pages The number of pages to allocate for the buffer. 
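+ *
+ * A minimal usage sketch (hypothetical caller, buffer size chosen arbitrarily):
+ *
+ *   void *buf;
+ *
+ *   kbase_gpu_vm_lock(kctx);
+ *   buf = kbase_mmu_dump(kctx, 16);
+ *   kbase_gpu_vm_unlock(kctx);
+ *   if (buf)
+ *           vfree(buf);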
+ * + * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too + * small) + */ +void *kbase_mmu_dump(kbase_context *kctx, int nr_pages); + +mali_error kbase_sync_now(kbase_context *kctx, base_syncset *syncset); +void kbase_pre_job_sync(kbase_context *kctx, base_syncset *syncsets, size_t nr); +void kbase_post_job_sync(kbase_context *kctx, base_syncset *syncsets, size_t nr); + +/** + * Set attributes for imported tmem region + * + * This function sets (extends with) requested attributes for given region + * of imported external memory + * + * @param[in] kctx The kbase context which the tmem belongs to + * @param[in] gpu_adr The base address of the tmem region + * @param[in] attributes The attributes of tmem region to be set + * + * @return MALI_ERROR_NONE on success. Any other value indicates failure. + */ +mali_error kbase_tmem_set_attributes(kbase_context *kctx, mali_addr64 gpu_adr, u32 attributes ); + +/** + * Get attributes of imported tmem region + * + * This function retrieves the attributes of imported external memory + * + * @param[in] kctx The kbase context which the tmem belongs to + * @param[in] gpu_adr The base address of the tmem region + * @param[out] attributes The actual attributes of tmem region + * + * @return MALI_ERROR_NONE on success. Any other value indicates failure. + */ +mali_error kbase_tmem_get_attributes(kbase_context *kctx, mali_addr64 gpu_adr, u32 * const attributes ); + +/* OS specific functions */ +mali_error kbase_mem_free(kbase_context *kctx, mali_addr64 gpu_addr); +mali_error kbase_mem_free_region(kbase_context *kctx, struct kbase_va_region *reg); +void kbase_os_mem_map_lock(kbase_context *kctx); +void kbase_os_mem_map_unlock(kbase_context *kctx); + +/** + * @brief Update the memory allocation counters for the current process + * + * OS specific call to updates the current memory allocation counters for the current process with + * the supplied delta. + * + * @param[in] kctx The kbase context + * @param[in] pages The desired delta to apply to the memory usage counters. + */ + +void kbasep_os_process_page_usage_update( struct kbase_context * kctx, int pages ); + +/** + * @brief Add to the memory allocation counters for the current process + * + * OS specific call to add to the current memory allocation counters for the current process by + * the supplied amount. + * + * @param[in] kctx The kernel base context used for the allocation. + * @param[in] pages The desired delta to apply to the memory usage counters. + */ + +static INLINE void kbase_process_page_usage_inc( struct kbase_context *kctx, int pages ) +{ + kbasep_os_process_page_usage_update( kctx, pages ); +} + +/** + * @brief Subtract from the memory allocation counters for the current process + * + * OS specific call to subtract from the current memory allocation counters for the current process by + * the supplied amount. + * + * @param[in] kctx The kernel base context used for the allocation. + * @param[in] pages The desired delta to apply to the memory usage counters. + */ + +static INLINE void kbase_process_page_usage_dec( struct kbase_context *kctx, int pages ) +{ + kbasep_os_process_page_usage_update( kctx, 0 - pages ); +} + +/** + * @brief Find the offset of the CPU mapping of a memory allocation containing + * a given address range + * + * Searches for a CPU mapping of any part of the region starting at @p gpu_addr + * that fully encloses the CPU virtual address range specified by @p uaddr and + * @p size. 
Returns a failure indication if only part of the address range lies + * within a CPU mapping, or the address range lies within a CPU mapping of a + * different region. + * + * @param[in,out] kctx The kernel base context used for the allocation. + * @param[in] gpu_addr GPU address of the start of the allocated region + * within which to search. + * @param[in] uaddr Start of the CPU virtual address range. + * @param[in] size Size of the CPU virtual address range (in bytes). + * @param[out] offset The offset from the start of the allocation to the + * specified CPU virtual address. + * + * @return MALI_ERROR_NONE if offset was obtained successfully. Error code + * otherwise. + */ +mali_error kbasep_find_enclosing_cpu_mapping_offset(kbase_context *kctx, + mali_addr64 gpu_addr, + unsigned long uaddr, + size_t size, + mali_size64 *offset); + +enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer); +void kbase_as_poking_timer_retain_atom(kbase_device *kbdev, kbase_context *kctx, kbase_jd_atom *katom); +void kbase_as_poking_timer_release_atom(kbase_device *kbdev, kbase_context *kctx, kbase_jd_atom *katom); + +/** +* @brief Allocates physical pages. +* +* Allocates \a nr_pages_requested and updates the alloc object. +* +* @param[in] alloc allocation object to add pages to +* @param[in] nr_pages_requested number of physical pages to allocate +* +* @return 0 if all pages have been successfully allocated. Error code otherwise +*/ +int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc * alloc, size_t nr_pages_requested); + +/** +* @brief Free physical pages. +* +* Frees \a nr_pages and updates the alloc object. +* +* @param[in] alloc allocation object to free pages from +* @param[in] nr_pages_to_free number of physical pages to free +*/ +int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc * alloc, size_t nr_pages_to_free); + +#ifdef CONFIG_MALI_NO_MALI +static inline void kbase_wait_write_flush(kbase_context *kctx) +{ +} +#else +void kbase_wait_write_flush(kbase_context *kctx); +#endif + + +#endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c new file mode 100755 index 00000000000..f05320087b6 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c @@ -0,0 +1,263 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_mem.c + * Base kernel memory APIs + */ +#include +#include +#include +#include +#include + +static unsigned long kbase_mem_allocator_count(struct shrinker *s, + struct shrink_control *sc) +{ + kbase_mem_allocator *allocator; + allocator = container_of(s, kbase_mem_allocator, free_list_reclaimer); + return atomic_read(&allocator->free_list_size); +} + +static unsigned long kbase_mem_allocator_scan(struct shrinker *s, + struct shrink_control *sc) +{ + kbase_mem_allocator *allocator; + int i; + int freed; + + allocator = container_of(s, kbase_mem_allocator, free_list_reclaimer); + + might_sleep(); + + mutex_lock(&allocator->free_list_lock); + i = MIN(atomic_read(&allocator->free_list_size), sc->nr_to_scan); + freed = i; + + atomic_sub(i, &allocator->free_list_size); + + while (i--) { + struct page *p; + + BUG_ON(list_empty(&allocator->free_list_head)); + p = list_first_entry(&allocator->free_list_head, + struct page, lru); + list_del(&p->lru); + __free_page(p); + } + mutex_unlock(&allocator->free_list_lock); + return atomic_read(&allocator->free_list_size); + +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +static int kbase_mem_allocator_shrink(struct shrinker *s, + struct shrink_control *sc) +{ + if (sc->nr_to_scan == 0) + return kbase_mem_allocator_count(s, sc); + else + return kbase_mem_allocator_scan(s, sc); +} +#endif + +mali_error kbase_mem_allocator_init(kbase_mem_allocator *const allocator, + unsigned int max_size) +{ + KBASE_DEBUG_ASSERT(NULL != allocator); + + INIT_LIST_HEAD(&allocator->free_list_head); + + mutex_init(&allocator->free_list_lock); + + atomic_set(&allocator->free_list_size, 0); + + allocator->free_list_max_size = max_size; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) + allocator->free_list_reclaimer.shrink = kbase_mem_allocator_shrink; +#else + allocator->free_list_reclaimer.count_objects = + kbase_mem_allocator_count; + allocator->free_list_reclaimer.scan_objects = kbase_mem_allocator_scan; +#endif + allocator->free_list_reclaimer.seeks = DEFAULT_SEEKS; + /* Kernel versions prior to 3.1 : + * struct shrinker does not define batch */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) + allocator->free_list_reclaimer.batch = 0; +#endif + + register_shrinker(&allocator->free_list_reclaimer); + + return MALI_ERROR_NONE; +} +KBASE_EXPORT_TEST_API(kbase_mem_allocator_init) + +void kbase_mem_allocator_term(kbase_mem_allocator *allocator) +{ + KBASE_DEBUG_ASSERT(NULL != allocator); + + unregister_shrinker(&allocator->free_list_reclaimer); + mutex_lock(&allocator->free_list_lock); + while (!list_empty(&allocator->free_list_head)) + { + struct page * p; + p = list_first_entry(&allocator->free_list_head, struct page, lru); + list_del(&p->lru); + __free_page(p); + } + atomic_set(&allocator->free_list_size, 0); + mutex_unlock(&allocator->free_list_lock); + mutex_destroy(&allocator->free_list_lock); +} +KBASE_EXPORT_TEST_API(kbase_mem_allocator_term) + +mali_error kbase_mem_allocator_alloc(kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages) +{ + struct page * p; + void * mp; + int i; + int num_from_free_list; + struct list_head from_free_list = LIST_HEAD_INIT(from_free_list); + + might_sleep(); + + KBASE_DEBUG_ASSERT(NULL != allocator); + + /* take from the free list first */ + mutex_lock(&allocator->free_list_lock); + num_from_free_list = MIN(nr_pages, atomic_read(&allocator->free_list_size)); + atomic_sub(num_from_free_list, &allocator->free_list_size); + for (i = 0; i < num_from_free_list; i++) + { + 
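+ /* Detach pages from the shared free list onto a private list while
+ * holding the lock; their physical addresses are recorded into pages[]
+ * once the lock has been dropped. */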
BUG_ON(list_empty(&allocator->free_list_head)); + p = list_first_entry(&allocator->free_list_head, struct page, lru); + list_move(&p->lru, &from_free_list); + } + mutex_unlock(&allocator->free_list_lock); + i = 0; + + /* Allocate as many pages from the pool of already allocated pages. */ + list_for_each_entry(p, &from_free_list, lru) + { + pages[i] = PFN_PHYS(page_to_pfn(p)); + i++; + } + + if (i == nr_pages) + return MALI_ERROR_NONE; + + /* If not all pages were sourced from the pool, request new ones. */ + for (; i < nr_pages; i++) + { + p = alloc_page(GFP_HIGHUSER); + if (NULL == p) + { + goto err_out_roll_back; + } + mp = kmap(p); + if (NULL == mp) + { + __free_page(p); + goto err_out_roll_back; + } + memset(mp, 0x00, PAGE_SIZE); /* instead of __GFP_ZERO, so we can do cache maintenance */ + kbase_sync_to_memory(PFN_PHYS(page_to_pfn(p)), mp, PAGE_SIZE); + kunmap(p); + pages[i] = PFN_PHYS(page_to_pfn(p)); + } + + return MALI_ERROR_NONE; + +err_out_roll_back: + while (i--) + { + struct page * p; + p = pfn_to_page(PFN_DOWN(pages[i])); + pages[i] = (phys_addr_t)0; + __free_page(p); + } + + return MALI_ERROR_OUT_OF_MEMORY; +} +KBASE_EXPORT_TEST_API(kbase_mem_allocator_alloc) + +void kbase_mem_allocator_free(kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages, mali_bool sync_back) +{ + int i = 0; + int page_count = 0; + int tofree; + + LIST_HEAD(new_free_list_items); + + KBASE_DEBUG_ASSERT(NULL != allocator); + + might_sleep(); + + /* Starting by just freeing the overspill. + * As we do this outside of the lock we might spill too many pages + * or get too many on the free list, but the max_size is just a ballpark so it is ok + * providing that tofree doesn't exceed nr_pages + */ + tofree = MAX((int)allocator->free_list_max_size - atomic_read(&allocator->free_list_size),0); + tofree = nr_pages - MIN(tofree, nr_pages); + for (; i < tofree; i++) + { + if (likely(0 != pages[i])) + { + struct page * p; + + p = pfn_to_page(PFN_DOWN(pages[i])); + pages[i] = (phys_addr_t)0; + __free_page(p); + } + } + + for (; i < nr_pages; i++) + { + if (likely(0 != pages[i])) + { + struct page * p; + + p = pfn_to_page(PFN_DOWN(pages[i])); + pages[i] = (phys_addr_t)0; + /* Sync back the memory to ensure that future cache invalidations + * don't trample on memory. + */ + if( sync_back ) + { + void* mp = kmap(p); + if( NULL != mp) + { + kbase_sync_to_cpu(PFN_PHYS(page_to_pfn(p)), mp, PAGE_SIZE); + kunmap(p); + } + + } + list_add(&p->lru, &new_free_list_items); + page_count++; + } + } + mutex_lock(&allocator->free_list_lock); + list_splice(&new_free_list_items, &allocator->free_list_head); + atomic_add(page_count, &allocator->free_list_size); + mutex_unlock(&allocator->free_list_lock); +} +KBASE_EXPORT_TEST_API(kbase_mem_allocator_free) + diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h new file mode 100755 index 00000000000..5929b14a12c --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h @@ -0,0 +1,33 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +#include +#include +#include + +/* raw page handling */ +typedef struct kbase_mem_allocator +{ + atomic_t free_list_size; + unsigned int free_list_max_size; + struct mutex free_list_lock; + struct list_head free_list_head; + struct shrinker free_list_reclaimer; +} kbase_mem_allocator; + diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c new file mode 100755 index 00000000000..39c909904dc --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -0,0 +1,1655 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_mem_linux.c + * Base kernel memory APIs, Linux implementation. + */ + +#include +#include +#include +#include +#include +#include +#include +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + #include +#endif +#ifdef CONFIG_DMA_SHARED_BUFFER +#include +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ + +#include +#include +#include + +static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); +static const struct vm_operations_struct kbase_vm_ops; + +struct kbase_va_region *kbase_mem_alloc(kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment) +{ + int zone; + int gpu_pc_bits; + int cpu_va_bits; + struct kbase_va_region *reg; + struct device *dev; + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(flags); + KBASE_DEBUG_ASSERT(gpu_va); + KBASE_DEBUG_ASSERT(va_alignment); + + dev = kctx->kbdev->dev; + *va_alignment = 0; /* no alignment by default */ + *gpu_va = 0; /* return 0 on failure */ + + gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; + cpu_va_bits = BITS_PER_LONG; + + if (0 == va_pages) { + dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!"); + goto zero_size; + } + +#if defined(CONFIG_64BIT) + if (is_compat_task()) + cpu_va_bits = 32; + else + /* force SAME_VA if a 64-bit client */ + *flags |= BASE_MEM_SAME_VA; +#endif + + if (!kbase_check_alloc_flags(*flags)) { + dev_warn(dev, + "kbase_mem_alloc called with bad flags (%llx)", + (unsigned long long)*flags); + goto bad_flags; + } + + /* Limit GPU executable allocs to GPU PC size */ + if ((*flags & BASE_MEM_PROT_GPU_EX) && + (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT))) + goto bad_ex_size; + + /* find out which VA zone to use */ + if (*flags & BASE_MEM_SAME_VA) + zone = KBASE_REG_ZONE_SAME_VA; + else if (*flags & BASE_MEM_PROT_GPU_EX) + zone = KBASE_REG_ZONE_EXEC; + else + zone = KBASE_REG_ZONE_CUSTOM_VA; + + reg = kbase_alloc_free_region(kctx, 0, va_pages, zone); + if (!reg) { + dev_err(dev, "Failed to allocate free region"); + goto no_region; + } + + if (MALI_ERROR_NONE != kbase_reg_prepare_native(reg, kctx)) { + dev_err(dev, "Failed to prepare region"); + goto prepare_failed; + } + + kbase_update_region_flags(reg, *flags); + + if (*flags & BASE_MEM_GROW_ON_GPF) + reg->extent = extent; + else + reg->extent = 0; + + if (kbase_alloc_phy_pages(reg, va_pages, commit_pages)) { + dev_warn(dev, "Failed to allocate %lld pages 
(va_pages=%lld)", + (unsigned long long)commit_pages, (unsigned long long)va_pages); + goto no_mem; + } + + kbase_gpu_vm_lock(kctx); + + /* mmap needed to setup VA? */ + if (*flags & BASE_MEM_SAME_VA) { + /* Bind to a cookie */ + if (!kctx->cookies) { + dev_err(dev, "No cookies available for allocation!"); + goto no_cookie; + } + /* return a cookie */ + *gpu_va = __ffs(kctx->cookies); + kctx->cookies &= ~(1UL << *gpu_va); + BUG_ON(kctx->pending_regions[*gpu_va]); + kctx->pending_regions[*gpu_va] = reg; + + /* relocate to correct base */ + *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); + *gpu_va <<= PAGE_SHIFT; + + /* See if we must align memory due to GPU PC bits vs CPU VA */ + if ((*flags & BASE_MEM_PROT_GPU_EX) && + (cpu_va_bits > gpu_pc_bits)) { + *va_alignment = gpu_pc_bits; + reg->flags |= KBASE_REG_ALIGNED; + } + } else /* we control the VA */ { + if (MALI_ERROR_NONE != kbase_gpu_mmap(kctx, reg, 0, va_pages, 1)) { + dev_warn(dev, "Failed to map memory on GPU"); + goto no_mmap; + } + /* return real GPU VA */ + *gpu_va = reg->start_pfn << PAGE_SHIFT; + } + + kbase_gpu_vm_unlock(kctx); + return reg; + +no_mmap: +no_cookie: + kbase_gpu_vm_unlock(kctx); +no_mem: + kbase_mem_phy_alloc_put(reg->alloc); +prepare_failed: + kfree(reg); +no_region: +bad_ex_size: +bad_flags: +zero_size: + return NULL; +} + +mali_error kbase_mem_query(kbase_context *kctx, mali_addr64 gpu_addr, int query, u64 * const out) +{ + kbase_va_region *reg; + mali_error ret = MALI_ERROR_FUNCTION_FAILED; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(out); + + kbase_gpu_vm_lock(kctx); + + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (!reg || (reg->flags & KBASE_REG_FREE) ) + goto out_unlock; + + switch (query) { + case KBASE_MEM_QUERY_COMMIT_SIZE: + if (reg->alloc->type != KBASE_MEM_TYPE_ALIAS) { + *out = kbase_reg_current_backed_size(reg); + } else { + size_t i; + struct kbase_aliased *aliased; + *out = 0; + aliased = reg->alloc->imported.alias.aliased; + for (i = 0; i < reg->alloc->imported.alias.nents; i++) + *out += aliased[i].length; + } + break; + case KBASE_MEM_QUERY_VA_SIZE: + *out = reg->nr_pages; + break; + case KBASE_MEM_QUERY_FLAGS: + { + *out = 0; + if( KBASE_REG_GPU_WR & reg->flags ) + *out |= BASE_MEM_PROT_GPU_WR; + if( KBASE_REG_GPU_RD & reg->flags ) + *out |= BASE_MEM_PROT_GPU_RD; + if( !(KBASE_REG_GPU_NX & reg->flags) ) + *out |= BASE_MEM_PROT_GPU_EX; + if( KBASE_REG_SHARE_BOTH & reg->flags ) + *out |= BASE_MEM_COHERENT_SYSTEM; + if ( KBASE_REG_SHARE_IN & reg->flags ) + *out |= BASE_MEM_COHERENT_LOCAL; + break; + } + default: + *out = 0; + goto out_unlock; + } + + ret = MALI_ERROR_NONE; + +out_unlock: + kbase_gpu_vm_unlock(kctx); + return ret; +} + +mali_error kbase_mem_flags_change(kbase_context *kctx, mali_addr64 gpu_addr, unsigned int flags, unsigned int mask) +{ + kbase_va_region *reg; + mali_error ret = MALI_ERROR_FUNCTION_FAILED; + unsigned int real_flags = 0; + unsigned int prev_flags = 0; + + KBASE_DEBUG_ASSERT(kctx); + + if (!gpu_addr) + return MALI_ERROR_FUNCTION_FAILED; + + /* nuke other bits */ + flags &= mask; + + /* check for only supported flags */ + if (flags & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)) + goto out; + + /* mask covers bits we don't support? 
*/ + if (mask & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)) + goto out; + + /* convert flags */ + if( BASE_MEM_COHERENT_SYSTEM & flags ) + real_flags |= KBASE_REG_SHARE_BOTH; + else if ( BASE_MEM_COHERENT_LOCAL & flags ) + real_flags |= KBASE_REG_SHARE_IN; + + /* now we can lock down the context, and find the region */ + kbase_gpu_vm_lock(kctx); + + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (!reg || (reg->flags & KBASE_REG_FREE) ) + goto out_unlock; + + /* limit to imported memory */ + if ( (reg->alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) && + (reg->alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) + goto out_unlock; + + /* no change? */ + if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) + { + ret = MALI_ERROR_NONE; + goto out_unlock; + } + + /* save for roll back */ + prev_flags = reg->flags; + reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); + reg->flags |= real_flags; + + /* Currently supporting only imported memory */ + switch(reg->alloc->type) + { +#ifdef CONFIG_UMP + case KBASE_MEM_TYPE_IMPORTED_UMP: + ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_phy_pages(reg), reg->alloc->nents, reg->flags); + break; +#endif +#ifdef CONFIG_DMA_SHARED_BUFFER + case KBASE_MEM_TYPE_IMPORTED_UMM: + /* Future use will use the new flags, existing mapping will NOT be updated + * as memory should not be in use by the GPU when updating the flags. + */ + ret = MALI_ERROR_NONE; + WARN_ON(reg->alloc->imported.umm.current_mapping_usage_count); + break; +#endif + default: + break; + } + + /* roll back on error, i.e. not UMP */ + if (ret != MALI_ERROR_NONE) + reg->flags = prev_flags; + +out_unlock: + kbase_gpu_vm_unlock(kctx); +out: + return ret; +} + +#ifdef CONFIG_UMP +static struct kbase_va_region *kbase_mem_from_ump(kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags) +{ + struct kbase_va_region *reg; + ump_dd_handle umph; + u64 block_count; + const ump_dd_physical_block_64 *block_array; + u64 i, j; + int page = 0; + ump_alloc_flags ump_flags; + ump_alloc_flags cpu_flags; + ump_alloc_flags gpu_flags; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(va_pages); + KBASE_DEBUG_ASSERT(flags); + + umph = ump_dd_from_secure_id(id); + if (UMP_DD_INVALID_MEMORY_HANDLE == umph) + goto bad_id; + + ump_flags = ump_dd_allocation_flags_get(umph); + cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK; + gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) & + UMP_DEVICE_MASK; + + *va_pages = ump_dd_size_get_64(umph); + *va_pages >>= PAGE_SHIFT; + + if (!*va_pages) + goto bad_size; + + if (*flags & BASE_MEM_SAME_VA) + reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA); + else + reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); + + if (!reg) + goto no_region; + + /* we've got pages to map now, and support SAME_VA */ + *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; + + reg->alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP); + if (IS_ERR_OR_NULL(reg->alloc)) + goto no_alloc_obj; + + reg->alloc->imported.ump_handle = umph; + + reg->flags &= ~KBASE_REG_FREE; + reg->flags |= KBASE_REG_GPU_NX; /* UMP is always No eXecute */ + reg->flags &= ~KBASE_REG_GROWABLE; /* UMP cannot be grown */ + + if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) == + (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) { + reg->flags |= KBASE_REG_CPU_CACHED; + *flags |= BASE_MEM_CACHED_CPU; + } + + if (cpu_flags & UMP_PROT_DEVICE_WR) { + 
reg->flags |= KBASE_REG_CPU_WR; + *flags |= BASE_MEM_PROT_CPU_WR; + } + + if (cpu_flags & UMP_PROT_DEVICE_RD) { + reg->flags |= KBASE_REG_CPU_RD; + *flags |= BASE_MEM_PROT_CPU_RD; + } + + if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) == + (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) + reg->flags |= KBASE_REG_GPU_CACHED; + + if (gpu_flags & UMP_PROT_DEVICE_WR) { + reg->flags |= KBASE_REG_GPU_WR; + *flags |= BASE_MEM_PROT_GPU_WR; + } + + if (gpu_flags & UMP_PROT_DEVICE_RD) { + reg->flags |= KBASE_REG_GPU_RD; + *flags |= BASE_MEM_PROT_GPU_RD; + } + + /* ump phys block query */ + ump_dd_phys_blocks_get_64(umph, &block_count, &block_array); + + for (i = 0; i < block_count; i++) { + for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) { + reg->alloc->pages[page] = block_array[i].addr + (j << PAGE_SHIFT); + page++; + } + } + reg->alloc->nents = *va_pages; + reg->extent = 0; + + return reg; + +no_alloc_obj: + kfree(reg); +no_region: +bad_size: + ump_dd_release(umph); +bad_id: + return NULL; + +} +#endif /* CONFIG_UMP */ + +#ifdef CONFIG_DMA_SHARED_BUFFER +static struct kbase_va_region *kbase_mem_from_umm(kbase_context *kctx, int fd, u64 *va_pages, u64 *flags) +{ + struct kbase_va_region *reg; + struct dma_buf *dma_buf; + struct dma_buf_attachment *dma_attachment; + + dma_buf = dma_buf_get(fd); + if (IS_ERR_OR_NULL(dma_buf)) + goto no_buf; + + dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev); + if (!dma_attachment) + goto no_attachment; + + *va_pages = PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT; + if (!*va_pages) + goto bad_size; + + /* ignore SAME_VA */ + *flags &= ~BASE_MEM_SAME_VA; + +#ifdef CONFIG_64BIT + if (!is_compat_task()) { + /* 64-bit tasks must MMAP anyway, but not expose this address to clients */ + *flags |= KBASE_MEM_NEED_MMAP; + reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA); + } else { +#else + if (1) { +#endif + reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); + } + + if (!reg) + goto no_region; + + reg->alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM); + if (IS_ERR_OR_NULL(reg->alloc)) + goto no_alloc_obj; + + /* No pages to map yet */ + reg->alloc->nents = 0; + + reg->flags &= ~KBASE_REG_FREE; + reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */ + reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */ + reg->flags |= KBASE_REG_GPU_CACHED; + + if (*flags & BASE_MEM_PROT_CPU_WR) + reg->flags |= KBASE_REG_CPU_WR; + + if (*flags & BASE_MEM_PROT_CPU_RD) + reg->flags |= KBASE_REG_CPU_RD; + + if (*flags & BASE_MEM_PROT_GPU_WR) + reg->flags |= KBASE_REG_GPU_WR; + + if (*flags & BASE_MEM_PROT_GPU_RD) + reg->flags |= KBASE_REG_GPU_RD; + + /* no read or write permission given on import, only on run do we give the right permissions */ + + reg->alloc->type = BASE_TMEM_IMPORT_TYPE_UMM; + reg->alloc->imported.umm.sgt = NULL; + reg->alloc->imported.umm.dma_buf = dma_buf; + reg->alloc->imported.umm.dma_attachment = dma_attachment; + reg->alloc->imported.umm.current_mapping_usage_count = 0; + reg->extent = 0; + + return reg; + +no_alloc_obj: + kfree(reg); +no_region: +bad_size: + dma_buf_detach(dma_buf, dma_attachment); +no_attachment: + dma_buf_put(dma_buf); +no_buf: + return NULL; +} +#endif /* CONFIG_DMA_SHARED_BUFFER */ + +u64 kbase_mem_alias(kbase_context *kctx, u64 *flags, u64 stride, + u64 nents, struct base_mem_aliasing_info *ai, + u64 *num_pages) +{ + kbase_va_region *reg; + u64 gpu_va; + size_t i; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(flags); + 
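+ /* ai[] describes nents source ranges placed stride pages apart, so the
+ * alias spans nents * stride GPU pages in total. */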
KBASE_DEBUG_ASSERT(ai); + KBASE_DEBUG_ASSERT(num_pages); + + /* mask to only allowed flags */ + *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | + BASE_MEM_HINT_GPU_RD | BASE_MEM_HINT_GPU_WR | + BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL); + + if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR) )) { + dev_warn(kctx->kbdev->dev, + "kbase_mem_alias called with bad flags (%llx)", + (unsigned long long)*flags); + goto bad_flags; + } + + if (!stride) + goto bad_stride; + + if (!nents) + goto bad_nents; + + /* calculate the number of pages this alias will cover */ + *num_pages = nents * stride; + +#ifdef CONFIG_64BIT + if (!is_compat_task()) { + /* 64-bit tasks must MMAP anyway, but not expose this address to + * clients */ + *flags |= KBASE_MEM_NEED_MMAP; + reg = kbase_alloc_free_region(kctx, 0, *num_pages, + KBASE_REG_ZONE_SAME_VA); + } else { +#else + if (1) { +#endif + reg = kbase_alloc_free_region(kctx, 0, *num_pages, + KBASE_REG_ZONE_CUSTOM_VA); + } + + if (!reg) + goto no_reg; + + /* zero-sized page array, as we don't need one/can support one */ + reg->alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS); + if (IS_ERR_OR_NULL(reg->alloc)) + goto no_alloc_obj; + + kbase_update_region_flags(reg, *flags); + + reg->alloc->imported.alias.nents = nents; + reg->alloc->imported.alias.stride = stride; + reg->alloc->imported.alias.aliased = vzalloc(sizeof(*reg->alloc->imported.alias.aliased) * nents); + if (!reg->alloc->imported.alias.aliased) + goto no_aliased_array; + + kbase_gpu_vm_lock(kctx); + + /* validate and add src handles */ + for (i = 0; i < nents; i++) { + if (ai[i].handle < BASE_MEM_FIRST_FREE_ADDRESS) { + if (ai[i].handle != BASE_MEM_WRITE_ALLOC_PAGES_HANDLE) + goto bad_handle; /* unsupported magic handle */ + if (!ai[i].length) + goto bad_handle; /* must be > 0 */ + if (ai[i].length > stride) + goto bad_handle; /* can't be larger than the + stride */ + reg->alloc->imported.alias.aliased[i].length = ai[i].length; + } else { + struct kbase_va_region *aliasing_reg; + struct kbase_mem_phy_alloc *alloc; + aliasing_reg = kbase_region_tracker_find_region_base_address(kctx, (ai[i].handle >> PAGE_SHIFT) << PAGE_SHIFT); + + /* validate found region */ + if (!aliasing_reg) + goto bad_handle; /* Not found */ + if (aliasing_reg->flags & KBASE_REG_FREE) + goto bad_handle; /* Free region */ + if (!aliasing_reg->alloc) + goto bad_handle; /* No alloc */ + if (aliasing_reg->alloc->type != KBASE_MEM_TYPE_NATIVE) + goto bad_handle; /* Not a native alloc */ + + /* check size against stride */ + if (!ai[i].length) + goto bad_handle; /* must be > 0 */ + if (ai[i].length > stride) + goto bad_handle; /* can't be larger than the + stride */ + + alloc = aliasing_reg->alloc; + + /* check against the alloc's size */ + if (ai[i].offset > alloc->nents) + goto bad_handle; /* beyond end */ + if (ai[i].offset + ai[i].length > alloc->nents) + goto bad_handle; /* beyond end */ + + reg->alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc); + reg->alloc->imported.alias.aliased[i].length = ai[i].length; + reg->alloc->imported.alias.aliased[i].offset = ai[i].offset; + } + } + +#ifdef CONFIG_64BIT + if (!is_compat_task()) { + /* Bind to a cookie */ + if (!kctx->cookies) { + dev_err(kctx->kbdev->dev, "No cookies " + "available for allocation!"); + goto no_cookie; + } + /* return a cookie */ + gpu_va = __ffs(kctx->cookies); + kctx->cookies &= ~(1UL << gpu_va); + BUG_ON(kctx->pending_regions[gpu_va]); + kctx->pending_regions[gpu_va] = reg; + + /* relocate to correct base */ + gpu_va 
+= PFN_DOWN(BASE_MEM_COOKIE_BASE); + gpu_va <<= PAGE_SHIFT; + } else /* we control the VA */ { +#else + if (1) { +#endif + if (MALI_ERROR_NONE != kbase_gpu_mmap(kctx, reg, 0, + *num_pages, 1)) { + dev_warn(kctx->kbdev->dev, + "Failed to map memory on GPU"); + goto no_mmap; + } + /* return real GPU VA */ + gpu_va = reg->start_pfn << PAGE_SHIFT; + } + + reg->flags &= ~KBASE_REG_FREE; + reg->flags &= ~KBASE_REG_GROWABLE; + + kbase_gpu_vm_unlock(kctx); + + return gpu_va; + +#ifdef CONFIG_64BIT +no_cookie: +#endif +no_mmap: +bad_handle: + kbase_gpu_vm_unlock(kctx); +no_aliased_array: + kbase_mem_phy_alloc_put(reg->alloc); +no_alloc_obj: + kfree(reg); +no_reg: +bad_nents: +bad_stride: +bad_flags: + return 0; +} + +int kbase_mem_import(kbase_context *kctx, base_mem_import_type type, int handle, mali_addr64 * gpu_va, u64 * va_pages, u64 * flags) +{ + kbase_va_region * reg; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(gpu_va); + KBASE_DEBUG_ASSERT(va_pages); + KBASE_DEBUG_ASSERT(flags); + +#ifdef CONFIG_64BIT + if (!is_compat_task()) + *flags |= BASE_MEM_SAME_VA; +#endif + + switch (type) { +#ifdef CONFIG_UMP + case BASE_MEM_IMPORT_TYPE_UMP: + reg = kbase_mem_from_ump(kctx, (ump_secure_id)handle, va_pages, flags); + break; +#endif /* CONFIG_UMP */ +#ifdef CONFIG_DMA_SHARED_BUFFER + case BASE_MEM_IMPORT_TYPE_UMM: + reg = kbase_mem_from_umm(kctx, handle, va_pages, flags); + break; +#endif /* CONFIG_DMA_SHARED_BUFFER */ + default: + reg = NULL; + break; + } + + if (!reg) + goto no_reg; + + kbase_gpu_vm_lock(kctx); + + /* mmap needed to setup VA? */ + if (*flags & (BASE_MEM_SAME_VA | KBASE_MEM_NEED_MMAP)) { + /* Bind to a cookie */ + if (!kctx->cookies) + goto no_cookie; + /* return a cookie */ + *gpu_va = __ffs(kctx->cookies); + kctx->cookies &= ~(1UL << *gpu_va); + BUG_ON(kctx->pending_regions[*gpu_va]); + kctx->pending_regions[*gpu_va] = reg; + + /* relocate to correct base */ + *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); + *gpu_va <<= PAGE_SHIFT; + + } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) { + /* we control the VA, mmap now to the GPU */ + if (MALI_ERROR_NONE != kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1)) + goto no_gpu_va; + /* return real GPU VA */ + *gpu_va = reg->start_pfn << PAGE_SHIFT; + } else { + /* we control the VA, but nothing to mmap yet */ + if (MALI_ERROR_NONE != kbase_add_va_region(kctx, reg, 0, *va_pages, 1)) + goto no_gpu_va; + /* return real GPU VA */ + *gpu_va = reg->start_pfn << PAGE_SHIFT; + } + + kbase_gpu_vm_unlock(kctx); + + return 0; + +no_gpu_va: +no_cookie: + kbase_gpu_vm_unlock(kctx); + kbase_mem_phy_alloc_put(reg->alloc); + kfree(reg); +no_reg: + *gpu_va = 0; + *va_pages = 0; + *flags = 0; + return -ENOMEM; +} + + + +static int zap_range_nolock(struct mm_struct *mm, + const struct vm_operations_struct *vm_ops, + unsigned long start, unsigned long end) +{ + struct vm_area_struct *vma; + int err = -EINVAL; /* in case end < start */ + + while (start < end) { + unsigned long local_end; + + vma = find_vma_intersection(mm, start, end); + if (!vma) + break; + + /* is it ours? 
*/ + if (vma->vm_ops != vm_ops) + goto try_next; + + local_end = vma->vm_end; + + if (end < local_end) + local_end = end; + + err = zap_vma_ptes(vma, start, local_end - start); + if (unlikely(err)) + break; + +try_next: + /* go to next vma, if any */ + start = vma->vm_end; + } + + return err; +} + +int kbase_mem_commit(kbase_context * kctx, mali_addr64 gpu_addr, u64 new_pages, base_backing_threshold_status * failure_reason) +{ + u64 old_pages; + u64 delta; + int res = -EINVAL; + kbase_va_region *reg; + phys_addr_t *phy_pages; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(failure_reason); + KBASE_DEBUG_ASSERT(gpu_addr != 0); + + down_read(¤t->mm->mmap_sem); + kbase_gpu_vm_lock(kctx); + + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (!reg || (reg->flags & KBASE_REG_FREE)) { + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS; + goto out_unlock; + } + + KBASE_DEBUG_ASSERT(reg->alloc); + + if (reg->alloc->type != KBASE_MEM_TYPE_NATIVE) { + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE; + goto out_unlock; + } + + if (0 == (reg->flags & KBASE_REG_GROWABLE)) { + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE; + goto out_unlock; + } + + if (new_pages > reg->nr_pages) { + /* Would overflow the VA region */ + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS; + goto out_unlock; + } + + /* can't be mapped more than once on the GPU */ + if (atomic_read(®->alloc->gpu_mappings) > 1) { + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE; + goto out_unlock; + } + + if (new_pages == reg->alloc->nents) { + /* no change */ + res = 0; + goto out_unlock; + } + + phy_pages = kbase_get_phy_pages(reg); + old_pages = kbase_reg_current_backed_size(reg); + + if (new_pages > old_pages) { + /* growing */ + mali_error err; + delta = new_pages - old_pages; + /* Allocate some more pages */ + if (MALI_ERROR_NONE != kbase_alloc_phy_pages_helper(reg->alloc, delta)) { + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; + goto out_unlock; + } + err = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages, phy_pages + old_pages, delta, reg->flags); + if (MALI_ERROR_NONE != err) { + kbase_free_phy_pages_helper(reg->alloc, delta); + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; + goto out_unlock; + } + } else { + /* shrinking */ + struct kbase_cpu_mapping * mapping; + mali_error err; + + /* first, unmap from any mappings affected */ + list_for_each_entry(mapping, ®->alloc->mappings, mappings_list) { + unsigned long mapping_size = (mapping->vm_end - mapping->vm_start) >> PAGE_SHIFT; + + /* is this mapping affected ?*/ + if ((mapping->page_off + mapping_size) > new_pages) { + unsigned long first_bad = 0; + int zap_res; + + if (new_pages > mapping->page_off) + first_bad = new_pages - mapping->page_off; + + zap_res = zap_range_nolock(current->mm, + &kbase_vm_ops, + mapping->vm_start + + (first_bad << PAGE_SHIFT), + mapping->vm_end); + WARN(zap_res, + "Failed to zap VA range (0x%lx -0x%lx);\n", + mapping->vm_start + + (first_bad << PAGE_SHIFT), + mapping->vm_end + ); + } + } + + /* Free some pages */ + delta = old_pages - new_pages; + err = kbase_mmu_teardown_pages(kctx, reg->start_pfn + new_pages, delta); + if (MALI_ERROR_NONE != err) { + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; + goto out_unlock; + } + + if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { + /* Wait for GPU to flush write buffer before freeing physical pages */ + kbase_wait_write_flush(kctx); + } + + 
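The shrink path above only zaps the tail of a CPU mapping that extends past the new commit size. The first_bad arithmetic is easiest to see with concrete numbers; the following stands alone and uses hypothetical values (a 16-page mapping starting at allocation page 8, commit shrunk to 12 pages).

#include <stdio.h>

#define PAGE_SHIFT 12UL

int main(void)
{
	unsigned long vm_start  = 0x70000000UL;
	unsigned long vm_end    = vm_start + (16UL << PAGE_SHIFT);
	unsigned long page_off  = 8;			/* mapping starts at page 8 of the alloc */
	unsigned long new_pages = 12;			/* commit shrunk to 12 pages */

	unsigned long mapping_size = (vm_end - vm_start) >> PAGE_SHIFT;

	if (page_off + mapping_size > new_pages) {	/* mapping reaches beyond the new commit */
		unsigned long first_bad = 0;

		if (new_pages > page_off)
			first_bad = new_pages - page_off;	/* 12 - 8 = 4 */

		/* PTEs for pages 4..15 of this VMA must be zapped */
		printf("zap [%#lx, %#lx)\n",
		       vm_start + (first_bad << PAGE_SHIFT), vm_end);
	}
	return 0;
}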
kbase_free_phy_pages_helper(reg->alloc, delta); + } + + res = 0; + +out_unlock: + kbase_gpu_vm_unlock(kctx); + up_read(¤t->mm->mmap_sem); + + return res; + +} + +STATIC void kbase_cpu_vm_open(struct vm_area_struct *vma) +{ + struct kbase_cpu_mapping *map = vma->vm_private_data; + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + /* non-atomic as we're under Linux' mm lock */ + map->count++; +} + +STATIC void kbase_cpu_vm_close(struct vm_area_struct *vma) +{ + struct kbase_cpu_mapping *map = vma->vm_private_data; + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + + /* non-atomic as we're under Linux' mm lock */ + if (--map->count) + return; + + KBASE_DEBUG_ASSERT(map->kctx); + KBASE_DEBUG_ASSERT(map->alloc); + + kbase_gpu_vm_lock(map->kctx); + + if (map->region) { + KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA); + kbase_mem_free_region(map->kctx, map->region); + } + + list_del(&map->mappings_list); + + kbase_gpu_vm_unlock(map->kctx); + + kbase_mem_phy_alloc_put(map->alloc); + kfree(map); +} + +KBASE_EXPORT_TEST_API(kbase_cpu_vm_close) + + +STATIC int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct kbase_cpu_mapping *map = vma->vm_private_data; + pgoff_t rel_pgoff; + size_t i; + + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + KBASE_DEBUG_ASSERT(map->kctx); + KBASE_DEBUG_ASSERT(map->alloc); + + /* we don't use vmf->pgoff as it's affected by our mmap with offset being a GPU VA or a cookie */ + rel_pgoff = ((unsigned long)vmf->virtual_address - map->vm_start) >> PAGE_SHIFT; + + kbase_gpu_vm_lock(map->kctx); + if (map->page_off + rel_pgoff >= map->alloc->nents) + goto locked_bad_fault; + + /* insert all valid pages from the fault location */ + for (i = rel_pgoff; + i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT, + map->alloc->nents - map->page_off); i++) { + int ret = vm_insert_pfn(vma, map->vm_start + (i << PAGE_SHIFT), + PFN_DOWN(map->alloc->pages[map->page_off + i])); + if (ret < 0 && ret != -EBUSY) + goto locked_bad_fault; + } + + kbase_gpu_vm_unlock(map->kctx); + /* we resolved it, nothing for VM to do */ + return VM_FAULT_NOPAGE; + +locked_bad_fault: + kbase_gpu_vm_unlock(map->kctx); + send_sig(SIGSEGV, current, 1); + return VM_FAULT_NOPAGE; +} + +static const struct vm_operations_struct kbase_vm_ops = { + .open = kbase_cpu_vm_open, + .close = kbase_cpu_vm_close, + .fault = kbase_cpu_vm_fault +}; + +static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, int free_on_close) +{ + struct kbase_cpu_mapping *map; + u64 start_off = vma->vm_pgoff - reg->start_pfn; + phys_addr_t *page_array; + int err = 0; + int i; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + + if (!map) { + WARN_ON(1); + err = -ENOMEM; + goto out; + } + + /* + * VM_DONTCOPY - don't make this mapping available in fork'ed processes + * VM_DONTEXPAND - disable mremap on this region + * VM_IO - disables paging + * VM_DONTDUMP - Don't include in core dumps (3.7 only) + * VM_MIXEDMAP - Support mixing struct page*s and raw pfns. + * This is needed to support using the dedicated and + * the OS based memory backends together. 
+ */ + /* + * This will need updating to propagate coherency flags + * See MIDBASE-1057 + */ + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0)) + vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; +#else + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; +#endif + vma->vm_ops = &kbase_vm_ops; + vma->vm_private_data = map; + + page_array = kbase_get_phy_pages(reg); + + if (!(reg->flags & KBASE_REG_CPU_CACHED) && + (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) { + /* We can't map vmalloc'd memory uncached. + * Other memory will have been returned from + * kbase_mem_allocator_alloc which would be + * suitable for mapping uncached. + */ + BUG_ON(kaddr); + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + } + + if (!kaddr) { + vma->vm_flags |= VM_PFNMAP; + for (i = 0; i < nr_pages; i++) { + err = vm_insert_pfn(vma, vma->vm_start + (i << PAGE_SHIFT), page_array[i + start_off] >> PAGE_SHIFT); + if (WARN_ON(err)) + break; + } + } else { + /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */ + vma->vm_flags |= VM_MIXEDMAP; + /* vmalloc remaping is easy... */ + err = remap_vmalloc_range(vma, kaddr, 0); + WARN_ON(err); + } + + if (err) { + kfree(map); + goto out; + } + + + map->page_off = start_off; + map->region = free_on_close ? reg : NULL; + map->kctx = reg->kctx; + map->vm_start = vma->vm_start; + map->vm_end = vma->vm_end; + map->alloc = kbase_mem_phy_alloc_get(reg->alloc); + map->count = 1; /* start with one ref */ + + if (reg->flags & KBASE_REG_CPU_CACHED) + map->alloc->accessed_cached = 1; + + list_add(&map->mappings_list, &map->alloc->mappings); + + out: + return err; +} + +static int kbase_trace_buffer_mmap(kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr) +{ + struct kbase_va_region *new_reg; + u32 nr_pages; + size_t size; + int err = 0; + u32 *tb; + int owns_tb = 1; + + KBASE_LOG(1, kctx->kbdev->dev, "in %s\n", __func__); + size = (vma->vm_end - vma->vm_start); + nr_pages = size >> PAGE_SHIFT; + + if (!kctx->jctx.tb) { + KBASE_DEBUG_ASSERT(0 != size); + tb = vmalloc_user(size); + + if (NULL == tb) { + err = -ENOMEM; + goto out; + } + + kbase_device_trace_buffer_install(kctx, tb, size); + } else { + err = -EINVAL; + goto out; + } + + *kaddr = kctx->jctx.tb; + + new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA); + if (!new_reg) { + err = -ENOMEM; + WARN_ON(1); + goto out_no_region; + } + + new_reg->alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB); + if (IS_ERR_OR_NULL(new_reg->alloc)) { + err = -ENOMEM; + new_reg->alloc = NULL; + WARN_ON(1); + goto out_no_alloc; + } + + new_reg->alloc->imported.kctx = kctx; + new_reg->flags &= ~KBASE_REG_FREE; + new_reg->flags |= KBASE_REG_CPU_CACHED; + + /* alloc now owns the tb */ + owns_tb = 0; + + if (MALI_ERROR_NONE != kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1)) { + err = -ENOMEM; + WARN_ON(1); + goto out_no_va_region; + } + + *reg = new_reg; + + /* map read only, noexec */ + vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); + /* the rest of the flags is added by the cpu_mmap handler */ + + KBASE_LOG(1, kctx->kbdev->dev, "%s done\n", __func__); + return 0; + +out_no_va_region: +out_no_alloc: + kbase_free_alloced_region(new_reg); +out_no_region: + if (owns_tb) { + kbase_device_trace_buffer_uninstall(kctx); + vfree(tb); + } +out: + return err; + +} + +static int kbase_mmu_dump_mmap(kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region 
**const reg, void **const kmap_addr) +{ + struct kbase_va_region *new_reg; + void *kaddr; + u32 nr_pages; + size_t size; + int err = 0; + + KBASE_LOG(1, kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n"); + size = (vma->vm_end - vma->vm_start); + nr_pages = size >> PAGE_SHIFT; + + kaddr = kbase_mmu_dump(kctx, nr_pages); + + if (!kaddr) { + err = -ENOMEM; + goto out; + } + + new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA); + if (!new_reg) { + err = -ENOMEM; + WARN_ON(1); + goto out; + } + + new_reg->alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW); + if (IS_ERR_OR_NULL(new_reg->alloc)) { + err = -ENOMEM; + new_reg->alloc = NULL; + WARN_ON(1); + goto out_no_alloc; + } + + new_reg->flags &= ~KBASE_REG_FREE; + new_reg->flags |= KBASE_REG_CPU_CACHED; + if (MALI_ERROR_NONE != kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1)) { + err = -ENOMEM; + WARN_ON(1); + goto out_va_region; + } + + *kmap_addr = kaddr; + *reg = new_reg; + + KBASE_LOG(1, kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n"); + return 0; + +out_no_alloc: +out_va_region: + kbase_free_alloced_region(new_reg); +out: + return err; +} + + +void kbase_os_mem_map_lock(kbase_context *kctx) +{ + struct mm_struct *mm = current->mm; + (void)kctx; + down_read(&mm->mmap_sem); +} + +void kbase_os_mem_map_unlock(kbase_context *kctx) +{ + struct mm_struct *mm = current->mm; + (void)kctx; + up_read(&mm->mmap_sem); +} + +int kbase_mmap(struct file *file, struct vm_area_struct *vma) +{ + kbase_context *kctx = file->private_data; + struct kbase_va_region *reg; + void *kaddr = NULL; + size_t nr_pages; + int err = 0; + int free_on_close = 0; + struct device *dev = kctx->kbdev->dev; + + KBASE_LOG(1, dev, "kbase_mmap\n"); + nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + + /* strip away corresponding VM_MAY% flags to the VM_% flags requested */ + vma->vm_flags &= ~((vma->vm_flags & (VM_READ | VM_WRITE)) << 4); + + if (0 == nr_pages) { + err = -EINVAL; + goto out; + } + + if (!(vma->vm_flags & VM_SHARED)) { + err = -EINVAL; + goto out; + } + + kbase_gpu_vm_lock(kctx); + + if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) { + /* The non-mapped tracking helper page */ + err = kbase_tracking_page_setup(kctx, vma); + goto out_unlock; + } + + /* if not the MTP, verify that the MTP has been mapped */ + rcu_read_lock(); + /* catches both when the special page isn't present or + * when we've forked */ + if (rcu_dereference(kctx->process_mm) != current->mm) { + err = -EINVAL; + rcu_read_unlock(); + goto out_unlock; + } + rcu_read_unlock(); + + switch (vma->vm_pgoff) { + case PFN_DOWN(BASE_MEM_INVALID_HANDLE): + case PFN_DOWN(BASE_MEM_WRITE_ALLOC_PAGES_HANDLE): + /* Illegal handle for direct map */ + err = -EINVAL; + goto out_unlock; + case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE): + err = kbase_trace_buffer_mmap(kctx, vma, ®, &kaddr); + if (0 != err) + goto out_unlock; + KBASE_LOG(1, dev, "kbase_trace_buffer_mmap ok\n"); + /* free the region on munmap */ + free_on_close = 1; + goto map; + case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): + /* MMU dump */ + err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr); + if (0 != err) + goto out_unlock; + /* free the region on munmap */ + free_on_close = 1; + goto map; + case PFN_DOWN(BASE_MEM_COOKIE_BASE) ... 
+ PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: { + /* SAME_VA stuff, fetch the right region */ + int gpu_pc_bits; + int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); + gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; + reg = kctx->pending_regions[cookie]; + if (NULL != reg) { + size_t aligned_offset = 0; + + if (reg->flags & KBASE_REG_ALIGNED) { + /* nr_pages must be able to hold alignment pages + * plus actual pages */ + if (nr_pages != ((1UL << gpu_pc_bits >> + PAGE_SHIFT) + + reg->nr_pages)) { + /* incorrect mmap size */ + /* leave the cookie for a potential + * later mapping, or to be reclaimed + * later when the context is freed */ + err = -ENOMEM; + goto out_unlock; + } + + aligned_offset = (vma->vm_start + + (1UL << gpu_pc_bits) - 1) & + ~((1UL << gpu_pc_bits) - 1); + aligned_offset -= vma->vm_start; + } else if (reg->nr_pages != nr_pages) { + /* incorrect mmap size */ + /* leave the cookie for a potential later + * mapping, or to be reclaimed later when the + * context is freed */ + err = -ENOMEM; + goto out_unlock; + } + + if ((vma->vm_flags & VM_READ && + !(reg->flags & KBASE_REG_CPU_RD)) || + (vma->vm_flags & VM_WRITE && + !(reg->flags & KBASE_REG_CPU_WR))) { + /* VM flags inconsistent with region flags */ + err = -EPERM; + dev_err(dev, "%s:%d inconsistent VM flags\n", + __FILE__, __LINE__); + goto out_unlock; + } + + /* adjust down nr_pages to what we have physically */ + nr_pages = kbase_reg_current_backed_size(reg); + + if (MALI_ERROR_NONE != kbase_gpu_mmap(kctx, reg, + vma->vm_start + + aligned_offset, + reg->nr_pages, + 1)) { + dev_err(dev, "%s:%d\n", __FILE__, __LINE__); + /* Unable to map in GPU space. */ + WARN_ON(1); + err = -ENOMEM; + goto out_unlock; + } + + /* no need for the cookie anymore */ + kctx->pending_regions[cookie] = NULL; + kctx->cookies |= (1UL << cookie); + + /* + * Overwrite the offset with the + * region start_pfn, so we effectively + * map from offset 0 in the region. + */ + vma->vm_pgoff = reg->start_pfn; + + /* free the region on munmap */ + free_on_close = 1; + goto map; + } + + err = -ENOMEM; + goto out_unlock; + } + default: { + reg = kbase_region_tracker_find_region_enclosing_address(kctx, (u64)vma->vm_pgoff << PAGE_SHIFT); + + if (reg && !(reg->flags & KBASE_REG_FREE)) { + /* will this mapping overflow the size of the region? 
*/ + if (nr_pages > (reg->nr_pages - (vma->vm_pgoff - reg->start_pfn))) + goto overflow; + + if ((vma->vm_flags & VM_READ && + !(reg->flags & KBASE_REG_CPU_RD)) || + (vma->vm_flags & VM_WRITE && + !(reg->flags & KBASE_REG_CPU_WR))) { + /* VM flags inconsistent with region flags */ + err = -EPERM; + printk(KERN_ERR "%s:%d inconsistent VM flags\n", + __FILE__, __LINE__); + goto out_unlock; + } + +#ifdef CONFIG_DMA_SHARED_BUFFER + if (reg->alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) + goto dma_map; +#endif /* CONFIG_DMA_SHARED_BUFFER */ + + /* limit what we map to the amount currently backed */ + if (reg->alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) { + if ((vma->vm_pgoff - reg->start_pfn) >= reg->alloc->nents) + nr_pages = 0; + else + nr_pages = reg->alloc->nents - (vma->vm_pgoff - reg->start_pfn); + } + + goto map; + } + +overflow: + err = -ENOMEM; + goto out_unlock; + } /* default */ + } /* switch */ +map: + err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, free_on_close); + + if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { + /* MMU dump - userspace should now have a reference on + * the pages, so we can now free the kernel mapping */ + vfree(kaddr); + } + goto out_unlock; + +#ifdef CONFIG_DMA_SHARED_BUFFER +dma_map: + err = dma_buf_mmap(reg->alloc->imported.umm.dma_buf, vma, vma->vm_pgoff - reg->start_pfn); +#endif /* CONFIG_DMA_SHARED_BUFFER */ +out_unlock: + kbase_gpu_vm_unlock(kctx); +out: + if (err) + dev_err(dev, "mmap failed %d\n", err); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_mmap) + +void kbasep_os_process_page_usage_update( kbase_context *kctx, int pages ) +{ + struct mm_struct *mm; + + rcu_read_lock(); + mm = rcu_dereference(kctx->process_mm); + if (mm) + { + atomic_add(pages, &kctx->nonmapped_pages); +#ifdef SPLIT_RSS_COUNTING + add_mm_counter(mm, MM_FILEPAGES, pages); +#else + spin_lock(&mm->page_table_lock); + add_mm_counter(mm, MM_FILEPAGES, pages); + spin_unlock(&mm->page_table_lock); +#endif + } + rcu_read_unlock(); +} + +static void kbasep_os_process_page_usage_drain(kbase_context * kctx) +{ + int pages; + struct mm_struct * mm; + + spin_lock(&kctx->mm_update_lock); + mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock)); + if (!mm) + { + spin_unlock(&kctx->mm_update_lock); + return; + } + + rcu_assign_pointer(kctx->process_mm, NULL); + spin_unlock(&kctx->mm_update_lock); + synchronize_rcu(); + + pages = atomic_xchg(&kctx->nonmapped_pages, 0); +#ifdef SPLIT_RSS_COUNTING + add_mm_counter(mm, MM_FILEPAGES, -pages); +#else + spin_lock(&mm->page_table_lock); + add_mm_counter(mm, MM_FILEPAGES, -pages); + spin_unlock(&mm->page_table_lock); +#endif +} + +static void kbase_special_vm_close(struct vm_area_struct *vma) +{ + kbase_context * kctx; + kctx = vma->vm_private_data; + kbasep_os_process_page_usage_drain(kctx); +} + +static const struct vm_operations_struct kbase_vm_special_ops = { + .close = kbase_special_vm_close, +}; + +static int kbase_tracking_page_setup(struct kbase_context * kctx, struct vm_area_struct * vma) +{ + /* check that this is the only tracking page */ + spin_lock(&kctx->mm_update_lock); + if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) + { + spin_unlock(&kctx->mm_update_lock); + return -EFAULT; + } + + rcu_assign_pointer(kctx->process_mm, current->mm); + + spin_unlock(&kctx->mm_update_lock); + + /* no real access */ + vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); +#if (LINUX_VERSION_CODE >= 
KERNEL_VERSION(3,7,0)) + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; +#else + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; +#endif + vma->vm_ops = &kbase_vm_special_ops; + vma->vm_private_data = kctx; + + return 0; +} +void *kbase_va_alloc(kbase_context *kctx, u32 size, kbase_hwc_dma_mapping *handle) +{ + int i; + int res; + void *va; + dma_addr_t dma_pa; + struct kbase_va_region *reg; + phys_addr_t *page_array; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + DEFINE_DMA_ATTRS(attrs); +#endif + + u32 pages = ((size - 1) >> PAGE_SHIFT) + 1; + u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | + BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(0 != size); + KBASE_DEBUG_ASSERT(0 != pages); + + if (size == 0) + goto err; + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, &attrs); +#else + va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL); +#endif + if (!va) + goto err; + + memset(va, 0x0, size); + + /* Store the state so we can free it later. */ + handle->cpu_va = va; + handle->dma_pa = dma_pa; + handle->size = size; + + + reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA); + if (!reg) + goto no_reg; + + reg->flags &= ~KBASE_REG_FREE; + kbase_update_region_flags(reg, flags); + + reg->alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW); + if (IS_ERR_OR_NULL(reg->alloc)) + goto no_alloc; + + page_array = kbase_get_phy_pages(reg); + + for (i = 0; i < pages; i++) { + page_array[i] = dma_pa + (i << PAGE_SHIFT); + } + + reg->alloc->nents = pages; + + kbase_gpu_vm_lock(kctx); + res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1); + kbase_gpu_vm_unlock(kctx); + if (res) + goto no_mmap; + + return va; + +no_mmap: + kbase_mem_phy_alloc_put(reg->alloc); +no_alloc: + kfree(reg); +no_reg: +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs); +#else + dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa); +#endif +err: + return NULL; +} +KBASE_EXPORT_SYMBOL(kbase_va_alloc); + +void kbase_va_free(kbase_context *kctx, kbase_hwc_dma_mapping *handle) +{ + struct kbase_va_region *reg; + mali_error err; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + DEFINE_DMA_ATTRS(attrs); +#endif + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(handle->cpu_va != NULL); + + kbase_gpu_vm_lock(kctx); + reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va); + KBASE_DEBUG_ASSERT(reg); + err = kbase_gpu_munmap(kctx, reg); + kbase_gpu_vm_unlock(kctx); + KBASE_DEBUG_ASSERT(err == MALI_ERROR_NONE); + + kbase_mem_phy_alloc_put(reg->alloc); + kfree(reg); + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + dma_free_attrs(kctx->kbdev->dev, handle->size, + handle->cpu_va, handle->dma_pa, &attrs); +#else + dma_free_writecombine(kctx->kbdev->dev, handle->size, + handle->cpu_va, handle->dma_pa); +#endif +} +KBASE_EXPORT_SYMBOL(kbase_va_free); + diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h new file mode 100755 index 00000000000..e7482a5cd45 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h @@ -0,0 +1,67 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. 
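As a usage note for kbase_va_alloc()/kbase_va_free() just defined: a caller inside the driver pairs them around the lifetime of a buffer that both CPU and GPU touch, such as a hardware-counter dump buffer. The wrapper functions and the 4-page size below are hypothetical; only the two kbase calls come from the patch.

#include "mali_kbase.h"		/* assumed to provide the declarations used below */

static void *example_hwc_buffer_setup(kbase_context *kctx,
				      kbase_hwc_dma_mapping *handle)
{
	/* one write-combined buffer, visible at the same VA to CPU and GPU */
	void *cpu_va = kbase_va_alloc(kctx, 4 * 4096, handle);

	if (!cpu_va)
		return NULL;	/* allocation or GPU mapping failed */

	/* ... point the GPU at handle->dma_pa, read results through cpu_va ... */
	return cpu_va;
}

static void example_hwc_buffer_teardown(kbase_context *kctx,
					kbase_hwc_dma_mapping *handle)
{
	/* unmaps from the GPU and releases the DMA memory held in *handle */
	kbase_va_free(kctx, handle);
}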
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_mem_linux.h + * Base kernel memory APIs, Linux implementation. + */ + +#ifndef _KBASE_MEM_LINUX_H_ +#define _KBASE_MEM_LINUX_H_ + +/* This define is used by the gator kernel module compile to select which DDK + * API calling convention to use. If not defined (legacy DDK) gator assumes + * version 1. The version to DDK release mapping is: + * Version 1 API: DDK versions r1px, r2px + * Version 2 API: DDK versions r3px and newer + **/ +#define MALI_DDK_GATOR_API_VERSION 2 + +/** A HWC dump mapping */ +typedef struct kbase_hwc_dma_mapping { + void *cpu_va; + dma_addr_t dma_pa; + size_t size; +} kbase_hwc_dma_mapping; + +struct kbase_va_region * kbase_mem_alloc(kbase_context * kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 * flags, u64 * gpu_va, u16 * va_alignment); +mali_error kbase_mem_query(kbase_context *kctx, mali_addr64 gpu_addr, int query, u64 * const pages); +int kbase_mem_import(kbase_context *kctx, base_mem_import_type type, int handle, mali_addr64 * gpu_va, u64 * va_pages, u64 * flags); +u64 kbase_mem_alias(kbase_context *kctx, u64* flags, u64 stride, u64 nents, struct base_mem_aliasing_info* ai, u64 * num_pages); +mali_error kbase_mem_flags_change(kbase_context *kctx, mali_addr64 gpu_addr, unsigned int flags, unsigned int mask); +int kbase_mem_commit(kbase_context * kctx, mali_addr64 gpu_addr, u64 new_pages, base_backing_threshold_status * failure_reason); +int kbase_mmap(struct file *file, struct vm_area_struct *vma); + +/** @brief Allocate memory from kernel space and map it onto the GPU + * + * @param kctx The context used for the allocation/mapping + * @param size The size of the allocation in bytes + * @param handle An opaque structure used to contain the state needed to free the memory + * @return the VA for kernel space and GPU MMU + */ +void *kbase_va_alloc(kbase_context *kctx, u32 size, kbase_hwc_dma_mapping *handle); + +/** @brief Free/unmap memory allocated by kbase_va_alloc + * + * @param kctx The context used for the allocation/mapping + * @param handle An opaque structure returned by the kbase_va_alloc function. + */ +void kbase_va_free(kbase_context *kctx, kbase_hwc_dma_mapping *handle); + +#endif /* _KBASE_MEM_LINUX_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.c b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.c new file mode 100755 index 00000000000..62e5c9f219b --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.c @@ -0,0 +1,62 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +#include + +#include +#include +#include +#include +#include +#include + +void kbase_sync_to_memory(phys_addr_t paddr, void *vaddr, size_t sz) +{ +#ifdef CONFIG_ARM + __cpuc_flush_dcache_area(vaddr, sz); + outer_flush_range(paddr, paddr + sz); +#elif defined(CONFIG_ARM64) + /* TODO (MID64-46): There's no other suitable cache flush function for ARM64 */ + flush_cache_all(); +#elif defined(CONFIG_X86) + struct scatterlist scl = { 0, }; + sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz, paddr & (PAGE_SIZE - 1)); + dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_TO_DEVICE); + mb(); /* for outer_sync (if needed) */ +#else +#error Implement cache maintenance for your architecture here +#endif +} + +void kbase_sync_to_cpu(phys_addr_t paddr, void *vaddr, size_t sz) +{ +#ifdef CONFIG_ARM + __cpuc_flush_dcache_area(vaddr, sz); + outer_flush_range(paddr, paddr + sz); +#elif defined(CONFIG_ARM64) + /* TODO (MID64-46): There's no other suitable cache flush function for ARM64 */ + flush_cache_all(); +#elif defined(CONFIG_X86) + struct scatterlist scl = { 0, }; + sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz, paddr & (PAGE_SIZE - 1)); + dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_FROM_DEVICE); +#else +#error Implement cache maintenance for your architecture here +#endif +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h new file mode 100755 index 00000000000..c88a3f13880 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h @@ -0,0 +1,111 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _KBASE_MEM_LOWLEVEL_H +#define _KBASE_MEM_LOWLEVEL_H + +#ifndef _KBASE_H_ +#error "Don't include this file directly, use mali_kbase.h instead" +#endif + +/** + * @brief Flags for kbase_phy_allocator_pages_alloc + */ +#define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */ +#define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */ +#define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */ + +#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON) + +#define KBASE_PHY_PAGES_POISON_VALUE 0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */ + +/** + * A pointer to a cache synchronization function, either kbase_sync_to_cpu() + * or kbase_sync_to_memory(). + */ +typedef void (*kbase_sync_kmem_fn) (phys_addr_t, void *, size_t); + +/** + * @brief Synchronize a memory area for other system components usage + * + * Performs the necessary memory coherency operations on a given memory area, + * such that after the call, changes in memory are correctly seen by other + * system components. Any change made to memory after that call may not be seen + * by other system components. 
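To make the intended handoff concrete, a short sketch of how a caller pairs the two helpers around CPU writes and GPU reads of a single page; the wrapper names are hypothetical and the kbase prototypes are assumed to come from the driver headers.

#include <linux/mm.h>		/* PAGE_SIZE */
#include <linux/string.h>
#include <linux/types.h>

/* Prototypes assumed available via the kbase headers. */
void kbase_sync_to_memory(phys_addr_t paddr, void *vaddr, size_t sz);
void kbase_sync_to_cpu(phys_addr_t paddr, void *vaddr, size_t sz);

static void example_cpu_to_gpu_handoff(phys_addr_t paddr, void *vaddr)
{
	/* CPU fills the page through a cached mapping ... */
	memset(vaddr, 0xAA, PAGE_SIZE);
	/* ... then cleans it to memory so the GPU reads what was written */
	kbase_sync_to_memory(paddr, vaddr, PAGE_SIZE);
}

static void example_gpu_to_cpu_handoff(phys_addr_t paddr, void *vaddr)
{
	/* GPU produced data; drop any stale CPU cache lines before reading */
	kbase_sync_to_cpu(paddr, vaddr, PAGE_SIZE);
	/* ... CPU may now read vaddr ... */
}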
+ * + * In effect: + * - all CPUs will perform a cache clean operation on their inner & outer data caches + * - any write buffers are drained (including that of outer cache controllers) + * + * This function waits until all operations have completed. + * + * The area is restricted to one page or less and must not cross a page boundary. + * The offset within the page is aligned to cache line size and size is ensured + * to be a multiple of the cache line size. + * + * Both physical and virtual address of the area need to be provided to support OS + * cache flushing APIs that either use the virtual or the physical address. When + * called from OS specific code it is allowed to only provide the address that + * is actually used by the specific OS and leave the other address as 0. + * + * @param[in] paddr physical address + * @param[in] vaddr CPU virtual address valid in the current user VM or the kernel VM + * @param[in] sz size of the area, <= PAGE_SIZE. + */ +void kbase_sync_to_memory(phys_addr_t paddr, void *vaddr, size_t sz); + +/** + * @brief Synchronize a memory area for CPU usage + * + * Performs the necessary memory coherency operations on a given memory area, + * such that after the call, changes in memory are correctly seen by any CPU. + * Any change made to this area by any CPU before this call may be lost. + * + * In effect: + * - all CPUs will perform a cache clean & invalidate operation on their inner & + * outer data caches. + * + * @note Stricly only an invalidate operation is required but by cleaning the cache + * too we prevent loosing changes made to the memory area due to software bugs. By + * having these changes cleaned from the cache it allows us to catch the memory + * area getting corrupted with the help of watch points. In correct operation the + * clean & invalidate operation would not be more expensive than an invalidate + * operation. Also note that for security reasons, it is dangerous to expose a + * cache 'invalidate only' operation to user space. + * + * - any read buffers are flushed (including that of outer cache controllers) + * + * This function waits until all operations have completed. + * + * The area is restricted to one page or less and must not cross a page boundary. + * The offset within the page is aligned to cache line size and size is ensured + * to be a multiple of the cache line size. + * + * Both physical and virtual address of the area need to be provided to support OS + * cache flushing APIs that either use the virtual or the physical address. When + * called from OS specific code it is allowed to only provide the address that + * is actually used by the specific OS and leave the other address as 0. + * + * @param[in] paddr physical address + * @param[in] vaddr CPU virtual address valid in the current user VM or the kernel VM + * @param[in] sz size of the area, <= PAGE_SIZE. + */ +void kbase_sync_to_cpu(phys_addr_t paddr, void *vaddr, size_t sz); + +#endif /* _KBASE_LOWLEVEL_H */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c new file mode 100755 index 00000000000..4aedadbaf45 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c @@ -0,0 +1,1685 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_mmu.c + * Base kernel MMU management. + */ + +/* #define DEBUG 1 */ +#include +#include +#include +#include + +#define beenthere(kctx, f, a...) KBASE_LOG(1, kctx->kbdev->dev, "%s:" f, __func__, ##a) + +#include +#include + +#define KBASE_MMU_PAGE_ENTRIES 512 + +/* + * Definitions: + * - PGD: Page Directory. + * - PTE: Page Table Entry. A 64bit value pointing to the next + * level of translation + * - ATE: Address Transation Entry. A 64bit value pointing to + * a 4kB physical page. + */ + +static void kbase_mmu_report_fault_and_kill(kbase_context *kctx, kbase_as *as); +static u64 lock_region(kbase_device *kbdev, u64 pfn, size_t num_pages); + +/* Helper Function to perform assignment of page table entries, to ensure the use of + * strd, which is required on LPAE systems. + */ + +static inline void page_table_entry_set( kbase_device * kbdev, u64 * pte, u64 phy ) +{ +#ifdef CONFIG_64BIT + *pte = phy; +#elif defined(CONFIG_ARM) + /* + * + * In order to prevent the compiler keeping cached copies of memory, we have to explicitly + * say that we have updated memory. + * + * Note: We could manually move the data ourselves into R0 and R1 by specifying + * register variables that are explicitly given registers assignments, the down side of + * this is that we have to assume cpu endianess. To avoid this we can use the ldrd to read the + * data from memory into R0 and R1 which will respect the cpu endianess, we then use strd to + * make the 64 bit assignment to the page table entry. + * + */ + + asm volatile("ldrd r0, r1, [%[ptemp]]\n\t" + "strd r0, r1, [%[pte]]\n\t" + : "=m" (*pte) + : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy) + : "r0", "r1" ); +#else +#error "64-bit atomic write must be implemented for your architecture" +#endif +} + +static void ksync_kern_vrange_gpu(phys_addr_t paddr, void *vaddr, size_t size) +{ + kbase_sync_to_memory(paddr, vaddr, size); +} + +static size_t make_multiple(size_t minimum, size_t multiple) +{ + size_t remainder = minimum % multiple; + if (remainder == 0) + return minimum; + else + return minimum + multiple - remainder; +} + +static void mmu_mask_reenable(kbase_device *kbdev, kbase_context *kctx, kbase_as *as) +{ + unsigned long flags; + u32 mask; + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx); + mask |= ((1UL << as->number) | (1UL << (MMU_REGS_BUS_ERROR_FLAG(as->number)))); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), mask, kctx); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} + +static void page_fault_worker(struct work_struct *data) +{ + u64 fault_pfn; + size_t new_pages; + size_t fault_rel_pfn; + kbase_as *faulting_as; + int as_no; + kbase_context *kctx; + kbase_device *kbdev; + kbase_va_region *region; + mali_error err; + + faulting_as = container_of(data, kbase_as, work_pagefault); + fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT; + as_no = faulting_as->number; + + kbdev = container_of(faulting_as, kbase_device, as[as_no]); + + /* Grab the context that was already refcounted in kbase_mmu_interrupt(). 
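make_multiple() above is the rounding used when a region grows on a page fault: the shortfall past the currently backed size is rounded up to the region's extent (the computation appears further below in page_fault_worker()). A standalone restatement with hypothetical numbers:

#include <stdio.h>

/* Same rounding as make_multiple(): round 'minimum' up to the next
 * multiple of 'multiple'. */
static size_t round_up_to(size_t minimum, size_t multiple)
{
	size_t remainder = minimum % multiple;

	return remainder ? minimum + multiple - remainder : minimum;
}

int main(void)
{
	/* fault lands 3 pages past the backed size, extent is 8 pages:
	 * the region grows by 8 pages, not by the minimum of 4 */
	printf("%zu\n", round_up_to(3 + 1, 8));		/* prints 8 */
	return 0;
}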
+ * Therefore, it cannot be scheduled out of this AS until we explicitly release it + * + * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */ + kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); + + if (kctx == NULL) { + /* Only handle this if not already suspended */ + if ( !kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + /* Address space has no context, terminate the work */ + u32 reg; + + /* AS transaction begin */ + mutex_lock(&faulting_as->transaction_mutex); + reg = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), NULL); + reg = (reg & (~(u32) MMU_TRANSTAB_ADRMODE_MASK)) | ASn_TRANSTAB_ADRMODE_UNMAPPED; + kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), reg, NULL); + kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_UPDATE, NULL); + mutex_unlock(&faulting_as->transaction_mutex); + /* AS transaction end */ + + mmu_mask_reenable(kbdev, NULL, faulting_as); + kbase_pm_context_idle(kbdev); + } + return; + } + + KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); + + kbase_gpu_vm_lock(kctx); + + /* find the region object for this VA */ + region = kbase_region_tracker_find_region_enclosing_address(kctx, faulting_as->fault_addr); + if (NULL == region || (GROWABLE_FLAGS_REQUIRED != (region->flags & GROWABLE_FLAGS_MASK))) { + kbase_gpu_vm_unlock(kctx); + /* failed to find the region or mismatch of the flags */ + kbase_mmu_report_fault_and_kill(kctx, faulting_as); + goto fault_done; + } + + if ((((faulting_as->fault_status & ASn_FAULTSTATUS_ACCESS_TYPE_MASK) == ASn_FAULTSTATUS_ACCESS_TYPE_READ) && !(region->flags & KBASE_REG_GPU_RD)) || (((faulting_as->fault_status & ASn_FAULTSTATUS_ACCESS_TYPE_MASK) == ASn_FAULTSTATUS_ACCESS_TYPE_WRITE) && !(region->flags & KBASE_REG_GPU_WR)) || (((faulting_as->fault_status & ASn_FAULTSTATUS_ACCESS_TYPE_MASK) == ASn_FAULTSTATUS_ACCESS_TYPE_EX) && (region->flags & KBASE_REG_GPU_NX))) { + dev_warn(kbdev->dev, "Access permissions don't match: region->flags=0x%lx", region->flags); + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as); + goto fault_done; + } + + /* find the size we need to grow it by */ + /* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address + * validating the fault_adress to be within a size_t from the start_pfn */ + fault_rel_pfn = fault_pfn - region->start_pfn; + + if (fault_rel_pfn < kbase_reg_current_backed_size(region)) { + dev_warn(kbdev->dev, "Page fault in allocated region of growable TMEM: Ignoring"); + mmu_mask_reenable(kbdev, kctx, faulting_as); + kbase_gpu_vm_unlock(kctx); + goto fault_done; + } + + new_pages = make_multiple(fault_rel_pfn - kbase_reg_current_backed_size(region) + 1, region->extent); + if (new_pages + kbase_reg_current_backed_size(region) > region->nr_pages) { + /* cap to max vsize */ + new_pages = region->nr_pages - kbase_reg_current_backed_size(region); + } + + if (0 == new_pages) { + /* Duplicate of a fault we've already handled, nothing to do */ + mmu_mask_reenable(kbdev, kctx, faulting_as); + kbase_gpu_vm_unlock(kctx); + goto fault_done; + } + + if (MALI_ERROR_NONE == kbase_alloc_phy_pages_helper(region->alloc, new_pages)) { + /* alloc success */ + mali_addr64 lock_addr; + KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages); + + /* AS transaction begin */ + mutex_lock(&faulting_as->transaction_mutex); + + /* Lock the VA region we're about to update */ + lock_addr = lock_region(kbdev, faulting_as->fault_addr 
>> PAGE_SHIFT, new_pages); + kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_LOCKADDR_LO), lock_addr & 0xFFFFFFFFUL, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_LOCKADDR_HI), lock_addr >> 32, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_LOCK, kctx); + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3285)) { + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), (1UL << as_no), NULL); + kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_LOCK, kctx); + } + + /* set up the new pages */ + err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags); + if (MALI_ERROR_NONE != err) { + /* failed to insert pages, handle as a normal PF */ + mutex_unlock(&faulting_as->transaction_mutex); + kbase_gpu_vm_unlock(kctx); + kbase_free_phy_pages_helper(region->alloc, new_pages); + /* The locked VA region will be unlocked and the cache invalidated in here */ + kbase_mmu_report_fault_and_kill(kctx, faulting_as); + goto fault_done; + } +#ifdef CONFIG_MALI_GATOR_SUPPORT + kbase_trace_mali_page_fault_insert_pages(as_no, new_pages); +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + + /* flush L2 and unlock the VA (resumes the MMU) */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367)) + kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_FLUSH, kctx); + else + kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_FLUSH_PT, kctx); + + /* wait for the flush to complete */ + while (kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_STATUS), kctx) & 1) + ; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) { + /* Issue an UNLOCK command to ensure that valid page tables are re-read by the GPU after an update. + Note that, the FLUSH command should perform all the actions necessary, however the bus logs show + that if multiple page faults occur within an 8 page region the MMU does not always re-read the + updated page table entries for later faults or is only partially read, it subsequently raises the + page fault IRQ for the same addresses, the unlock ensures that the MMU cache is flushed, so updates + can be re-read. As the region is now unlocked we need to issue 2 UNLOCK commands in order to flush the + MMU/uTLB, see PRLAM-8812. 
+ */ + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_UNLOCK, kctx); + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_UNLOCK, kctx); + } + + mutex_unlock(&faulting_as->transaction_mutex); + /* AS transaction end */ + + /* reenable this in the mask */ + mmu_mask_reenable(kbdev, kctx, faulting_as); + kbase_gpu_vm_unlock(kctx); + } else { + /* failed to extend, handle as a normal PF */ + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as); + } + + fault_done: + /* By this point, the fault was handled in some way, so release the ctx refcount */ + kbasep_js_runpool_release_ctx(kbdev, kctx); +} + +phys_addr_t kbase_mmu_alloc_pgd(kbase_context *kctx) +{ + phys_addr_t pgd; + u64 *page; + int i; + + KBASE_DEBUG_ASSERT(NULL != kctx); + kbase_atomic_add_pages(1, &kctx->used_pages); + kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages); + + if (MALI_ERROR_NONE != kbase_mem_allocator_alloc(kctx->pgd_allocator, 1, &pgd)) + goto sub_pages; + + page = kmap(pfn_to_page(PFN_DOWN(pgd))); + if (NULL == page) + goto alloc_free; + + kbase_process_page_usage_inc(kctx, 1); + + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) + page_table_entry_set( kctx->kbdev, &page[i], ENTRY_IS_INVAL ); + + /* Clean the full page */ + ksync_kern_vrange_gpu(pgd, page, KBASE_MMU_PAGE_ENTRIES * sizeof(u64)); + kunmap(pfn_to_page(PFN_DOWN(pgd))); + return pgd; + +alloc_free: + kbase_mem_allocator_free(kctx->pgd_allocator, 1, &pgd, MALI_FALSE); +sub_pages: + kbase_atomic_sub_pages(1, &kctx->used_pages); + kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd) + +static phys_addr_t mmu_pte_to_phy_addr(u64 entry) +{ + if (!(entry & 1)) + return 0; + + return entry & ~0xFFF; +} + +static u64 mmu_phyaddr_to_pte(phys_addr_t phy) +{ + return (phy & ~0xFFF) | ENTRY_IS_PTE; +} + +static u64 mmu_phyaddr_to_ate(phys_addr_t phy, u64 flags) +{ + return (phy & ~0xFFF) | (flags & ENTRY_FLAGS_MASK) | ENTRY_IS_ATE; +} + +/* Given PGD PFN for level N, return PGD PFN for level N+1 */ +static phys_addr_t mmu_get_next_pgd(kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level) +{ + u64 *page; + phys_addr_t target_pgd; + + KBASE_DEBUG_ASSERT(pgd); + KBASE_DEBUG_ASSERT(NULL != kctx); + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Architecture spec defines level-0 as being the top-most. + * This is a bit unfortunate here, but we keep the same convention. + */ + vpfn >>= (3 - level) * 9; + vpfn &= 0x1FF; + + page = kmap(pfn_to_page(PFN_DOWN(pgd))); + if (NULL == page) { + dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n"); + return 0; + } + + target_pgd = mmu_pte_to_phy_addr(page[vpfn]); + + if (!target_pgd) { + target_pgd = kbase_mmu_alloc_pgd(kctx); + if (!target_pgd) { + dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n"); + kunmap(pfn_to_page(PFN_DOWN(pgd))); + return 0; + } + + page_table_entry_set( kctx->kbdev, &page[vpfn], mmu_phyaddr_to_pte(target_pgd) ); + + ksync_kern_vrange_gpu(pgd + (vpfn * sizeof(u64)), page + vpfn, sizeof(u64)); + /* Rely on the caller to update the address space flags. 
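The level indexing in mmu_get_next_pgd() above takes nine bits of the virtual PFN per level, with level 0 at the top of the four-level walk. A standalone sketch of that arithmetic; the VPFN value is arbitrary.

#include <stdio.h>

int main(void)
{
	unsigned long long vpfn = 0x0123456789ULL;	/* arbitrary example */
	int level;

	for (level = 0; level < 4; level++) {
		/* mirror of: vpfn >>= (3 - level) * 9; vpfn &= 0x1FF; */
		unsigned int index = (vpfn >> ((3 - level) * 9)) & 0x1FF;

		printf("level %d index = %u\n", level, index);
	}
	return 0;
}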
*/ + } + + kunmap(pfn_to_page(PFN_DOWN(pgd))); + return target_pgd; +} + +static phys_addr_t mmu_get_bottom_pgd(kbase_context *kctx, u64 vpfn) +{ + phys_addr_t pgd; + int l; + + pgd = kctx->pgd; + + for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) { + pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l); + /* Handle failure condition */ + if (!pgd) { + dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n"); + return 0; + } + } + + return pgd; +} + +static phys_addr_t mmu_insert_pages_recover_get_next_pgd(kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level) +{ + u64 *page; + phys_addr_t target_pgd; + + KBASE_DEBUG_ASSERT(pgd); + KBASE_DEBUG_ASSERT(NULL != kctx); + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Architecture spec defines level-0 as being the top-most. + * This is a bit unfortunate here, but we keep the same convention. + */ + vpfn >>= (3 - level) * 9; + vpfn &= 0x1FF; + + page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + /* kmap_atomic should NEVER fail */ + KBASE_DEBUG_ASSERT(NULL != page); + + target_pgd = mmu_pte_to_phy_addr(page[vpfn]); + /* As we are recovering from what has already been set up, we should have a target_pgd */ + KBASE_DEBUG_ASSERT(0 != target_pgd); + + kunmap_atomic(page); + return target_pgd; +} + +static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(kbase_context *kctx, u64 vpfn) +{ + phys_addr_t pgd; + int l; + + pgd = kctx->pgd; + + for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) { + pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l); + /* Should never fail */ + KBASE_DEBUG_ASSERT(0 != pgd); + } + + return pgd; +} + +static void mmu_insert_pages_failure_recovery(kbase_context *kctx, u64 vpfn, + size_t nr) +{ + phys_addr_t pgd; + u64 *pgd_page; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(0 != vpfn); + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE)); + + lockdep_assert_held(&kctx->reg_lock); + + while (nr) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; + + if (count > nr) + count = nr; + + pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn); + KBASE_DEBUG_ASSERT(0 != pgd); + + pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + KBASE_DEBUG_ASSERT(NULL != pgd_page); + + /* Invalidate the entries we added */ + for (i = 0; i < count; i++) + page_table_entry_set(kctx->kbdev, &pgd_page[index + i], + ENTRY_IS_INVAL); + + vpfn += count; + nr -= count; + + ksync_kern_vrange_gpu(pgd + (index * sizeof(u64)), + pgd_page + index, count * sizeof(u64)); + + kunmap_atomic(pgd_page); + } +} + +/** + * Map KBASE_REG flags to MMU flags + */ +static u64 kbase_mmu_get_mmu_flags(unsigned long flags) +{ + u64 mmu_flags; + + /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */ + mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2; + + /* write perm if requested */ + mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0; + /* read perm if requested */ + mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0; + /* nx if requested */ + mmu_flags |= (flags & KBASE_REG_GPU_NX) ? 
ENTRY_NX_BIT : 0; + + if (flags & KBASE_REG_SHARE_BOTH) { + /* inner and outer shareable */ + mmu_flags |= SHARE_BOTH_BITS; + } else if (flags & KBASE_REG_SHARE_IN) { + /* inner shareable coherency */ + mmu_flags |= SHARE_INNER_BITS; + } + + return mmu_flags; +} + +/* + * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn' + */ +mali_error kbase_mmu_insert_single_page(kbase_context *kctx, u64 vpfn, + phys_addr_t phys, size_t nr, + unsigned long flags) +{ + phys_addr_t pgd; + u64 *pgd_page; + u64 pte_entry; + /* In case the insert_single_page only partially completes we need to be + * able to recover */ + mali_bool recover_required = MALI_FALSE; + u64 recover_vpfn = vpfn; + size_t recover_count = 0; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(0 != vpfn); + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE)); + + lockdep_assert_held(&kctx->reg_lock); + + /* the one entry we'll populate everywhere */ + pte_entry = mmu_phyaddr_to_ate(phys, kbase_mmu_get_mmu_flags(flags)); + + while (nr) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; + + if (count > nr) + count = nr; + + /* + * Repeatedly calling mmu_get_bottom_pte() is clearly + * suboptimal. We don't have to re-parse the whole tree + * each time (just cache the l0-l2 sequence). + * On the other hand, it's only a gain when we map more than + * 256 pages at once (on average). Do we really care? + */ + pgd = mmu_get_bottom_pgd(kctx, vpfn); + if (!pgd) { + dev_warn(kctx->kbdev->dev, + "kbase_mmu_insert_pages: " + "mmu_get_bottom_pgd failure\n"); + if (recover_required) { + /* Invalidate the pages we have partially + * completed */ + mmu_insert_pages_failure_recovery(kctx, + recover_vpfn, + recover_count); + } + return MALI_ERROR_FUNCTION_FAILED; + } + + pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); + if (!pgd_page) { + dev_warn(kctx->kbdev->dev, + "kbase_mmu_insert_pages: " + "kmap failure\n"); + if (recover_required) { + /* Invalidate the pages we have partially + * completed */ + mmu_insert_pages_failure_recovery(kctx, + recover_vpfn, + recover_count); + } + return MALI_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < count; i++) { + unsigned int ofs = index + i; + KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); + page_table_entry_set(kctx->kbdev, &pgd_page[ofs], + pte_entry); + } + + vpfn += count; + nr -= count; + + ksync_kern_vrange_gpu(pgd + (index * sizeof(u64)), + pgd_page + index, count * sizeof(u64)); + + kunmap(pfn_to_page(PFN_DOWN(pgd))); + /* We have started modifying the page table. 
+ * If further pages need inserting and fail we need to undo what + * has already taken place */ + recover_required = MALI_TRUE; + recover_count += count; + } + return MALI_ERROR_NONE; +} + +/* + * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' + */ +mali_error kbase_mmu_insert_pages(kbase_context *kctx, u64 vpfn, + phys_addr_t *phys, size_t nr, + unsigned long flags) +{ + phys_addr_t pgd; + u64 *pgd_page; + u64 mmu_flags = 0; + /* In case the insert_pages only partially completes we need to be able + * to recover */ + mali_bool recover_required = MALI_FALSE; + u64 recover_vpfn = vpfn; + size_t recover_count = 0; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(0 != vpfn); + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE)); + + lockdep_assert_held(&kctx->reg_lock); + + mmu_flags = kbase_mmu_get_mmu_flags(flags); + + while (nr) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; + + if (count > nr) + count = nr; + + /* + * Repeatedly calling mmu_get_bottom_pte() is clearly + * suboptimal. We don't have to re-parse the whole tree + * each time (just cache the l0-l2 sequence). + * On the other hand, it's only a gain when we map more than + * 256 pages at once (on average). Do we really care? + */ + pgd = mmu_get_bottom_pgd(kctx, vpfn); + if (!pgd) { + dev_warn(kctx->kbdev->dev, + "kbase_mmu_insert_pages: " + "mmu_get_bottom_pgd failure\n"); + if (recover_required) { + /* Invalidate the pages we have partially + * completed */ + mmu_insert_pages_failure_recovery(kctx, + recover_vpfn, + recover_count); + } + return MALI_ERROR_FUNCTION_FAILED; + } + + pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); + if (!pgd_page) { + dev_warn(kctx->kbdev->dev, + "kbase_mmu_insert_pages: " + "kmap failure\n"); + if (recover_required) { + /* Invalidate the pages we have partially + * completed */ + mmu_insert_pages_failure_recovery(kctx, + recover_vpfn, + recover_count); + } + return MALI_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < count; i++) { + unsigned int ofs = index + i; + KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); + page_table_entry_set(kctx->kbdev, &pgd_page[ofs], + mmu_phyaddr_to_ate(phys[i], + mmu_flags) + ); + } + + phys += count; + vpfn += count; + nr -= count; + + ksync_kern_vrange_gpu(pgd + (index * sizeof(u64)), + pgd_page + index, count * sizeof(u64)); + + kunmap(pfn_to_page(PFN_DOWN(pgd))); + /* We have started modifying the page table. If further pages + * need inserting and fail we need to undo what has already + * taken place */ + recover_required = MALI_TRUE; + recover_count += count; + } + return MALI_ERROR_NONE; +} + +KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages) + +/** + * This function is responsible for validating the MMU PTs + * triggering reguired flushes. + * + * * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information. + */ +static void kbase_mmu_flush(kbase_context *kctx, u64 vpfn, size_t nr) +{ + kbase_device *kbdev; + mali_bool ctx_is_in_runpool; + + KBASE_DEBUG_ASSERT(NULL != kctx); + + kbdev = kctx->kbdev; + + /* We must flush if we're currently running jobs. 
At the very least, we need to retain the + * context to ensure it doesn't schedule out whilst we're trying to flush it */ + ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx); + + if (ctx_is_in_runpool) { + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + /* Second level check is to try to only do this when jobs are running. The refcount is + * a heuristic for this. */ + if (kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr].as_busy_refcount >= 2) { + /* Lock the VA region we're about to update */ + u64 lock_addr = lock_region(kbdev, vpfn, nr); + unsigned int max_loops = KBASE_AS_FLUSH_MAX_LOOPS; + + /* AS transaction begin */ + mutex_lock(&kbdev->as[kctx->as_nr].transaction_mutex); + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_LOCKADDR_LO), lock_addr & 0xFFFFFFFFUL, kctx); + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_LOCKADDR_HI), lock_addr >> 32, kctx); + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_LOCK, kctx); + + /* flush L2 and unlock the VA */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367)) + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_FLUSH, kctx); + else + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_FLUSH_MEM, kctx); + + /* wait for the flush to complete */ + while (--max_loops && kbase_reg_read(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_STATUS), kctx) & ASn_STATUS_FLUSH_ACTIVE) + ; + + if (!max_loops) { + /* Flush failed to complete, assume the GPU has hung and perform a reset to recover */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + } + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) { + /* Issue an UNLOCK command to ensure that valid page tables are re-read by the GPU after an update. + Note that the FLUSH command should perform all the actions necessary, however the bus logs show + that if multiple page faults occur within an 8 page region the MMU does not always re-read the + updated page table entries for later faults or is only partially read, it subsequently raises the + page fault IRQ for the same addresses, the unlock ensures that the MMU cache is flushed, so updates + can be re-read. As the region is now unlocked we need to issue 2 UNLOCK commands in order to flush the + MMU/uTLB, see PRLAM-8812. + */ + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_UNLOCK, kctx); + kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_UNLOCK, kctx); + } + + mutex_unlock(&kbdev->as[kctx->as_nr].transaction_mutex); + /* AS transaction end */ + } + kbasep_js_runpool_release_ctx(kbdev, kctx); + } +} + +/* + * We actually only discard the ATE, and not the page table + * pages. There is a potential DoS here, as we'll leak memory by + * having PTEs that are potentially unused. Will require physical + * page accounting, so MMU pages are part of the process allocation. + * + * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information.
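+ *
+ * The caller must hold kctx->reg_lock. The teardown walks the bottom-level
+ * tables in chunks of at most 512 entries: for example, removing 600 pages
+ * starting at GPU PFN 0x200 invalidates 512 ATEs in one table and 88 in the
+ * next, and then issues a single kbase_mmu_flush() over the whole range.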
+ */ +mali_error kbase_mmu_teardown_pages(kbase_context *kctx, u64 vpfn, size_t nr) +{ + phys_addr_t pgd; + u64 *pgd_page; + kbase_device *kbdev; + size_t requested_nr = nr; + + KBASE_DEBUG_ASSERT(NULL != kctx); + beenthere(kctx, "kctx %p vpfn %lx nr %d", (void *)kctx, (unsigned long)vpfn, nr); + + lockdep_assert_held(&kctx->reg_lock); + + if (0 == nr) { + /* early out if nothing to do */ + return MALI_ERROR_NONE; + } + + kbdev = kctx->kbdev; + + while (nr) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; + if (count > nr) + count = nr; + + pgd = mmu_get_bottom_pgd(kctx, vpfn); + if (!pgd) { + dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n"); + return MALI_ERROR_FUNCTION_FAILED; + } + + pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); + if (!pgd_page) { + dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n"); + return MALI_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < count; i++) { + page_table_entry_set( kctx->kbdev, &pgd_page[index + i], ENTRY_IS_INVAL ); + } + + vpfn += count; + nr -= count; + + ksync_kern_vrange_gpu(pgd + (index * sizeof(u64)), pgd_page + index, count * sizeof(u64)); + + kunmap(pfn_to_page(PFN_DOWN(pgd))); + } + + kbase_mmu_flush(kctx,vpfn,requested_nr); + return MALI_ERROR_NONE; +} + +KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages) + +/** + * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'. + * This call is being triggered as a response to the changes of the mem attributes + * + * @pre : The caller is responsible for validating the memory attributes + * + * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information. 
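+ *
+ * For example, changing a region's shareability rewrites every affected ATE
+ * with the new flags from kbase_mmu_get_mmu_flags() (e.g. SHARE_BOTH_BITS for
+ * KBASE_REG_SHARE_BOTH) and then flushes the updated GPU VA range.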
+ */ +mali_error kbase_mmu_update_pages(kbase_context* kctx, u64 vpfn, phys_addr_t* phys, size_t nr, unsigned long flags) +{ + phys_addr_t pgd; + u64* pgd_page; + u64 mmu_flags = 0; + size_t requested_nr = nr; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(0 != vpfn); + KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE)); + + lockdep_assert_held(&kctx->reg_lock); + + mmu_flags = kbase_mmu_get_mmu_flags(flags); + + dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags "\ + "on GPU PFN 0x%llx from phys %p, %zu pages", + vpfn, phys, nr); + + + while(nr) + { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + size_t count = KBASE_MMU_PAGE_ENTRIES - index; + if (count > nr) + count = nr; + + pgd = mmu_get_bottom_pgd(kctx, vpfn); + if (!pgd) { + dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n"); + return MALI_ERROR_FUNCTION_FAILED; + } + + pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); + if (!pgd_page) { + dev_warn(kctx->kbdev->dev, "kmap failure\n"); + return MALI_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < count; i++) { + page_table_entry_set( kctx->kbdev, &pgd_page[index + i], mmu_phyaddr_to_ate(phys[i], mmu_flags) ); + } + + phys += count; + vpfn += count; + nr -= count; + + ksync_kern_vrange_gpu(pgd + (index * sizeof(u64)), pgd_page + index, count * sizeof(u64)); + + kunmap(pfn_to_page(PFN_DOWN(pgd))); + } + + kbase_mmu_flush(kctx,vpfn,requested_nr); + + return MALI_ERROR_NONE; +} + +static int mmu_pte_is_valid(u64 pte) +{ + return ((pte & 3) == ENTRY_IS_ATE); +} + +/* This is a debug feature only */ +static void mmu_check_unused(kbase_context *kctx, phys_addr_t pgd) +{ + u64 *page; + int i; + + page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + /* kmap_atomic should NEVER fail. */ + KBASE_DEBUG_ASSERT(NULL != page); + + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { + if (mmu_pte_is_valid(page[i])) + beenthere(kctx, "live pte %016lx", (unsigned long)page[i]); + } + kunmap_atomic(page); +} + +static void mmu_teardown_level(kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer) +{ + phys_addr_t target_pgd; + u64 *pgd_page; + int i; + + KBASE_DEBUG_ASSERT(NULL != kctx); + lockdep_assert_held(&kctx->reg_lock); + + pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + /* kmap_atomic should NEVER fail. */ + KBASE_DEBUG_ASSERT(NULL != pgd_page); + /* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */ + memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); + kunmap_atomic(pgd_page); + pgd_page = pgd_page_buffer; + + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { + target_pgd = mmu_pte_to_phy_addr(pgd_page[i]); + + if (target_pgd) { + if (level < 2) { + mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64))); + } else { + /* + * So target_pte is a level-3 page. + * As a leaf, it is safe to free it. + * Unless we have live pages attached to it! 
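+ * (mmu_check_unused() below only reports such live ATEs via beenthere();
+ * it does not free the pages they map.)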
+ */ + mmu_check_unused(kctx, target_pgd); + } + + beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1); + if (zap) { + kbase_mem_allocator_free(kctx->pgd_allocator, 1, &target_pgd, MALI_TRUE); + kbase_process_page_usage_dec(kctx, 1 ); + kbase_atomic_sub_pages(1, &kctx->used_pages); + kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); + } + } + } +} + +mali_error kbase_mmu_init(kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages); + + /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ + kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); + + kctx->mem_attrs = (ASn_MEMATTR_IMPL_DEF_CACHE_POLICY << + (ASn_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | + (ASn_MEMATTR_FORCE_TO_CACHE_ALL << + (ASn_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | + (ASn_MEMATTR_WRITE_ALLOC << + (ASn_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + 0; /* The other indices are unused for now */ + + if (NULL == kctx->mmu_teardown_pages) + return MALI_ERROR_OUT_OF_MEMORY; + + return MALI_ERROR_NONE; +} + +void kbase_mmu_term(kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); + + kfree(kctx->mmu_teardown_pages); + kctx->mmu_teardown_pages = NULL; +} + +void kbase_mmu_free_pgd(kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); + + lockdep_assert_held(&kctx->reg_lock); + + mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages); + + beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd); + kbase_mem_allocator_free(kctx->pgd_allocator, 1, &kctx->pgd, MALI_TRUE); + kbase_process_page_usage_dec(kctx, 1 ); + kbase_atomic_sub_pages(1, &kctx->used_pages); + kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); +} + +KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd) + +static size_t kbasep_mmu_dump_level(kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left) +{ + phys_addr_t target_pgd; + u64 *pgd_page; + int i; + size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64); + size_t dump_size; + + KBASE_DEBUG_ASSERT(NULL != kctx); + lockdep_assert_held(&kctx->reg_lock); + + pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); + if (!pgd_page) { + dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n"); + return 0; + } + + if (*size_left >= size) { + /* A modified physical address that contains the page table level */ + u64 m_pgd = pgd | level; + + /* Put the modified physical address in the output buffer */ + memcpy(*buffer, &m_pgd, sizeof(m_pgd)); + *buffer += sizeof(m_pgd); + + /* Followed by the page table itself */ + memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES); + *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES; + + *size_left -= size; + } + + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { + if ((pgd_page[i] & ENTRY_IS_PTE) == ENTRY_IS_PTE) { + target_pgd = mmu_pte_to_phy_addr(pgd_page[i]); + + dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1, buffer, size_left); + if (!dump_size) { + kunmap(pfn_to_page(PFN_DOWN(pgd))); + return 0; + } + size += dump_size; + } + } + + kunmap(pfn_to_page(PFN_DOWN(pgd))); + + return size; +} + +void *kbase_mmu_dump(kbase_context *kctx, int nr_pages) +{ + void *kaddr; + size_t size_left; + + KBASE_DEBUG_ASSERT(kctx); + + lockdep_assert_held(&kctx->reg_lock); + + if (0 == nr_pages) { + /* can't find in a 0 sized buffer, early out */ + return NULL; + } + + size_left 
= nr_pages * PAGE_SIZE; + + KBASE_DEBUG_ASSERT(0 != size_left); + kaddr = vmalloc_user(size_left); + + if (kaddr) { + u64 end_marker = 0xFFULL; + char *buffer = (char *)kaddr; + + size_t size = kbasep_mmu_dump_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, &buffer, &size_left); + if (!size) { + vfree(kaddr); + return NULL; + } + + /* Add on the size for the end marker */ + size += sizeof(u64); + + if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) { + /* The buffer isn't big enough - free the memory and return failure */ + vfree(kaddr); + return NULL; + } + + /* Add the end marker */ + memcpy(buffer, &end_marker, sizeof(u64)); + } + + return kaddr; +} +KBASE_EXPORT_TEST_API(kbase_mmu_dump) + +static u64 lock_region(kbase_device *kbdev, u64 pfn, size_t num_pages) +{ + u64 region; + + /* can't lock a zero sized range */ + KBASE_DEBUG_ASSERT(num_pages); + + region = pfn << PAGE_SHIFT; + /* + * fls returns (given the ASSERT above): + * 32-bit: 1 .. 32 + * 64-bit: 1 .. 32 + * + * 32-bit: 10 + fls(num_pages) + * results in the range (11 .. 42) + * 64-bit: 10 + fls(num_pages) + * results in the range (11 .. 42) + */ + + /* gracefully handle num_pages being zero */ + if (0 == num_pages) { + region |= 11; + } else { + u8 region_width; + region_width = 10 + fls(num_pages); + if (num_pages != (1ul << (region_width - 11))) { + /* not pow2, so must go up to the next pow2 */ + region_width += 1; + } + KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE); + KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE); + region |= region_width; + } + + return region; +} + +static void bus_fault_worker(struct work_struct *data) +{ + kbase_as *faulting_as; + int as_no; + kbase_context *kctx; + kbase_device *kbdev; + u32 reg; + mali_bool reset_status = MALI_FALSE; + + faulting_as = container_of(data, kbase_as, work_busfault); + as_no = faulting_as->number; + + kbdev = container_of(faulting_as, kbase_device, as[as_no]); + + /* Grab the context that was already refcounted in kbase_mmu_interrupt(). + * Therefore, it cannot be scheduled out of this AS until we explicitly release it + * + * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */ + kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { + /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. + * We start the reset before switching to UNMAPPED to ensure that unrelated jobs + * are evicted from the GPU before the switch. + */ + dev_err(kbdev->dev, "GPU bus error occurred. 
For this GPU version we now soft-reset as part of bus error recovery\n"); + reset_status = kbase_prepare_to_reset_gpu(kbdev); + } + + /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */ + if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ + /* AS transaction begin */ + mutex_lock(&kbdev->as[as_no].transaction_mutex); + + reg = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), kctx); + reg &= ~3; + kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), reg, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_UPDATE, kctx); + + mutex_unlock(&kbdev->as[as_no].transaction_mutex); + /* AS transaction end */ + + mmu_mask_reenable(kbdev, kctx, faulting_as); + kbase_pm_context_idle(kbdev); + } + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) + kbase_reset_gpu(kbdev); + + /* By this point, the fault was handled in some way, so release the ctx refcount */ + if (kctx != NULL) + kbasep_js_runpool_release_ctx(kbdev, kctx); +} + +void kbase_mmu_interrupt(kbase_device *kbdev, u32 irq_stat) +{ + unsigned long flags; + const int num_as = 16; + const int busfault_shift = 16; + const int pf_shift = 0; + const unsigned long mask = (1UL << num_as) - 1; + kbasep_js_device_data *js_devdata; + u32 new_mask; + u32 tmp; + u32 bf_bits = (irq_stat >> busfault_shift) & mask; /* bus faults */ + /* Ignore ASes with both pf and bf */ + u32 pf_bits = ((irq_stat >> pf_shift) & mask) & ~bf_bits; /* page faults */ + + KBASE_DEBUG_ASSERT(NULL != kbdev); + + js_devdata = &kbdev->js_data; + + /* remember current mask */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); + /* mask interrupts for now */ + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); + + while (bf_bits) { + /* the while logic ensures we have a bit set, no need to check for not-found here */ + int as_no = ffs(bf_bits) - 1; + kbase_as *as = &kbdev->as[as_no]; + kbase_context *kctx; + + /* Refcount the kctx ASAP - it shouldn't disappear anyway, since Bus/Page faults + * _should_ only occur whilst jobs are running, and a job causing the Bus/Page fault + * shouldn't complete until the MMU is updated */ + kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no); + + /* mark as handled */ + bf_bits &= ~(1UL << as_no); + + /* find faulting address & status */ + as->fault_addr = ((u64)kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_FAULTADDRESS_HI), kctx) << 32) | + kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_FAULTADDRESS_LO), kctx); + as->fault_status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_FAULTSTATUS), kctx); + + /* Clear the internal JM mask first before clearing the internal MMU mask */ + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 1UL << MMU_REGS_BUS_ERROR_FLAG(as_no), kctx); + + if (kctx) { + /* hw counters dumping in progress, signal the other thread that it failed */ + if ((kbdev->hwcnt.kctx == kctx) && (kbdev->hwcnt.state == KBASE_INSTR_STATE_DUMPING)) + kbdev->hwcnt.state = KBASE_INSTR_STATE_FAULT; + + /* Stop the kctx from submitting more jobs and cause it to be scheduled + * out/rescheduled when all references to it are released */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbasep_js_clear_submit_allowed(js_devdata, kctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + 
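+ /* Submission is now disallowed for this context; the bus fault is
+ * reported below and bus_fault_worker() switches the AS to UNMAPPED mode. */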
dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", as_no, as->fault_addr); + } else { + dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx with no context present! " "Suprious IRQ or SW Design Error?\n", as_no, as->fault_addr); + } + + /* remove the queued BFs from the mask */ + new_mask &= ~(1UL << (as_no + num_as)); + + /* We need to switch to UNMAPPED mode - but we do this in a worker so that we can sleep */ + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault)); + INIT_WORK(&as->work_busfault, bus_fault_worker); + queue_work(as->pf_wq, &as->work_busfault); + } + + /* + * pf_bits is non-zero if we have at least one AS with a page fault and no bus fault. + * Handle the PFs in our worker thread. + */ + while (pf_bits) { + /* the while logic ensures we have a bit set, no need to check for not-found here */ + int as_no = ffs(pf_bits) - 1; + kbase_as *as = &kbdev->as[as_no]; + kbase_context *kctx; + + /* Refcount the kctx ASAP - it shouldn't disappear anyway, since Bus/Page faults + * _should_ only occur whilst jobs are running, and a job causing the Bus/Page fault + * shouldn't complete until the MMU is updated */ + kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no); + + /* mark as handled */ + pf_bits &= ~(1UL << as_no); + + /* find faulting address & status */ + as->fault_addr = ((u64)kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_FAULTADDRESS_HI), kctx) << 32) | + kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_FAULTADDRESS_LO), kctx); + as->fault_status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_FAULTSTATUS), kctx); + + /* Clear the internal JM mask first before clearing the internal MMU mask */ + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 1UL << MMU_REGS_PAGE_FAULT_FLAG(as_no), kctx); + + if (kctx == NULL) + dev_warn(kbdev->dev, "Page fault in AS%d at 0x%016llx with no context present! 
" "Suprious IRQ or SW Design Error?\n", as_no, as->fault_addr); + + /* remove the queued PFs from the mask */ + new_mask &= ~((1UL << as_no) | (1UL << (as_no + num_as))); + + /* queue work pending for this AS */ + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault)); + INIT_WORK(&as->work_pagefault, page_fault_worker); + queue_work(as->pf_wq, &as->work_pagefault); + } + + /* reenable interrupts */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); + new_mask |= tmp; + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} + +KBASE_EXPORT_TEST_API(kbase_mmu_interrupt) + +const char *kbase_exception_name(u32 exception_code) +{ + const char *e; + + switch (exception_code) { + /* Non-Fault Status code */ + case 0x00: + e = "NOT_STARTED/IDLE/OK"; + break; + case 0x01: + e = "DONE"; + break; + case 0x02: + e = "INTERRUPTED"; + break; + case 0x03: + e = "STOPPED"; + break; + case 0x04: + e = "TERMINATED"; + break; + case 0x08: + e = "ACTIVE"; + break; + /* Job exceptions */ + case 0x40: + e = "JOB_CONFIG_FAULT"; + break; + case 0x41: + e = "JOB_POWER_FAULT"; + break; + case 0x42: + e = "JOB_READ_FAULT"; + break; + case 0x43: + e = "JOB_WRITE_FAULT"; + break; + case 0x44: + e = "JOB_AFFINITY_FAULT"; + break; + case 0x48: + e = "JOB_BUS_FAULT"; + break; + case 0x50: + e = "INSTR_INVALID_PC"; + break; + case 0x51: + e = "INSTR_INVALID_ENC"; + break; + case 0x52: + e = "INSTR_TYPE_MISMATCH"; + break; + case 0x53: + e = "INSTR_OPERAND_FAULT"; + break; + case 0x54: + e = "INSTR_TLS_FAULT"; + break; + case 0x55: + e = "INSTR_BARRIER_FAULT"; + break; + case 0x56: + e = "INSTR_ALIGN_FAULT"; + break; + case 0x58: + e = "DATA_INVALID_FAULT"; + break; + case 0x59: + e = "TILE_RANGE_FAULT"; + break; + case 0x5A: + e = "ADDR_RANGE_FAULT"; + break; + case 0x60: + e = "OUT_OF_MEMORY"; + break; + /* GPU exceptions */ + case 0x80: + e = "DELAYED_BUS_FAULT"; + break; + case 0x81: + e = "SHAREABILITY_FAULT"; + break; + /* MMU exceptions */ + case 0xC0: + case 0xC1: + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + e = "TRANSLATION_FAULT"; + break; + case 0xC8: + e = "PERMISSION_FAULT"; + break; + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + e = "TRANSTAB_BUS_FAULT"; + break; + case 0xD8: + e = "ACCESS_FLAG"; + break; + default: + e = "UNKNOWN"; + break; + }; + + return e; +} + +/** + * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on. 
+ */ +static void kbase_mmu_report_fault_and_kill(kbase_context *kctx, kbase_as *as) +{ + unsigned long flags; + u32 reg; + int exception_type; + int access_type; + int source_id; + int as_no; + kbase_device *kbdev; + kbasep_js_device_data *js_devdata; + mali_bool reset_status = MALI_FALSE; + static const char * const access_type_names[] = { "RESERVED", "EXECUTE", "READ", "WRITE" }; + + KBASE_DEBUG_ASSERT(as); + KBASE_DEBUG_ASSERT(kctx); + + as_no = as->number; + kbdev = kctx->kbdev; + js_devdata = &kbdev->js_data; + + /* ASSERT that the context won't leave the runpool */ + KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0); + + /* decode the fault status */ + exception_type = as->fault_status & 0xFF; + access_type = (as->fault_status >> 8) & 0x3; + source_id = (as->fault_status >> 16); + + /* terminal fault, print info about the fault */ + dev_err(kbdev->dev, "Unhandled Page fault in AS%d at VA 0x%016llX\n" + "raw fault status 0x%X\n" + "decoded fault status: %s\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X\n", + as_no, as->fault_addr, + as->fault_status, + (as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), + exception_type, kbase_exception_name(exception_type), + access_type, access_type_names[access_type], + source_id); + + /* hardware counters dump fault handling */ + if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && (kbdev->hwcnt.state == KBASE_INSTR_STATE_DUMPING)) { + unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; + if ((as->fault_addr >= kbdev->hwcnt.addr) && (as->fault_addr < (kbdev->hwcnt.addr + (num_core_groups * 2048)))) + kbdev->hwcnt.state = KBASE_INSTR_STATE_FAULT; + } + + /* Stop the kctx from submitting more jobs and cause it to be scheduled + * out/rescheduled - this will occur on releasing the context's refcount */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbasep_js_clear_submit_allowed(js_devdata, kctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + /* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this + * context can appear in the job slots from this point on */ + kbase_job_kill_jobs_from_context(kctx); + /* AS transaction begin */ + mutex_lock(&as->transaction_mutex); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { + /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. + * We start the reset before switching to UNMAPPED to ensure that unrelated jobs + * are evicted from the GPU before the switch. + */ + dev_err(kbdev->dev, "Unhandled page fault. 
For this GPU version we now soft-reset the GPU as part of page fault recovery."); + reset_status = kbase_prepare_to_reset_gpu(kbdev); + } + + /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ + reg = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), kctx); + reg &= ~3; + kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), reg, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_UPDATE, kctx); + + mutex_unlock(&as->transaction_mutex); + /* AS transaction end */ + mmu_mask_reenable(kbdev, kctx, as); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) + kbase_reset_gpu(kbdev); +} + +void kbasep_as_do_poke(struct work_struct *work) +{ + kbase_as *as; + kbase_device *kbdev; + unsigned long flags; + + KBASE_DEBUG_ASSERT(work); + as = container_of(work, kbase_as, poke_work); + kbdev = container_of(as, kbase_device, as[as->number]); + KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); + + /* GPU power will already be active by virtue of the caller holding a JS + * reference on the address space, and will not release it until this worker + * has finished */ + + /* AS transaction begin */ + mutex_lock(&as->transaction_mutex); + /* Force a uTLB invalidate */ + kbase_reg_write(kbdev, MMU_AS_REG(as->number, ASn_COMMAND), ASn_COMMAND_UNLOCK, NULL); + mutex_unlock(&as->transaction_mutex); + /* AS transaction end */ + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + if (as->poke_refcount && + !(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) { + /* Only queue up the timer if we need it, and we're not trying to kill it */ + hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL); + } + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + +} + +enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer) +{ + kbase_as *as; + int queue_work_ret; + + KBASE_DEBUG_ASSERT(NULL != timer); + as = container_of(timer, kbase_as, poke_timer); + KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); + + queue_work_ret = queue_work(as->poke_wq, &as->poke_work); + KBASE_DEBUG_ASSERT(queue_work_ret); + return HRTIMER_NORESTART; +} + +/** + * Retain the poking timer on an atom's context (if the atom hasn't already + * done so), and start the timer (if it's not already started). + * + * This must only be called on a context that's scheduled in, and an atom + * that's running on the GPU. 
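+ * While at least one atom holds the poker, kbasep_as_do_poke() forces an
+ * ASn_COMMAND_UNLOCK on the address space and re-arms its timer roughly
+ * every 5 ms (see HR_TIMER_DELAY_MSEC(5) above).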
+ * + * The caller must hold kbasep_js_device_data::runpool_irq::lock + * + * This can be called safely from atomic context + */ +void kbase_as_poking_timer_retain_atom(kbase_device *kbdev, kbase_context *kctx, kbase_jd_atom *katom) +{ + kbase_as *as; + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(katom); + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + if (katom->poking) + return; + + katom->poking = 1; + + /* It's safe to work on the as/as_nr without an explicit reference, + * because the caller holds the runpool_irq lock, and the atom itself + * was also running and had already taken a reference */ + as = &kbdev->as[kctx->as_nr]; + + if (++(as->poke_refcount) == 1) { + /* First refcount for poke needed: check if not already in flight */ + if (!as->poke_state) { + /* need to start poking */ + as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT; + queue_work(as->poke_wq, &as->poke_work); + } + } +} + +/** + * If an atom holds a poking timer, release it and wait for it to finish + * + * This must only be called on a context that's scheduled in, and an atom + * that still has a JS reference on the context + * + * This must \b not be called from atomic context, since it can sleep. + */ +void kbase_as_poking_timer_release_atom(kbase_device *kbdev, kbase_context *kctx, kbase_jd_atom *katom) +{ + kbase_as *as; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(katom); + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + if (!katom->poking) + return; + + as = &kbdev->as[kctx->as_nr]; + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + KBASE_DEBUG_ASSERT(as->poke_refcount > 0); + KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); + + if (--(as->poke_refcount) == 0) { + as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE; + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + + hrtimer_cancel(&as->poke_timer); + flush_workqueue(as->poke_wq); + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + + /* Re-check whether it's still needed */ + if (as->poke_refcount) { + int queue_work_ret; + /* Poking still needed: + * - Another retain will not be starting the timer or queueing work, + * because it's still marked as in-flight + * - The hrtimer has finished, and has not started a new timer or + * queued work because it's been marked as killing + * + * So whatever happens now, just queue the work again */ + as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE); + queue_work_ret = queue_work(as->poke_wq, &as->poke_work); + KBASE_DEBUG_ASSERT(queue_work_ret); + } else { + /* It isn't - so mark it as not in flight, and not killing */ + as->poke_state = 0u; + + /* The poke associated with the atom has now finished. If this is + * also the last atom on the context, then we can guarentee no more + * pokes (and thus no more poking register accesses) will occur on + * the context until new atoms are run */ + } + } + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + + katom->poking = 0; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c new file mode 100755 index 00000000000..b33f0b3bb4d --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c @@ -0,0 +1,142 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifdef CONFIG_MALI_PLATFORM_FAKE + +#include +#include +#include +#include +#include + +#ifdef CONFIG_MACH_MANTA +#include +#endif + +/* + * This file is included only for type definitions and functions belonging to + * specific platform folders. Do not add dependencies with symbols that are + * defined somewhere else. + */ +#include + +#define PLATFORM_CONFIG_RESOURCE_COUNT 4 +#define PLATFORM_CONFIG_IRQ_RES_COUNT 3 + +static struct platform_device *mali_device; + +#ifndef CONFIG_OF +/** + * @brief Convert data in kbase_io_resources struct to Linux-specific resources + * + * Function converts data in kbase_io_resources struct to an array of Linux resource structures. Note that function + * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT. + * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ. + * + * @param[in] io_resource Input IO resource data + * @param[out] linux_resources Pointer to output array of Linux resource structures + */ +static void kbasep_config_parse_io_resources(const kbase_io_resources *io_resources, struct resource *const linux_resources) +{ + if (!io_resources || !linux_resources) { + pr_err("%s: couldn't find proper resources\n", __func__); + return; + } + + memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource)); + + linux_resources[0].start = io_resources->io_memory_region.start; + linux_resources[0].end = io_resources->io_memory_region.end; + linux_resources[0].flags = IORESOURCE_MEM; + + linux_resources[1].start = linux_resources[1].end = io_resources->job_irq_number; + linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; + + linux_resources[2].start = linux_resources[2].end = io_resources->mmu_irq_number; + linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; + + linux_resources[3].start = linux_resources[3].end = io_resources->gpu_irq_number; + linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; +} +#endif /* CONFIG_OF */ + +int kbase_platform_fake_register(void) +{ + kbase_platform_config *config; + int attribute_count; +#ifndef CONFIG_OF + struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT]; +#endif + int err; + + config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */ + if (config == NULL) + { + pr_err("%s: couldn't get platform config\n", __func__); + return -ENODEV; + } + + attribute_count = kbasep_get_config_attribute_count(config->attributes); +#ifdef CONFIG_MACH_MANTA + err = platform_device_add_data(&exynos5_device_g3d, config->attributes, attribute_count * sizeof(config->attributes[0])); + if (err) + return err; +#else + + mali_device = platform_device_alloc("mali", 0); + if (mali_device == NULL) + return -ENOMEM; + +#ifndef CONFIG_OF + kbasep_config_parse_io_resources(config->io_resources, resources); + err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT); + if (err) { + platform_device_put(mali_device); + mali_device = NULL; + return err; + } 
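+ /* When CONFIG_OF is enabled the resource setup above is compiled out and
+ * only the attribute data below is attached to the platform device. */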
+#endif /* CONFIG_OF */ + + err = platform_device_add_data(mali_device, config->attributes, attribute_count * sizeof(config->attributes[0])); + if (err) { + platform_device_unregister(mali_device); + mali_device = NULL; + return err; + } + + err = platform_device_add(mali_device); + if (err) { + platform_device_unregister(mali_device); + mali_device = NULL; + return err; + } +#endif /* CONFIG_CONFIG_MACH_MANTA */ + + return 0; +} + +void kbase_platform_fake_unregister(void) +{ + if (mali_device) + platform_device_unregister(mali_device); +} + +EXPORT_SYMBOL(kbase_platform_fake_register); +EXPORT_SYMBOL(kbase_platform_fake_unregister); + +#endif /* CONFIG_MALI_PLATFORM_FAKE */ + diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c new file mode 100755 index 00000000000..212221f94fd --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c @@ -0,0 +1,462 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_pm.c + * Base kernel power management APIs + */ + +#include +#include + +#include + +void kbase_pm_register_access_enable(kbase_device *kbdev) +{ + kbase_pm_callback_conf *callbacks; + + callbacks = (kbase_pm_callback_conf *) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS); + + if (callbacks) + callbacks->power_on_callback(kbdev); +} + +void kbase_pm_register_access_disable(kbase_device *kbdev) +{ + kbase_pm_callback_conf *callbacks; + + callbacks = (kbase_pm_callback_conf *) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS); + + if (callbacks) + callbacks->power_off_callback(kbdev); +} + +mali_error kbase_pm_init(kbase_device *kbdev) +{ + mali_error ret = MALI_ERROR_NONE; + kbase_pm_callback_conf *callbacks; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + mutex_init(&kbdev->pm.lock); + + kbdev->pm.gpu_powered = MALI_FALSE; + kbdev->pm.suspending = MALI_FALSE; +#ifdef CONFIG_MALI_DEBUG + kbdev->pm.driver_ready_for_irqs = MALI_FALSE; +#endif /* CONFIG_MALI_DEBUG */ + kbdev->pm.gpu_in_desired_state = MALI_TRUE; + init_waitqueue_head(&kbdev->pm.gpu_in_desired_state_wait); + + callbacks = (kbase_pm_callback_conf *) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS); + if (callbacks) { + kbdev->pm.callback_power_on = callbacks->power_on_callback; + kbdev->pm.callback_power_off = callbacks->power_off_callback; + kbdev->pm.callback_power_suspend = + callbacks->power_suspend_callback; + kbdev->pm.callback_power_resume = + callbacks->power_resume_callback; + kbdev->pm.callback_power_runtime_init = callbacks->power_runtime_init_callback; + kbdev->pm.callback_power_runtime_term = callbacks->power_runtime_term_callback; + kbdev->pm.callback_power_runtime_on = callbacks->power_runtime_on_callback; + kbdev->pm.callback_power_runtime_off = callbacks->power_runtime_off_callback; + } else { + kbdev->pm.callback_power_on = NULL; + kbdev->pm.callback_power_off = NULL; + kbdev->pm.callback_power_suspend = NULL; + 
kbdev->pm.callback_power_resume = NULL; + kbdev->pm.callback_power_runtime_init = NULL; + kbdev->pm.callback_power_runtime_term = NULL; + kbdev->pm.callback_power_runtime_on = NULL; + kbdev->pm.callback_power_runtime_off = NULL; + } + + kbdev->pm.platform_dvfs_frequency = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_POWER_MANAGEMENT_DVFS_FREQ); + + /* Initialise the metrics subsystem */ + ret = kbasep_pm_metrics_init(kbdev); + if (MALI_ERROR_NONE != ret) + return ret; + + init_waitqueue_head(&kbdev->pm.l2_powered_wait); + kbdev->pm.l2_powered = 0; + + init_waitqueue_head(&kbdev->pm.reset_done_wait); + kbdev->pm.reset_done = MALI_FALSE; + + init_waitqueue_head(&kbdev->pm.zero_active_count_wait); + kbdev->pm.active_count = 0; + + spin_lock_init(&kbdev->pm.power_change_lock); + spin_lock_init(&kbdev->pm.gpu_cycle_counter_requests_lock); + spin_lock_init(&kbdev->pm.gpu_powered_lock); + + if (MALI_ERROR_NONE != kbase_pm_ca_init(kbdev)) + goto workq_fail; + + if (MALI_ERROR_NONE != kbase_pm_policy_init(kbdev)) + goto pm_policy_fail; + + return MALI_ERROR_NONE; + +pm_policy_fail: + kbase_pm_ca_term(kbdev); +workq_fail: + kbasep_pm_metrics_term(kbdev); + return MALI_ERROR_FUNCTION_FAILED; +} + +KBASE_EXPORT_TEST_API(kbase_pm_init) + +void kbase_pm_do_poweron(kbase_device *kbdev, mali_bool is_resume) +{ + lockdep_assert_held(&kbdev->pm.lock); + + /* Turn clocks and interrupts on - no-op if we haven't done a previous + * kbase_pm_clock_off() */ + kbase_pm_clock_on(kbdev, is_resume); + + /* Update core status as required by the policy */ + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START); + kbase_pm_update_cores_state(kbdev); + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END); + + /* NOTE: We don't wait to reach the desired state, since running atoms + * will wait for that state to be reached anyway */ +} + +void kbase_pm_do_poweroff(kbase_device *kbdev, mali_bool is_suspend) +{ + unsigned long flags; + mali_bool cores_are_available; + + lockdep_assert_held(&kbdev->pm.lock); + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + /* Force all cores off */ + kbdev->pm.desired_shader_state = 0; + + /* Force all cores to be unavailable, in the situation where + * transitions are in progress for some cores but not others, + * and kbase_pm_check_transitions_nolock can not immediately + * power off the cores */ + kbdev->shader_available_bitmap = 0; + kbdev->tiler_available_bitmap = 0; + kbdev->l2_available_bitmap = 0; + + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START); + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END); + /* Don't need 'cores_are_available', because we don't return anything */ + CSTD_UNUSED(cores_are_available); + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + /* NOTE: We won't wait to reach the core's desired state, even if we're + * powering off the GPU itself too. 
It's safe to cut the power whilst + * they're transitioning to off, because the cores should be idle and all + * cache flushes should already have occurred */ + + /* Consume any change-state events */ + kbase_timeline_pm_check_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + /* Disable interrupts and turn the clock off */ + kbase_pm_clock_off(kbdev, is_suspend); +} + +mali_error kbase_pm_powerup(kbase_device *kbdev) +{ + unsigned long flags; + mali_error ret; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + mutex_lock(&kbdev->pm.lock); + + /* A suspend won't happen during startup/insmod */ + KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + + /* Power up the GPU, don't enable IRQs as we are not ready to receive them. */ + ret = kbase_pm_init_hw(kbdev, MALI_FALSE ); + if (ret != MALI_ERROR_NONE) { + mutex_unlock(&kbdev->pm.lock); + return ret; + } + + kbasep_pm_read_present_cores(kbdev); + + kbdev->pm.debug_core_mask = kbdev->shader_present_bitmap; + + /* Pretend the GPU is active to prevent a power policy turning the GPU cores off */ + kbdev->pm.active_count = 1; + + spin_lock_irqsave(&kbdev->pm.gpu_cycle_counter_requests_lock, flags); + /* Ensure cycle counter is off */ + kbdev->pm.gpu_cycle_counter_requests = 0; + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CYCLE_COUNT_STOP, NULL); + spin_unlock_irqrestore(&kbdev->pm.gpu_cycle_counter_requests_lock, flags); + + /* We are ready to receive IRQ's now as power policy is set up, so enable them now. */ +#ifdef CONFIG_MALI_DEBUG + spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags); + kbdev->pm.driver_ready_for_irqs = MALI_TRUE; + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); +#endif + kbase_pm_enable_interrupts(kbdev); + + /* Turn on the GPU and any cores needed by the policy */ + kbase_pm_do_poweron(kbdev, MALI_FALSE); + mutex_unlock(&kbdev->pm.lock); + + /* Idle the GPU and/or cores, if the policy wants it to */ + kbase_pm_context_idle(kbdev); + + return MALI_ERROR_NONE; +} + +KBASE_EXPORT_TEST_API(kbase_pm_powerup) + +void kbase_pm_context_active(kbase_device *kbdev) +{ + (void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); +} + +int kbase_pm_context_active_handle_suspend(kbase_device *kbdev, kbase_pm_suspend_handler suspend_handler) +{ + int c; + int old_count; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + /* Trace timeline information about how long it took to handle the decision + * to powerup. Sometimes the event might be missed due to reading the count + * outside of mutex, but this is necessary to get the trace timing + * correct. 
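+ *
+ * If a suspend is in progress the suspend_handler decides the outcome:
+ * KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE fails the call (returns 1), as does
+ * DONT_REACTIVATE unless the GPU is already active, while NOT_POSSIBLE
+ * asserts because the caller cannot tolerate a failure.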
*/ + old_count = kbdev->pm.active_count; + if (old_count == 0) + kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); + + mutex_lock(&kbdev->pm.lock); + if (kbase_pm_is_suspending(kbdev)) + { + switch (suspend_handler) { + case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: + if (kbdev->pm.active_count != 0 ) + break; + /* FALLTHROUGH */ + case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: + mutex_unlock(&kbdev->pm.lock); + if (old_count == 0) + kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); + return 1; + + case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE: + /* FALLTHROUGH */ + default: + KBASE_DEBUG_ASSERT_MSG(MALI_FALSE,"unreachable"); + break; + } + } + c = ++kbdev->pm.active_count; + + KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c); + + /* Trace the event being handled */ + if (old_count == 0) + kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); + + if (c == 1) { + /* First context active: Power on the GPU and any cores requested by + * the policy */ + kbase_pm_update_active(kbdev); + + kbasep_pm_record_gpu_active(kbdev); + } + + mutex_unlock(&kbdev->pm.lock); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_pm_context_active) + +void kbase_pm_context_idle(kbase_device *kbdev) +{ + int c; + int old_count; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + /* Trace timeline information about how long it took to handle the decision + * to powerdown. Sometimes the event might be missed due to reading the + * count outside of mutex, but this is necessary to get the trace timing + * correct. */ + old_count = kbdev->pm.active_count; + if (old_count == 0) + kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE); + + mutex_lock(&kbdev->pm.lock); + + c = --kbdev->pm.active_count; + + KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c); + + KBASE_DEBUG_ASSERT(c >= 0); + + /* Trace the event being handled */ + if (old_count == 0) + kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE); + + if (c == 0) { + /* Last context has gone idle */ + kbase_pm_update_active(kbdev); + + kbasep_pm_record_gpu_idle(kbdev); + + /* Wake up anyone waiting for this to become 0 (e.g. suspend). The + * waiters must synchronize with us by locking the pm.lock after + * waiting */ + wake_up(&kbdev->pm.zero_active_count_wait); + } + + mutex_unlock(&kbdev->pm.lock); +} + +KBASE_EXPORT_TEST_API(kbase_pm_context_idle) + +void kbase_pm_halt(kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + mutex_lock(&kbdev->pm.lock); + kbase_pm_cancel_deferred_poweroff(kbdev); + kbase_pm_do_poweroff(kbdev, MALI_FALSE); + mutex_unlock(&kbdev->pm.lock); +} + +KBASE_EXPORT_TEST_API(kbase_pm_halt) + +void kbase_pm_term(kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); + KBASE_DEBUG_ASSERT(kbdev->pm.gpu_cycle_counter_requests == 0); + + /* Free any resources the policy allocated */ + kbase_pm_policy_term(kbdev); + kbase_pm_ca_term(kbdev); + + /* Shut down the metrics subsystem */ + kbasep_pm_metrics_term(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_term) + +void kbase_pm_suspend(struct kbase_device *kbdev) +{ + int nr_keep_gpu_powered_ctxs; + KBASE_DEBUG_ASSERT(kbdev); + + mutex_lock(&kbdev->pm.lock); + KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + kbdev->pm.suspending = MALI_TRUE; + mutex_unlock(&kbdev->pm.lock); + + /* From now on, the active count will drop towards zero. Sometimes, it'll + * go up briefly before going down again. 
However, once it reaches zero it + * will stay there - guaranteeing that we've idled all pm references */ + + /* Suspend job scheduler and associated components, so that it releases all + * the PM active count references */ + kbasep_js_suspend(kbdev); + + /* Suspend any counter collection that might be happening */ + kbase_instr_hwcnt_suspend(kbdev); + + /* Cancel the keep_gpu_powered calls */ + for (nr_keep_gpu_powered_ctxs = atomic_read(&kbdev->keep_gpu_powered_count); + nr_keep_gpu_powered_ctxs > 0 ; + --nr_keep_gpu_powered_ctxs ) { + kbase_pm_context_idle(kbdev); + } + + /* Wait for the active count to reach zero. This is not the same as + * waiting for a power down, since not all policies power down when this + * reaches zero. */ + wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0); + + /* Suspend PM Metric timer on system suspend. + * It is ok if kbase_pm_context_idle() is still running, it is safe + * to still complete the last active time period - the pm stats will + * get reset on resume anyway. + */ + kbasep_pm_metrics_term(kbdev); + + /* NOTE: We synchronize with anything that was just finishing a + * kbase_pm_context_idle() call by locking the pm.lock below */ + + /* Force power off the GPU and all cores (regardless of policy), only after + * the PM active count reaches zero (otherwise, we risk turning it off + * prematurely) */ + mutex_lock(&kbdev->pm.lock); + kbase_pm_cancel_deferred_poweroff(kbdev); + kbase_pm_do_poweroff(kbdev, MALI_TRUE); + mutex_unlock(&kbdev->pm.lock); +} + +void kbase_pm_resume(struct kbase_device *kbdev) +{ + int nr_keep_gpu_powered_ctxs; + + /* MUST happen before any pm_context_active calls occur */ + mutex_lock(&kbdev->pm.lock); + kbdev->pm.suspending = MALI_FALSE; + mutex_unlock(&kbdev->pm.lock); + + kbase_pm_do_poweron(kbdev, MALI_TRUE); + + /* Restart PM Metric timer on resume */ + kbasep_pm_metrics_init(kbdev); + kbasep_pm_record_gpu_idle(kbdev); + + /* Initial active call, to power on the GPU/cores if needed */ + kbase_pm_context_active(kbdev); + + /* Restore the keep_gpu_powered calls */ + for (nr_keep_gpu_powered_ctxs = atomic_read(&kbdev->keep_gpu_powered_count); + nr_keep_gpu_powered_ctxs > 0 ; + --nr_keep_gpu_powered_ctxs ) { + kbase_pm_context_active(kbdev); + } + + /* Re-enable instrumentation, if it was previously disabled */ + kbase_instr_hwcnt_resume(kbdev); + + /* Resume any blocked atoms (which may cause contexts to be scheduled in + * and dependent atoms to run) */ + kbase_resume_suspended_soft_jobs(kbdev); + + /* Resume the Job Scheduler and associated components, and start running + * atoms */ + kbasep_js_resume(kbdev); + + /* Matching idle call, to power off the GPU/cores if we didn't actually + * need it and the policy doesn't want it on */ + kbase_pm_context_idle(kbdev); +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.h b/drivers/gpu/arm/midgard/mali_kbase_pm.h new file mode 100755 index 00000000000..a9baff977dd --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm.h @@ -0,0 +1,849 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_pm.h + * Power management API definitions + */ + +#ifndef _KBASE_PM_H_ +#define _KBASE_PM_H_ + +#include +#include + +/* Forward definition - see mali_kbase.h */ +struct kbase_device; + +#include "mali_kbase_pm_ca.h" +#include "mali_kbase_pm_policy.h" + +#include "mali_kbase_pm_ca_fixed.h" +#if MALI_CUSTOMER_RELEASE == 0 +#include "mali_kbase_pm_ca_random.h" +#endif + +#include "mali_kbase_pm_always_on.h" +#include "mali_kbase_pm_coarse_demand.h" +#include "mali_kbase_pm_demand.h" +#if MALI_CUSTOMER_RELEASE == 0 +#include "mali_kbase_pm_demand_always_powered.h" +#include "mali_kbase_pm_fast_start.h" +#endif + +/** The types of core in a GPU. + * + * These enumerated values are used in calls to: + * - @ref kbase_pm_get_present_cores + * - @ref kbase_pm_get_active_cores + * - @ref kbase_pm_get_trans_cores + * - @ref kbase_pm_get_ready_cores. + * + * They specify which type of core should be acted on. These values are set in + * a manner that allows @ref core_type_to_reg function to be simpler and more + * efficient. + */ +typedef enum kbase_pm_core_type { + KBASE_PM_CORE_L3 = L3_PRESENT_LO, /**< The L3 cache */ + KBASE_PM_CORE_L2 = L2_PRESENT_LO, /**< The L2 cache */ + KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, /**< Shader cores */ + KBASE_PM_CORE_TILER = TILER_PRESENT_LO /**< Tiler cores */ +} kbase_pm_core_type; + +/** Initialize the power management framework. + * + * Must be called before any other power management function + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * + * @return MALI_ERROR_NONE if the power management framework was successfully initialized. + */ +mali_error kbase_pm_init(struct kbase_device *kbdev); + +/** Power up GPU after all modules have been initialized and interrupt handlers installed. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * + * @return MALI_ERROR_NONE if powerup was successful. + */ +mali_error kbase_pm_powerup(struct kbase_device *kbdev); + +/** + * Halt the power management framework. + * Should ensure that no new interrupts are generated, + * but allow any currently running interrupt handlers to complete successfully. + * The GPU is forced off by the time this function returns, regardless of + * whether or not the active power policy asks for the GPU to be powered off. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_halt(struct kbase_device *kbdev); + +/** Terminate the power management framework. + * + * No power management functions may be called after this + * (except @ref kbase_pm_init) + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_term(struct kbase_device *kbdev); + +/** Metrics data collected for use by the power management framework. + * + */ +typedef struct kbasep_pm_metrics_data { + int vsync_hit; + int utilisation; + ktime_t time_period_start; + u32 time_busy; + u32 time_idle; + mali_bool gpu_active; + + spinlock_t lock; + + struct hrtimer timer; + mali_bool timer_active; + + void *platform_data; + struct kbase_device *kbdev; +} kbasep_pm_metrics_data; + +/** Actions for DVFS. + * + * kbase_pm_get_dvfs_action will return one of these enumerated values to + * describe the action that the DVFS system should take. 
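+ *
+ * For example, a platform DVFS handler might poll this once per
+ * kbase_pm_device_data::platform_dvfs_frequency milliseconds and move the
+ * GPU clock one step up or down for CLOCK_UP / CLOCK_DOWN, treating NOP as
+ * "leave the clock alone".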
+ */ +typedef enum kbase_pm_dvfs_action { + KBASE_PM_DVFS_NOP, /**< No change in clock frequency is requested */ + KBASE_PM_DVFS_CLOCK_UP, /**< The clock frequency should be increased if possible */ + KBASE_PM_DVFS_CLOCK_DOWN /**< The clock frequency should be decreased if possible */ +} kbase_pm_dvfs_action; + +typedef union kbase_pm_policy_data { + kbasep_pm_policy_always_on always_on; + kbasep_pm_policy_coarse_demand coarse_demand; + kbasep_pm_policy_demand demand; +#if MALI_CUSTOMER_RELEASE == 0 + kbasep_pm_policy_demand_always_powered demand_always_powered; + kbasep_pm_policy_fast_start fast_start; +#endif +} kbase_pm_policy_data; + +typedef union kbase_pm_ca_policy_data { + kbasep_pm_ca_policy_fixed fixed; +#if MALI_CUSTOMER_RELEASE == 0 + kbasep_pm_ca_policy_random random; +#endif +} kbase_pm_ca_policy_data; + +/** Data stored per device for power management. + * + * This structure contains data for the power management framework. There is one instance of this structure per device + * in the system. + */ +typedef struct kbase_pm_device_data { + /** The lock protecting Power Management structures accessed + * outside of IRQ. + * + * This lock must also be held whenever the GPU is being powered on or off. + */ + struct mutex lock; + + /** The policy that is currently actively controlling core availability. + * + * @note: During an IRQ, this can be NULL when the policy is being changed + * with kbase_pm_ca_set_policy(). The change is protected under + * kbase_device::pm::power_change_lock. Direct access to this from IRQ + * context must therefore check for NULL. If NULL, then + * kbase_pm_ca_set_policy() will re-issue the policy functions that would've + * been done under IRQ. + */ + const kbase_pm_ca_policy *ca_current_policy; + + /** The policy that is currently actively controlling the power state. + * + * @note: During an IRQ, this can be NULL when the policy is being changed + * with kbase_pm_set_policy(). The change is protected under + * kbase_device::pm::power_change_lock. Direct access to this from IRQ + * context must therefore check for NULL. If NULL, then + * kbase_pm_set_policy() will re-issue the policy functions that would've + * been done under IRQ. + */ + const kbase_pm_policy *pm_current_policy; + + /** Private data for current CA policy */ + kbase_pm_ca_policy_data ca_policy_data; + + /** Private data for current PM policy */ + kbase_pm_policy_data pm_policy_data; + + /** Flag indicating when core availability policy is transitioning cores. + * The core availability policy must set this when a change in core availability + * is occuring. + * + * power_change_lock must be held when accessing this. */ + mali_bool ca_in_transition; + + /** Waiting for reset and a queue to wait for changes */ + mali_bool reset_done; + wait_queue_head_t reset_done_wait; + + /** Wait queue for whether the l2 cache has been powered as requested */ + wait_queue_head_t l2_powered_wait; + /** State indicating whether all the l2 caches are powered. + * Non-zero indicates they're *all* powered + * Zero indicates that some (or all) are not powered */ + int l2_powered; + + /** The reference count of active contexts on this device. 
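+ * It is raised by kbase_pm_context_active() and dropped by
+ * kbase_pm_context_idle(); when it reaches zero the zero_active_count_wait
+ * queue below is woken, which kbase_pm_suspend() waits on before forcing
+ * the GPU off.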
*/ + int active_count; + /** Flag indicating suspending/suspended */ + mali_bool suspending; + /* Wait queue set when active_count == 0 */ + wait_queue_head_t zero_active_count_wait; + + /** The reference count of active gpu cycle counter users */ + int gpu_cycle_counter_requests; + /** Lock to protect gpu_cycle_counter_requests */ + spinlock_t gpu_cycle_counter_requests_lock; + + /** A bit mask identifying the shader cores that the power policy would like to be on. + * The current state of the cores may be different, but there should be transitions in progress that will + * eventually achieve this state (assuming that the policy doesn't change its mind in the mean time. + */ + u64 desired_shader_state; + /** bit mask indicating which shader cores are currently in a power-on transition */ + u64 powering_on_shader_state; + /** A bit mask identifying the tiler cores that the power policy would like to be on. + * @see kbase_pm_device_data:desired_shader_state */ + u64 desired_tiler_state; + /** bit mask indicating which tiler core are currently in a power-on transition */ + u64 powering_on_tiler_state; + + /** bit mask indicating which l2-caches are currently in a power-on transition */ + u64 powering_on_l2_state; + /** bit mask indicating which l3-caches are currently in a power-on transition */ + u64 powering_on_l3_state; + + /** Lock protecting the power state of the device. + * + * This lock must be held when accessing the shader_available_bitmap, tiler_available_bitmap, l2_available_bitmap, + * shader_inuse_bitmap and tiler_inuse_bitmap fields of kbase_device, and the ca_in_transition and shader_poweroff_pending + * fields of kbase_pm_device_data. It is also held when the hardware power registers are being written to, to ensure + * that two threads do not conflict over the power transitions that the hardware should make. 
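+ *
+ * For illustration, code that changes the desired core state typically
+ * follows this locking pattern (a sketch of the locking discipline only):
+ * @code
+ * unsigned long flags;
+ *
+ * spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ * kbase_pm_update_cores_state_nolock(kbdev);
+ * spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ * @endcode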
+ */ + spinlock_t power_change_lock; + + /** This flag is set iff the GPU is powered as requested by the + * desired_xxx_state variables */ + mali_bool gpu_in_desired_state; + /* Wait queue set when gpu_in_desired_state != 0 */ + wait_queue_head_t gpu_in_desired_state_wait; + + /** Set to true when the GPU is powered and register accesses are possible, false otherwise */ + mali_bool gpu_powered; + + /** A bit mask identifying the available shader cores that are specified via sysfs */ + u64 debug_core_mask; + + /** Set to true when instrumentation is enabled, false otherwise */ + mali_bool instr_enabled; + + mali_bool cg1_disabled; + +#ifdef CONFIG_MALI_DEBUG + /** Debug state indicating whether sufficient initialization of the driver + * has occurred to handle IRQs */ + mali_bool driver_ready_for_irqs; +#endif /* CONFIG_MALI_DEBUG */ + + /** Spinlock that must be held when: + * - writing gpu_powered + * - accessing driver_ready_for_irqs (in CONFIG_MALI_DEBUG builds) */ + spinlock_t gpu_powered_lock; + + /** Time in milliseconds between each dvfs sample */ + + u32 platform_dvfs_frequency; + + /** Structure to hold metrics for the GPU */ + + kbasep_pm_metrics_data metrics; + + /** Set to the number of poweroff timer ticks until the GPU is powered off */ + int gpu_poweroff_pending; + + /** Set to the number of poweroff timer ticks until shaders are powered off */ + int shader_poweroff_pending_time; + + /** Timer for powering off GPU */ + struct hrtimer gpu_poweroff_timer; + + struct workqueue_struct *gpu_poweroff_wq; + + struct work_struct gpu_poweroff_work; + + /** Period of GPU poweroff timer */ + ktime_t gpu_poweroff_time; + + /** Bit mask of shaders to be powered off on next timer callback */ + u64 shader_poweroff_pending; + + /** Set to MALI_TRUE if the poweroff timer is currently running, MALI_FALSE otherwise */ + mali_bool poweroff_timer_running; + + int poweroff_shader_ticks; + + int poweroff_gpu_ticks; + + /** Callback when the GPU needs to be turned on. See @ref kbase_pm_callback_conf + * + * @param kbdev The kbase device + * + * @return 1 if GPU state was lost, 0 otherwise + */ + int (*callback_power_on) (struct kbase_device *kbdev); + + /** Callback when the GPU may be turned off. See @ref kbase_pm_callback_conf + * + * @param kbdev The kbase device + */ + void (*callback_power_off) (struct kbase_device *kbdev); + + /** Callback when a suspend occurs and the GPU needs to be turned off. + * See @ref kbase_pm_callback_conf + * + * @param kbdev The kbase device + */ + void (*callback_power_suspend) (struct kbase_device *kbdev); + + /** Callback when a resume occurs and the GPU needs to be turned on. + * See @ref kbase_pm_callback_conf + * + * @param kbdev The kbase device + */ + void (*callback_power_resume) (struct kbase_device *kbdev); + + /** Callback for initializing the runtime power management. + * + * @param kbdev The kbase device + * + * @return MALI_ERROR_NONE on success, else error code + */ + mali_error(*callback_power_runtime_init) (struct kbase_device *kbdev); + + /** Callback for terminating the runtime power management. + * + * @param kbdev The kbase device + */ + void (*callback_power_runtime_term) (struct kbase_device *kbdev); + + /** Callback when the GPU needs to be turned on. See @ref kbase_pm_callback_conf + * + * @param kbdev The kbase device + * + * @return 1 if GPU state was lost, 0 otherwise + */ + int (*callback_power_runtime_on) (struct kbase_device *kbdev); + + /** Callback when the GPU may be turned off. 
See @ref kbase_pm_callback_conf + * + * @param kbdev The kbase device + */ + void (*callback_power_runtime_off) (struct kbase_device *kbdev); + +} kbase_pm_device_data; + +/** The GPU is idle. + * + * The OS may choose to turn off idle devices + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_dev_idle(struct kbase_device *kbdev); + +/** The GPU is active. + * + * The OS should avoid opportunistically turning off the GPU while it is active + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_dev_activate(struct kbase_device *kbdev); + +/** Get details of the cores that are present in the device. + * + * This function can be called by the active power policy to return a bitmask of the cores (of a specified type) + * present in the GPU device and also a count of the number of cores. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param type The type of core (see the @ref kbase_pm_core_type enumeration) + * + * @return The bit mask of cores present + */ +u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, kbase_pm_core_type type); + +/** Get details of the cores that are currently active in the device. + * + * This function can be called by the active power policy to return a bitmask of the cores (of a specified type) that + * are actively processing work (i.e. turned on *and* busy). + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param type The type of core (see the @ref kbase_pm_core_type enumeration) + * + * @return The bit mask of active cores + */ +u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, kbase_pm_core_type type); + +/** Get details of the cores that are currently transitioning between power states. + * + * This function can be called by the active power policy to return a bitmask of the cores (of a specified type) that + * are currently transitioning between power states. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param type The type of core (see the @ref kbase_pm_core_type enumeration) + * + * @return The bit mask of transitioning cores + */ +u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, kbase_pm_core_type type); + +/** Get details of the cores that are currently powered and ready for jobs. + * + * This function can be called by the active power policy to return a bitmask of the cores (of a specified type) that + * are powered and ready for jobs (they may or may not be currently executing jobs). + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param type The type of core (see the @ref kbase_pm_core_type enumeration) + * + * @return The bit mask of ready cores + */ +u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, kbase_pm_core_type type); + +/** Turn the clock for the device on, and enable device interrupts. + * + * This function can be used by a power policy to turn the clock for the GPU on. It should be modified during + * integration to perform the necessary actions to ensure that the GPU is fully powered and clocked. 
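+ *
+ * The platform specific work is normally supplied through the power
+ * management callbacks (see @ref kbase_pm_callback_conf). As a hedged,
+ * purely illustrative sketch (the function name is hypothetical), an
+ * integration might provide:
+ * @code
+ * static int my_platform_power_on(struct kbase_device *kbdev)
+ * {
+ *     // Enable the GPU regulator and clock here (platform specific).
+ *     return 1; // report that GPU state was lost while powered down
+ * }
+ * @endcode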
+ * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param is_resume MALI_TRUE if clock on due to resume after suspend, + * MALI_FALSE otherwise + */ +void kbase_pm_clock_on(struct kbase_device *kbdev, mali_bool is_resume); + +/** Disable device interrupts, and turn the clock for the device off. + * + * This function can be used by a power policy to turn the clock for the GPU off. It should be modified during + * integration to perform the necessary actions to turn the clock off (if this is possible in the integration). + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param is_suspend MALI_TRUE if clock off due to suspend, MALI_FALSE otherwise + */ +void kbase_pm_clock_off(struct kbase_device *kbdev, mali_bool is_suspend); + +/** Enable interrupts on the device. + * + * Interrupts are also enabled after a call to kbase_pm_clock_on(). + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_enable_interrupts(struct kbase_device *kbdev); + +/** Disable interrupts on the device. + * + * This prevents delivery of Power Management interrupts to the CPU so that + * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler + * until @ref kbase_pm_enable_interrupts or kbase_pm_clock_on() is called. + * + * Interrupts are also disabled after a call to kbase_pm_clock_off(). + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_disable_interrupts(struct kbase_device *kbdev); + +/** Initialize the hardware + * + * This function checks the GPU ID register to ensure that the GPU is supported by the driver and performs a reset on + * the device so that it is in a known state before the device is used. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param enable_irqs When set to MALI_TRUE gpu irqs will be enabled after this call, else + * they will be left disabled. + * + * @return MALI_ERROR_NONE if the device is supported and successfully reset. + */ +mali_error kbase_pm_init_hw(struct kbase_device *kbdev, mali_bool enable_irqs ); + +/** The GPU has been reset successfully. + * + * This function must be called by the GPU interrupt handler when the RESET_COMPLETED bit is set. It signals to the + * power management initialization code that the GPU has been successfully reset. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_reset_done(struct kbase_device *kbdev); + +/** Increment the count of active contexts. + * + * This function should be called when a context is about to submit a job. It informs the active power policy that the + * GPU is going to be in use shortly and the policy is expected to start turning on the GPU. + * + * This function will block until the GPU is available. + * + * This function ASSERTS if a suspend is occuring/has occurred whilst this is + * in use. Use kbase_pm_contect_active_unless_suspending() instead. + * + * @note a Suspend is only visible to Kernel threads; user-space threads in a + * syscall cannot witness a suspend, because they are frozen before the suspend + * begins. 
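+ *
+ * A typical use, sketched for illustration only, brackets a period of GPU
+ * work between an active and an idle call:
+ * @code
+ * kbase_pm_context_active(kbdev);
+ * // ... the GPU is now powered: submit work or access registers ...
+ * kbase_pm_context_idle(kbdev);
+ * @endcode
+ * Code that might run concurrently with a suspend should instead call
+ * kbase_pm_context_active_handle_suspend() and check its return value.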
+ * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_context_active(struct kbase_device *kbdev); + + +/** Handler codes for doing kbase_pm_context_active_handle_suspend() */ +typedef enum { + /** A suspend is not expected/not possible - this is the same as + * kbase_pm_context_active() */ + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, + /** If we're suspending, fail and don't increase the active count */ + KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE, + /** If we're suspending, succeed and allow the active count to increase iff + * it didn't go from 0->1 (i.e., we didn't re-activate the GPU). + * + * This should only be used when there is a bounded time on the activation + * (e.g. guarantee it's going to be idled very soon after) */ + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE +} kbase_pm_suspend_handler; + +/** Suspend 'safe' variant of kbase_pm_context_active() + * + * If a suspend is in progress, this allows for various different ways of + * handling the suspend. Refer to @ref kbase_pm_suspend_handler for details. + * + * We returns a status code indicating whether we're allowed to keep the GPU + * active during the suspend, depending on the handler code. If the status code + * indicates a failure, the caller must abort whatever operation it was + * attempting, and potentially queue it up for after the OS has resumed. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param suspend_handler The handler code for how to handle a suspend that might occur + * @return zero Indicates success + * @return non-zero Indicates failure due to the system being suspending/suspended. + */ +int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, kbase_pm_suspend_handler suspend_handler); + +/** Decrement the reference count of active contexts. + * + * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power + * policy so the calling code should ensure that it does not access the GPU's registers. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_context_idle(struct kbase_device *kbdev); + +/** Check if there are any power transitions to make, and if so start them. + * + * This function will check the desired_xx_state members of kbase_pm_device_data and the actual status of the + * hardware to see if any power transitions can be made at this time to make the hardware state closer to the state + * desired by the power policy. + * + * The return value can be used to check whether all the desired cores are + * available, and so whether it's worth submitting a job (e.g. from a Power + * Management IRQ). + * + * Note that this still returns MALI_TRUE when desired_xx_state has no + * cores. That is: of the no cores desired, none were unavailable. In + * this case, the caller may still need to try submitting jobs. This is because + * the Core Availability Policy might have taken us to an intermediate state + * where no cores are powered, before powering on more cores (e.g. for core + * rotation) + * + * The caller must hold kbase_device::pm::power_change_lock + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @return non-zero when all desired cores are available. That is, + * it's worthwhile for the caller to submit a job. 
+ * @return MALI_FALSE otherwise + */ +mali_bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev); + +/** Synchronous and locking variant of kbase_pm_check_transitions_nolock() + * + * On returning, the desired state at the time of the call will have been met. + * + * @note There is nothing to stop the core being switched off by calls to + * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the + * caller must have already made a call to + * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously. + * + * The usual use-case for this is to ensure cores are 'READY' after performing + * a GPU Reset. + * + * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold + * kbase_device::pm::power_change_lock, because this function will take that + * lock itself. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_check_transitions_sync(struct kbase_device *kbdev); + +/** Variant of kbase_pm_update_cores_state() where the caller must hold + * kbase_device::pm::power_change_lock + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev); + +/** Update the desired state of shader cores from the Power Policy, and begin + * any power transitions. + * + * This function will update the desired_xx_state members of + * kbase_pm_device_data by calling into the current Power Policy. It will then + * begin power transitions to make the hardware acheive the desired shader core + * state. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_pm_update_cores_state(struct kbase_device *kbdev); + +/** Cancel any pending requests to power off the GPU and/or shader cores. + * + * This should be called by any functions which directly power off the GPU. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev); + +/** Read the bitmasks of present cores. + * + * This information is cached to avoid having to perform register reads whenever the information is required. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbasep_pm_read_present_cores(struct kbase_device *kbdev); + +/** Initialize the metrics gathering framework. + * + * This must be called before other metric gathering APIs are called. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * + * @return MALI_ERROR_NONE on success, MALI_ERROR_FUNCTION_FAILED on error + */ +mali_error kbasep_pm_metrics_init(struct kbase_device *kbdev); + +/** Terminate the metrics gathering framework. + * + * This must be called when metric gathering is no longer required. It is an error to call any metrics gathering + * function (other than kbasep_pm_metrics_init) after calling this function. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbasep_pm_metrics_term(struct kbase_device *kbdev); + +/** Record that the GPU is active. + * + * This records that the GPU is now active. The previous GPU state must have been idle, the function will assert if + * this is not true in a debug build. 
+ * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbasep_pm_record_gpu_active(struct kbase_device *kbdev); + +/** Record that the GPU is idle. + * + * This records that the GPU is now idle. The previous GPU state must have been active, the function will assert if + * this is not true in a debug build. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbasep_pm_record_gpu_idle(struct kbase_device *kbdev); + +/** Function to be called by the frame buffer driver to update the vsync metric. + * + * This function should be called by the frame buffer driver to update whether the system is hitting the vsync target + * or not. buffer_updated should be true if the vsync corresponded with a new frame being displayed, otherwise it + * should be false. This function does not need to be called every vsync, but only when the value of buffer_updated + * differs from a previous call. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param buffer_updated True if the buffer has been updated on this VSync, false otherwise + */ +void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated); + +/** Configure the frame buffer device to set the vsync callback. + * + * This function should do whatever is necessary for this integration to ensure that kbase_pm_report_vsync is + * called appropriately. + * + * This function will need porting as part of the integration for a device. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_register_vsync_callback(struct kbase_device *kbdev); + +/** Free any resources that kbase_pm_register_vsync_callback allocated. + * + * This function should perform any cleanup required from the call to kbase_pm_register_vsync_callback. + * No call backs should occur after this function has returned. + * + * This function will need porting as part of the integration for a device. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_unregister_vsync_callback(struct kbase_device *kbdev); + +/** Determine whether the DVFS system should change the clock speed of the GPU. + * + * This function should be called regularly by the DVFS system to check whether the clock speed of the GPU needs + * updating. It will return one of three enumerated values of kbase_pm_dvfs_action: + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @retval KBASE_PM_DVFS_NOP The clock does not need changing + * @retval KBASE_PM_DVFS_CLOCK_UP, The clock frequency should be increased if possible. + * @retval KBASE_PM_DVFS_CLOCK_DOWN The clock frequency should be decreased if possible. + */ +kbase_pm_dvfs_action kbase_pm_get_dvfs_action(struct kbase_device *kbdev); + +/** Mark that the GPU cycle counter is needed, if the caller is the first caller + * then the GPU cycle counters will be enabled. + * + * The GPU must be powered when calling this function (i.e. @ref kbase_pm_context_active must have been called). + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ + +void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev); + +/** Mark that the GPU cycle counter is no longer in use, if the caller is the last + * caller then the GPU cycle counters will be disabled. A request must have been made + * before a call to this. 
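+ *
+ * The request/release pair behaves like a reference count. A hedged usage
+ * sketch (register access details omitted):
+ * @code
+ * kbase_pm_context_active(kbdev);
+ * kbase_pm_request_gpu_cycle_counter(kbdev);
+ * // ... sample the GPU cycle counters here ...
+ * kbase_pm_release_gpu_cycle_counter(kbdev);
+ * kbase_pm_context_idle(kbdev);
+ * @endcode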
+ * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ + +void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev); + +/** Enables access to the GPU registers before power management has powered up the GPU + * with kbase_pm_powerup(). + * + * Access to registers should be done using kbase_os_reg_read/write() at this stage, + * not kbase_reg_read/write(). + * + * This results in the power management callbacks provided in the driver configuration + * to get called to turn on power and/or clocks to the GPU. + * See @ref kbase_pm_callback_conf. + * + * This should only be used before power management is powered up with kbase_pm_powerup() + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_register_access_enable(struct kbase_device *kbdev); + +/** Disables access to the GPU registers enabled earlier by a call to + * kbase_pm_register_access_enable(). + * + * This results in the power management callbacks provided in the driver configuration + * to get called to turn off power and/or clocks to the GPU. + * See @ref kbase_pm_callback_conf + * + * This should only be used before power management is powered up with kbase_pm_powerup() + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_register_access_disable(struct kbase_device *kbdev); + +/** + * Suspend the GPU and prevent any further register accesses to it from Kernel + * threads. + * + * This is called in response to an OS suspend event, and calls into the various + * kbase components to complete the suspend. + * + * @note the mechanisms used here rely on all user-space threads being frozen + * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up + * the GPU e.g. via atom submission. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_suspend(struct kbase_device *kbdev); + +/** + * Resume the GPU, allow register accesses to it, and resume running atoms on + * the GPU. + * + * This is called in response to an OS resume event, and calls into the various + * kbase components to complete the resume. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_resume(struct kbase_device *kbdev); + +/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline function */ + +/** + * Check if the power management metrics collection is active. + * + * Note that this returns if the power management metrics collection was + * active at the time of calling, it is possible that after the call the metrics + * collection enable may have changed state. + * + * The caller must handle the consequence that the state may have changed. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @return MALI_TRUE if metrics collection was active else MALI_FALSE. + */ + +mali_bool kbase_pm_metrics_is_active(struct kbase_device *kbdev); + +/** + * Power on the GPU, and any cores that are requested. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param is_resume MALI_TRUE if power on due to resume after suspend, + * MALI_FALSE otherwise + */ +void kbase_pm_do_poweron(struct kbase_device *kbdev, mali_bool is_resume); + +/** + * Power off the GPU, and any cores that have been requested. 
+ * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param is_suspend MALI_TRUE if power off due to suspend, + * MALI_FALSE otherwise + */ +void kbase_pm_do_poweroff(struct kbase_device *kbdev, mali_bool is_suspend); + +#ifdef CONFIG_MALI_MIDGARD_DVFS + +/** + * Function provided by platform specific code when DVFS is enabled to allow + * the power management metrics system to report utilisation. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param utilisation The current calculated utilisation by the metrics system. + * @return Returns 0 on failure and non zero on success. + */ + +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); +#endif +#endif /* _KBASE_PM_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/mali_kbase_pm_always_on.c new file mode 100755 index 00000000000..b457ca22933 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_always_on.c @@ -0,0 +1,62 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_pm_always_on.c + * "Always on" power management policy + */ + +#include +#include + +static u64 always_on_get_core_mask(struct kbase_device *kbdev) +{ + return kbdev->shader_present_bitmap; +} + +static mali_bool always_on_get_core_active (struct kbase_device *kbdev) +{ + return MALI_TRUE; +} + +static void always_on_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static void always_on_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +/** The @ref kbase_pm_policy structure for the demand power policy. + * + * This is the static structure that defines the demand power policy's callback and name. + */ +const kbase_pm_policy kbase_pm_always_on_policy_ops = { + "always_on", /* name */ + always_on_init, /* init */ + always_on_term, /* term */ + always_on_get_core_mask, /* get_core_mask */ + always_on_get_core_active, /* get_core_active */ + 0u, /* flags */ + KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops) diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_always_on.h b/drivers/gpu/arm/midgard/mali_kbase_pm_always_on.h new file mode 100755 index 00000000000..a3858045bd0 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_always_on.h @@ -0,0 +1,68 @@ + +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_pm_always_on.h + * "Always on" power management policy + */ + +#ifndef MALI_KBASE_PM_ALWAYS_ON_H +#define MALI_KBASE_PM_ALWAYS_ON_H + +/** + * The "Always on" power management policy has the following + * characteristics: + * - When KBase indicates that the GPU will be powered up, but we don't yet + * know which Job Chains are to be run: + * - All Shader Cores are powered up, regardless of whether or not they will + * be needed later. + * - When KBase indicates that a set of Shader Cores are needed to submit the + * currently queued Job Chains: + * - All Shader Cores are kept powered, regardless of whether or not they will + * be needed + * - When KBase indicates that the GPU need not be powered: + * - The Shader Cores are kept powered, regardless of whether or not they will + * be needed. The GPU itself is also kept powered, even though it is not + * needed. + * + * This policy is automatically overridden during system suspend: the desired + * core state is ignored, and the cores are forced off regardless of what the + * policy requests. After resuming from suspend, new changes to the desired + * core state made by the policy are honored. + * + * @note: + * - KBase indicates the GPU will be powered up when it has a User Process that + * has just started to submit Job Chains. + * - KBase indicates the GPU need not be powered when all the Job Chains from + * User Processes have finished, and it is waiting for a User Process to + * submit some more Job Chains. + */ + +/** + * Private structure for policy instance data. + * + * This contains data that is private to the particular power policy that is active. + */ +typedef struct kbasep_pm_policy_always_on { + /** No state needed - just have a dummy variable here */ + int dummy; +} kbasep_pm_policy_always_on; + +#endif /* MALI_KBASE_PM_ALWAYS_ON_H */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/mali_kbase_pm_ca.c new file mode 100755 index 00000000000..e7cfba5b50f --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_ca.c @@ -0,0 +1,173 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_pm_ca.c + * Base kernel core availability APIs + */ + +#include +#include + +extern const kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops; +#if MALI_CUSTOMER_RELEASE == 0 +extern const kbase_pm_ca_policy kbase_pm_ca_random_policy_ops; +#endif + +static const kbase_pm_ca_policy *const policy_list[] = { + &kbase_pm_ca_fixed_policy_ops, +#if MALI_CUSTOMER_RELEASE == 0 + &kbase_pm_ca_random_policy_ops +#endif +}; + +/** The number of policies available in the system. + * This is derived from the number of functions listed in policy_get_functions. 
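+ *
+ * Callers outside this file obtain the list through
+ * kbase_pm_ca_list_policies() and select an entry with
+ * kbase_pm_ca_set_policy(), for example (sketch only):
+ * @code
+ * const kbase_pm_ca_policy *const *policies;
+ * int count = kbase_pm_ca_list_policies(&policies);
+ *
+ * if (count > 0)
+ *     kbase_pm_ca_set_policy(kbdev, policies[0]);
+ * @endcode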
+ */ +#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list)) + +mali_error kbase_pm_ca_init(kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbdev->pm.ca_current_policy = policy_list[0]; + + kbdev->pm.ca_current_policy->init(kbdev); + + return MALI_ERROR_NONE; +} + +void kbase_pm_ca_term(kbase_device *kbdev) +{ + kbdev->pm.ca_current_policy->term(kbdev); +} + +int kbase_pm_ca_list_policies(const kbase_pm_ca_policy * const **list) +{ + if (!list) + return POLICY_COUNT; + + *list = policy_list; + + return POLICY_COUNT; +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies) + +const kbase_pm_ca_policy *kbase_pm_ca_get_policy(kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + return kbdev->pm.ca_current_policy; +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy) + +void kbase_pm_ca_set_policy(kbase_device *kbdev, const kbase_pm_ca_policy *new_policy) +{ + const kbase_pm_ca_policy *old_policy; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(new_policy != NULL); + + KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u, new_policy->id); + + /* During a policy change we pretend the GPU is active */ + /* A suspend won't happen here, because we're in a syscall from a userspace thread */ + kbase_pm_context_active(kbdev); + + mutex_lock(&kbdev->pm.lock); + + /* Remove the policy to prevent IRQ handlers from working on it */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + old_policy = kbdev->pm.ca_current_policy; + kbdev->pm.ca_current_policy = NULL; + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + if (old_policy->term) + old_policy->term(kbdev); + + if (new_policy->init) + new_policy->init(kbdev); + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbdev->pm.ca_current_policy = new_policy; + + /* If any core power state changes were previously attempted, but couldn't + * be made because the policy was changing (current_policy was NULL), then + * re-try them here. 
*/ + kbase_pm_update_cores_state_nolock(kbdev); + + kbdev->pm.ca_current_policy->update_core_status(kbdev, kbdev->shader_ready_bitmap, kbdev->shader_transitioning_bitmap); + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + mutex_unlock(&kbdev->pm.lock); + + /* Now the policy change is finished, we release our fake context active reference */ + kbase_pm_context_idle(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy) + +u64 kbase_pm_ca_get_core_mask(kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.power_change_lock); + + /* All cores must be enabled when instrumentation is in use */ + if (kbdev->pm.instr_enabled == MALI_TRUE) + return kbdev->shader_present_bitmap & kbdev->pm.debug_core_mask; + + if (kbdev->pm.ca_current_policy == NULL) + return kbdev->shader_present_bitmap & kbdev->pm.debug_core_mask; + + return kbdev->pm.ca_current_policy->get_core_mask(kbdev) & kbdev->pm.debug_core_mask; +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask) + +void kbase_pm_ca_update_core_status(kbase_device *kbdev, u64 cores_ready, u64 cores_transitioning) +{ + lockdep_assert_held(&kbdev->pm.power_change_lock); + + if (kbdev->pm.ca_current_policy != NULL) + kbdev->pm.ca_current_policy->update_core_status(kbdev, cores_ready, cores_transitioning); +} + +void kbase_pm_ca_instr_enable(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbdev->pm.instr_enabled = MALI_TRUE; + + kbase_pm_update_cores_state_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +void kbase_pm_ca_instr_disable(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbdev->pm.instr_enabled = MALI_FALSE; + + kbase_pm_update_cores_state_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_ca.h b/drivers/gpu/arm/midgard/mali_kbase_pm_ca.h new file mode 100755 index 00000000000..f6a97c7c6b8 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_ca.h @@ -0,0 +1,170 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_pm_ca.h + * Base kernel core availability APIs + */ + +#ifndef _KBASE_PM_CA_H_ +#define _KBASE_PM_CA_H_ + +typedef enum kbase_pm_ca_policy_id { + KBASE_PM_CA_POLICY_ID_FIXED = 1, + KBASE_PM_CA_POLICY_ID_RANDOM +} kbase_pm_ca_policy_id; + +typedef u32 kbase_pm_ca_policy_flags; + +/** Core availability policy structure. + * + * Each core availability policy exposes a (static) instance of this structure which contains function pointers to the + * policy's methods. + */ +typedef struct kbase_pm_ca_policy { + /** The name of this policy */ + char *name; + + /** Function called when the policy is selected + * + * This should initialize the kbdev->pm.ca_policy_data structure. It should not attempt + * to make any changes to hardware state. + * + * It is undefined what state the cores are in when the function is called. 
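+ *
+ * A minimal implementation, sketched here for illustration and mirroring
+ * the fixed core availability policy, simply records that no availability
+ * change is in progress:
+ * @code
+ * static void example_ca_init(struct kbase_device *kbdev)
+ * {
+ *     kbdev->pm.ca_in_transition = MALI_FALSE;
+ * }
+ * @endcode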
+ * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ + void (*init) (struct kbase_device *kbdev); + + /** Function called when the policy is unselected. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ + void (*term) (struct kbase_device *kbdev); + + /** Function called to get the current shader core availability mask + * + * When a change in core availability is occuring, the policy must set kbdev->pm.ca_in_transition + * to MALI_TRUE. This is to indicate that reporting changes in power state cannot be optimized out, + * even if kbdev->pm.desired_shader_state remains unchanged. This must be done by any functions + * internal to the Core Availability Policy that change the return value of + * kbase_pm_ca_policy::get_core_mask. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * + * @return The current core availability mask */ + u64 (*get_core_mask) (struct kbase_device *kbdev); + + /** Function called to update the current core status + * + * If none of the cores in core group 0 are ready or transitioning, then the policy must + * ensure that the next call to get_core_mask does not return 0 for all cores in core group + * 0. It is an error to disable core group 0 through the core availability policy. + * + * When a change in core availability has finished, the policy must set kbdev->pm.ca_in_transition + * to MALI_FALSE. This is to indicate that changes in power state can once again be optimized out + * when kbdev->pm.desired_shader_state is unchanged. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param cores_ready The mask of cores currently powered and ready to run jobs + * @param cores_transitioning The mask of cores currently transitioning power state */ + void (*update_core_status) (struct kbase_device *kbdev, u64 cores_ready, u64 cores_transitioning); + + /** Field indicating flags for this policy */ + kbase_pm_ca_policy_flags flags; + + /** Field indicating an ID for this policy. This is not necessarily the + * same as its index in the list returned by kbase_pm_list_policies(). + * It is used purely for debugging. */ + kbase_pm_ca_policy_id id; +} kbase_pm_ca_policy; + +/** Initialize core availability framework + * + * Must be called before calling any other core availability function + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * + * @return MALI_ERROR_NONE if the core availability framework was successfully initialized. 
+ */ +mali_error kbase_pm_ca_init(struct kbase_device *kbdev); + +/** Terminate core availability framework + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_ca_term(struct kbase_device *kbdev); + +/** Return mask of currently available shaders cores + * Calls into the core availability policy + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * + * @return The bit mask of available cores + */ +u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); + +/** Update core availability policy with current core power status + * Calls into the core availability policy + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param cores_ready The bit mask of cores ready for job submission + * @param cores_transitioning The bit mask of cores that are transitioning power state + */ +void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, u64 cores_transitioning); + +/** Enable override for instrumentation + * + * This overrides the output of the core availability policy, ensuring that all cores are available + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_ca_instr_enable(struct kbase_device *kbdev); + +/** Disable override for instrumentation + * + * This disables any previously enabled override, and resumes normal policy functionality + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_ca_instr_disable(struct kbase_device *kbdev); + +/** Get the current policy. + * Returns the policy that is currently active. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * + * @return The current policy + */ +const kbase_pm_ca_policy *kbase_pm_ca_get_policy(struct kbase_device *kbdev); + +/** Change the policy to the one specified. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param policy The policy to change to (valid pointer returned from @ref kbase_pm_ca_list_policies) + */ +void kbase_pm_ca_set_policy(struct kbase_device *kbdev, const kbase_pm_ca_policy *policy); + +/** Retrieve a static list of the available policies. + * @param[out] policies An array pointer to take the list of policies. This may be NULL. + * The contents of this array must not be modified. + * + * @return The number of policies + */ +int kbase_pm_ca_list_policies(const kbase_pm_ca_policy * const **policies); + +#endif /* _KBASE_PM_CA_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.c b/drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.c new file mode 100755 index 00000000000..e391ecfce9b --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.c @@ -0,0 +1,62 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +/** + * @file mali_kbase_pm_ca_fixed.c + * A power policy implementing fixed core availability + */ + +#include +#include + +static void fixed_init(struct kbase_device *kbdev) +{ + kbdev->pm.ca_in_transition = MALI_FALSE; +} + +static void fixed_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static u64 fixed_get_core_mask(struct kbase_device *kbdev) +{ + return kbdev->shader_present_bitmap; +} + +static void fixed_update_core_status (struct kbase_device *kbdev, u64 cores_ready, u64 cores_transitioning) +{ + CSTD_UNUSED(kbdev); + CSTD_UNUSED(cores_ready); + CSTD_UNUSED(cores_transitioning); +} + +/** The @ref kbase_pm_policy structure for the fixed power policy. + * + * This is the static structure that defines the fixed power policy's callback and name. + */ +const kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = { + "fixed", /* name */ + fixed_init, /* init */ + fixed_term, /* term */ + fixed_get_core_mask, /* get_core_mask */ + fixed_update_core_status, /* update_core_status */ + 0u, /* flags */ + KBASE_PM_CA_POLICY_ID_FIXED, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops) diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.h b/drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.h new file mode 100755 index 00000000000..9d95e07a2d5 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.h @@ -0,0 +1,37 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_pm_ca_fixed.h + * A power policy implementing fixed core availability + */ + +#ifndef MALI_KBASE_PM_CA_FIXED_H +#define MALI_KBASE_PM_CA_FIXED_H + +/** + * Private structure for policy instance data. + * + * This contains data that is private to the particular power policy that is active. + */ +typedef struct kbasep_pm_ca_policy_fixed { + /** No state needed - just have a dummy variable here */ + int dummy; +} kbasep_pm_ca_policy_fixed; + +#endif /* MALI_KBASE_PM_CA_FIXED_H */ + diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.c new file mode 100755 index 00000000000..095e6f097ec --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.c @@ -0,0 +1,68 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_pm_coarse_demand.c + * "Coarse Demand" power management policy + */ + +#include +#include + +static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev) +{ + if (kbdev->pm.active_count == 0) + return 0; + + return kbdev->shader_present_bitmap; +} + +static mali_bool coarse_demand_get_core_active(struct kbase_device *kbdev) +{ + if (kbdev->pm.active_count == 0) + return MALI_FALSE; + + return MALI_TRUE; +} + +static void coarse_demand_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static void coarse_demand_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +/** The @ref kbase_pm_policy structure for the demand power policy. + * + * This is the static structure that defines the demand power policy's callback and name. + */ +const kbase_pm_policy kbase_pm_coarse_demand_policy_ops = { + "coarse_demand", /* name */ + coarse_demand_init, /* init */ + coarse_demand_term, /* term */ + coarse_demand_get_core_mask, /* get_core_mask */ + coarse_demand_get_core_active, /* get_core_active */ + 0u, /* flags */ + KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops) diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.h new file mode 100755 index 00000000000..afe3fc93b42 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.h @@ -0,0 +1,60 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_pm_coarse_demand.h + * "Coarse Demand" power management policy + */ + +#ifndef MALI_KBASE_PM_COARSE_DEMAND_H +#define MALI_KBASE_PM_COARSE_DEMAND_H + +/** + * The "Coarse" demand power management policy has the following + * characteristics: + * - When KBase indicates that the GPU will be powered up, but we don't yet + * know which Job Chains are to be run: + * - All Shader Cores are powered up, regardless of whether or not they will + * be needed later. + * - When KBase indicates that a set of Shader Cores are needed to submit the + * currently queued Job Chains: + * - All Shader Cores are kept powered, regardless of whether or not they will + * be needed + * - When KBase indicates that the GPU need not be powered: + * - The Shader Cores are powered off, and the GPU itself is powered off too. + * + * @note: + * - KBase indicates the GPU will be powered up when it has a User Process that + * has just started to submit Job Chains. + * - KBase indicates the GPU need not be powered when all the Job Chains from + * User Processes have finished, and it is waiting for a User Process to + * submit some more Job Chains. + */ + +/** + * Private structure for policy instance data. + * + * This contains data that is private to the particular power policy that is active. 
+ */ +typedef struct kbasep_pm_policy_coarse_demand { + /** No state needed - just have a dummy variable here */ + int dummy; +} kbasep_pm_policy_coarse_demand; + +#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/mali_kbase_pm_demand.c new file mode 100755 index 00000000000..fd94294433f --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_demand.c @@ -0,0 +1,70 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_pm_demand.c + * A simple demand based power management policy + */ + +#include +#include + +static u64 demand_get_core_mask(struct kbase_device *kbdev) +{ + u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap; + + if (0 == kbdev->pm.active_count) + return 0; + + return desired; +} + +static mali_bool demand_get_core_active (struct kbase_device *kbdev) +{ + if (0 == kbdev->pm.active_count) + return MALI_FALSE; + + return MALI_TRUE; +} + +static void demand_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static void demand_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +/** The @ref kbase_pm_policy structure for the demand power policy. + * + * This is the static structure that defines the demand power policy's callback and name. + */ +const kbase_pm_policy kbase_pm_demand_policy_ops = { + "demand", /* name */ + demand_init, /* init */ + demand_term, /* term */ + demand_get_core_mask, /* get_core_mask */ + demand_get_core_active, /* get_core_active */ + 0u, /* flags */ + KBASE_PM_POLICY_ID_DEMAND, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops) diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/mali_kbase_pm_demand.h new file mode 100755 index 00000000000..8579181d7dd --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_demand.h @@ -0,0 +1,57 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_pm_demand.h + * A simple demand based power management policy + */ + +#ifndef MALI_KBASE_PM_DEMAND_H +#define MALI_KBASE_PM_DEMAND_H + +/** + * The demand power management policy has the following characteristics: + * - When KBase indicates that the GPU will be powered up, but we don't yet + * know which Job Chains are to be run: + * - The Shader Cores are not powered up + * - When KBase indicates that a set of Shader Cores are needed to submit the + * currently queued Job Chains: + * - Only those Shader Cores are powered up + * - When KBase indicates that the GPU need not be powered: + * - The Shader Cores are powered off, and the GPU itself is powered off too. + * + * @note: + * - KBase indicates the GPU will be powered up when it has a User Process that + * has just started to submit Job Chains. + * - KBase indicates the GPU need not be powered when all the Job Chains from + * User Processes have finished, and it is waiting for a User Process to + * submit some more Job Chains. + */ + +/** + * Private structure for policy instance data. + * + * This contains data that is private to the particular power policy that is active. + */ +typedef struct kbasep_pm_policy_demand { + /** No state needed - just have a dummy variable here */ + int dummy; +} kbasep_pm_policy_demand; + +#endif /* MALI_KBASE_PM_DEMAND_H */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/mali_kbase_pm_driver.c new file mode 100755 index 00000000000..1a8cdacd50e --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_driver.c @@ -0,0 +1,949 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_pm_driver.c + * Base kernel Power Management hardware control + */ + +#include +#include +#include +#include +#include + +#if MALI_MOCK_TEST +#define MOCKABLE(function) function##_original +#else +#define MOCKABLE(function) function +#endif /* MALI_MOCK_TEST */ + +/** Actions that can be performed on a core. + * + * This enumeration is private to the file. Its values are set to allow @ref core_type_to_reg function, + * which decodes this enumeration, to be simpler and more efficient. + */ +typedef enum kbasep_pm_action { + ACTION_PRESENT = 0, + ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO), + ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO), + ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO), + ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO), + ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO) +} kbasep_pm_action; + +/** Decode a core type and action to a register. + * + * Given a core type (defined by @ref kbase_pm_core_type) and an action (defined by @ref kbasep_pm_action) this + * function will return the register offset that will perform the action on the core type. The register returned is + * the \c _LO register and an offset must be applied to use the \c _HI register. 
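+ *
+ * As a worked example: for shader cores, KBASE_PM_CORE_SHADER equals
+ * SHADER_PRESENT_LO and ACTION_READY equals
+ * (SHADER_READY_LO - SHADER_PRESENT_LO), so core_type_to_reg() returns
+ * SHADER_READY_LO; adding 4 to the returned offset addresses the matching
+ * \c _HI register.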
+ * + * @param core_type The type of core + * @param action The type of action + * + * @return The register offset of the \c _LO register that performs an action of type \c action on a core of type \c + * core_type. + */ +static u32 core_type_to_reg(kbase_pm_core_type core_type, kbasep_pm_action action) +{ + return core_type + action; +} + +/** Invokes an action on a core set + * + * This function performs the action given by \c action on a set of cores of a type given by \c core_type. It is a + * static function used by @ref kbase_pm_transition_core_type + * + * @param kbdev The kbase device structure of the device + * @param core_type The type of core that the action should be performed on + * @param cores A bit mask of cores to perform the action on (low 32 bits) + * @param action The action to perform on the cores + */ +STATIC void kbase_pm_invoke(kbase_device *kbdev, kbase_pm_core_type core_type, u64 cores, kbasep_pm_action action) +{ + u32 reg; + u32 lo = cores & 0xFFFFFFFF; + u32 hi = (cores >> 32) & 0xFFFFFFFF; + + lockdep_assert_held(&kbdev->pm.power_change_lock); + + reg = core_type_to_reg(core_type, action); + + KBASE_DEBUG_ASSERT(reg); +#ifdef CONFIG_MALI_GATOR_SUPPORT + if (cores) { + if (action == ACTION_PWRON) + kbase_trace_mali_pm_power_on(core_type, cores); + else if (action == ACTION_PWROFF) + kbase_trace_mali_pm_power_off(core_type, cores); + } +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + /* Tracing */ + if (cores) { + if (action == ACTION_PWRON) + switch (core_type) { + case KBASE_PM_CORE_SHADER: + KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u, lo); + break; + case KBASE_PM_CORE_TILER: + KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL, NULL, 0u, lo); + break; + case KBASE_PM_CORE_L2: + KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL, 0u, lo); + break; + default: + /* L3 not handled */ + break; + } + else if (action == ACTION_PWROFF) + switch (core_type) { + case KBASE_PM_CORE_SHADER: + KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL, 0u, lo); + break; + case KBASE_PM_CORE_TILER: + KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL, NULL, 0u, lo); + break; + case KBASE_PM_CORE_L2: + KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL, 0u, lo); + break; + default: + /* L3 not handled */ + break; + } + } + + if (lo != 0) + kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL); + + if (hi != 0) + kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL); +} + +/** Get information about a core set + * + * This function gets information (chosen by \c action) about a set of cores of a type given by \c core_type. It is a + * static function used by @ref kbase_pm_get_present_cores, @ref kbase_pm_get_active_cores, @ref + * kbase_pm_get_trans_cores and @ref kbase_pm_get_ready_cores. 
+ * + * @param kbdev The kbase device structure of the device + * @param core_type The type of core that the should be queried + * @param action The property of the cores to query + * + * @return A bit mask specifying the state of the cores + */ +static u64 kbase_pm_get_state(kbase_device *kbdev, kbase_pm_core_type core_type, kbasep_pm_action action) +{ + u32 reg; + u32 lo, hi; + + reg = core_type_to_reg(core_type, action); + + KBASE_DEBUG_ASSERT(reg); + + lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL); + hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL); + + return (((u64) hi) << 32) | ((u64) lo); +} + +void kbasep_pm_read_present_cores(kbase_device *kbdev) +{ + kbdev->shader_present_bitmap = kbase_pm_get_state(kbdev, KBASE_PM_CORE_SHADER, ACTION_PRESENT); + kbdev->tiler_present_bitmap = kbase_pm_get_state(kbdev, KBASE_PM_CORE_TILER, ACTION_PRESENT); + kbdev->l2_present_bitmap = kbase_pm_get_state(kbdev, KBASE_PM_CORE_L2, ACTION_PRESENT); + kbdev->l3_present_bitmap = kbase_pm_get_state(kbdev, KBASE_PM_CORE_L3, ACTION_PRESENT); + + kbdev->shader_inuse_bitmap = 0; + kbdev->shader_needed_bitmap = 0; + kbdev->shader_available_bitmap = 0; + kbdev->tiler_available_bitmap = 0; + kbdev->l2_users_count = 0; + kbdev->l2_available_bitmap = 0; + kbdev->tiler_needed_cnt = 0; + kbdev->tiler_inuse_cnt = 0; + + memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt)); +} + +KBASE_EXPORT_TEST_API(kbasep_pm_read_present_cores) + +/** Get the cores that are present + */ +u64 kbase_pm_get_present_cores(kbase_device *kbdev, kbase_pm_core_type type) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + switch (type) { + case KBASE_PM_CORE_L3: + return kbdev->l3_present_bitmap; + break; + case KBASE_PM_CORE_L2: + return kbdev->l2_present_bitmap; + break; + case KBASE_PM_CORE_SHADER: + return kbdev->shader_present_bitmap; + break; + case KBASE_PM_CORE_TILER: + return kbdev->tiler_present_bitmap; + break; + } + KBASE_DEBUG_ASSERT(0); + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores) + +/** Get the cores that are "active" (busy processing work) + */ +u64 kbase_pm_get_active_cores(kbase_device *kbdev, kbase_pm_core_type type) +{ + return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE); +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores) + +/** Get the cores that are transitioning between power states + */ +u64 kbase_pm_get_trans_cores(kbase_device *kbdev, kbase_pm_core_type type) +{ + return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS); +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores) +/** Get the cores that are powered on + */ +u64 kbase_pm_get_ready_cores(kbase_device *kbdev, kbase_pm_core_type type) +{ + u64 result; + result = kbase_pm_get_state(kbdev, type, ACTION_READY); + + switch (type) { + case KBASE_PM_CORE_SHADER: + KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u, (u32) result); + break; + case KBASE_PM_CORE_TILER: + KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u, (u32) result); + break; + case KBASE_PM_CORE_L2: + KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u, (u32) result); + break; + default: + /* NB: L3 not currently traced */ + break; + } + + return result; +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores) + +/** Perform power transitions for a particular core type. + * + * This function will perform any available power transitions to make the actual hardware state closer to the desired + * state. 
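+ * Power-up and power-down requests are issued through @ref kbase_pm_invoke on
+ * the PWRON and PWROFF registers of the given core type.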
If a core is currently transitioning then changes to the power state of that call cannot be made until the + * transition has finished. Cores which are not present in the hardware are ignored if they are specified in the + * desired_state bitmask, however the return value will always be 0 in this case. + * + * @param kbdev The kbase device + * @param type The core type to perform transitions for + * @param desired_state A bit mask of the desired state of the cores + * @param in_use A bit mask of the cores that are currently running jobs. + * These cores have to be kept powered up because there are jobs + * running (or about to run) on them. + * @param[out] available Receives a bit mask of the cores that the job scheduler can use to submit jobs to. + * May be NULL if this is not needed. + * @param[in,out] powering_on Bit mask to update with cores that are transitioning to a power-on state. + * + * @return MALI_TRUE if the desired state has been reached, MALI_FALSE otherwise + */ +STATIC mali_bool kbase_pm_transition_core_type(kbase_device *kbdev, kbase_pm_core_type type, u64 desired_state, + u64 in_use, u64 * const available, u64 *powering_on) +{ + u64 present; + u64 ready; + u64 trans; + u64 powerup; + u64 powerdown; + u64 powering_on_trans; + u64 desired_state_in_use; + + lockdep_assert_held(&kbdev->pm.power_change_lock); + + /* Get current state */ + present = kbase_pm_get_present_cores(kbdev, type); + trans = kbase_pm_get_trans_cores(kbdev, type); + ready = kbase_pm_get_ready_cores(kbdev, type); + + powering_on_trans = trans & *powering_on; + *powering_on = powering_on_trans; + + if (available != NULL) + *available = (ready | powering_on_trans) & desired_state; + + /* Update desired state to include the in-use cores. These have to be kept powered up because there are jobs + * running or about to run on these cores + */ + desired_state_in_use = desired_state | in_use; + + /* Update state of whether l2 caches are powered */ + if (type == KBASE_PM_CORE_L2) { + if ((ready == present) && (desired_state_in_use == ready) && (trans == 0)) { + /* All are ready, none will be turned off, and none are transitioning */ + kbdev->pm.l2_powered = 1; + if (kbdev->l2_users_count > 0) { + /* Notify any registered l2 cache users (optimized out when no users waiting) */ + wake_up(&kbdev->pm.l2_powered_wait); + } + } else { + kbdev->pm.l2_powered = 0; + } + } + + if (desired_state_in_use == ready && (trans == 0)) + return MALI_TRUE; + + /* Restrict the cores to those that are actually present */ + powerup = desired_state_in_use & present; + powerdown = (~desired_state_in_use) & present; + + /* Restrict to cores that are not already in the desired state */ + powerup &= ~ready; + powerdown &= ready; + + /* Don't transition any cores that are already transitioning, except for + * Mali cores that support the following case: + * + * If the SHADER_PWRON or TILER_PWRON registers are written to turn on + * a core that is currently transitioning to power off, then this is + * remembered and the shader core is automatically powered up again once + * the original transition completes. Once the automatic power on is + * complete any job scheduled on the shader core should start. 
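+	 * For such cores the power-up mask below is therefore only filtered
+	 * against power-on transitions (powering_on_trans), so PWRON can still
+	 * be written for cores that are in the middle of powering off.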
+ */ + powerdown &= ~trans; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS)) + if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type) + trans = powering_on_trans; /* for exception cases, only mask off cores in power on transitions */ + + powerup &= ~trans; + + /* Perform transitions if any */ + kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON); + kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF); + + /* Recalculate cores transitioning on, and re-evaluate our state */ + powering_on_trans |= powerup; + *powering_on = powering_on_trans; + if (available != NULL) + *available = (ready | powering_on_trans) & desired_state; + + return MALI_FALSE; +} + +KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type) + +/** Determine which caches should be on for a particular core state. + * + * This function takes a bit mask of the present caches and the cores (or caches) that are attached to the caches that + * will be powered. It then computes which caches should be turned on to allow the cores requested to be powered up. + * + * @param present The bit mask of present caches + * @param cores_powered A bit mask of cores (or L2 caches) that are desired to be powered + * + * @return A bit mask of the caches that should be turned on + */ +STATIC u64 get_desired_cache_status(u64 present, u64 cores_powered) +{ + u64 desired = 0; + + while (present) { + /* Find out which is the highest set bit */ + u64 bit = fls64(present) - 1; + u64 bit_mask = 1ull << bit; + /* Create a mask which has all bits from 'bit' upwards set */ + + u64 mask = ~(bit_mask - 1); + + /* If there are any cores powered at this bit or above (that haven't previously been processed) then we need + * this core on */ + if (cores_powered & mask) + desired |= bit_mask; + + /* Remove bits from cores_powered and present */ + cores_powered &= ~mask; + present &= ~bit_mask; + } + + return desired; +} + +KBASE_EXPORT_TEST_API(get_desired_cache_status) + +mali_bool MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) +{ + mali_bool cores_are_available = MALI_FALSE; + mali_bool in_desired_state = MALI_TRUE; + u64 desired_l2_state; + u64 desired_l3_state; + u64 cores_powered; + u64 tiler_available_bitmap; + u64 shader_available_bitmap; + u64 shader_ready_bitmap; + u64 shader_transitioning_bitmap; + u64 l2_available_bitmap; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.power_change_lock); + + spin_lock(&kbdev->pm.gpu_powered_lock); + if (kbdev->pm.gpu_powered == MALI_FALSE) { + spin_unlock(&kbdev->pm.gpu_powered_lock); + if (kbdev->pm.desired_shader_state == 0 && kbdev->pm.desired_tiler_state == 0) + return MALI_TRUE; + return MALI_FALSE; + } + + /* Trace that a change-state is being requested, and that it took + * (effectively) no time to start it. This is useful for counting how many + * state changes occurred, in a way that's backwards-compatible with + * processing the trace data */ + kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE); + kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE); + + /* If any cores are already powered then, we must keep the caches on */ + cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + + cores_powered |= kbdev->pm.desired_shader_state; + + /* If there are l2 cache users registered, keep all l2s powered even if all other cores are off. 
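+	 * l2_users_count tracks callers that have explicitly requested that the
+	 * L2 caches stay powered (such as hardware counter instrumentation);
+	 * while it is non-zero the caches must not be switched off even when no
+	 * shader cores are required.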
*/ + if (kbdev->l2_users_count > 0) + cores_powered |= kbdev->l2_present_bitmap; + + desired_l2_state = get_desired_cache_status(kbdev->l2_present_bitmap, cores_powered); + + /* If any l2 cache is on, then enable l2 #0, for use by job manager */ + if (0 != desired_l2_state) { + desired_l2_state |= 1; + /* Also enable tiler if l2 cache is powered */ + kbdev->pm.desired_tiler_state = kbdev->tiler_present_bitmap; + } else { + kbdev->pm.desired_tiler_state = 0; + } + + desired_l3_state = get_desired_cache_status(kbdev->l3_present_bitmap, desired_l2_state); + + in_desired_state &= kbase_pm_transition_core_type(kbdev, KBASE_PM_CORE_L3, desired_l3_state, 0, NULL, &kbdev->pm.powering_on_l3_state); + in_desired_state &= kbase_pm_transition_core_type(kbdev, KBASE_PM_CORE_L2, desired_l2_state, 0, &l2_available_bitmap, &kbdev->pm.powering_on_l2_state); + + if( kbdev->l2_available_bitmap != l2_available_bitmap) + { + KBASE_TIMELINE_POWER_L2(kbdev,l2_available_bitmap); + } + + kbdev->l2_available_bitmap = l2_available_bitmap; + + if (in_desired_state) { + + in_desired_state &= kbase_pm_transition_core_type(kbdev, KBASE_PM_CORE_TILER, kbdev->pm.desired_tiler_state, 0, &tiler_available_bitmap, &kbdev->pm.powering_on_tiler_state); + in_desired_state &= kbase_pm_transition_core_type(kbdev, KBASE_PM_CORE_SHADER, kbdev->pm.desired_shader_state, kbdev->shader_inuse_bitmap, &shader_available_bitmap, &kbdev->pm.powering_on_shader_state); + + if (kbdev->shader_available_bitmap != shader_available_bitmap) { + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, NULL, 0u, (u32) shader_available_bitmap); + KBASE_TIMELINE_POWER_SHADER(kbdev, shader_available_bitmap); + } + + kbdev->shader_available_bitmap = shader_available_bitmap; + + if (kbdev->tiler_available_bitmap != tiler_available_bitmap) { + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, NULL, 0u, (u32) tiler_available_bitmap); + KBASE_TIMELINE_POWER_TILER(kbdev, tiler_available_bitmap); + } + + kbdev->tiler_available_bitmap = tiler_available_bitmap; + + } else if ((l2_available_bitmap & kbdev->tiler_present_bitmap) != kbdev->tiler_present_bitmap) { + tiler_available_bitmap = 0; + + if (kbdev->tiler_available_bitmap != tiler_available_bitmap) { + KBASE_TIMELINE_POWER_TILER(kbdev, tiler_available_bitmap); + } + + kbdev->tiler_available_bitmap = tiler_available_bitmap; + } + + /* State updated for slow-path waiters */ + kbdev->pm.gpu_in_desired_state = in_desired_state; + + shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER); + + /* Determine whether the cores are now available (even if the set of + * available cores is empty). 
Note that they can be available even if we've + * not finished transitioning to the desired state */ + if ((kbdev->shader_available_bitmap & kbdev->pm.desired_shader_state) == kbdev->pm.desired_shader_state + && (kbdev->tiler_available_bitmap & kbdev->pm.desired_tiler_state) == kbdev->pm.desired_tiler_state) { + cores_are_available = MALI_TRUE; + + KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u, (u32)(kbdev->shader_available_bitmap & kbdev->pm.desired_shader_state)); + KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u, (u32)(kbdev->tiler_available_bitmap & kbdev->pm.desired_tiler_state)); + + /* Log timelining information about handling events that power up + * cores, to match up either with immediate submission either because + * cores already available, or from PM IRQ */ + if (!in_desired_state) + kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + } + + if (in_desired_state) { + KBASE_DEBUG_ASSERT(cores_are_available); + +#ifdef CONFIG_MALI_GATOR_SUPPORT + kbase_trace_mali_pm_status(KBASE_PM_CORE_L3, kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L3)); + kbase_trace_mali_pm_status(KBASE_PM_CORE_L2, kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)); + kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER, kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER)); + kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER, kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER)); +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + + KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL, kbdev->pm.gpu_in_desired_state, (u32)kbdev->pm.desired_shader_state); + KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u, (u32)kbdev->pm.desired_tiler_state); + + /* Log timelining information for synchronous waiters */ + kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + /* Wake slow-path waiters. Job scheduler does not use this. */ + KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); + wake_up(&kbdev->pm.gpu_in_desired_state_wait); + } + + spin_unlock(&kbdev->pm.gpu_powered_lock); + + /* kbase_pm_ca_update_core_status can cause one-level recursion into + * this function, so it must only be called once all changes to kbdev + * have been committed, and after the gpu_powered_lock has been + * dropped. 
*/ + if (kbdev->shader_ready_bitmap != shader_ready_bitmap || + kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) { + kbdev->shader_ready_bitmap = shader_ready_bitmap; + kbdev->shader_transitioning_bitmap = shader_transitioning_bitmap; + + kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap, shader_transitioning_bitmap); + } + + /* The core availability policy is not allowed to keep core group 0 off */ + if (!((shader_ready_bitmap | shader_transitioning_bitmap) & kbdev->gpu_props.props.coherency_info.group[0].core_mask) && + !(kbase_pm_ca_get_core_mask(kbdev) & kbdev->gpu_props.props.coherency_info.group[0].core_mask)) + BUG(); + + /* The core availability policy is allowed to keep core group 1 off, + * but all jobs specifically targeting CG1 must fail */ + if (!((shader_ready_bitmap | shader_transitioning_bitmap) & kbdev->gpu_props.props.coherency_info.group[1].core_mask) && + !(kbase_pm_ca_get_core_mask(kbdev) & kbdev->gpu_props.props.coherency_info.group[1].core_mask)) + kbdev->pm.cg1_disabled = MALI_TRUE; + else + kbdev->pm.cg1_disabled = MALI_FALSE; + + return cores_are_available; +} +KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock) + +void kbase_pm_check_transitions_sync(struct kbase_device *kbdev) +{ + unsigned long flags; + mali_bool cores_are_available; + /* Force the transition to be checked and reported - the cores may be + * 'available' (for job submission) but not fully powered up. */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + /* Don't need 'cores_are_available', because we don't return anything */ + CSTD_UNUSED(cores_are_available); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + /* Wait for cores */ + wait_event(kbdev->pm.gpu_in_desired_state_wait, kbdev->pm.gpu_in_desired_state); + + /* Log timelining information that a change in state has completed */ + kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); +} +KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync) + +void kbase_pm_enable_interrupts(kbase_device *kbdev) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + /* + * Clear all interrupts, + * and unmask them all. + */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL); +} + +KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts) + +void kbase_pm_disable_interrupts(kbase_device *kbdev) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + /* + * Mask all interrupts, + * and clear them all. 
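+	 * This mirrors kbase_pm_enable_interrupts() in reverse order: masking
+	 * first ensures that no further interrupts are raised while the pending
+	 * ones are being cleared.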
+ */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, NULL); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); +} + +KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts) + +/* + * pmu layout: + * 0x0000: PMU TAG (RO) (0xCAFECAFE) + * 0x0004: PMU VERSION ID (RO) (0x00000000) + * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE) + */ +void kbase_pm_clock_on(kbase_device *kbdev, mali_bool is_resume) +{ + mali_bool reset_required = is_resume; + unsigned long flags; + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + if (kbdev->pm.gpu_powered) { + /* Already turned on */ + KBASE_DEBUG_ASSERT(!is_resume); + return; + } + + KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u); + + if (is_resume && kbdev->pm.callback_power_resume) { + kbdev->pm.callback_power_resume(kbdev); + } else if (kbdev->pm.callback_power_on) { + if (kbdev->pm.callback_power_on(kbdev)) + reset_required = MALI_TRUE; + } + + if (reset_required) { + /* GPU state was lost, reset GPU to ensure it is in a + * consistent state */ + kbase_pm_init_hw(kbdev, MALI_TRUE); + } + + spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags); + kbdev->pm.gpu_powered = MALI_TRUE; + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); + + /* Lastly, enable the interrupts */ + kbase_pm_enable_interrupts(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_clock_on) + +void kbase_pm_clock_off(kbase_device *kbdev, mali_bool is_suspend) +{ + unsigned long flags; + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* ASSERT that the cores should now be unavailable. No lock needed. */ + KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u); + + if (!kbdev->pm.gpu_powered) { + /* Already turned off */ + if (is_suspend && kbdev->pm.callback_power_suspend) + kbdev->pm.callback_power_suspend(kbdev); + return; + } + + KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u); + + /* Disable interrupts. This also clears any outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure that any IRQ handlers have finished */ + kbase_synchronize_irqs(kbdev); + + /* The GPU power may be turned off from this point */ + spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags); + kbdev->pm.gpu_powered = MALI_FALSE; + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); + + if (is_suspend && kbdev->pm.callback_power_suspend) + kbdev->pm.callback_power_suspend(kbdev); + else if (kbdev->pm.callback_power_off) + kbdev->pm.callback_power_off(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_clock_off) + +struct kbasep_reset_timeout_data { + struct hrtimer timer; + mali_bool timed_out; + kbase_device *kbdev; +}; + +void kbase_pm_reset_done(kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + kbdev->pm.reset_done = MALI_TRUE; + wake_up(&kbdev->pm.reset_done_wait); +} + +/** + * Wait for the RESET_COMPLETED IRQ to occur, then reset the waiting state. 
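+ *
+ * Must be called with kbdev->pm.lock held. The wait is completed by
+ * kbase_pm_reset_done(), which is also invoked from the reset timeout timer
+ * (@ref kbasep_reset_timeout) so that callers cannot block forever.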
+ */ +STATIC void kbase_pm_wait_for_reset(kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.lock); + + wait_event(kbdev->pm.reset_done_wait, (kbdev->pm.reset_done)); + kbdev->pm.reset_done = MALI_FALSE; +} + +KBASE_EXPORT_TEST_API(kbase_pm_reset_done) + +static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) +{ + struct kbasep_reset_timeout_data *rtdata = container_of(timer, struct kbasep_reset_timeout_data, timer); + + rtdata->timed_out = 1; + + /* Set the wait queue to wake up kbase_pm_init_hw even though the reset hasn't completed */ + kbase_pm_reset_done(rtdata->kbdev); + + return HRTIMER_NORESTART; +} + +static void kbase_pm_hw_issues(kbase_device *kbdev) +{ + u32 value = 0; + u32 config_value; + + /* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443. */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443)) + value |= SC_LS_PAUSEBUFFER_DISABLE; + + /* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327. */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327)) + value |= SC_SDC_DISABLE_OQ_DISCARD; + + /* Enable alternative hardware counter selection if configured. */ + if (DEFAULT_ALTERNATIVE_HWC) + value |= SC_ALT_COUNTERS; + + /* Use software control of forward pixel kill when needed. See MIDEUR-174. */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_2121)) + value |= SC_OVERRIDE_FWD_PIXEL_KILL; + + /* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797)) + value |= SC_ENABLE_TEXGRD_FLAGS; + + if (value != 0) + kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), value, NULL); + + /* Limit the GPU bus bandwidth if the platform needs this. */ + value = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL); + + /* Limit read ID width for AXI */ + config_value = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_ARID_LIMIT); + value &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS); + value |= (config_value & 0x3) << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT; + + /* Limit write ID width for AXI */ + config_value = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_AWID_LIMIT); + value &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES); + value |= (config_value & 0x3) << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT; + + kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), value, NULL); +} + +mali_error kbase_pm_init_hw(kbase_device *kbdev, mali_bool enable_irqs ) +{ + unsigned long flags; + struct kbasep_reset_timeout_data rtdata; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* Ensure the clock is on before attempting to access the hardware */ + if (!kbdev->pm.gpu_powered) { + if (kbdev->pm.callback_power_on) + kbdev->pm.callback_power_on(kbdev); + + spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags); + kbdev->pm.gpu_powered = MALI_TRUE; + spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags); + } + + /* Ensure interrupts are off to begin with, this also clears any outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + + /* Prepare for the soft-reset */ + kbdev->pm.reset_done = MALI_FALSE; + + /* The cores should be made unavailable due to the reset */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + if (kbdev->shader_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, NULL, 0u, (u32)0u); + if (kbdev->tiler_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, NULL, 0u, (u32)0u); + 
kbdev->shader_available_bitmap = 0u; + kbdev->tiler_available_bitmap = 0u; + kbdev->l2_available_bitmap = 0u; + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + /* Soft reset the GPU */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_SOFT_RESET, NULL); + + /* Unmask the reset complete interrupt only */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED, NULL); + + /* Initialize a structure for tracking the status of the reset */ + rtdata.kbdev = kbdev; + rtdata.timed_out = 0; + + /* Create a timer to use as a timeout on the reset */ + hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + rtdata.timer.function = kbasep_reset_timeout; + + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), HRTIMER_MODE_REL); + + /* Wait for the RESET_COMPLETED interrupt to be raised */ + kbase_pm_wait_for_reset(kbdev); + + if (rtdata.timed_out == 0) { + /* GPU has been reset */ + hrtimer_cancel(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); + goto out; + } + + /* No interrupt has been received - check if the RAWSTAT register says the reset has completed */ + if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & RESET_COMPLETED) { + /* The interrupt is set in the RAWSTAT; this suggests that the interrupts are not getting to the CPU */ + dev_warn(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); + /* If interrupts aren't working we can't continue. */ + destroy_hrtimer_on_stack(&rtdata.timer); + goto out; + } + + /* The GPU doesn't seem to be responding to the reset so try a hard reset */ + dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", RESET_TIMEOUT); + KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET, NULL); + + /* Restart the timer to wait for the hard reset to complete */ + rtdata.timed_out = 0; + + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), HRTIMER_MODE_REL); + + /* Wait for the RESET_COMPLETED interrupt to be raised */ + kbase_pm_wait_for_reset(kbdev); + + if (rtdata.timed_out == 0) { + /* GPU has been reset */ + hrtimer_cancel(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); + goto out; + } + + destroy_hrtimer_on_stack(&rtdata.timer); + + dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", RESET_TIMEOUT); + + /* The GPU still hasn't reset, give up */ + return MALI_ERROR_FUNCTION_FAILED; + + out: + /* Re-enable interrupts if requested*/ + if ( enable_irqs ) + { + kbase_pm_enable_interrupts(kbdev); + } + /* If cycle counter was in use-re enable it */ + spin_lock_irqsave(&kbdev->pm.gpu_cycle_counter_requests_lock, flags); + + if (kbdev->pm.gpu_cycle_counter_requests) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CYCLE_COUNT_START, NULL); + + spin_unlock_irqrestore(&kbdev->pm.gpu_cycle_counter_requests_lock, flags); + + kbase_pm_hw_issues(kbdev); + + return MALI_ERROR_NONE; +} + +KBASE_EXPORT_TEST_API(kbase_pm_init_hw) + +void kbase_pm_request_gpu_cycle_counter(kbase_device *kbdev) +{ + unsigned long flags; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + KBASE_DEBUG_ASSERT(kbdev->pm.gpu_powered); + + spin_lock_irqsave(&kbdev->pm.gpu_cycle_counter_requests_lock, flags); + + KBASE_DEBUG_ASSERT(kbdev->pm.gpu_cycle_counter_requests < INT_MAX); + + ++kbdev->pm.gpu_cycle_counter_requests; 
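+	/* Only the first request actually starts the cycle counter; subsequent
+	 * requests just take an extra reference. */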
+ + if (1 == kbdev->pm.gpu_cycle_counter_requests) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CYCLE_COUNT_START, NULL); + + spin_unlock_irqrestore(&kbdev->pm.gpu_cycle_counter_requests_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter) + +void kbase_pm_release_gpu_cycle_counter(kbase_device *kbdev) +{ + unsigned long flags; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.gpu_cycle_counter_requests_lock, flags); + + KBASE_DEBUG_ASSERT(kbdev->pm.gpu_cycle_counter_requests > 0); + + --kbdev->pm.gpu_cycle_counter_requests; + + if (0 == kbdev->pm.gpu_cycle_counter_requests) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CYCLE_COUNT_STOP, NULL); + + spin_unlock_irqrestore(&kbdev->pm.gpu_cycle_counter_requests_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter) diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/mali_kbase_pm_metrics.c new file mode 100755 index 00000000000..50450ed6dc3 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_metrics.c @@ -0,0 +1,266 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_pm_metrics.c + * Metrics for power management + */ + +#include +#include + +/* When VSync is being hit aim for utilisation between 70-90% */ +#define KBASE_PM_VSYNC_MIN_UTILISATION 70 +#define KBASE_PM_VSYNC_MAX_UTILISATION 90 +/* Otherwise aim for 10-40% */ +#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10 +#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40 + +/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns + This gives a maximum period between samples of 2^(32+8)/100 ns = slightly under 11s. 
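+   (The factor of 100 comes from the "100 * time_busy" product computed in
+   kbase_pm_get_dvfs_utilisation(), which must itself fit in 32 bits.)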
+ Exceeding this will cause overflow */ +#define KBASE_PM_TIME_SHIFT 8 + +static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) +{ + unsigned long flags; + kbase_pm_dvfs_action action; + kbasep_pm_metrics_data *metrics; + + KBASE_DEBUG_ASSERT(timer != NULL); + + metrics = container_of(timer, kbasep_pm_metrics_data, timer); + action = kbase_pm_get_dvfs_action(metrics->kbdev); + + spin_lock_irqsave(&metrics->lock, flags); + + if (metrics->timer_active) + hrtimer_start(timer, + HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.platform_dvfs_frequency), + HRTIMER_MODE_REL); + + spin_unlock_irqrestore(&metrics->lock, flags); + + return HRTIMER_NORESTART; +} + +mali_error kbasep_pm_metrics_init(kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbdev->pm.metrics.kbdev = kbdev; + kbdev->pm.metrics.vsync_hit = 0; + kbdev->pm.metrics.utilisation = 0; + + kbdev->pm.metrics.time_period_start = ktime_get(); + kbdev->pm.metrics.time_busy = 0; + kbdev->pm.metrics.time_idle = 0; + kbdev->pm.metrics.gpu_active = MALI_TRUE; + kbdev->pm.metrics.timer_active = MALI_TRUE; + + spin_lock_init(&kbdev->pm.metrics.lock); + + hrtimer_init(&kbdev->pm.metrics.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kbdev->pm.metrics.timer.function = dvfs_callback; + + hrtimer_start(&kbdev->pm.metrics.timer, HR_TIMER_DELAY_MSEC(kbdev->pm.platform_dvfs_frequency), HRTIMER_MODE_REL); + + kbase_pm_register_vsync_callback(kbdev); + + return MALI_ERROR_NONE; +} + +KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init) + +void kbasep_pm_metrics_term(kbase_device *kbdev) +{ + unsigned long flags; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.metrics.lock, flags); + kbdev->pm.metrics.timer_active = MALI_FALSE; + spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags); + + hrtimer_cancel(&kbdev->pm.metrics.timer); + + kbase_pm_unregister_vsync_callback(kbdev); +} + +KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term) + +void kbasep_pm_record_gpu_idle(kbase_device *kbdev) +{ + unsigned long flags; + ktime_t now; + ktime_t diff; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.metrics.lock, flags); + + KBASE_DEBUG_ASSERT(kbdev->pm.metrics.gpu_active == MALI_TRUE); + + kbdev->pm.metrics.gpu_active = MALI_FALSE; + + now = ktime_get(); + diff = ktime_sub(now, kbdev->pm.metrics.time_period_start); + + kbdev->pm.metrics.time_busy += (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + kbdev->pm.metrics.time_period_start = now; + + spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags); +} + +KBASE_EXPORT_TEST_API(kbasep_pm_record_gpu_idle) + +void kbasep_pm_record_gpu_active(kbase_device *kbdev) +{ + unsigned long flags; + ktime_t now; + ktime_t diff; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.metrics.lock, flags); + + KBASE_DEBUG_ASSERT(kbdev->pm.metrics.gpu_active == MALI_FALSE); + + kbdev->pm.metrics.gpu_active = MALI_TRUE; + + now = ktime_get(); + diff = ktime_sub(now, kbdev->pm.metrics.time_period_start); + + kbdev->pm.metrics.time_idle += (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + kbdev->pm.metrics.time_period_start = now; + + spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags); +} + +KBASE_EXPORT_TEST_API(kbasep_pm_record_gpu_active) + +void kbase_pm_report_vsync(kbase_device *kbdev, int buffer_updated) +{ + unsigned long flags; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.metrics.lock, flags); + kbdev->pm.metrics.vsync_hit = buffer_updated; + spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags); +} + 
+KBASE_EXPORT_TEST_API(kbase_pm_report_vsync) + +/*caller needs to hold kbdev->pm.metrics.lock before calling this function*/ +int kbase_pm_get_dvfs_utilisation(kbase_device *kbdev) +{ + int utilisation = 0; + ktime_t now = ktime_get(); + ktime_t diff; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + diff = ktime_sub(now, kbdev->pm.metrics.time_period_start); + + if (kbdev->pm.metrics.gpu_active) { + kbdev->pm.metrics.time_busy += (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + kbdev->pm.metrics.time_period_start = now; + } else { + kbdev->pm.metrics.time_idle += (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + kbdev->pm.metrics.time_period_start = now; + } + + if (kbdev->pm.metrics.time_idle + kbdev->pm.metrics.time_busy == 0) { + /* No data - so we return NOP */ + utilisation = -1; + goto out; + } + + utilisation = (100 * kbdev->pm.metrics.time_busy) / (kbdev->pm.metrics.time_idle + kbdev->pm.metrics.time_busy); + + out: + + kbdev->pm.metrics.time_idle = 0; + kbdev->pm.metrics.time_busy = 0; + + return utilisation; +} + +kbase_pm_dvfs_action kbase_pm_get_dvfs_action(kbase_device *kbdev) +{ + unsigned long flags; + int utilisation; + kbase_pm_dvfs_action action; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.metrics.lock, flags); + + utilisation = kbase_pm_get_dvfs_utilisation(kbdev); + + if (utilisation < 0) { + action = KBASE_PM_DVFS_NOP; + utilisation = 0; + goto out; + } + + if (kbdev->pm.metrics.vsync_hit) { + /* VSync is being met */ + if (utilisation < KBASE_PM_VSYNC_MIN_UTILISATION) + action = KBASE_PM_DVFS_CLOCK_DOWN; + else if (utilisation > KBASE_PM_VSYNC_MAX_UTILISATION) + action = KBASE_PM_DVFS_CLOCK_UP; + else + action = KBASE_PM_DVFS_NOP; + } else { + /* VSync is being missed */ + if (utilisation < KBASE_PM_NO_VSYNC_MIN_UTILISATION) + action = KBASE_PM_DVFS_CLOCK_DOWN; + else if (utilisation > KBASE_PM_NO_VSYNC_MAX_UTILISATION) + action = KBASE_PM_DVFS_CLOCK_UP; + else + action = KBASE_PM_DVFS_NOP; + } + + kbdev->pm.metrics.utilisation = utilisation; + out: +#ifdef CONFIG_MALI_MIDGARD_DVFS + kbase_platform_dvfs_event(kbdev, utilisation); +#endif /*CONFIG_MALI_MIDGARD_DVFS */ + kbdev->pm.metrics.time_idle = 0; + kbdev->pm.metrics.time_busy = 0; + spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags); + + return action; +} +KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_action) + +mali_bool kbase_pm_metrics_is_active(kbase_device *kbdev) +{ + mali_bool isactive; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.metrics.lock, flags); + isactive = (kbdev->pm.metrics.timer_active == MALI_TRUE); + spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags); + + return isactive; +} +KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active) diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_metrics_dummy.c b/drivers/gpu/arm/midgard/mali_kbase_pm_metrics_dummy.c new file mode 100755 index 00000000000..81dd06b5ed6 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_metrics_dummy.c @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_pm_metrics_dummy.c + * Dummy Metrics for power management. + */ + +#include +#include + +void kbase_pm_register_vsync_callback(kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + /* no VSync metrics will be available */ + kbdev->pm.metrics.platform_data = NULL; +} + +void kbase_pm_unregister_vsync_callback(kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/mali_kbase_pm_policy.c new file mode 100755 index 00000000000..7ac2d86e800 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_policy.c @@ -0,0 +1,792 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_pm_policy.c + * Power policy API implementations + */ + +#include +#include +#include +#include + +extern const kbase_pm_policy kbase_pm_always_on_policy_ops; +extern const kbase_pm_policy kbase_pm_coarse_demand_policy_ops; +extern const kbase_pm_policy kbase_pm_demand_policy_ops; + +#if MALI_CUSTOMER_RELEASE == 0 +extern const kbase_pm_policy kbase_pm_fast_start_policy_ops; +extern const kbase_pm_policy kbase_pm_demand_always_powered_policy_ops; +#endif + +static const kbase_pm_policy *const policy_list[] = { +#ifdef CONFIG_MALI_NO_MALI + &kbase_pm_always_on_policy_ops, + &kbase_pm_demand_policy_ops, + &kbase_pm_coarse_demand_policy_ops, +#if MALI_CUSTOMER_RELEASE == 0 + &kbase_pm_demand_always_powered_policy_ops, + &kbase_pm_fast_start_policy_ops, +#endif +#else /* CONFIG_MALI_NO_MALI */ + &kbase_pm_demand_policy_ops, + &kbase_pm_always_on_policy_ops, + &kbase_pm_coarse_demand_policy_ops, +#if MALI_CUSTOMER_RELEASE == 0 + &kbase_pm_demand_always_powered_policy_ops, + &kbase_pm_fast_start_policy_ops, +#endif +#endif /* CONFIG_MALI_NO_MALI */ +}; + +/** The number of policies available in the system. + * This is derived from the number of functions listed in policy_get_functions. + */ +#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list)) + + +/* Function IDs for looking up Timeline Trace codes in kbase_pm_change_state_trace_code */ +typedef enum +{ + KBASE_PM_FUNC_ID_REQUEST_CORES_START, + KBASE_PM_FUNC_ID_REQUEST_CORES_END, + KBASE_PM_FUNC_ID_RELEASE_CORES_START, + KBASE_PM_FUNC_ID_RELEASE_CORES_END, + /* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither + * expect to hit it nor tend to hit it very much anyway. We can detect + * whether we need more instrumentation by a difference between + * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. 
*/ + + /* Must be the last */ + KBASE_PM_FUNC_ID_COUNT +} kbase_pm_func_id; + + +/* State changes during request/unrequest/release-ing cores */ +enum +{ + KBASE_PM_CHANGE_STATE_SHADER = (1u << 0), + KBASE_PM_CHANGE_STATE_TILER = (1u << 1), + + /* These two must be last */ + KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER|KBASE_PM_CHANGE_STATE_SHADER), + KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1 +}; +typedef u32 kbase_pm_change_state; + + +#ifdef CONFIG_MALI_TRACE_TIMELINE +/* Timeline Trace code lookups for each function */ +static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT][KBASE_PM_CHANGE_STATE_COUNT] = +{ + /* kbase_pm_request_cores */ + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0, + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER|KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, + + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0, + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER|KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, + + /* kbase_pm_release_cores */ + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0, + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER|KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, + + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0, + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER|KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END +}; + +STATIC INLINE void kbase_timeline_pm_cores_func(kbase_device *kbdev, + kbase_pm_func_id func_id, + kbase_pm_change_state state) +{ + int trace_code; + KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT); + KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) == state); + + trace_code = kbase_pm_change_state_trace_code[func_id][state]; + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code); +} + +#else /* CONFIG_MALI_TRACE_TIMELINE */ +STATIC INLINE void kbase_timeline_pm_cores_func(kbase_device *kbdev, + kbase_pm_func_id func_id, + kbase_pm_change_state state) +{ +} + +#endif /* CONFIG_MALI_TRACE_TIMELINE */ + +static enum hrtimer_restart kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) +{ + kbase_device *kbdev; + + kbdev = container_of(timer, kbase_device, pm.gpu_poweroff_timer); + + /* It is safe for this call to do nothing if the work item is already queued. 
+ * The worker function will read the must up-to-date state of kbdev->pm.gpu_poweroff_pending + * under lock. + * + * If a state change occurs while the worker function is processing, this + * call will succeed as a work item can be requeued once it has started + * processing. + */ + if (kbdev->pm.gpu_poweroff_pending) + queue_work(kbdev->pm.gpu_poweroff_wq, &kbdev->pm.gpu_poweroff_work); + + if (kbdev->pm.shader_poweroff_pending) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + if (kbdev->pm.shader_poweroff_pending) { + kbdev->pm.shader_poweroff_pending_time--; + + KBASE_DEBUG_ASSERT(kbdev->pm.shader_poweroff_pending_time >= 0); + + if (kbdev->pm.shader_poweroff_pending_time == 0) { + u64 prev_shader_state = kbdev->pm.desired_shader_state; + + kbdev->pm.desired_shader_state &= ~kbdev->pm.shader_poweroff_pending; + kbdev->pm.shader_poweroff_pending = 0; + + if (prev_shader_state != kbdev->pm.desired_shader_state || + kbdev->pm.ca_in_transition != MALI_FALSE) { + mali_bool cores_are_available; + + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START); + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END); + + /* Don't need 'cores_are_available', because we don't return anything */ + CSTD_UNUSED(cores_are_available); + } + } + } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + } + + hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time); + return HRTIMER_RESTART; +} + +static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data) +{ + unsigned long flags; + kbase_device *kbdev; + mali_bool do_poweroff = MALI_FALSE; + + kbdev = container_of(data, kbase_device, pm.gpu_poweroff_work); + + mutex_lock(&kbdev->pm.lock); + + if (kbdev->pm.gpu_poweroff_pending == 0) { + mutex_unlock(&kbdev->pm.lock); + return; + } + + kbdev->pm.gpu_poweroff_pending--; + + if (kbdev->pm.gpu_poweroff_pending > 0) { + mutex_unlock(&kbdev->pm.lock); + return; + } + + KBASE_DEBUG_ASSERT(kbdev->pm.gpu_poweroff_pending == 0); + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + /* Only power off the GPU if a request is still pending */ + if (kbdev->pm.pm_current_policy->get_core_active(kbdev) == MALI_FALSE) + do_poweroff = MALI_TRUE; + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + if (do_poweroff != MALI_FALSE) { + kbdev->pm.poweroff_timer_running = MALI_FALSE; + /* Power off the GPU */ + kbase_pm_do_poweroff(kbdev, MALI_FALSE); + hrtimer_cancel(&kbdev->pm.gpu_poweroff_timer); + } + + mutex_unlock(&kbdev->pm.lock); +} + +mali_error kbase_pm_policy_init(kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbdev->pm.gpu_poweroff_wq = alloc_workqueue("kbase_pm_do_poweroff", WQ_HIGHPRI | WQ_UNBOUND, 1); + if (NULL == kbdev->pm.gpu_poweroff_wq) + return MALI_ERROR_OUT_OF_MEMORY; + INIT_WORK(&kbdev->pm.gpu_poweroff_work, kbasep_pm_do_gpu_poweroff_wq); + + hrtimer_init(&kbdev->pm.gpu_poweroff_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kbdev->pm.gpu_poweroff_timer.function = kbasep_pm_do_gpu_poweroff_callback; + + kbdev->pm.pm_current_policy = policy_list[0]; + + kbdev->pm.pm_current_policy->init(kbdev); + + kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS)); + + kbdev->pm.poweroff_shader_ticks = kbasep_get_config_value(kbdev, kbdev->config_attributes, 
KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_SHADER); + kbdev->pm.poweroff_gpu_ticks = kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_GPU); + + return MALI_ERROR_NONE; +} + +void kbase_pm_policy_term(kbase_device *kbdev) +{ + kbdev->pm.pm_current_policy->term(kbdev); +} + +void kbase_pm_cancel_deferred_poweroff(kbase_device *kbdev) +{ + unsigned long flags; + + lockdep_assert_held(&kbdev->pm.lock); + + hrtimer_cancel(&kbdev->pm.gpu_poweroff_timer); + + /* If wq is already running but is held off by pm.lock, make sure it has no effect */ + kbdev->pm.gpu_poweroff_pending = 0; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + kbdev->pm.shader_poweroff_pending = 0; + kbdev->pm.shader_poweroff_pending_time = 0; + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +void kbase_pm_update_active(kbase_device *kbdev) +{ + unsigned long flags; + mali_bool active; + + lockdep_assert_held(&kbdev->pm.lock); + + /* pm_current_policy will never be NULL while pm.lock is held */ + KBASE_DEBUG_ASSERT(kbdev->pm.pm_current_policy); + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + active = kbdev->pm.pm_current_policy->get_core_active(kbdev); + + if (active != MALI_FALSE) { + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + if (kbdev->pm.gpu_poweroff_pending) { + /* Cancel any pending power off request */ + kbdev->pm.gpu_poweroff_pending = 0; + + /* If a request was pending then the GPU was still powered, so no need to continue */ + return; + } + + if (!kbdev->pm.poweroff_timer_running && !kbdev->pm.gpu_powered) { + kbdev->pm.poweroff_timer_running = MALI_TRUE; + hrtimer_start(&kbdev->pm.gpu_poweroff_timer, kbdev->pm.gpu_poweroff_time, HRTIMER_MODE_REL); + } + + /* Power on the GPU and any cores requested by the policy */ + kbase_pm_do_poweron(kbdev, MALI_FALSE); + } else { + /* It is an error for the power policy to power off the GPU + * when there are contexts active */ + KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); + + if (kbdev->pm.shader_poweroff_pending) { + kbdev->pm.shader_poweroff_pending = 0; + kbdev->pm.shader_poweroff_pending_time = 0; + } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + + /* Request power off */ + if (kbdev->pm.gpu_powered) { + kbdev->pm.gpu_poweroff_pending = kbdev->pm.poweroff_gpu_ticks; + if (!kbdev->pm.poweroff_timer_running) { + /* Start timer if not running (eg if power policy has been changed from always_on + * to something else). This will ensure the GPU is actually powered off */ + kbdev->pm.poweroff_timer_running = MALI_TRUE; + hrtimer_start(&kbdev->pm.gpu_poweroff_timer, kbdev->pm.gpu_poweroff_time, HRTIMER_MODE_REL); + } + } + } +} + +void kbase_pm_update_cores_state_nolock(kbase_device *kbdev) +{ + u64 desired_bitmap; + mali_bool cores_are_available; + + lockdep_assert_held(&kbdev->pm.power_change_lock); + + if (kbdev->pm.pm_current_policy == NULL) + return; + + desired_bitmap = kbdev->pm.pm_current_policy->get_core_mask(kbdev); + desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); + + /* Enable core 0 if tiler required, regardless of core availability */ + if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) + desired_bitmap |= 1; + + if (kbdev->pm.desired_shader_state != desired_bitmap) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, (u32)desired_bitmap); + + /* Are any cores being powered on? 
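+	 * That is, does desired_bitmap request cores that are not yet part of
+	 * pm.desired_shader_state, or is the core availability policy still in
+	 * transition (ca_in_transition)?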
*/ + if (~kbdev->pm.desired_shader_state & desired_bitmap || + kbdev->pm.ca_in_transition != MALI_FALSE) { + kbdev->pm.desired_shader_state = desired_bitmap; + + /* If any cores are being powered on, transition immediately */ + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + + /* Ensure timer does not power off wanted cores */ + if (kbdev->pm.shader_poweroff_pending != 0) { + kbdev->pm.shader_poweroff_pending &= ~kbdev->pm.desired_shader_state; + if (kbdev->pm.shader_poweroff_pending == 0) + kbdev->pm.shader_poweroff_pending_time = 0; + } + } else if (kbdev->pm.desired_shader_state & ~desired_bitmap) { + /* Start timer to power off cores */ + kbdev->pm.shader_poweroff_pending |= (kbdev->pm.desired_shader_state & ~desired_bitmap); + kbdev->pm.shader_poweroff_pending_time = kbdev->pm.poweroff_shader_ticks; + } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 && kbdev->pm.poweroff_timer_running) { + /* If power policy is keeping cores on despite there being no active contexts + * then disable poweroff timer as it isn't required */ + kbdev->pm.poweroff_timer_running = MALI_FALSE; + hrtimer_cancel(&kbdev->pm.gpu_poweroff_timer); + } + + /* Don't need 'cores_are_available', because we don't return anything */ + CSTD_UNUSED(cores_are_available); +} + +void kbase_pm_update_cores_state(kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + kbase_pm_update_cores_state_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +int kbase_pm_list_policies(const kbase_pm_policy * const **list) +{ + if (!list) + return POLICY_COUNT; + + *list = policy_list; + + return POLICY_COUNT; +} + +KBASE_EXPORT_TEST_API(kbase_pm_list_policies) + +const kbase_pm_policy *kbase_pm_get_policy(kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + return kbdev->pm.pm_current_policy; +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_policy) + +void kbase_pm_set_policy(kbase_device *kbdev, const kbase_pm_policy *new_policy) +{ + const kbase_pm_policy *old_policy; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(new_policy != NULL); + + KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id); + + /* During a policy change we pretend the GPU is active */ + /* A suspend won't happen here, because we're in a syscall from a userspace thread */ + kbase_pm_context_active(kbdev); + + mutex_lock(&kbdev->pm.lock); + + /* Remove the policy to prevent IRQ handlers from working on it */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + old_policy = kbdev->pm.pm_current_policy; + kbdev->pm.pm_current_policy = NULL; + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u, old_policy->id); + if (old_policy->term) + old_policy->term(kbdev); + + KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u, new_policy->id); + if (new_policy->init) + new_policy->init(kbdev); + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbdev->pm.pm_current_policy = new_policy; + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + /* If any core power state changes were previously attempted, but couldn't + * be made because the policy was changing (current_policy was NULL), then + * re-try them here. 
*/ + kbase_pm_update_active(kbdev); + kbase_pm_update_cores_state(kbdev); + + mutex_unlock(&kbdev->pm.lock); + + /* Now the policy change is finished, we release our fake context active reference */ + kbase_pm_context_idle(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_set_policy) + +/** Check whether a state change has finished, and trace it as completed */ +STATIC void kbase_pm_trace_check_and_finish_state_change(kbase_device *kbdev) +{ + if ((kbdev->shader_available_bitmap & kbdev->pm.desired_shader_state) == kbdev->pm.desired_shader_state + && (kbdev->tiler_available_bitmap & kbdev->pm.desired_tiler_state) == kbdev->pm.desired_tiler_state) + kbase_timeline_pm_check_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); +} + +void kbase_pm_request_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores) +{ + unsigned long flags; + u64 cores; + + kbase_pm_change_state change_gpu_state = 0u; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + cores = shader_cores; + while (cores) { + int bitnum = fls64(cores) - 1; + u64 bit = 1ULL << bitnum; + + /* It should be almost impossible for this to overflow. It would require 2^32 atoms + * to request a particular core, which would require 2^24 contexts to submit. This + * would require an amount of memory that is impossible on a 32-bit system and + * extremely unlikely on a 64-bit system. */ + int cnt = ++kbdev->shader_needed_cnt[bitnum]; + + if (1 == cnt) { + kbdev->shader_needed_bitmap |= bit; + change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; + } + + cores &= ~bit; + } + + if (tiler_required != MALI_FALSE) { + ++kbdev->tiler_needed_cnt; + + KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0); + + /* For tiler jobs, we must make sure that core 0 is not turned off if it's already on. + * However, it's safe for core 0 to be left off and turned on later whilst a tiler job + * is running. Hence, we don't need to update the cores state immediately. Also, + * attempts to turn off cores will always check the tiler_needed/inuse state first anyway. + * + * Finally, kbase_js_choose_affinity() ensures core 0 is always requested for tiler jobs + * anyway. Hence when there's only a tiler job in the system, this will still cause + * kbase_pm_update_cores_state_nolock() to be called. + * + * Note that we still need to keep track of tiler_needed/inuse_cnt, to ensure that + * kbase_pm_update_cores_state_nolock() can override the core availability policy and + * force core 0 to be powered when a tiler job is in the system. 
*/ + } + + if (change_gpu_state) { + KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap); + + kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_REQUEST_CORES_START, change_gpu_state); + kbase_pm_update_cores_state_nolock(kbdev); + kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_REQUEST_CORES_END, change_gpu_state); + } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_cores) + +void kbase_pm_unrequest_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores) +{ + unsigned long flags; + + kbase_pm_change_state change_gpu_state = 0u; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + while (shader_cores) { + int bitnum = fls64(shader_cores) - 1; + u64 bit = 1ULL << bitnum; + int cnt; + + KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0); + + cnt = --kbdev->shader_needed_cnt[bitnum]; + + if (0 == cnt) { + kbdev->shader_needed_bitmap &= ~bit; + + change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; + } + + shader_cores &= ~bit; + } + + if (tiler_required != MALI_FALSE) { + KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0); + + --kbdev->tiler_needed_cnt; + + /* Whilst tiler jobs must not allow core 0 to be turned off, we don't need to make an + * extra call to kbase_pm_update_cores_state_nolock() to ensure core 0 is turned off + * when the last tiler job unrequests cores: kbase_js_choose_affinity() ensures core 0 + * was originally requested for tiler jobs. Hence when there's only a tiler job in the + * system, this will still cause kbase_pm_update_cores_state_nolock() to be called. */ + } + + if (change_gpu_state) { + KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap); + + kbase_pm_update_cores_state_nolock(kbdev); + + /* Trace that any state change effectively completes immediately - + * no-one will wait on the state change */ + kbase_pm_trace_check_and_finish_state_change(kbdev); + } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores) + +kbase_pm_cores_ready kbase_pm_register_inuse_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores) +{ + unsigned long flags; + u64 prev_shader_needed; /* Just for tracing */ + u64 prev_shader_inuse; /* Just for tracing */ + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + prev_shader_needed = kbdev->shader_needed_bitmap; + prev_shader_inuse = kbdev->shader_inuse_bitmap; + + /* If desired_shader_state does not contain the requested cores, then power + * management is not attempting to powering those cores (most likely + * due to core availability policy) and a new job affinity must be + * chosen */ + if ((kbdev->pm.desired_shader_state & shader_cores) != shader_cores) { + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + return KBASE_NEW_AFFINITY; + } + + if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores || + (tiler_required != MALI_FALSE && !kbdev->tiler_available_bitmap)) { + /* Trace ongoing core transition */ + kbase_timeline_pm_l2_transition_start(kbdev); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + return KBASE_CORES_NOT_READY; + } + + /* If we started to trace a state change, then trace it has being finished + * by now, at the very latest */ + kbase_pm_trace_check_and_finish_state_change(kbdev); + /* Trace core transition done */ + 
kbase_timeline_pm_l2_transition_done(kbdev); + + while (shader_cores) { + int bitnum = fls64(shader_cores) - 1; + u64 bit = 1ULL << bitnum; + int cnt; + + KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0); + + cnt = --kbdev->shader_needed_cnt[bitnum]; + + if (0 == cnt) + kbdev->shader_needed_bitmap &= ~bit; + + /* shader_inuse_cnt should not overflow because there can only be a + * very limited number of jobs on the h/w at one time */ + + kbdev->shader_inuse_cnt[bitnum]++; + kbdev->shader_inuse_bitmap |= bit; + + shader_cores &= ~bit; + } + + if (tiler_required != MALI_FALSE) { + KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0); + + --kbdev->tiler_needed_cnt; + + kbdev->tiler_inuse_cnt++; + + KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0); + } + + if (prev_shader_needed != kbdev->shader_needed_bitmap) + KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap); + + if (prev_shader_inuse != kbdev->shader_inuse_bitmap) + KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL, NULL, 0u, (u32) kbdev->shader_inuse_bitmap); + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + return KBASE_CORES_READY; +} + +KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores) + +void kbase_pm_release_cores(kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores) +{ + unsigned long flags; + kbase_pm_change_state change_gpu_state = 0u; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + while (shader_cores) { + int bitnum = fls64(shader_cores) - 1; + u64 bit = 1ULL << bitnum; + int cnt; + + KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0); + + cnt = --kbdev->shader_inuse_cnt[bitnum]; + + if (0 == cnt) { + kbdev->shader_inuse_bitmap &= ~bit; + change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; + } + + shader_cores &= ~bit; + } + + if (tiler_required != MALI_FALSE) { + KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0); + + --kbdev->tiler_inuse_cnt; + + /* Whilst tiler jobs must not allow core 0 to be turned off, we don't need to make an + * extra call to kbase_pm_update_cores_state_nolock() to ensure core 0 is turned off + * when the last tiler job finishes: kbase_js_choose_affinity() ensures core 0 was + * originally requested for tiler jobs. 
Hence when there's only a tiler job in the + * system, this will still cause kbase_pm_update_cores_state_nolock() to be called */ + } + + if (change_gpu_state) { + KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL, NULL, 0u, (u32) kbdev->shader_inuse_bitmap); + + kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_RELEASE_CORES_START, change_gpu_state); + kbase_pm_update_cores_state_nolock(kbdev); + kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_RELEASE_CORES_END, change_gpu_state); + + /* Trace that any state change completed immediately */ + kbase_pm_trace_check_and_finish_state_change(kbdev); + } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_release_cores) + +void kbase_pm_request_cores_sync(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores) +{ + kbase_pm_request_cores(kbdev, tiler_required, shader_cores); + + kbase_pm_check_transitions_sync(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync) + +void kbase_pm_request_l2_caches(kbase_device *kbdev) +{ + unsigned long flags; + u32 prior_l2_users_count; + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + prior_l2_users_count = kbdev->l2_users_count++; + + KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0); + + if (!prior_l2_users_count) + kbase_pm_update_cores_state_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + wait_event(kbdev->pm.l2_powered_wait, kbdev->pm.l2_powered == 1); + + /* Trace that any state change completed immediately */ + kbase_pm_trace_check_and_finish_state_change(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches) + +void kbase_pm_release_l2_caches(kbase_device *kbdev) +{ + unsigned long flags; + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0); + + --kbdev->l2_users_count; + + if (!kbdev->l2_users_count) { + kbase_pm_update_cores_state_nolock(kbdev); + /* Trace that any state change completed immediately */ + kbase_pm_trace_check_and_finish_state_change(kbdev); + } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches) + diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/mali_kbase_pm_policy.h new file mode 100755 index 00000000000..007cdde9460 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm_policy.h @@ -0,0 +1,269 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_pm_policy.h + * Power policy API definitions + */ + +#ifndef _KBASE_PM_POLICY_H_ +#define _KBASE_PM_POLICY_H_ + +/** List of policy IDs */ +typedef enum kbase_pm_policy_id { + KBASE_PM_POLICY_ID_DEMAND = 1, + KBASE_PM_POLICY_ID_ALWAYS_ON, + KBASE_PM_POLICY_ID_COARSE_DEMAND, +#if MALI_CUSTOMER_RELEASE == 0 + KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED, + KBASE_PM_POLICY_ID_FAST_START +#endif +} kbase_pm_policy_id; + +typedef u32 kbase_pm_policy_flags; + +/** Power policy structure. 
+ * + * Each power policy exposes a (static) instance of this structure which contains function pointers to the + * policy's methods. + */ +typedef struct kbase_pm_policy { + /** The name of this policy */ + char *name; + + /** Function called when the policy is selected + * + * This should initialize the kbdev->pm.pm_policy_data structure. It should not attempt + * to make any changes to hardware state. + * + * It is undefined what state the cores are in when the function is called. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ + void (*init) (struct kbase_device *kbdev); + + /** Function called when the policy is unselected. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ + void (*term) (struct kbase_device *kbdev); + + /** Function called to get the current shader core mask + * + * The returned mask should meet or exceed (kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap). + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * + * @return The mask of shader cores to be powered */ + u64 (*get_core_mask) (struct kbase_device *kbdev); + + /** Function called to get the current overall GPU power state + * + * This function should consider the state of kbdev->pm.active_count. If this count is greater than 0 then + * there is at least one active context on the device and the GPU should be powered. If it is equal to 0 + * then there are no active contexts and the GPU could be powered off if desired. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * + * @return MALI_TRUE if the GPU should be powered, MALI_FALSE otherwise */ + mali_bool (*get_core_active) (struct kbase_device *kbdev); + + /** Field indicating flags for this policy */ + kbase_pm_policy_flags flags; + + /** Field indicating an ID for this policy. This is not necessarily the + * same as its index in the list returned by kbase_pm_list_policies(). + * It is used purely for debugging. */ + kbase_pm_policy_id id; +} kbase_pm_policy; + +/** Initialize power policy framework + * + * Must be called before calling any other policy function + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * + * @return MALI_ERROR_NONE if the power policy framework was successfully initialized. + */ +mali_error kbase_pm_policy_init(struct kbase_device *kbdev); + +/** Terminate power policy framework + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_policy_term(struct kbase_device *kbdev); + +/** Update the active power state of the GPU + * Calls into the current power policy + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_update_active(struct kbase_device *kbdev); + +/** Update the desired core state of the GPU + * Calls into the current power policy + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_update_cores(struct kbase_device *kbdev); + +/** Get the current policy. + * Returns the policy that is currently active. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * + * @return The current policy + */ +const kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev); + +/** Change the policy to the one specified. 
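+ *
+ * A fake context-active reference is held for the duration of the switch, so
+ * the old policy is terminated and the new one initialised without the GPU
+ * being powered off part-way through the change.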
+ * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param policy The policy to change to (valid pointer returned from @ref kbase_pm_list_policies) + */ +void kbase_pm_set_policy(struct kbase_device *kbdev, const kbase_pm_policy *policy); + +/** Retrieve a static list of the available policies. + * @param[out] policies An array pointer to take the list of policies. This may be NULL. + * The contents of this array must not be modified. + * + * @return The number of policies + */ +int kbase_pm_list_policies(const kbase_pm_policy * const **policies); + + +typedef enum kbase_pm_cores_ready { + KBASE_CORES_NOT_READY = 0, + KBASE_NEW_AFFINITY = 1, + KBASE_CORES_READY = 2 +} kbase_pm_cores_ready; + + +/** Synchronous variant of kbase_pm_request_cores() + * + * When this function returns, the @a shader_cores will be in the READY state. + * + * This is safe variant of kbase_pm_check_transitions_sync(): it handles the + * work of ensuring the requested cores will remain powered until a matching + * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate) + * is made. + * + * @param kbdev The kbase device structure for the device + * @param tiler_required MALI_TRUE if the tiler is required, MALI_FALSE otherwise + * @param shader_cores A bitmask of shader cores which are necessary for the job + */ + +void kbase_pm_request_cores_sync(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores); + +/** Mark one or more cores as being required for jobs to be submitted. + * + * This function is called by the job scheduler to mark one or more cores + * as being required to submit jobs that are ready to run. + * + * The cores requested are reference counted and a subsequent call to @ref kbase_pm_register_inuse_cores or + * @ref kbase_pm_unrequest_cores should be made to dereference the cores as being 'needed'. + * + * The active power policy will meet or exceed the requirements of the + * requested cores in the system. Any core transitions needed will be begun + * immediately, but they might not complete/the cores might not be available + * until a Power Management IRQ. + * + * @param kbdev The kbase device structure for the device + * @param tiler_required MALI_TRUE if the tiler is required, MALI_FALSE otherwise + * @param shader_cores A bitmask of shader cores which are necessary for the job + * + * @return MALI_ERROR_NONE if the cores were successfully requested. + */ +void kbase_pm_request_cores(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores); + +/** Unmark one or more cores as being required for jobs to be submitted. + * + * This function undoes the effect of @ref kbase_pm_request_cores. It should be used when a job is not + * going to be submitted to the hardware (e.g. the job is cancelled before it is enqueued). + * + * The active power policy will meet or exceed the requirements of the + * requested cores in the system. Any core transitions needed will be begun + * immediately, but they might not complete until a Power Management IRQ. + * + * The policy may use this as an indication that it can power down cores. + * + * @param kbdev The kbase device structure for the device + * @param tiler_required MALI_TRUE if the tiler is required, MALI_FALSE otherwise + * @param shader_cores A bitmask of shader cores (as given to @ref kbase_pm_request_cores) + */ +void kbase_pm_unrequest_cores(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores); + +/** Register a set of cores as in use by a job. 
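+ *
+ * (Typical sequence for a job: kbase_pm_request_cores() to reference the
+ *  cores, kbase_pm_register_inuse_cores() once the requested cores have been
+ *  powered, and kbase_pm_release_cores() when the job has finished;
+ *  kbase_pm_unrequest_cores() is used instead of the last two calls if the
+ *  job is cancelled before it reaches the hardware.)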
+ * + * This function should be called after @ref kbase_pm_request_cores when the job is about to be submitted to + * the hardware. It will check that the necessary cores are available and if so update the 'needed' and 'inuse' + * bitmasks to reflect that the job is now committed to being run. + * + * If the necessary cores are not currently available then the function will return MALI_FALSE and have no effect. + * + * @param kbdev The kbase device structure for the device + * @param tiler_required MALI_TRUE if the tiler is required, MALI_FALSE otherwise + * @param shader_cores A bitmask of shader cores (as given to @ref kbase_pm_request_cores) + * + * @return MALI_TRUE if the job can be submitted to the hardware or MALI_FALSE if the job is not ready to run. + */ +mali_bool kbase_pm_register_inuse_cores(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores); + +/** Release cores after a job has run. + * + * This function should be called when a job has finished running on the hardware. A call to @ref + * kbase_pm_register_inuse_cores must have previously occurred. The reference counts of the specified cores will be + * decremented which may cause the bitmask of 'inuse' cores to be reduced. The power policy may then turn off any + * cores which are no longer 'inuse'. + * + * @param kbdev The kbase device structure for the device + * @param tiler_required MALI_TRUE if the tiler is required, MALI_FALSE otherwise + * @param shader_cores A bitmask of shader cores (as given to @ref kbase_pm_register_inuse_cores) + */ +void kbase_pm_release_cores(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores); + +/** Request the use of l2 caches for all core groups, power up, wait and prevent the power manager from + * powering down the l2 caches. + * + * This tells the power management that the caches should be powered up, and they + * should remain powered, irrespective of the usage of shader cores. This does not + * return until the l2 caches are powered up. + * + * The caller must call @ref kbase_pm_release_l2_caches when they are finished to + * allow normal power management of the l2 caches to resume. + * + * This should only be used when power management is active. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_request_l2_caches(struct kbase_device *kbdev); + +/** Release the use of l2 caches for all core groups and allow the power manager to + * power them down when necessary. + * + * This tells the power management that the caches can be powered down if necessary, with respect + * to the usage of shader cores. + * + * The caller must have called @ref kbase_pm_request_l2_caches prior to a call to this. + * + * This should only be used when power management is active. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_release_l2_caches(struct kbase_device *kbdev); + +#endif /* _KBASE_PM_POLICY_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h new file mode 100755 index 00000000000..6f9db71f7a0 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h @@ -0,0 +1,40 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_profiling_gator_api.h + * Model interface + */ + +#ifndef _KBASE_PROFILING_GATOR_API_H_ +#define _KBASE_PROFILING_GATOR_API_H_ + +/* + * List of possible actions to be controlled by Streamline. + * The following numbers are used by gator to control + * the frame buffer dumping and s/w counter reporting. + */ +#define FBDUMP_CONTROL_ENABLE (1) +#define FBDUMP_CONTROL_RATE (2) +#define SW_COUNTER_ENABLE (3) +#define FBDUMP_CONTROL_RESIZE_FACTOR (4) +#define FBDUMP_CONTROL_MAX (5) +#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE + +void _mali_profiling_control(u32 action, u32 value); + +#endif /* _KBASE_PROFILING_GATOR_API */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c new file mode 100755 index 00000000000..91381329881 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c @@ -0,0 +1,1069 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +/** + * @file mali_kbase_replay.c + * Replay soft job handlers + */ + +#include +#include +#include +#include + +#define JOB_NOT_STARTED 0 + +#define JOB_TYPE_MASK 0xfe +#define JOB_TYPE_NULL (1 << 1) +#define JOB_TYPE_VERTEX (5 << 1) +#define JOB_TYPE_TILER (7 << 1) +#define JOB_TYPE_FUSED (8 << 1) +#define JOB_TYPE_FRAGMENT (9 << 1) + +#define JOB_FLAG_DESC_SIZE (1 << 0) +#define JOB_FLAG_PERFORM_JOB_BARRIER (1 << 8) + +#define JOB_HEADER_32_FBD_OFFSET (31*4) + +#define FBD_POINTER_MASK (~0x3f) + +#define SFBD_TILER_OFFSET (48*4) + +#define MFBD_TILER_FLAGS_OFFSET (15*4) +#define MFBD_TILER_OFFSET (16*4) + +#define FBD_HIERARCHY_WEIGHTS 8 +#define FBD_HIERARCHY_MASK_MASK 0x1fff + +#define FBD_TYPE 1 + +#define HIERARCHY_WEIGHTS 13 + +#define JOB_HEADER_ID_MAX 0xffff + +typedef struct job_head +{ + u32 status; + u32 not_complete_index; + u64 fault_addr; + u16 flags; + u16 index; + u16 dependencies[2]; + union + { + u64 _64; + u32 _32; + } next; + u32 x[2]; + union + { + u64 _64; + u32 _32; + } fragment_fbd; +} job_head; + +static void dump_job_head(kbase_context *kctx, char *head_str, job_head *job) +{ +#ifdef CONFIG_MALI_DEBUG + struct device *dev = kctx->kbdev->dev; + + KBASE_LOG(2, dev, "%s\n", head_str); + KBASE_LOG(2, dev, "addr = %p\n" + "status = %x\n" + "not_complete_index = %x\n" + "fault_addr = %llx\n" + "flags = %x\n" + "index = %x\n" + "dependencies = %x,%x\n", + job, + job->status, + job->not_complete_index, + job->fault_addr, + job->flags, + job->index, + job->dependencies[0], + job->dependencies[1]); + + if (job->flags & JOB_FLAG_DESC_SIZE) + KBASE_LOG(2, dev, "next = %llx\n", job->next._64); + else + KBASE_LOG(2, dev, "next = %x\n", job->next._32); +#endif +} + + +static void *kbasep_map_page(kbase_context *kctx, mali_addr64 gpu_addr, + u64 *phys_addr) +{ + void *cpu_addr = NULL; + u64 page_index; + kbase_va_region *region; + phys_addr_t *page_array; + + region = kbase_region_tracker_find_region_enclosing_address(kctx, + gpu_addr); + if (!region || (region->flags & KBASE_REG_FREE)) + return NULL; + + page_index = (gpu_addr >> PAGE_SHIFT) - region->start_pfn; + if (page_index >= kbase_reg_current_backed_size(region)) + return NULL; + + page_array = kbase_get_phy_pages(region); + if (!page_array) + return NULL; + + cpu_addr = kmap_atomic(pfn_to_page(PFN_DOWN(page_array[page_index]))); + if (!cpu_addr) + return NULL; + + if (phys_addr) + *phys_addr = page_array[page_index]; + + return cpu_addr + (gpu_addr & ~PAGE_MASK); +} + +static void *kbasep_map_page_sync(kbase_context *kctx, mali_addr64 gpu_addr, + u64 *phys_addr) +{ + void *cpu_addr = kbasep_map_page(kctx, gpu_addr, phys_addr); + + if (!cpu_addr) + return NULL; + + kbase_sync_to_cpu(*phys_addr, + (void *)((uintptr_t)cpu_addr & PAGE_MASK), + PAGE_SIZE); + + return cpu_addr; +} + +static void kbasep_unmap_page(void *cpu_addr) +{ + kunmap_atomic((void *)((uintptr_t)cpu_addr & PAGE_MASK)); +} + +static void kbasep_unmap_page_sync(void *cpu_addr, u64 phys_addr) +{ + kbase_sync_to_memory(phys_addr, + (void *)((uintptr_t)cpu_addr & PAGE_MASK), + PAGE_SIZE); + + kunmap_atomic((void *)((uintptr_t)cpu_addr & PAGE_MASK)); +} + +static mali_error kbasep_replay_reset_sfbd(kbase_context *kctx, + mali_addr64 fbd_address, + mali_addr64 tiler_heap_free, + u16 hierarchy_mask, + u32 default_weight) +{ + u64 phys_addr; + struct + { + u32 padding_1[1]; + u32 flags; + u64 padding_2[2]; + u64 heap_free_address; + u32 padding[8]; + u32 weights[FBD_HIERARCHY_WEIGHTS]; + } *fbd_tiler; + struct device *dev = kctx->kbdev->dev; + + 
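+	/* Map the tiler section of the SFBD so that the hierarchy mask/weights
+	 * and the heap free address can be rewritten; the page is synced back
+	 * to memory on unmap so the GPU sees the updated descriptor when the
+	 * chain is replayed. */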
KBASE_LOG(2, dev, "fbd_address: %llx\n", fbd_address); + + fbd_tiler = kbasep_map_page_sync(kctx, fbd_address + SFBD_TILER_OFFSET, + &phys_addr); + if (!fbd_tiler) { + dev_err(dev, "kbasep_replay_reset_fbd: failed to map fbd\n"); + return MALI_ERROR_FUNCTION_FAILED; + } +#ifdef CONFIG_MALI_DEBUG + KBASE_LOG(2, dev, "FBD tiler:\n" + "flags = %x\n" + "heap_free_address = %llx\n", + fbd_tiler->flags, + fbd_tiler->heap_free_address); +#endif + if (hierarchy_mask) { + u32 weights[HIERARCHY_WEIGHTS]; + u16 old_hierarchy_mask = fbd_tiler->flags & + FBD_HIERARCHY_MASK_MASK; + int i, j = 0; + + for (i = 0; i < HIERARCHY_WEIGHTS; i++) { + if (old_hierarchy_mask & (1 << i)) { + KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); + weights[i] = fbd_tiler->weights[j++]; + } else { + weights[i] = default_weight; + } + } + + + KBASE_LOG(2, dev, + "Old hierarchy mask=%x New hierarchy mask=%x\n", + old_hierarchy_mask, hierarchy_mask); + for (i = 0; i < HIERARCHY_WEIGHTS; i++) + KBASE_LOG(2, dev, " Hierarchy weight %02d: %08x\n", + i, weights[i]); + + j = 0; + + for (i = 0; i < HIERARCHY_WEIGHTS; i++) { + if (hierarchy_mask & (1 << i)) { + KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); + + KBASE_LOG(2, dev, + " Writing hierarchy level %02d (%08x) to %d\n", + i, weights[i], j); + + fbd_tiler->weights[j++] = weights[i]; + } + } + + for (; j < FBD_HIERARCHY_WEIGHTS; j++) + fbd_tiler->weights[j] = 0; + + fbd_tiler->flags = hierarchy_mask | (1 << 16); + } + + fbd_tiler->heap_free_address = tiler_heap_free; + + KBASE_LOG(2, dev, "heap_free_address=%llx flags=%x\n", + fbd_tiler->heap_free_address, fbd_tiler->flags); + + kbasep_unmap_page_sync(fbd_tiler, phys_addr); + + return MALI_ERROR_NONE; +} + +static mali_error kbasep_replay_reset_mfbd(kbase_context *kctx, + mali_addr64 fbd_address, + mali_addr64 tiler_heap_free, + u16 hierarchy_mask, + u32 default_weight) +{ + u64 phys_addr, phys_addr_flags; + struct + { + u64 padding_1[2]; + u64 heap_free_address; + u64 padding_2; + u32 weights[FBD_HIERARCHY_WEIGHTS]; + } *fbd_tiler; + u32 *fbd_tiler_flags; + mali_bool flags_different_page; + struct device *dev = kctx->kbdev->dev; + + KBASE_LOG(2, dev, "fbd_address: %llx\n", fbd_address); + + fbd_tiler = kbasep_map_page_sync(kctx, fbd_address + MFBD_TILER_OFFSET, + &phys_addr); + if (((fbd_address + MFBD_TILER_OFFSET) & PAGE_MASK) != + ((fbd_address + MFBD_TILER_FLAGS_OFFSET) & PAGE_MASK)) { + flags_different_page = MALI_TRUE; + fbd_tiler_flags = kbasep_map_page_sync(kctx, + fbd_address + MFBD_TILER_FLAGS_OFFSET, + &phys_addr_flags); + } else { + flags_different_page = MALI_FALSE; + fbd_tiler_flags = (u32 *)((uintptr_t)fbd_tiler - + MFBD_TILER_OFFSET + MFBD_TILER_FLAGS_OFFSET); + } + + if (!fbd_tiler || !fbd_tiler_flags) { + dev_err(dev, "kbasep_replay_reset_fbd: failed to map fbd\n"); + + if (fbd_tiler_flags && flags_different_page) + kbasep_unmap_page_sync(fbd_tiler_flags, + phys_addr_flags); + if (fbd_tiler) + kbasep_unmap_page_sync(fbd_tiler, phys_addr); + + return MALI_ERROR_FUNCTION_FAILED; + } +#ifdef CONFIG_MALI_DEBUG + KBASE_LOG(2, dev, "FBD tiler:\n" + "heap_free_address = %llx\n", + fbd_tiler->heap_free_address); +#endif + if (hierarchy_mask) { + u32 weights[HIERARCHY_WEIGHTS]; + u16 old_hierarchy_mask = (*fbd_tiler_flags) & + FBD_HIERARCHY_MASK_MASK; + int i, j = 0; + + for (i = 0; i < HIERARCHY_WEIGHTS; i++) { + if (old_hierarchy_mask & (1 << i)) { + KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); + weights[i] = fbd_tiler->weights[j++]; + } + else + weights[i] = default_weight; + } + + + KBASE_LOG(2, dev, + "Old 
hierarchy mask=%x New hierarchy mask=%x\n", + old_hierarchy_mask, hierarchy_mask); + for (i = 0; i < HIERARCHY_WEIGHTS; i++) + KBASE_LOG(2, dev, " Hierarchy weight %02d: %08x\n", + i, weights[i]); + + j = 0; + + for (i = 0; i < HIERARCHY_WEIGHTS; i++) { + if (hierarchy_mask & (1 << i)) { + KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); + + KBASE_LOG(2, dev, + " Writing hierarchy level %02d (%08x) to %d\n", + i, weights[i], j); + + fbd_tiler->weights[j++] = weights[i]; + } + } + + for (; j < FBD_HIERARCHY_WEIGHTS; j++) + fbd_tiler->weights[j] = 0; + + *fbd_tiler_flags = hierarchy_mask | (1 << 16); + } + + fbd_tiler->heap_free_address = tiler_heap_free; + + if (flags_different_page) + kbasep_unmap_page_sync(fbd_tiler_flags, phys_addr_flags); + + kbasep_unmap_page_sync(fbd_tiler, phys_addr); + + return MALI_ERROR_NONE; +} + +/** + * @brief Reset the status of an FBD pointed to by a tiler job + * + * This performs two functions : + * - Set the hierarchy mask + * - Reset the tiler free heap address + * + * @param[in] kctx Context pointer + * @param[in] job_header Address of job header to reset. + * @param[in] tiler_heap_free The value to reset Tiler Heap Free to + * @param[in] hierarchy_mask The hierarchy mask to use + * @param[in] default_weight Default hierarchy weight to write when no other + * weight is given in the FBD + * @param[in] job_64 MALI_TRUE if this job is using 64-bit + * descriptors + * + * @return MALI_ERROR_NONE on success, error code on failure + */ +static mali_error kbasep_replay_reset_tiler_job(kbase_context *kctx, + mali_addr64 job_header, + mali_addr64 tiler_heap_free, + u16 hierarchy_mask, + u32 default_weight, + mali_bool job_64) +{ + mali_addr64 fbd_address; + + if (job_64) { + dev_err(kctx->kbdev->dev, + "64-bit job descriptor not supported\n"); + return MALI_ERROR_FUNCTION_FAILED; + } else { + u32 *job_ext; + + job_ext = kbasep_map_page(kctx, + job_header + JOB_HEADER_32_FBD_OFFSET, + NULL); + if (!job_ext) { + dev_err(kctx->kbdev->dev, + "kbasep_replay_reset_tiler_job: failed to map jc\n"); + return MALI_ERROR_FUNCTION_FAILED; + } + + fbd_address = *job_ext; + + kbasep_unmap_page(job_ext); + } + + if (fbd_address & FBD_TYPE) { + return kbasep_replay_reset_mfbd(kctx, + fbd_address & FBD_POINTER_MASK, + tiler_heap_free, + hierarchy_mask, + default_weight); + } else { + return kbasep_replay_reset_sfbd(kctx, + fbd_address & FBD_POINTER_MASK, + tiler_heap_free, + hierarchy_mask, + default_weight); + } +} + +/** + * @brief Reset the status of a job + * + * This performs the following functions : + * + * - Reset the Job Status field of each job to NOT_STARTED. + * - Set the Job Type field of any Vertex Jobs to Null Job. + * - For any jobs using an FBD, set the Tiler Heap Free field to the value of + * the tiler_heap_free parameter, and set the hierarchy level mask to the + * hier_mask parameter. + * - Offset HW dependencies by the hw_job_id_offset parameter + * - Set the Perform Job Barrier flag if this job is the first in the chain + * - Read the address of the next job header + * + * @param[in] kctx Context pointer + * @param[in,out] job_header Address of job header to reset. Set to address + * of next job header on exit. + * @param[in] prev_jc Previous job chain to link to, if this job is + * the last in the chain. 
+ * @param[in] hw_job_id_offset Offset for HW job IDs + * @param[in] tiler_heap_free The value to reset Tiler Heap Free to + * @param[in] hierarchy_mask The hierarchy mask to use + * @param[in] default_weight Default hierarchy weight to write when no other + * weight is given in the FBD + * @param[in] first_in_chain MALI_TRUE if this job is the first in the chain + * @param[in] fragment_chain MALI_TRUE if this job is in the fragment chain + * + * @return MALI_ERROR_NONE on success, error code on failure + */ +static mali_error kbasep_replay_reset_job(kbase_context *kctx, + mali_addr64 *job_header, + mali_addr64 prev_jc, + mali_addr64 tiler_heap_free, + u16 hierarchy_mask, + u32 default_weight, + u16 hw_job_id_offset, + mali_bool first_in_chain, + mali_bool fragment_chain) +{ + job_head *job; + u64 phys_addr; + mali_addr64 new_job_header; + struct device *dev = kctx->kbdev->dev; + + job = kbasep_map_page_sync(kctx, *job_header, &phys_addr); + if (!job) { + dev_err(dev, "kbasep_replay_parse_jc: failed to map jc\n"); + return MALI_ERROR_FUNCTION_FAILED; + } + + dump_job_head(kctx, "Job header:", job); + + if (job->status == JOB_NOT_STARTED && !fragment_chain) { + dev_err(dev, "Job already not started\n"); + kbasep_unmap_page_sync(job, phys_addr); + return MALI_ERROR_FUNCTION_FAILED; + } + job->status = JOB_NOT_STARTED; + + if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_VERTEX) + job->flags = (job->flags & ~JOB_TYPE_MASK) | JOB_TYPE_NULL; + + if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FUSED) { + dev_err(dev, "Fused jobs can not be replayed\n"); + kbasep_unmap_page_sync(job, phys_addr); + return MALI_ERROR_FUNCTION_FAILED; + } + + if (first_in_chain) + job->flags |= JOB_FLAG_PERFORM_JOB_BARRIER; + + if ((job->dependencies[0] + hw_job_id_offset) > JOB_HEADER_ID_MAX || + (job->dependencies[1] + hw_job_id_offset) > JOB_HEADER_ID_MAX || + (job->index + hw_job_id_offset) > JOB_HEADER_ID_MAX) { + dev_err(dev, "Job indicies/dependencies out of valid range\n"); + kbasep_unmap_page_sync(job, phys_addr); + return MALI_ERROR_FUNCTION_FAILED; + } + + if (job->dependencies[0]) + job->dependencies[0] += hw_job_id_offset; + if (job->dependencies[1]) + job->dependencies[1] += hw_job_id_offset; + + job->index += hw_job_id_offset; + + if (job->flags & JOB_FLAG_DESC_SIZE) { + new_job_header = job->next._64; + if (!job->next._64) + job->next._64 = prev_jc; + } else { + new_job_header = job->next._32; + if (!job->next._32) + job->next._32 = prev_jc; + } + dump_job_head(kctx, "Updated to:", job); + + if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_TILER) { + kbasep_unmap_page_sync(job, phys_addr); + if (kbasep_replay_reset_tiler_job(kctx, *job_header, + tiler_heap_free, hierarchy_mask, + default_weight, + job->flags & JOB_FLAG_DESC_SIZE) != + MALI_ERROR_NONE) + return MALI_ERROR_FUNCTION_FAILED; + + } else if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FRAGMENT) { + u64 fbd_address; + + if (job->flags & JOB_FLAG_DESC_SIZE) { + kbasep_unmap_page_sync(job, phys_addr); + dev_err(dev, "64-bit job descriptor not supported\n"); + return MALI_ERROR_FUNCTION_FAILED; + } else { + fbd_address = (u64)job->fragment_fbd._32; + } + + kbasep_unmap_page_sync(job, phys_addr); + + if (fbd_address & FBD_TYPE) { + if (kbasep_replay_reset_mfbd(kctx, + fbd_address & FBD_POINTER_MASK, + tiler_heap_free, + hierarchy_mask, + default_weight) != + MALI_ERROR_NONE) + return MALI_ERROR_FUNCTION_FAILED; + } else { + if (kbasep_replay_reset_sfbd(kctx, + fbd_address & FBD_POINTER_MASK, + tiler_heap_free, + hierarchy_mask, + default_weight) != + 
MALI_ERROR_NONE) + return MALI_ERROR_FUNCTION_FAILED; + } + } else { + kbasep_unmap_page_sync(job, phys_addr); + } + + *job_header = new_job_header; + + return MALI_ERROR_NONE; +} + +/** + * @brief Find the highest job ID in a job chain + * + * @param[in] kctx Context pointer + * @param[in] jc Job chain start address + * @param[out] hw_job_id Highest job ID in chain + * + * @return MALI_ERROR_NONE on success, error code on failure + */ +static mali_error kbasep_replay_find_hw_job_id(kbase_context *kctx, + mali_addr64 jc, + u16 *hw_job_id) +{ + while (jc) { + job_head *job; + u64 phys_addr; + + KBASE_LOG(2, kctx->kbdev->dev, + "kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc); + + job = kbasep_map_page_sync(kctx, jc, &phys_addr); + if (!job) { + dev_err(kctx->kbdev->dev, "failed to map jc\n"); + + return MALI_ERROR_FUNCTION_FAILED; + } + + if (job->index > *hw_job_id) + *hw_job_id = job->index; + + if (job->flags & JOB_FLAG_DESC_SIZE) + jc = job->next._64; + else + jc = job->next._32; + + kbasep_unmap_page_sync(job, phys_addr); + } + + return MALI_ERROR_NONE; +} + +/** + * @brief Reset the status of a number of jobs + * + * This function walks the provided job chain, and calls + * kbasep_replay_reset_job for each job. It also links the job chain to the + * provided previous job chain. + * + * The function will fail if any of the jobs passed already have status of + * NOT_STARTED. + * + * @param[in] kctx Context pointer + * @param[in] jc Job chain to be processed + * @param[in] prev_jc Job chain to be added to. May be NULL + * @param[in] tiler_heap_free The value to reset Tiler Heap Free to + * @param[in] hierarchy_mask The hierarchy mask to use + * @param[in] default_weight Default hierarchy weight to write when no other + * weight is given in the FBD + * @param[in] hw_job_id_offset Offset for HW job IDs + * @param[in] fragment_chain MAIL_TRUE if this chain is the fragment chain + * + * @return MALI_ERROR_NONE on success, error code otherwise + */ +static mali_error kbasep_replay_parse_jc(kbase_context *kctx, + mali_addr64 jc, + mali_addr64 prev_jc, + mali_addr64 tiler_heap_free, + u16 hierarchy_mask, + u32 default_weight, + u16 hw_job_id_offset, + mali_bool fragment_chain) +{ + mali_bool first_in_chain = MALI_TRUE; + int nr_jobs = 0; + + KBASE_LOG(2, kctx->kbdev->dev, + "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n", + jc, hw_job_id_offset); + + while (jc) { + KBASE_LOG(2, kctx->kbdev->dev, + "kbasep_replay_parse_jc: parsing jc=%llx\n", + jc); + + if (kbasep_replay_reset_job(kctx, &jc, prev_jc, + tiler_heap_free, hierarchy_mask, + default_weight, hw_job_id_offset, + first_in_chain, fragment_chain) != + MALI_ERROR_NONE) + return MALI_ERROR_FUNCTION_FAILED; + + first_in_chain = MALI_FALSE; + + nr_jobs++; + if (fragment_chain && + nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) { + dev_err(kctx->kbdev->dev, + "Exceeded maximum number of jobs in fragment chain\n"); + return MALI_ERROR_FUNCTION_FAILED; + } + } + + return MALI_ERROR_NONE; +} + +/** + * @brief Reset the status of a replay job, and set up dependencies + * + * This performs the actions to allow the replay job to be re-run following + * completion of the passed dependency. 
+ * + * @param[in] katom The atom to be reset + * @param[in] dep_atom The dependency to be attached to the atom + */ +static void kbasep_replay_reset_softjob(kbase_jd_atom *katom, + kbase_jd_atom *dep_atom) +{ + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + katom->dep_atom[0] = dep_atom; + list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]); +} + +/** + * @brief Allocate an unused katom + * + * This will search the provided context for an unused katom, and will mark it + * as KBASE_JD_ATOM_STATE_QUEUED. + * + * If no atoms are available then the function will fail. + * + * @param[in] kctx Context pointer + * @return An atom ID, or -1 on failure + */ +static int kbasep_allocate_katom(kbase_context *kctx) +{ + kbase_jd_context *jctx = &kctx->jctx; + int i; + + for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) { + if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) { + jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED; + KBASE_LOG(2, kctx->kbdev->dev, + "kbasep_allocate_katom: Allocated atom %d\n", + i); + return i; + } + } + + return -1; +} + +/** + * @brief Release a katom + * + * This will mark the provided atom as available, and remove any dependencies. + * + * For use on error path. + * + * @param[in] kctx Context pointer + * @param[in] atom_id ID of atom to release + */ +static void kbasep_release_katom(kbase_context *kctx, int atom_id) +{ + kbase_jd_context *jctx = &kctx->jctx; + + KBASE_LOG(2, kctx->kbdev->dev, + "kbasep_release_katom: Released atom %d\n", + atom_id); + + while (!list_empty(&jctx->atoms[atom_id].dep_head[0])) + list_del(jctx->atoms[atom_id].dep_head[0].next); + while (!list_empty(&jctx->atoms[atom_id].dep_head[1])) + list_del(jctx->atoms[atom_id].dep_head[1].next); + + jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED; +} + +static void kbasep_replay_create_atom(kbase_context *kctx, + base_jd_atom_v2 *atom, + int atom_nr, + int prio) +{ + atom->nr_extres = 0; + atom->extres_list.value = NULL; + atom->device_nr = 0; + /* Convert priority back from NICE range */ + atom->prio = ((prio << 16) / ((20 << 16) / 128)) - 128; + atom->atom_number = atom_nr; + + atom->pre_dep[0] = 0; + atom->pre_dep[1] = 0; + + atom->udata.blob[0] = 0; + atom->udata.blob[1] = 0; +} + +/** + * @brief Create two atoms for the purpose of replaying jobs + * + * Two atoms are allocated and created. The jc pointer is not set at this + * stage. The second atom has a dependency on the first. The remaining fields + * are set up as follows : + * + * - No external resources. Any required external resources will be held by the + * replay atom. + * - device_nr is set to 0. This is not relevant as + * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set. + * - Priority is inherited from the replay job. 
+ * + * @param[out] t_atom Atom to use for tiler jobs + * @param[out] f_atom Atom to use for fragment jobs + * @param[in] prio Priority of new atom (inherited from replay soft + * job) + * @return MALI_ERROR_NONE on success, error code on failure + */ +static mali_error kbasep_replay_create_atoms(kbase_context *kctx, + base_jd_atom_v2 *t_atom, + base_jd_atom_v2 *f_atom, + int prio) +{ + int t_atom_nr, f_atom_nr; + + t_atom_nr = kbasep_allocate_katom(kctx); + if (t_atom_nr < 0) { + dev_err(kctx->kbdev->dev, "Failed to allocate katom\n"); + return MALI_ERROR_FUNCTION_FAILED; + } + + f_atom_nr = kbasep_allocate_katom(kctx); + if (f_atom_nr < 0) { + dev_err(kctx->kbdev->dev, "Failed to allocate katom\n"); + kbasep_release_katom(kctx, t_atom_nr); + return MALI_ERROR_FUNCTION_FAILED; + } + + kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio); + kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio); + + f_atom->pre_dep[0] = t_atom_nr; + + return MALI_ERROR_NONE; +} + +#ifdef CONFIG_MALI_DEBUG +static void payload_dump(kbase_context *kctx, base_jd_replay_payload *payload) +{ + mali_addr64 next; + + KBASE_LOG(2, kctx->kbdev->dev, "Tiler jc list :\n"); + next = payload->tiler_jc_list; + + while (next) { + base_jd_replay_jc *jc_struct = kbasep_map_page(kctx, next, NULL); + + if (!jc_struct) + return; + + KBASE_LOG(2, kctx->kbdev->dev, + "* jc_struct=%p jc=%llx next=%llx\n", + jc_struct, + jc_struct->jc, + jc_struct->next); + next = jc_struct->next; + + kbasep_unmap_page(jc_struct); + } +} +#endif + +/** + * @brief Parse a base_jd_replay_payload provided by userspace + * + * This will read the payload from userspace, and parse the job chains. + * + * @param[in] kctx Context pointer + * @param[in] replay_atom Replay soft job atom + * @param[in] t_atom Atom to use for tiler jobs + * @param[in] f_atom Atom to use for fragment jobs + * @return MALI_ERROR_NONE on success, error code on failure + */ +static mali_error kbasep_replay_parse_payload(kbase_context *kctx, + kbase_jd_atom *replay_atom, + base_jd_atom_v2 *t_atom, + base_jd_atom_v2 *f_atom) +{ + base_jd_replay_payload *payload; + mali_addr64 next; + mali_addr64 prev_jc = 0; + u16 hw_job_id_offset = 0; + mali_error ret = MALI_ERROR_FUNCTION_FAILED; + u64 phys_addr; + struct device *dev = kctx->kbdev->dev; + + KBASE_LOG(2, dev, + "kbasep_replay_parse_payload: replay_atom->jc = %llx " + "sizeof(payload) = %d\n", + replay_atom->jc, sizeof(payload)); + + kbase_gpu_vm_lock(kctx); + + payload = kbasep_map_page_sync(kctx, replay_atom->jc, &phys_addr); + + if (!payload) { + kbase_gpu_vm_unlock(kctx); + dev_err(dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n"); + return MALI_ERROR_FUNCTION_FAILED; + } + +#ifdef CONFIG_MALI_DEBUG + KBASE_LOG(2, dev, "kbasep_replay_parse_payload: payload=%p\n", payload); + KBASE_LOG(2, dev, "Payload structure:\n" + "tiler_jc_list = %llx\n" + "fragment_jc = %llx\n" + "tiler_heap_free = %llx\n" + "fragment_hierarchy_mask = %x\n" + "tiler_hierarchy_mask = %x\n" + "hierarchy_default_weight = %x\n" + "tiler_core_req = %x\n" + "fragment_core_req = %x\n", + payload->tiler_jc_list, + payload->fragment_jc, + payload->tiler_heap_free, + payload->fragment_hierarchy_mask, + payload->tiler_hierarchy_mask, + payload->hierarchy_default_weight, + payload->tiler_core_req, + payload->fragment_core_req); + payload_dump(kctx, payload); +#endif + + t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER; + f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER; + + /* Sanity 
check core requirements*/ + if ((t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & + ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_T || + (f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & + ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_FS || + t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES || + f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { + dev_err(dev, "Invalid core requirements\n"); + goto out; + } + + /* Process tiler job chains */ + next = payload->tiler_jc_list; + if (!next) { + dev_err(dev, "Invalid tiler JC list\n"); + goto out; + } + + while (next) { + base_jd_replay_jc *jc_struct = kbasep_map_page(kctx, next, NULL); + mali_addr64 jc; + + if (!jc_struct) { + dev_err(dev, "Failed to map jc struct\n"); + goto out; + } + + jc = jc_struct->jc; + next = jc_struct->next; + if (next) + jc_struct->jc = 0; + + kbasep_unmap_page(jc_struct); + + if (jc) { + u16 max_hw_job_id = 0; + + if (kbasep_replay_find_hw_job_id(kctx, jc, + &max_hw_job_id) != MALI_ERROR_NONE) + goto out; + + if (kbasep_replay_parse_jc(kctx, jc, prev_jc, + payload->tiler_heap_free, + payload->tiler_hierarchy_mask, + payload->hierarchy_default_weight, + hw_job_id_offset, MALI_FALSE) != + MALI_ERROR_NONE) { + goto out; + } + + hw_job_id_offset += max_hw_job_id; + + prev_jc = jc; + } + } + t_atom->jc = prev_jc; + + /* Process fragment job chain */ + f_atom->jc = payload->fragment_jc; + if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0, + payload->tiler_heap_free, + payload->fragment_hierarchy_mask, + payload->hierarchy_default_weight, 0, + MALI_TRUE) != MALI_ERROR_NONE) { + goto out; + } + + if (!t_atom->jc || !f_atom->jc) { + dev_err(dev, "Invalid payload\n"); + goto out; + } + + KBASE_LOG(2, dev, "t_atom->jc=%llx f_atom->jc=%llx\n", + t_atom->jc, f_atom->jc); + ret = MALI_ERROR_NONE; + +out: + kbasep_unmap_page_sync(payload, phys_addr); + + kbase_gpu_vm_unlock(kctx); + + return ret; +} + +/** + * @brief Process a replay job + * + * Called from kbase_process_soft_job. + * + * On exit, if the job has completed, katom->event_code will have been updated. + * If the job has not completed, and is replaying jobs, then the atom status + * will have been reset to KBASE_JD_ATOM_STATE_QUEUED. 
+ * + * @param[in] katom The atom to be processed + * @return MALI_REPLAY_STATUS_COMPLETE if the atom has completed + * MALI_REPLAY_STATUS_REPLAYING if the atom is replaying jobs + * Set MALI_REPLAY_FLAG_JS_RESCHED if + * kbasep_js_try_schedule_head_ctx required + */ +int kbase_replay_process(kbase_jd_atom *katom) +{ + kbase_context *kctx = katom->kctx; + kbase_jd_context *jctx = &kctx->jctx; + mali_bool need_to_try_schedule_context = MALI_FALSE; + base_jd_atom_v2 t_atom, f_atom; + kbase_jd_atom *t_katom, *f_katom; + struct device *dev = kctx->kbdev->dev; + + if (katom->event_code == BASE_JD_EVENT_DONE) { + KBASE_LOG(2, dev, "Previous job succeeded - not replaying\n"); + return MALI_REPLAY_STATUS_COMPLETE; + } + + if (jctx->sched_info.ctx.is_dying) { + KBASE_LOG(2, dev, "Not replaying; context is dying\n"); + return MALI_REPLAY_STATUS_COMPLETE; + } + + dev_warn(dev, "Replaying jobs retry=%d\n", katom->retry_count); + + katom->retry_count++; + if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) { + dev_err(dev, "Replay exceeded limit - failing jobs\n"); + /* katom->event_code is already set to the failure code of the + previous job */ + return MALI_REPLAY_STATUS_COMPLETE; + } + + if (kbasep_replay_create_atoms(kctx, &t_atom, &f_atom, + katom->nice_prio) != MALI_ERROR_NONE) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + return MALI_REPLAY_STATUS_COMPLETE; + } + + t_katom = &jctx->atoms[t_atom.atom_number]; + f_katom = &jctx->atoms[f_atom.atom_number]; + + if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != + MALI_ERROR_NONE) { + kbasep_release_katom(kctx, t_atom.atom_number); + kbasep_release_katom(kctx, f_atom.atom_number); + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + return MALI_REPLAY_STATUS_COMPLETE; + } + + kbasep_replay_reset_softjob(katom, f_katom); + + need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom); + if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) { + dev_err(dev, "Replay failed to submit atom\n"); + kbasep_release_katom(kctx, f_atom.atom_number); + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + return MALI_REPLAY_STATUS_COMPLETE; + } + need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom); + if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) { + dev_err(dev, "Replay failed to submit atom\n"); + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + return MALI_REPLAY_STATUS_COMPLETE; + } + + katom->event_code = BASE_JD_EVENT_DONE; + + if (need_to_try_schedule_context) + return MALI_REPLAY_STATUS_REPLAYING | + MALI_REPLAY_FLAG_JS_RESCHED; + return MALI_REPLAY_STATUS_REPLAYING; +} + diff --git a/drivers/gpu/arm/midgard/mali_kbase_security.c b/drivers/gpu/arm/midgard/mali_kbase_security.c new file mode 100755 index 00000000000..babde223891 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_security.c @@ -0,0 +1,78 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_security.c + * Base kernel security capability API + */ + +#include + +static inline mali_bool kbasep_am_i_root(void) +{ +#if KBASE_HWCNT_DUMP_BYPASS_ROOT + return MALI_TRUE; +#else + /* Check if root */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) + if (uid_eq(current_euid(), GLOBAL_ROOT_UID)) + return MALI_TRUE; +#else + if (current_euid() == 0) + return MALI_TRUE; +#endif /*LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)*/ + return MALI_FALSE; +#endif /*KBASE_HWCNT_DUMP_BYPASS_ROOT*/ +} + +/** + * kbase_security_has_capability - see mali_kbase_caps.h for description. + */ + +mali_bool kbase_security_has_capability(kbase_context *kctx, kbase_security_capability cap, u32 flags) +{ + /* Assume failure */ + mali_bool access_allowed = MALI_FALSE; + mali_bool audit = (KBASE_SEC_FLAG_AUDIT & flags) ? MALI_TRUE : MALI_FALSE; + + KBASE_DEBUG_ASSERT(NULL != kctx); + CSTD_UNUSED(kctx); + + /* Detect unsupported flags */ + KBASE_DEBUG_ASSERT(((~KBASE_SEC_FLAG_MASK) & flags) == 0); + + /* Determine if access is allowed for the given cap */ + switch (cap) { + case KBASE_SEC_MODIFY_PRIORITY: + case KBASE_SEC_INSTR_HW_COUNTERS_COLLECT: + /* Access is granted only if the caller is privileged */ + access_allowed = kbasep_am_i_root(); + break; + } + + /* Report problem if requested */ + if (MALI_FALSE == access_allowed) { + if (MALI_FALSE != audit) + dev_warn(kctx->kbdev->dev, "Security capability failure: %d, %p", cap, (void *)kctx); + } + + return access_allowed; +} + +KBASE_EXPORT_TEST_API(kbase_security_has_capability) diff --git a/drivers/gpu/arm/midgard/mali_kbase_security.h b/drivers/gpu/arm/midgard/mali_kbase_security.h new file mode 100755 index 00000000000..783e2810d5b --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_security.h @@ -0,0 +1,52 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_security.h + * Base kernel security capability APIs + */ + +#ifndef _KBASE_SECURITY_H_ +#define _KBASE_SECURITY_H_ + +/* Security flags */ +#define KBASE_SEC_FLAG_NOAUDIT (0u << 0) /* Silently handle privilege failure */ +#define KBASE_SEC_FLAG_AUDIT (1u << 0) /* Write audit message on privilege failure */ +#define KBASE_SEC_FLAG_MASK (KBASE_SEC_FLAG_AUDIT) /* Mask of all valid flag bits */ + +/* List of unique capabilities that have security access privileges */ +typedef enum { + /* Instrumentation Counters access privilege */ + KBASE_SEC_INSTR_HW_COUNTERS_COLLECT = 1, + KBASE_SEC_MODIFY_PRIORITY + /* Add additional access privileges here */ +} kbase_security_capability; + +/** + * kbase_security_has_capability - determine whether a task has a particular effective capability + * @param[in] kctx The task context. + * @param[in] cap The capability to check for. + * @param[in] flags Additional configuration information + * Such as whether to write an audit message or not. + * @return MALI_TRUE if success (capability is allowed), MALI_FALSE otherwise. 
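+ *
+ * Pass KBASE_SEC_FLAG_AUDIT in flags to have a warning logged when the
+ * check fails, or KBASE_SEC_FLAG_NOAUDIT to fail silently.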
+ */ + +mali_bool kbase_security_has_capability(kbase_context *kctx, kbase_security_capability cap, u32 flags); + +#endif /* _KBASE_SECURITY_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c new file mode 100755 index 00000000000..6c19155808e --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c @@ -0,0 +1,442 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include + +#ifdef CONFIG_SYNC +#include +#include +#include "mali_kbase_sync.h" +#endif + + +/* Mask to check cache alignment of data structures */ +#define KBASE_CACHE_ALIGNMENT_MASK ((1<jc; + kbase_context *kctx = katom->kctx; + int pm_active_err; + + u32 hi1, hi2; + + memset(&data, 0, sizeof(data)); + + /* Take the PM active reference as late as possible - otherwise, it could + * delay suspend until we process the atom (which may be at the end of a + * long chain of dependencies */ + pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); + if (pm_active_err) { + kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; + + /* We're suspended - queue this on the list of suspended jobs + * Use dep_item[1], because dep_item[0] is in use for 'waiting_soft_jobs' */ + mutex_lock(&js_devdata->runpool_mutex); + list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list); + mutex_unlock(&js_devdata->runpool_mutex); + + return pm_active_err; + } + + kbase_pm_request_gpu_cycle_counter(kctx->kbdev); + + /* Read hi, lo, hi to ensure that overflow from lo to hi is handled correctly */ + do { + hi1 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), NULL); + cycle_counter = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL); + hi2 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), NULL); + cycle_counter |= (((u64) hi1) << 32); + } while (hi1 != hi2); + + /* Read hi, lo, hi to ensure that overflow from lo to hi is handled correctly */ + do { + hi1 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), NULL); + system_time = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(TIMESTAMP_LO), NULL); + hi2 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), NULL); + system_time |= (((u64) hi1) << 32); + } while (hi1 != hi2); + + /* Record the CPU's idea of current time */ + getnstimeofday(&ts); + + kbase_pm_release_gpu_cycle_counter(kctx->kbdev); + + kbase_pm_context_idle(kctx->kbdev); + + data.sec = ts.tv_sec; + data.usec = ts.tv_nsec / 1000; + data.system_time = system_time; + data.cycle_counter = cycle_counter; + + pfn = jc >> PAGE_SHIFT; + offset = jc & ~PAGE_MASK; + + /* Assume this atom will be cancelled until we know otherwise */ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + if (offset > 0x1000 - sizeof(data)) { + /* Wouldn't fit in the page */ + return 0; + } + + reg = kbase_region_tracker_find_region_enclosing_address(kctx, jc); + if (!reg) + return 0; + + if (!(reg->flags & KBASE_REG_GPU_WR)) { + /* Region is not writable by GPU so we won't write to it either */ + return 
0; + } + + if (!reg->alloc->pages) + return 0; + + addr = reg->alloc->pages[pfn - reg->start_pfn]; + if (!addr) + return 0; + + page = kmap(pfn_to_page(PFN_DOWN(addr))); + if (!page) + return 0; + + memcpy(page + offset, &data, sizeof(data)); + kbase_sync_to_cpu(addr + offset, page + offset, sizeof(data)); + kunmap(pfn_to_page(PFN_DOWN(addr))); + + /* Atom was fine - mark it as done */ + katom->event_code = BASE_JD_EVENT_DONE; + + return 0; +} + +#ifdef CONFIG_SYNC + +/* Complete an atom that has returned '1' from kbase_process_soft_job (i.e. has waited) + * + * @param katom The atom to complete + */ +static void complete_soft_job(kbase_jd_atom *katom) +{ + kbase_context *kctx = katom->kctx; + + mutex_lock(&kctx->jctx.lock); + list_del(&katom->dep_item[0]); + kbase_finish_soft_job(katom); + if (jd_done_nolock(katom)) + kbasep_js_try_schedule_head_ctx(kctx->kbdev); + mutex_unlock(&kctx->jctx.lock); +} + +static base_jd_event_code kbase_fence_trigger(kbase_jd_atom *katom, int result) +{ + struct sync_pt *pt; + struct sync_timeline *timeline; + + if (!list_is_singular(&katom->fence->pt_list_head)) { + /* Not exactly one item in the list - so it didn't (directly) come from us */ + return BASE_JD_EVENT_JOB_CANCELLED; + } + + pt = list_first_entry(&katom->fence->pt_list_head, struct sync_pt, pt_list); + timeline = pt->parent; + + if (!kbase_sync_timeline_is_ours(timeline)) { + /* Fence has a sync_pt which isn't ours! */ + return BASE_JD_EVENT_JOB_CANCELLED; + } + + kbase_sync_signal_pt(pt, result); + + sync_timeline_signal(timeline); + + return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; +} + +static void kbase_fence_wait_worker(struct work_struct *data) +{ + kbase_jd_atom *katom; + kbase_context *kctx; + + katom = container_of(data, kbase_jd_atom, work); + kctx = katom->kctx; + + complete_soft_job(katom); +} + +static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter) +{ + kbase_jd_atom *katom = container_of(waiter, kbase_jd_atom, sync_waiter); + kbase_context *kctx; + + KBASE_DEBUG_ASSERT(NULL != katom); + + kctx = katom->kctx; + + KBASE_DEBUG_ASSERT(NULL != kctx); + + /* Propagate the fence status to the atom. + * If negative then cancel this atom and its dependencies. + */ + if (fence->status < 0) + { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + } + + /* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue + * + * The issue is that we may signal the timeline while holding kctx->jctx.lock and + * the callbacks are run synchronously from sync_timeline_signal. So we simply defer the work. 
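The comment above captures the key constraint in this file: the fence callback can run synchronously from sync_timeline_signal() while kctx->jctx.lock is held, so completing the atom inline could self-deadlock, which is why the driver defers completion through INIT_WORK()/queue_work() on jctx.job_done_wq. As a rough illustration of that shape only (not code from this patch), here is a userspace sketch with pthreads standing in for the kernel workqueue; all names are invented:

```c
/* Illustrative userspace analogue of "don't complete in the callback,
 * defer to a worker"; pthreads stand in for INIT_WORK()/queue_work(). */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct deferred_item {
	struct deferred_item *next;
	int value;
};

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t queue_cond = PTHREAD_COND_INITIALIZER;
static struct deferred_item *queue_head;

/* Runs in callback context: must not take the "job" lock, so only enqueue. */
static void fence_signalled_callback(int value)
{
	struct deferred_item *item = malloc(sizeof(*item));

	item->value = value;
	pthread_mutex_lock(&queue_lock);
	item->next = queue_head;
	queue_head = item;
	pthread_cond_signal(&queue_cond);
	pthread_mutex_unlock(&queue_lock);
}

/* Worker thread: a safe context to take whatever locks completion needs. */
static void *worker(void *arg)
{
	struct deferred_item *item;

	(void)arg;
	pthread_mutex_lock(&queue_lock);
	while (!queue_head)
		pthread_cond_wait(&queue_cond, &queue_lock);
	item = queue_head;
	queue_head = item->next;
	pthread_mutex_unlock(&queue_lock);

	printf("completing job %d outside the callback\n", item->value);
	free(item);
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, worker, NULL);
	fence_signalled_callback(42);
	pthread_join(tid, NULL);
	return 0;
}
```

The split of responsibilities is the whole point: the callback does nothing but enqueue, and the worker, running in a context where the ordinary locks may safely be taken, performs the real completion.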
+ */ + + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, kbase_fence_wait_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + +static int kbase_fence_wait(kbase_jd_atom *katom) +{ + int ret; + + KBASE_DEBUG_ASSERT(NULL != katom); + KBASE_DEBUG_ASSERT(NULL != katom->kctx); + + sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback); + + ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter); + + if (ret == 1) { + /* Already signalled */ + return 0; + } else if (ret < 0) { + goto cancel_atom; + } + return 1; + + cancel_atom: + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + /* We should cause the dependant jobs in the bag to be failed, + * to do this we schedule the work queue to complete this job */ + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, kbase_fence_wait_worker); + queue_work(katom->kctx->jctx.job_done_wq, &katom->work); + return 1; +} + +static void kbase_fence_cancel_wait(kbase_jd_atom *katom) +{ + if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) + { + /* The wait wasn't cancelled - leave the cleanup for kbase_fence_wait_callback */ + return; + } + + /* Wait was cancelled - zap the atoms */ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + kbase_finish_soft_job(katom); + + if (jd_done_nolock(katom)) + kbasep_js_try_schedule_head_ctx(katom->kctx->kbdev); +} +#endif /* CONFIG_SYNC */ + +int kbase_process_soft_job(kbase_jd_atom *katom) +{ + int status; + switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: + return kbase_dump_cpu_gpu_time(katom); +#ifdef CONFIG_SYNC + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + KBASE_DEBUG_ASSERT(katom->fence != NULL); + katom->event_code = kbase_fence_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT); + /* Release the reference as we don't need it any more */ + sync_fence_put(katom->fence); + katom->fence = NULL; + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + return kbase_fence_wait(katom); +#endif /* CONFIG_SYNC */ + case BASE_JD_REQ_SOFT_REPLAY: + status = kbase_replay_process(katom); + if (status & MALI_REPLAY_FLAG_JS_RESCHED) + pr_err("replay called from kbase_process_soft_job - missing resched!\n"); + return status & MALI_REPLAY_STATUS_MASK; + } + + /* Atom is complete */ + return 0; +} + +void kbase_cancel_soft_job(kbase_jd_atom *katom) +{ + switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { +#ifdef CONFIG_SYNC + case BASE_JD_REQ_SOFT_FENCE_WAIT: + kbase_fence_cancel_wait(katom); + break; +#endif + default: + /* This soft-job doesn't support cancellation! */ + KBASE_DEBUG_ASSERT(0); + } +} + +mali_error kbase_prepare_soft_job(kbase_jd_atom *katom) +{ + switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: + { + if(0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK)) + return MALI_ERROR_FUNCTION_FAILED; + } + break; +#ifdef CONFIG_SYNC + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + { + base_fence fence; + int fd; + if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) + return MALI_ERROR_FUNCTION_FAILED; + + fd = kbase_stream_create_fence(fence.basep.stream_fd); + if (fd < 0) + return MALI_ERROR_FUNCTION_FAILED; + + katom->fence = sync_fence_fdget(fd); + + if (katom->fence == NULL) { + /* The only way the fence can be NULL is if userspace closed it for us. 
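For orientation, kbase_fence_wait() above maps sync_fence_wait_async()'s result onto the convention used by all the soft-job handlers in this file: return 0 when the atom can be finished immediately, or 1 when completion happens later, either through the waiter callback or through a queued work item that fails the atom. A minimal sketch of that convention (illustrative only, with invented names, not the driver's code):

```c
/* Illustrative sketch of the soft-job 0-vs-1 completion contract.
 * register_async_wait() stands in for sync_fence_wait_async(). */
#include <stdio.h>

#define WAIT_ALREADY_SIGNALLED  1
#define WAIT_PENDING            0

static int register_async_wait(int fence_state)
{
	/* Pretend the fence is already signalled when fence_state != 0. */
	return fence_state ? WAIT_ALREADY_SIGNALLED : WAIT_PENDING;
}

/* Returns 0 if the job is complete now, 1 if it will complete later
 * (either via the waiter callback or via queued failure work). */
static int process_wait_job(int fence_state)
{
	int ret = register_async_wait(fence_state);

	if (ret == WAIT_ALREADY_SIGNALLED)
		return 0;	/* caller finishes the job immediately */
	if (ret < 0)
		return 1;	/* mark cancelled, queue deferred completion */
	return 1;		/* waiter installed; callback completes it */
}

int main(void)
{
	printf("signalled fence -> %d\n", process_wait_job(1));
	printf("pending fence   -> %d\n", process_wait_job(0));
	return 0;
}
```

kbase_process_soft_job() simply propagates this value for the fence-wait case, which lets the dispatcher return rather than block on the fence.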
+ * So we don't need to clear it up */ + return MALI_ERROR_FUNCTION_FAILED; + } + fence.basep.fd = fd; + if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) { + katom->fence = NULL; + sys_close(fd); + return MALI_ERROR_FUNCTION_FAILED; + } + } + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + { + base_fence fence; + if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) + return MALI_ERROR_FUNCTION_FAILED; + + /* Get a reference to the fence object */ + katom->fence = sync_fence_fdget(fence.basep.fd); + if (katom->fence == NULL) + return MALI_ERROR_FUNCTION_FAILED; + } + break; +#endif /* CONFIG_SYNC */ + case BASE_JD_REQ_SOFT_REPLAY: + break; + default: + /* Unsupported soft-job */ + return MALI_ERROR_FUNCTION_FAILED; + } + return MALI_ERROR_NONE; +} + +void kbase_finish_soft_job(kbase_jd_atom *katom) +{ + switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: + /* Nothing to do */ + break; +#ifdef CONFIG_SYNC + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + if (katom->fence) { + /* The fence has not yet been signalled, so we do it now */ + kbase_fence_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT); + sync_fence_put(katom->fence); + katom->fence = NULL; + } + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + /* Release the reference to the fence object */ + sync_fence_put(katom->fence); + katom->fence = NULL; + break; +#endif /* CONFIG_SYNC */ + } +} + +void kbase_resume_suspended_soft_jobs(kbase_device *kbdev) +{ + LIST_HEAD(local_suspended_soft_jobs); + kbase_jd_atom *tmp_iter; + kbase_jd_atom *katom_iter; + kbasep_js_device_data *js_devdata; + mali_bool resched = MALI_FALSE; + KBASE_DEBUG_ASSERT(kbdev); + + js_devdata = &kbdev->js_data; + + /* Move out the entire list */ + mutex_lock(&js_devdata->runpool_mutex); + list_splice_init(&js_devdata->suspended_soft_jobs_list, &local_suspended_soft_jobs); + mutex_unlock(&js_devdata->runpool_mutex); + + /* Each atom must be detached from the list and ran separately - it could + * be re-added to the old list, but this is unlikely */ + list_for_each_entry_safe(katom_iter, tmp_iter, &local_suspended_soft_jobs, dep_item[1]) + { + kbase_context *kctx = katom_iter->kctx; + mutex_lock(&kctx->jctx.lock); + + /* Remove from the global list */ + list_del(&katom_iter->dep_item[1]); + /* Remove from the context's list of waiting soft jobs */ + list_del(&katom_iter->dep_item[0]); + + if (kbase_process_soft_job(katom_iter) == 0) { + kbase_finish_soft_job(katom_iter); + resched |= jd_done_nolock(katom_iter); + } else { + /* The job has not completed */ + KBASE_DEBUG_ASSERT((katom_iter->core_req & BASEP_JD_REQ_ATOM_TYPE) + != BASE_JD_REQ_SOFT_REPLAY); + list_add_tail(&katom_iter->dep_item[0], &kctx->waiting_soft_jobs); + } + + mutex_unlock(&kctx->jctx.lock); + } + + if (resched) + kbasep_js_try_schedule_head_ctx(kbdev); +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.c b/drivers/gpu/arm/midgard/mali_kbase_sync.c new file mode 100755 index 00000000000..85b19104bce --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_sync.c @@ -0,0 +1,195 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
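One more note on the soft-job code before mali_kbase_sync.c: kbase_resume_suspended_soft_jobs() above uses the splice-under-lock idiom, holding runpool_mutex only long enough to move the entire suspended list onto a local head and then processing each atom with the mutex dropped. A rough userspace analogue (illustrative only; pthreads and a plain singly linked list stand in for the kernel mutex and struct list_head, and the names are invented):

```c
/* Illustrative sketch of "splice the whole list out under the lock,
 * then walk the private copy with the lock dropped". */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct pending {
	struct pending *next;
	int id;
};

static pthread_mutex_t pending_lock = PTHREAD_MUTEX_INITIALIZER;
static struct pending *pending_head;

static void add_pending(int id)
{
	struct pending *p = malloc(sizeof(*p));

	p->id = id;
	pthread_mutex_lock(&pending_lock);
	p->next = pending_head;
	pending_head = p;
	pthread_mutex_unlock(&pending_lock);
}

static void resume_all(void)
{
	struct pending *local, *p;

	/* Equivalent of list_splice_init(): steal the list, leave it empty. */
	pthread_mutex_lock(&pending_lock);
	local = pending_head;
	pending_head = NULL;
	pthread_mutex_unlock(&pending_lock);

	/* Process outside the lock; entries may safely be re-added meanwhile. */
	while (local) {
		p = local;
		local = p->next;
		printf("resuming soft job %d\n", p->id);
		free(p);
	}
}

int main(void)
{
	add_pending(1);
	add_pending(2);
	resume_all();
	return 0;
}
```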
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_sync.c + * + */ + +#ifdef CONFIG_SYNC + +#include +#include + +struct mali_sync_timeline { + struct sync_timeline timeline; + atomic_t counter; + atomic_t signalled; +}; + +struct mali_sync_pt { + struct sync_pt pt; + u32 order; + int result; +}; + +static struct mali_sync_timeline *to_mali_sync_timeline(struct sync_timeline *timeline) +{ + return container_of(timeline, struct mali_sync_timeline, timeline); +} + +static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt) +{ + return container_of(pt, struct mali_sync_pt, pt); +} + +static struct sync_pt *timeline_dup(struct sync_pt *pt) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + struct mali_sync_pt *new_mpt; + struct sync_pt *new_pt = sync_pt_create(pt->parent, sizeof(struct mali_sync_pt)); + + if (!new_pt) + return NULL; + + new_mpt = to_mali_sync_pt(new_pt); + new_mpt->order = mpt->order; + new_mpt->result = mpt->result; + + return new_pt; + +} + +static int timeline_has_signaled(struct sync_pt *pt) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + struct mali_sync_timeline *mtl = to_mali_sync_timeline(pt->parent); + int result = mpt->result; + + long diff = atomic_read(&mtl->signalled) - mpt->order; + + if (diff >= 0) + { + return result < 0 ? result : 1; + } + else + return 0; +} + +static int timeline_compare(struct sync_pt *a, struct sync_pt *b) +{ + struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt); + struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt); + + long diff = ma->order - mb->order; + + if (diff < 0) + return -1; + else if (diff == 0) + return 0; + else + return 1; +} + +static void timeline_value_str(struct sync_timeline *timeline, char * str, + int size) +{ + struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline); + snprintf(str, size, "%d", atomic_read(&mtl->signalled)); +} + +static void pt_value_str(struct sync_pt *pt, char *str, int size) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + snprintf(str, size, "%d(%d)", mpt->order, mpt->result); +} + +static struct sync_timeline_ops mali_timeline_ops = { + .driver_name = "Mali", + .dup = timeline_dup, + .has_signaled = timeline_has_signaled, + .compare = timeline_compare, + .timeline_value_str = timeline_value_str, + .pt_value_str = pt_value_str, +#if 0 + .free_pt = timeline_free_pt, + .release_obj = timeline_release_obj +#endif +}; + +int kbase_sync_timeline_is_ours(struct sync_timeline *timeline) +{ + return timeline->ops == &mali_timeline_ops; +} + +struct sync_timeline *kbase_sync_timeline_alloc(const char *name) +{ + struct sync_timeline *tl; + struct mali_sync_timeline *mtl; + + tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline), name); + if (!tl) + return NULL; + + /* Set the counter in our private struct */ + mtl = to_mali_sync_timeline(tl); + atomic_set(&mtl->counter, 0); + atomic_set(&mtl->signalled, 0); + + return tl; +} + +struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent) +{ + struct sync_pt *pt = sync_pt_create(parent, sizeof(struct mali_sync_pt)); + struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent); + struct mali_sync_pt *mpt; + + if (!pt) + return NULL; + + mpt = to_mali_sync_pt(pt); + mpt->order = atomic_inc_return(&mtl->counter); + mpt->result = 0; + + return pt; +} + +void 
kbase_sync_signal_pt(struct sync_pt *pt, int result) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + struct mali_sync_timeline *mtl = to_mali_sync_timeline(pt->parent); + int signalled; + int diff; + + mpt->result = result; + + do { + + signalled = atomic_read(&mtl->signalled); + + diff = signalled - mpt->order; + + if (diff > 0) { + /* The timeline is already at or ahead of this point. + * This should not happen unless userspace has been + * signalling fences out of order, so warn but don't + * violate the sync_pt API. + * The warning is only in debug builds to prevent + * a malicious user being able to spam dmesg. + */ +#ifdef CONFIG_MALI_DEBUG + pr_err("Fences were triggered in a different order to allocation!"); +#endif /* CONFIG_MALI_DEBUG */ + return; + } + } while (atomic_cmpxchg(&mtl->signalled, signalled, mpt->order) != signalled); +} + +#endif /* CONFIG_SYNC */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h b/drivers/gpu/arm/midgard/mali_kbase_sync.h new file mode 100755 index 00000000000..97ffef8cf35 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h @@ -0,0 +1,83 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_sync.h + * + */ + +#ifndef MALI_KBASE_SYNC_H +#define MALI_KBASE_SYNC_H + +#include +#include + +/* + * Create a stream object. + * Built on top of timeline object. + * Exposed as a file descriptor. + * Life-time controlled via the file descriptor: + * - dup to add a ref + * - close to remove a ref + */ +mali_error kbase_stream_create(const char *name, int *const out_fd); + +/* + * Create a fence in a stream object + */ +int kbase_stream_create_fence(int tl_fd); + +/* + * Validate a fd to be a valid fence + * No reference is taken. + * + * This function is only usable to catch unintentional user errors early, + * it does not stop malicious code changing the fd after this function returns. + */ +mali_error kbase_fence_validate(int fd); + +/* Returns true if the specified timeline is allocated by Mali */ +int kbase_sync_timeline_is_ours(struct sync_timeline *timeline); + +/* Allocates a timeline for Mali + * + * One timeline should be allocated per API context. + */ +struct sync_timeline *kbase_sync_timeline_alloc(const char *name); + +/* Allocates a sync point within the timeline. + * + * The timeline must be the one allocated by kbase_sync_timeline_alloc + * + * Sync points must be triggered in *exactly* the same order as they are allocated. + */ +struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent); + +/* Signals a particular sync point + * + * Sync points must be triggered in *exactly* the same order as they are allocated. + * + * If they are signalled in the wrong order then a message will be printed in debug + * builds and otherwise attempts to signal order sync_pts will be ignored. + * + * result can be negative to indicate error, any other value is interpreted as success. 
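The ordering rule stated here is what kbase_sync_signal_pt(), shown earlier in this patch, enforces: every sync point carries its allocation-order number, and signalling only ever moves the timeline's signalled counter forwards through a cmpxchg loop, so a late, out-of-order signal is simply dropped. A hedged userspace C11 sketch of that shape (<stdatomic.h> stands in for the kernel's atomic_t; the names are invented):

```c
/* Illustrative sketch of the monotonic "signalled" counter: the timeline
 * only advances to a point's order number, never backwards. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int signalled;	/* highest order signalled so far */

static void signal_point(int order)
{
	int seen;

	do {
		seen = atomic_load(&signalled);
		if (seen - order > 0) {
			/* Timeline already past this point: out of order. */
			printf("order %d signalled out of order, ignored\n",
			       order);
			return;
		}
	} while (!atomic_compare_exchange_weak(&signalled, &seen, order));

	printf("timeline advanced to %d\n", order);
}

int main(void)
{
	signal_point(1);
	signal_point(3);
	signal_point(2);	/* arrives late: ignored */
	return 0;
}
```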
+ */ +void kbase_sync_signal_pt(struct sync_pt *pt, int result); + +#endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_user.c b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c new file mode 100755 index 00000000000..53a936f6b05 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c @@ -0,0 +1,155 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_sync_user.c + * + */ + +#ifdef CONFIG_SYNC + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int kbase_stream_close(struct inode *inode, struct file *file) +{ + struct sync_timeline *tl; + tl = (struct sync_timeline *)file->private_data; + BUG_ON(!tl); + sync_timeline_destroy(tl); + return 0; +} + +static const struct file_operations stream_fops = { + .owner = THIS_MODULE, + .release = kbase_stream_close, +}; + +mali_error kbase_stream_create(const char *name, int *const out_fd) +{ + struct sync_timeline *tl; + BUG_ON(!out_fd); + + tl = kbase_sync_timeline_alloc(name); + if (!tl) + return MALI_ERROR_FUNCTION_FAILED; + + *out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY | O_CLOEXEC); + + if (*out_fd < 0) { + sync_timeline_destroy(tl); + return MALI_ERROR_FUNCTION_FAILED; + } else { + return MALI_ERROR_NONE; + } +} + +int kbase_stream_create_fence(int tl_fd) +{ + struct sync_timeline *tl; + struct sync_pt *pt; + struct sync_fence *fence; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) + struct files_struct *files; + struct fdtable *fdt; +#endif + int fd; + struct file *tl_file; + + tl_file = fget(tl_fd); + if (tl_file == NULL) + return -EBADF; + + if (tl_file->f_op != &stream_fops) { + fd = -EBADF; + goto out; + } + + tl = tl_file->private_data; + + pt = kbase_sync_pt_alloc(tl); + if (!pt) { + fd = -EFAULT; + goto out; + } + + fence = sync_fence_create("mali_fence", pt); + if (!fence) { + sync_pt_free(pt); + fd = -EFAULT; + goto out; + } + + /* from here the fence owns the sync_pt */ + + /* create a fd representing the fence */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) + fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); + if (fd < 0) { + sync_fence_put(fence); + goto out; + } +#else + fd = get_unused_fd(); + if (fd < 0) { + sync_fence_put(fence); + goto out; + } + + files = current->files; + spin_lock(&files->file_lock); + fdt = files_fdtable(files); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) + __set_close_on_exec(fd, fdt); +#else + FD_SET(fd, fdt->close_on_exec); +#endif + spin_unlock(&files->file_lock); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */ + + /* bind fence to the new fd */ + sync_fence_install(fence, fd); + + out: + fput(tl_file); + + return fd; +} + +mali_error kbase_fence_validate(int fd) +{ + struct sync_fence *fence; + fence = sync_fence_fdget(fd); + if (NULL != fence) { + sync_fence_put(fence); + return MALI_ERROR_NONE; + } else { + return MALI_ERROR_FUNCTION_FAILED; + } +} + +#endif /* CONFIG_SYNC */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h new 
file mode 100755 index 00000000000..2e5b7443356 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h @@ -0,0 +1,232 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** + * ***** DO NOT INCLUDE DIRECTLY ***** + * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ + +/* + * The purpose of this header file is just to contain a list of trace code idenitifers + * + * Each identifier is wrapped in a macro, so that its string form and enum form can be created + * + * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block. + * + * This allows automatic creation of an enum and a corresponding array of strings + * + * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE. + * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE. + * + * e.g.: + * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X + * typedef enum + * { + * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X ) + * #include "mali_kbase_trace_defs.h" + * #undef KBASE_TRACE_CODE_MAKE_CODE + * } kbase_trace_code; + * + * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE + * + * + * The use of the macro here is: + * - KBASE_TRACE_CODE_MAKE_CODE( X ) + * + * Which produces: + * - For an enum, KBASE_TRACE_CODE_X + * - For a string, "X" + * + * + * For example: + * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to: + * - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum + * - "JM_JOB_COMPLETE" for the string + * - To use it to trace an event, do: + * - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val ); + */ + +#if 0 /* Dummy section to avoid breaking formatting */ +int dummy_array[] = { +#endif + +/* + * Core events + */ + KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), /* no info_val, no gpu_addr, no atom */ + KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM), /* no info_val, no gpu_addr, no atom */ + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ), /* info_val == GPU_IRQ_STATUS register */ + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR), /* info_val == bits cleared */ + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE), /* info_val == GPU_IRQ_STATUS register */ + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET), + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET), + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR), + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE), /* GPU addr==dump address */ + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES), + +/* + * Job Slot management events + */ + KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ), /* info_val==irq rawstat at start */ + KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END), + /* info_val==jobs processed */ +/* In the following: + * + * - ctx is set if a corresponding job found (NULL otherwise, e.g. 
some soft-stop cases) + * - uatom==kernel-side mapped uatom address (for correlation with user-side) + */ + KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE), /* info_val==exit code; gpu_addr==chain gpuaddr */ + KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT), + /* gpu_addr==JSn_HEAD_NEXT written, info_val==lower 32 bits of affinity */ +/* gpu_addr is as follows: + * - If JSn_STATUS active after soft-stop, val==gpu addr written to JSn_HEAD on submit + * - otherwise gpu_addr==0 */ + KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP), + KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0), + KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1), + KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP), /* gpu_addr==JSn_HEAD read */ + KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0), /* gpu_addr==JSn_HEAD read */ + KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1), /* gpu_addr==JSn_HEAD read */ + KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD), /* gpu_addr==JSn_TAIL read */ +/* gpu_addr is as follows: + * - If JSn_STATUS active before soft-stop, val==JSn_HEAD + * - otherwise gpu_addr==0 */ + KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD), /* gpu_addr==JSn_HEAD read */ + KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS), + KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE), + KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED), /* info_val == is_scheduled */ + KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED), + /* info_val == is_scheduled */ + KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE), + KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP), /* info_val == nr jobs submitted */ + KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT), /* gpu_addr==JSn_HEAD_NEXT last written */ + KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET), + KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER), + KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER), +/* + * Job dispatch events + */ + KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),/* gpu_addr==value to write into JSn_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER), /* gpu_addr==value to write into JSn_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END), + /* gpu_addr==value to write into JSn_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB), + /* gpu_addr==value to write into JSn_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT), /* gpu_addr==0, info_val==0, uatom==0 */ + KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL), + /* gpu_addr==value to write into JSn_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER), + /* gpu_addr==value to write into JSn_HEAD */ +/* + * Scheduler Core events + */ + KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK), + KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB), /* gpu_addr==value to write into JSn_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB), /* gpu_addr==last value written/would be written to JSn_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB), /* gpu_addr==value to write into JSn_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED), + /* gpu_addr==value to write into JSn_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX), + /* kctx is the one being evicted, info_val == kctx to put in */ + KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED), + KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT), /* info_val == lower 32 bits of affinity */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED), + /* info_val == lower 32 bits of affinity */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED), + /* info_val == lower 32 bits of affinity */ + 
KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED), /* info_val == lower 32 bits of rechecked affinity */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED), /* info_val == lower 32 bits of rechecked affinity */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE), + /* info_val == lower 32 bits of affinity */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX), /* info_val == the ctx attribute now on ctx */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL), + /* info_val == the ctx attribute now on runpool */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),/* info_val == the ctx attribute now off ctx */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL), /* info_val == the ctx attribute now off runpool */ +/* + * Scheduler Policy events + */ + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX), /* info_val == whether it was evicted */ + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB), /* gpu_addr==JSn_HEAD to write if the job were run */ + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END), +/* + * Power Management Events + */ + KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP), + KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP), + KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON), + KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER), + KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2), + KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF), + KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER), + KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER), + /* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */ + KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED), + KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER), + KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE), + KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE), + KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED), + KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED), + KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE), + KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE), + KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED), + KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED), + KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED), + KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED), + KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS), + KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE), + KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE), + KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON), + 
KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF), + KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY), /* info_val == policy number, or -1 for "Already changing" */ + KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY), + + KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT), /* info_val == policy number */ + KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM), /* info_val == policy number */ +/* Unused code just to make it easier to not have a comma at the end. + * All other codes MUST come before this */ + KBASE_TRACE_CODE_MAKE_CODE(DUMMY) + + +#if 0 /* Dummy section to avoid breaking formatting */ +}; +#endif + +/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c new file mode 100755 index 00000000000..0968025359c --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c @@ -0,0 +1,231 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include +#include + +#define CREATE_TRACE_POINTS + +#ifdef CONFIG_MALI_TRACE_TIMELINE +#include "mali_timeline.h" + +#include +#include + +struct kbase_trace_timeline_desc +{ + char *enum_str; + char *desc; + char *format; + char *format_desc; +}; + +struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = +{ + #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc } + #include "mali_kbase_trace_timeline_defs.h" + #undef KBASE_TIMELINE_TRACE_CODE +}; + +#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table) + +STATIC void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos) +{ + if (*pos >= KBASE_NR_TRACE_CODES) + return NULL; + + return &kbase_trace_timeline_desc_table[*pos]; +} + +STATIC void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data) +{ +} + +STATIC void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos) +{ + (*pos)++; + + if (*pos == KBASE_NR_TRACE_CODES) + return NULL; + + return &kbase_trace_timeline_desc_table[*pos]; +} + +STATIC int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data) +{ + struct kbase_trace_timeline_desc *trace_desc = data; + + seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc); + return 0; +} + + +static const struct seq_operations kbasep_trace_timeline_seq_ops = { + .start = kbasep_trace_timeline_seq_start, + .next = kbasep_trace_timeline_seq_next, + .stop = kbasep_trace_timeline_seq_stop, + .show = kbasep_trace_timeline_seq_show, +}; + +STATIC int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &kbasep_trace_timeline_seq_ops); +} + +static const struct file_operations kbasep_trace_timeline_debugfs_fops = { + .open = kbasep_trace_timeline_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +mali_error kbasep_trace_timeline_debugfs_init(kbase_device *kbdev) +{ + kbdev->timeline.dentry = debugfs_create_file("mali_timeline_defs", + 
S_IRUGO, kbdev->mali_debugfs_directory, NULL, + &kbasep_trace_timeline_debugfs_fops); + if (IS_ERR(kbdev->timeline.dentry)) + return MALI_ERROR_FUNCTION_FAILED; + + return MALI_ERROR_NONE; +} + +void kbasep_trace_timeline_debugfs_term(kbase_device *kbdev) +{ + debugfs_remove(kbdev->timeline.dentry); +} + +void kbase_timeline_job_slot_submit(kbase_device *kbdev, kbase_context *kctx, + kbase_jd_atom *katom, int js) +{ + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + if(kbdev->timeline.slot_atoms_submitted[js] > 0) { + KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1); + } else { + base_atom_id atom_number = kbase_jd_atom_id(kctx, katom); + KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1); + KBASE_TIMELINE_JOB_START(kctx, js, atom_number); + } + ++kbdev->timeline.slot_atoms_submitted[js]; + + KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]); +} + +void kbase_timeline_job_slot_done(kbase_device *kbdev, kbase_context *kctx, + kbase_jd_atom *katom, int js, + kbasep_js_atom_done_code done_code) +{ + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) { + KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0); + } else { + /* Job finished in JSn_HEAD */ + base_atom_id atom_number = kbase_jd_atom_id(kctx, katom); + KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0); + KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number); + /* see if we need to trace the job in JSn_NEXT moving to JSn_HEAD */ + if (kbdev->timeline.slot_atoms_submitted[js] > 1) { + /* Tag events with next_katom's kctx */ + kbase_jm_slot *slot = &kbdev->jm_slots[js]; + kbase_jd_atom *next_katom; + kbase_context *next_kctx; + KBASE_DEBUG_ASSERT(kbasep_jm_nr_jobs_submitted(slot) > 0); + + /* Peek the next atom - note that the atom in JSn_HEAD will already + * have been dequeued */ + next_katom = kbasep_jm_peek_idx_submit_slot(slot, 0); + next_kctx = next_katom->kctx; + KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0); + KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1); + KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom)); + } + } + + --kbdev->timeline.slot_atoms_submitted[js]; + + KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]); +} + +void kbase_timeline_pm_send_event(kbase_device *kbdev, kbase_timeline_pm_event event_sent) +{ + int uid = 0; + int old_uid; + + /* If a producer already exists for the event, try to use their UID (multiple-producers) */ + uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]); + old_uid = uid; + + /* Get a new non-zero UID if we don't have one yet */ + while (!uid) + uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter); + + /* Try to use this UID */ + if ( old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid)) + /* If it changed, raced with another producer: we've lost this UID */ + uid = 0; + + KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid); +} + +void kbase_timeline_pm_check_handle_event(kbase_device *kbdev, kbase_timeline_pm_event event) +{ + int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]); + + if (uid != 0) { + if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0)) + /* If it changed, raced with another consumer: we've lost this UID */ + uid = 0; + + KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid); + } +} + +void kbase_timeline_pm_handle_event(kbase_device *kbdev, kbase_timeline_pm_event event) +{ + int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]); + + if (uid != 
atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0)) + /* If it changed, raced with another consumer: we've lost this UID */ + uid = 0; + + KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid); +} + +void kbase_timeline_pm_l2_transition_start(kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.power_change_lock); + /* Simply log the start of the transition */ + kbdev->timeline.l2_transitioning = MALI_TRUE; + KBASE_TIMELINE_POWERING_L2(kbdev); +} + +void kbase_timeline_pm_l2_transition_done(kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.power_change_lock); + /* Simply log the end of the transition */ + if( MALI_FALSE != kbdev->timeline.l2_transitioning ) + { + kbdev->timeline.l2_transitioning = MALI_FALSE; + KBASE_TIMELINE_POWERED_L2(kbdev); + } +} + +#endif /* CONFIG_MALI_TRACE_TIMELINE */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h new file mode 100755 index 00000000000..fc2a383ad1d --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h @@ -0,0 +1,368 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#if !defined(_KBASE_TRACE_TIMELINE_H) +#define _KBASE_TRACE_TIMELINE_H + +#ifdef CONFIG_MALI_TRACE_TIMELINE + +typedef enum +{ + #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val + #include "mali_kbase_trace_timeline_defs.h" + #undef KBASE_TIMELINE_TRACE_CODE +} kbase_trace_timeline_code; + +/** Initialize Timeline DebugFS entries */ +mali_error kbasep_trace_timeline_debugfs_init(kbase_device *kbdev); +/** Terminate Timeline DebugFS entries */ +void kbasep_trace_timeline_debugfs_term(kbase_device *kbdev); + +/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE + * functions. 
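The typedef enum just above is one half of the X-macro technique that mali_kbase_trace_defs.h and mali_kbase_trace_timeline_defs.h rely on: the same list of identifiers is expanded once into an enum and once into a table of descriptions, so the two can never drift apart. A self-contained miniature of the idea (illustrative only; a TRACE_CODES() list macro replaces the driver's re-#included header, and the names are invented):

```c
/* Self-contained miniature of the X-macro trick: one list, expanded
 * once as an enum and once as a matching string table. */
#include <stdio.h>

#define TRACE_CODES(X)	\
	X(JM_JOB_DONE)	\
	X(JM_SUBMIT)	\
	X(JD_CANCEL)

#define MAKE_ENUM(name)   TRACE_CODE_##name,
#define MAKE_STRING(name) #name,

enum trace_code {
	TRACE_CODES(MAKE_ENUM)
	TRACE_CODE_COUNT
};

static const char *const trace_code_names[] = {
	TRACE_CODES(MAKE_STRING)
};

int main(void)
{
	for (int i = 0; i < TRACE_CODE_COUNT; i++)
		printf("%d -> %s\n", i, trace_code_names[i]);
	return 0;
}
```

In the driver the list lives in its own deliberately guard-less header, and each including site defines KBASE_TRACE_CODE_MAKE_CODE or KBASE_TIMELINE_TRACE_CODE to select the expansion it needs and then #undefs it, exactly as those headers' own comments require.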
+ * Output is timestamped by either sched_clock() (default), local_clock(), or + * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */ +#include "mali_timeline.h" + +/* Trace number of atoms in flight for kctx (atoms either not completed, or in + process of being returned to user */ +#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec, \ + (int)kctx->timeline.owner_tgid, \ + count); \ + } while (0) + +/* Trace atom_id being Ready to Run */ +#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec, \ + CTX_FLOW_ATOM_READY, \ + (int)kctx->timeline.owner_tgid, \ + atom_id); \ + } while (0) + + +/* Trace number of atoms submitted to job slot js + * + * NOTE: This uses a different tracepoint to the head/next/soft-stop actions, + * so that those actions can be filtered out separately from this + * + * This is because this is more useful, as we can use it to calculate general + * utilization easily and accurately */ +#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_SLOT_ACTIVE, \ + (int)kctx->timeline.owner_tgid, \ + js, count); \ + } while (0) + + +/* Trace atoms present in JSn_NEXT */ +#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_SLOT_NEXT, \ + (int)kctx->timeline.owner_tgid, \ + js, count); \ + } while (0) + +/* Trace atoms present in JSn_HEAD */ +#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_SLOT_HEAD, \ + (int)kctx->timeline.owner_tgid, \ + js, count); \ + } while (0) + +/* Trace that a soft stop/evict from next is being attempted on a slot */ +#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_SLOT_STOPPING, \ + (kctx)?(int)kctx->timeline.owner_tgid:0, \ + js, count); \ + } while (0) + + + +/* Trace state of overall GPU power */ +#define KBASE_TIMELINE_GPU_POWER(kbdev, active) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_POWER_ACTIVE, active); \ + } while (0) + +/* Trace state of tiler power */ +#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_POWER_TILER_ACTIVE, \ + hweight64(bitmap)); \ + } while (0) + +/* Trace number of shaders currently powered */ +#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_POWER_SHADER_ACTIVE, \ + hweight64(bitmap)); \ + } while (0) + +/* Trace state of L2 power */ +#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_POWER_L2_ACTIVE, \ + 
hweight64(bitmap)); \ + }while(0) + +/* Trace state of L2 cache*/ +#define KBASE_TIMELINE_POWERING_L2(kbdev) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \ + SW_FLOW_GPU_POWER_L2_POWERING, \ + 1); \ + }while(0) + +#define KBASE_TIMELINE_POWERED_L2(kbdev) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \ + SW_FLOW_GPU_POWER_L2_ACTIVE, \ + 1); \ + }while(0) + +/* Trace kbase_pm_send_event message send */ +#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \ + SW_FLOW_PM_SEND_EVENT, \ + event_type, pm_event_id); \ + } while (0) + +/* Trace kbase_pm_worker message receive */ +#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \ + SW_FLOW_PM_HANDLE_EVENT, \ + event_type, pm_event_id); \ + } while (0) + + +/* Trace atom_id starting in JSn_HEAD */ +#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \ + HW_START_GPU_JOB_CHAIN_SW_APPROX, \ + (int)kctx->timeline.owner_tgid, \ + js, _consumerof_atom_number); \ + } while (0) + +/* Trace atom_id stopping on JSn_HEAD */ +#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \ + HW_STOP_GPU_JOB_CHAIN_SW_APPROX, \ + (int)kctx->timeline.owner_tgid, \ + js, _producerof_atom_number_completed); \ + } while (0) + +/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a + * certin caller */ +#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) \ + do \ + { \ + struct timespec ts; \ + getnstimeofday(&ts); \ + trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec, \ + trace_code, \ + 1); \ + } while (0) + +/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */ + +/** + * Trace that an atom is starting on a job slot + * + * The caller must be holding kbasep_js_device_data::runpool_irq::lock + */ +void kbase_timeline_job_slot_submit(kbase_device *kbdev, kbase_context *kctx, + kbase_jd_atom *katom, int js); + +/** + * Trace that an atom has done on a job slot + * + * 'Done' in this sense can occur either because: + * - the atom in JSn_HEAD finished + * - the atom in JSn_NEXT was evicted + * + * Whether the atom finished or was evicted is passed in @a done_code + * + * It is assumed that the atom has already been removed from the submit slot, + * with either: + * - kbasep_jm_dequeue_submit_slot() + * - kbasep_jm_dequeue_tail_submit_slot() + * + * The caller must be holding kbasep_js_device_data::runpool_irq::lock + */ +void kbase_timeline_job_slot_done(kbase_device *kbdev, kbase_context *kctx, + kbase_jd_atom *katom, int js, + kbasep_js_atom_done_code done_code); + + +/** Trace a pm event starting */ +void kbase_timeline_pm_send_event(kbase_device *kbdev, + kbase_timeline_pm_event event_sent); + +/** Trace a pm event finishing */ +void kbase_timeline_pm_check_handle_event(kbase_device *kbdev, kbase_timeline_pm_event event); + +/** Check whether a pm event was present, and if so trace finishing it */ +void 
kbase_timeline_pm_handle_event(kbase_device *kbdev, kbase_timeline_pm_event event); + +/** Trace L2 power-up start */ +void kbase_timeline_pm_l2_transition_start(kbase_device *kbdev); + +/** Trace L2 power-up done */ +void kbase_timeline_pm_l2_transition_done(kbase_device *kbdev); + +#else + +#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP() + +#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP() + +#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP() + +#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP() + +#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP() + +#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP() + +#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP() + +#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP() + +#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP() + +#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP() + +#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP() + +#define KBASE_TIMELINE_POWERED_L2(kbdev) CSTD_NOP() + +#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP() + +#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP() + +#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP() + +#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP() + +#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP() + +static INLINE void kbase_timeline_job_slot_submit(kbase_device *kbdev, kbase_context *kctx, + kbase_jd_atom *katom, int js) +{ + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); +} + +static INLINE void kbase_timeline_job_slot_done(kbase_device *kbdev, kbase_context *kctx, + kbase_jd_atom *katom, int js, + kbasep_js_atom_done_code done_code) +{ + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); +} + +static INLINE void kbase_timeline_pm_send_event(kbase_device *kbdev, kbase_timeline_pm_event event_sent) +{ +} + +static INLINE void kbase_timeline_pm_check_handle_event(kbase_device *kbdev, kbase_timeline_pm_event event) +{ +} + +static INLINE void kbase_timeline_pm_handle_event(kbase_device *kbdev, kbase_timeline_pm_event event) +{ +} + +static INLINE void kbase_timeline_pm_l2_transition_start(kbase_device *kbdev) +{ + +} + +static INLINE void kbase_timeline_pm_l2_transition_done(kbase_device *kbdev) +{ + +} +#endif /* CONFIG_MALI_TRACE_TIMELINE */ + +#endif /* _KBASE_TRACE_TIMELINE_H */ + diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h new file mode 100755 index 00000000000..2795c0b70c0 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h @@ -0,0 +1,132 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** + * ***** DO NOT INCLUDE DIRECTLY ***** + * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ + +/* + * Conventions on Event Names: + * + * - The prefix determines something about how the timeline should be + * displayed, and is split up into various parts, separated by underscores: + * - 'SW' and 'HW' as the first part will be used to determine whether a + * timeline is to do with Software or Hardware - effectively, separate + * 'channels' for Software and Hardware + * - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and + * signify related pairs of events - these are optional. + * - 'FLOW' indicates a generic event, which can use dependencies + * - This gives events such as: + * - 'SW_ENTER_FOO' + * - 'SW_LEAVE_FOO' + * - 'SW_FLOW_BAR_1' + * - 'SW_FLOW_BAR_2' + * - 'HW_START_BAZ' + * - 'HW_STOP_BAZ' + * - And an unadorned HW event: + * - 'HW_BAZ_FROZBOZ' + */ + +/* + * Conventions on parameter names: + * - anything with 'instance' in the name will have a separate timeline based + * on that instances. + * - underscored-prefixed parameters will by hidden by default on timelines + * + * Hence: + * - Different job slots have their own 'instance', based on the instance value + * - Per-context info (e.g. atoms on a context) have their own 'instance' + * (i.e. each context should be on a different timeline) + * + * Note that globally-shared resources can be tagged with a tgid, but we don't + * want an instance per context: + * - There's no point having separate Job Slot timelines for each context, that + * would be confusing - there's only really 3 job slots! + * - There's no point having separate Shader-powered timelines for each + * context, that would be confusing - all shader cores (whether it be 4, 8, + * etc) are shared in the system. + */ + + /* + * CTX events + */ + /* Separate timelines for each context 'instance'*/ + KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT, "CTX: Atoms in flight", "%d,%d", "_instance_tgid,_value_number_of_atoms"), + KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY, "CTX: Atoms Ready to Run", "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"), + + /* + * SW Events + */ + /* Separate timelines for each slot 'instance' */ + KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE, "SW: GPU slot active", "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"), + KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT, "SW: GPU atom in NEXT", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"), + KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD, "SW: GPU atom in HEAD", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"), + KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING, "SW: Try Soft-Stop on GPU slot", "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"), + /* Shader and overall power is shared - can't have separate instances of + * it, just tagging with the context */ + KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE, "SW: GPU power active", "%d,%d", "_tgid,_value_is_power_active"), + KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE, "SW: GPU tiler powered", "%d,%d", "_tgid,_value_number_of_tilers"), + KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered", "%d,%d", "_tgid,_value_number_of_shaders"), + KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powered", "%d,%d", "_tgid,_value_number_of_l2"), + + /* SW Power event messaging. 
_event_type is one from the kbase_pm_event enum */ + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT, "SW: PM Send Event", "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT, "SW: PM Handle Event", "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"), + /* SW L2 power events */ + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING, "SW: GPU L2 powering", "%d,%d", "_tgid,_writerof_l2_transitioning"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powering done", "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"), + + /* + * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock() + */ + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"), + + /* + * Significant Indirect callers of kbase_pm_check_transitions_nolock() + */ + /* kbase_pm_request_cores */ + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"), + /* kbase_pm_release_cores */ + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", 
"_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"), + /* + * END: SW Functions that call kbase_pm_check_transitions_nolock() + */ + + /* + * HW Events + */ + KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX, "HW: Job Chain start (SW approximated)", "%d,%d,%d", "_tgid,job_slot,_consumerof_atom_number_ready"), + KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX, "HW: Job Chain stop (SW approximated)", "%d,%d,%d", "_tgid,job_slot,_producerof_atom_number_completed") diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h new file mode 100755 index 00000000000..a6c277176c2 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h @@ -0,0 +1,335 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +#ifndef _KBASE_UKU_H_ +#define _KBASE_UKU_H_ + +#include "mali_uk.h" +#include +#include "mali_base_kernel.h" + +/* This file needs to support being included from kernel and userside (which use different defines) */ +#if defined(CONFIG_MALI_ERROR_INJECT) +#define SUPPORT_MALI_ERROR_INJECT +#elif defined(MALI_ERROR_INJECT) +#if MALI_ERROR_INJECT +#define SUPPORT_MALI_ERROR_INJECT +#endif +#endif +#if defined(CONFIG_MALI_NO_MALI) +#define SUPPORT_MALI_NO_MALI +#elif defined(MALI_NO_MALI) +#if MALI_NO_MALI +#define SUPPORT_MALI_NO_MALI +#endif +#endif + +#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT) +#include "mali_kbase_model_dummy.h" +#endif + +#include "mali_kbase_gpuprops_types.h" + +#define BASE_UK_VERSION_MAJOR 6 +#define BASE_UK_VERSION_MINOR 0 + +typedef struct kbase_uk_mem_alloc { + uk_header header; + /* IN */ + u64 va_pages; + u64 commit_pages; + u64 extent; + /* IN/OUT */ + u64 flags; + /* OUT */ + u64 gpu_va; + u16 va_alignment; + u8 padding[6]; +} kbase_uk_mem_alloc; + +typedef struct kbase_uk_mem_free { + uk_header header; + /* IN */ + mali_addr64 gpu_addr; + /* OUT */ +} kbase_uk_mem_free; + +/* used by both aliasing and importing */ +#define KBASE_MEM_NEED_MMAP (1UL << BASE_MEM_FLAGS_NR_BITS) + +typedef struct kbase_uk_mem_alias { + uk_header header; + /* IN/OUT */ + u64 flags; + /* IN */ + u64 stride; + u64 nents; + kbase_pointer ai; + /* OUT */ + u64 gpu_va; + u64 va_pages; +} kbase_uk_mem_alias; + +typedef struct kbase_uk_mem_import { + uk_header header; + /* IN */ + kbase_pointer phandle; + u32 type; + u32 padding; + /* IN/OUT */ +#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << (BASE_MEM_FLAGS_NR_BITS + 1)) + u64 flags; + /* OUT */ + mali_addr64 gpu_va; + u64 va_pages; +} kbase_uk_mem_import; + +typedef struct kbase_uk_mem_flags_change { + uk_header header; + /* IN */ + mali_addr64 gpu_va; + u64 flags; + u64 mask; +} kbase_uk_mem_flags_change; + +typedef struct kbase_uk_job_submit { + uk_header header; + /* IN */ + kbase_pointer addr; + u32 nr_atoms; + u32 stride; /* bytes between atoms, i.e. 
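/*
 * Illustrative sketch only: how user-side code might fill one of the
 * argument structures above, following the IN/OUT annotations.
 * kbase_uk_call() is a hypothetical stand-in for the UKU transport, which
 * this header does not define; KBASE_FUNC_MEM_ALLOC is declared further
 * down in this file.
 */
extern int kbase_uk_call(uk_header *hdr, u32 size);	/* hypothetical */

static int example_mem_alloc(u64 *gpu_va_out)
{
	kbase_uk_mem_alloc args = { { 0 } };

	args.header.id = KBASE_FUNC_MEM_ALLOC;	/* selects the UK function */
	args.va_pages = 16;		/* IN: size of the GPU VA range, in pages */
	args.commit_pages = 16;		/* IN: pages to back immediately */
	args.extent = 0;		/* IN: no grow-on-fault extent */
	args.flags = 0;			/* IN/OUT: allocation flags */

	if (kbase_uk_call(&args.header, sizeof(args)))
		return -1;

	*gpu_va_out = args.gpu_va;	/* OUT: resulting GPU virtual address */
	return 0;
}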
sizeof(base_jd_atom_v2) */ + /* OUT */ +} kbase_uk_job_submit; + +typedef struct kbase_uk_post_term { + uk_header header; +} kbase_uk_post_term; + +typedef struct kbase_uk_sync_now { + uk_header header; + + /* IN */ + base_syncset sset; + + /* OUT */ +} kbase_uk_sync_now; + +typedef struct kbase_uk_hwcnt_setup { + uk_header header; + + /* IN */ + mali_addr64 dump_buffer; + u32 jm_bm; + u32 shader_bm; + u32 tiler_bm; + u32 l3_cache_bm; + u32 mmu_l2_bm; + u32 padding; + /* OUT */ +} kbase_uk_hwcnt_setup; + +typedef struct kbase_uk_hwcnt_dump { + uk_header header; +} kbase_uk_hwcnt_dump; + +typedef struct kbase_uk_hwcnt_clear { + uk_header header; +} kbase_uk_hwcnt_clear; + +typedef struct kbase_uk_fence_validate { + uk_header header; + /* IN */ + s32 fd; + u32 padding; + /* OUT */ +} kbase_uk_fence_validate; + +typedef struct kbase_uk_stream_create { + uk_header header; + /* IN */ + char name[32]; + /* OUT */ + s32 fd; + u32 padding; +} kbase_uk_stream_create; + +typedef struct kbase_uk_cpuprops { + uk_header header; + + /* IN */ + struct base_cpu_props props; + /* OUT */ +} kbase_uk_cpuprops; + +typedef struct kbase_uk_gpuprops { + uk_header header; + + /* IN */ + struct mali_base_gpu_props props; + /* OUT */ +} kbase_uk_gpuprops; + +typedef struct kbase_uk_mem_query { + uk_header header; + /* IN */ + mali_addr64 gpu_addr; +#define KBASE_MEM_QUERY_COMMIT_SIZE 1 +#define KBASE_MEM_QUERY_VA_SIZE 2 +#define KBASE_MEM_QUERY_FLAGS 3 + u64 query; + /* OUT */ + u64 value; +} kbase_uk_mem_query; + +typedef struct kbase_uk_mem_commit { + uk_header header; + /* IN */ + mali_addr64 gpu_addr; + u64 pages; + /* OUT */ + u32 result_subcode; + u32 padding; +} kbase_uk_mem_commit; + +typedef struct kbase_uk_find_cpu_offset { + uk_header header; + /* IN */ + mali_addr64 gpu_addr; + u64 cpu_addr; + u64 size; + /* OUT */ + mali_size64 offset; +} kbase_uk_find_cpu_offset; + +#define KBASE_GET_VERSION_BUFFER_SIZE 64 +typedef struct kbase_uk_get_ddk_version { + uk_header header; + /* OUT */ + char version_buffer[KBASE_GET_VERSION_BUFFER_SIZE]; + u32 version_string_size; + u32 padding; +} kbase_uk_get_ddk_version; + +typedef struct kbase_uk_set_flags { + uk_header header; + /* IN */ + u32 create_flags; + u32 padding; +} kbase_uk_set_flags; + +#if MALI_UNIT_TEST +#define TEST_ADDR_COUNT 4 +#define KBASE_TEST_BUFFER_SIZE 128 +typedef struct kbase_exported_test_data { + mali_addr64 test_addr[TEST_ADDR_COUNT]; /**< memory address */ + u32 test_addr_pages[TEST_ADDR_COUNT]; /**< memory size in pages */ + kbase_pointer kctx; /**< base context created by process */ + kbase_pointer mm; /**< pointer to process address space */ + u8 buffer1[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */ + u8 buffer2[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */ +} kbase_exported_test_data; + +typedef struct kbase_uk_set_test_data { + uk_header header; + /* IN */ + kbase_exported_test_data test_data; +} kbase_uk_set_test_data; + +#endif /* MALI_UNIT_TEST */ + +#ifdef SUPPORT_MALI_ERROR_INJECT +typedef struct kbase_uk_error_params { + uk_header header; + /* IN */ + kbase_error_params params; +} kbase_uk_error_params; +#endif /* SUPPORT_MALI_ERROR_INJECT */ + +#ifdef SUPPORT_MALI_NO_MALI +typedef struct kbase_uk_model_control_params { + uk_header header; + /* IN */ + kbase_model_control_params params; +} kbase_uk_model_control_params; +#endif /* SUPPORT_MALI_NO_MALI */ + +#define KBASE_MAXIMUM_EXT_RESOURCES 255 + +typedef struct kbase_uk_ext_buff_kds_data { + uk_header header; + kbase_pointer external_resource; + 
kbase_pointer file_descriptor; + u32 num_res; /* limited to KBASE_MAXIMUM_EXT_RESOURCES */ + u32 padding; +} kbase_uk_ext_buff_kds_data; + +typedef struct kbase_uk_keep_gpu_powered { + uk_header header; + u32 enabled; + u32 padding; +} kbase_uk_keep_gpu_powered; + +typedef struct kbase_uk_profiling_controls { + uk_header header; + u32 profiling_controls[FBDUMP_CONTROL_MAX]; +} kbase_uk_profiling_controls; + +typedef enum kbase_uk_function_id { + KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0), + KBASE_FUNC_MEM_IMPORT, + KBASE_FUNC_MEM_COMMIT, + KBASE_FUNC_MEM_QUERY, + KBASE_FUNC_MEM_FREE, + KBASE_FUNC_MEM_FLAGS_CHANGE, + KBASE_FUNC_MEM_ALIAS, + + KBASE_FUNC_JOB_SUBMIT, + + KBASE_FUNC_SYNC, + + KBASE_FUNC_POST_TERM, + + KBASE_FUNC_HWCNT_SETUP, + KBASE_FUNC_HWCNT_DUMP, + KBASE_FUNC_HWCNT_CLEAR, + + KBASE_FUNC_CPU_PROPS_REG_DUMP, + KBASE_FUNC_GPU_PROPS_REG_DUMP, + + KBASE_FUNC_FIND_CPU_OFFSET, + + KBASE_FUNC_GET_VERSION, + KBASE_FUNC_EXT_BUFFER_LOCK, + KBASE_FUNC_SET_FLAGS, + + KBASE_FUNC_SET_TEST_DATA, + KBASE_FUNC_INJECT_ERROR, + KBASE_FUNC_MODEL_CONTROL, + + KBASE_FUNC_KEEP_GPU_POWERED, + + KBASE_FUNC_FENCE_VALIDATE, + KBASE_FUNC_STREAM_CREATE, + KBASE_FUNC_GET_PROFILING_CONTROLS, + KBASE_FUNC_SET_PROFILING_CONTROLS /* to be used only for testing + * purposes, otherwise these controls + * are set through gator API */ +} kbase_uk_function_id; + +#endif /* _KBASE_UKU_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.c b/drivers/gpu/arm/midgard/mali_kbase_utility.c new file mode 100755 index 00000000000..c11c678e3b1 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_utility.c @@ -0,0 +1,32 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include + +mali_bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry) +{ + struct list_head *pos = base->next; + while (pos != base) { + if (pos == entry) + return MALI_TRUE; + + pos = pos->next; + } + return MALI_FALSE; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.h b/drivers/gpu/arm/midgard/mali_kbase_utility.h new file mode 100755 index 00000000000..59988163fd2 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_utility.h @@ -0,0 +1,37 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _KBASE_UTILITY_H +#define _KBASE_UTILITY_H + +#ifndef _KBASE_H_ +#error "Don't include this file directly, use mali_kbase.h instead" +#endif + +/** Test whether the given list entry is a member of the given list. 
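/*
 * Usage sketch for the list helper above, assuming the usual <linux/list.h>
 * primitives; the 'waiters' list and its entry are invented for the example.
 */
#include <linux/list.h>

static void example_remove_if_member(struct list_head *waiters,
				     struct list_head *entry)
{
	/* kbasep_list_member_of() walks from waiters->next until it wraps,
	 * so it is safe to call even if 'entry' was never added. */
	if (kbasep_list_member_of(waiters, entry))
		list_del_init(entry);
}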
+ * + * @param base The head of the list to be tested + * @param entry The list entry to be tested + * + * @return MALI_TRUE if entry is a member of base + * MALI_FALSE otherwise + */ +mali_bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry); + +#endif /* _KBASE_UTILITY_H */ diff --git a/drivers/gpu/arm/midgard/mali_linux_trace.h b/drivers/gpu/arm/midgard/mali_linux_trace.h new file mode 100755 index 00000000000..537610b49d3 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_linux_trace.h @@ -0,0 +1,129 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MALI_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mali +#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM) +#define TRACE_INCLUDE_FILE mali_linux_trace + +#define MALI_JOB_SLOTS_EVENT_CHANGED + +/** + * mali_job_slots_event - called from mali_kbase_core_linux.c + * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro. + */ +TRACE_EVENT(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, unsigned char job_id), TP_ARGS(event_id, tgid, pid, job_id), TP_STRUCT__entry(__field(unsigned int, event_id) + __field(unsigned int, tgid) + __field(unsigned int, pid) + __field(unsigned char, job_id) + ), TP_fast_assign(__entry->event_id = event_id; __entry->tgid = tgid; __entry->pid = pid; __entry->job_id = job_id;), TP_printk("event=%u tgid=%u pid=%u job_id=%u", __entry->event_id, __entry->tgid, __entry->pid, __entry->job_id) + ); + +/** + * mali_pm_status - Called by mali_kbase_pm_driver.c + * @event_id: core type (shader, tiler, l2 cache, l3 cache) + * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF) + */ +TRACE_EVENT(mali_pm_status, TP_PROTO(unsigned int event_id, unsigned long long value), TP_ARGS(event_id, value), TP_STRUCT__entry(__field(unsigned int, event_id) + __field(unsigned long long, value) + ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event %u = %llu", __entry->event_id, __entry->value) + ); + +/** + * mali_pm_power_on - Called by mali_kbase_pm_driver.c + * @event_id: core type (shader, tiler, l2 cache, l3 cache) + * @value: 64bits bitmask reporting the cores to power up + */ +TRACE_EVENT(mali_pm_power_on, TP_PROTO(unsigned int event_id, unsigned long long value), TP_ARGS(event_id, value), TP_STRUCT__entry(__field(unsigned int, event_id) + __field(unsigned long long, value) + ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event %u = %llu", __entry->event_id, __entry->value) + ); + +/** + * mali_pm_power_off - Called by mali_kbase_pm_driver.c + * @event_id: core type (shader, tiler, l2 cache, l3 cache) + * @value: 64bits bitmask reporting the cores to power down + */ +TRACE_EVENT(mali_pm_power_off, TP_PROTO(unsigned int event_id, unsigned long long value), TP_ARGS(event_id, value), TP_STRUCT__entry(__field(unsigned int, event_id) + __field(unsigned long long, value) + ), 
TP_fast_assign(__entry->event_id = event_id;), TP_printk("event %u = %llu", __entry->event_id, __entry->value) + ); + +/** + * mali_page_fault_insert_pages - Called by page_fault_worker() + * it reports an MMU page fault resulting in new pages being mapped. + * @event_id: MMU address space number. + * @value: number of newly allocated pages + */ +TRACE_EVENT(mali_page_fault_insert_pages, TP_PROTO(int event_id, unsigned long value), TP_ARGS(event_id, value), TP_STRUCT__entry(__field(int, event_id) + __field(unsigned long, value) + ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event %d = %lu", __entry->event_id, __entry->value) + ); + +/** + * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space() + * it reports that a certain MMU address space is in use now. + * @event_id: MMU address space number. + */ +TRACE_EVENT(mali_mmu_as_in_use, TP_PROTO(int event_id), TP_ARGS(event_id), TP_STRUCT__entry(__field(int, event_id) + ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event=%d", __entry->event_id) + ); + +/** + * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal() + * it reports that a certain MMU address space has been released now. + * @event_id: MMU address space number. + */ +TRACE_EVENT(mali_mmu_as_released, TP_PROTO(int event_id), TP_ARGS(event_id), TP_STRUCT__entry(__field(int, event_id) + ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event=%d", __entry->event_id) + ); + +/** + * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages() + * and by kbase_atomic_sub_pages() + * it reports that the total number of allocated pages is changed. + * @event_id: number of pages to be added or subtracted (according to the sign). + */ +TRACE_EVENT(mali_total_alloc_pages_change, TP_PROTO(long long int event_id), TP_ARGS(event_id), TP_STRUCT__entry(__field(long long int, event_id) + ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event=%lld", __entry->event_id) + ); + +/** + * mali_sw_counter - not currently used + * @event_id: counter id + */ +TRACE_EVENT(mali_sw_counter, TP_PROTO(unsigned int event_id, signed long long value), TP_ARGS(event_id, value), TP_STRUCT__entry(__field(int, event_id) + __field(long long, value) + ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event %d = %lld", __entry->event_id, __entry->value) + ); + +#endif /* _TRACE_MALI_H */ + +#undef TRACE_INCLUDE_PATH +#undef linux +#define TRACE_INCLUDE_PATH . + +/* This part must be outside protection */ +#include diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h new file mode 100755 index 00000000000..c40d74004ec --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h @@ -0,0 +1,513 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
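/*
 * Sketch of how one of the tracepoints above is emitted.  TRACE_EVENT(name,
 * ...) generates a trace_name() helper in the usual Linux tracing fashion,
 * and exactly one compilation unit must define CREATE_TRACE_POINTS before
 * including mali_linux_trace.h.  The call site below mirrors the
 * page_fault_worker() usage described in the comment; the argument names
 * are illustrative.
 */
static void example_report_fault_mapping(int as_nr, unsigned long new_pages)
{
	/* MMU address space 'as_nr' just had 'new_pages' pages mapped in. */
	trace_mali_page_fault_insert_pages(as_nr, new_pages);
}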
+ * + */ + + + + + +#ifndef _MIDGARD_REGMAP_H_ +#define _MIDGARD_REGMAP_H_ + +/* + * Begin Register Offsets + */ + +#define GPU_CONTROL_BASE 0x0000 +#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) +#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ +#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ +#define L3_FEATURES 0x008 /* (RO) Level 3 cache features */ +#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ +#define MEM_FEATURES 0x010 /* (RO) Memory system features */ +#define MMU_FEATURES 0x014 /* (RO) MMU features */ +#define AS_PRESENT 0x018 /* (RO) Address space slots present */ +#define JS_PRESENT 0x01C /* (RO) Job slots present */ +#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ +#define GPU_IRQ_CLEAR 0x024 /* (WO) */ +#define GPU_IRQ_MASK 0x028 /* (RW) */ +#define GPU_IRQ_STATUS 0x02C /* (RO) */ + +/* IRQ flags */ +#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ +#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ +#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. Intended to use with SOFT_RESET + commands which may take time. */ +#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ +#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down + and the power manager is idle. */ + +#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ +#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ + +#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ + | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) + +#define GPU_COMMAND 0x030 /* (WO) */ +#define GPU_STATUS 0x034 /* (RO) */ + +#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ +#define GROUPS_L3_COHERENT (1 << 1) /* Cores groups are l3 coherent */ + +#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ +#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ +#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ + +#define PWR_KEY 0x050 /* (WO) Power manager key register */ +#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ +#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ + +#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory region base address, low word */ +#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory region base address, high word */ +#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */ +#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable flags for Job Manager */ +#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable flags for shader cores */ +#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable flags for tiler */ +#define PRFCNT_L3_CACHE_EN 0x078 /* (RW) Performance counter enable flags for L3 cache */ +#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable flags for MMU/L2 cache */ + +#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ +#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ +#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ +#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ + +#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ +#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ +#define 
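/*
 * Sketch of how these offsets compose: GPU_CONTROL_REG() yields an offset
 * from the start of the register window, so a probe-time read of GPU_ID and
 * unmasking of the grouped IRQ sources might look like this.  'reg_base' is
 * an assumed ioremap()ed mapping; the real driver wraps register access in
 * its own helpers.
 */
#include <linux/io.h>

static u32 example_probe_gpu(void __iomem *reg_base)
{
	u32 gpu_id = readl(reg_base + GPU_CONTROL_REG(GPU_ID));

	/* Enable the interrupt sources grouped in GPU_IRQ_REG_ALL. */
	writel(GPU_IRQ_REG_ALL, reg_base + GPU_CONTROL_REG(GPU_IRQ_MASK));

	return gpu_id;
}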
THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ +#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ + +#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ +#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ +#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ + +#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) + +#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ +#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ +#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ +#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ +#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ +#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ +#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */ +#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ +#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ +#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ +#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ +#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ +#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ +#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ +#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ +#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ + +#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) + +#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ +#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ + +#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ + +#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ + +#define L3_PRESENT_LO 0x130 /* (RO) Level 3 cache present bitmap, low word */ +#define L3_PRESENT_HI 0x134 /* (RO) Level 3 cache present bitmap, high word */ + +#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ + +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + +#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + +#define L3_READY_LO 0x170 /* (RO) Level 3 cache ready bitmap, low word */ +#define L3_READY_HI 0x174 /* (RO) Level 3 cache ready bitmap, high word */ + +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + +#define L3_PWRON_LO 0x1B0 /* (WO) Level 3 cache power on bitmap, low word */ +#define L3_PWRON_HI 0x1B4 /* (WO) Level 3 cache power on bitmap, high word */ + +#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off 
bitmap, low word */ +#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ + +#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ +#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ + +#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ +#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ + +#define L3_PWROFF_LO 0x1F0 /* (WO) Level 3 cache power off bitmap, low word */ +#define L3_PWROFF_HI 0x1F4 /* (WO) Level 3 cache power off bitmap, high word */ + +#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ +#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ + +#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ +#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ + +#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ +#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ + +#define L3_PWRTRANS_LO 0x230 /* (RO) Level 3 cache power transition bitmap, low word */ +#define L3_PWRTRANS_HI 0x234 /* (RO) Level 3 cache power transition bitmap, high word */ + +#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ +#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ + +#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ +#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ + +#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ +#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ + +#define L3_PWRACTIVE_LO 0x270 /* (RO) Level 3 cache active bitmap, low word */ +#define L3_PWRACTIVE_HI 0x274 /* (RO) Level 3 cache active bitmap, high word */ + +#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Mali-T60x additional register) */ +#define L2_MMU_CONFIG 0xF0C /* (RW) Configuration of the L2 cache and MMU (Mali-T60x additional register) */ + +#define JOB_CONTROL_BASE 0x1000 + +#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) + +#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ +#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ +#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ +#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ +#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ +#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. 
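/*
 * Sketch of how the power-control bitmaps fit together: cores are requested
 * through the write-only PWRON registers, transitions are visible in
 * PWRTRANS, and completion in the READY registers.  Polling is used here
 * only to keep the example short - the real power manager is driven by the
 * POWER_CHANGED_* interrupts defined earlier in this file.
 */
#include <linux/io.h>
#include <linux/delay.h>

static void example_power_on_shaders(void __iomem *reg_base, u32 core_mask)
{
	writel(core_mask, reg_base + GPU_CONTROL_REG(SHADER_PWRON_LO));

	while ((readl(reg_base + GPU_CONTROL_REG(SHADER_READY_LO)) & core_mask)
			!= core_mask)
		udelay(10);
}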
*/ + +#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ +#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ +#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ +#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ +#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ +#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ +#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ +#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ +#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ +#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ +#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ +#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ +#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ +#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ +#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ +#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ + +#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) + +#define JSn_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ +#define JSn_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ +#define JSn_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ +#define JSn_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ +#define JSn_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ +#define JSn_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ +#define JSn_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ + +#define JSn_COMMAND 0x20 /* (WO) Command register for job slot n */ +#define JSn_STATUS 0x24 /* (RO) Status register for job slot n */ + +#define JSn_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ +#define JSn_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ + +#define JSn_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ +#define JSn_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ +#define JSn_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ + +#define JSn_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ + +#define MEMORY_MANAGEMENT_BASE 0x2000 +#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) + +#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ +#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ +#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ + +#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ +#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ +#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ +#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ +#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ +#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ +#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ +#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ +#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ +#define MMU_AS9 0x640 /* Configuration 
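/*
 * Sketch of per-slot addressing: JOB_SLOT_REG() nests inside
 * JOB_CONTROL_REG(), so a register of job slot 'js' is reached by slot
 * index.  For example, a soft-stop request (JSn_COMMAND_SOFT_STOP is
 * defined further down in this file); the real driver adds locking and
 * slot-state tracking around this.
 */
#include <linux/io.h>

static void example_soft_stop_slot(void __iomem *reg_base, int js)
{
	writel(JSn_COMMAND_SOFT_STOP,
	       reg_base + JOB_SLOT_REG(js, JSn_COMMAND));
}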
registers for address space 9 */ +#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ +#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ +#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ +#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ +#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ +#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ + +#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) + +#define ASn_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ +#define ASn_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ +#define ASn_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ +#define ASn_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */ +#define ASn_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ +#define ASn_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ +#define ASn_COMMAND 0x18 /* (WO) MMU command register for address space n */ +#define ASn_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ +#define ASn_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ +#define ASn_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ +#define ASn_STATUS 0x28 /* (RO) Status flags for address space n */ + +/* End Register Offsets */ + +/* + * MMU_IRQ_RAWSTAT register values. Values are valid also for + MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. + */ + +#define MMU_REGS_PAGE_FAULT_FLAGS 16 + +/* Macros return bit number to retrvie page fault or bus eror flag from MMU registers */ +#define MMU_REGS_PAGE_FAULT_FLAG(n) (n) +#define MMU_REGS_BUS_ERROR_FLAG(n) (n + MMU_REGS_PAGE_FAULT_FLAGS) + +/* + * Begin MMU TRANSTAB register values + */ +#define ASn_TRANSTAB_ADDR_SPACE_MASK 0xfffff000 +#define ASn_TRANSTAB_ADRMODE_UNMAPPED (0u << 0) +#define ASn_TRANSTAB_ADRMODE_IDENTITY (1u << 1) +#define ASn_TRANSTAB_ADRMODE_TABLE (3u << 0) +#define ASn_TRANSTAB_READ_INNER (1u << 2) +#define ASn_TRANSTAB_SHARE_OUTER (1u << 4) + +#define MMU_TRANSTAB_ADRMODE_MASK 0x00000003 + +/* + * Begin MMU STATUS register values + */ +#define ASn_STATUS_FLUSH_ACTIVE 0x01 + +#define ASn_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8) +#define ASn_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8) +#define ASn_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8) +#define ASn_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8) + +/* + * Begin Command Values + */ + +/* JSn_COMMAND register commands */ +#define JSn_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ +#define JSn_COMMAND_START 0x01 /* Start processing a job chain. 
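/*
 * Sketch of programming an address space with the ASn_* registers and the
 * TRANSTAB encodings above.  'pgd' is assumed to be a 4kB-aligned physical
 * page-table base, so its low bits are free for the mode flags;
 * ASn_COMMAND_UPDATE is defined just below, and the real driver also
 * programs ASn_MEMATTR and holds the appropriate locks.
 */
#include <linux/io.h>

static void example_program_transtab(void __iomem *reg_base, int as, u64 pgd)
{
	u64 transtab = pgd | ASn_TRANSTAB_ADRMODE_TABLE |
			ASn_TRANSTAB_READ_INNER | ASn_TRANSTAB_SHARE_OUTER;

	writel((u32)transtab, reg_base + MMU_AS_REG(as, ASn_TRANSTAB_LO));
	writel((u32)(transtab >> 32), reg_base + MMU_AS_REG(as, ASn_TRANSTAB_HI));

	/* Broadcast the new table to the MMUs. */
	writel(ASn_COMMAND_UPDATE, reg_base + MMU_AS_REG(as, ASn_COMMAND));
}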
Writing this value is ignored */ +#define JSn_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ +#define JSn_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ +#define JSn_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ +#define JSn_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ +#define JSn_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ +#define JSn_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ + +/* ASn_COMMAND register commands */ +#define ASn_COMMAND_NOP 0x00 /* NOP Operation */ +#define ASn_COMMAND_UPDATE 0x01 /* Broadcasts the values in ASn_TRANSTAB and ASn_MEMATTR to all MMUs */ +#define ASn_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ +#define ASn_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ +#define ASn_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs + (deprecated - only for use with T60x) */ +#define ASn_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */ +#define ASn_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1s cache then + flush all L2 caches then issue a flush region command to all MMUs */ + +/* Possible values of JSn_CONFIG and JSn_CONFIG_NEXT registers */ +#define JSn_CONFIG_START_FLUSH_NO_ACTION (0u << 0) +#define JSn_CONFIG_START_FLUSH_CLEAN (1u << 8) +#define JSn_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) +#define JSn_CONFIG_START_MMU (1u << 10) +#define JSn_CONFIG_JOB_CHAIN_FLAG (1u << 11) +#define JSn_CONFIG_END_FLUSH_NO_ACTION JSn_CONFIG_START_FLUSH_NO_ACTION +#define JSn_CONFIG_END_FLUSH_CLEAN (1u << 12) +#define JSn_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) +#define JSn_CONFIG_THREAD_PRI(n) ((n) << 16) + +/* JSn_STATUS register values */ + +/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h. + * The values are separated to avoid dependency of userspace and kernel code. 
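/*
 * Sketch of a JSn_CONFIG_NEXT value composed from the bits above: clean and
 * invalidate the caches around the job chain, enable address translation,
 * and pick a mid-range thread priority.  The particular combination is
 * illustrative only; the job scheduler chooses the flush policy per atom.
 */
static u32 example_js_config(void)
{
	return JSn_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
	       JSn_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
	       JSn_CONFIG_START_MMU |
	       JSn_CONFIG_THREAD_PRI(8);
}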
+ */ + +/* Group of values representing the job status insead a particular fault */ +#define JSn_STATUS_NO_EXCEPTION_BASE 0x00 +#define JSn_STATUS_INTERRUPTED (JSn_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ +#define JSn_STATUS_STOPPED (JSn_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ +#define JSn_STATUS_TERMINATED (JSn_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ + +/* General fault values */ +#define JSn_STATUS_FAULT_BASE 0x40 +#define JSn_STATUS_CONFIG_FAULT (JSn_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ +#define JSn_STATUS_POWER_FAULT (JSn_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ +#define JSn_STATUS_READ_FAULT (JSn_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ +#define JSn_STATUS_WRITE_FAULT (JSn_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ +#define JSn_STATUS_AFFINITY_FAULT (JSn_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ +#define JSn_STATUS_BUS_FAULT (JSn_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ + +/* Instruction or data faults */ +#define JSn_STATUS_INSTRUCTION_FAULT_BASE 0x50 +#define JSn_STATUS_INSTR_INVALID_PC (JSn_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ +#define JSn_STATUS_INSTR_INVALID_ENC (JSn_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ +#define JSn_STATUS_INSTR_TYPE_MISMATCH (JSn_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ +#define JSn_STATUS_INSTR_OPERAND_FAULT (JSn_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */ +#define JSn_STATUS_INSTR_TLS_FAULT (JSn_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ +#define JSn_STATUS_INSTR_BARRIER_FAULT (JSn_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ +#define JSn_STATUS_INSTR_ALIGN_FAULT (JSn_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ +/* NOTE: No fault with 0x57 code defined in spec. */ +#define JSn_STATUS_DATA_INVALID_FAULT (JSn_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ +#define JSn_STATUS_TILE_RANGE_FAULT (JSn_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ +#define JSn_STATUS_ADDRESS_RANGE_FAULT (JSn_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ + +/* Other faults */ +#define JSn_STATUS_MEMORY_FAULT_BASE 0x60 +#define JSn_STATUS_OUT_OF_MEMORY (JSn_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ +#define JSn_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ + +/* GPU_COMMAND values */ +#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ +#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ +#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */ +#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */ +#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */ +#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ +#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ + +/* End Command Values */ + +/* GPU_STATUS values */ +#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. 
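/*
 * Sketch tying the GPU_COMMAND values to the IRQ bits defined at the top of
 * this file: a soft reset is issued through GPU_COMMAND and completion is
 * reported via RESET_COMPLETED.  Polling keeps the example short; the real
 * driver waits for the interrupt with a timeout.
 */
#include <linux/io.h>
#include <linux/delay.h>

static void example_soft_reset(void __iomem *reg_base)
{
	writel(GPU_COMMAND_SOFT_RESET, reg_base + GPU_CONTROL_REG(GPU_COMMAND));

	while (!(readl(reg_base + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
		 RESET_COMPLETED))
		udelay(10);

	/* Acknowledge the interrupt source once the reset has completed. */
	writel(RESET_COMPLETED, reg_base + GPU_CONTROL_REG(GPU_IRQ_CLEAR));
}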
*/ + +/* PRFCNT_CONFIG register values */ +#define PRFCNT_CONFIG_AS_SHIFT 4 /* address space bitmap starts from bit 4 of the register */ +#define PRFCNT_CONFIG_MODE_OFF 0 /* The performance counters are disabled. */ +#define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */ +#define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */ + +/* AS_MEMATTR values: */ +/* Use GPU implementation-defined caching policy. */ +#define ASn_MEMATTR_IMPL_DEF_CACHE_POLICY 0x48 +/* The attribute set to force all resources to be cached. */ +#define ASn_MEMATTR_FORCE_TO_CACHE_ALL 0x4F +/* Inner write-alloc cache setup, no outer caching */ +#define ASn_MEMATTR_WRITE_ALLOC 0x4D +/* symbol for default MEMATTR to use */ +#define ASn_MEMATTR_INDEX_DEFAULT 0 +/* HW implementation defined caching */ +#define ASn_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 +/* Force cache on */ +#define ASn_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 +/* Write-alloc inner */ +#define ASn_MEMATTR_INDEX_WRITE_ALLOC 2 + +/* GPU_ID register */ +#define GPU_ID_VERSION_STATUS_SHIFT 0 +#define GPU_ID_VERSION_MINOR_SHIFT 4 +#define GPU_ID_VERSION_MAJOR_SHIFT 12 +#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 +#define GPU_ID_VERSION_STATUS (0xF << GPU_ID_VERSION_STATUS_SHIFT) +#define GPU_ID_VERSION_MINOR (0xFF << GPU_ID_VERSION_MINOR_SHIFT) +#define GPU_ID_VERSION_MAJOR (0xF << GPU_ID_VERSION_MAJOR_SHIFT) +#define GPU_ID_VERSION_PRODUCT_ID (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT) + +/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */ +#define GPU_ID_PI_T60X 0x6956 +#define GPU_ID_PI_T62X 0x0620 +#define GPU_ID_PI_T67X 0x0670 +#define GPU_ID_PI_T76X 0x0750 +#define GPU_ID_PI_T72X 0x0720 + +/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ +#define GPU_ID_S_15DEV0 0x1 +#define GPU_ID_S_EAC 0x2 + +/* Helper macro to create a GPU_ID assuming valid values for id, major, minor, status */ +#define GPU_ID_MAKE(id, major, minor, status) \ + (((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ + ((major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ + ((minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ + ((status) << GPU_ID_VERSION_STATUS_SHIFT)) + +/* End GPU_ID register */ + +/* JS_FEATURES register */ + +#define JSn_FEATURE_NULL_JOB (1u << 1) +#define JSn_FEATURE_SET_VALUE_JOB (1u << 2) +#define JSn_FEATURE_CACHE_FLUSH_JOB (1u << 3) +#define JSn_FEATURE_COMPUTE_JOB (1u << 4) +#define JSn_FEATURE_VERTEX_JOB (1u << 5) +#define JSn_FEATURE_GEOMETRY_JOB (1u << 6) +#define JSn_FEATURE_TILER_JOB (1u << 7) +#define JSn_FEATURE_FUSED_JOB (1u << 8) +#define JSn_FEATURE_FRAGMENT_JOB (1u << 9) + +/* End JS_FEATURES register */ + +/* L2_MMU_CONFIG register */ +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT (24) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) + +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT (26) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) +#define 
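/*
 * Sketch of decoding a raw GPU_ID value with the masks and shifts above, and
 * of composing one with GPU_ID_MAKE.  The helper name and the printed format
 * are illustrative.
 */
#include <linux/kernel.h>

static void example_decode_gpu_id(u32 gpu_id)
{
	u32 product = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
			GPU_ID_VERSION_PRODUCT_ID_SHIFT;
	u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT;
	u32 minor = (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT;
	u32 status = (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT;

	pr_info("Mali product 0x%x r%up%u status %u\n",
		product, major, minor, status);
}

/* Going the other way, a T62x r1p0 with status 0 would be:
 *   GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0)
 */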
L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) +/* End L2_MMU_CONFIG register */ + +/* THREAD_* registers */ + +/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ +#define IMPLEMENTATION_UNSPECIFIED 0 +#define IMPLEMENTATION_SILICON 1 +#define IMPLEMENTATION_FPGA 2 +#define IMPLEMENTATION_MODEL 3 + +/* Default values when registers are not supported by the implemented hardware */ +#define THREAD_MT_DEFAULT 256 +#define THREAD_MWS_DEFAULT 256 +#define THREAD_MBS_DEFAULT 256 +#define THREAD_MR_DEFAULT 1024 +#define THREAD_MTQ_DEFAULT 4 +#define THREAD_MTGS_DEFAULT 10 + +/* End THREAD_* registers */ + +/* SHADER_CONFIG register */ + +#define SC_ALT_COUNTERS (1ul << 3) +#define SC_OVERRIDE_FWD_PIXEL_KILL (1ul << 4) +#define SC_SDC_DISABLE_OQ_DISCARD (1ul << 6) +#define SC_LS_PAUSEBUFFER_DISABLE (1ul << 16) +#define SC_ENABLE_TEXGRD_FLAGS (1ul << 25) +/* End SHADER_CONFIG register */ + +#endif /* _MIDGARD_REGMAP_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_timeline.h b/drivers/gpu/arm/midgard/mali_timeline.h new file mode 100755 index 00000000000..f8b8710fcf9 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_timeline.h @@ -0,0 +1,369 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mali_timeline + +#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _MALI_TIMELINE_H + +#include + +TRACE_EVENT(mali_timeline_atoms_in_flight, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int tgid, + int count), + + TP_ARGS(ts_sec, + ts_nsec, + tgid, + count), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, tgid) + __field(int, count) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->tgid = tgid; + __entry->count = count; + ), + + TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->tgid, + __entry->count) +); + + +TRACE_EVENT(mali_timeline_atom, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int event_type, + int tgid, + int atom_id), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + tgid, + atom_id), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, tgid) + __field(int, atom_id) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->tgid = tgid; + __entry->atom_id = atom_id; + ), + + TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->tgid, + __entry->atom_id, + __entry->atom_id) +); + +TRACE_EVENT(mali_timeline_gpu_slot_active, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int event_type, + int tgid, + int js, + int count), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + tgid, + js, + count), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, tgid) + __field(int, js) + __field(int, count) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->tgid = tgid; + __entry->js = js; + __entry->count = count; + ), + + TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->tgid, + __entry->js, + __entry->count) +); + +TRACE_EVENT(mali_timeline_gpu_slot_action, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int event_type, + int tgid, + int js, + int count), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + tgid, + js, + count), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, tgid) + __field(int, js) + __field(int, count) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->tgid = tgid; + __entry->js = js; + __entry->count = count; + ), + + TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->tgid, + __entry->js, + __entry->count) +); + +TRACE_EVENT(mali_timeline_gpu_power_active, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int event_type, + int active), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + active), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, active) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->active = active; + ), + + TP_printk("%i,%i.%.9i,0,%i", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->active) + +); + +TRACE_EVENT(mali_timeline_l2_power_active, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int event_type, + int 
state), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + state), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, state) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->state = state; + ), + + TP_printk("%i,%i.%.9i,0,%i", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->state) + +); +TRACE_EVENT(mali_timeline_pm_event, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int event_type, + int pm_event_type, + unsigned int pm_event_id), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + pm_event_type, + pm_event_id), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, pm_event_type) + __field(unsigned int, pm_event_id) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->pm_event_type = pm_event_type; + __entry->pm_event_id = pm_event_id; + ), + + TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->pm_event_type, __entry->pm_event_id) + +); + +TRACE_EVENT(mali_timeline_slot_atom, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int event_type, + int tgid, + int js, + int atom_id), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + tgid, + js, + atom_id), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, tgid) + __field(int, js) + __field(int, atom_id) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->tgid = tgid; + __entry->js = js; + __entry->atom_id = atom_id; + ), + + TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->tgid, + __entry->js, + __entry->atom_id) +); + +TRACE_EVENT(mali_timeline_pm_checktrans, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int trans_code, + int trans_id), + + TP_ARGS(ts_sec, + ts_nsec, + trans_code, + trans_id), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, trans_code) + __field(int, trans_id) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->trans_code = trans_code; + __entry->trans_id = trans_id; + ), + + TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->trans_id) + +); + + +#endif /* _MALI_TIMELINE_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . + +/* This part must be outside protection */ +#include + diff --git a/drivers/gpu/arm/midgard/mali_uk.h b/drivers/gpu/arm/midgard/mali_uk.h new file mode 100755 index 00000000000..c577e83220c --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_uk.h @@ -0,0 +1,143 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_uk.h + * Types and definitions that are common across OSs for both the user + * and kernel side of the User-Kernel interface. 
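/*
 * Sketch of emitting one of the timeline events above.  Every
 * mali_timeline_* tracepoint takes the timestamp pre-split into a
 * seconds/nanoseconds pair; how the timestamp is sourced here is an
 * assumption, and SW_SET_GPU_POWER_ACTIVE is assumed to be exposed as an
 * enum constant generated from the trace codes listed in
 * mali_kbase_trace_timeline_defs.h earlier in this patch.
 */
#include <linux/time.h>

static void example_trace_gpu_power(int active)
{
	struct timespec ts;

	getrawmonotonic(&ts);
	trace_mali_timeline_gpu_power_active((u64)ts.tv_sec, (u32)ts.tv_nsec,
					     SW_SET_GPU_POWER_ACTIVE, active);
}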
+ */ + +#ifndef _UK_H_ +#define _UK_H_ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#include + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @defgroup uk_api User-Kernel Interface API + * + * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device + * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver. + * + * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent + * kernel-side API (UKK) via an OS-specific communication mechanism. + * + * This API is internal to the Midgard DDK and is not exposed to any applications. + * + * @{ + */ + +/** + * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The + * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this + * identifier to select a UKK client to the uku_open() function. + * + * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id + * enumeration and the uku_open() implemenation for the various OS ports need to be updated to + * provide a mapping of the identifier to the OS specific device name. + * + */ + typedef enum uk_client_id { + /** + * Value used to identify the Base driver UK client. + */ + UK_CLIENT_MALI_T600_BASE, + + /** The number of uk clients supported. This must be the last member of the enum */ + UK_CLIENT_COUNT + } uk_client_id; + +/** + * Each function callable through the UK interface has a unique number. + * Functions provided by UK clients start from number UK_FUNC_ID. + * Numbers below UK_FUNC_ID are used for internal UK functions. + */ + typedef enum uk_func { + UKP_FUNC_ID_CHECK_VERSION, /**< UKK Core internal function */ + /** + * Each UK client numbers the functions they provide starting from + * number UK_FUNC_ID. This number is then eventually assigned to the + * id field of the uk_header structure when preparing to make a + * UK call. See your UK client for a list of their function numbers. + */ + UK_FUNC_ID = 512 + } uk_func; + +/** + * Arguments for a UK call are stored in a structure. This structure consists + * of a fixed size header and a payload. The header carries a 32-bit number + * identifying the UK function to be called (see uk_func). When the UKK client + * receives this header and executed the requested UK function, it will use + * the same header to store the result of the function in the form of a + * mali_error return code. The size of this structure is such that the + * first member of the payload following the header can be accessed efficiently + * on a 32 and 64-bit kernel and the structure has the same size regardless + * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined + * accordingly in the OS specific mali_uk_os.h header file. + */ + typedef union uk_header { + /** + * 32-bit number identifying the UK function to be called. + * Also see uk_func. + */ + u32 id; + /** + * The mali_error return code returned by the called UK function. + * See the specification of the particular UK function you are + * calling for the meaning of the error codes returned. All + * UK functions return MALI_ERROR_NONE on success. + */ + u32 ret; + /* + * Used to ensure 64-bit alignment of this union. Do not remove. + * This field is used for padding and does not need to be initialized. 
+ */ + u64 sizer; + } uk_header; + +/** + * This structure carries a 16-bit major and minor number and is sent along with an internal UK call + * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side. + */ + typedef struct uku_version_check_args { + uk_header header; + /**< UK call header */ + u16 major; + /**< This field carries the user-side major version on input and the kernel-side major version on output */ + u16 minor; + /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */ + u8 padding[4]; + } uku_version_check_args; + +/** @} end group uk_api */ + + /** @} *//* end group base_api */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* _UK_H_ */ diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd.h new file mode 100755 index 00000000000..9226d977f7a --- /dev/null +++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd.h @@ -0,0 +1,481 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @addtogroup malisw + * @{ + */ + +/* ============================================================================ + Description +============================================================================ */ +/** + * @defgroup arm_cstd_coding_standard ARM C standard types and constants + * The common files are a set of standard headers which are used by all parts + * of this development, describing types, and generic constants. 
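/*
 * Sketch of the version handshake these types support: the user side fills
 * in its own major/minor, UKP_FUNC_ID_CHECK_VERSION selects the internal
 * check-version call, and the kernel side writes its version back into the
 * same fields.  uku_transport() is a hypothetical placeholder for the
 * OS-specific communication mechanism mentioned above, and the version
 * numbers used are illustrative.
 */
extern u32 uku_transport(uk_header *hdr, u32 size);	/* hypothetical */

static int example_check_version(u16 *kernel_major, u16 *kernel_minor)
{
	uku_version_check_args args = { { 0 } };

	args.header.id = UKP_FUNC_ID_CHECK_VERSION;
	args.major = 6;			/* user-side version in */
	args.minor = 0;

	if (uku_transport(&args.header, sizeof(args)) != 0)
		return -1;

	*kernel_major = args.major;	/* kernel-side version out */
	*kernel_minor = args.minor;
	return 0;
}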
+ * + * Files in group: + * - arm_cstd.h + * - arm_cstd_compilers.h + * - arm_cstd_types.h + * - arm_cstd_types_rvct.h + * - arm_cstd_types_gcc.h + * - arm_cstd_types_msvc.h + * - arm_cstd_pack_push.h + * - arm_cstd_pack_pop.h + */ + +/** + * @addtogroup arm_cstd_coding_standard + * @{ + */ + +#ifndef _ARM_CSTD_ +#define _ARM_CSTD_ + +/* ============================================================================ + Import standard C99 types +============================================================================ */ +#include "arm_cstd_compilers.h" +#include "arm_cstd_types.h" + +/* ============================================================================ + Min and Max Values +============================================================================ */ +#if !defined(INT8_MAX) + #define INT8_MAX ((int8_t) 0x7F) +#endif +#if !defined(INT8_MIN) + #define INT8_MIN (-INT8_MAX - 1) +#endif + +#if !defined(INT16_MAX) + #define INT16_MAX ((int16_t)0x7FFF) +#endif +#if !defined(INT16_MIN) + #define INT16_MIN (-INT16_MAX - 1) +#endif + +#if !defined(INT32_MAX) + #define INT32_MAX ((int32_t)0x7FFFFFFF) +#endif +#if !defined(INT32_MIN) + #define INT32_MIN (-INT32_MAX - 1) +#endif + +#if !defined(INT64_MAX) + #define INT64_MAX ((int64_t)0x7FFFFFFFFFFFFFFFLL) +#endif +#if !defined(INT64_MIN) + #define INT64_MIN (-INT64_MAX - 1) +#endif + +#if !defined(UINT8_MAX) + #define UINT8_MAX ((uint8_t) 0xFF) +#endif + +#if !defined(UINT16_MAX) + #define UINT16_MAX ((uint16_t)0xFFFF) +#endif + +#if !defined(UINT32_MAX) + #define UINT32_MAX ((uint32_t)0xFFFFFFFF) +#endif + +#if !defined(UINT64_MAX) + #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) +#endif + +/* fallbacks if limits.h wasn't available */ +#if !defined(UCHAR_MAX) + #define UCHAR_MAX ((unsigned char)~0U) +#endif + +#if !defined(SCHAR_MAX) + #define SCHAR_MAX ((signed char)(UCHAR_MAX >> 1)) +#endif +#if !defined(SCHAR_MIN) + #define SCHAR_MIN ((signed char)(-SCHAR_MAX - 1)) +#endif + +#if !defined(USHRT_MAX) + #define USHRT_MAX ((unsigned short)~0U) +#endif + +#if !defined(SHRT_MAX) + #define SHRT_MAX ((signed short)(USHRT_MAX >> 1)) +#endif +#if !defined(SHRT_MIN) + #define SHRT_MIN ((signed short)(-SHRT_MAX - 1)) +#endif + +#if !defined(UINT_MAX) + #define UINT_MAX ((unsigned int)~0U) +#endif + +#if !defined(INT_MAX) + #define INT_MAX ((signed int)(UINT_MAX >> 1)) +#endif +#if !defined(INT_MIN) + #define INT_MIN ((signed int)(-INT_MAX - 1)) +#endif + +#if !defined(ULONG_MAX) + #define ULONG_MAX ((unsigned long)~0UL) +#endif + +#if !defined(LONG_MAX) + #define LONG_MAX ((signed long)(ULONG_MAX >> 1)) +#endif +#if !defined(LONG_MIN) + #define LONG_MIN ((signed long)(-LONG_MAX - 1)) +#endif + +#if !defined(ULLONG_MAX) + #define ULLONG_MAX ((unsigned long long)~0ULL) +#endif + +#if !defined(LLONG_MAX) + #define LLONG_MAX ((signed long long)(ULLONG_MAX >> 1)) +#endif +#if !defined(LLONG_MIN) + #define LLONG_MIN ((signed long long)(-LLONG_MAX - 1)) +#endif + +#if !defined(SIZE_MAX) + #if 1 == CSTD_CPU_32BIT + #define SIZE_MAX UINT32_MAX + #elif 1 == CSTD_CPU_64BIT + #define SIZE_MAX UINT64_MAX + #endif +#endif + +/* ============================================================================ + Keywords +============================================================================ */ +/* Portable keywords. */ + +#if !defined(CONST) +/** + * @hideinitializer + * Variable is a C @c const, which can be made non-const for testing purposes. 
+ */ + #define CONST const +#endif + +#if !defined(STATIC) +/** + * @hideinitializer + * Variable is a C @c static, which can be made non-static for testing + * purposes. + */ + #define STATIC static +#endif + +/** + * Specifies a function as being exported outside of a logical module. + */ +#define PUBLIC + +/** + * @def PROTECTED + * Specifies a function which is internal to a logical module, but which + * should not be used outside of that module. This cannot be enforced by the + * compiler, as a module is typically more than one translation unit. + */ +#define PROTECTED + +/** + * Specifies a function as being internal to a translation unit. Private + * functions would typically be declared as STATIC, unless they are being + * exported for unit test purposes. + */ +#define PRIVATE STATIC + +/** + * Specify an assertion value which is evaluated at compile time. Recommended + * usage is specification of a @c static @c INLINE function containing all of + * the assertions thus: + * + * @code + * static INLINE void [module]_compile_time_assertions( void ) + * { + * CSTD_COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) ); + * } + * @endcode + * + * @note Use @c static not @c STATIC. We never want to turn off this @c static + * specification for testing purposes. + */ +#define CSTD_COMPILE_TIME_ASSERT( expr ) \ + do { switch(0){case 0: case (expr):;} } while( FALSE ) + +/** + * @hideinitializer + * @deprecated Preferred form is @c CSTD_UNUSED + * Function-like macro for suppressing unused variable warnings. Where possible + * such variables should be removed; this macro is present for cases where we + * must support API backwards compatibility. + */ +#define UNUSED( x ) ((void)(x)) + +/** + * @hideinitializer + * Function-like macro for suppressing unused variable warnings. Where possible + * such variables should be removed; this macro is present for cases where we + * must support API backwards compatibility. + */ +#define CSTD_UNUSED( x ) ((void)(x)) + +/** + * @hideinitializer + * Function-like macro for use where "no behavior" is desired. This is useful + * when compile time macros turn a function-like macro into a no-op, but + * where having no statement is otherwise invalid. + */ +#define CSTD_NOP( ... ) ((void)#__VA_ARGS__) + +/** + * @hideinitializer + * Function-like macro for converting a pointer into a u64 for storing into + * an external data structure. This is commonly used when pairing a 32-bit + * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion + * is complex and a straight cast does not work reliably as pointers are + * often considered as signed. + */ +#define CSTD_PTR_TO_U64( x ) ((uint64_t)((uintptr_t)(x))) + +/** + * @hideinitializer + * Function-like macro for stringizing a single level macro. + * @code + * #define MY_MACRO 32 + * CSTD_STR1( MY_MACRO ) + * > "MY_MACRO" + * @endcode + */ +#define CSTD_STR1( x ) #x + +/** + * @hideinitializer + * Function-like macro for stringizing a macro's value. This should not be used + * if the macro is defined in a way which may have no value; the + * alternative @c CSTD_STR2N macro should be used instead. + * @code + * #define MY_MACRO 32 + * CSTD_STR2( MY_MACRO ) + * > "32" + * @endcode + */ +#define CSTD_STR2( x ) CSTD_STR1( x ) + +/** + * @hideinitializer + * Utility function for stripping the first character off a string.
+ */ +static INLINE char* arm_cstd_strstrip( char * string ) +{ + return ++string; +} + +/** + * @hideinitializer + * Function-like macro for stringizing a single level macro where the macro + * itself may not have a value. Parameter @c a should be set to any single + * character which is then stripped by the macro via an inline function. This + * should only be used via the @c CSTD_STR2N macro; for printing a single + * macro only the @c CSTD_STR1 macro is a better alternative. + * + * This macro requires run-time code to handle the case where the macro has + * no value (you can't concat empty strings in the preprocessor). + */ +#define CSTD_STR1N( a, x ) arm_cstd_strstrip( CSTD_STR1( a##x ) ) + +/** + * @hideinitializer + * Function-like macro for stringizing a two level macro where the macro itself + * may not have a value. + * @code + * #define MY_MACRO 32 + * CSTD_STR2N( MY_MACRO ) + * > "32" + * + * #define MY_MACRO 32 + * CSTD_STR2N( MY_MACRO ) + * > "32" + * @endcode + */ +#define CSTD_STR2N( x ) CSTD_STR1N( _, x ) + +/* ============================================================================ + Validate portability constructs +============================================================================ */ +static INLINE void arm_cstd_compile_time_assertions( void ) +{ + CSTD_COMPILE_TIME_ASSERT( sizeof(uint8_t) == 1 ); + CSTD_COMPILE_TIME_ASSERT( sizeof(int8_t) == 1 ); + CSTD_COMPILE_TIME_ASSERT( sizeof(uint16_t) == 2 ); + CSTD_COMPILE_TIME_ASSERT( sizeof(int16_t) == 2 ); + CSTD_COMPILE_TIME_ASSERT( sizeof(uint32_t) == 4 ); + CSTD_COMPILE_TIME_ASSERT( sizeof(int32_t) == 4 ); + CSTD_COMPILE_TIME_ASSERT( sizeof(uint64_t) == 8 ); + CSTD_COMPILE_TIME_ASSERT( sizeof(int64_t) == 8 ); + CSTD_COMPILE_TIME_ASSERT( sizeof(intptr_t) == sizeof(uintptr_t) ); + + CSTD_COMPILE_TIME_ASSERT( 1 == TRUE ); + CSTD_COMPILE_TIME_ASSERT( 0 == FALSE ); + +#if 1 == CSTD_CPU_32BIT + CSTD_COMPILE_TIME_ASSERT( sizeof(uintptr_t) == 4 ); +#elif 1 == CSTD_CPU_64BIT + CSTD_COMPILE_TIME_ASSERT( sizeof(uintptr_t) == 8 ); +#endif + +} + +/* ============================================================================ + Useful function-like macro +============================================================================ */ +/** + * @brief Return the lesser of two values. + * As a macro it may evaluate its arguments more than once. + * @see CSTD_MAX + */ +#define CSTD_MIN( x, y ) ((x) < (y) ? (x) : (y)) + +/** + * @brief Return the greater of two values. + * As a macro it may evaluate its arguments more than once. + * If called on the same two arguments as CSTD_MIN it is guaranteed to return + * the one that CSTD_MIN didn't return. This is significant for types where not + * all values are comparable e.g. NaNs in floating-point types. But if you want + * to retrieve the min and max of two values, consider using a conditional swap + * instead. + */ +#define CSTD_MAX( x, y ) ((x) < (y) ? (y) : (x)) + +/** + * @brief Clamp value @c x to within @c min and @c max inclusive. + */ +#define CSTD_CLAMP( x, min, max ) ((x)<(min) ? (min):((x)>(max) ? (max):(x))) + +/** + * Flag a cast as a reinterpretation, usually of a pointer type. + */ +#define CSTD_REINTERPRET_CAST(type) (type) + +/** + * Flag a cast as casting away const, usually of a pointer type. + */ +#define CSTD_CONST_CAST(type) (type) + +/** + * Flag a cast as a (potentially complex) value conversion, usually of a + * numerical type. 
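+ *
+ * For illustration only (hypothetical variables, not from these sources), the
+ * three cast-flagging macros expand to a plain C cast but record the intent
+ * at the call site:
+ *
+ *   uint32_t ms   = CSTD_STATIC_CAST(uint32_t)( elapsed_seconds * 1000.0 );
+ *   struct foo *p = CSTD_CONST_CAST(struct foo *)( read_only_foo_ptr );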
+ */ +#define CSTD_STATIC_CAST(type) (type) + +/* ============================================================================ + Useful bit constants +============================================================================ */ +/** + * @cond arm_cstd_utilities + */ + +/* Common bit constant values, useful in embedded programming. */ +#define F_BIT_0 ((uint32_t)0x00000001) +#define F_BIT_1 ((uint32_t)0x00000002) +#define F_BIT_2 ((uint32_t)0x00000004) +#define F_BIT_3 ((uint32_t)0x00000008) +#define F_BIT_4 ((uint32_t)0x00000010) +#define F_BIT_5 ((uint32_t)0x00000020) +#define F_BIT_6 ((uint32_t)0x00000040) +#define F_BIT_7 ((uint32_t)0x00000080) +#define F_BIT_8 ((uint32_t)0x00000100) +#define F_BIT_9 ((uint32_t)0x00000200) +#define F_BIT_10 ((uint32_t)0x00000400) +#define F_BIT_11 ((uint32_t)0x00000800) +#define F_BIT_12 ((uint32_t)0x00001000) +#define F_BIT_13 ((uint32_t)0x00002000) +#define F_BIT_14 ((uint32_t)0x00004000) +#define F_BIT_15 ((uint32_t)0x00008000) +#define F_BIT_16 ((uint32_t)0x00010000) +#define F_BIT_17 ((uint32_t)0x00020000) +#define F_BIT_18 ((uint32_t)0x00040000) +#define F_BIT_19 ((uint32_t)0x00080000) +#define F_BIT_20 ((uint32_t)0x00100000) +#define F_BIT_21 ((uint32_t)0x00200000) +#define F_BIT_22 ((uint32_t)0x00400000) +#define F_BIT_23 ((uint32_t)0x00800000) +#define F_BIT_24 ((uint32_t)0x01000000) +#define F_BIT_25 ((uint32_t)0x02000000) +#define F_BIT_26 ((uint32_t)0x04000000) +#define F_BIT_27 ((uint32_t)0x08000000) +#define F_BIT_28 ((uint32_t)0x10000000) +#define F_BIT_29 ((uint32_t)0x20000000) +#define F_BIT_30 ((uint32_t)0x40000000) +#define F_BIT_31 ((uint32_t)0x80000000) + +/* Common 2^n size values, useful in embedded programming. */ +#define C_SIZE_1B ((uint32_t)0x00000001) +#define C_SIZE_2B ((uint32_t)0x00000002) +#define C_SIZE_4B ((uint32_t)0x00000004) +#define C_SIZE_8B ((uint32_t)0x00000008) +#define C_SIZE_16B ((uint32_t)0x00000010) +#define C_SIZE_32B ((uint32_t)0x00000020) +#define C_SIZE_64B ((uint32_t)0x00000040) +#define C_SIZE_128B ((uint32_t)0x00000080) +#define C_SIZE_256B ((uint32_t)0x00000100) +#define C_SIZE_512B ((uint32_t)0x00000200) +#define C_SIZE_1KB ((uint32_t)0x00000400) +#define C_SIZE_2KB ((uint32_t)0x00000800) +#define C_SIZE_4KB ((uint32_t)0x00001000) +#define C_SIZE_8KB ((uint32_t)0x00002000) +#define C_SIZE_16KB ((uint32_t)0x00004000) +#define C_SIZE_32KB ((uint32_t)0x00008000) +#define C_SIZE_64KB ((uint32_t)0x00010000) +#define C_SIZE_128KB ((uint32_t)0x00020000) +#define C_SIZE_256KB ((uint32_t)0x00040000) +#define C_SIZE_512KB ((uint32_t)0x00080000) +#define C_SIZE_1MB ((uint32_t)0x00100000) +#define C_SIZE_2MB ((uint32_t)0x00200000) +#define C_SIZE_4MB ((uint32_t)0x00400000) +#define C_SIZE_8MB ((uint32_t)0x00800000) +#define C_SIZE_16MB ((uint32_t)0x01000000) +#define C_SIZE_32MB ((uint32_t)0x02000000) +#define C_SIZE_64MB ((uint32_t)0x04000000) +#define C_SIZE_128MB ((uint32_t)0x08000000) +#define C_SIZE_256MB ((uint32_t)0x10000000) +#define C_SIZE_512MB ((uint32_t)0x20000000) +#define C_SIZE_1GB ((uint32_t)0x40000000) +#define C_SIZE_2GB ((uint32_t)0x80000000) + +/** + * @endcond + */ + +/** + * @} + */ + +/** + * @} + */ + +#endif /* End (_ARM_CSTD_) */ diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_compilers.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_compilers.h new file mode 100755 index 00000000000..55637cf8afc --- /dev/null +++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_compilers.h @@ -0,0 +1,617 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _ARM_CSTD_COMPILERS_H_ +#define _ARM_CSTD_COMPILERS_H_ + +/* ============================================================================ + Document default definitions - assuming nothing set at this point. +============================================================================ */ +/** + * @addtogroup arm_cstd_coding_standard + * @{ + */ + +/** + * @hideinitializer + * Defined with value of 1 if toolchain is Microsoft Visual Studio, 0 + * otherwise. + */ +#define CSTD_TOOLCHAIN_MSVC 0 + +/** + * @hideinitializer + * Defined with value of 1 if toolchain is the GNU Compiler Collection, 0 + * otherwise. + */ +#define CSTD_TOOLCHAIN_GCC 0 + +/** + * @hideinitializer + * Defined with value of 1 if toolchain is ARM RealView Compiler Tools, 0 + * otherwise. Note - if running RVCT in GCC mode this define will be set to 0; + * @c CSTD_TOOLCHAIN_GCC and @c CSTD_TOOLCHAIN_RVCT_GCC_MODE will both be + * defined as 1. + */ +#define CSTD_TOOLCHAIN_RVCT 0 + +/** + * @hideinitializer + * Defined with value of 1 if toolchain is ARM RealView Compiler Tools running + * in GCC mode, 0 otherwise. + */ +#define CSTD_TOOLCHAIN_RVCT_GCC_MODE 0 + +/** + * @hideinitializer + * Defined with value of 1 if processor is an x86 32-bit machine, 0 otherwise. + */ +#define CSTD_CPU_X86_32 0 + +/** + * @hideinitializer + * Defined with value of 1 if processor is an x86-64 (AMD64) machine, 0 + * otherwise. + */ +#define CSTD_CPU_X86_64 0 + +/** + * @hideinitializer + * Defined with value of 1 if processor is an ARM machine, 0 otherwise. + */ +#define CSTD_CPU_ARM 0 + +/** + * @hideinitializer + * Defined with value of 1 if processor is an AARCH64 machine, 0 otherwise. + */ +#define CSTD_CPU_AARCH64 0 + + +/** + * @hideinitializer + * Defined with value of 1 if processor is a MIPS machine, 0 otherwise. + */ +#define CSTD_CPU_MIPS 0 + +/** + * @hideinitializer + * Defined with value of 1 if CPU is 32-bit, 0 otherwise. + */ +#define CSTD_CPU_32BIT 0 + +/** + * @hideinitializer + * Defined with value of 1 if CPU is 64-bit, 0 otherwise. + */ +#define CSTD_CPU_64BIT 0 + +/** + * @hideinitializer + * Defined with value of 1 if processor configured as big-endian, 0 if it + * is little-endian. + */ +#define CSTD_CPU_BIG_ENDIAN 0 + +/** + * @hideinitializer + * Defined with value of 1 if operating system is a version of Windows, 0 if + * it is not. + */ +#define CSTD_OS_WINDOWS 0 + +/** + * @hideinitializer + * Defined with value of 1 if operating system is a 32-bit version of Windows, + * 0 if it is not. + */ +#define CSTD_OS_WIN32 0 + +/** + * @hideinitializer + * Defined with value of 1 if operating system is a 64-bit version of Windows, + * 0 if it is not. + */ +#define CSTD_OS_WIN64 0 + +/** + * @hideinitializer + * Defined with value of 1 if operating system is Linux, 0 if it is not. + */ +#define CSTD_OS_LINUX 0 + +/** + * @hideinitializer + * Defined with value of 1 if we are compiling Linux kernel code, 0 otherwise. 
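+ *
+ * For illustration only (a sketch with hypothetical helper functions, not
+ * from these sources): because these macros are always defined to either 0
+ * or 1, they are intended to be tested with @c #if rather than @c #ifdef,
+ * mirroring the style used in this file:
+ *
+ *   #if 1 == CSTD_OS_LINUX_KERNEL
+ *       kernel_only_setup();
+ *   #else
+ *       user_side_setup();
+ *   #endif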
+ */ +#define CSTD_OS_LINUX_KERNEL 0 + +/** + * @hideinitializer + * Defined with value of 1 if operating system is a 32-bit version of Linux, + * 0 if it is not. + */ +#define CSTD_OS_LINUX32 0 + +/** + * @hideinitializer + * Defined with value of 1 if operating system is a 64-bit version of Linux, + * 0 if it is not. + */ +#define CSTD_OS_LINUX64 0 + +/** + * @hideinitializer + * Defined with value of 1 if operating system is Android, 0 if it is not. + */ +#define CSTD_OS_ANDROID 0 + +/** + * @hideinitializer + * Defined with value of 1 if we are compiling Android kernel code, 0 otherwise. + */ +#define CSTD_OS_ANDROID_KERNEL 0 + +/** + * @hideinitializer + * Defined with value of 1 if operating system is a 32-bit version of Android, + * 0 if it is not. + */ +#define CSTD_OS_ANDROID32 0 + +/** + * @hideinitializer + * Defined with value of 1 if operating system is a 64-bit version of Android, + * 0 if it is not. + */ +#define CSTD_OS_ANDROID64 0 + +/** + * @hideinitializer + * Defined with value of 1 if operating system is a version of Apple OS, + * 0 if it is not. + */ +#define CSTD_OS_APPLEOS 0 + +/** + * @hideinitializer + * Defined with value of 1 if operating system is a 32-bit version of Apple OS, + * 0 if it is not. + */ +#define CSTD_OS_APPLEOS32 0 + +/** + * @hideinitializer + * Defined with value of 1 if operating system is a 64-bit version of Apple OS, + * 0 if it is not. + */ +#define CSTD_OS_APPLEOS64 0 + +/** + * @def CSTD_OS_SYMBIAN + * @hideinitializer + * Defined with value of 1 if operating system is Symbian, 0 if it is not. + */ +#define CSTD_OS_SYMBIAN 0 + +/** + * @def CSTD_OS_NONE + * @hideinitializer + * Defined with value of 1 if there is no operating system (bare metal), 0 + * otherwise + */ +#define CSTD_OS_NONE 0 + +/* ============================================================================ + Determine the compiler in use +============================================================================ */ + +/* Default empty definitions of compiler-specific option enable/disable. This will be overridden + * if applicable by preprocessor defines below. */ +#define CSTD_PUSH_WARNING_GCC_WADDRESS +#define CSTD_POP_WARNING_GCC_WADDRESS + +#if defined(_MSC_VER) + #undef CSTD_TOOLCHAIN_MSVC + #define CSTD_TOOLCHAIN_MSVC 1 + +#elif defined(__GNUC__) + #undef CSTD_TOOLCHAIN_GCC + #define CSTD_TOOLCHAIN_GCC 1 + + /* Detect RVCT pretending to be GCC. */ + #if defined(__ARMCC_VERSION) + #undef CSTD_TOOLCHAIN_RVCT_GCC_MODE + #define CSTD_TOOLCHAIN_RVCT_GCC_MODE 1 + #endif + + #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6 && MALI_GCC_WORKAROUND_MIDCOM_4598 == 0) + /* As a workaround to MIDCOM-4598 (GCC internal defect), these pragmas are not compiled if the GCC version + * is within a certain range, or if a #define is enabled by the build system. For more, see a comment + * in the build system also referring to the MIDCOM issue mentioned, where the environment is updated + * for the GNU toolchain. 
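+ *
+ * For illustration only (hypothetical statement, not from these sources): the
+ * push/pop pair defined below is intended to bracket code that would
+ * otherwise trigger GCC's -Waddress warning, e.g.
+ *
+ *   CSTD_PUSH_WARNING_GCC_WADDRESS
+ *   if (NULL != &some_static_object) {
+ *       use_object(&some_static_object);
+ *   }
+ *   CSTD_POP_WARNING_GCC_WADDRESS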
*/ + #undef CSTD_PUSH_WARNING_GCC_WADDRESS + #define CSTD_PUSH_WARNING_GCC_WADDRESS \ + do\ + {\ + _Pragma("GCC diagnostic push")\ + _Pragma("GCC diagnostic ignored \"-Waddress\"")\ + }while(MALI_FALSE) + + #undef CSTD_POP_WARNING_GCC_WADDRESS + #define CSTD_POP_WARNING_GCC_WADDRESS \ + do\ + {\ + _Pragma("GCC diagnostic pop")\ + }while(MALI_FALSE) + #endif + +#elif defined(__ARMCC_VERSION) + #undef CSTD_TOOLCHAIN_RVCT + #define CSTD_TOOLCHAIN_RVCT 1 + +#else + #warning "Unsupported or unknown toolchain" + +#endif + +/* ============================================================================ + Determine the processor +============================================================================ */ +#if 1 == CSTD_TOOLCHAIN_MSVC + #if defined(_M_IX86) + #undef CSTD_CPU_X86_32 + #define CSTD_CPU_X86_32 1 + + #elif defined(_M_X64) || defined(_M_AMD64) + #undef CSTD_CPU_X86_64 + #define CSTD_CPU_X86_64 1 + + #elif defined(_M_ARM) + #undef CSTD_CPU_ARM + #define CSTD_CPU_ARM 1 + + #elif defined(_M_MIPS) + #undef CSTD_CPU_MIPS + #define CSTD_CPU_MIPS 1 + + #else + #warning "Unsupported or unknown host CPU for MSVC tools" + + #endif + +#elif 1 == CSTD_TOOLCHAIN_GCC + #if defined(__amd64__) + #undef CSTD_CPU_X86_64 + #define CSTD_CPU_X86_64 1 + + #elif defined(__i386__) + #undef CSTD_CPU_X86_32 + #define CSTD_CPU_X86_32 1 + + #elif defined(__arm__) + #undef CSTD_CPU_ARM + #define CSTD_CPU_ARM 1 + + #elif defined(__aarch64__) + #undef CSTD_CPU_AARCH64 + #define CSTD_CPU_AARCH64 1 + + #elif defined(__mips__) + #undef CSTD_CPU_MIPS + #define CSTD_CPU_MIPS 1 + + #else + #warning "Unsupported or unknown host CPU for GCC tools" + + #endif + +#elif 1 == CSTD_TOOLCHAIN_RVCT + #undef CSTD_CPU_ARM + #define CSTD_CPU_ARM 1 + +#else + #warning "Unsupported or unknown toolchain" + +#endif + +/* ============================================================================ + Determine the Processor Endianness +============================================================================ */ + +#if ((1 == CSTD_CPU_X86_32) || (1 == CSTD_CPU_X86_64)) + /* Note: x86 and x86-64 are always little endian, so leave at default. */ + +#elif 1 == CSTD_CPU_AARCH64 + /* No big endian support? */ + +#elif 1 == CSTD_TOOLCHAIN_RVCT + #if defined(__BIG_ENDIAN) + #undef CSTD_ENDIAN_BIG + #define CSTD_ENDIAN_BIG 1 + #endif + +#elif ((1 == CSTD_TOOLCHAIN_GCC) && (1 == CSTD_CPU_ARM)) + #if defined(__ARMEB__) + #undef CSTD_ENDIAN_BIG + #define CSTD_ENDIAN_BIG 1 + #endif + +#elif ((1 == CSTD_TOOLCHAIN_GCC) && (1 == CSTD_CPU_MIPS)) + #if defined(__MIPSEB__) + #undef CSTD_ENDIAN_BIG + #define CSTD_ENDIAN_BIG 1 + #endif + +#elif 1 == CSTD_TOOLCHAIN_MSVC + /* Note: Microsoft only support little endian, so leave at default. 
*/ + +#else + #warning "Unsupported or unknown CPU" + +#endif + +/* ============================================================================ + Determine the operating system and addressing width +============================================================================ */ +#if 1 == CSTD_TOOLCHAIN_MSVC + #if defined(_WIN32) && !defined(_WIN64) + #undef CSTD_OS_WINDOWS + #define CSTD_OS_WINDOWS 1 + #undef CSTD_OS_WIN32 + #define CSTD_OS_WIN32 1 + #undef CSTD_CPU_32BIT + #define CSTD_CPU_32BIT 1 + + #elif defined(_WIN32) && defined(_WIN64) + #undef CSTD_OS_WINDOWS + #define CSTD_OS_WINDOWS 1 + #undef CSTD_OS_WIN64 + #define CSTD_OS_WIN64 1 + #undef CSTD_CPU_64BIT + #define CSTD_CPU_64BIT 1 + + #else + #warning "Unsupported or unknown host OS for MSVC tools" + + #endif + +#elif 1 == CSTD_TOOLCHAIN_GCC + #if defined(_WIN32) && defined(_WIN64) + #undef CSTD_OS_WINDOWS + #define CSTD_OS_WINDOWS 1 + #undef CSTD_OS_WIN64 + #define CSTD_OS_WIN64 1 + #undef CSTD_CPU_64BIT + #define CSTD_CPU_64BIT 1 + + #elif defined(_WIN32) && !defined(_WIN64) + #undef CSTD_OS_WINDOWS + #define CSTD_OS_WINDOWS 1 + #undef CSTD_OS_WIN32 + #define CSTD_OS_WIN32 1 + #undef CSTD_CPU_32BIT + #define CSTD_CPU_32BIT 1 + + #elif defined(ANDROID) + #undef CSTD_OS_ANDROID + #define CSTD_OS_ANDROID 1 + + #if defined(__KERNEL__) + #undef CSTD_OS_ANDROID_KERNEL + #define CSTD_OS_ANDROID_KERNEL 1 + #endif + + #if defined(__LP64__) || defined(_LP64) + #undef CSTD_OS_ANDROID64 + #define CSTD_OS_ANDROID64 1 + #undef CSTD_CPU_64BIT + #define CSTD_CPU_64BIT 1 + #else + #undef CSTD_OS_ANDROID32 + #define CSTD_OS_ANDROID32 1 + #undef CSTD_CPU_32BIT + #define CSTD_CPU_32BIT 1 + #endif + + #elif defined(__KERNEL__) || defined(__linux) + #undef CSTD_OS_LINUX + #define CSTD_OS_LINUX 1 + + #if defined(__KERNEL__) + #undef CSTD_OS_LINUX_KERNEL + #define CSTD_OS_LINUX_KERNEL 1 + #endif + + #if defined(__LP64__) || defined(_LP64) + #undef CSTD_OS_LINUX64 + #define CSTD_OS_LINUX64 1 + #undef CSTD_CPU_64BIT + #define CSTD_CPU_64BIT 1 + #else + #undef CSTD_OS_LINUX32 + #define CSTD_OS_LINUX32 1 + #undef CSTD_CPU_32BIT + #define CSTD_CPU_32BIT 1 + #endif + + #elif defined(__APPLE__) + #undef CSTD_OS_APPLEOS + #define CSTD_OS_APPLEOS 1 + + #if defined(__LP64__) || defined(_LP64) + #undef CSTD_OS_APPLEOS64 + #define CSTD_OS_APPLEOS64 1 + #undef CSTD_CPU_64BIT + #define CSTD_CPU_64BIT 1 + #else + #undef CSTD_OS_APPLEOS32 + #define CSTD_OS_APPLEOS32 1 + #undef CSTD_CPU_32BIT + #define CSTD_CPU_32BIT 1 + #endif + + #elif defined(__SYMBIAN32__) + #undef CSTD_OS_SYMBIAN + #define CSTD_OS_SYMBIAN 1 + #undef CSTD_CPU_32BIT + #define CSTD_CPU_32BIT 1 + + #else + #undef CSTD_OS_NONE + #define CSTD_OS_NONE 1 + #undef CSTD_CPU_32BIT + #define CSTD_CPU_32BIT 1 + +#endif + +#elif 1 == CSTD_TOOLCHAIN_RVCT + + #if defined(ANDROID) + #undef CSTD_OS_ANDROID + #undef CSTD_OS_ANDROID32 + #define CSTD_OS_ANDROID 1 + #define CSTD_OS_ANDROID32 1 + + #elif defined(__linux) + #undef CSTD_OS_LINUX + #undef CSTD_OS_LINUX32 + #define CSTD_OS_LINUX 1 + #define CSTD_OS_LINUX32 1 + + #elif defined(__SYMBIAN32__) + #undef CSTD_OS_SYMBIAN + #define CSTD_OS_SYMBIAN 1 + + #else + #undef CSTD_OS_NONE + #define CSTD_OS_NONE 1 + +#endif + +#else + #warning "Unsupported or unknown host OS" + +#endif + +/* ============================================================================ + Determine the correct linker symbol Import and Export Macros +============================================================================ */ +/** + * @defgroup arm_cstd_linkage_specifiers Linkage 
Specifiers + * @{ + * + * This set of macros contains system-dependent linkage specifiers which + * determine the visibility of symbols across DLL boundaries. A header for a + * particular DLL should define a set of local macros derived from these, + * and should not use these macros to decorate functions directly as there may + * be multiple DLLs being used. + * + * These DLL library local macros should be (with appropriate library prefix) + * [MY_LIBRARY]_API, [MY_LIBRARY]_IMPL, and + * [MY_LIBRARY]_LOCAL. + * + * - [MY_LIBRARY]_API should be used to decorate the function + * declarations in the header. It should be defined as either + * @c CSTD_LINK_IMPORT or @c CSTD_LINK_EXPORT, depending on whether the + * current situation is a compile of the DLL itself (use export) or a + * compile of an external user of the DLL (use import). + * - [MY_LIBRARY]_IMPL should be defined as @c CSTD_LINK_IMPL + * and should be used to decorate the definition of functions in the C + * file. + * - [MY_LIBRARY]_LOCAL should be used to decorate function + * declarations which are exported across translation units within the + * DLL, but which are not exported outside of the DLL boundary. + * + * Functions which are @c static in either a C file or in a header file do not + * need any form of linkage decoration, and should therefore have no linkage + * macro applied to them. + */ + +/** + * @def CSTD_LINK_IMPORT + * Specifies a function as being imported to a translation unit across a DLL + * boundary. + */ + +/** + * @def CSTD_LINK_EXPORT + * Specifies a function as being exported across a DLL boundary by a + * translation unit. + */ + +/** + * @def CSTD_LINK_IMPL + * Specifies a function which will be exported across a DLL boundary as + * being implemented by a translation unit. + */ + +/** + * @def CSTD_LINK_LOCAL + * Specifies a function which is internal to a DLL, and which should not be + * exported outside of it.
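+ *
+ * For illustration only (hypothetical library "foo", not part of these
+ * sources), a DLL header would typically derive its own macros from the
+ * specifiers described in this group:
+ *
+ *   #if defined(FOO_BUILDING_DLL)
+ *       #define FOO_API CSTD_LINK_EXPORT
+ *   #else
+ *       #define FOO_API CSTD_LINK_IMPORT
+ *   #endif
+ *
+ *   FOO_API void foo_init( void );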
+ */ + +/** + * @} + */ + +#if 1 == CSTD_OS_LINUX + #define CSTD_LINK_IMPORT __attribute__((visibility("default"))) + #define CSTD_LINK_EXPORT __attribute__((visibility("default"))) + #define CSTD_LINK_IMPL __attribute__((visibility("default"))) + #define CSTD_LINK_LOCAL __attribute__((visibility("hidden"))) + +#elif 1 == CSTD_OS_WINDOWS + #define CSTD_LINK_IMPORT __declspec(dllimport) + #define CSTD_LINK_EXPORT __declspec(dllexport) + #define CSTD_LINK_IMPL __declspec(dllexport) + #define CSTD_LINK_LOCAL + +#elif 1 == CSTD_OS_SYMBIAN + #define CSTD_LINK_IMPORT IMPORT_C + #define CSTD_LINK_EXPORT IMPORT_C + #define CSTD_LINK_IMPL EXPORT_C + #define CSTD_LINK_LOCAL + +#elif 1 == CSTD_OS_APPLEOS + #define CSTD_LINK_IMPORT __attribute__((visibility("default"))) + #define CSTD_LINK_EXPORT __attribute__((visibility("default"))) + #define CSTD_LINK_IMPL __attribute__((visibility("default"))) + #define CSTD_LINK_LOCAL __attribute__((visibility("hidden"))) + +#elif 1 == CSTD_OS_ANDROID + #define CSTD_LINK_IMPORT __attribute__((visibility("default"))) + #define CSTD_LINK_EXPORT __attribute__((visibility("default"))) + #define CSTD_LINK_IMPL __attribute__((visibility("default"))) + #define CSTD_LINK_LOCAL __attribute__((visibility("hidden"))) + +#else /* CSTD_OS_NONE */ + #define CSTD_LINK_IMPORT + #define CSTD_LINK_EXPORT + #define CSTD_LINK_IMPL + #define CSTD_LINK_LOCAL + +#endif + +/** + * @} + */ + +#endif /* End (_ARM_CSTD_COMPILERS_H_) */ diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_pop.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_pop.h new file mode 100755 index 00000000000..20862ec1fa7 --- /dev/null +++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_pop.h @@ -0,0 +1,27 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _ARM_CSTD_PACK_POP_H_ +#define _ARM_CSTD_PACK_POP_H_ + +#if 1 == CSTD_TOOLCHAIN_MSVC + #include +#endif + +#endif /* End (_ARM_CSTD_PACK_POP_H_) */ diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_push.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_push.h new file mode 100755 index 00000000000..bc24e6942b1 --- /dev/null +++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_push.h @@ -0,0 +1,27 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +#ifndef _ARM_CSTD_PACK_PUSH_H_ +#define _ARM_CSTD_PACK_PUSH_H_ + +#if 1 == CSTD_TOOLCHAIN_MSVC + #include +#endif + +#endif /* End (_ARM_CSTD_PACK_PUSH_H_) */ diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types.h new file mode 100755 index 00000000000..efeefa590c6 --- /dev/null +++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types.h @@ -0,0 +1,33 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _ARM_CSTD_TYPES_H_ +#define _ARM_CSTD_TYPES_H_ + +#if 1 == CSTD_TOOLCHAIN_MSVC + #include "arm_cstd_types_msvc.h" +#elif 1 == CSTD_TOOLCHAIN_GCC + #include "arm_cstd_types_gcc.h" +#elif 1 == CSTD_TOOLCHAIN_RVCT + #include "arm_cstd_types_rvct.h" +#else + #error "Toolchain not recognized" +#endif + +#endif /* End (_ARM_CSTD_TYPES_H_) */ diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_gcc.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_gcc.h new file mode 100755 index 00000000000..67981965500 --- /dev/null +++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_gcc.h @@ -0,0 +1,92 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _ARM_CSTD_TYPES_GCC_H_ +#define _ARM_CSTD_TYPES_GCC_H_ + +/* ============================================================================ + Type definitions +============================================================================ */ +/* All modern versions of GCC support stdint outside of C99 Mode. */ +/* However, Linux kernel limits what headers are available! */ +#if 1 == CSTD_OS_LINUX_KERNEL + #include + #include + #include + #include + + /* Fix up any types which CSTD provides but which Linux is missing. */ + /* Note Linux assumes pointers are "long", so this is safe. */ + #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) + typedef unsigned long uintptr_t; + #endif + typedef long intptr_t; + +#else + #include + #include + #include +#endif + +typedef uint32_t bool_t; + +#if !defined(TRUE) + #define TRUE ((bool_t)1) +#endif + +#if !defined(FALSE) + #define FALSE ((bool_t)0) +#endif + +/* ============================================================================ + Keywords +============================================================================ */ +/* Doxygen documentation for these is in the RVCT header.
*/ +#define ASM __asm__ + +#define INLINE __inline__ + +#define FORCE_INLINE __attribute__((__always_inline__)) __inline__ + +#define NEVER_INLINE __attribute__((__noinline__)) + +#define PURE __attribute__((__pure__)) + +#define PACKED __attribute__((__packed__)) + +/* GCC does not support pointers to UNALIGNED data, so we do not define it to + * force a compile error if this macro is used. */ + +#define RESTRICT __restrict__ + +/* RVCT in GCC mode does not support the CHECK_RESULT attribute. */ +#if 0 == CSTD_TOOLCHAIN_RVCT_GCC_MODE + #define CHECK_RESULT __attribute__((__warn_unused_result__)) +#else + #define CHECK_RESULT +#endif + +/* RVCT in GCC mode does not support the __func__ name outside of C99. */ +#if (0 == CSTD_TOOLCHAIN_RVCT_GCC_MODE) + #define CSTD_FUNC __func__ +#else + #define CSTD_FUNC __FUNCTION__ +#endif + +#endif /* End (_ARM_CSTD_TYPES_GCC_H_) */ diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_rvct.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_rvct.h new file mode 100755 index 00000000000..a8efda0040a --- /dev/null +++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_rvct.h @@ -0,0 +1,192 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _ARM_CSTD_TYPES_RVCT_H_ +#define _ARM_CSTD_TYPES_RVCT_H_ + +/* ============================================================================ + Type definitions +============================================================================ */ +#include +#include + +#if 199901L <= __STDC_VERSION__ + #include +#else + typedef unsigned char uint8_t; + typedef signed char int8_t; + typedef unsigned short uint16_t; + typedef signed short int16_t; + typedef unsigned int uint32_t; + typedef signed int int32_t; + typedef unsigned __int64 uint64_t; + typedef signed __int64 int64_t; + typedef ptrdiff_t intptr_t; + typedef size_t uintptr_t; +#endif + +typedef uint32_t bool_t; + +#if !defined(TRUE) + #define TRUE ((bool_t)1) +#endif + +#if !defined(FALSE) + #define FALSE ((bool_t)0) +#endif + +/* ============================================================================ + Keywords +============================================================================ */ +/** + * @addtogroup arm_cstd_coding_standard + * @{ + */ + +/** + * @def ASM + * @hideinitializer + * Mark an assembler block. Such blocks are often compiler specific, so often + * need to be surrounded in appropriate @c ifdef and @c endif blocks + * using the relevant @c CSTD_TOOLCHAIN macro. + */ +#define ASM __asm + +/** + * @def INLINE + * @hideinitializer + * Mark a definition as something which should be inlined. This is not always + * possible on a given compiler, and may be disabled at lower optimization + * levels. + */ +#define INLINE __inline + +/** + * @def FORCE_INLINE + * @hideinitializer + * Mark a definition as something which should be inlined. This provides a much + * stronger hint to the compiler than @c INLINE, and if supported should always + * result in an inlined function being emitted. 
If not supported this falls + * back to using the @c INLINE definition. + */ +#define FORCE_INLINE __forceinline + +/** + * @def NEVER_INLINE + * @hideinitializer + * Mark a definition as something which should not be inlined. This provides a + * stronger hint to the compiler that the function should not be inlined, + * bypassing any heuristic rules the compiler normally applies. If not + * supported by a toolchain this falls back to being an empty macro. + */ +#define NEVER_INLINE __declspec(noinline) + +/** + * @def PURE + * @hideinitializer + * Denotes that a function's return is only dependent on its inputs, enabling + * more efficient optimizations. Falls back to an empty macro if not supported. + */ +#define PURE __pure + +/** + * @def PACKED + * @hideinitializer + * Denotes that a structure should be stored in a packed form. This macro must + * be used in conjunction with the @c arm_cstd_pack_* headers for portability: + * + * @code + * #include + * + * struct PACKED myStruct { + * ... + * }; + * + * #include + * PACKED + * @endcode + */ +#define PACKED __packed + +/** + * @def UNALIGNED + * @hideinitializer + * Denotes that a pointer points to a buffer with lower alignment than the + * natural alignment required by the C standard. This should only be used + * in extreme cases, as the emitted code is normally more efficient if memory + * is aligned. + * + * @warning This is \b NON-PORTABLE. The GNU tools are anti-unaligned pointers + * and have no support for such a construction. + */ +#define UNALIGNED __packed + +/** + * @def RESTRICT + * @hideinitializer + * Denotes that a pointer does not overlap with any other pointers currently in + * scope, increasing the range of optimizations which can be performed by the + * compiler. + * + * @warning Specification of @c RESTRICT is a contract between the programmer + * and the compiler. If you place @c RESTRICT on buffers which do actually + * overlap the behavior is undefined, and likely to vary at different + * optimization levels. + */ +#define RESTRICT __restrict + +/** + * @def CHECK_RESULT + * @hideinitializer + * Function attribute which causes a warning to be emitted if the function's + * return value is not used by the caller. Compiles to an empty macro if + * there is no supported mechanism for this check in the underlying compiler. + * + * @note At the time of writing this is only supported by GCC. RVCT does not + * support this attribute, even in GCC mode, so engineers are encouraged to + * compile their code using GCC even if primarily working with another + * compiler. + * + * @code + * CHECK_RESULT int my_func( void ); + * @endcode + */ +#define CHECK_RESULT + +/** + * @def CSTD_FUNC + * Specify the @c CSTD_FUNC macro, a portable construct containing the name of + * the current function. On most compilers it is illegal to use this macro + * outside of a function scope. If not supported by the compiler we define + * @c CSTD_FUNC as an empty string. + * + * @warning Due to the implementation of this on most modern compilers this + * expands to a magically defined "static const" variable, not a constant + * string.
This makes injecting @c CSTD_FUNC directly in to compile-time + * strings impossible, so if you want to make the function name part of a + * larger string you must use a printf-like function with a @c @%s template + * which is populated with @c CSTD_FUNC + */ +#define CSTD_FUNC __FUNCTION__ + +/** + * @} + */ + +#endif /* End (_ARM_CSTD_TYPES_RVCT_H_) */ diff --git a/drivers/gpu/arm/midgard/malisw/mali_malisw.h b/drivers/gpu/arm/midgard/malisw/mali_malisw.h new file mode 100755 index 00000000000..ed1d07e9382 --- /dev/null +++ b/drivers/gpu/arm/midgard/malisw/mali_malisw.h @@ -0,0 +1,238 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _MALISW_H_ +#define _MALISW_H_ + +/** + * @file mali_malisw.h + * Driver-wide include for common macros and types. + */ + +/** + * @defgroup malisw Mali software definitions and types + * @{ + */ + +#include + +#include "mali_stdtypes.h" + +/** @brief Gets the container object when given a pointer to a member of an object. */ +#define CONTAINER_OF(ptr, type, member) ((type *)((char *)(ptr) - offsetof(type,member))) + +/** @brief Gets the number of elements of type s in a fixed length array of s */ +#define NELEMS(s) (sizeof(s)/sizeof((s)[0])) + +/** + * @brief The lesser of two values. + * May evaluate its arguments more than once. + * @see CSTD_MIN + */ +#define MIN(x,y) CSTD_MIN(x,y) + +/** + * @brief The greater of two values. + * May evaluate its arguments more than once. + * @see CSTD_MAX + */ +#define MAX(x,y) CSTD_MAX(x,y) + +/** + * @brief Clamp value x to within min and max inclusive + * May evaluate its arguments more than once. + * @see CSTD_CLAMP + */ +#define CLAMP( x, min, max ) CSTD_CLAMP( x, min, max ) + +/** + * @brief Convert a pointer into a u64 for storing in a data structure. + * This is commonly used when pairing a 32-bit CPU with a 64-bit peripheral, + * such as a Midgard GPU. C's type promotion is complex and a straight cast + * does not work reliably as pointers are often considered as signed. + */ +#define PTR_TO_U64( x ) CSTD_PTR_TO_U64( x ) + +/** + * @name Mali library linkage specifiers + * These directly map to the cstd versions described in detail here: @ref arm_cstd_linkage_specifiers + * @{ + */ +#define MALI_IMPORT CSTD_LINK_IMPORT +#define MALI_EXPORT CSTD_LINK_EXPORT +#define MALI_IMPL CSTD_LINK_IMPL +#define MALI_LOCAL CSTD_LINK_LOCAL + +/** @brief Decorate exported function prototypes. + * + * The file containing the implementation of the function should define this to be MALI_EXPORT before including + * malisw/mali_malisw.h. + */ +#ifndef MALI_API +#define MALI_API MALI_IMPORT +#endif +/** @} */ + +/** @name Testable static functions + * @{ + * + * These macros can be used to allow functions to be static in release builds but exported from a shared library in unit + * test builds, allowing them to be tested or used to assist testing. 
+ * + * Example mali_foo_bar.c containing the function to test: + * + * @code + * #define MALI_API MALI_EXPORT + * + * #include + * #include "mali_foo_testable_statics.h" + * + * MALI_TESTABLE_STATIC_IMPL void my_func() + * { + * //Implementation + * } + * @endcode + * + * Example mali_foo_testable_statics.h: + * + * @code + * #if 1 == MALI_UNIT_TEST + * #include + * + * MALI_TESTABLE_STATIC_API void my_func(); + * + * #endif + * @endcode + * + * Example mali_foo_tests.c: + * + * @code + * #include + * + * void my_test_func() + * { + * my_func(); + * } + * @endcode + */ + +/** @brief Decorate testable static function implementations. + * + * A header file containing a MALI_TESTABLE_STATIC_API-decorated prototype for each static function will be required + * when MALI_UNIT_TEST == 1 in order to link the function from the test. + */ +#if 1 == MALI_UNIT_TEST +#define MALI_TESTABLE_STATIC_IMPL MALI_IMPL +#else +#define MALI_TESTABLE_STATIC_IMPL static +#endif + +/** @brief Decorate testable static function prototypes. + * + * @note Prototypes should @em only be declared when MALI_UNIT_TEST == 1 + */ +#define MALI_TESTABLE_STATIC_API MALI_API +/** @} */ + +/** @name Testable local functions + * @{ + * + * These macros can be used to allow functions to be local to a shared library in release builds but be exported in unit + * test builds, allowing them to be tested or used to assist testing. + * + * Example mali_foo_bar.c containing the function to test: + * + * @code + * #define MALI_API MALI_EXPORT + * + * #include + * #include "mali_foo_bar.h" + * + * MALI_TESTABLE_LOCAL_IMPL void my_func() + * { + * //Implementation + * } + * @endcode + * + * Example mali_foo_bar.h: + * + * @code + * #include + * + * MALI_TESTABLE_LOCAL_API void my_func(); + * + * @endcode + * + * Example mali_foo_tests.c: + * + * @code + * #include + * + * void my_test_func() + * { + * my_func(); + * } + * @endcode + */ + +/** @brief Decorate testable local function implementations. + * + * This can be used to have a function normally local to the shared library except in debug builds where it will be + * exported. + */ +#ifdef CONFIG_MALI_DEBUG +#define MALI_TESTABLE_LOCAL_IMPL MALI_IMPL +#else +#define MALI_TESTABLE_LOCAL_IMPL MALI_LOCAL +#endif /* CONFIG_MALI_DEBUG */ + +/** @brief Decorate testable local function prototypes. + * + * This can be used to have a function normally local to the shared library except in debug builds where it will be + * exported. + */ +#ifdef CONFIG_MALI_DEBUG +#define MALI_TESTABLE_LOCAL_API MALI_API +#else +#define MALI_TESTABLE_LOCAL_API MALI_LOCAL +#endif /* CONFIG_MALI_DEBUG */ +/** @} */ + +/** + * Flag a cast as a reinterpretation, usually of a pointer type. + * @see CSTD_REINTERPRET_CAST + */ +#define REINTERPRET_CAST(type) CSTD_REINTERPRET_CAST(type) + +/** + * Flag a cast as casting away const, usually of a pointer type. + * @see CSTD_CONST_CAST + */ +#define CONST_CAST(type) (type) CSTD_CONST_CAST(type) + +/** + * Flag a cast as a (potentially complex) value conversion, usually of a numerical type. + * @see CSTD_STATIC_CAST + */ +#define STATIC_CAST(type) (type) CSTD_STATIC_CAST(type) + + +/** @} */ + +#endif /* _MALISW_H_ */ diff --git a/drivers/gpu/arm/midgard/malisw/mali_stdtypes.h b/drivers/gpu/arm/midgard/malisw/mali_stdtypes.h new file mode 100755 index 00000000000..e0afe4b97fe --- /dev/null +++ b/drivers/gpu/arm/midgard/malisw/mali_stdtypes.h @@ -0,0 +1,230 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _MALISW_STDTYPES_H_ +#define _MALISW_STDTYPES_H_ + +/** + * @file mali_stdtypes.h + * This file defines the standard types used by the Mali codebase. + */ + +/** + * @addtogroup malisw + * @{ + */ + +/** + * @defgroup malisw_stdtypes Mali software standard types + * + * Basic driver-wide types. + */ + +/** + * @addtogroup malisw_stdtypes + * @{ + */ + +#include "arm_cstd/arm_cstd.h" + +/** + * @name Scalar types. + * These are the scalar types used within the mali driver. + * @{ + */ +/* Note: if compiling the Linux kernel then avoid redefining these. */ +#if 0 == CSTD_OS_LINUX_KERNEL + typedef uint64_t u64; + typedef uint32_t u32; + typedef uint16_t u16; + typedef uint8_t u8; + + typedef int64_t s64; + typedef int32_t s32; + typedef int16_t s16; + typedef int8_t s8; +#endif + +typedef double f64; +typedef float f32; +typedef u16 f16; + +typedef u32 mali_fixed16_16; +/* @} */ + +/** + * @name Boolean types. + * The intended use is for bool8 to be used when storing boolean values in + * structures, casting to mali_bool to be used in code sections. + * @{ + */ +typedef bool_t mali_bool; +typedef u8 mali_bool8; + +#define MALI_FALSE FALSE +#define MALI_TRUE TRUE +/* @} */ + +/** + * @name Integer bounding values + * Maximum and minimum values for integer types + * @{ + */ +#define U64_MAX UINT64_MAX +#define U32_MAX UINT32_MAX +#define U16_MAX UINT16_MAX +#define U8_MAX UINT8_MAX + +#define S64_MAX INT64_MAX +#define S64_MIN INT64_MIN +#define S32_MAX INT32_MAX +#define S32_MIN INT32_MIN +#define S16_MAX INT16_MAX +#define S16_MIN INT16_MIN +#define S8_MAX INT8_MAX +#define S8_MIN INT8_MIN +/* @} */ + +/** + * @name GPU address types + * Types for integers which hold a GPU pointer or GPU pointer offsets. + * @{ + */ +typedef u64 mali_addr64; +typedef u32 mali_addr32; +typedef u64 mali_size64; +typedef s64 mali_offset64; +/* 32 bit offsets and sizes are always for native types and so use ptrdiff_t and size_t respectively */ +/* @} */ + +/** + * @name Mali error types + * @brief The common error type for the mali drivers + * The mali_error type, all driver error handling should be of this type unless + * it must deal with a specific APIs error type. + * @{ + */ +typedef enum +{ + /** + * @brief Common Mali errors for the entire driver + * MALI_ERROR_NONE is guaranteed to be 0. + * @{ + */ + MALI_ERROR_NONE = 0, + MALI_ERROR_OUT_OF_GPU_MEMORY, + MALI_ERROR_OUT_OF_MEMORY, + MALI_ERROR_FUNCTION_FAILED, + /* @} */ + /** + * @brief Mali errors for Client APIs to pass to EGL when creating EGLImages + * These errors must only be returned to EGL from one of the Client APIs as part of the + * (clientapi)_egl_image_interface.h + * @{ + */ + MALI_ERROR_EGLP_BAD_ACCESS, + MALI_ERROR_EGLP_BAD_PARAMETER, + /* @} */ + /** + * @brief Mali errors for the MCL module. + * These errors must only be used within the private components of the OpenCL implementation that report + * directly to API functions for cases where errors cannot be detected in the entrypoints file. They must + * not be passed between driver components. 
+ * These are errors in the mali error space specifically for the MCL module, hence the MCLP prefix. + * @{ + */ + MALI_ERROR_MCLP_DEVICE_NOT_FOUND, + MALI_ERROR_MCLP_DEVICE_NOT_AVAILABLE, + MALI_ERROR_MCLP_COMPILER_NOT_AVAILABLE, + MALI_ERROR_MCLP_MEM_OBJECT_ALLOCATION_FAILURE, + MALI_ERROR_MCLP_PROFILING_INFO_NOT_AVAILABLE, + MALI_ERROR_MCLP_MEM_COPY_OVERLAP, + MALI_ERROR_MCLP_IMAGE_FORMAT_MISMATCH, + MALI_ERROR_MCLP_IMAGE_FORMAT_NOT_SUPPORTED, + MALI_ERROR_MCLP_BUILD_PROGRAM_FAILURE, + MALI_ERROR_MCLP_MAP_FAILURE, + MALI_ERROR_MCLP_MISALIGNED_SUB_BUFFER_OFFSET, + MALI_ERROR_MCLP_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, + MALI_ERROR_MCLP_INVALID_VALUE, + MALI_ERROR_MCLP_INVALID_DEVICE_TYPE, + MALI_ERROR_MCLP_INVALID_PLATFORM, + MALI_ERROR_MCLP_INVALID_DEVICE, + MALI_ERROR_MCLP_INVALID_CONTEXT, + MALI_ERROR_MCLP_INVALID_QUEUE_PROPERTIES, + MALI_ERROR_MCLP_INVALID_COMMAND_QUEUE, + MALI_ERROR_MCLP_INVALID_HOST_PTR, + MALI_ERROR_MCLP_INVALID_MEM_OBJECT, + MALI_ERROR_MCLP_INVALID_IMAGE_FORMAT_DESCRIPTOR, + MALI_ERROR_MCLP_INVALID_IMAGE_SIZE, + MALI_ERROR_MCLP_INVALID_SAMPLER, + MALI_ERROR_MCLP_INVALID_BINARY, + MALI_ERROR_MCLP_INVALID_BUILD_OPTIONS, + MALI_ERROR_MCLP_INVALID_PROGRAM, + MALI_ERROR_MCLP_INVALID_PROGRAM_EXECUTABLE, + MALI_ERROR_MCLP_INVALID_KERNEL_NAME, + MALI_ERROR_MCLP_INVALID_KERNEL_DEFINITION, + MALI_ERROR_MCLP_INVALID_KERNEL, + MALI_ERROR_MCLP_INVALID_ARG_INDEX, + MALI_ERROR_MCLP_INVALID_ARG_VALUE, + MALI_ERROR_MCLP_INVALID_ARG_SIZE, + MALI_ERROR_MCLP_INVALID_KERNEL_ARGS, + MALI_ERROR_MCLP_INVALID_WORK_DIMENSION, + MALI_ERROR_MCLP_INVALID_WORK_GROUP_SIZE, + MALI_ERROR_MCLP_INVALID_WORK_ITEM_SIZE, + MALI_ERROR_MCLP_INVALID_GLOBAL_OFFSET, + MALI_ERROR_MCLP_INVALID_EVENT_WAIT_LIST, + MALI_ERROR_MCLP_INVALID_EVENT, + MALI_ERROR_MCLP_INVALID_OPERATION, + MALI_ERROR_MCLP_INVALID_GL_OBJECT, + MALI_ERROR_MCLP_INVALID_BUFFER_SIZE, + MALI_ERROR_MCLP_INVALID_MIP_LEVEL, + MALI_ERROR_MCLP_INVALID_GLOBAL_WORK_SIZE, + MALI_ERROR_MCLP_INVALID_GL_SHAREGROUP_REFERENCE_KHR, + MALI_ERROR_MCLP_INVALID_EGL_OBJECT, + /* @} */ + /** + * @brief Mali errors for the BASE module + * These errors must only be used within the private components of the Base implementation. They will not + * be passed to other modules by the base driver. + * These are errors in the mali error space specifically for the BASE module, hence the BASEP prefix. + * @{ + */ + MALI_ERROR_BASEP_INVALID_FUNCTION, + /* @} */ + /** A dependency exists upon a resource that the client application wants to modify, so the driver must either + * create a copy of the resource (if possible) or block until the dependency has been satisfied. + */ + MALI_ERROR_RESOURCE_IN_USE, + + /** + * @brief A stride value was too big. + * + * A surface descriptor can store strides of up to 2^31-1 bytes but strides greater than + * 2^28-1 bytes cannot be expressed in bits without overflow. + */ + MALI_ERROR_STRIDE_TOO_BIG + +} mali_error; +/* @} */ + +/* @} */ + +/* @} */ + +#endif /* _MALISW_STDTYPES_H_ */ diff --git a/drivers/gpu/arm/midgard/platform/Kbuild b/drivers/gpu/arm/midgard/platform/Kbuild new file mode 100755 index 00000000000..558657bbced --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/Kbuild @@ -0,0 +1,21 @@ +# +# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence.
+# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y) +# remove begin and end quotes from the Kconfig string type + platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)) + obj-y += $(platform_name)/ +endif diff --git a/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h new file mode 100755 index 00000000000..b0d8e3249b8 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h @@ -0,0 +1,26 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @brief Entry point to transfer control to a platform for early initialization + * + * This function is called early on in the initialization during execution of + * @ref kbase_driver_init. + * + * @return Zero to indicate success non-zero for failure. + */ +int kbase_platform_early_init(void); diff --git a/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild new file mode 100755 index 00000000000..084a1561343 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild @@ -0,0 +1,18 @@ +# +# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +obj-y += mali_kbase_config_vexpress.o +obj-y += mali_kbase_cpu_vexpress.o diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c new file mode 100755 index 00000000000..01d67fdc68b --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c @@ -0,0 +1,323 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include +#include +#include +#include +#include "mali_kbase_cpu_vexpress.h" + +/* Versatile Express (VE) configuration defaults shared between config_attributes[] + * and config_attributes_hw_issue_8408[]. Settings are not shared for + * JS_HARD_STOP_TICKS_SS and JS_RESET_TICKS_SS. 
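+ *
+ * As a worked example of how the tick counts below map to wall-clock time
+ * (derived from the values themselves): a timeout of N ticks lasts roughly
+ * N * JS_SCHEDULING_TICK_NS, so in the debug configuration the SS hard-stop
+ * is 333 ticks * 15 ms = ~5 s, and in the release configuration it is
+ * 4 ticks * 1.25 s = 5 s.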
+ */ +#define KBASE_VE_GPU_FREQ_KHZ_MAX 5000 +#define KBASE_VE_GPU_FREQ_KHZ_MIN 5000 + +#define KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG 15000000u /* 15ms, an agressive tick for testing purposes. This will reduce performance significantly */ +#define KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG 1 /* between 15ms and 30ms before soft-stop a job */ +#define KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG 1 /* between 15ms and 30ms before soft-stop a CL job */ +#define KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG 333 /* 5s before hard-stop */ +#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG 2000 /* 30s before hard-stop, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */ +#define KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG 166 /* 2.5s before hard-stop */ +#define KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG 100000 /* 1500s (25mins) before NSS hard-stop */ +#define KBASE_VE_JS_RESET_TICKS_SS_DEBUG 500 /* 45s before resetting GPU, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) */ +#define KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG 3000 /* 7.5s before resetting GPU - for issue 8401 */ +#define KBASE_VE_JS_RESET_TICKS_CL_DEBUG 500 /* 45s before resetting GPU */ +#define KBASE_VE_JS_RESET_TICKS_NSS_DEBUG 100166 /* 1502s before resetting GPU */ + +#define KBASE_VE_JS_SCHEDULING_TICK_NS 1250000000u /* 1.25s */ +#define KBASE_VE_JS_SOFT_STOP_TICKS 2 /* 2.5s before soft-stop a job */ +#define KBASE_VE_JS_SOFT_STOP_TICKS_CL 1 /* 1.25s before soft-stop a CL job */ +#define KBASE_VE_JS_HARD_STOP_TICKS_SS 4 /* 5s before hard-stop */ +#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401 24 /* 30s before hard-stop, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */ +#define KBASE_VE_JS_HARD_STOP_TICKS_CL 2 /* 2.5s before hard-stop */ +#define KBASE_VE_JS_HARD_STOP_TICKS_NSS 1200 /* 1500s before NSS hard-stop */ +#define KBASE_VE_JS_RESET_TICKS_SS 6 /* 7.5s before resetting GPU */ +#define KBASE_VE_JS_RESET_TICKS_SS_8401 36 /* 45s before resetting GPU, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */ +#define KBASE_VE_JS_RESET_TICKS_CL 3 /* 7.5s before resetting GPU */ +#define KBASE_VE_JS_RESET_TICKS_NSS 1201 /* 1502s before resetting GPU */ + +#define KBASE_VE_JS_RESET_TIMEOUT_MS 3000 /* 3s before cancelling stuck jobs */ +#define KBASE_VE_JS_CTX_TIMESLICE_NS 1000000 /* 1ms - an agressive timeslice for testing purposes (causes lots of scheduling out for >4 ctxs) */ +#define KBASE_VE_SECURE_BUT_LOSS_OF_PERFORMANCE ((uintptr_t)MALI_FALSE) /* By default we prefer performance over security on r0p0-15dev0 and KBASE_CONFIG_ATTR_ earlier */ +#define KBASE_VE_POWER_MANAGEMENT_CALLBACKS ((uintptr_t)&pm_callbacks) +#define KBASE_VE_CPU_SPEED_FUNC ((uintptr_t)&kbase_get_vexpress_cpu_clock_speed) + +#define HARD_RESET_AT_POWER_OFF 0 + +#ifndef CONFIG_OF +static kbase_io_resources io_resources = { + .job_irq_number = 68, + .mmu_irq_number = 69, + .gpu_irq_number = 70, + .io_memory_region = { + .start = 0xFC010000, + .end = 0xFC010000 + (4096 * 4) - 1} +}; +#endif + +static int pm_callback_power_on(kbase_device *kbdev) +{ + /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ + return 1; +} + +static void pm_callback_power_off(kbase_device *kbdev) +{ +#if HARD_RESET_AT_POWER_OFF + /* Cause a GPU hard reset to test whether we have actually idled the GPU + * and that we properly reconfigure the GPU on power up. 
+ * Usually this would be dangerous, but if the GPU is working correctly it should + * be completely safe as the GPU should not be active at this point. + * However this is disabled normally because it will most likely interfere with + * bus logging etc. + */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); + kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); +#endif +} + +static kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL +}; + +/* Please keep table config_attributes in sync with config_attributes_hw_issue_8408 */ +static kbase_attribute config_attributes[] = { + { + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX, + KBASE_VE_GPU_FREQ_KHZ_MAX}, + + { + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MIN, + KBASE_VE_GPU_FREQ_KHZ_MIN}, + +#ifdef CONFIG_MALI_DEBUG +/* Use more aggressive scheduling timeouts in debug builds for testing purposes */ + { + KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS, + KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL, + KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL, + KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, + KBASE_VE_JS_RESET_TICKS_SS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL, + KBASE_VE_JS_RESET_TICKS_CL_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS, + KBASE_VE_JS_RESET_TICKS_NSS_DEBUG}, +#else /* CONFIG_MALI_DEBUG */ +/* In release builds same as the defaults but scaled for 5MHz FPGA */ + { + KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS, + KBASE_VE_JS_SCHEDULING_TICK_NS}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + KBASE_VE_JS_SOFT_STOP_TICKS}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL, + KBASE_VE_JS_SOFT_STOP_TICKS_CL}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + KBASE_VE_JS_HARD_STOP_TICKS_SS}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL, + KBASE_VE_JS_HARD_STOP_TICKS_CL}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + KBASE_VE_JS_HARD_STOP_TICKS_NSS}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, + KBASE_VE_JS_RESET_TICKS_SS}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL, + KBASE_VE_JS_RESET_TICKS_CL}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS, + KBASE_VE_JS_RESET_TICKS_NSS}, +#endif /* CONFIG_MALI_DEBUG */ + { + KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS, + KBASE_VE_JS_RESET_TIMEOUT_MS}, + + { + KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS, + KBASE_VE_JS_CTX_TIMESLICE_NS}, + + { + KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS, + KBASE_VE_POWER_MANAGEMENT_CALLBACKS}, + + { + KBASE_CONFIG_ATTR_CPU_SPEED_FUNC, + KBASE_VE_CPU_SPEED_FUNC}, + + { + KBASE_CONFIG_ATTR_SECURE_BUT_LOSS_OF_PERFORMANCE, + KBASE_VE_SECURE_BUT_LOSS_OF_PERFORMANCE}, + + { + KBASE_CONFIG_ATTR_GPU_IRQ_THROTTLE_TIME_US, + 20}, + + { + KBASE_CONFIG_ATTR_END, + 0} +}; + +/* as config_attributes array above except with different settings for + * JS_HARD_STOP_TICKS_SS, JS_RESET_TICKS_SS that + * are needed for BASE_HW_ISSUE_8408. 
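+ * (Presumably the core driver substitutes this table for config_attributes
+ * when the GPU reports BASE_HW_ISSUE_8408; note that it reuses the *_8401
+ * hard-stop/reset tick values, i.e. the longer timeouts.)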
+ */ +kbase_attribute config_attributes_hw_issue_8408[] = { + { + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX, + KBASE_VE_GPU_FREQ_KHZ_MAX}, + + { + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MIN, + KBASE_VE_GPU_FREQ_KHZ_MIN}, + +#ifdef CONFIG_MALI_DEBUG +/* Use more aggressive scheduling timeouts in debug builds for testing purposes */ + { + KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS, + KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, + KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS, + KBASE_VE_JS_RESET_TICKS_NSS_DEBUG}, +#else /* CONFIG_MALI_DEBUG */ +/* In release builds same as the defaults but scaled for 5MHz FPGA */ + { + KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS, + KBASE_VE_JS_SCHEDULING_TICK_NS}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + KBASE_VE_JS_SOFT_STOP_TICKS}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + KBASE_VE_JS_HARD_STOP_TICKS_SS_8401}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + KBASE_VE_JS_HARD_STOP_TICKS_NSS}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, + KBASE_VE_JS_RESET_TICKS_SS_8401}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS, + KBASE_VE_JS_RESET_TICKS_NSS}, +#endif /* CONFIG_MALI_DEBUG */ + { + KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS, + KBASE_VE_JS_RESET_TIMEOUT_MS}, + + { + KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS, + KBASE_VE_JS_CTX_TIMESLICE_NS}, + + { + KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS, + KBASE_VE_POWER_MANAGEMENT_CALLBACKS}, + + { + KBASE_CONFIG_ATTR_CPU_SPEED_FUNC, + KBASE_VE_CPU_SPEED_FUNC}, + + { + KBASE_CONFIG_ATTR_SECURE_BUT_LOSS_OF_PERFORMANCE, + KBASE_VE_SECURE_BUT_LOSS_OF_PERFORMANCE}, + + { + KBASE_CONFIG_ATTR_END, + 0} +}; + +static kbase_platform_config versatile_platform_config = { + .attributes = config_attributes, +#ifndef CONFIG_OF + .io_resources = &io_resources +#endif +}; + +kbase_platform_config *kbase_get_platform_config(void) +{ + return &versatile_platform_config; +} + +int kbase_platform_early_init(void) +{ + /* Nothing needed at this stage */ + return 0; +} diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c new file mode 100755 index 00000000000..1b45d3cb0e3 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c @@ -0,0 +1,180 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +#include +#include +#include "mali_kbase_cpu_vexpress.h" + +#define HZ_IN_MHZ (1000000) + +#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000) +#define MOTHERBOARD_SYS_CFG_START (0x10000000) +#define SYS_CFGDATA_OFFSET (0x000000A0) +#define SYS_CFGCTRL_OFFSET (0x000000A4) +#define SYS_CFGSTAT_OFFSET (0x000000A8) + +#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31) +#define READ_REG_BIT_VALUE (0 << 30) +#define DCC_DEFAULT_BIT_VALUE (0 << 26) +#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20) +#define SITE_DEFAULT_BIT_VALUE (1 << 16) +#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12) +#define DEVICE_DEFAULT_BIT_VALUE (2 << 0) +#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0) +#define SYS_CFG_ERROR_BIT_VALUE (1 << 1) + +#define FEED_REG_BIT_MASK (0x0F) +#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03) +#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07) +#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B) +#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F) +#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13) + +#define IS_SINGLE_BIT_SET(val, pos) (val&(1<> FCLK_PA_DIVIDE_BIT_SHIFT); + /* CFGRW0[10:7] */ + pb_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PB_DIVIDE_BIT_SHIFT)) >> FCLK_PB_DIVIDE_BIT_SHIFT); + *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1); + } else { + if (IS_SINGLE_BIT_SET(reg_val, 1)) { /*CFGRW0[1] - CLKOC */ + /* CFGRW0[6:3] */ + pa_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PA_DIVIDE_BIT_SHIFT)) >> FCLK_PA_DIVIDE_BIT_SHIFT); + /* CFGRW0[14:11] */ + pc_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PC_DIVIDE_BIT_SHIFT)) >> FCLK_PC_DIVIDE_BIT_SHIFT); + *cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1); + } else if (IS_SINGLE_BIT_SET(reg_val, 2)) { /*CFGRW0[2] - FACLK */ + /* CFGRW0[18:15] */ + pa_divide = ((reg_val & (FEED_REG_BIT_MASK << AXICLK_PA_DIVIDE_BIT_SHIFT)) >> AXICLK_PA_DIVIDE_BIT_SHIFT); + /* CFGRW0[22:19] */ + pb_divide = ((reg_val & (FEED_REG_BIT_MASK << AXICLK_PB_DIVIDE_BIT_SHIFT)) >> AXICLK_PB_DIVIDE_BIT_SHIFT); + *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1); + } else { + result = 1; + } + } + } + } else { + result = 1; + } + raw_spin_unlock(&syscfg_lock); + /* Convert result expressed in Hz to Mhz units. */ + *cpu_clock /= HZ_IN_MHZ; + if(!result) + { + cpu_clock_speed = *cpu_clock; + } + + /* Unmap memory */ + iounmap(pSCCReg); + + pSCCReg_map_failed: + iounmap(pSysCfgReg); + + pSysCfgReg_map_failed: + + return result; + } +} diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h new file mode 100755 index 00000000000..f607d1800a7 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h @@ -0,0 +1,28 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _KBASE_CPU_VEXPRESS_H_ +#define _KBASE_CPU_VEXPRESS_H_ + +/** + * Versatile Express implementation of @ref kbase_cpuprops_clock_speed_function. 
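+ *
+ * Judging by the implementation above, the speed is written to *cpu_clock in
+ * MHz and 0 is returned on success, non-zero if the Versatile Express
+ * system-config registers could not be read. A minimal (hypothetical) caller:
+ *
+ *   u32 mhz;
+ *   if (kbase_get_vexpress_cpu_clock_speed(&mhz) == 0)
+ *           pr_info("VE CPU clock: %u MHz\n", mhz);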
+ */ +int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock); + +#endif /* _KBASE_CPU_VEXPRESS_H_ */ diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild new file mode 100755 index 00000000000..0cb41ce8952 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild @@ -0,0 +1,18 @@ +# +# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +obj-y += mali_kbase_config_vexpress.o +obj-y += mali_kbase_cpu_vexpress.o diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c new file mode 100755 index 00000000000..57e86d5c8a6 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c @@ -0,0 +1,324 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include +#include +#include +#include +#include "mali_kbase_cpu_vexpress.h" + +/* Versatile Express (VE) configuration defaults shared between config_attributes[] + * and config_attributes_hw_issue_8408[]. Settings are not shared for + * JS_HARD_STOP_TICKS_SS and JS_RESET_TICKS_SS. + */ +#define KBASE_VE_GPU_FREQ_KHZ_MAX 10000 +#define KBASE_VE_GPU_FREQ_KHZ_MIN 10000 + +#define KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG 15000000u /* 15ms, an agressive tick for testing purposes. 
This will reduce performance significantly */ +#define KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG 1 /* between 15ms and 30ms before soft-stop a job */ +#define KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG 1 /* between 15ms and 30ms before soft-stop a CL job */ +#define KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG 333 /* 5s before hard-stop */ +#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG 2000 /* 30s before hard-stop, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */ +#define KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG 166 /* 2.5s before hard-stop */ +#define KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG 100000 /* 1500s (25mins) before NSS hard-stop */ +#define KBASE_VE_JS_RESET_TICKS_SS_DEBUG 500 /* 45s before resetting GPU, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) */ +#define KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG 3000 /* 7.5s before resetting GPU - for issue 8401 */ +#define KBASE_VE_JS_RESET_TICKS_CL_DEBUG 500 /* 45s before resetting GPU */ +#define KBASE_VE_JS_RESET_TICKS_NSS_DEBUG 100166 /* 1502s before resetting GPU */ + +#define KBASE_VE_JS_SCHEDULING_TICK_NS 1250000000u /* 1.25s */ +#define KBASE_VE_JS_SOFT_STOP_TICKS 2 /* 2.5s before soft-stop a job */ +#define KBASE_VE_JS_SOFT_STOP_TICKS_CL 1 /* 1.25s before soft-stop a CL job */ +#define KBASE_VE_JS_HARD_STOP_TICKS_SS 4 /* 5s before hard-stop */ +#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401 24 /* 30s before hard-stop, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */ +#define KBASE_VE_JS_HARD_STOP_TICKS_CL 2 /* 2.5s before hard-stop */ +#define KBASE_VE_JS_HARD_STOP_TICKS_NSS 1200 /* 1500s before NSS hard-stop */ +#define KBASE_VE_JS_RESET_TICKS_SS 6 /* 7.5s before resetting GPU */ +#define KBASE_VE_JS_RESET_TICKS_SS_8401 36 /* 45s before resetting GPU, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */ +#define KBASE_VE_JS_RESET_TICKS_CL 3 /* 3.75s before resetting GPU */ +#define KBASE_VE_JS_RESET_TICKS_NSS 1201 /* 1502s before resetting GPU */ + +#define KBASE_VE_JS_RESET_TIMEOUT_MS 3000 /* 3s before cancelling stuck jobs */ +#define KBASE_VE_JS_CTX_TIMESLICE_NS 1000000 /* 1ms - an agressive timeslice for testing purposes (causes lots of scheduling out for >4 ctxs) */ +#define KBASE_VE_SECURE_BUT_LOSS_OF_PERFORMANCE ((uintptr_t)MALI_FALSE) /* By default we prefer performance over security on r0p0-15dev0 and KBASE_CONFIG_ATTR_ earlier */ +#define KBASE_VE_POWER_MANAGEMENT_CALLBACKS ((uintptr_t)&pm_callbacks) +#define KBASE_VE_CPU_SPEED_FUNC ((uintptr_t)&kbase_get_vexpress_cpu_clock_speed) + +#define HARD_RESET_AT_POWER_OFF 0 + +#ifndef CONFIG_OF +static kbase_io_resources io_resources = { + .job_irq_number = 75, + .mmu_irq_number = 76, + .gpu_irq_number = 77, + .io_memory_region = { + .start = 0x2F000000, + .end = 0x2F000000 + (4096 * 4) - 1} +}; +#endif + +static int pm_callback_power_on(kbase_device *kbdev) +{ + /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ + return 1; +} + +static void pm_callback_power_off(kbase_device *kbdev) +{ +#if HARD_RESET_AT_POWER_OFF + /* Cause a GPU hard reset to test whether we have actually idled the GPU + * and that we properly reconfigure the GPU on power up. + * Usually this would be dangerous, but if the GPU is working correctly it should + * be completely safe as the GPU should not be active at this point. 
+ * However this is disabled normally because it will most likely interfere with + * bus logging etc. + */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); + kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); +#endif +} + +static kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL +}; + +/* Please keep table config_attributes in sync with config_attributes_hw_issue_8408 */ +static kbase_attribute config_attributes[] = { + { + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX, + KBASE_VE_GPU_FREQ_KHZ_MAX}, + + { + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MIN, + KBASE_VE_GPU_FREQ_KHZ_MIN}, + +#ifdef CONFIG_MALI_DEBUG +/* Use more aggressive scheduling timeouts in debug builds for testing purposes */ + { + KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS, + KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL, + KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL, + KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, + KBASE_VE_JS_RESET_TICKS_SS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL, + KBASE_VE_JS_RESET_TICKS_CL_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS, + KBASE_VE_JS_RESET_TICKS_NSS_DEBUG}, +#else /* CONFIG_MALI_DEBUG */ +/* In release builds same as the defaults but scaled for 5MHz FPGA */ + { + KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS, + KBASE_VE_JS_SCHEDULING_TICK_NS}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + KBASE_VE_JS_SOFT_STOP_TICKS}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL, + KBASE_VE_JS_SOFT_STOP_TICKS_CL}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + KBASE_VE_JS_HARD_STOP_TICKS_SS}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL, + KBASE_VE_JS_HARD_STOP_TICKS_CL}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + KBASE_VE_JS_HARD_STOP_TICKS_NSS}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, + KBASE_VE_JS_RESET_TICKS_SS}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL, + KBASE_VE_JS_RESET_TICKS_CL}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS, + KBASE_VE_JS_RESET_TICKS_NSS}, +#endif /* CONFIG_MALI_DEBUG */ + { + KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS, + KBASE_VE_JS_RESET_TIMEOUT_MS}, + + { + KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS, + KBASE_VE_JS_CTX_TIMESLICE_NS}, + + { + KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS, + KBASE_VE_POWER_MANAGEMENT_CALLBACKS}, + + { + KBASE_CONFIG_ATTR_CPU_SPEED_FUNC, + KBASE_VE_CPU_SPEED_FUNC}, + + { + KBASE_CONFIG_ATTR_SECURE_BUT_LOSS_OF_PERFORMANCE, + KBASE_VE_SECURE_BUT_LOSS_OF_PERFORMANCE}, + + { + KBASE_CONFIG_ATTR_GPU_IRQ_THROTTLE_TIME_US, + 20}, + + { + KBASE_CONFIG_ATTR_END, + 0} +}; + +/* as config_attributes array above except with different settings for + * JS_HARD_STOP_TICKS_SS, JS_RESET_TICKS_SS that + * are needed for BASE_HW_ISSUE_8408. 
+ */ +kbase_attribute config_attributes_hw_issue_8408[] = { + { + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX, + KBASE_VE_GPU_FREQ_KHZ_MAX}, + + { + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MIN, + KBASE_VE_GPU_FREQ_KHZ_MIN}, + +#ifdef CONFIG_MALI_DEBUG +/* Use more aggressive scheduling timeouts in debug builds for testing purposes */ + { + KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS, + KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, + KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS, + KBASE_VE_JS_RESET_TICKS_NSS_DEBUG}, +#else /* CONFIG_MALI_DEBUG */ +/* In release builds same as the defaults but scaled for 5MHz FPGA */ + { + KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS, + KBASE_VE_JS_SCHEDULING_TICK_NS}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + KBASE_VE_JS_SOFT_STOP_TICKS}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + KBASE_VE_JS_HARD_STOP_TICKS_SS_8401}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + KBASE_VE_JS_HARD_STOP_TICKS_NSS}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, + KBASE_VE_JS_RESET_TICKS_SS_8401}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS, + KBASE_VE_JS_RESET_TICKS_NSS}, +#endif /* CONFIG_MALI_DEBUG */ + { + KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS, + KBASE_VE_JS_RESET_TIMEOUT_MS}, + + { + KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS, + KBASE_VE_JS_CTX_TIMESLICE_NS}, + + { + KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS, + KBASE_VE_POWER_MANAGEMENT_CALLBACKS}, + + { + KBASE_CONFIG_ATTR_CPU_SPEED_FUNC, + KBASE_VE_CPU_SPEED_FUNC}, + + { + KBASE_CONFIG_ATTR_SECURE_BUT_LOSS_OF_PERFORMANCE, + KBASE_VE_SECURE_BUT_LOSS_OF_PERFORMANCE}, + + { + KBASE_CONFIG_ATTR_END, + 0} +}; + +static kbase_platform_config versatile_platform_config = { + .attributes = config_attributes, +#ifndef CONFIG_OF + .io_resources = &io_resources +#endif +}; + +kbase_platform_config *kbase_get_platform_config(void) +{ + return &versatile_platform_config; +} + +int kbase_platform_early_init(void) +{ + /* Nothing needed at this stage */ + return 0; +} + diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c new file mode 100755 index 00000000000..1577f8cef78 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c @@ -0,0 +1,71 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +#include +#include +#include "mali_kbase_cpu_vexpress.h" + +#define HZ_IN_MHZ (1000000) + +#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000) +#define MOTHERBOARD_SYS_CFG_START (0x10000000) +#define SYS_CFGDATA_OFFSET (0x000000A0) +#define SYS_CFGCTRL_OFFSET (0x000000A4) +#define SYS_CFGSTAT_OFFSET (0x000000A8) + +#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31) +#define READ_REG_BIT_VALUE (0 << 30) +#define DCC_DEFAULT_BIT_VALUE (0 << 26) +#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20) +#define SITE_DEFAULT_BIT_VALUE (1 << 16) +#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12) +#define DEVICE_DEFAULT_BIT_VALUE (2 << 0) +#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0) +#define SYS_CFG_ERROR_BIT_VALUE (1 << 1) + +#define FEED_REG_BIT_MASK (0x0F) +#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03) +#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07) +#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B) +#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F) +#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13) + +#define IS_SINGLE_BIT_SET(val, pos) (val&(1< +#include +#include +#include + +#include "mali_kbase_cpu_vexpress.h" + +/* Versatile Express (VE) configuration defaults shared between config_attributes[] + * and config_attributes_hw_issue_8408[]. Settings are not shared for + * JS_HARD_STOP_TICKS_SS and JS_RESET_TICKS_SS. + */ +#define KBASE_VE_GPU_FREQ_KHZ_MAX 40000 +#define KBASE_VE_GPU_FREQ_KHZ_MIN 40000 + +#define KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG 15000000u /* 15ms, an agressive tick for testing purposes. This will reduce performance significantly */ +#define KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG 1 /* between 15ms and 30ms before soft-stop a job */ +#define KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG 1 /* between 15ms and 30ms before soft-stop a CL job */ +#define KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG 333 /* 5s before hard-stop */ +#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG 2000 /* 30s before hard-stop, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */ +#define KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG 166 /* 2.5s before hard-stop */ +#define KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG 100000 /* 1500s (25mins) before NSS hard-stop */ +#define KBASE_VE_JS_RESET_TICKS_SS_DEBUG 500 /* 45s before resetting GPU, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) */ +#define KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG 3000 /* 7.5s before resetting GPU - for issue 8401 */ +#define KBASE_VE_JS_RESET_TICKS_CL_DEBUG 500 /* 45s before resetting GPU */ +#define KBASE_VE_JS_RESET_TICKS_NSS_DEBUG 100166 /* 1502s before resetting GPU */ + +#define KBASE_VE_JS_SCHEDULING_TICK_NS 1250000000u /* 1.25s */ +#define KBASE_VE_JS_SOFT_STOP_TICKS 2 /* 2.5s before soft-stop a job */ +#define KBASE_VE_JS_SOFT_STOP_TICKS_CL 1 /* 1.25s before soft-stop a CL job */ +#define KBASE_VE_JS_HARD_STOP_TICKS_SS 4 /* 5s before hard-stop */ +#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401 24 /* 30s before hard-stop, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */ +#define KBASE_VE_JS_HARD_STOP_TICKS_CL 2 /* 2.5s before hard-stop */ +#define KBASE_VE_JS_HARD_STOP_TICKS_NSS 1200 /* 1500s before NSS hard-stop */ +#define KBASE_VE_JS_RESET_TICKS_SS 6 /* 7.5s before resetting GPU */ +#define KBASE_VE_JS_RESET_TICKS_SS_8401 36 /* 45s before resetting GPU, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */ +#define KBASE_VE_JS_RESET_TICKS_CL 3 /* 3.75s before resetting GPU */ +#define KBASE_VE_JS_RESET_TICKS_NSS 1201 /* 1502s before resetting 
GPU */ + +#define KBASE_VE_JS_RESET_TIMEOUT_MS 3000 /* 3s before cancelling stuck jobs */ +#define KBASE_VE_JS_CTX_TIMESLICE_NS 1000000 /* 1ms - an agressive timeslice for testing purposes (causes lots of scheduling out for >4 ctxs) */ +#define KBASE_VE_SECURE_BUT_LOSS_OF_PERFORMANCE ((uintptr_t)MALI_FALSE) /* By default we prefer performance over security on r0p0-15dev0 and KBASE_CONFIG_ATTR_ earlier */ +#define KBASE_VE_POWER_MANAGEMENT_CALLBACKS ((uintptr_t)&pm_callbacks) +#define KBASE_VE_CPU_SPEED_FUNC ((uintptr_t)&kbase_get_vexpress_cpu_clock_speed) + +#define HARD_RESET_AT_POWER_OFF 0 + +#ifndef CONFIG_OF +static kbase_io_resources io_resources = { + .job_irq_number = 68, + .mmu_irq_number = 69, + .gpu_irq_number = 70, + .io_memory_region = { + .start = 0xFC010000, + .end = 0xFC010000 + (4096 * 4) - 1} +}; +#endif + +static int pm_callback_power_on(kbase_device *kbdev) +{ + /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ + return 1; +} + +static void pm_callback_power_off(kbase_device *kbdev) +{ +#if HARD_RESET_AT_POWER_OFF + /* Cause a GPU hard reset to test whether we have actually idled the GPU + * and that we properly reconfigure the GPU on power up. + * Usually this would be dangerous, but if the GPU is working correctly it should + * be completely safe as the GPU should not be active at this point. + * However this is disabled normally because it will most likely interfere with + * bus logging etc. + */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); + kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); +#endif +} + +static kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL +}; + +/* Please keep table config_attributes in sync with config_attributes_hw_issue_8408 */ +static kbase_attribute config_attributes[] = { + { + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX, + KBASE_VE_GPU_FREQ_KHZ_MAX}, + + { + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MIN, + KBASE_VE_GPU_FREQ_KHZ_MIN}, + +#ifdef CONFIG_MALI_DEBUG +/* Use more aggressive scheduling timeouts in debug builds for testing purposes */ + { + KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS, + KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL, + KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL, + KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, + KBASE_VE_JS_RESET_TICKS_SS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL, + KBASE_VE_JS_RESET_TICKS_CL_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS, + KBASE_VE_JS_RESET_TICKS_NSS_DEBUG}, +#else /* CONFIG_MALI_DEBUG */ +/* In release builds same as the defaults but scaled for 5MHz FPGA */ + { + KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS, + KBASE_VE_JS_SCHEDULING_TICK_NS}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + KBASE_VE_JS_SOFT_STOP_TICKS}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL, + KBASE_VE_JS_SOFT_STOP_TICKS_CL}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + KBASE_VE_JS_HARD_STOP_TICKS_SS}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL, + 
KBASE_VE_JS_HARD_STOP_TICKS_CL}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + KBASE_VE_JS_HARD_STOP_TICKS_NSS}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, + KBASE_VE_JS_RESET_TICKS_SS}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL, + KBASE_VE_JS_RESET_TICKS_CL}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS, + KBASE_VE_JS_RESET_TICKS_NSS}, +#endif /* CONFIG_MALI_DEBUG */ + { + KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS, + KBASE_VE_JS_RESET_TIMEOUT_MS}, + + { + KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS, + KBASE_VE_JS_CTX_TIMESLICE_NS}, + + { + KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS, + KBASE_VE_POWER_MANAGEMENT_CALLBACKS}, + + { + KBASE_CONFIG_ATTR_CPU_SPEED_FUNC, + KBASE_VE_CPU_SPEED_FUNC}, + + { + KBASE_CONFIG_ATTR_SECURE_BUT_LOSS_OF_PERFORMANCE, + KBASE_VE_SECURE_BUT_LOSS_OF_PERFORMANCE}, + + { + KBASE_CONFIG_ATTR_GPU_IRQ_THROTTLE_TIME_US, + 20}, + + { + KBASE_CONFIG_ATTR_END, + 0} +}; + +/* as config_attributes array above except with different settings for + * JS_HARD_STOP_TICKS_SS, JS_RESET_TICKS_SS that + * are needed for BASE_HW_ISSUE_8408. + */ +kbase_attribute config_attributes_hw_issue_8408[] = { + { + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MAX, + KBASE_VE_GPU_FREQ_KHZ_MAX}, + + { + KBASE_CONFIG_ATTR_GPU_FREQ_KHZ_MIN, + KBASE_VE_GPU_FREQ_KHZ_MIN}, + +#ifdef CONFIG_MALI_DEBUG +/* Use more aggressive scheduling timeouts in debug builds for testing purposes */ + { + KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS, + KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, + KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS, + KBASE_VE_JS_RESET_TICKS_NSS_DEBUG}, +#else /* CONFIG_MALI_DEBUG */ +/* In release builds same as the defaults but scaled for 5MHz FPGA */ + { + KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS, + KBASE_VE_JS_SCHEDULING_TICK_NS}, + + { + KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, + KBASE_VE_JS_SOFT_STOP_TICKS}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS, + KBASE_VE_JS_HARD_STOP_TICKS_SS_8401}, + + { + KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS, + KBASE_VE_JS_HARD_STOP_TICKS_NSS}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, + KBASE_VE_JS_RESET_TICKS_SS_8401}, + + { + KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS, + KBASE_VE_JS_RESET_TICKS_NSS}, +#endif /* CONFIG_MALI_DEBUG */ + { + KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS, + KBASE_VE_JS_RESET_TIMEOUT_MS}, + + { + KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS, + KBASE_VE_JS_CTX_TIMESLICE_NS}, + + { + KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS, + KBASE_VE_POWER_MANAGEMENT_CALLBACKS}, + + { + KBASE_CONFIG_ATTR_CPU_SPEED_FUNC, + KBASE_VE_CPU_SPEED_FUNC}, + + { + KBASE_CONFIG_ATTR_SECURE_BUT_LOSS_OF_PERFORMANCE, + KBASE_VE_SECURE_BUT_LOSS_OF_PERFORMANCE}, + + { + KBASE_CONFIG_ATTR_END, + 0} +}; + +static kbase_platform_config virtex7_platform_config = { + .attributes = config_attributes, +#ifndef CONFIG_OF + .io_resources = &io_resources +#endif +}; + +kbase_platform_config *kbase_get_platform_config(void) +{ + return &virtex7_platform_config; +} + +int kbase_platform_early_init(void) +{ + /* Nothing needed at this stage */ + return 0; +} + diff --git a/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.c new file mode 
100755 index 00000000000..47d45e2daf6 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.c @@ -0,0 +1,178 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include +#include +#include "mali_kbase_cpu_vexpress.h" + +#define HZ_IN_MHZ (1000000) + +#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000) +#define MOTHERBOARD_SYS_CFG_START (0x10000000) +#define SYS_CFGDATA_OFFSET (0x000000A0) +#define SYS_CFGCTRL_OFFSET (0x000000A4) +#define SYS_CFGSTAT_OFFSET (0x000000A8) + +#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31) +#define READ_REG_BIT_VALUE (0 << 30) +#define DCC_DEFAULT_BIT_VALUE (0 << 26) +#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20) +#define SITE_DEFAULT_BIT_VALUE (1 << 16) +#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12) +#define DEVICE_DEFAULT_BIT_VALUE (2 << 0) +#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0) +#define SYS_CFG_ERROR_BIT_VALUE (1 << 1) + +#define FEED_REG_BIT_MASK (0x0F) +#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03) +#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07) +#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B) +#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F) +#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13) + +#define IS_SINGLE_BIT_SET(val, pos) (val&(1<> FCLK_PA_DIVIDE_BIT_SHIFT); + /* CFGRW0[10:7] */ + pb_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PB_DIVIDE_BIT_SHIFT)) >> FCLK_PB_DIVIDE_BIT_SHIFT); + *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1); + } else { + if (IS_SINGLE_BIT_SET(reg_val, 1)) { /*CFGRW0[1] - CLKOC */ + /* CFGRW0[6:3] */ + pa_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PA_DIVIDE_BIT_SHIFT)) >> FCLK_PA_DIVIDE_BIT_SHIFT); + /* CFGRW0[14:11] */ + pc_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PC_DIVIDE_BIT_SHIFT)) >> FCLK_PC_DIVIDE_BIT_SHIFT); + *cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1); + } else if (IS_SINGLE_BIT_SET(reg_val, 2)) { /*CFGRW0[2] - FACLK */ + /* CFGRW0[18:15] */ + pa_divide = ((reg_val & (FEED_REG_BIT_MASK << AXICLK_PA_DIVIDE_BIT_SHIFT)) >> AXICLK_PA_DIVIDE_BIT_SHIFT); + /* CFGRW0[22:19] */ + pb_divide = ((reg_val & (FEED_REG_BIT_MASK << AXICLK_PB_DIVIDE_BIT_SHIFT)) >> AXICLK_PB_DIVIDE_BIT_SHIFT); + *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1); + } else { + result = 1; + } + } + } + } else { + result = 1; + } + raw_spin_unlock(&syscfg_lock); + /* Convert result expressed in Hz to Mhz units. */ + *cpu_clock /= HZ_IN_MHZ; + if(!result) + { + cpu_clock_speed = *cpu_clock; + } + + /* Unmap memory */ + iounmap(pSCCReg); + + pSCCReg_map_failed: + iounmap(pSysCfgReg); + + pSysCfgReg_map_failed: + + return result; + } +} diff --git a/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.h new file mode 100755 index 00000000000..3f6c68ece39 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.h @@ -0,0 +1,26 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _KBASE_CPU_VEXPRESS_H_ +#define _KBASE_CPU_VEXPRESS_H_ + +/** + * Versatile Express implementation of @ref kbase_cpuprops_clock_speed_function. + */ +int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock); + +#endif /* _KBASE_CPU_VEXPRESS_H_ */ diff --git a/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h new file mode 100755 index 00000000000..daaa8c0198a --- /dev/null +++ b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h @@ -0,0 +1,54 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_ukk_os.h + * Types and definitions that are common for Linux OSs for the kernel side of the + * User-Kernel interface. + */ + +#ifndef _UKK_OS_H_ /* Linux version */ +#define _UKK_OS_H_ + +#include + +/** + * @addtogroup uk_api User-Kernel Interface API + * @{ + */ + +/** + * @addtogroup uk_api_kernel UKK (Kernel side) + * @{ + */ + +/** + * Internal OS specific data structure associated with each UKK session. Part + * of a ukk_session object. + */ +typedef struct ukkp_session +{ + int dummy; /**< No internal OS specific data at this time */ +} ukkp_session; + +/** @} end group uk_api_kernel */ + +/** @} end group uk_api */ + +#endif /* _UKK_OS_H__ */ diff --git a/drivers/gpu/arm/midgard/sconscript b/drivers/gpu/arm/midgard/sconscript new file mode 100755 index 00000000000..9d463784e6a --- /dev/null +++ b/drivers/gpu/arm/midgard/sconscript @@ -0,0 +1,114 @@ +# +# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +import os +import re +import sys +Import('env') + +if Glob('tests/sconscript'): + SConscript( 'tests/sconscript' ) + +mock_test = 0 + +if env['v'] != '1': + env['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}' + +# Fake platform is a transient solution for GPL drivers running in kernel that does not provide configuration via platform data. +# For such kernels fake_platform_device should be set to 1. For kernels providing platform data fake_platform_device should be set to 0. +fake_platform_device = 1 + +# Source files required for kbase. 
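+# (The platform directory compiled in below is chosen by the SCons
+# 'platform_config' setting, e.g. platform_config=vexpress picks up
+# platform/vexpress/*.c, paralleling the CONFIG_MALI_PLATFORM_THIRDPARTY
+# handling in platform/Kbuild earlier in this patch.)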
+kbase_src = [Glob('#kernel/drivers/gpu/arm/midgard/*.c'), + Glob('#kernel/drivers/gpu/arm/midgard/*.c'), + Glob('#kernel/drivers/gpu/arm/midgard/platform/%s/*.c' % (env['platform_config'])), + Glob('#kernel/drivers/gpu/arm/midgard/*.h'), + Glob('#kernel/drivers/gpu/arm/midgard/*.h'), + ] + +if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1': + kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')] + mock_test = 1 + +# we need platform config for GPL version using fake platform +if fake_platform_device==1: + # Check if we are compiling for PBX + linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config' + search_term = '^[\ ]*CONFIG_MACH_REALVIEW_PBX[\ ]*=[\ ]*y' + REALVIEW_PBX = 0 + for line in open(linux_config_file, 'r'): + if re.search(search_term, line): + REALVIEW_PBX = 1 + break + if REALVIEW_PBX == 1 and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_virtex7_40mhz' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'): + sys.stderr.write("WARNING: Building for a PBX kernel but with platform_config=vexpress*\n") + # if the file platform config file is in the tpip directory then use that, otherwise use the default config directory + if Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])): + kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])) + else: + kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/*%s.c' % (env['platform_config'])) + +# Note: cleaning via the Linux kernel build system does not yet work +if env.GetOption('clean') : + makeAction=Action("cd ${SOURCE.dir} && make clean", '$MAKECOMSTR') + cmd = env.Command(['$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/mali_platform_fake.ko'], kbase_src, [makeAction]) +else: + if env['os'] == 'android': + env['android'] = 1 + else: + env['android'] = 0 + + if env['unit'] == '1': + env['kernel_test'] = 1 + else: + env['kernel_test'] = 0 + + makeAction=Action("cd ${SOURCE.dir} && make PLATFORM=${platform} MALI_ERROR_INJECT_ON=${error_inject} MALI_ANDROID=${android} MALI_KERNEL_TEST_API=${kernel_test} MALI_UNIT_TEST=${unit} MALI_RELEASE_NAME=\"${mali_release_name}\" MALI_MOCK_TEST=%s MALI_CUSTOMER_RELEASE=${release} MALI_INSTRUMENTATION_LEVEL=${instr} MALI_COVERAGE=${coverage} %s && cp mali_kbase.ko $STATIC_LIB_PATH/mali_kbase.ko" % (mock_test, env.kernel_get_config_defines(fake_platform_device)), '$MAKECOMSTR') + cmd = env.Command('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, [makeAction]) + +# Add a dependency on kds.ko. +# Only necessary when KDS is not built into the kernel. +# +if env['os'] != 'android': + linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config' + search_term = '^[\ ]*CONFIG_KDS[\ ]*=[\ ]*y' + kds_in_kernel = 0 + for line in open(linux_config_file, 'r'): + if re.search(search_term, line): + # KDS in kernel. + kds_in_kernel = 1 + if not kds_in_kernel: + env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/kds.ko') + +# need Module.symvers from ump.ko build +if int(env['ump']) == 1: + env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/ump.ko') + +# Until we fathom out how the invoke the Linux build system to clean, we can use Clean +# to remove generated files. 
+patterns = ['*.mod.c', '*.o', '*.ko', '*.a', '.*.cmd', 'modules.order', '.tmp_versions', 'Module.symvers'] + +for p in patterns: + Clean(cmd, Glob('#kernel/drivers/gpu/arm/midgard/%s' % p)) + Clean(cmd, Glob('#kernel/drivers/gpu/arm/midgard/%s' % p)) + Clean(cmd, Glob('#kernel/drivers/gpu/arm/midgard/config/%s' % p)) + Clean(cmd, Glob('#kernel/drivers/gpu/arm/midgard/%s' % p)) + Clean(cmd, Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/%s' % p)) + Clean(cmd, Glob('#kernel/drivers/gpu/arm/midgard/platform/%s/%s' % ((env['platform_config']), p) )) + +env.ProgTarget('kbase', cmd) + +env.AppendUnique(BASE=['cutils_list']) -- cgit v1.2.3