Diffstat (limited to 'drivers/gpu/arm/t6xx/kbase/src/common/mali_kbase_8401_workaround.c')
-rwxr-xr-x  drivers/gpu/arm/t6xx/kbase/src/common/mali_kbase_8401_workaround.c  422
1 files changed, 422 insertions, 0 deletions
diff --git a/drivers/gpu/arm/t6xx/kbase/src/common/mali_kbase_8401_workaround.c b/drivers/gpu/arm/t6xx/kbase/src/common/mali_kbase_8401_workaround.c
new file mode 100755
index 00000000000..bf42e41668b
--- /dev/null
+++ b/drivers/gpu/arm/t6xx/kbase/src/common/mali_kbase_8401_workaround.c
@@ -0,0 +1,422 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_8401_workaround.c
+ * Functions related to working around BASE_HW_ISSUE_8401
+ */
+
+#include <kbase/src/common/mali_kbase.h>
+#include <kbase/src/common/mali_kbase_defs.h>
+#include <kbase/src/common/mali_kbase_jm.h>
+#include <kbase/src/common/mali_kbase_8401_workaround.h>
+
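+/* WORKAROUND_PAGE_OFFSET is the GPU virtual page number at which the first
+ * workaround page is mapped (see the kbase_mmu_insert_pages() call in
+ * kbasep_8401_workaround_init()). The *_POINTER_INDEX values are the u32 word
+ * offsets within the dummy compute job at which the GPU addresses of the URT,
+ * RMU, RSD and TSD sections are patched in by
+ * kbasep_8401_workaround_update_job_pointers(). */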
+#define WORKAROUND_PAGE_OFFSET (2)
+#define URT_POINTER_INDEX (20)
+#define RMU_POINTER_INDEX (23)
+#define RSD_POINTER_INDEX (24)
+#define TSD_POINTER_INDEX (31)
+
+static const u32 compute_job_32bit_header[] = {
+ /* Job Descriptor Header */
+
+ /* Job Status */
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ /* Flags and Indices */
+ /* job_type = compute shader job */
+ 0x00000008, 0x00000000,
+ /* Pointer to next job */
+ 0x00000000,
+ /* Reserved */
+ 0x00000000,
+ /* Job Dimension Data */
+ 0x0000000f, 0x21040842,
+ /* Task Split */
+ 0x08000000,
+ /* Reserved */
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+
+ /* Draw Call Descriptor - 32 bit (Must be aligned to a 64-byte boundary) */
+
+ /* Flags */
+ 0x00000004,
+ /* Primary Attribute Offset */
+ 0x00000000,
+ /* Primitive Index Base Value */
+ 0x00000000,
+
+ /* Pointer To Vertex Position Array (64-byte alignment) */
+ 0x00000000,
+ /* Pointer To Uniform Remapping Table (8-byte alignment) */
+ 0,
+ /* Pointer To Image Descriptor Pointer Table */
+ 0x00000000,
+ /* Pointer To Sampler Array */
+ 0x00000000,
+ /* Pointer To Register-Mapped Uniform Data Area (16-byte alignment) */
+ 0,
+ /* Pointer To Renderer State Descriptor (64-byte alignment) */
+ 0,
+ /* Pointer To Primary Attribute Buffer Array */
+ 0x00000000,
+ /* Pointer To Primary Attribute Array */
+ 0x00000000,
+ /* Pointer To Secondary Attribute Buffer Array */
+ 0x00000000,
+ /* Pointer To Secondary Attribute Array */
+ 0x00000000,
+ /* Pointer To Viewport Descriptor */
+ 0x00000000,
+ /* Pointer To Occlusion Query Result */
+ 0x00000000,
+ /* Pointer To Thread Storage (64 byte alignment) */
+ 0,
+};
+
+static const u32 compute_job_32bit_urt[] = {
+ /* Uniform Remapping Table Entry */
+ 0, 0,
+};
+
+static const u32 compute_job_32bit_rmu[] = {
+ /* Register Mapped Uniform Data Area (16 byte aligned), an array of 128-bit
+ * register values.
+ *
+ * NOTE: this is also used as the URT pointer, so the first 16-byte entry
+ * must be all zeros.
+ *
+ * For BASE_HW_ISSUE_8987, we place 16 RMUs here, because this should only
+ * be run concurrently with other GLES jobs (i.e. FS jobs from slot 0).
+ */
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000
+};
+
+static const u32 compute_job_32bit_rsd[] = {
+ /* Renderer State Descriptor */
+
+ /* Shader program initial PC (low) */
+ 0x00000001,
+ /* Shader program initial PC (high) */
+ 0x00000000,
+ /* Image descriptor array sizes */
+ 0x00000000,
+ /* Attribute array sizes */
+ 0x00000000,
+ /* Uniform array size and Shader Flags */
+ /* Flags set: R, D, SE, Reg Uniforms==16, FPM==OpenCL */
+ 0x42003800,
+ /* Depth bias */
+ 0x00000000,
+ /* Depth slope bias */
+ 0x00000000,
+ /* Depth bias clamp */
+ 0x00000000,
+ /* Multisample Write Mask and Flags */
+ 0x00000000,
+ /* Stencil Write Masks and Alpha parameters */
+ 0x00000000,
+ /* Stencil tests - forward facing */
+ 0x00000000,
+ /* Stencil tests - back facing */
+ 0x00000000,
+ /* Alpha Test Reference Value */
+ 0x00000000,
+ /* Thread Balancing Information */
+ 0x00000000,
+ /* Blend Parameters or Pointer (low) */
+ 0x00000000,
+ /* Blend Parameters or Pointer (high) */
+ 0x00000000,
+};
+
+static const u32 compute_job_32bit_tsd[] = {
+ /* Thread Storage Descriptor */
+
+ /* Thread Local Storage Sizes */
+ 0x00000000,
+ /* Workgroup Local Memory Area Flags */
+ 0x0000001f,
+ /* Pointer to Local Storage Area */
+ 0x00021000, 0x00000001,
+ /* Pointer to Workgroup Local Storage Area */
+ 0x00000000, 0x00000000,
+ /* Pointer to Shader Exception Handler */
+ 0x00000000, 0x00000000
+};
+
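+/* One statically allocated atom per job slot, indexed by slot number. The
+ * atom for a slot is enqueued when a dummy job is submitted on that slot and
+ * is later matched by kbasep_8401_is_workaround_job() to recognise the dummy
+ * jobs on completion. Slot 0 is never used for the workaround, so entry 0 is
+ * unused. */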
+static kbase_jd_atom dummy_job_atom[KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT];
+
+/**
+ * Initialize the compute job structure for the given page and update its internal pointers.
+ */
+
+static void kbasep_8401_workaround_update_job_pointers(u32 *dummy_compute_job, int page_nr)
+{
+ u32 base_address = (page_nr + WORKAROUND_PAGE_OFFSET) * PAGE_SIZE;
+ u8 *dummy_job = (u8 *) dummy_compute_job;
+ u8 *dummy_job_urt;
+ u8 *dummy_job_rmu;
+ u8 *dummy_job_rsd;
+ u8 *dummy_job_tsd;
+
+ KBASE_DEBUG_ASSERT(dummy_compute_job);
+
+ /* Determine where each job section goes, taking the alignment restrictions into account: the URT must be 8-byte, the RMU 16-byte, and the RSD and TSD 64-byte aligned */
+ dummy_job_urt = (u8 *) ((((uintptr_t) dummy_job + sizeof(compute_job_32bit_header)) + 7) & ~7);
+ dummy_job_rmu = (u8 *) ((((uintptr_t) dummy_job_urt + sizeof(compute_job_32bit_urt)) + 15) & ~15);
+ dummy_job_rsd = (u8 *) ((((uintptr_t) dummy_job_rmu + sizeof(compute_job_32bit_rmu)) + 63) & ~63);
+ dummy_job_tsd = (u8 *) ((((uintptr_t) dummy_job_rsd + sizeof(compute_job_32bit_rsd)) + 63) & ~63);
+
+ /* Make sure the job fits within a single page */
+ KBASE_DEBUG_ASSERT(PAGE_SIZE > ((dummy_job_tsd + sizeof(compute_job_32bit_tsd)) - dummy_job));
+
+ /* Copy the job sections to the allocated memory */
+ memcpy(dummy_job, compute_job_32bit_header, sizeof(compute_job_32bit_header));
+ memcpy(dummy_job_urt, compute_job_32bit_urt, sizeof(compute_job_32bit_urt));
+ memcpy(dummy_job_rmu, compute_job_32bit_rmu, sizeof(compute_job_32bit_rmu));
+ memcpy(dummy_job_rsd, compute_job_32bit_rsd, sizeof(compute_job_32bit_rsd));
+ memcpy(dummy_job_tsd, compute_job_32bit_tsd, sizeof(compute_job_32bit_tsd));
+
+ /* Update header pointers */
+ *(dummy_compute_job + URT_POINTER_INDEX) = (dummy_job_urt - dummy_job) + base_address;
+ *(dummy_compute_job + RMU_POINTER_INDEX) = (dummy_job_rmu - dummy_job) + base_address;
+ *(dummy_compute_job + RSD_POINTER_INDEX) = (dummy_job_rsd - dummy_job) + base_address;
+ *(dummy_compute_job + TSD_POINTER_INDEX) = (dummy_job_tsd - dummy_job) + base_address;
+ /* Update URT pointer */
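+ /* The two writes below pack the GPU address of the RMU area into the 64-bit
+ * URT entry shifted left by 8 bits: the low word holds address bits [23:0]
+ * in its upper 24 bits, the high word holds address bits [31:24]. */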
+ *((u32 *) dummy_job_urt + 0) = (((dummy_job_rmu - dummy_job) + base_address) << 8) & 0xffffff00;
+ *((u32 *) dummy_job_urt + 1) = (((dummy_job_rmu - dummy_job) + base_address) >> 24) & 0xff;
+}
+
+/**
+ * Initialize the memory used by the 8401 workaround.
+ */
+
+mali_error kbasep_8401_workaround_init(kbase_device * const kbdev)
+{
+ kbasep_js_device_data *js_devdata;
+ kbase_context *workaround_kctx;
+ int i;
+ u16 as_present_mask;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(kbdev->workaround_kctx == NULL);
+
+ js_devdata = &kbdev->js_data;
+
+ /* For this workaround we reserve one address space to allow us to
+ * submit a special job independent of other contexts */
+ --(kbdev->nr_hw_address_spaces);
+
+ /* Only update nr_user_address_spaces if it was unchanged - to ensure
+ * HW workarounds that have modified this will still work */
+ if (kbdev->nr_user_address_spaces == (kbdev->nr_hw_address_spaces + 1))
+ --(kbdev->nr_user_address_spaces);
+
+ KBASE_DEBUG_ASSERT(kbdev->nr_user_address_spaces <= kbdev->nr_hw_address_spaces);
+
+ /* Recalculate the free address spaces bit-pattern */
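+ /* (this clears the reserved top address space's bit from the free mask, keeping it out of normal scheduling) */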
+ as_present_mask = (1U << kbdev->nr_hw_address_spaces) - 1;
+ js_devdata->as_free &= as_present_mask;
+
+ workaround_kctx = kbase_create_context(kbdev);
+ if (!workaround_kctx)
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ /* Allocate the pages required to contain the job */
+ if (MALI_ERROR_NONE != kbase_mem_allocator_alloc(&workaround_kctx->osalloc, KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT, kbdev->workaround_compute_job_pa, 0))
+ goto no_pages;
+
+ /* Get virtual address of mapped memory and write a compute job for each page */
+ for (i = 0; i < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT; i++) {
+ kbdev->workaround_compute_job_va[i] = kmap(pfn_to_page(PFN_DOWN(kbdev->workaround_compute_job_pa[i])));
+ if (NULL == kbdev->workaround_compute_job_va[i])
+ goto page_free;
+
+ /* Generate the compute job data */
+ kbasep_8401_workaround_update_job_pointers((u32 *) kbdev->workaround_compute_job_va[i], i);
+ }
+
+ /* Insert the pages into the GPU MMU. */
+ kbase_gpu_vm_lock(workaround_kctx);
+
+ kbase_mmu_insert_pages(workaround_kctx,
+ /* vpfn = page number */
+ (u64) WORKAROUND_PAGE_OFFSET,
+ /* physical address */
+ kbdev->workaround_compute_job_pa,
+ /* number of pages */
+ KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT,
+ /* flags */
+ KBASE_REG_GPU_RD | KBASE_REG_CPU_RD | KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
+
+ kbase_gpu_vm_unlock(workaround_kctx);
+
+ kbdev->workaround_kctx = workaround_kctx;
+ return MALI_ERROR_NONE;
+ page_free:
+ while (i--)
+ kunmap(pfn_to_page(PFN_DOWN(kbdev->workaround_compute_job_pa[i])));
+
+ kbase_mem_allocator_free(&workaround_kctx->osalloc, KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT, kbdev->workaround_compute_job_pa, MALI_TRUE);
+ no_pages:
+ kbase_destroy_context(workaround_kctx);
+
+ return MALI_ERROR_FUNCTION_FAILED;
+}
+
+/**
+ * Free up the memory used by the 8401 workaround.
+ */
+
+void kbasep_8401_workaround_term(kbase_device *kbdev)
+{
+ kbasep_js_device_data *js_devdata;
+ int i;
+ u16 restored_as;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(kbdev->workaround_kctx);
+
+ js_devdata = &kbdev->js_data;
+
+ for (i = 0; i < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT; i++)
+ kunmap(pfn_to_page(PFN_DOWN(kbdev->workaround_compute_job_pa[i])));
+
+ kbase_mem_allocator_free(&kbdev->workaround_kctx->osalloc, KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT, kbdev->workaround_compute_job_pa, MALI_TRUE);
+
+ kbase_destroy_context(kbdev->workaround_kctx);
+ kbdev->workaround_kctx = NULL;
+
+ /* Free up the workaround address space */
+ kbdev->nr_hw_address_spaces++;
+
+ if (kbdev->nr_user_address_spaces == (kbdev->nr_hw_address_spaces - 1)) {
+ /* Only update nr_user_address_spaces if it was unchanged - to ensure
+ * HW workarounds that have modified this will still work */
+ ++(kbdev->nr_user_address_spaces);
+ }
+ KBASE_DEBUG_ASSERT(kbdev->nr_user_address_spaces <= kbdev->nr_hw_address_spaces);
+
+ /* Recalculate the free address spaces bit-pattern */
+ restored_as = (1U << (kbdev->nr_hw_address_spaces - 1)); /* the address space reserved in kbasep_8401_workaround_init() */
+ js_devdata->as_free |= restored_as;
+}
+
+/**
+ * Submit the 8401 workaround job.
+ *
+ * Important for BASE_HW_ISSUE_8987: This job always uses 16 RMUs
+ * - Therefore, on slot[1] it will always use the same number of RMUs as another
+ * GLES job.
+ * - On slot[2], no other job (GLES or otherwise) will be running on the
+ * cores, by virtue of it being slot[2]. Therefore, any value of RMUs is
+ * acceptable.
+ */
+void kbasep_8401_submit_dummy_job(kbase_device *kbdev, int js)
+{
+ u32 cfg;
+ mali_addr64 jc;
+ u32 pgd_high;
+
+ /* While this workaround is active we reserve the last address space just for submitting the dummy jobs */
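+ /* (nr_hw_address_spaces was decremented in kbasep_8401_workaround_init(), so this value is the index of that reserved, top address space) */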
+ int as = kbdev->nr_hw_address_spaces;
+
+ /* Don't issue compute jobs on job slot 0 */
+ KBASE_DEBUG_ASSERT(js != 0);
+ KBASE_DEBUG_ASSERT(js < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT);
+
+ /* Job chain GPU address */
+ jc = (js + WORKAROUND_PAGE_OFFSET) * PAGE_SIZE; /* GPU virtual address of the job chain (see the kbase_mmu_insert_pages() call in kbasep_8401_workaround_init()) */
+
+ /* Clear the job status words which may contain values from a previous job completion */
+ memset(kbdev->workaround_compute_job_va[js], 0, 4 * sizeof(u32));
+
+ /* Get the affinity of the previous job */
+ dummy_job_atom[js].affinity = ((u64) kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_LO), NULL)) | (((u64) kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_HI), NULL)) << 32);
+
+ /* Don't submit a compute job if the affinity was previously zero (i.e. no jobs have run yet on this slot) */
+ if (!dummy_job_atom[js].affinity)
+ return;
+
+ /* Ensure that our page tables are programmed into the MMU */
+ kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_TRANSTAB_LO), (kbdev->workaround_kctx->pgd & ASn_TRANSTAB_ADDR_SPACE_MASK) | ASn_TRANSTAB_READ_INNER | ASn_TRANSTAB_ADRMODE_TABLE, NULL);
+
+ /* Need to use a conditional expression to avoid "right shift count >= width of type"
+ * error when using an if statement - although the sizeof condition is evaluated at compile
+ * time the unused branch is not removed until after it is type-checked and the error
+ * produced.
+ */
+ pgd_high = sizeof(kbdev->workaround_kctx->pgd) > 4 ? (kbdev->workaround_kctx->pgd >> 32) : 0;
+ kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_TRANSTAB_HI), pgd_high, NULL);
+
+ kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_MEMATTR_LO), ASn_MEMATTR_IMPL_DEF_CACHE_POLICY, NULL);
+ kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_MEMATTR_HI), ASn_MEMATTR_IMPL_DEF_CACHE_POLICY, NULL);
+ kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_COMMAND), ASn_COMMAND_UPDATE, NULL);
+
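+ /* Stage the dummy job in the slot's _NEXT registers; it is kicked off by the JSn_COMMAND_START write below */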
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), jc & 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), jc >> 32, NULL);
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_LO), dummy_job_atom[js].affinity & 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_HI), dummy_job_atom[js].affinity >> 32, NULL);
+
+ /* start MMU, medium priority, cache clean/flush on end, clean/flush on start */
+ cfg = as | JSn_CONFIG_END_FLUSH_CLEAN_INVALIDATE | JSn_CONFIG_START_MMU | JSn_CONFIG_START_FLUSH_CLEAN_INVALIDATE | JSn_CONFIG_THREAD_PRI(8);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_CONFIG_NEXT), cfg, NULL);
+
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SUBMIT, NULL, 0, jc, js);
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), JSn_COMMAND_START, NULL);
+ /* Report that the job has been submitted */
+ kbasep_jm_enqueue_submit_slot(&kbdev->jm_slots[js], &dummy_job_atom[js]);
+}
+
+/**
+ * Check if the katom given is a dummy compute job.
+ */
+mali_bool kbasep_8401_is_workaround_job(kbase_jd_atom *katom)
+{
+ int i;
+
+ /* Note: we don't check the first dummy_job_atom as slot 0 is never used for the workaround */
+ for (i = 1; i < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT; i++) {
+ if (katom == &dummy_job_atom[i]) {
+ /* This is a dummy job */
+ return MALI_TRUE;
+ }
+ }
+
+ /* This is a real job */
+ return MALI_FALSE;
+}