aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-09-08 11:16:11 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-09-08 11:16:11 -0700
commit6f6a73c8b715d595977774d48450a734297ab21f (patch)
tree9088e1f6085dcb2952185f7c997c3491fa7a7daf
parent612ab8ad64140f0f291d7baae45982ce7119839a (diff)
parent20561da3a2e1e0e827ef5510cb0f74bcfd377e41 (diff)
Merge tag 'drm-fixes-2020-09-08' of git://anongit.freedesktop.org/drm/drm
Pull drm fixes from Dave Airlie: "The i915 reverts are going to be a bit of a conflict mess for next, so I decided to dequeue them now, along with some msm fixes for a ring corruption issue, that Rob sent over the weekend. Summary: i915: - revert gpu relocation changes due to regression msm: - fixes for RPTR corruption issue" * tag 'drm-fixes-2020-09-08' of git://anongit.freedesktop.org/drm/drm: Revert "drm/i915/gem: Delete unused code" Revert "drm/i915/gem: Async GPU relocations only" Revert "drm/i915: Remove i915_gem_object_get_dirty_page()" drm/msm: Disable the RPTR shadow drm/msm: Disable preemption on all 5xx targets drm/msm: Enable expanded apriv support for a650 drm/msm: Split the a5xx preemption record
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c314
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.h4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c14
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c21
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx_gpu.c5
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx_gpu.c10
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx_gpu.c10
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.c14
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.h1
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_preempt.c25
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c13
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.c27
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c2
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.h11
-rw-r--r--drivers/gpu/drm/msm/msm_ringbuffer.c4
15 files changed, 411 insertions, 64 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 6b4ec66cb558..446e76e95c38 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -45,6 +45,13 @@ struct eb_vma_array {
struct eb_vma vma[];
};
+enum {
+ FORCE_CPU_RELOC = 1,
+ FORCE_GTT_RELOC,
+ FORCE_GPU_RELOC,
+#define DBG_FORCE_RELOC 0 /* choose one of the above! */
+};
+
#define __EXEC_OBJECT_HAS_PIN BIT(31)
#define __EXEC_OBJECT_HAS_FENCE BIT(30)
#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
@@ -253,6 +260,8 @@ struct i915_execbuffer {
*/
struct reloc_cache {
struct drm_mm_node node; /** temporary GTT binding */
+ unsigned long vaddr; /** Current kmap address */
+ unsigned long page; /** Currently mapped page index */
unsigned int gen; /** Cached value of INTEL_GEN */
bool use_64bit_reloc : 1;
bool has_llc : 1;
@@ -596,6 +605,23 @@ eb_add_vma(struct i915_execbuffer *eb,
}
}
+static inline int use_cpu_reloc(const struct reloc_cache *cache,
+ const struct drm_i915_gem_object *obj)
+{
+ if (!i915_gem_object_has_struct_page(obj))
+ return false;
+
+ if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
+ return true;
+
+ if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
+ return false;
+
+ return (cache->has_llc ||
+ obj->cache_dirty ||
+ obj->cache_level != I915_CACHE_NONE);
+}
+
static int eb_reserve_vma(const struct i915_execbuffer *eb,
struct eb_vma *ev,
u64 pin_flags)
@@ -926,6 +952,8 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
static void reloc_cache_init(struct reloc_cache *cache,
struct drm_i915_private *i915)
{
+ cache->page = -1;
+ cache->vaddr = 0;
/* Must be a variable in the struct to allow GCC to unroll. */
cache->gen = INTEL_GEN(i915);
cache->has_llc = HAS_LLC(i915);
@@ -937,6 +965,25 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->target = NULL;
}
+static inline void *unmask_page(unsigned long p)
+{
+ return (void *)(uintptr_t)(p & PAGE_MASK);
+}
+
+static inline unsigned int unmask_flags(unsigned long p)
+{
+ return p & ~PAGE_MASK;
+}
+
+#define KMAP 0x4 /* after CLFLUSH_FLAGS */
+
+static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
+{
+ struct drm_i915_private *i915 =
+ container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
+ return &i915->ggtt;
+}
+
#define RELOC_TAIL 4
static int reloc_gpu_chain(struct reloc_cache *cache)
@@ -1049,6 +1096,181 @@ static int reloc_gpu_flush(struct reloc_cache *cache)
return err;
}
+static void reloc_cache_reset(struct reloc_cache *cache)
+{
+ void *vaddr;
+
+ if (!cache->vaddr)
+ return;
+
+ vaddr = unmask_page(cache->vaddr);
+ if (cache->vaddr & KMAP) {
+ if (cache->vaddr & CLFLUSH_AFTER)
+ mb();
+
+ kunmap_atomic(vaddr);
+ i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
+ } else {
+ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+
+ intel_gt_flush_ggtt_writes(ggtt->vm.gt);
+ io_mapping_unmap_atomic((void __iomem *)vaddr);
+
+ if (drm_mm_node_allocated(&cache->node)) {
+ ggtt->vm.clear_range(&ggtt->vm,
+ cache->node.start,
+ cache->node.size);
+ mutex_lock(&ggtt->vm.mutex);
+ drm_mm_remove_node(&cache->node);
+ mutex_unlock(&ggtt->vm.mutex);
+ } else {
+ i915_vma_unpin((struct i915_vma *)cache->node.mm);
+ }
+ }
+
+ cache->vaddr = 0;
+ cache->page = -1;
+}
+
+static void *reloc_kmap(struct drm_i915_gem_object *obj,
+ struct reloc_cache *cache,
+ unsigned long page)
+{
+ void *vaddr;
+
+ if (cache->vaddr) {
+ kunmap_atomic(unmask_page(cache->vaddr));
+ } else {
+ unsigned int flushes;
+ int err;
+
+ err = i915_gem_object_prepare_write(obj, &flushes);
+ if (err)
+ return ERR_PTR(err);
+
+ BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
+ BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
+
+ cache->vaddr = flushes | KMAP;
+ cache->node.mm = (void *)obj;
+ if (flushes)
+ mb();
+ }
+
+ vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
+ cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
+ cache->page = page;
+
+ return vaddr;
+}
+
+static void *reloc_iomap(struct drm_i915_gem_object *obj,
+ struct reloc_cache *cache,
+ unsigned long page)
+{
+ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+ unsigned long offset;
+ void *vaddr;
+
+ if (cache->vaddr) {
+ intel_gt_flush_ggtt_writes(ggtt->vm.gt);
+ io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
+ } else {
+ struct i915_vma *vma;
+ int err;
+
+ if (i915_gem_object_is_tiled(obj))
+ return ERR_PTR(-EINVAL);
+
+ if (use_cpu_reloc(cache, obj))
+ return NULL;
+
+ i915_gem_object_lock(obj);
+ err = i915_gem_object_set_to_gtt_domain(obj, true);
+ i915_gem_object_unlock(obj);
+ if (err)
+ return ERR_PTR(err);
+
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+ if (IS_ERR(vma)) {
+ memset(&cache->node, 0, sizeof(cache->node));
+ mutex_lock(&ggtt->vm.mutex);
+ err = drm_mm_insert_node_in_range
+ (&ggtt->vm.mm, &cache->node,
+ PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
+ 0, ggtt->mappable_end,
+ DRM_MM_INSERT_LOW);
+ mutex_unlock(&ggtt->vm.mutex);
+ if (err) /* no inactive aperture space, use cpu reloc */
+ return NULL;
+ } else {
+ cache->node.start = vma->node.start;
+ cache->node.mm = (void *)vma;
+ }
+ }
+
+ offset = cache->node.start;
+ if (drm_mm_node_allocated(&cache->node)) {
+ ggtt->vm.insert_page(&ggtt->vm,
+ i915_gem_object_get_dma_address(obj, page),
+ offset, I915_CACHE_NONE, 0);
+ } else {
+ offset += page << PAGE_SHIFT;
+ }
+
+ vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
+ offset);
+ cache->page = page;
+ cache->vaddr = (unsigned long)vaddr;
+
+ return vaddr;
+}
+
+static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+ struct reloc_cache *cache,
+ unsigned long page)
+{
+ void *vaddr;
+
+ if (cache->page == page) {
+ vaddr = unmask_page(cache->vaddr);
+ } else {
+ vaddr = NULL;
+ if ((cache->vaddr & KMAP) == 0)
+ vaddr = reloc_iomap(obj, cache, page);
+ if (!vaddr)
+ vaddr = reloc_kmap(obj, cache, page);
+ }
+
+ return vaddr;
+}
+
+static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
+{
+ if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
+ if (flushes & CLFLUSH_BEFORE) {
+ clflushopt(addr);
+ mb();
+ }
+
+ *addr = value;
+
+ /*
+ * Writes to the same cacheline are serialised by the CPU
+ * (including clflush). On the write path, we only require
+ * that it hits memory in an orderly fashion and place
+ * mb barriers at the start and end of the relocation phase
+ * to ensure ordering of clflush wrt to the system.
+ */
+ if (flushes & CLFLUSH_AFTER)
+ clflushopt(addr);
+ } else
+ *addr = value;
+}
+
static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
@@ -1214,6 +1436,17 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
return cmd;
}
+static inline bool use_reloc_gpu(struct i915_vma *vma)
+{
+ if (DBG_FORCE_RELOC == FORCE_GPU_RELOC)
+ return true;
+
+ if (DBG_FORCE_RELOC)
+ return false;
+
+ return !dma_resv_test_signaled_rcu(vma->resv, true);
+}
+
static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
{
struct page *page;
@@ -1228,10 +1461,10 @@ static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
return addr + offset_in_page(offset);
}
-static int __reloc_entry_gpu(struct i915_execbuffer *eb,
- struct i915_vma *vma,
- u64 offset,
- u64 target_addr)
+static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
+ struct i915_vma *vma,
+ u64 offset,
+ u64 target_addr)
{
const unsigned int gen = eb->reloc_cache.gen;
unsigned int len;
@@ -1247,7 +1480,7 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
batch = reloc_gpu(eb, vma, len);
if (IS_ERR(batch))
- return PTR_ERR(batch);
+ return false;
addr = gen8_canonical_addr(vma->node.start + offset);
if (gen >= 8) {
@@ -1296,21 +1529,55 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
*batch++ = target_addr;
}
- return 0;
+ return true;
+}
+
+static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+ struct i915_vma *vma,
+ u64 offset,
+ u64 target_addr)
+{
+ if (eb->reloc_cache.vaddr)
+ return false;
+
+ if (!use_reloc_gpu(vma))
+ return false;
+
+ return __reloc_entry_gpu(eb, vma, offset, target_addr);
}
static u64
-relocate_entry(struct i915_execbuffer *eb,
- struct i915_vma *vma,
+relocate_entry(struct i915_vma *vma,
const struct drm_i915_gem_relocation_entry *reloc,
+ struct i915_execbuffer *eb,
const struct i915_vma *target)
{
u64 target_addr = relocation_target(reloc, target);
- int err;
-
- err = __reloc_entry_gpu(eb, vma, reloc->offset, target_addr);
- if (err)
- return err;
+ u64 offset = reloc->offset;
+
+ if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+ bool wide = eb->reloc_cache.use_64bit_reloc;
+ void *vaddr;
+
+repeat:
+ vaddr = reloc_vaddr(vma->obj,
+ &eb->reloc_cache,
+ offset >> PAGE_SHIFT);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
+ clflush_write32(vaddr + offset_in_page(offset),
+ lower_32_bits(target_addr),
+ eb->reloc_cache.vaddr);
+
+ if (wide) {
+ offset += sizeof(u32);
+ target_addr >>= 32;
+ wide = false;
+ goto repeat;
+ }
+ }
return target->node.start | UPDATE;
}
@@ -1375,7 +1642,8 @@ eb_relocate_entry(struct i915_execbuffer *eb,
* If the relocation already has the right value in it, no
* more work needs to be done.
*/
- if (gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
+ if (!DBG_FORCE_RELOC &&
+ gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
return 0;
/* Check that the relocation address is valid... */
@@ -1407,7 +1675,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
ev->flags &= ~EXEC_OBJECT_ASYNC;
/* and update the user's relocation entry */
- return relocate_entry(eb, ev->vma, reloc, target->vma);
+ return relocate_entry(ev->vma, reloc, eb, target->vma);
}
static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
@@ -1445,8 +1713,10 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
* this is bad and so lockdep complains vehemently.
*/
copied = __copy_from_user(r, urelocs, count * sizeof(r[0]));
- if (unlikely(copied))
- return -EFAULT;
+ if (unlikely(copied)) {
+ remain = -EFAULT;
+ goto out;
+ }
remain -= count;
do {
@@ -1454,7 +1724,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
if (likely(offset == 0)) {
} else if ((s64)offset < 0) {
- return (int)offset;
+ remain = (int)offset;
+ goto out;
} else {
/*
* Note that reporting an error now
@@ -1484,8 +1755,9 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
} while (r++, --count);
urelocs += ARRAY_SIZE(stack);
} while (remain);
-
- return 0;
+out:
+ reloc_cache_reset(&eb->reloc_cache);
+ return remain;
}
static int eb_relocate(struct i915_execbuffer *eb)
@@ -2392,7 +2664,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.i915 = i915;
eb.file = file;
eb.args = args;
- if (!(args->flags & I915_EXEC_NO_RELOC))
+ if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
args->flags |= __EXEC_HAS_RELOC;
eb.exec = exec;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index e5b9276d254c..9cf4ad78ece6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -258,6 +258,10 @@ struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj,
unsigned int n);
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+ unsigned int n);
+
dma_addr_t
i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
unsigned long n,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index d15ff6748a50..e8a083743e09 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -548,6 +548,20 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
return nth_page(sg_page(sg), offset);
}
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+ unsigned int n)
+{
+ struct page *page;
+
+ page = i915_gem_object_get_page(obj, n);
+ if (!obj->mm.dirty)
+ set_page_dirty(page);
+
+ return page;
+}
+
dma_addr_t
i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
unsigned long n,
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index 57c14d3340cd..a49016f8ee0d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -37,14 +37,20 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
return err;
/* 8-Byte aligned */
- err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
- if (err)
+ if (!__reloc_entry_gpu(eb, vma,
+ offsets[0] * sizeof(u32),
+ 0)) {
+ err = -EIO;
goto unpin_vma;
+ }
/* !8-Byte aligned */
- err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1);
- if (err)
+ if (!__reloc_entry_gpu(eb, vma,
+ offsets[1] * sizeof(u32),
+ 1)) {
+ err = -EIO;
goto unpin_vma;
+ }
/* Skip to the end of the cmd page */
i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;
@@ -54,9 +60,12 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
eb->reloc_cache.rq_size += i;
/* Force batch chaining */
- err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2);
- if (err)
+ if (!__reloc_entry_gpu(eb, vma,
+ offsets[2] * sizeof(u32),
+ 2)) {
+ err = -EIO;
goto unpin_vma;
+ }
GEM_BUG_ON(!eb->reloc_cache.rq);
rq = i915_request_get(eb->reloc_cache.rq);
diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
index 6021f8d9efd1..48fa49f69d6d 100644
--- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
@@ -164,6 +164,11 @@ static int a2xx_hw_init(struct msm_gpu *gpu)
if (ret)
return ret;
+ gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
+ MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
+
+ gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
+
/* NOTE: PM4/micro-engine firmware registers look to be the same
* for a2xx and a3xx.. we could possibly push that part down to
* adreno_gpu base class. Or push both PM4 and PFP but
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 0a5ea9f56cb8..f6471145a7a6 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -211,6 +211,16 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
if (ret)
return ret;
+ /*
+ * Use the default ringbuffer size and block size but disable the RPTR
+ * shadow
+ */
+ gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
+ MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
+
+ /* Set the ringbuffer address */
+ gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
+
/* setup access protection: */
gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index b9b26b2bf9c5..954753600625 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -267,6 +267,16 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
if (ret)
return ret;
+ /*
+ * Use the default ringbuffer size and block size but disable the RPTR
+ * shadow
+ */
+ gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
+ MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
+
+ /* Set the ringbuffer address */
+ gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
+
/* Load PM4: */
ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 84a5d9c1f2a2..91726da82ed6 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -703,8 +703,6 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
if (ret)
return ret;
- a5xx_preempt_hw_init(gpu);
-
if (!adreno_is_a510(adreno_gpu))
a5xx_gpmu_ucode_init(gpu);
@@ -712,6 +710,15 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
if (ret)
return ret;
+ /* Set the ringbuffer address */
+ gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
+ gpu->rb[0]->iova);
+
+ gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
+ MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
+
+ a5xx_preempt_hw_init(gpu);
+
/* Disable the interrupts through the initial bringup stage */
gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
@@ -1511,7 +1518,8 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
check_speed_bin(&pdev->dev);
- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
+ /* Restricting nr_rings to 1 to temporarily disable preemption */
+ ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
if (ret) {
a5xx_destroy(&(a5xx_gpu->base.base));
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
index 54868d4e3958..1e5b1a15a70f 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
@@ -31,6 +31,7 @@ struct a5xx_gpu {
struct msm_ringbuffer *next_ring;
struct drm_gem_object *preempt_bo[MSM_GPU_MAX_RINGS];
+ struct drm_gem_object *preempt_counters_bo[MSM_GPU_MAX_RINGS];
struct a5xx_preempt_record *preempt[MSM_GPU_MAX_RINGS];
uint64_t preempt_iova[MSM_GPU_MAX_RINGS];
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
index 9cf9353a7ff1..9f3fe177b00e 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
@@ -226,19 +226,31 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu,
struct adreno_gpu *adreno_gpu = &a5xx_gpu->base;
struct msm_gpu *gpu = &adreno_gpu->base;
struct a5xx_preempt_record *ptr;
- struct drm_gem_object *bo = NULL;
- u64 iova = 0;
+ void *counters;
+ struct drm_gem_object *bo = NULL, *counters_bo = NULL;
+ u64 iova = 0, counters_iova = 0;
ptr = msm_gem_kernel_new(gpu->dev,
A5XX_PREEMPT_RECORD_SIZE + A5XX_PREEMPT_COUNTER_SIZE,
- MSM_BO_UNCACHED, gpu->aspace, &bo, &iova);
+ MSM_BO_UNCACHED | MSM_BO_MAP_PRIV, gpu->aspace, &bo, &iova);
if (IS_ERR(ptr))
return PTR_ERR(ptr);
+ /* The buffer to store counters needs to be unprivileged */
+ counters = msm_gem_kernel_new(gpu->dev,
+ A5XX_PREEMPT_COUNTER_SIZE,
+ MSM_BO_UNCACHED, gpu->aspace, &counters_bo, &counters_iova);
+ if (IS_ERR(counters)) {
+ msm_gem_kernel_put(bo, gpu->aspace, true);
+ return PTR_ERR(counters);
+ }
+
msm_gem_object_set_name(bo, "preempt");
+ msm_gem_object_set_name(counters_bo, "preempt_counters");
a5xx_gpu->preempt_bo[ring->id] = bo;
+ a5xx_gpu->preempt_counters_bo[ring->id] = counters_bo;
a5xx_gpu->preempt_iova[ring->id] = iova;
a5xx_gpu->preempt[ring->id] = ptr;
@@ -249,7 +261,7 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu,
ptr->data = 0;
ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT;
ptr->rptr_addr = rbmemptr(ring, rptr);
- ptr->counter = iova + A5XX_PREEMPT_RECORD_SIZE;
+ ptr->counter = counters_iova;
return 0;
}
@@ -260,8 +272,11 @@ void a5xx_preempt_fini(struct msm_gpu *gpu)
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
int i;
- for (i = 0; i < gpu->nr_rings; i++)
+ for (i = 0; i < gpu->nr_rings; i++) {
msm_gem_kernel_put(a5xx_gpu->preempt_bo[i], gpu->aspace, true);
+ msm_gem_kernel_put(a5xx_gpu->preempt_counters_bo[i],
+ gpu->aspace, true);
+ }
}
void a5xx_preempt_init(struct msm_gpu *gpu)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 3966abd523cc..66a95e22b7b3 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -678,7 +678,8 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
A6XX_PROTECT_RDONLY(0x980, 0x4));
gpu_write(gpu, REG_A6XX_CP_PROTECT(25), A6XX_PROTECT_RW(0xa630, 0x0));
- if (adreno_is_a650(adreno_gpu)) {
+ /* Enable expanded apriv for targets that support it */
+ if (gpu->hw_apriv) {
gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
(1 << 6) | (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1));
}
@@ -694,6 +695,13 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
if (ret)
goto out;
+ /* Set the ringbuffer address */
+ gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI,
+ gpu->rb[0]->iova);
+
+ gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
+ MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
+
/* Always come up on rb 0 */
a6xx_gpu->cur_ring = gpu->rb[0];
@@ -1056,6 +1064,9 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = NULL;
adreno_gpu->reg_offsets = a6xx_register_offsets;
+ if (adreno_is_a650(adreno_gpu))
+ adreno_gpu->base.hw_apriv = true;
+
ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
if (ret) {
a6xx_destroy(&(a6xx_gpu->base.base));
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 288141fe4c58..862dd35b27d3 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -400,26 +400,6 @@ int adreno_hw_init(struct msm_gpu *gpu)
ring->memptrs->rptr = 0;
}
- /*
- * Setup REG_CP_RB_CNTL. The same value is used across targets (with
- * the excpetion of A430 that disables the RPTR shadow) - the cacluation
- * for the ringbuffer size and block size is moved to msm_gpu.h for the
- * pre-processor to deal with and the A430 variant is ORed in here
- */
- adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
- MSM_GPU_RB_CNTL_DEFAULT |
- (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0));
-
- /* Setup ringbuffer address - use ringbuffer[0] for GPU init */
- adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE,
- REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova);
-
- if (!adreno_is_a430(adreno_gpu)) {
- adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
- REG_ADRENO_CP_RB_RPTR_ADDR_HI,
- rbmemptr(gpu->rb[0], rptr));
- }
-
return 0;
}
@@ -427,11 +407,8 @@ int adreno_hw_init(struct msm_gpu *gpu)
static uint32_t get_rptr(struct adreno_gpu *adreno_gpu,
struct msm_ringbuffer *ring)
{
- if (adreno_is_a430(adreno_gpu))
- return ring->memptrs->rptr = adreno_gpu_read(
- adreno_gpu, REG_ADRENO_CP_RB_RPTR);
- else
- return ring->memptrs->rptr;
+ return ring->memptrs->rptr = adreno_gpu_read(
+ adreno_gpu, REG_ADRENO_CP_RB_RPTR);
}
struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu)
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index d5645472b25d..57ddc9438351 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -908,7 +908,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
memptrs = msm_gem_kernel_new(drm,
sizeof(struct msm_rbmemptrs) * nr_rings,
- MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
+ check_apriv(gpu, MSM_BO_UNCACHED), gpu->aspace, &gpu->memptrs_bo,
&memptrs_iova);
if (IS_ERR(memptrs)) {
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 0db117a7339b..37cffac4cbe3 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -15,6 +15,7 @@
#include "msm_drv.h"
#include "msm_fence.h"
#include "msm_ringbuffer.h"
+#include "msm_gem.h"
struct msm_gem_submit;
struct msm_gpu_perfcntr;
@@ -139,6 +140,8 @@ struct msm_gpu {
} devfreq;
struct msm_gpu_state *crashstate;
+ /* True if the hardware supports expanded apriv (a650 and newer) */
+ bool hw_apriv;
};
/* It turns out that all targets use the same ringbuffer size */
@@ -327,4 +330,12 @@ static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu)
mutex_unlock(&gpu->dev->struct_mutex);
}
+/*
+ * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can
+ * support expanded privileges
+ */
+#define check_apriv(gpu, flags) \
+ (((gpu)->hw_apriv ? MSM_BO_MAP_PRIV : 0) | (flags))
+
+
#endif /* __MSM_GPU_H__ */
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 39ecb5a18431..935bf9b1d941 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -27,8 +27,8 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
ring->id = id;
ring->start = msm_gem_kernel_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ,
- MSM_BO_WC | MSM_BO_GPU_READONLY, gpu->aspace, &ring->bo,
- &ring->iova);
+ check_apriv(gpu, MSM_BO_WC | MSM_BO_GPU_READONLY),
+ gpu->aspace, &ring->bo, &ring->iova);
if (IS_ERR(ring->start)) {
ret = PTR_ERR(ring->start);