From 27173f1f95db5e74ceb35fe9a2f2f348ea11bac9 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <ben@bwidawsk.net>
Date: Wed, 14 Aug 2013 11:38:36 +0200
Subject: drm/i915: Convert execbuf code to use vmas

In order to transition more of our code over to using a VMA instead of
an <OBJ, VM> pair - we must have the vma accessible at execbuf time. Up
until now, we've only had a VMA when actually binding an object.

The previous patch helped handle the distinction on bound vs. unbound.
This patch will help us catch leaks, and other issues before we actually
shuffle a bunch of stuff around.

This attempts to convert all the execbuf code to speak in vmas. Since
the execbuf code is very self contained it was a nice isolated
conversion.

The meat of the code is about turning eb_objects into eb_vma, and then
wiring up the rest of the code to use vmas instead of obj, vm pairs.

Unfortunately, to do this, we must move the exec_list link from the obj
structure. This list is reused in the eviction code, so we must also
modify the eviction code to make this work.

WARNING: This patch makes an already hotly profiled path slower. The cost is
unavoidable. In reply to this mail, I will attach the extra data.

v2: Release table lock early, and two a 2 phase vma lookup to avoid
having to use a GFP_ATOMIC. (Chris)

v3: s/obj_exec_list/obj_exec_link/
Updates to address
commit 6d2b888569d366beb4be72cacfde41adee2c25e1
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Aug 7 18:30:54 2013 +0100

    drm/i915: List objects allocated from stolen memory in debugfs

v4: Use obj = vma->obj for neatness in some places (Chris)
need_reloc_mappable() should return false if ppgtt (Chris)

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Split out prep patches. Also remove a FIXME comment which is
now taken care of.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f21a0c36a40..c4c56f98d6e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3978,7 +3978,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 {
 	INIT_LIST_HEAD(&obj->global_list);
 	INIT_LIST_HEAD(&obj->ring_list);
-	INIT_LIST_HEAD(&obj->exec_list);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 
-- 
cgit v1.2.3


From e656a6cba0febf12a9838882b891e1c5917c7a8b Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Aug 2013 14:14:04 +0200
Subject: drm/i915: inline vma_create into lookup_or_create_vma

In the execbuf code we don't clean up any vmas which ended up not
getting bound for code simplicity. To make sure that we don't end up
creating multiple vma for the same vm kill the somewhat dangerous
vma_create function and inline it into lookup_or_create.

This is just a safety measure to prevent surprises in the future.

Also update the somewhat confused comment in the execbuf code and
clarify what kind of magic is going on with a new one.

v2: Keep the function separate as requested by Chris. But give it a __
prefix for paranoia and move it tighter together with the other vma
stuff.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <ben@bwidawsk.net>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 50 ++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c4c56f98d6e..6820d0a1231 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4109,8 +4109,19 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	i915_gem_object_free(obj);
 }
 
-struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj,
+struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 				     struct i915_address_space *vm)
+{
+	struct i915_vma *vma;
+	list_for_each_entry(vma, &obj->vma_list, vma_link)
+		if (vma->vm == vm)
+			return vma;
+
+	return NULL;
+}
+
+static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
+					      struct i915_address_space *vm)
 {
 	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
 	if (vma == NULL)
@@ -4131,6 +4142,19 @@ struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
+struct i915_vma *
+i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
+				  struct i915_address_space *vm)
+{
+	struct i915_vma *vma;
+
+	vma = i915_gem_obj_to_vma(obj, vm);
+	if (!vma)
+		vma = __i915_gem_vma_create(obj, vm);
+
+	return vma;
+}
+
 void i915_gem_vma_destroy(struct i915_vma *vma)
 {
 	WARN_ON(vma->node.allocated);
@@ -4857,27 +4881,3 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
 
 	return 0;
 }
-
-struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
-				     struct i915_address_space *vm)
-{
-	struct i915_vma *vma;
-	list_for_each_entry(vma, &obj->vma_list, vma_link)
-		if (vma->vm == vm)
-			return vma;
-
-	return NULL;
-}
-
-struct i915_vma *
-i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
-				  struct i915_address_space *vm)
-{
-	struct i915_vma *vma;
-
-	vma = i915_gem_obj_to_vma(obj, vm);
-	if (!vma)
-		vma = i915_gem_vma_create(obj, vm);
-
-	return vma;
-}
-- 
cgit v1.2.3


From aaa0566792dc7ae68deb1959663581ea8c75d311 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 20 Aug 2013 12:56:40 +0100
Subject: drm/i915: Don't destroy the vma placeholder during execbuffer
 reservation

The execbuffer handle and exec_link were moved from the object into the
vma. As the vma may be unbound and destroyed whilst attempting to
reserve the execbuffer objects (either through a forced unbind to fix up
a misalignment or through an evict-everything call) we need to prevent
the free of the i915_vma itself. Otherwise not only is the list of
objects to reserve corrupt, but we continue to reference stale vma
entries.

Fixes kernel crash with i-g-t/gem_evict_everything

This regression has been introduced in

commit 04038a515d6eda6dd0857c0ade0b3950d372f4c0
Author:     Ben Widawsky <ben@bwidawsk.net>
AuthorDate: Wed Aug 14 11:38:36 2013 +0200

    drm/i915: Convert execbuf code to use vmas

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
References: http://www.spinics.net/lists/intel-gfx/msg32038.html
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68298
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6820d0a1231..29b7e1c32b8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4159,6 +4159,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma)
 {
 	WARN_ON(vma->node.allocated);
 	list_del(&vma->vma_link);
+
+	/* Keep the vma as a placeholder in the execbuffer reservation lists */
+	if (!list_empty(&vma->exec_list))
+		return;
+
 	kfree(vma);
 }
 
-- 
cgit v1.2.3


From b93dab6e9d286ec2cd95b28078afdfa6dd515205 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Mon, 26 Aug 2013 11:23:47 +0200
Subject: drm/i915: More vma fixups around unbind/destroy

The important bugfix here is that we must not unlink the vma when
we keep it around as a placeholder for the execbuf code. Since then we
won't find it again when execbuf gets interrupt and restarted and
create a 2nd vma. And since the code as-is isn't fit yet to deal with
more than one vma, hilarity ensues.

Specifically the dma map/unmap of the sg table isn't adjusted for
multiple vmas yet and will blow up like this:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
IP: [<ffffffffa008fb37>] i915_gem_gtt_finish_object+0x73/0xc8 [i915]
PGD 56bb5067 PUD ad3dd067 PMD 0
Oops: 0000 [#1] SMP
Modules linked in: tcp_lp ppdev parport_pc lp parport ipv6 dm_mod dcdbas snd_hda_codec_hdmi pcspkr snd_hda_codec_realtek serio_raw i2c_i801 iTCO_wdt iTCO_vendor_support snd_hda_intel snd_hda_codec lpc_ich snd_hwdep mfd_core snd_pcm snd_page_alloc snd_timer snd soundcore acpi_cpufreq i915 video button drm_kms_helper drm mperf freq_table
CPU: 1 PID: 16650 Comm: fbo-maxsize Not tainted 3.11.0-rc4_nightlytop_d93f59_debug_20130814_+ #6957
Hardware name: Dell Inc. OptiPlex 9010/03JR84, BIOS A01 05/04/2012
task: ffff8800563b3f00 ti: ffff88004bdf4000 task.ti: ffff88004bdf4000
RIP: 0010:[<ffffffffa008fb37>]  [<ffffffffa008fb37>] i915_gem_gtt_finish_object+0x73/0xc8 [i915]
RSP: 0018:ffff88004bdf5958  EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffff8801135e0000 RCX: ffff8800ad3bf8e0
RDX: ffff8800ad3bf8e0 RSI: 0000000000000000 RDI: ffff8801007ee780
RBP: ffff88004bdf5978 R08: ffff8800ad3bf8e0 R09: 0000000000000000
R10: ffffffff86ca1810 R11: ffff880036a17101 R12: ffff8801007ee780
R13: 0000000000018001 R14: ffff880118c4e000 R15: ffff8801007ee780
FS:  00007f401a0ce740(0000) GS:ffff88011e280000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000008 CR3: 000000005635c000 CR4: 00000000001407e0
Stack:
 ffff8801007ee780 ffff88005c253180 0000000000018000 ffff8801135e0000
 ffff88004bdf59a8 ffffffffa0088e55 0000000000000011 ffff8801007eec00
 0000000000018000 ffff880036a17101 ffff88004bdf5a08 ffffffffa0089026
Call Trace:
 [<ffffffffa0088e55>] i915_vma_unbind+0xdf/0x1ab [i915]
 [<ffffffffa0089026>] __i915_gem_shrink+0x105/0x177 [i915]
 [<ffffffffa0089452>] i915_gem_object_get_pages_gtt+0x108/0x309 [i915]
 [<ffffffffa0085ba9>] i915_gem_object_get_pages+0x61/0x90 [i915]
 [<ffffffffa008f22b>] ? gen6_ppgtt_insert_entries+0x103/0x125 [i915]
 [<ffffffffa008a113>] i915_gem_object_pin+0x1fa/0x5df [i915]
 [<ffffffffa008cdfe>] i915_gem_execbuffer_reserve_object.isra.6+0x8d/0x1bc [i915]
 [<ffffffffa008d156>] i915_gem_execbuffer_reserve+0x229/0x367 [i915]
 [<ffffffffa008dbf6>] i915_gem_do_execbuffer.isra.12+0x4dc/0xf3a [i915]
 [<ffffffff810fc823>] ? might_fault+0x40/0x90
 [<ffffffffa008eb89>] i915_gem_execbuffer2+0x187/0x222 [i915]
 [<ffffffffa000971c>] drm_ioctl+0x308/0x442 [drm]
 [<ffffffffa008ea02>] ? i915_gem_execbuffer+0x3ae/0x3ae [i915]
 [<ffffffff817db156>] ? __do_page_fault+0x3dd/0x481
 [<ffffffff8112fdba>] vfs_ioctl+0x26/0x39
 [<ffffffff811306a2>] do_vfs_ioctl+0x40e/0x451
 [<ffffffff817deda7>] ? sysret_check+0x1b/0x56
 [<ffffffff8113073c>] SyS_ioctl+0x57/0x87
 [<ffffffff8135bbfe>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff817ded82>] system_call_fastpath+0x16/0x1b
Code: 48 c7 c6 84 30 0e a0 31 c0 e8 d0 e9 f7 ff bf c6 a7 00 00 e8 07 af 2c e1 41 f6 84 24 03 01 00 00 10 75 44 49 8b 84 24 08 01 00 00 <8b> 50 08 48 8b 30 49 8b 86 b0 04 00 00 48 89 c7 48 81 c7 98 00
RIP  [<ffffffffa008fb37>] i915_gem_gtt_finish_object+0x73/0xc8 [i915]
 RSP <ffff88004bdf5958>
CR2: 0000000000000008

As a consequence we need to change the "only one vma for now" check in
vma_unbind - since vma_destroy isn't always called the obj->vma_list
might not be empty. Instead check that the vma list is singular at the
beginning of vma_unbind. This is also more symmetric with bind_to_vm.

This fixes the igt/gem_evict_everything|alignment testcases.

v2:
- Add a paranoid WARN to mark_free in the eviction code to make sure
  we never try to evict a vma used by the execbuf code right now.
- Move the check for a temporary execbuf vma into vma_destroy -
  otherwise the failure path cleanup in bind_to_vm will blow up.

Our first attempting at fixing this was

commit 1be81a2f2cfd8789a627401d470423358fba2d76
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Aug 20 12:56:40 2013 +0100

    drm/i915: Don't destroy the vma placeholder during execbuffer reservation

Squash with this when merging!

v3: Improvements suggested in Chris' review:
- Move the WARN_ON in vma_destroy that checks for vmas with an drm_mm
  allocation before the early return.
- Bail out if we hit the WARN in mark_free to hopefully make the
  kernel survive for long enough to capture it.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <ben@bwidawsk.net>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68298
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68171
Tested-by: lu hua <huax.lu@intel.com> (v2)
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 29b7e1c32b8..d57368d5fc1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2604,6 +2604,9 @@ int i915_vma_unbind(struct i915_vma *vma)
 	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
 	int ret;
 
+	/* For now we only ever use 1 vma per object */
+	WARN_ON(!list_is_singular(&obj->vma_list));
+
 	if (list_empty(&vma->vma_link))
 		return 0;
 
@@ -2652,9 +2655,7 @@ destroy:
 	i915_gem_vma_destroy(vma);
 
 	/* Since the unbound list is global, only move to that list if
-	 * no more VMAs exist.
-	 * NB: Until we have real VMAs there will only ever be one */
-	WARN_ON(!list_empty(&obj->vma_list));
+	 * no more VMAs exist. */
 	if (list_empty(&obj->vma_list))
 		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 
@@ -4158,12 +4159,13 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 void i915_gem_vma_destroy(struct i915_vma *vma)
 {
 	WARN_ON(vma->node.allocated);
-	list_del(&vma->vma_link);
 
 	/* Keep the vma as a placeholder in the execbuffer reservation lists */
 	if (!list_empty(&vma->exec_list))
 		return;
 
+	list_del(&vma->vma_link);
+
 	kfree(vma);
 }
 
-- 
cgit v1.2.3


From 9435373ef8870e0a84b6fec0ad89b952bf3097fa Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@gmail.com>
Date: Wed, 28 Aug 2013 16:45:46 -0300
Subject: drm/i915: Report enabled slices on Haswell GT3

Batchbuffers constructed by userspace can conditionalise their URB
allocations through the use of the MI_SET_PREDICATE command. This
command can read the MI_PREDICATE_RESULT_2 register to see how many
slices are enabled on GT3, and by virtue of the result, scale their
memory allocations to fit enabled memory.

Of course, this only works if the kernel sets the appropriate bit in the
register first.

v2: Better commit subject and message by Chris Wilson.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Credits-to: Yejun Guo <yejun.guo@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d57368d5fc1..2d4b72ab122 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4331,6 +4331,11 @@ i915_gem_init_hw(struct drm_device *dev)
 	if (dev_priv->ellc_size)
 		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
 
+	if (IS_HSW_GT3(dev))
+		I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED);
+	else
+		I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+
 	if (HAS_PCH_NOP(dev)) {
 		u32 temp = I915_READ(GEN7_MSG_CTL);
 		temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
-- 
cgit v1.2.3


From 0ff501cbb5d825557da7b9a0226ef031344df87d Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 29 Aug 2013 19:50:31 +0200
Subject: drm/i915: Fix list corruption in vma_unbind

The saga around the breadcrumb vmas used by execbuf continues ...

This time around we've managed to unconditionally move the object to
the unbound list on the last vma unbind even though it might never
have been on either the bound or unbound list. Hilarity ensued.

Chris Wilson tracked this one down but compared to his patches I've
simply opted to completely separate the unbound case for not-yet bound
vmas. Otherwise we imo end up with semantically hard to parse checks
around the list_move_tail(global_list, ...).

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <ben@bwidawsk.net>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68462
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2d4b72ab122..80342c8f02e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2610,8 +2610,11 @@ int i915_vma_unbind(struct i915_vma *vma)
 	if (list_empty(&vma->vma_link))
 		return 0;
 
-	if (!drm_mm_node_allocated(&vma->node))
-		goto destroy;
+	if (!drm_mm_node_allocated(&vma->node)) {
+		i915_gem_vma_destroy(vma);
+
+		return 0;
+	}
 
 	if (obj->pin_count)
 		return -EBUSY;
@@ -2651,7 +2654,6 @@ int i915_vma_unbind(struct i915_vma *vma)
 
 	drm_mm_remove_node(&vma->node);
 
-destroy:
 	i915_gem_vma_destroy(vma);
 
 	/* Since the unbound list is global, only move to that list if
-- 
cgit v1.2.3


From c0321e2c5acaaff7ed41d46d97cee71ad9238481 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 26 Aug 2013 19:50:53 -0300
Subject: drm/i915: Do not add an interrupt for a context switch

We use the request to ensure we hold a reference to the context for the
duration that it remains in use by the ring. Each request only holds a
reference to the current context, hence we emit a request after
switching contexts with the final reference to the old context. However,
the extra interrupt caused by that request is not useful (no timing
critical function will wait for the context object), instead the overhead
of servicing the IRQ shows up in some (lightweight) benchmarks. In order
to keep the useful property of using the request to manage the context
lifetime, we want to add a dummy request that is associated with the
interrupt from the subsequent real request following the batch.

The extra interrupt was added as a side-effect of using
i915_add_request() in

commit 112522f6789581824903f6f72082b5b841a7f0f9
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu May 2 16:48:07 2013 +0300

    drm/i915: put context upon switching

v2: Daniel convinced me that the request here was solely for context
lifetime tracking and that we have the active ref to keep the object
alive whilst the MI_SET_CONTEXT. So the only concern then is which
context should get the blame for MI_SET_CONTEXT failing. The old scheme
added a request for the old context so that any hang upto and including
the switch away would mark the old context as guilty. Now any hang here
implicates the new context. However since we have already gone through a
complete flush with the last context in its last request, and all that
lies in no-man's-land is an invalidate flush and the MI_SET_CONTEXT, we
should be safe in not unduly placing blame on the new context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 80342c8f02e..22be39feadc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2045,7 +2045,6 @@ int __i915_add_request(struct intel_ring_buffer *ring,
 	if (request == NULL)
 		return -ENOMEM;
 
-
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
-- 
cgit v1.2.3


From 9a7e0c2a1bff84d20ef02a85898f9c8757d1441c Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 26 Aug 2013 19:50:54 -0300
Subject: drm/i915: Rearrange the comments in i915_add_request()

The comments were a little out-of-sequence with the code, forcing the
reader to jump around whilst reading. Whilst moving the comments around,
add one to explain the context reference.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 22be39feadc..fdeecae058e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2062,8 +2062,6 @@ int __i915_add_request(struct intel_ring_buffer *ring,
 	request->ring = ring;
 	request->head = request_start;
 	request->tail = request_ring_position;
-	request->ctx = ring->last_context;
-	request->batch_obj = obj;
 
 	/* Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
@@ -2071,7 +2069,12 @@ int __i915_add_request(struct intel_ring_buffer *ring,
 	 * inactive_list and lose its active reference. Hence we do not need
 	 * to explicitly hold another reference here.
 	 */
+	request->batch_obj = obj;
 
+	/* Hold a reference to the current context so that we can inspect
+	 * it later in case a hangcheck error event fires.
+	 */
+	request->ctx = ring->last_context;
 	if (request->ctx)
 		i915_gem_context_reference(request->ctx);
 
-- 
cgit v1.2.3


From 1823521d2b2fa614e7ad95fdc8a0f59e571f37ce Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 4 Sep 2013 10:45:51 +0100
Subject: drm/i915: Rename ring->outstanding_lazy_request

Prior to preallocating an request for lazy emission, rename the existing
field to make way (and differentiate the seqno from the request struct).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fdeecae058e..858e7888663 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -964,7 +964,7 @@ i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
 	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
 
 	ret = 0;
-	if (seqno == ring->outstanding_lazy_request)
+	if (seqno == ring->outstanding_lazy_seqno)
 		ret = i915_add_request(ring, NULL);
 
 	return ret;
@@ -2094,7 +2094,7 @@ int __i915_add_request(struct intel_ring_buffer *ring,
 	}
 
 	trace_i915_gem_request_add(ring, request->seqno);
-	ring->outstanding_lazy_request = 0;
+	ring->outstanding_lazy_seqno = 0;
 
 	if (!dev_priv->ums.mm_suspended) {
 		i915_queue_hangcheck(ring->dev);
-- 
cgit v1.2.3


From 3c0e234c847318304c12f9e7fffac7e1cf3db3ff Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 4 Sep 2013 10:45:52 +0100
Subject: drm/i915; Preallocate the lazy request

It is possible for us to be forced to perform an allocation for the lazy
request whilst running the shrinker. This allocation may fail, leaving
us unable to reclaim any memory leading to premature OOM. A neat
solution to the problem is to preallocate the request at the same time
as acquiring the seqno for the ring transaction. This means that we can
report ENOMEM prior to touching the rings.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 858e7888663..399e159016e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2041,8 +2041,8 @@ int __i915_add_request(struct intel_ring_buffer *ring,
 	if (ret)
 		return ret;
 
-	request = kmalloc(sizeof(*request), GFP_KERNEL);
-	if (request == NULL)
+	request = ring->preallocated_lazy_request;
+	if (WARN_ON(request == NULL))
 		return -ENOMEM;
 
 	/* Record the position of the start of the request so that
@@ -2053,10 +2053,8 @@ int __i915_add_request(struct intel_ring_buffer *ring,
 	request_ring_position = intel_ring_get_tail(ring);
 
 	ret = ring->add_request(ring);
-	if (ret) {
-		kfree(request);
+	if (ret)
 		return ret;
-	}
 
 	request->seqno = intel_ring_get_seqno(ring);
 	request->ring = ring;
@@ -2095,6 +2093,7 @@ int __i915_add_request(struct intel_ring_buffer *ring,
 
 	trace_i915_gem_request_add(ring, request->seqno);
 	ring->outstanding_lazy_seqno = 0;
+	ring->preallocated_lazy_request = NULL;
 
 	if (!dev_priv->ums.mm_suspended) {
 		i915_queue_hangcheck(ring->dev);
-- 
cgit v1.2.3


From be62acb4cce1389a28296852737e3917d9cc5b25 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Fri, 30 Aug 2013 16:19:28 +0300
Subject: drm/i915: ban badly behaving contexts

Now when we have mechanism in place to track which context
was guilty of hanging the gpu, it is possible to punish
for bad behaviour.

If context has recently submitted a faulty batchbuffers guilty of
gpu hang and submits another batch which hangs gpu in quick
succession, ban it permanently. If ctx is banned, no more
batchbuffers will be queued for execution.

There is no need for global wedge machinery anymore and
it would be unwise to wedge the whole gpu if we have multiple
hanging batches queued for execution. Instead just ban
the guilty ones and carry on.

v2: Store guilty ban status bool in gpu_error instead of pointers
    that might become danling before hang is declared.

v3: Use return value for banned status instead of stashing state
    into gpu_error (Chris Wilson)

v4: - rebase on top of fixed hang stats api
    - add define for ban period
    - rename commit and improve commit msg

v5: - rely context banning instead of wedging the gpu
    - beautification and fix for ban calculation (Chris)

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 399e159016e..04e810c59d6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2188,6 +2188,21 @@ static bool i915_request_guilty(struct drm_i915_gem_request *request,
 	return false;
 }
 
+static bool i915_context_is_banned(const struct i915_ctx_hang_stats *hs)
+{
+	const unsigned long elapsed = get_seconds() - hs->guilty_ts;
+
+	if (hs->banned)
+		return true;
+
+	if (elapsed <= DRM_I915_CTX_BAN_PERIOD) {
+		DRM_ERROR("context hanging too fast, declaring banned!\n");
+		return true;
+	}
+
+	return false;
+}
+
 static void i915_set_reset_status(struct intel_ring_buffer *ring,
 				  struct drm_i915_gem_request *request,
 				  u32 acthd)
@@ -2224,10 +2239,13 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring,
 		hs = &request->file_priv->hang_stats;
 
 	if (hs) {
-		if (guilty)
+		if (guilty) {
+			hs->banned = i915_context_is_banned(hs);
 			hs->batch_active++;
-		else
+			hs->guilty_ts = get_seconds();
+		} else {
 			hs->batch_pending++;
+		}
 	}
 }
 
-- 
cgit v1.2.3


From 5a1d5eb020a27759f5cab4d72222d1752bb29453 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 10 Sep 2013 11:27:37 +0100
Subject: drm/i915: Remove the double-list iteration from bound_any()

The purpose of the function is to find out whether the object is still
bound in any address space. This can be easily checked by looking at the
vma currently associated with the object, rather than asking if any of
the global address spaces have an active vma on the object.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 04e810c59d6..face1451786 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4887,11 +4887,10 @@ bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
 
 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
 {
-	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
-	struct i915_address_space *vm;
+	struct i915_vma *vma;
 
-	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
-		if (i915_gem_obj_bound(o, vm))
+	list_for_each_entry(vma, &o->vma_list, vma_link)
+		if (drm_mm_node_allocated(&vma->node))
 			return true;
 
 	return false;
-- 
cgit v1.2.3


From 23f54483980cea980af37e436ff4e6701aadce12 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Wed, 11 Sep 2013 14:57:48 -0700
Subject: drm/i915: Synchronize pread/pwrite with wait_rendering

lifted from Daniel:
pread/pwrite isn't about the object's domain at all, but purely about
synchronizing for outstanding rendering. Replacing the call to
set_to_gtt_domain with a wait_rendering would imo improve code
readability. Furthermore we could pimp pread to only block for
outstanding writes and not for reads.

Since you're not the first one to trip over this: Can I volunteer you
for a follow-up patch to fix this?

v2: Switch the pwrite patch to use \!read_only. This was a typo in the
original code. (Chris, Daniel)

Recommended-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Fix up the logic fumble - wait_rendering has a bool readonly
paramater, set_to_gtt_domain otoh has bool write. Breakage reported by
Jani Nikula, I've double-checked that igt/gem_concurrent_blt/prw-*
would have caught this.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index face1451786..d00d24f7a97 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -41,6 +41,9 @@ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *o
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
 						   bool force);
 static __must_check int
+i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
+			       bool readonly);
+static __must_check int
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			   struct i915_address_space *vm,
 			   unsigned alignment,
@@ -430,11 +433,9 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		 * optimizes for the case when the gpu will dirty the data
 		 * anyway again before the next pread happens. */
 		needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level);
-		if (i915_gem_obj_bound_any(obj)) {
-			ret = i915_gem_object_set_to_gtt_domain(obj, false);
-			if (ret)
-				return ret;
-		}
+		ret = i915_gem_object_wait_rendering(obj, true);
+		if (ret)
+			return ret;
 	}
 
 	ret = i915_gem_object_get_pages(obj);
@@ -746,11 +747,9 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		 * optimizes for the case when the gpu will use the data
 		 * right away and we therefore have to clflush anyway. */
 		needs_clflush_after = cpu_write_needs_clflush(obj);
-		if (i915_gem_obj_bound_any(obj)) {
-			ret = i915_gem_object_set_to_gtt_domain(obj, true);
-			if (ret)
-				return ret;
-		}
+		ret = i915_gem_object_wait_rendering(obj, false);
+		if (ret)
+			return ret;
 	}
 	/* Same trick applies to invalidate partially written cachelines read
 	 * before writing. */
-- 
cgit v1.2.3


From 35a85ac60618521d41cfdb14f3fbfc8ad7329e9e Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Thu, 19 Sep 2013 11:13:41 -0700
Subject: drm/i915: Add second slice l3 remapping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Certain HSW SKUs have a second bank of L3. This L3 remapping has a
separate register set, and interrupt from the first "slice". A slice is
simply a term to define some subset of the GPU's l3 cache. This patch
implements both the interrupt handler, and ability to communicate with
userspace about this second slice.

v2:  Remove redundant check about non-existent slice.
Change warning about interrupts of unknown slices to WARN_ON_ONCE
Handle the case where we get 2 slice interrupts concurrently, and switch
the tracking of interrupts to be non-destructive (all Ville)
Don't enable/mask the second slice parity interrupt for ivb/vlv (even
though all docs I can find claim it's rsvd) (Ville + Bryan)
Keep BYT excluded from L3 parity

v3: Fix the slice = ffs to be decremented by one (found by Ville). When
I initially did my testing on the series, I was using 1-based slice
counting, so this code was correct. Not sure why my simpler tests that
I've been running since then didn't pick it up sooner.

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d00d24f7a97..21a3d69679e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4222,16 +4222,15 @@ i915_gem_idle(struct drm_device *dev)
 	return 0;
 }
 
-void i915_gem_l3_remap(struct drm_device *dev)
+void i915_gem_l3_remap(struct drm_device *dev, int slice)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
+	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
 	u32 misccpctl;
 	int i;
 
-	if (!HAS_L3_GPU_CACHE(dev))
-		return;
-
-	if (!dev_priv->l3_parity.remap_info)
+	if (!HAS_L3_GPU_CACHE(dev) || !remap_info)
 		return;
 
 	misccpctl = I915_READ(GEN7_MISCCPCTL);
@@ -4239,17 +4238,17 @@ void i915_gem_l3_remap(struct drm_device *dev)
 	POSTING_READ(GEN7_MISCCPCTL);
 
 	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
-		u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
-		if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
+		u32 remap = I915_READ(reg_base + i);
+		if (remap && remap != remap_info[i/4])
 			DRM_DEBUG("0x%x was already programmed to %x\n",
-				  GEN7_L3LOG_BASE + i, remap);
-		if (remap && !dev_priv->l3_parity.remap_info[i/4])
+				  reg_base + i, remap);
+		if (remap && !remap_info[i/4])
 			DRM_DEBUG_DRIVER("Clearing remapped register\n");
-		I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
+		I915_WRITE(reg_base + i, remap_info[i/4]);
 	}
 
 	/* Make sure all the writes land before disabling dop clock gating */
-	POSTING_READ(GEN7_L3LOG_BASE);
+	POSTING_READ(reg_base);
 
 	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
 }
@@ -4343,7 +4342,7 @@ int
 i915_gem_init_hw(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	int ret;
+	int ret, i;
 
 	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
 		return -EIO;
@@ -4362,7 +4361,8 @@ i915_gem_init_hw(struct drm_device *dev)
 		I915_WRITE(GEN7_MSG_CTL, temp);
 	}
 
-	i915_gem_l3_remap(dev);
+	for (i = 0; i < NUM_L3_SLICES(dev); i++)
+		i915_gem_l3_remap(dev, i);
 
 	i915_gem_init_swizzling(dev);
 
-- 
cgit v1.2.3


From c3787e2eac816a597a7f92daa5d0629a85e77d56 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 17 Sep 2013 21:12:44 -0700
Subject: drm/i915: Make l3 remapping use the ring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using LRI for setting the remapping registers allows us to stream l3
remapping information. This is necessary to handle per context remaps as
we'll see implemented in an upcoming patch.

Using the ring also means we don't need to frob the DOP clock gating
bits.

v2: Add comment about lack of worry for concurrent register access
(Daniel)

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
[danvet: Bikeshed the comment a bit by doing a s/XXX/Note - there's
nothing to fix.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 21a3d69679e..e4f17e59470 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4222,35 +4222,35 @@ i915_gem_idle(struct drm_device *dev)
 	return 0;
 }
 
-void i915_gem_l3_remap(struct drm_device *dev, int slice)
+int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice)
 {
+	struct drm_device *dev = ring->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
 	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
-	u32 misccpctl;
-	int i;
+	int i, ret;
 
 	if (!HAS_L3_GPU_CACHE(dev) || !remap_info)
-		return;
+		return 0;
 
-	misccpctl = I915_READ(GEN7_MISCCPCTL);
-	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
-	POSTING_READ(GEN7_MISCCPCTL);
+	ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
+	if (ret)
+		return ret;
 
+	/*
+	 * Note: We do not worry about the concurrent register cacheline hang
+	 * here because no other code should access these registers other than
+	 * at initialization time.
+	 */
 	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
-		u32 remap = I915_READ(reg_base + i);
-		if (remap && remap != remap_info[i/4])
-			DRM_DEBUG("0x%x was already programmed to %x\n",
-				  reg_base + i, remap);
-		if (remap && !remap_info[i/4])
-			DRM_DEBUG_DRIVER("Clearing remapped register\n");
-		I915_WRITE(reg_base + i, remap_info[i/4]);
+		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+		intel_ring_emit(ring, reg_base + i);
+		intel_ring_emit(ring, remap_info[i/4]);
 	}
 
-	/* Make sure all the writes land before disabling dop clock gating */
-	POSTING_READ(reg_base);
+	intel_ring_advance(ring);
 
-	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+	return ret;
 }
 
 void i915_gem_init_swizzling(struct drm_device *dev)
@@ -4361,15 +4361,15 @@ i915_gem_init_hw(struct drm_device *dev)
 		I915_WRITE(GEN7_MSG_CTL, temp);
 	}
 
-	for (i = 0; i < NUM_L3_SLICES(dev); i++)
-		i915_gem_l3_remap(dev, i);
-
 	i915_gem_init_swizzling(dev);
 
 	ret = i915_gem_init_rings(dev);
 	if (ret)
 		return ret;
 
+	for (i = 0; i < NUM_L3_SLICES(dev); i++)
+		i915_gem_l3_remap(&dev_priv->ring[RCS], i);
+
 	/*
 	 * XXX: There was some w/a described somewhere suggesting loading
 	 * contexts before PPGTT.
-- 
cgit v1.2.3


From a33afea5ff6e5b87ac11c87fb60b3704b3ac0fcc Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 17 Sep 2013 21:12:45 -0700
Subject: drm/i915: Keep a list of all contexts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I have implemented this patch before without creating a separate list
(I'm having trouble finding the links, but the messages ids are:
<1364942743-6041-2-git-send-email-ben@bwidawsk.net>
<1365118914-15753-9-git-send-email-ben@bwidawsk.net>)

However, the code is much simpler to just use a list and it makes the
code from the next patch a lot more pretty.

As you'll see in the next patch, the reason for this is to be able to
specify when a context needs to get L3 remapping. More details there.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e4f17e59470..83464aae909 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4541,6 +4541,7 @@ i915_gem_load(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->vm_list);
 	i915_init_vm(dev_priv, &dev_priv->gtt.base);
 
+	INIT_LIST_HEAD(&dev_priv->context_list);
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
-- 
cgit v1.2.3


From 040d2baa6229d50c406340035766c4e99725bf3d Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Thu, 19 Sep 2013 11:01:40 -0700
Subject: drm/i915: s/HAS_L3_GPU_CACHE/HAS_L3_DPF
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We'd only ever used this define to denote whether or not we have the
dynamic parity feature (DPF) and never to determine whether or not L3
exists. Baytrail is a good example of where L3 exists, and not DPF.

This patch provides clarify in the code for future use cases which might
want to actually query whether or not L3 exists.

v2: Add /* DPF == dynamic parity feature */

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 83464aae909..f1779b352f5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4230,7 +4230,7 @@ int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice)
 	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
 	int i, ret;
 
-	if (!HAS_L3_GPU_CACHE(dev) || !remap_info)
+	if (!HAS_L3_DPF(dev) || !remap_info)
 		return 0;
 
 	ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
-- 
cgit v1.2.3