From 27173f1f95db5e74ceb35fe9a2f2f348ea11bac9 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 14 Aug 2013 11:38:36 +0200 Subject: drm/i915: Convert execbuf code to use vmas In order to transition more of our code over to using a VMA instead of an pair - we must have the vma accessible at execbuf time. Up until now, we've only had a VMA when actually binding an object. The previous patch helped handle the distinction on bound vs. unbound. This patch will help us catch leaks, and other issues before we actually shuffle a bunch of stuff around. This attempts to convert all the execbuf code to speak in vmas. Since the execbuf code is very self contained it was a nice isolated conversion. The meat of the code is about turning eb_objects into eb_vma, and then wiring up the rest of the code to use vmas instead of obj, vm pairs. Unfortunately, to do this, we must move the exec_list link from the obj structure. This list is reused in the eviction code, so we must also modify the eviction code to make this work. WARNING: This patch makes an already hotly profiled path slower. The cost is unavoidable. In reply to this mail, I will attach the extra data. v2: Release table lock early, and two a 2 phase vma lookup to avoid having to use a GFP_ATOMIC. (Chris) v3: s/obj_exec_list/obj_exec_link/ Updates to address commit 6d2b888569d366beb4be72cacfde41adee2c25e1 Author: Chris Wilson Date: Wed Aug 7 18:30:54 2013 +0100 drm/i915: List objects allocated from stolen memory in debugfs v4: Use obj = vma->obj for neatness in some places (Chris) need_reloc_mappable() should return false if ppgtt (Chris) Signed-off-by: Ben Widawsky [danvet: Split out prep patches. Also remove a FIXME comment which is now taken care of.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f21a0c36a40..c4c56f98d6e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3978,7 +3978,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { INIT_LIST_HEAD(&obj->global_list); INIT_LIST_HEAD(&obj->ring_list); - INIT_LIST_HEAD(&obj->exec_list); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); -- cgit v1.2.3 From e656a6cba0febf12a9838882b891e1c5917c7a8b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 14 Aug 2013 14:14:04 +0200 Subject: drm/i915: inline vma_create into lookup_or_create_vma In the execbuf code we don't clean up any vmas which ended up not getting bound for code simplicity. To make sure that we don't end up creating multiple vma for the same vm kill the somewhat dangerous vma_create function and inline it into lookup_or_create. This is just a safety measure to prevent surprises in the future. Also update the somewhat confused comment in the execbuf code and clarify what kind of magic is going on with a new one. v2: Keep the function separate as requested by Chris. But give it a __ prefix for paranoia and move it tighter together with the other vma stuff. Cc: Chris Wilson Cc: Ben Widawsky Acked-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 50 ++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c4c56f98d6e..6820d0a1231 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4109,8 +4109,19 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) i915_gem_object_free(obj); } -struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, +struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm) +{ + struct i915_vma *vma; + list_for_each_entry(vma, &obj->vma_list, vma_link) + if (vma->vm == vm) + return vma; + + return NULL; +} + +static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) { struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); if (vma == NULL) @@ -4131,6 +4142,19 @@ struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, return vma; } +struct i915_vma * +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) +{ + struct i915_vma *vma; + + vma = i915_gem_obj_to_vma(obj, vm); + if (!vma) + vma = __i915_gem_vma_create(obj, vm); + + return vma; +} + void i915_gem_vma_destroy(struct i915_vma *vma) { WARN_ON(vma->node.allocated); @@ -4857,27 +4881,3 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, return 0; } - -struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) -{ - struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, vma_link) - if (vma->vm == vm) - return vma; - - return NULL; -} - -struct i915_vma * -i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) -{ - struct i915_vma *vma; - - vma = i915_gem_obj_to_vma(obj, vm); - if (!vma) - vma = i915_gem_vma_create(obj, vm); - - return vma; -} -- cgit v1.2.3 From aaa0566792dc7ae68deb1959663581ea8c75d311 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 20 Aug 2013 12:56:40 +0100 Subject: drm/i915: Don't destroy the vma placeholder during execbuffer reservation The execbuffer handle and exec_link were moved from the object into the vma. As the vma may be unbound and destroyed whilst attempting to reserve the execbuffer objects (either through a forced unbind to fix up a misalignment or through an evict-everything call) we need to prevent the free of the i915_vma itself. Otherwise not only is the list of objects to reserve corrupt, but we continue to reference stale vma entries. Fixes kernel crash with i-g-t/gem_evict_everything This regression has been introduced in commit 04038a515d6eda6dd0857c0ade0b3950d372f4c0 Author: Ben Widawsky AuthorDate: Wed Aug 14 11:38:36 2013 +0200 drm/i915: Convert execbuf code to use vmas Reported-by: Dan Carpenter References: http://www.spinics.net/lists/intel-gfx/msg32038.html Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68298 Signed-off-by: Chris Wilson Cc: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6820d0a1231..29b7e1c32b8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4159,6 +4159,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma) { WARN_ON(vma->node.allocated); list_del(&vma->vma_link); + + /* Keep the vma as a placeholder in the execbuffer reservation lists */ + if (!list_empty(&vma->exec_list)) + return; + kfree(vma); } -- cgit v1.2.3 From b93dab6e9d286ec2cd95b28078afdfa6dd515205 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 26 Aug 2013 11:23:47 +0200 Subject: drm/i915: More vma fixups around unbind/destroy The important bugfix here is that we must not unlink the vma when we keep it around as a placeholder for the execbuf code. Since then we won't find it again when execbuf gets interrupt and restarted and create a 2nd vma. And since the code as-is isn't fit yet to deal with more than one vma, hilarity ensues. Specifically the dma map/unmap of the sg table isn't adjusted for multiple vmas yet and will blow up like this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [] i915_gem_gtt_finish_object+0x73/0xc8 [i915] PGD 56bb5067 PUD ad3dd067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: tcp_lp ppdev parport_pc lp parport ipv6 dm_mod dcdbas snd_hda_codec_hdmi pcspkr snd_hda_codec_realtek serio_raw i2c_i801 iTCO_wdt iTCO_vendor_support snd_hda_intel snd_hda_codec lpc_ich snd_hwdep mfd_core snd_pcm snd_page_alloc snd_timer snd soundcore acpi_cpufreq i915 video button drm_kms_helper drm mperf freq_table CPU: 1 PID: 16650 Comm: fbo-maxsize Not tainted 3.11.0-rc4_nightlytop_d93f59_debug_20130814_+ #6957 Hardware name: Dell Inc. OptiPlex 9010/03JR84, BIOS A01 05/04/2012 task: ffff8800563b3f00 ti: ffff88004bdf4000 task.ti: ffff88004bdf4000 RIP: 0010:[] [] i915_gem_gtt_finish_object+0x73/0xc8 [i915] RSP: 0018:ffff88004bdf5958 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8801135e0000 RCX: ffff8800ad3bf8e0 RDX: ffff8800ad3bf8e0 RSI: 0000000000000000 RDI: ffff8801007ee780 RBP: ffff88004bdf5978 R08: ffff8800ad3bf8e0 R09: 0000000000000000 R10: ffffffff86ca1810 R11: ffff880036a17101 R12: ffff8801007ee780 R13: 0000000000018001 R14: ffff880118c4e000 R15: ffff8801007ee780 FS: 00007f401a0ce740(0000) GS:ffff88011e280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000005635c000 CR4: 00000000001407e0 Stack: ffff8801007ee780 ffff88005c253180 0000000000018000 ffff8801135e0000 ffff88004bdf59a8 ffffffffa0088e55 0000000000000011 ffff8801007eec00 0000000000018000 ffff880036a17101 ffff88004bdf5a08 ffffffffa0089026 Call Trace: [] i915_vma_unbind+0xdf/0x1ab [i915] [] __i915_gem_shrink+0x105/0x177 [i915] [] i915_gem_object_get_pages_gtt+0x108/0x309 [i915] [] i915_gem_object_get_pages+0x61/0x90 [i915] [] ? gen6_ppgtt_insert_entries+0x103/0x125 [i915] [] i915_gem_object_pin+0x1fa/0x5df [i915] [] i915_gem_execbuffer_reserve_object.isra.6+0x8d/0x1bc [i915] [] i915_gem_execbuffer_reserve+0x229/0x367 [i915] [] i915_gem_do_execbuffer.isra.12+0x4dc/0xf3a [i915] [] ? might_fault+0x40/0x90 [] i915_gem_execbuffer2+0x187/0x222 [i915] [] drm_ioctl+0x308/0x442 [drm] [] ? i915_gem_execbuffer+0x3ae/0x3ae [i915] [] ? __do_page_fault+0x3dd/0x481 [] vfs_ioctl+0x26/0x39 [] do_vfs_ioctl+0x40e/0x451 [] ? sysret_check+0x1b/0x56 [] SyS_ioctl+0x57/0x87 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Code: 48 c7 c6 84 30 0e a0 31 c0 e8 d0 e9 f7 ff bf c6 a7 00 00 e8 07 af 2c e1 41 f6 84 24 03 01 00 00 10 75 44 49 8b 84 24 08 01 00 00 <8b> 50 08 48 8b 30 49 8b 86 b0 04 00 00 48 89 c7 48 81 c7 98 00 RIP [] i915_gem_gtt_finish_object+0x73/0xc8 [i915] RSP CR2: 0000000000000008 As a consequence we need to change the "only one vma for now" check in vma_unbind - since vma_destroy isn't always called the obj->vma_list might not be empty. Instead check that the vma list is singular at the beginning of vma_unbind. This is also more symmetric with bind_to_vm. This fixes the igt/gem_evict_everything|alignment testcases. v2: - Add a paranoid WARN to mark_free in the eviction code to make sure we never try to evict a vma used by the execbuf code right now. - Move the check for a temporary execbuf vma into vma_destroy - otherwise the failure path cleanup in bind_to_vm will blow up. Our first attempting at fixing this was commit 1be81a2f2cfd8789a627401d470423358fba2d76 Author: Chris Wilson Date: Tue Aug 20 12:56:40 2013 +0100 drm/i915: Don't destroy the vma placeholder during execbuffer reservation Squash with this when merging! v3: Improvements suggested in Chris' review: - Move the WARN_ON in vma_destroy that checks for vmas with an drm_mm allocation before the early return. - Bail out if we hit the WARN in mark_free to hopefully make the kernel survive for long enough to capture it. Cc: Chris Wilson Cc: Ben Widawsky Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68298 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68171 Tested-by: lu hua (v2) Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 29b7e1c32b8..d57368d5fc1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2604,6 +2604,9 @@ int i915_vma_unbind(struct i915_vma *vma) drm_i915_private_t *dev_priv = obj->base.dev->dev_private; int ret; + /* For now we only ever use 1 vma per object */ + WARN_ON(!list_is_singular(&obj->vma_list)); + if (list_empty(&vma->vma_link)) return 0; @@ -2652,9 +2655,7 @@ destroy: i915_gem_vma_destroy(vma); /* Since the unbound list is global, only move to that list if - * no more VMAs exist. - * NB: Until we have real VMAs there will only ever be one */ - WARN_ON(!list_empty(&obj->vma_list)); + * no more VMAs exist. */ if (list_empty(&obj->vma_list)) list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); @@ -4158,12 +4159,13 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, void i915_gem_vma_destroy(struct i915_vma *vma) { WARN_ON(vma->node.allocated); - list_del(&vma->vma_link); /* Keep the vma as a placeholder in the execbuffer reservation lists */ if (!list_empty(&vma->exec_list)) return; + list_del(&vma->vma_link); + kfree(vma); } -- cgit v1.2.3 From 9435373ef8870e0a84b6fec0ad89b952bf3097fa Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 28 Aug 2013 16:45:46 -0300 Subject: drm/i915: Report enabled slices on Haswell GT3 Batchbuffers constructed by userspace can conditionalise their URB allocations through the use of the MI_SET_PREDICATE command. This command can read the MI_PREDICATE_RESULT_2 register to see how many slices are enabled on GT3, and by virtue of the result, scale their memory allocations to fit enabled memory. Of course, this only works if the kernel sets the appropriate bit in the register first. v2: Better commit subject and message by Chris Wilson. Cc: Chris Wilson Credits-to: Yejun Guo Signed-off-by: Rodrigo Vivi Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d57368d5fc1..2d4b72ab122 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4331,6 +4331,11 @@ i915_gem_init_hw(struct drm_device *dev) if (dev_priv->ellc_size) I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); + if (IS_HSW_GT3(dev)) + I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED); + else + I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); + if (HAS_PCH_NOP(dev)) { u32 temp = I915_READ(GEN7_MSG_CTL); temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); -- cgit v1.2.3 From 0ff501cbb5d825557da7b9a0226ef031344df87d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 29 Aug 2013 19:50:31 +0200 Subject: drm/i915: Fix list corruption in vma_unbind The saga around the breadcrumb vmas used by execbuf continues ... This time around we've managed to unconditionally move the object to the unbound list on the last vma unbind even though it might never have been on either the bound or unbound list. Hilarity ensued. Chris Wilson tracked this one down but compared to his patches I've simply opted to completely separate the unbound case for not-yet bound vmas. Otherwise we imo end up with semantically hard to parse checks around the list_move_tail(global_list, ...). Cc: Chris Wilson Cc: Ben Widawsky Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68462 Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2d4b72ab122..80342c8f02e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2610,8 +2610,11 @@ int i915_vma_unbind(struct i915_vma *vma) if (list_empty(&vma->vma_link)) return 0; - if (!drm_mm_node_allocated(&vma->node)) - goto destroy; + if (!drm_mm_node_allocated(&vma->node)) { + i915_gem_vma_destroy(vma); + + return 0; + } if (obj->pin_count) return -EBUSY; @@ -2651,7 +2654,6 @@ int i915_vma_unbind(struct i915_vma *vma) drm_mm_remove_node(&vma->node); -destroy: i915_gem_vma_destroy(vma); /* Since the unbound list is global, only move to that list if -- cgit v1.2.3 From c0321e2c5acaaff7ed41d46d97cee71ad9238481 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 26 Aug 2013 19:50:53 -0300 Subject: drm/i915: Do not add an interrupt for a context switch We use the request to ensure we hold a reference to the context for the duration that it remains in use by the ring. Each request only holds a reference to the current context, hence we emit a request after switching contexts with the final reference to the old context. However, the extra interrupt caused by that request is not useful (no timing critical function will wait for the context object), instead the overhead of servicing the IRQ shows up in some (lightweight) benchmarks. In order to keep the useful property of using the request to manage the context lifetime, we want to add a dummy request that is associated with the interrupt from the subsequent real request following the batch. The extra interrupt was added as a side-effect of using i915_add_request() in commit 112522f6789581824903f6f72082b5b841a7f0f9 Author: Chris Wilson Date: Thu May 2 16:48:07 2013 +0300 drm/i915: put context upon switching v2: Daniel convinced me that the request here was solely for context lifetime tracking and that we have the active ref to keep the object alive whilst the MI_SET_CONTEXT. So the only concern then is which context should get the blame for MI_SET_CONTEXT failing. The old scheme added a request for the old context so that any hang upto and including the switch away would mark the old context as guilty. Now any hang here implicates the new context. However since we have already gone through a complete flush with the last context in its last request, and all that lies in no-man's-land is an invalidate flush and the MI_SET_CONTEXT, we should be safe in not unduly placing blame on the new context. Signed-off-by: Chris Wilson Cc: Ben Widawsky Cc: Paulo Zanoni Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 80342c8f02e..22be39feadc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2045,7 +2045,6 @@ int __i915_add_request(struct intel_ring_buffer *ring, if (request == NULL) return -ENOMEM; - /* Record the position of the start of the request so that * should we detect the updated seqno part-way through the * GPU processing the request, we never over-estimate the -- cgit v1.2.3 From 9a7e0c2a1bff84d20ef02a85898f9c8757d1441c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 26 Aug 2013 19:50:54 -0300 Subject: drm/i915: Rearrange the comments in i915_add_request() The comments were a little out-of-sequence with the code, forcing the reader to jump around whilst reading. Whilst moving the comments around, add one to explain the context reference. Signed-off-by: Chris Wilson Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 22be39feadc..fdeecae058e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2062,8 +2062,6 @@ int __i915_add_request(struct intel_ring_buffer *ring, request->ring = ring; request->head = request_start; request->tail = request_ring_position; - request->ctx = ring->last_context; - request->batch_obj = obj; /* Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this @@ -2071,7 +2069,12 @@ int __i915_add_request(struct intel_ring_buffer *ring, * inactive_list and lose its active reference. Hence we do not need * to explicitly hold another reference here. */ + request->batch_obj = obj; + /* Hold a reference to the current context so that we can inspect + * it later in case a hangcheck error event fires. + */ + request->ctx = ring->last_context; if (request->ctx) i915_gem_context_reference(request->ctx); -- cgit v1.2.3 From 1823521d2b2fa614e7ad95fdc8a0f59e571f37ce Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Sep 2013 10:45:51 +0100 Subject: drm/i915: Rename ring->outstanding_lazy_request Prior to preallocating an request for lazy emission, rename the existing field to make way (and differentiate the seqno from the request struct). Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fdeecae058e..858e7888663 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -964,7 +964,7 @@ i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); ret = 0; - if (seqno == ring->outstanding_lazy_request) + if (seqno == ring->outstanding_lazy_seqno) ret = i915_add_request(ring, NULL); return ret; @@ -2094,7 +2094,7 @@ int __i915_add_request(struct intel_ring_buffer *ring, } trace_i915_gem_request_add(ring, request->seqno); - ring->outstanding_lazy_request = 0; + ring->outstanding_lazy_seqno = 0; if (!dev_priv->ums.mm_suspended) { i915_queue_hangcheck(ring->dev); -- cgit v1.2.3 From 3c0e234c847318304c12f9e7fffac7e1cf3db3ff Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Sep 2013 10:45:52 +0100 Subject: drm/i915; Preallocate the lazy request It is possible for us to be forced to perform an allocation for the lazy request whilst running the shrinker. This allocation may fail, leaving us unable to reclaim any memory leading to premature OOM. A neat solution to the problem is to preallocate the request at the same time as acquiring the seqno for the ring transaction. This means that we can report ENOMEM prior to touching the rings. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 858e7888663..399e159016e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2041,8 +2041,8 @@ int __i915_add_request(struct intel_ring_buffer *ring, if (ret) return ret; - request = kmalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL) + request = ring->preallocated_lazy_request; + if (WARN_ON(request == NULL)) return -ENOMEM; /* Record the position of the start of the request so that @@ -2053,10 +2053,8 @@ int __i915_add_request(struct intel_ring_buffer *ring, request_ring_position = intel_ring_get_tail(ring); ret = ring->add_request(ring); - if (ret) { - kfree(request); + if (ret) return ret; - } request->seqno = intel_ring_get_seqno(ring); request->ring = ring; @@ -2095,6 +2093,7 @@ int __i915_add_request(struct intel_ring_buffer *ring, trace_i915_gem_request_add(ring, request->seqno); ring->outstanding_lazy_seqno = 0; + ring->preallocated_lazy_request = NULL; if (!dev_priv->ums.mm_suspended) { i915_queue_hangcheck(ring->dev); -- cgit v1.2.3 From be62acb4cce1389a28296852737e3917d9cc5b25 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 30 Aug 2013 16:19:28 +0300 Subject: drm/i915: ban badly behaving contexts Now when we have mechanism in place to track which context was guilty of hanging the gpu, it is possible to punish for bad behaviour. If context has recently submitted a faulty batchbuffers guilty of gpu hang and submits another batch which hangs gpu in quick succession, ban it permanently. If ctx is banned, no more batchbuffers will be queued for execution. There is no need for global wedge machinery anymore and it would be unwise to wedge the whole gpu if we have multiple hanging batches queued for execution. Instead just ban the guilty ones and carry on. v2: Store guilty ban status bool in gpu_error instead of pointers that might become danling before hang is declared. v3: Use return value for banned status instead of stashing state into gpu_error (Chris Wilson) v4: - rebase on top of fixed hang stats api - add define for ban period - rename commit and improve commit msg v5: - rely context banning instead of wedging the gpu - beautification and fix for ban calculation (Chris) Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 399e159016e..04e810c59d6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2188,6 +2188,21 @@ static bool i915_request_guilty(struct drm_i915_gem_request *request, return false; } +static bool i915_context_is_banned(const struct i915_ctx_hang_stats *hs) +{ + const unsigned long elapsed = get_seconds() - hs->guilty_ts; + + if (hs->banned) + return true; + + if (elapsed <= DRM_I915_CTX_BAN_PERIOD) { + DRM_ERROR("context hanging too fast, declaring banned!\n"); + return true; + } + + return false; +} + static void i915_set_reset_status(struct intel_ring_buffer *ring, struct drm_i915_gem_request *request, u32 acthd) @@ -2224,10 +2239,13 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring, hs = &request->file_priv->hang_stats; if (hs) { - if (guilty) + if (guilty) { + hs->banned = i915_context_is_banned(hs); hs->batch_active++; - else + hs->guilty_ts = get_seconds(); + } else { hs->batch_pending++; + } } } -- cgit v1.2.3 From 5a1d5eb020a27759f5cab4d72222d1752bb29453 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Sep 2013 11:27:37 +0100 Subject: drm/i915: Remove the double-list iteration from bound_any() The purpose of the function is to find out whether the object is still bound in any address space. This can be easily checked by looking at the vma currently associated with the object, rather than asking if any of the global address spaces have an active vma on the object. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 04e810c59d6..face1451786 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4887,11 +4887,10 @@ bool i915_gem_obj_bound(struct drm_i915_gem_object *o, bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) { - struct drm_i915_private *dev_priv = o->base.dev->dev_private; - struct i915_address_space *vm; + struct i915_vma *vma; - list_for_each_entry(vm, &dev_priv->vm_list, global_link) - if (i915_gem_obj_bound(o, vm)) + list_for_each_entry(vma, &o->vma_list, vma_link) + if (drm_mm_node_allocated(&vma->node)) return true; return false; -- cgit v1.2.3 From 23f54483980cea980af37e436ff4e6701aadce12 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 11 Sep 2013 14:57:48 -0700 Subject: drm/i915: Synchronize pread/pwrite with wait_rendering lifted from Daniel: pread/pwrite isn't about the object's domain at all, but purely about synchronizing for outstanding rendering. Replacing the call to set_to_gtt_domain with a wait_rendering would imo improve code readability. Furthermore we could pimp pread to only block for outstanding writes and not for reads. Since you're not the first one to trip over this: Can I volunteer you for a follow-up patch to fix this? v2: Switch the pwrite patch to use \!read_only. This was a typo in the original code. (Chris, Daniel) Recommended-by: Daniel Vetter Signed-off-by: Ben Widawsky [danvet: Fix up the logic fumble - wait_rendering has a bool readonly paramater, set_to_gtt_domain otoh has bool write. Breakage reported by Jani Nikula, I've double-checked that igt/gem_concurrent_blt/prw-* would have caught this.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index face1451786..d00d24f7a97 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -41,6 +41,9 @@ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *o static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, bool force); static __must_check int +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, + bool readonly); +static __must_check int i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, unsigned alignment, @@ -430,11 +433,9 @@ i915_gem_shmem_pread(struct drm_device *dev, * optimizes for the case when the gpu will dirty the data * anyway again before the next pread happens. */ needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level); - if (i915_gem_obj_bound_any(obj)) { - ret = i915_gem_object_set_to_gtt_domain(obj, false); - if (ret) - return ret; - } + ret = i915_gem_object_wait_rendering(obj, true); + if (ret) + return ret; } ret = i915_gem_object_get_pages(obj); @@ -746,11 +747,9 @@ i915_gem_shmem_pwrite(struct drm_device *dev, * optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ needs_clflush_after = cpu_write_needs_clflush(obj); - if (i915_gem_obj_bound_any(obj)) { - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ret; - } + ret = i915_gem_object_wait_rendering(obj, false); + if (ret) + return ret; } /* Same trick applies to invalidate partially written cachelines read * before writing. */ -- cgit v1.2.3 From 35a85ac60618521d41cfdb14f3fbfc8ad7329e9e Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 19 Sep 2013 11:13:41 -0700 Subject: drm/i915: Add second slice l3 remapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain HSW SKUs have a second bank of L3. This L3 remapping has a separate register set, and interrupt from the first "slice". A slice is simply a term to define some subset of the GPU's l3 cache. This patch implements both the interrupt handler, and ability to communicate with userspace about this second slice. v2: Remove redundant check about non-existent slice. Change warning about interrupts of unknown slices to WARN_ON_ONCE Handle the case where we get 2 slice interrupts concurrently, and switch the tracking of interrupts to be non-destructive (all Ville) Don't enable/mask the second slice parity interrupt for ivb/vlv (even though all docs I can find claim it's rsvd) (Ville + Bryan) Keep BYT excluded from L3 parity v3: Fix the slice = ffs to be decremented by one (found by Ville). When I initially did my testing on the series, I was using 1-based slice counting, so this code was correct. Not sure why my simpler tests that I've been running since then didn't pick it up sooner. Reviewed-by: Ville Syrjälä Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d00d24f7a97..21a3d69679e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4222,16 +4222,15 @@ i915_gem_idle(struct drm_device *dev) return 0; } -void i915_gem_l3_remap(struct drm_device *dev) +void i915_gem_l3_remap(struct drm_device *dev, int slice) { drm_i915_private_t *dev_priv = dev->dev_private; + u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); + u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; u32 misccpctl; int i; - if (!HAS_L3_GPU_CACHE(dev)) - return; - - if (!dev_priv->l3_parity.remap_info) + if (!HAS_L3_GPU_CACHE(dev) || !remap_info) return; misccpctl = I915_READ(GEN7_MISCCPCTL); @@ -4239,17 +4238,17 @@ void i915_gem_l3_remap(struct drm_device *dev) POSTING_READ(GEN7_MISCCPCTL); for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { - u32 remap = I915_READ(GEN7_L3LOG_BASE + i); - if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) + u32 remap = I915_READ(reg_base + i); + if (remap && remap != remap_info[i/4]) DRM_DEBUG("0x%x was already programmed to %x\n", - GEN7_L3LOG_BASE + i, remap); - if (remap && !dev_priv->l3_parity.remap_info[i/4]) + reg_base + i, remap); + if (remap && !remap_info[i/4]) DRM_DEBUG_DRIVER("Clearing remapped register\n"); - I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); + I915_WRITE(reg_base + i, remap_info[i/4]); } /* Make sure all the writes land before disabling dop clock gating */ - POSTING_READ(GEN7_L3LOG_BASE); + POSTING_READ(reg_base); I915_WRITE(GEN7_MISCCPCTL, misccpctl); } @@ -4343,7 +4342,7 @@ int i915_gem_init_hw(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; - int ret; + int ret, i; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; @@ -4362,7 +4361,8 @@ i915_gem_init_hw(struct drm_device *dev) I915_WRITE(GEN7_MSG_CTL, temp); } - i915_gem_l3_remap(dev); + for (i = 0; i < NUM_L3_SLICES(dev); i++) + i915_gem_l3_remap(dev, i); i915_gem_init_swizzling(dev); -- cgit v1.2.3 From c3787e2eac816a597a7f92daa5d0629a85e77d56 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 17 Sep 2013 21:12:44 -0700 Subject: drm/i915: Make l3 remapping use the ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using LRI for setting the remapping registers allows us to stream l3 remapping information. This is necessary to handle per context remaps as we'll see implemented in an upcoming patch. Using the ring also means we don't need to frob the DOP clock gating bits. v2: Add comment about lack of worry for concurrent register access (Daniel) Signed-off-by: Ben Widawsky Reviewed-by: Ville Syrjälä [danvet: Bikeshed the comment a bit by doing a s/XXX/Note - there's nothing to fix.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 21a3d69679e..e4f17e59470 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4222,35 +4222,35 @@ i915_gem_idle(struct drm_device *dev) return 0; } -void i915_gem_l3_remap(struct drm_device *dev, int slice) +int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice) { + struct drm_device *dev = ring->dev; drm_i915_private_t *dev_priv = dev->dev_private; u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; - u32 misccpctl; - int i; + int i, ret; if (!HAS_L3_GPU_CACHE(dev) || !remap_info) - return; + return 0; - misccpctl = I915_READ(GEN7_MISCCPCTL); - I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); - POSTING_READ(GEN7_MISCCPCTL); + ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); + if (ret) + return ret; + /* + * Note: We do not worry about the concurrent register cacheline hang + * here because no other code should access these registers other than + * at initialization time. + */ for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { - u32 remap = I915_READ(reg_base + i); - if (remap && remap != remap_info[i/4]) - DRM_DEBUG("0x%x was already programmed to %x\n", - reg_base + i, remap); - if (remap && !remap_info[i/4]) - DRM_DEBUG_DRIVER("Clearing remapped register\n"); - I915_WRITE(reg_base + i, remap_info[i/4]); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, reg_base + i); + intel_ring_emit(ring, remap_info[i/4]); } - /* Make sure all the writes land before disabling dop clock gating */ - POSTING_READ(reg_base); + intel_ring_advance(ring); - I915_WRITE(GEN7_MISCCPCTL, misccpctl); + return ret; } void i915_gem_init_swizzling(struct drm_device *dev) @@ -4361,15 +4361,15 @@ i915_gem_init_hw(struct drm_device *dev) I915_WRITE(GEN7_MSG_CTL, temp); } - for (i = 0; i < NUM_L3_SLICES(dev); i++) - i915_gem_l3_remap(dev, i); - i915_gem_init_swizzling(dev); ret = i915_gem_init_rings(dev); if (ret) return ret; + for (i = 0; i < NUM_L3_SLICES(dev); i++) + i915_gem_l3_remap(&dev_priv->ring[RCS], i); + /* * XXX: There was some w/a described somewhere suggesting loading * contexts before PPGTT. -- cgit v1.2.3 From a33afea5ff6e5b87ac11c87fb60b3704b3ac0fcc Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 17 Sep 2013 21:12:45 -0700 Subject: drm/i915: Keep a list of all contexts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I have implemented this patch before without creating a separate list (I'm having trouble finding the links, but the messages ids are: <1364942743-6041-2-git-send-email-ben@bwidawsk.net> <1365118914-15753-9-git-send-email-ben@bwidawsk.net>) However, the code is much simpler to just use a list and it makes the code from the next patch a lot more pretty. As you'll see in the next patch, the reason for this is to be able to specify when a context needs to get L3 remapping. More details there. Signed-off-by: Ben Widawsky Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e4f17e59470..83464aae909 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4541,6 +4541,7 @@ i915_gem_load(struct drm_device *dev) INIT_LIST_HEAD(&dev_priv->vm_list); i915_init_vm(dev_priv, &dev_priv->gtt.base); + INIT_LIST_HEAD(&dev_priv->context_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); -- cgit v1.2.3 From 040d2baa6229d50c406340035766c4e99725bf3d Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 19 Sep 2013 11:01:40 -0700 Subject: drm/i915: s/HAS_L3_GPU_CACHE/HAS_L3_DPF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'd only ever used this define to denote whether or not we have the dynamic parity feature (DPF) and never to determine whether or not L3 exists. Baytrail is a good example of where L3 exists, and not DPF. This patch provides clarify in the code for future use cases which might want to actually query whether or not L3 exists. v2: Add /* DPF == dynamic parity feature */ Reviewed-by: Ville Syrjälä Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 83464aae909..f1779b352f5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4230,7 +4230,7 @@ int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice) u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; int i, ret; - if (!HAS_L3_GPU_CACHE(dev) || !remap_info) + if (!HAS_L3_DPF(dev) || !remap_info) return 0; ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); -- cgit v1.2.3