aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem.c
diff options
context:
space:
mode:
authorDaniel Vetter <daniel.vetter@ffwll.ch>2012-11-15 17:17:22 +0100
committerDaniel Vetter <daniel.vetter@ffwll.ch>2013-01-20 13:11:16 +0100
commit1f83fee08d625f8d0130f9fe5ef7b17c2e022f3c (patch)
treef876ef636a0e99c2afd8d20db81789e50d259439 /drivers/gpu/drm/i915/i915_gem.c
parent308887aad14c4ecc3fc10a3c58ec42641c5e4423 (diff)
drm/i915: clear up wedged transitions
We have two important transitions of the wedged state in the current code: - 0 -> 1: This means a hang has been detected, and signals to everyone that they please get of any locks, so that the reset work item can do its job. - 1 -> 0: The reset handler has completed. Now the last transition mixes up two states: "Reset completed and successful" and "Reset failed". To distinguish these two we do some tricks with the reset completion, but I simply could not convince myself that this doesn't race under odd circumstances. Hence split this up, and add a new terminal state indicating that the hw is gone for good. Also add explicit #defines for both states, update comments. v2: Split out the reset handling bugfix for the throttle ioctl. v3: s/tmp/wedged/ sugested by Chris Wilson. Also fixup up a rebase error which prevented this patch from actually compiling. v4: To unify the wedged state with the reset counter, keep the reset-in-progress state just as a flag. The terminally-wedged state is now denoted with a big number. v5: Add a comment to the reset_counter special values explaining that WEDGED & RESET_IN_PROGRESS needs to be true for the code to be correct. v6: Fixup logic errors introduced with the wedged+reset_counter unification. Since WEDGED implies reset-in-progress (in a way we're terminally stuck in the dead-but-reset-not-completed state), we need ensure that we check for this everywhere. The specific bug was in wait_for_error, which would simply have timed out. v7: Extract an inline i915_reset_in_progress helper to make the code more readable. Also annote the reset-in-progress case with an unlikely, to help the compiler optimize the fastpath. Do the same for the terminally wedged case with i915_terminally_wedged. Reviewed-by: Damien Lespiau <damien.lespiau@intel.com> Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c49
1 files changed, 18 insertions, 31 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c96a501b820..2ca90119482 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -89,36 +89,32 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
- struct completion *x = &error->completion;
- unsigned long flags;
int ret;
- if (!atomic_read(&error->wedged))
+#define EXIT_COND (!i915_reset_in_progress(error))
+ if (EXIT_COND)
return 0;
+ /* GPU is already declared terminally dead, give up. */
+ if (i915_terminally_wedged(error))
+ return -EIO;
+
/*
* Only wait 10 seconds for the gpu reset to complete to avoid hanging
* userspace. If it takes that long something really bad is going on and
* we should simply try to bail out and fail as gracefully as possible.
*/
- ret = wait_for_completion_interruptible_timeout(x, 10*HZ);
+ ret = wait_event_interruptible_timeout(error->reset_queue,
+ EXIT_COND,
+ 10*HZ);
if (ret == 0) {
DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
return -EIO;
} else if (ret < 0) {
return ret;
}
+#undef EXIT_COND
- if (atomic_read(&error->wedged)) {
- /* GPU is hung, bump the completion count to account for
- * the token we just consumed so that we never hit zero and
- * end up waiting upon a subsequent completion event that
- * will never happen.
- */
- spin_lock_irqsave(&x->wait.lock, flags);
- x->done++;
- spin_unlock_irqrestore(&x->wait.lock, flags);
- }
return 0;
}
@@ -942,23 +938,14 @@ int
i915_gem_check_wedge(struct i915_gpu_error *error,
bool interruptible)
{
- if (atomic_read(&error->wedged)) {
- struct completion *x = &error->completion;
- bool recovery_complete;
- unsigned long flags;
-
- /* Give the error handler a chance to run. */
- spin_lock_irqsave(&x->wait.lock, flags);
- recovery_complete = x->done > 0;
- spin_unlock_irqrestore(&x->wait.lock, flags);
-
+ if (i915_reset_in_progress(error)) {
/* Non-interruptible callers can't handle -EAGAIN, hence return
* -EIO unconditionally for these. */
if (!interruptible)
return -EIO;
- /* Recovery complete, but still wedged means reset failure. */
- if (recovery_complete)
+ /* Recovery complete, but the reset failed ... */
+ if (i915_terminally_wedged(error))
return -EIO;
return -EAGAIN;
@@ -1025,7 +1012,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
#define EXIT_COND \
(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
- atomic_read(&dev_priv->gpu_error.wedged))
+ i915_reset_in_progress(&dev_priv->gpu_error))
do {
if (interruptible)
end = wait_event_interruptible_timeout(ring->irq_queue,
@@ -1379,7 +1366,7 @@ out:
/* If this -EIO is due to a gpu hang, give the reset code a
* chance to clean up the mess. Otherwise return the proper
* SIGBUS. */
- if (!atomic_read(&dev_priv->gpu_error.wedged))
+ if (i915_terminally_wedged(&dev_priv->gpu_error))
return VM_FAULT_SIGBUS;
case -EAGAIN:
/* Give the error handler a chance to run and move the
@@ -3983,9 +3970,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
if (drm_core_check_feature(dev, DRIVER_MODESET))
return 0;
- if (atomic_read(&dev_priv->gpu_error.wedged)) {
+ if (i915_reset_in_progress(&dev_priv->gpu_error)) {
DRM_ERROR("Reenabling wedged hardware, good luck\n");
- atomic_set(&dev_priv->gpu_error.wedged, 0);
+ atomic_set(&dev_priv->gpu_error.reset_counter, 0);
}
mutex_lock(&dev->struct_mutex);
@@ -4069,7 +4056,7 @@ i915_gem_load(struct drm_device *dev)
INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
i915_gem_retire_work_handler);
- init_completion(&dev_priv->gpu_error.completion);
+ init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
if (IS_GEN3(dev)) {