diff options
author | Dave Airlie <airlied@redhat.com> | 2013-10-01 10:00:50 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2013-10-01 10:00:50 +1000 |
commit | 4821ff14a32886ee95c8b2850746ec020ca65189 (patch) | |
tree | dad7b2c822e55669b687ae754cf1b958316ef538 /drivers/gpu/drm/i915/i915_gem.c | |
parent | 15c03dd4859ab16f9212238f29dd315654aa94f6 (diff) | |
parent | b599c89e8c5cf0c37352e0871be240291f8ce922 (diff) |
Merge tag 'drm-intel-next-2013-09-21-merged' of git://people.freedesktop.org/~danvet/drm-intel into drm-next
drm-intel-next-2013-09-21:
- clock state handling rework from Ville
- l3 parity handling fixes for hsw from Ben
- some more watermark improvements from Ville
- ban badly behaved context from Mika
- a few vlv improvements from Jesse
- VGA power domain handling from Ville
drm-intel-next-2013-09-06:
- Basic mipi dsi support from Jani. Not yet converted over to drm_bridge
since that was too fresh, but the porting is in progress already.
- More vma patches from Ben, this time the code to convert the execbuffer
code. Now that the shrinker recursion bug is tracked down we can move
ahead here again. Yay!
- Optimize hw context switching to not generate needless interrupts (Chris
Wilson). Also some shuffling for the oustanding request allocation.
- Opregion support for SWSCI, although not yet fully wired up (we need a
bit of runtime D3 support for that apparently, due to Windows design
deficiencies), from Jani Nikula.
- A few smaller changes all over.
[airlied: merge conflict fix in i9xx_set_pipeconf]
* tag 'drm-intel-next-2013-09-21-merged' of git://people.freedesktop.org/~danvet/drm-intel: (119 commits)
drm/i915: assume all GM45 Acer laptops use inverted backlight PWM
drm/i915: cleanup a min_t() cast
drm/i915: Pull intel_init_power_well() out of intel_modeset_init_hw()
drm/i915: Add POWER_DOMAIN_VGA
drm/i915: Refactor power well refcount inc/dec operations
drm/i915: Add intel_display_power_{get, put} to request power for specific domains
drm/i915: Change i915_request power well handling
drm/i915: POSTING_READ IPS_CTL before waiting for the vblank
drm/i915: don't disable ERR_INT on the IRQ handler
drm/i915/vlv: disable rc6p and rc6pp residency reporting on BYT
drm/i915/vlv: honor i915_enable_rc6 boot param on VLV
drm/i915: s/HAS_L3_GPU_CACHE/HAS_L3_DPF
drm/i915: Do remaps for all contexts
drm/i915: Keep a list of all contexts
drm/i915: Make l3 remapping use the ring
drm/i915: Add second slice l3 remapping
drm/i915: Fix HSW parity test
drm/i915: dump crtc timings from the pipe config
drm/i915: register backlight device also when backlight class is a module
drm/i915: write D_COMP using the mailbox
...
Conflicts:
drivers/gpu/drm/i915/intel_display.c
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 195 |
1 files changed, 113 insertions, 82 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cdfb9da0e4c..36c4ad9c752 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -41,6 +41,9 @@ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *o static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, bool force); static __must_check int +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, + bool readonly); +static __must_check int i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, unsigned alignment, @@ -432,11 +435,9 @@ i915_gem_shmem_pread(struct drm_device *dev, * optimizes for the case when the gpu will dirty the data * anyway again before the next pread happens. */ needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level); - if (i915_gem_obj_bound_any(obj)) { - ret = i915_gem_object_set_to_gtt_domain(obj, false); - if (ret) - return ret; - } + ret = i915_gem_object_wait_rendering(obj, true); + if (ret) + return ret; } ret = i915_gem_object_get_pages(obj); @@ -748,11 +749,9 @@ i915_gem_shmem_pwrite(struct drm_device *dev, * optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ needs_clflush_after = cpu_write_needs_clflush(obj); - if (i915_gem_obj_bound_any(obj)) { - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ret; - } + ret = i915_gem_object_wait_rendering(obj, false); + if (ret) + return ret; } /* Same trick applies to invalidate partially written cachelines read * before writing. */ @@ -966,7 +965,7 @@ i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); ret = 0; - if (seqno == ring->outstanding_lazy_request) + if (seqno == ring->outstanding_lazy_seqno) ret = i915_add_request(ring, NULL); return ret; @@ -2078,11 +2077,10 @@ int __i915_add_request(struct intel_ring_buffer *ring, if (ret) return ret; - request = kmalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL) + request = ring->preallocated_lazy_request; + if (WARN_ON(request == NULL)) return -ENOMEM; - /* Record the position of the start of the request so that * should we detect the updated seqno part-way through the * GPU processing the request, we never over-estimate the @@ -2091,17 +2089,13 @@ int __i915_add_request(struct intel_ring_buffer *ring, request_ring_position = intel_ring_get_tail(ring); ret = ring->add_request(ring); - if (ret) { - kfree(request); + if (ret) return ret; - } request->seqno = intel_ring_get_seqno(ring); request->ring = ring; request->head = request_start; request->tail = request_ring_position; - request->ctx = ring->last_context; - request->batch_obj = obj; /* Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this @@ -2109,7 +2103,12 @@ int __i915_add_request(struct intel_ring_buffer *ring, * inactive_list and lose its active reference. Hence we do not need * to explicitly hold another reference here. */ + request->batch_obj = obj; + /* Hold a reference to the current context so that we can inspect + * it later in case a hangcheck error event fires. + */ + request->ctx = ring->last_context; if (request->ctx) i915_gem_context_reference(request->ctx); @@ -2129,7 +2128,8 @@ int __i915_add_request(struct intel_ring_buffer *ring, } trace_i915_gem_request_add(ring, request->seqno); - ring->outstanding_lazy_request = 0; + ring->outstanding_lazy_seqno = 0; + ring->preallocated_lazy_request = NULL; if (!dev_priv->ums.mm_suspended) { i915_queue_hangcheck(ring->dev); @@ -2224,6 +2224,21 @@ static bool i915_request_guilty(struct drm_i915_gem_request *request, return false; } +static bool i915_context_is_banned(const struct i915_ctx_hang_stats *hs) +{ + const unsigned long elapsed = get_seconds() - hs->guilty_ts; + + if (hs->banned) + return true; + + if (elapsed <= DRM_I915_CTX_BAN_PERIOD) { + DRM_ERROR("context hanging too fast, declaring banned!\n"); + return true; + } + + return false; +} + static void i915_set_reset_status(struct intel_ring_buffer *ring, struct drm_i915_gem_request *request, u32 acthd) @@ -2260,10 +2275,13 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring, hs = &request->file_priv->hang_stats; if (hs) { - if (guilty) + if (guilty) { + hs->banned = i915_context_is_banned(hs); hs->batch_active++; - else + hs->guilty_ts = get_seconds(); + } else { hs->batch_pending++; + } } } @@ -2641,11 +2659,17 @@ int i915_vma_unbind(struct i915_vma *vma) drm_i915_private_t *dev_priv = obj->base.dev->dev_private; int ret; + /* For now we only ever use 1 vma per object */ + WARN_ON(!list_is_singular(&obj->vma_list)); + if (list_empty(&vma->vma_link)) return 0; - if (!drm_mm_node_allocated(&vma->node)) - goto destroy; + if (!drm_mm_node_allocated(&vma->node)) { + i915_gem_vma_destroy(vma); + + return 0; + } if (obj->pin_count) return -EBUSY; @@ -2685,13 +2709,10 @@ int i915_vma_unbind(struct i915_vma *vma) drm_mm_remove_node(&vma->node); -destroy: i915_gem_vma_destroy(vma); /* Since the unbound list is global, only move to that list if - * no more VMAs exist. - * NB: Until we have real VMAs there will only ever be one */ - WARN_ON(!list_empty(&obj->vma_list)); + * no more VMAs exist. */ if (list_empty(&obj->vma_list)) list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); @@ -4015,7 +4036,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { INIT_LIST_HEAD(&obj->global_list); INIT_LIST_HEAD(&obj->ring_list); - INIT_LIST_HEAD(&obj->exec_list); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); @@ -4147,9 +4167,20 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) i915_gem_object_free(obj); } -struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, +struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm) { + struct i915_vma *vma; + list_for_each_entry(vma, &obj->vma_list, vma_link) + if (vma->vm == vm) + return vma; + + return NULL; +} + +static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) +{ struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); if (vma == NULL) return ERR_PTR(-ENOMEM); @@ -4169,10 +4200,29 @@ struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, return vma; } +struct i915_vma * +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) +{ + struct i915_vma *vma; + + vma = i915_gem_obj_to_vma(obj, vm); + if (!vma) + vma = __i915_gem_vma_create(obj, vm); + + return vma; +} + void i915_gem_vma_destroy(struct i915_vma *vma) { WARN_ON(vma->node.allocated); + + /* Keep the vma as a placeholder in the execbuffer reservation lists */ + if (!list_empty(&vma->exec_list)) + return; + list_del(&vma->vma_link); + kfree(vma); } @@ -4209,36 +4259,35 @@ i915_gem_idle(struct drm_device *dev) return 0; } -void i915_gem_l3_remap(struct drm_device *dev) +int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice) { + struct drm_device *dev = ring->dev; drm_i915_private_t *dev_priv = dev->dev_private; - u32 misccpctl; - int i; - - if (!HAS_L3_GPU_CACHE(dev)) - return; + u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); + u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; + int i, ret; - if (!dev_priv->l3_parity.remap_info) - return; + if (!HAS_L3_DPF(dev) || !remap_info) + return 0; - misccpctl = I915_READ(GEN7_MISCCPCTL); - I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); - POSTING_READ(GEN7_MISCCPCTL); + ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); + if (ret) + return ret; + /* + * Note: We do not worry about the concurrent register cacheline hang + * here because no other code should access these registers other than + * at initialization time. + */ for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { - u32 remap = I915_READ(GEN7_L3LOG_BASE + i); - if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) - DRM_DEBUG("0x%x was already programmed to %x\n", - GEN7_L3LOG_BASE + i, remap); - if (remap && !dev_priv->l3_parity.remap_info[i/4]) - DRM_DEBUG_DRIVER("Clearing remapped register\n"); - I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, reg_base + i); + intel_ring_emit(ring, remap_info[i/4]); } - /* Make sure all the writes land before disabling dop clock gating */ - POSTING_READ(GEN7_L3LOG_BASE); + intel_ring_advance(ring); - I915_WRITE(GEN7_MISCCPCTL, misccpctl); + return ret; } void i915_gem_init_swizzling(struct drm_device *dev) @@ -4330,7 +4379,7 @@ int i915_gem_init_hw(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; - int ret; + int ret, i; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; @@ -4338,20 +4387,26 @@ i915_gem_init_hw(struct drm_device *dev) if (dev_priv->ellc_size) I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); + if (IS_HSW_GT3(dev)) + I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED); + else + I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); + if (HAS_PCH_NOP(dev)) { u32 temp = I915_READ(GEN7_MSG_CTL); temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); I915_WRITE(GEN7_MSG_CTL, temp); } - i915_gem_l3_remap(dev); - i915_gem_init_swizzling(dev); ret = i915_gem_init_rings(dev); if (ret) return ret; + for (i = 0; i < NUM_L3_SLICES(dev); i++) + i915_gem_l3_remap(&dev_priv->ring[RCS], i); + /* * XXX: There was some w/a described somewhere suggesting loading * contexts before PPGTT. @@ -4523,6 +4578,7 @@ i915_gem_load(struct drm_device *dev) INIT_LIST_HEAD(&dev_priv->vm_list); i915_init_vm(dev_priv, &dev_priv->gtt.base); + INIT_LIST_HEAD(&dev_priv->context_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); @@ -4859,11 +4915,10 @@ bool i915_gem_obj_bound(struct drm_i915_gem_object *o, bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) { - struct drm_i915_private *dev_priv = o->base.dev->dev_private; - struct i915_address_space *vm; + struct i915_vma *vma; - list_for_each_entry(vm, &dev_priv->vm_list, global_link) - if (i915_gem_obj_bound(o, vm)) + list_for_each_entry(vma, &o->vma_list, vma_link) + if (drm_mm_node_allocated(&vma->node)) return true; return false; @@ -4920,27 +4975,3 @@ i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc) mutex_unlock(&dev->struct_mutex); return freed; } - -struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) -{ - struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, vma_link) - if (vma->vm == vm) - return vma; - - return NULL; -} - -struct i915_vma * -i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) -{ - struct i915_vma *vma; - - vma = i915_gem_obj_to_vma(obj, vm); - if (!vma) - vma = i915_gem_vma_create(obj, vm); - - return vma; -} |