Makefile (+0 −4)

@@ -882,10 +882,6 @@
 KBUILD_CFLAGS_KERNEL += -ffunction-sections -fdata-sections
 LDFLAGS_vmlinux += --gc-sections
 endif

-ifdef CONFIG_LIVEPATCH
-KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
-endif
-
 ifdef CONFIG_SHADOW_CALL_STACK
 CC_FLAGS_SCS := -fsanitize=shadow-call-stack
 KBUILD_CFLAGS += $(CC_FLAGS_SCS)

drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c (+293 −21)

@@ -46,6 +46,13 @@
 struct eb_vma_array {
         struct eb_vma vma[];
 };

+enum {
+        FORCE_CPU_RELOC = 1,
+        FORCE_GTT_RELOC,
+        FORCE_GPU_RELOC,
+#define DBG_FORCE_RELOC 0 /* choose one of the above! */
+};
+
 #define __EXEC_OBJECT_HAS_PIN   BIT(31)
 #define __EXEC_OBJECT_HAS_FENCE BIT(30)
 #define __EXEC_OBJECT_NEEDS_MAP BIT(29)

@@ -261,6 +268,8 @@
          */
         struct reloc_cache {
                 struct drm_mm_node node; /** temporary GTT binding */
+                unsigned long vaddr; /** Current kmap address */
+                unsigned long page; /** Currently mapped page index */
                 unsigned int gen; /** Cached value of INTEL_GEN */
                 bool use_64bit_reloc : 1;
                 bool has_llc : 1;

@@ -607,6 +616,23 @@ eb_add_vma
         }
 }

+static inline int use_cpu_reloc(const struct reloc_cache *cache,
+                                const struct drm_i915_gem_object *obj)
+{
+        if (!i915_gem_object_has_struct_page(obj))
+                return false;
+
+        if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
+                return true;
+
+        if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
+                return false;
+
+        return (cache->has_llc ||
+                obj->cache_dirty ||
+                obj->cache_level != I915_CACHE_NONE);
+}
+
 static int eb_reserve_vma(const struct i915_execbuffer *eb,
                           struct eb_vma *ev,
                           u64 pin_flags)
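DBG_FORCE_RELOC is a compile-time debug knob: left at 0, use_cpu_reloc() picks a relocation path per object, while FORCE_CPU_RELOC / FORCE_GTT_RELOC / FORCE_GPU_RELOC pin every relocation to one path for debugging. A minimal stand-alone sketch of that precedence, with hypothetical names standing in for the driver's structures:

/* Illustrative only: the struct and helper below are hypothetical
 * stand-ins for i915's reloc_cache / use_cpu_reloc(), not driver code. */
#include <stdbool.h>
#include <stdio.h>

enum { FORCE_NONE, FORCE_CPU, FORCE_GTT, FORCE_GPU };
#define DBG_FORCE FORCE_NONE    /* 0 = use the heuristic */

struct fake_obj {
        bool has_struct_page;   /* has kmap'able shmem pages */
        bool has_llc;           /* CPU/GPU caches are coherent */
        bool cache_dirty;
        bool uncached;          /* cache_level == NONE */
};

static bool pick_cpu_path(const struct fake_obj *o)
{
        if (!o->has_struct_page)        /* nothing to kmap: must go via GTT */
                return false;
        if (DBG_FORCE == FORCE_CPU)     /* debug override beats the heuristic */
                return true;
        if (DBG_FORCE == FORCE_GTT)
                return false;
        /* CPU writes are cheap when coherent, already dirty, or cached */
        return o->has_llc || o->cache_dirty || !o->uncached;
}

int main(void)
{
        struct fake_obj o = { .has_struct_page = true, .has_llc = true };

        printf("use cpu reloc: %s\n", pick_cpu_path(&o) ? "yes" : "no");
        return 0;
}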
@@ -937,6 +963,8 @@ relocation_target
 static void reloc_cache_init(struct reloc_cache *cache,
                              struct drm_i915_private *i915)
 {
+        cache->page = -1;
+        cache->vaddr = 0;
         /* Must be a variable in the struct to allow GCC to unroll. */
         cache->gen = INTEL_GEN(i915);
         cache->has_llc = HAS_LLC(i915);

@@ -948,6 +976,25 @@
         cache->target = NULL;
 }

+static inline void *unmask_page(unsigned long p)
+{
+        return (void *)(uintptr_t)(p & PAGE_MASK);
+}
+
+static inline unsigned int unmask_flags(unsigned long p)
+{
+        return p & ~PAGE_MASK;
+}
+
+#define KMAP 0x4 /* after CLFLUSH_FLAGS */
+
+static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
+{
+        struct drm_i915_private *i915 =
+                container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
+        return &i915->ggtt;
+}
+
 #define RELOC_TAIL 4

 static int reloc_gpu_chain(struct reloc_cache *cache)

@@ -1060,6 +1107,181 @@ reloc_gpu_flush
         return err;
 }

+static void reloc_cache_reset(struct reloc_cache *cache)
+{
+        void *vaddr;
+
+        if (!cache->vaddr)
+                return;
+
+        vaddr = unmask_page(cache->vaddr);
+        if (cache->vaddr & KMAP) {
+                if (cache->vaddr & CLFLUSH_AFTER)
+                        mb();
+
+                kunmap_atomic(vaddr);
+                i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
+        } else {
+                struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+
+                intel_gt_flush_ggtt_writes(ggtt->vm.gt);
+                io_mapping_unmap_atomic((void __iomem *)vaddr);
+
+                if (drm_mm_node_allocated(&cache->node)) {
+                        ggtt->vm.clear_range(&ggtt->vm,
+                                             cache->node.start,
+                                             cache->node.size);
+                        mutex_lock(&ggtt->vm.mutex);
+                        drm_mm_remove_node(&cache->node);
+                        mutex_unlock(&ggtt->vm.mutex);
+                } else {
+                        i915_vma_unpin((struct i915_vma *)cache->node.mm);
+                }
+        }
+
+        cache->vaddr = 0;
+        cache->page = -1;
+}
+
+static void *reloc_kmap(struct drm_i915_gem_object *obj,
+                        struct reloc_cache *cache,
+                        unsigned long page)
+{
+        void *vaddr;
+
+        if (cache->vaddr) {
+                kunmap_atomic(unmask_page(cache->vaddr));
+        } else {
+                unsigned int flushes;
+                int err;
+
+                err = i915_gem_object_prepare_write(obj, &flushes);
+                if (err)
+                        return ERR_PTR(err);
+
+                BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
+                BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
+
+                cache->vaddr = flushes | KMAP;
+                cache->node.mm = (void *)obj;
+                if (flushes)
+                        mb();
+        }
+
+        vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
+        cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
+        cache->page = page;
+
+        return vaddr;
+}
+
+static void *reloc_iomap(struct drm_i915_gem_object *obj,
+                         struct reloc_cache *cache,
+                         unsigned long page)
+{
+        struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+        unsigned long offset;
+        void *vaddr;
+
+        if (cache->vaddr) {
+                intel_gt_flush_ggtt_writes(ggtt->vm.gt);
+                io_mapping_unmap_atomic((void __force __iomem *)
+                                        unmask_page(cache->vaddr));
+        } else {
+                struct i915_vma *vma;
+                int err;
+
+                if (i915_gem_object_is_tiled(obj))
+                        return ERR_PTR(-EINVAL);
+
+                if (use_cpu_reloc(cache, obj))
+                        return NULL;
+
+                i915_gem_object_lock(obj);
+                err = i915_gem_object_set_to_gtt_domain(obj, true);
+                i915_gem_object_unlock(obj);
+                if (err)
+                        return ERR_PTR(err);
+
+                vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+                                               PIN_MAPPABLE |
+                                               PIN_NONBLOCK /* NOWARN */ |
+                                               PIN_NOEVICT);
+                if (IS_ERR(vma)) {
+                        memset(&cache->node, 0, sizeof(cache->node));
+                        mutex_lock(&ggtt->vm.mutex);
+                        err = drm_mm_insert_node_in_range
+                                (&ggtt->vm.mm, &cache->node,
+                                 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
+                                 0, ggtt->mappable_end,
+                                 DRM_MM_INSERT_LOW);
+                        mutex_unlock(&ggtt->vm.mutex);
+                        if (err) /* no inactive aperture space, use cpu reloc */
+                                return NULL;
+                } else {
+                        cache->node.start = vma->node.start;
+                        cache->node.mm = (void *)vma;
+                }
+        }
+
+        offset = cache->node.start;
+        if (drm_mm_node_allocated(&cache->node)) {
+                ggtt->vm.insert_page(&ggtt->vm,
+                                     i915_gem_object_get_dma_address(obj, page),
+                                     offset, I915_CACHE_NONE, 0);
+        } else {
+                offset += page << PAGE_SHIFT;
+        }
+
+        vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
+                                                         offset);
+        cache->page = page;
+        cache->vaddr = (unsigned long)vaddr;
+
+        return vaddr;
+}
+
+static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+                         struct reloc_cache *cache,
+                         unsigned long page)
+{
+        void *vaddr;
+
+        if (cache->page == page) {
+                vaddr = unmask_page(cache->vaddr);
+        } else {
+                vaddr = NULL;
+                if ((cache->vaddr & KMAP) == 0)
+                        vaddr = reloc_iomap(obj, cache, page);
+                if (!vaddr)
+                        vaddr = reloc_kmap(obj, cache, page);
+        }
+
+        return vaddr;
+}
+
+static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
+{
+        if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
+                if (flushes & CLFLUSH_BEFORE) {
+                        clflushopt(addr);
+                        mb();
+                }
+
+                *addr = value;
+
+                /*
+                 * Writes to the same cacheline are serialised by the CPU
+                 * (including clflush). On the write path, we only require
+                 * that it hits memory in an orderly fashion and place
+                 * mb barriers at the start and end of the relocation phase
+                 * to ensure ordering of clflush wrt to the system.
+                 */
+                if (flushes & CLFLUSH_AFTER)
+                        clflushopt(addr);
+        } else
+                *addr = value;
+}
+
 static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 {
         struct drm_i915_gem_object *obj = vma->obj;
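The reloc_cache keeps the current mapping and its flush state in a single unsigned long: kmap_atomic() and io_mapping_map_atomic_wc() return page-aligned addresses, so the low bits are free to carry the CLFLUSH_* flags plus the KMAP marker, which is what the two BUILD_BUG_ON()s in reloc_kmap() assert. A stand-alone sketch of that encoding, using illustrative flag values rather than the driver's definitions:

/* Stand-alone sketch of the reloc_cache->vaddr encoding: a page-aligned
 * mapping address shares one unsigned long with a few flag bits in the
 * low (sub-page) bits. Flag names/values are illustrative, not the
 * driver's CLFLUSH_* definitions. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE       4096UL
#define PAGE_MASK       (~(PAGE_SIZE - 1))

#define FLUSH_BEFORE    0x1UL   /* illustrative stand-ins for CLFLUSH_* */
#define FLUSH_AFTER     0x2UL
#define KMAP            0x4UL   /* "this is a kmap, not an iomap" marker */

static void *unmask_page(unsigned long p)          { return (void *)(p & PAGE_MASK); }
static unsigned long unmask_flags(unsigned long p) { return p & ~PAGE_MASK; }

int main(void)
{
        /* pretend this came from kmap_atomic(): always page aligned */
        static char backing[2 * PAGE_SIZE];
        unsigned long addr =
                ((unsigned long)(uintptr_t)backing + PAGE_SIZE - 1) & PAGE_MASK;

        unsigned long vaddr = addr | KMAP | FLUSH_AFTER;

        assert(unmask_page(vaddr) == (void *)addr);
        assert(unmask_flags(vaddr) == (KMAP | FLUSH_AFTER));
        printf("flags=%#lx page=%p\n", unmask_flags(vaddr), unmask_page(vaddr));
        return 0;
}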
@@ -1225,6 +1447,17 @@ reloc_gpu
         return cmd;
 }

+static inline bool use_reloc_gpu(struct i915_vma *vma)
+{
+        if (DBG_FORCE_RELOC == FORCE_GPU_RELOC)
+                return true;
+
+        if (DBG_FORCE_RELOC)
+                return false;
+
+        return !dma_resv_test_signaled_rcu(vma->resv, true);
+}
+
 static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
 {
         struct page *page;

@@ -1239,7 +1472,7 @@ vma_phys_addr
         return addr + offset_in_page(offset);
 }

-static int __reloc_entry_gpu(struct i915_execbuffer *eb,
+static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
                               struct i915_vma *vma,
                               u64 offset,
                               u64 target_addr)

@@ -1258,7 +1491,7 @@
         batch = reloc_gpu(eb, vma, len);
         if (IS_ERR(batch))
-                return PTR_ERR(batch);
+                return false;

         addr = gen8_canonical_addr(vma->node.start + offset);
         if (gen >= 8) {

@@ -1307,21 +1540,55 @@
                 *batch++ = target_addr;
         }

-        return 0;
+        return true;
 }

-static u64 relocate_entry(struct i915_execbuffer *eb,
-                          struct i915_vma *vma,
+static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+                            struct i915_vma *vma,
+                            u64 offset,
+                            u64 target_addr)
+{
+        if (eb->reloc_cache.vaddr)
+                return false;
+
+        if (!use_reloc_gpu(vma))
+                return false;
+
+        return __reloc_entry_gpu(eb, vma, offset, target_addr);
+}
+
+static u64 relocate_entry(struct i915_vma *vma,
                           const struct drm_i915_gem_relocation_entry *reloc,
+                          struct i915_execbuffer *eb,
                           const struct i915_vma *target)
 {
         u64 target_addr = relocation_target(reloc, target);
-        int err;
+        u64 offset = reloc->offset;

-        err = __reloc_entry_gpu(eb, vma, reloc->offset, target_addr);
-        if (err)
-                return err;
+        if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+                bool wide = eb->reloc_cache.use_64bit_reloc;
+                void *vaddr;
+
+repeat:
+                vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache,
+                                    offset >> PAGE_SHIFT);
+                if (IS_ERR(vaddr))
+                        return PTR_ERR(vaddr);
+
+                GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
+                clflush_write32(vaddr + offset_in_page(offset),
+                                lower_32_bits(target_addr),
+                                eb->reloc_cache.vaddr);
+
+                if (wide) {
+                        offset += sizeof(u32);
+                        target_addr >>= 32;
+                        wide = false;
+                        goto repeat;
+                }
+        }

         return target->node.start | UPDATE;
 }
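If the GPU path is declined, relocate_entry() falls back to a CPU write. With use_64bit_reloc set (gen8+), the 64-bit target address is emitted as two 32-bit stores, and the mapping is looked up again for the second dword because it can fall on the following page. A stand-alone sketch of that split, using a flat buffer in place of reloc_vaddr() and clflush_write32():

/* Sketch of the "wide" relocation split: a 64-bit value is written as two
 * 32-bit dwords, refetching the page mapping for the second one.
 * write_dword() stands in for reloc_vaddr() + clflush_write32(). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096UL

static uint8_t buf[2 * PAGE_SIZE];      /* pretend object backing store */

static void write_dword(uint64_t offset, uint32_t value)
{
        /* the driver maps (offset >> PAGE_SHIFT) here; we just index buf */
        memcpy(buf + offset, &value, sizeof(value));
}

static void relocate(uint64_t offset, uint64_t target_addr, int wide)
{
repeat:
        write_dword(offset, (uint32_t)target_addr);     /* lower_32_bits() */
        if (wide) {
                offset += sizeof(uint32_t);     /* may cross into the next page */
                target_addr >>= 32;
                wide = 0;
                goto repeat;
        }
}

int main(void)
{
        uint64_t v;

        /* straddle the page boundary: low dword on page 0, high dword on page 1 */
        relocate(PAGE_SIZE - 4, 0x0000000123456789ULL, 1);

        memcpy(&v, buf + PAGE_SIZE - 4, sizeof(v));
        printf("0x%llx\n", (unsigned long long)v);      /* 0x123456789 on little-endian */
        return 0;
}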
@@ -1386,7 +1653,8 @@ eb_relocate_entry
          * If the relocation already has the right value in it, no
          * more work needs to be done.
          */
-        if (gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
+        if (!DBG_FORCE_RELOC &&
+            gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
                 return 0;

         /* Check that the relocation address is valid... */

@@ -1418,7 +1686,7 @@ eb_relocate_entry
         ev->flags &= ~EXEC_OBJECT_ASYNC;

         /* and update the user's relocation entry */
-        return relocate_entry(eb, ev->vma, reloc, target->vma);
+        return relocate_entry(ev->vma, reloc, eb, target->vma);
 }

 static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)

@@ -1456,8 +1724,10 @@ eb_relocate_vma
                  * this is bad and so lockdep complains vehemently.
                  */
                 copied = __copy_from_user(r, urelocs, count * sizeof(r[0]));
-                if (unlikely(copied))
-                        return -EFAULT;
+                if (unlikely(copied)) {
+                        remain = -EFAULT;
+                        goto out;
+                }

                 remain -= count;
                 do {

@@ -1465,7 +1735,8 @@ eb_relocate_vma
                         if (likely(offset == 0)) {
                         } else if ((s64)offset < 0) {
-                                return (int)offset;
+                                remain = (int)offset;
+                                goto out;
                         } else {
                                 /*
                                  * Note that reporting an error now

@@ -1495,8 +1766,9 @@ eb_relocate_vma
                 } while (r++, --count);
                 urelocs += ARRAY_SIZE(stack);
         } while (remain);

-        return 0;
+out:
+        reloc_cache_reset(&eb->reloc_cache);
+        return remain;
 }

 static int eb_relocate(struct i915_execbuffer *eb)

@@ -2584,7 +2856,7 @@ i915_gem_do_execbuffer
         eb.i915 = i915;
         eb.file = file;
         eb.args = args;
-        if (!(args->flags & I915_EXEC_NO_RELOC))
+        if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
                 args->flags |= __EXEC_HAS_RELOC;

         eb.exec = exec;

drivers/gpu/drm/i915/gem/i915_gem_object.h (+4 −0)

@@ -258,6 +258,10 @@
 struct page *
 i915_gem_object_get_page(struct drm_i915_gem_object *obj,
                          unsigned int n);

+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+                               unsigned int n);
+
 dma_addr_t
 i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
                                     unsigned long n,

drivers/gpu/drm/i915/gem/i915_gem_pages.c (+14 −0)

@@ -548,6 +548,20 @@ i915_gem_object_get_page
         return nth_page(sg_page(sg), offset);
 }

+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+                               unsigned int n)
+{
+        struct page *page;
+
+        page = i915_gem_object_get_page(obj, n);
+        if (!obj->mm.dirty)
+                set_page_dirty(page);
+
+        return page;
+}
+
 dma_addr_t
 i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
                                     unsigned long n,
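i915_gem_object_get_dirty_page() exists because the CPU relocation path writes into the object's backing pages directly, so the page has to be flagged dirty before the write or a later writeback could skip it. A rough user-space analogue of that pattern, with hypothetical types standing in for the GEM object:

/* Stand-alone analogue of i915_gem_object_get_dirty_page(): hand out a
 * page for CPU writing and flag the backing as dirty the first time, so
 * the write is not lost at writeback. Names and types are illustrative,
 * not the i915 ones. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

struct fake_obj {
        unsigned char *pages;   /* backing store */
        size_t npages;
        bool dirty;             /* "needs writeback" */
};

static unsigned char *get_dirty_page(struct fake_obj *obj, unsigned int n)
{
        if (!obj->dirty)        /* mark before handing out a writable page */
                obj->dirty = true;
        return obj->pages + (size_t)n * PAGE_SIZE;
}

int main(void)
{
        struct fake_obj obj = { calloc(4, PAGE_SIZE), 4, false };

        unsigned char *p = get_dirty_page(&obj, 2);
        p[0] = 0xaa;            /* a CPU relocation write would go here */

        printf("dirty=%d\n", obj.dirty);        /* 1 */
        free(obj.pages);
        return 0;
}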
drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c (+15 −6)

@@ -37,14 +37,20 @@ __igt_gpu_reloc
                 return err;

         /* 8-Byte aligned */
-        err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
-        if (err)
+        if (!__reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0)) {
+                err = -EIO;
                 goto unpin_vma;
+        }

         /* !8-Byte aligned */
-        err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1);
-        if (err)
+        if (!__reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1)) {
+                err = -EIO;
                 goto unpin_vma;
+        }

         /* Skip to the end of the cmd page */
         i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;

@@ -54,9 +60,12 @@ __igt_gpu_reloc
         eb->reloc_cache.rq_size += i;

         /* Force batch chaining */
-        err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2);
-        if (err)
+        if (!__reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2)) {
+                err = -EIO;
                 goto unpin_vma;
+        }

         GEM_BUG_ON(!eb->reloc_cache.rq);
         rq = i915_request_get(eb->reloc_cache.rq);