drivers/dax/super.c  +11 −3

@@ -86,6 +86,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
 {
 	struct block_device *bdev = sb->s_bdev;
 	struct dax_device *dax_dev;
+	bool dax_enabled = false;
 	pgoff_t pgoff;
 	int err, id;
 	void *kaddr;
@@ -134,14 +135,21 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
 		 * on being able to do (page_address(pfn_to_page())).
 		 */
 		WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
+		dax_enabled = true;
 	} else if (pfn_t_devmap(pfn)) {
-		/* pass */;
-	} else {
+		struct dev_pagemap *pgmap;
+
+		pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
+		if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX)
+			dax_enabled = true;
+		put_dev_pagemap(pgmap);
+	}
+
+	if (!dax_enabled) {
 		pr_debug("VFS (%s): error: dax support not enabled\n",
 				sb->s_id);
 		return -EOPNOTSUPP;
 	}
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__bdev_dax_supported);

drivers/nvdimm/pfn_devs.c  +0 −2

@@ -561,8 +561,6 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
 	res->start += start_pad;
 	res->end -= end_trunc;
 
-	pgmap->type = MEMORY_DEVICE_HOST;
-
 	if (nd_pfn->mode == PFN_MODE_RAM) {
 		if (offset < SZ_8K)
 			return -EINVAL;

drivers/nvdimm/pmem.c  +25 −0

@@ -289,6 +289,27 @@ static void pmem_release_disk(void *__pmem)
 	put_disk(pmem->disk);
 }
 
+static void pmem_release_pgmap_ops(void *__pgmap)
+{
+	dev_pagemap_put_ops();
+}
+
+static void fsdax_pagefree(struct page *page, void *data)
+{
+	wake_up_var(&page->_refcount);
+}
+
+static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap)
+{
+	dev_pagemap_get_ops();
+	if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap))
+		return -ENOMEM;
+
+	pgmap->type = MEMORY_DEVICE_FS_DAX;
+	pgmap->page_free = fsdax_pagefree;
+	return 0;
+}
+
 static int pmem_attach_disk(struct device *dev,
 		struct nd_namespace_common *ndns)
 {
@@ -347,6 +368,8 @@ static int pmem_attach_disk(struct device *dev,
 	pmem->pfn_flags = PFN_DEV;
 	pmem->pgmap.ref = &q->q_usage_counter;
 	if (is_nd_pfn(dev)) {
+		if (setup_pagemap_fsdax(dev, &pmem->pgmap))
+			return -ENOMEM;
 		addr = devm_memremap_pages(dev, &pmem->pgmap);
 		pfn_sb = nd_pfn->pfn_sb;
 		pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
@@ -358,6 +381,8 @@ static int pmem_attach_disk(struct device *dev,
 	} else if (pmem_should_map_pages(dev)) {
 		memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
 		pmem->pgmap.altmap_valid = false;
+		if (setup_pagemap_fsdax(dev, &pmem->pgmap))
+			return -ENOMEM;
 		addr = devm_memremap_pages(dev, &pmem->pgmap);
 		pmem->pfn_flags |= PFN_MAP;
 		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));

fs/Kconfig  +1 −0

@@ -38,6 +38,7 @@ config FS_DAX
 	bool "Direct Access (DAX) support"
 	depends on MMU
 	depends on !(ARM || MIPS || SPARC)
+	select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
 	select FS_IOMAP
 	select DAX
 	help
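Why the pmem driver registers a page_free callback: once CONFIG_DEV_PAGEMAP_OPS is enabled and pinned via dev_pagemap_get_ops(), the final put_page() on a ZONE_DEVICE page is diverted to the owning dev_pagemap's callback instead of the page allocator's free path. A simplified sketch of that mm-side hook follows; it is not one of the hunks shown here, and the helper name is illustrative:

#include <linux/memremap.h>
#include <linux/mm.h>

/*
 * A MEMORY_DEVICE_FS_DAX page is idle when its refcount drops to 1,
 * because ZONE_DEVICE pages are never onlined to the page allocator.
 * At the 2 -> 1 transition the driver callback runs -- for pmem that
 * is fsdax_pagefree() above, which wakes any thread sleeping on
 * &page->_refcount (see dax_layout_busy_page() in fs/dax.c below).
 */
static void put_devmap_managed_page_sketch(struct page *page)
{
	int count = page_ref_dec_return(page);

	if (count == 1)
		page->pgmap->page_free(page, page->pgmap->data);
	else if (!count)
		__put_page(page);	/* refcount hit zero: normal free path */
}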
fs/dax.c  +99 −16

@@ -351,6 +351,19 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
 	}
 }
 
+static struct page *dax_busy_page(void *entry)
+{
+	unsigned long pfn;
+
+	for_each_mapped_pfn(entry, pfn) {
+		struct page *page = pfn_to_page(pfn);
+
+		if (page_ref_count(page) > 1)
+			return page;
+	}
+	return NULL;
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
@@ -492,6 +505,90 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
 	return entry;
 }
 
+/**
+ * dax_layout_busy_page - find first pinned page in @mapping
+ * @mapping: address space to scan for a page with ref count > 1
+ *
+ * DAX requires ZONE_DEVICE mapped pages. These pages are never
+ * 'onlined' to the page allocator so they are considered idle when
+ * page->count == 1. A filesystem uses this interface to determine if
+ * any page in the mapping is busy, i.e. for DMA, or other
+ * get_user_pages() usages.
+ *
+ * It is expected that the filesystem is holding locks to block the
+ * establishment of new mappings in this address_space. I.e. it expects
+ * to be able to run unmap_mapping_range() and subsequently not race
+ * mapping_mapped() becoming true.
+ */
+struct page *dax_layout_busy_page(struct address_space *mapping)
+{
+	pgoff_t indices[PAGEVEC_SIZE];
+	struct page *page = NULL;
+	struct pagevec pvec;
+	pgoff_t index, end;
+	unsigned i;
+
+	/*
+	 * In the 'limited' case get_user_pages() for dax is disabled.
+	 */
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+		return NULL;
+
+	if (!dax_mapping(mapping) || !mapping_mapped(mapping))
+		return NULL;
+
+	pagevec_init(&pvec);
+	index = 0;
+	end = -1;
+
+	/*
+	 * If we race get_user_pages_fast() here either we'll see the
+	 * elevated page count in the pagevec_lookup and wait, or
+	 * get_user_pages_fast() will see that the page it took a reference
+	 * against is no longer mapped in the page tables and bail to the
+	 * get_user_pages() slow path.  The slow path is protected by
+	 * pte_lock() and pmd_lock(). New references are not taken without
+	 * holding those locks, and unmap_mapping_range() will not zero the
+	 * pte or pmd without holding the respective lock, so we are
+	 * guaranteed to either see new references or prevent new
+	 * references from being established.
+	 */
+	unmap_mapping_range(mapping, 0, 0, 1);
+
+	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
+				min(end - index, (pgoff_t)PAGEVEC_SIZE),
+				indices)) {
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *pvec_ent = pvec.pages[i];
+			void *entry;
+
+			index = indices[i];
+			if (index >= end)
+				break;
+
+			if (!radix_tree_exceptional_entry(pvec_ent))
+				continue;
+
+			xa_lock_irq(&mapping->i_pages);
+			entry = get_unlocked_mapping_entry(mapping, index, NULL);
+			if (entry)
+				page = dax_busy_page(entry);
+			put_unlocked_mapping_entry(mapping, index, entry);
+			xa_unlock_irq(&mapping->i_pages);
+			if (page)
+				break;
+		}
+		pagevec_remove_exceptionals(&pvec);
+		pagevec_release(&pvec);
+		index++;
+
+		if (page)
+			break;
+	}
+	return page;
+}
+EXPORT_SYMBOL_GPL(dax_layout_busy_page);
+
 static int __dax_invalidate_mapping_entry(struct address_space *mapping,
 					  pgoff_t index, bool trunc)
 {
@@ -912,7 +1009,6 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	unsigned long vaddr = vmf->address;
 	int ret = VM_FAULT_NOPAGE;
 	struct page *zero_page;
-	void *entry2;
 	pfn_t pfn;
 
 	zero_page = ZERO_PAGE(0);
@@ -922,13 +1018,8 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	}
 
 	pfn = page_to_pfn_t(zero_page);
-	entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-			RADIX_DAX_ZERO_PAGE, false);
-	if (IS_ERR(entry2)) {
-		ret = VM_FAULT_SIGBUS;
-		goto out;
-	}
-
+	dax_insert_mapping_entry(mapping, vmf, entry, pfn,
+			RADIX_DAX_ZERO_PAGE, false);
 	vm_insert_mixed(vmf->vma, vaddr, pfn);
 out:
 	trace_dax_load_hole(inode, vmf, ret);
@@ -1240,10 +1331,6 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 
 		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 						 0, write && !sync);
-		if (IS_ERR(entry)) {
-			error = PTR_ERR(entry);
-			goto error_finish_iomap;
-		}
 
 		/*
 		 * If we are doing synchronous page fault and inode needs fsync,
@@ -1327,8 +1414,6 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
 	pfn = page_to_pfn_t(zero_page);
 	ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 			RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
-	if (IS_ERR(ret))
-		goto fallback;
 
 	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
 	if (!pmd_none(*(vmf->pmd))) {
@@ -1450,8 +1535,6 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 
 		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 						RADIX_DAX_PMD, write && !sync);
-		if (IS_ERR(entry))
-			goto finish_iomap;
 
 		/*
 		 * If we are doing synchronous page fault and inode needs fsync,
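The kernel-doc above spells out the intended consumer: a filesystem that wants to change an inode's layout (truncate, hole punch) first blocks new mappings, then scans for busy pages and sleeps until the last get_user_pages() reference is dropped, at which point fsdax_pagefree() fires wake_up_var(&page->_refcount). A minimal sketch of such a wait loop, assuming the caller already holds the locks the kernel-doc requires; the helper name is hypothetical, dax_layout_busy_page() and wait_var_event() are the real interfaces, and a production caller would drop and re-acquire its locks around each wait:

#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/wait_bit.h>	/* wait_var_event() / wake_up_var() */

/* Hypothetical helper: block until no page in the mapping is pinned. */
static int fs_break_dax_layouts(struct inode *inode)
{
	struct page *page;

	for (;;) {
		page = dax_layout_busy_page(inode->i_mapping);
		if (!page)
			return 0;	/* every page idle: count == 1 */

		/*
		 * fsdax_pagefree() does wake_up_var(&page->_refcount)
		 * when the final reference is dropped.
		 */
		wait_var_event(&page->_refcount,
			       atomic_read(&page->_refcount) == 1);
	}
}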