Loading fs/xfs/Kconfig +1 −0 Original line number Diff line number Diff line Loading @@ -4,6 +4,7 @@ config XFS_FS depends on (64BIT || LBDAF) select EXPORTFS select LIBCRC32C select FS_IOMAP help XFS is a high performance journaling filesystem which originated on the SGI IRIX platform. It is completely multi-threaded, can Loading fs/xfs/xfs_aops.c +19 −264 Original line number Diff line number Diff line Loading @@ -1143,6 +1143,8 @@ __xfs_get_blocks( ssize_t size; int new = 0; BUG_ON(create && !direct); if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; Loading @@ -1150,22 +1152,14 @@ __xfs_get_blocks( ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); size = bh_result->b_size; if (!create && direct && offset >= i_size_read(inode)) if (!create && offset >= i_size_read(inode)) return 0; /* * Direct I/O is usually done on preallocated files, so try getting * a block mapping without an exclusive lock first. For buffered * writes we already have the exclusive iolock anyway, so avoiding * a lock roundtrip here by taking the ilock exclusive from the * beginning is a useful micro optimization. * a block mapping without an exclusive lock first. */ if (create && !direct) { lockmode = XFS_ILOCK_EXCL; xfs_ilock(ip, lockmode); } else { lockmode = xfs_ilock_data_map_shared(ip); } ASSERT(offset <= mp->m_super->s_maxbytes); if (offset + size > mp->m_super->s_maxbytes) Loading @@ -1184,7 +1178,6 @@ __xfs_get_blocks( (imap.br_startblock == HOLESTARTBLOCK || imap.br_startblock == DELAYSTARTBLOCK) || (IS_DAX(inode) && ISUNWRITTEN(&imap)))) { if (direct || xfs_get_extsz_hint(ip)) { /* * xfs_iomap_write_direct() expects the shared lock. It * is unlocked on return. Loading @@ -1198,23 +1191,6 @@ __xfs_get_blocks( return error; new = 1; } else { /* * Delalloc reservations do not require a transaction, * we can go on without dropping the lock here. 
If we * are allocating a new delalloc block, make sure that * we set the new flag so that we mark the buffer new so * that we know that it is newly allocated if the write * fails. */ if (nimaps && imap.br_startblock == HOLESTARTBLOCK) new = 1; error = xfs_iomap_write_delay(ip, offset, size, &imap); if (error) goto out_unlock; xfs_iunlock(ip, lockmode); } trace_xfs_get_blocks_alloc(ip, offset, size, ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN : XFS_IO_DELALLOC, &imap); Loading @@ -1235,9 +1211,7 @@ __xfs_get_blocks( } /* trim mapping down to size requested */ if (direct || size > (1 << inode->i_blkbits)) xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size); xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size); /* * For unwritten extents do not report a disk address in the buffered Loading @@ -1250,7 +1224,7 @@ __xfs_get_blocks( if (ISUNWRITTEN(&imap)) set_buffer_unwritten(bh_result); /* direct IO needs special help */ if (create && direct) { if (create) { if (dax_fault) ASSERT(!ISUNWRITTEN(&imap)); else Loading Loading @@ -1279,14 +1253,7 @@ __xfs_get_blocks( (new || ISUNWRITTEN(&imap)))) set_buffer_new(bh_result); if (imap.br_startblock == DELAYSTARTBLOCK) { BUG_ON(direct); if (create) { set_buffer_uptodate(bh_result); set_buffer_mapped(bh_result); set_buffer_delay(bh_result); } } BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK); return 0; Loading Loading @@ -1427,216 +1394,6 @@ xfs_vm_direct_IO( xfs_get_blocks_direct, endio, NULL, flags); } /* * Punch out the delalloc blocks we have already allocated. * * Don't bother with xfs_setattr given that nothing can have made it to disk yet * as the page is still locked at this point. 
*/ STATIC void xfs_vm_kill_delalloc_range( struct inode *inode, loff_t start, loff_t end) { struct xfs_inode *ip = XFS_I(inode); xfs_fileoff_t start_fsb; xfs_fileoff_t end_fsb; int error; start_fsb = XFS_B_TO_FSB(ip->i_mount, start); end_fsb = XFS_B_TO_FSB(ip->i_mount, end); if (end_fsb <= start_fsb) return; xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_bmap_punch_delalloc_range(ip, start_fsb, end_fsb - start_fsb); if (error) { /* something screwed, just bail */ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { xfs_alert(ip->i_mount, "xfs_vm_write_failed: unable to clean up ino %lld", ip->i_ino); } } xfs_iunlock(ip, XFS_ILOCK_EXCL); } STATIC void xfs_vm_write_failed( struct inode *inode, struct page *page, loff_t pos, unsigned len) { loff_t block_offset; loff_t block_start; loff_t block_end; loff_t from = pos & (PAGE_SIZE - 1); loff_t to = from + len; struct buffer_head *bh, *head; struct xfs_mount *mp = XFS_I(inode)->i_mount; /* * The request pos offset might be 32 or 64 bit, this is all fine * on 64-bit platform. However, for 64-bit pos request on 32-bit * platform, the high 32-bit will be masked off if we evaluate the * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is * 0xfffff000 as an unsigned long, hence the result is incorrect * which could cause the following ASSERT failed in most cases. * In order to avoid this, we can evaluate the block_offset of the * start of the page by using shifts rather than masks the mismatch * problem. */ block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT; ASSERT(block_offset + from == pos); head = page_buffers(page); block_start = 0; for (bh = head; bh != head || !block_start; bh = bh->b_this_page, block_start = block_end, block_offset += bh->b_size) { block_end = block_start + bh->b_size; /* skip buffers before the write */ if (block_end <= from) continue; /* if the buffer is after the write, we're done */ if (block_start >= to) break; /* * Process delalloc and unwritten buffers beyond EOF. 
We can * encounter unwritten buffers in the event that a file has * post-EOF unwritten extents and an extending write happens to * fail (e.g., an unaligned write that also involves a delalloc * to the same page). */ if (!buffer_delay(bh) && !buffer_unwritten(bh)) continue; if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) && block_offset < i_size_read(inode)) continue; if (buffer_delay(bh)) xfs_vm_kill_delalloc_range(inode, block_offset, block_offset + bh->b_size); /* * This buffer does not contain data anymore. make sure anyone * who finds it knows that for certain. */ clear_buffer_delay(bh); clear_buffer_uptodate(bh); clear_buffer_mapped(bh); clear_buffer_new(bh); clear_buffer_dirty(bh); clear_buffer_unwritten(bh); } } /* * This used to call block_write_begin(), but it unlocks and releases the page * on error, and we need that page to be able to punch stale delalloc blocks out * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at * the appropriate point. */ STATIC int xfs_vm_write_begin( struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { pgoff_t index = pos >> PAGE_SHIFT; struct page *page; int status; struct xfs_mount *mp = XFS_I(mapping->host)->i_mount; ASSERT(len <= PAGE_SIZE); page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; status = __block_write_begin(page, pos, len, xfs_get_blocks); if (xfs_mp_fail_writes(mp)) status = -EIO; if (unlikely(status)) { struct inode *inode = mapping->host; size_t isize = i_size_read(inode); xfs_vm_write_failed(inode, page, pos, len); unlock_page(page); /* * If the write is beyond EOF, we only want to kill blocks * allocated in this write, not blocks that were previously * written successfully. 
*/ if (xfs_mp_fail_writes(mp)) isize = 0; if (pos + len > isize) { ssize_t start = max_t(ssize_t, pos, isize); truncate_pagecache_range(inode, start, pos + len); } put_page(page); page = NULL; } *pagep = page; return status; } /* * On failure, we only need to kill delalloc blocks beyond EOF in the range of * this specific write because they will never be written. Previous writes * beyond EOF where block allocation succeeded do not need to be trashed, so * only new blocks from this write should be trashed. For blocks within * EOF, generic_write_end() zeros them so they are safe to leave alone and be * written with all the other valid data. */ STATIC int xfs_vm_write_end( struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { int ret; ASSERT(len <= PAGE_SIZE); ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); if (unlikely(ret < len)) { struct inode *inode = mapping->host; size_t isize = i_size_read(inode); loff_t to = pos + len; if (to > isize) { /* only kill blocks in this write beyond EOF */ if (pos > isize) isize = pos; xfs_vm_kill_delalloc_range(inode, isize, to); truncate_pagecache_range(inode, isize, to); } } return ret; } STATIC sector_t xfs_vm_bmap( struct address_space *mapping, Loading Loading @@ -1747,8 +1504,6 @@ const struct address_space_operations xfs_address_space_operations = { .set_page_dirty = xfs_vm_set_page_dirty, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .write_begin = xfs_vm_write_begin, .write_end = xfs_vm_write_end, .bmap = xfs_vm_bmap, .direct_IO = xfs_vm_direct_IO, .migratepage = buffer_migrate_page, Loading fs/xfs/xfs_bmap_util.c +130 −213 Original line number Diff line number Diff line Loading @@ -1087,99 +1087,120 @@ xfs_alloc_file_space( return error; } /* * Zero file bytes between startoff and endoff inclusive. * The iolock is held exclusive and no blocks are buffered. 
* * This function is used by xfs_free_file_space() to zero * partial blocks when the range to free is not block aligned. * When unreserving space with boundaries that are not block * aligned we round up the start and round down the end * boundaries and then use this function to zero the parts of * the blocks that got dropped during the rounding. */ STATIC int xfs_zero_remaining_bytes( xfs_inode_t *ip, xfs_off_t startoff, xfs_off_t endoff) static int xfs_unmap_extent( struct xfs_inode *ip, xfs_fileoff_t startoffset_fsb, xfs_filblks_t len_fsb, int *done) { xfs_bmbt_irec_t imap; xfs_fileoff_t offset_fsb; xfs_off_t lastoffset; xfs_off_t offset; xfs_buf_t *bp; xfs_mount_t *mp = ip->i_mount; int nimap; int error = 0; struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; struct xfs_bmap_free free_list; xfs_fsblock_t firstfsb; uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); int error; /* * Avoid doing I/O beyond eof - it's not necessary * since nothing can read beyond eof. The space will * be zeroed when the file is extended anyway. 
*/ if (startoff >= XFS_ISIZE(ip)) return 0; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); if (error) { ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); return error; } if (endoff > XFS_ISIZE(ip)) endoff = XFS_ISIZE(ip); xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot, ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS); if (error) goto out_trans_cancel; for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { uint lock_mode; xfs_trans_ijoin(tp, ip, 0); offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; xfs_bmap_init(&free_list, &firstfsb); error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb, &free_list, done); if (error) goto out_bmap_cancel; lock_mode = xfs_ilock_data_map_shared(ip); error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0); xfs_iunlock(ip, lock_mode); error = xfs_bmap_finish(&tp, &free_list, NULL); if (error) goto out_bmap_cancel; if (error || nimap < 1) break; ASSERT(imap.br_blockcount >= 1); ASSERT(imap.br_startoff == offset_fsb); ASSERT(imap.br_startblock != DELAYSTARTBLOCK); error = xfs_trans_commit(tp); out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; if (imap.br_startblock == HOLESTARTBLOCK || imap.br_state == XFS_EXT_UNWRITTEN) { /* skip the entire extent */ lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + imap.br_blockcount) - 1; continue; out_bmap_cancel: xfs_bmap_cancel(&free_list); out_trans_cancel: xfs_trans_cancel(tp); goto out_unlock; } lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; if (lastoffset > endoff) lastoffset = endoff; static int xfs_adjust_extent_unmap_boundaries( struct xfs_inode *ip, xfs_fileoff_t *startoffset_fsb, xfs_fileoff_t *endoffset_fsb) { struct xfs_mount *mp = ip->i_mount; struct xfs_bmbt_irec imap; int nimap, error; xfs_extlen_t mod = 0; /* DAX can just zero the backing device directly */ if (IS_DAX(VFS_I(ip))) { error = dax_zero_page_range(VFS_I(ip), offset, lastoffset - offset + 1, 
xfs_get_blocks_direct); nimap = 1; error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0); if (error) return error; continue; if (nimap && imap.br_startblock != HOLESTARTBLOCK) { xfs_daddr_t block; ASSERT(imap.br_startblock != DELAYSTARTBLOCK); block = imap.br_startblock; mod = do_div(block, mp->m_sb.sb_rextsize); if (mod) *startoffset_fsb += mp->m_sb.sb_rextsize - mod; } error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp, xfs_fsb_to_db(ip, imap.br_startblock), BTOBB(mp->m_sb.sb_blocksize), 0, &bp, NULL); nimap = 1; error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0); if (error) return error; memset(bp->b_addr + (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), 0, lastoffset - offset + 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { ASSERT(imap.br_startblock != DELAYSTARTBLOCK); mod++; if (mod && mod != mp->m_sb.sb_rextsize) *endoffset_fsb -= mod; } return 0; } error = xfs_bwrite(bp); xfs_buf_relse(bp); static int xfs_flush_unmap_range( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len) { struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); xfs_off_t rounding, start, end; int error; /* wait for the completion of any pending DIOs */ inode_dio_wait(inode); rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE); start = round_down(offset, rounding); end = round_up(offset + len, rounding) - 1; error = filemap_write_and_wait_range(inode->i_mapping, start, end); if (error) return error; } return error; truncate_pagecache_range(inode, start, end); return 0; } int Loading @@ -1188,24 +1209,10 @@ xfs_free_file_space( xfs_off_t offset, xfs_off_t len) { int done; xfs_fileoff_t endoffset_fsb; int error; xfs_fsblock_t firstfsb; xfs_bmap_free_t free_list; xfs_bmbt_irec_t imap; xfs_off_t ioffset; xfs_off_t iendoffset; xfs_extlen_t mod=0; xfs_mount_t *mp; int nimap; uint resblks; xfs_off_t rounding; int rt; struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t startoffset_fsb; 
xfs_trans_t *tp; mp = ip->i_mount; xfs_fileoff_t endoffset_fsb; int done = 0, error; trace_xfs_free_file_space(ip); Loading @@ -1213,135 +1220,45 @@ xfs_free_file_space( if (error) return error; error = 0; if (len <= 0) /* if nothing being freed */ return 0; error = xfs_flush_unmap_range(ip, offset, len); if (error) return error; rt = XFS_IS_REALTIME_INODE(ip); startoffset_fsb = XFS_B_TO_FSB(mp, offset); endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); /* wait for the completion of any pending DIOs */ inode_dio_wait(VFS_I(ip)); rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE); ioffset = round_down(offset, rounding); iendoffset = round_up(offset + len, rounding) - 1; error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset, iendoffset); if (error) goto out; truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset); /* * Need to zero the stuff we're not freeing, on disk. * If it's a realtime file & can't use unwritten extents then we * actually need to zero the extent edges. Otherwise xfs_bunmapi * will take care of it for us. * Need to zero the stuff we're not freeing, on disk. If it's a RT file * and we can't use unwritten extents then we actually need to ensure * to zero the whole extent, otherwise we just need to take of block * boundaries, and xfs_bunmapi will handle the rest. 
*/ if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) { nimap = 1; error = xfs_bmapi_read(ip, startoffset_fsb, 1, &imap, &nimap, 0); if (XFS_IS_REALTIME_INODE(ip) && !xfs_sb_version_hasextflgbit(&mp->m_sb)) { error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb, &endoffset_fsb); if (error) goto out; ASSERT(nimap == 0 || nimap == 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { xfs_daddr_t block; ASSERT(imap.br_startblock != DELAYSTARTBLOCK); block = imap.br_startblock; mod = do_div(block, mp->m_sb.sb_rextsize); if (mod) startoffset_fsb += mp->m_sb.sb_rextsize - mod; return error; } nimap = 1; error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1, &imap, &nimap, 0); if (endoffset_fsb > startoffset_fsb) { while (!done) { error = xfs_unmap_extent(ip, startoffset_fsb, endoffset_fsb - startoffset_fsb, &done); if (error) goto out; ASSERT(nimap == 0 || nimap == 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { ASSERT(imap.br_startblock != DELAYSTARTBLOCK); mod++; if (mod && (mod != mp->m_sb.sb_rextsize)) endoffset_fsb -= mod; } } if ((done = (endoffset_fsb <= startoffset_fsb))) /* * One contiguous piece to clear */ error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1); else { /* * Some full blocks, possibly two pieces to clear */ if (offset < XFS_FSB_TO_B(mp, startoffset_fsb)) error = xfs_zero_remaining_bytes(ip, offset, XFS_FSB_TO_B(mp, startoffset_fsb) - 1); if (!error && XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len) error = xfs_zero_remaining_bytes(ip, XFS_FSB_TO_B(mp, endoffset_fsb), offset + len - 1); return error; } /* * free file space until done or until there is an error */ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); while (!error && !done) { /* * allocate and setup the transaction. Allow this * transaction to dip into the reserve blocks to ensure * the freeing of the space succeeds at ENOSPC. 
*/ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); if (error) { ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); break; } xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot, ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS); if (error) goto error1; xfs_trans_ijoin(tp, ip, 0); /* * issue the bunmapi() call to free the blocks * Now that we've unmap all full blocks we'll have to zero out any * partial block at the beginning and/or end. xfs_zero_range is * smart enough to skip any holes, including those we just created. */ xfs_bmap_init(&free_list, &firstfsb); error = xfs_bunmapi(tp, ip, startoffset_fsb, endoffset_fsb - startoffset_fsb, 0, 2, &firstfsb, &free_list, &done); if (error) goto error0; /* * complete the transaction */ error = xfs_bmap_finish(&tp, &free_list, NULL); if (error) goto error0; error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); } out: return error; error0: xfs_bmap_cancel(&free_list); error1: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); goto out; return xfs_zero_range(ip, offset, len, NULL); } /* Loading fs/xfs/xfs_file.c +12 −181 File changed.Preview size limit exceeded, changes collapsed. Show changes fs/xfs/xfs_inode.h +2 −1 Original line number Diff line number Diff line Loading @@ -427,7 +427,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip, enum xfs_prealloc_flags flags); int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, xfs_fsize_t isize, bool *did_zeroing); int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count); int xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count, bool *did_zero); loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start, loff_t eof, int whence); Loading Loading
fs/xfs/Kconfig +1 −0 Original line number Diff line number Diff line Loading @@ -4,6 +4,7 @@ config XFS_FS depends on (64BIT || LBDAF) select EXPORTFS select LIBCRC32C select FS_IOMAP help XFS is a high performance journaling filesystem which originated on the SGI IRIX platform. It is completely multi-threaded, can Loading
fs/xfs/xfs_aops.c +19 −264 Original line number Diff line number Diff line Loading @@ -1143,6 +1143,8 @@ __xfs_get_blocks( ssize_t size; int new = 0; BUG_ON(create && !direct); if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; Loading @@ -1150,22 +1152,14 @@ __xfs_get_blocks( ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); size = bh_result->b_size; if (!create && direct && offset >= i_size_read(inode)) if (!create && offset >= i_size_read(inode)) return 0; /* * Direct I/O is usually done on preallocated files, so try getting * a block mapping without an exclusive lock first. For buffered * writes we already have the exclusive iolock anyway, so avoiding * a lock roundtrip here by taking the ilock exclusive from the * beginning is a useful micro optimization. * a block mapping without an exclusive lock first. */ if (create && !direct) { lockmode = XFS_ILOCK_EXCL; xfs_ilock(ip, lockmode); } else { lockmode = xfs_ilock_data_map_shared(ip); } ASSERT(offset <= mp->m_super->s_maxbytes); if (offset + size > mp->m_super->s_maxbytes) Loading @@ -1184,7 +1178,6 @@ __xfs_get_blocks( (imap.br_startblock == HOLESTARTBLOCK || imap.br_startblock == DELAYSTARTBLOCK) || (IS_DAX(inode) && ISUNWRITTEN(&imap)))) { if (direct || xfs_get_extsz_hint(ip)) { /* * xfs_iomap_write_direct() expects the shared lock. It * is unlocked on return. Loading @@ -1198,23 +1191,6 @@ __xfs_get_blocks( return error; new = 1; } else { /* * Delalloc reservations do not require a transaction, * we can go on without dropping the lock here. If we * are allocating a new delalloc block, make sure that * we set the new flag so that we mark the buffer new so * that we know that it is newly allocated if the write * fails. */ if (nimaps && imap.br_startblock == HOLESTARTBLOCK) new = 1; error = xfs_iomap_write_delay(ip, offset, size, &imap); if (error) goto out_unlock; xfs_iunlock(ip, lockmode); } trace_xfs_get_blocks_alloc(ip, offset, size, ISUNWRITTEN(&imap) ? 
XFS_IO_UNWRITTEN : XFS_IO_DELALLOC, &imap); Loading @@ -1235,9 +1211,7 @@ __xfs_get_blocks( } /* trim mapping down to size requested */ if (direct || size > (1 << inode->i_blkbits)) xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size); xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size); /* * For unwritten extents do not report a disk address in the buffered Loading @@ -1250,7 +1224,7 @@ __xfs_get_blocks( if (ISUNWRITTEN(&imap)) set_buffer_unwritten(bh_result); /* direct IO needs special help */ if (create && direct) { if (create) { if (dax_fault) ASSERT(!ISUNWRITTEN(&imap)); else Loading Loading @@ -1279,14 +1253,7 @@ __xfs_get_blocks( (new || ISUNWRITTEN(&imap)))) set_buffer_new(bh_result); if (imap.br_startblock == DELAYSTARTBLOCK) { BUG_ON(direct); if (create) { set_buffer_uptodate(bh_result); set_buffer_mapped(bh_result); set_buffer_delay(bh_result); } } BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK); return 0; Loading Loading @@ -1427,216 +1394,6 @@ xfs_vm_direct_IO( xfs_get_blocks_direct, endio, NULL, flags); } /* * Punch out the delalloc blocks we have already allocated. * * Don't bother with xfs_setattr given that nothing can have made it to disk yet * as the page is still locked at this point. 
*/ STATIC void xfs_vm_kill_delalloc_range( struct inode *inode, loff_t start, loff_t end) { struct xfs_inode *ip = XFS_I(inode); xfs_fileoff_t start_fsb; xfs_fileoff_t end_fsb; int error; start_fsb = XFS_B_TO_FSB(ip->i_mount, start); end_fsb = XFS_B_TO_FSB(ip->i_mount, end); if (end_fsb <= start_fsb) return; xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_bmap_punch_delalloc_range(ip, start_fsb, end_fsb - start_fsb); if (error) { /* something screwed, just bail */ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { xfs_alert(ip->i_mount, "xfs_vm_write_failed: unable to clean up ino %lld", ip->i_ino); } } xfs_iunlock(ip, XFS_ILOCK_EXCL); } STATIC void xfs_vm_write_failed( struct inode *inode, struct page *page, loff_t pos, unsigned len) { loff_t block_offset; loff_t block_start; loff_t block_end; loff_t from = pos & (PAGE_SIZE - 1); loff_t to = from + len; struct buffer_head *bh, *head; struct xfs_mount *mp = XFS_I(inode)->i_mount; /* * The request pos offset might be 32 or 64 bit, this is all fine * on 64-bit platform. However, for 64-bit pos request on 32-bit * platform, the high 32-bit will be masked off if we evaluate the * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is * 0xfffff000 as an unsigned long, hence the result is incorrect * which could cause the following ASSERT failed in most cases. * In order to avoid this, we can evaluate the block_offset of the * start of the page by using shifts rather than masks the mismatch * problem. */ block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT; ASSERT(block_offset + from == pos); head = page_buffers(page); block_start = 0; for (bh = head; bh != head || !block_start; bh = bh->b_this_page, block_start = block_end, block_offset += bh->b_size) { block_end = block_start + bh->b_size; /* skip buffers before the write */ if (block_end <= from) continue; /* if the buffer is after the write, we're done */ if (block_start >= to) break; /* * Process delalloc and unwritten buffers beyond EOF. 
We can * encounter unwritten buffers in the event that a file has * post-EOF unwritten extents and an extending write happens to * fail (e.g., an unaligned write that also involves a delalloc * to the same page). */ if (!buffer_delay(bh) && !buffer_unwritten(bh)) continue; if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) && block_offset < i_size_read(inode)) continue; if (buffer_delay(bh)) xfs_vm_kill_delalloc_range(inode, block_offset, block_offset + bh->b_size); /* * This buffer does not contain data anymore. make sure anyone * who finds it knows that for certain. */ clear_buffer_delay(bh); clear_buffer_uptodate(bh); clear_buffer_mapped(bh); clear_buffer_new(bh); clear_buffer_dirty(bh); clear_buffer_unwritten(bh); } } /* * This used to call block_write_begin(), but it unlocks and releases the page * on error, and we need that page to be able to punch stale delalloc blocks out * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at * the appropriate point. */ STATIC int xfs_vm_write_begin( struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { pgoff_t index = pos >> PAGE_SHIFT; struct page *page; int status; struct xfs_mount *mp = XFS_I(mapping->host)->i_mount; ASSERT(len <= PAGE_SIZE); page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; status = __block_write_begin(page, pos, len, xfs_get_blocks); if (xfs_mp_fail_writes(mp)) status = -EIO; if (unlikely(status)) { struct inode *inode = mapping->host; size_t isize = i_size_read(inode); xfs_vm_write_failed(inode, page, pos, len); unlock_page(page); /* * If the write is beyond EOF, we only want to kill blocks * allocated in this write, not blocks that were previously * written successfully. 
*/ if (xfs_mp_fail_writes(mp)) isize = 0; if (pos + len > isize) { ssize_t start = max_t(ssize_t, pos, isize); truncate_pagecache_range(inode, start, pos + len); } put_page(page); page = NULL; } *pagep = page; return status; } /* * On failure, we only need to kill delalloc blocks beyond EOF in the range of * this specific write because they will never be written. Previous writes * beyond EOF where block allocation succeeded do not need to be trashed, so * only new blocks from this write should be trashed. For blocks within * EOF, generic_write_end() zeros them so they are safe to leave alone and be * written with all the other valid data. */ STATIC int xfs_vm_write_end( struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { int ret; ASSERT(len <= PAGE_SIZE); ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); if (unlikely(ret < len)) { struct inode *inode = mapping->host; size_t isize = i_size_read(inode); loff_t to = pos + len; if (to > isize) { /* only kill blocks in this write beyond EOF */ if (pos > isize) isize = pos; xfs_vm_kill_delalloc_range(inode, isize, to); truncate_pagecache_range(inode, isize, to); } } return ret; } STATIC sector_t xfs_vm_bmap( struct address_space *mapping, Loading Loading @@ -1747,8 +1504,6 @@ const struct address_space_operations xfs_address_space_operations = { .set_page_dirty = xfs_vm_set_page_dirty, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .write_begin = xfs_vm_write_begin, .write_end = xfs_vm_write_end, .bmap = xfs_vm_bmap, .direct_IO = xfs_vm_direct_IO, .migratepage = buffer_migrate_page, Loading
fs/xfs/xfs_bmap_util.c +130 −213 Original line number Diff line number Diff line Loading @@ -1087,99 +1087,120 @@ xfs_alloc_file_space( return error; } /* * Zero file bytes between startoff and endoff inclusive. * The iolock is held exclusive and no blocks are buffered. * * This function is used by xfs_free_file_space() to zero * partial blocks when the range to free is not block aligned. * When unreserving space with boundaries that are not block * aligned we round up the start and round down the end * boundaries and then use this function to zero the parts of * the blocks that got dropped during the rounding. */ STATIC int xfs_zero_remaining_bytes( xfs_inode_t *ip, xfs_off_t startoff, xfs_off_t endoff) static int xfs_unmap_extent( struct xfs_inode *ip, xfs_fileoff_t startoffset_fsb, xfs_filblks_t len_fsb, int *done) { xfs_bmbt_irec_t imap; xfs_fileoff_t offset_fsb; xfs_off_t lastoffset; xfs_off_t offset; xfs_buf_t *bp; xfs_mount_t *mp = ip->i_mount; int nimap; int error = 0; struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; struct xfs_bmap_free free_list; xfs_fsblock_t firstfsb; uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); int error; /* * Avoid doing I/O beyond eof - it's not necessary * since nothing can read beyond eof. The space will * be zeroed when the file is extended anyway. 
*/ if (startoff >= XFS_ISIZE(ip)) return 0; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); if (error) { ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); return error; } if (endoff > XFS_ISIZE(ip)) endoff = XFS_ISIZE(ip); xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot, ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS); if (error) goto out_trans_cancel; for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { uint lock_mode; xfs_trans_ijoin(tp, ip, 0); offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; xfs_bmap_init(&free_list, &firstfsb); error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb, &free_list, done); if (error) goto out_bmap_cancel; lock_mode = xfs_ilock_data_map_shared(ip); error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0); xfs_iunlock(ip, lock_mode); error = xfs_bmap_finish(&tp, &free_list, NULL); if (error) goto out_bmap_cancel; if (error || nimap < 1) break; ASSERT(imap.br_blockcount >= 1); ASSERT(imap.br_startoff == offset_fsb); ASSERT(imap.br_startblock != DELAYSTARTBLOCK); error = xfs_trans_commit(tp); out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; if (imap.br_startblock == HOLESTARTBLOCK || imap.br_state == XFS_EXT_UNWRITTEN) { /* skip the entire extent */ lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + imap.br_blockcount) - 1; continue; out_bmap_cancel: xfs_bmap_cancel(&free_list); out_trans_cancel: xfs_trans_cancel(tp); goto out_unlock; } lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; if (lastoffset > endoff) lastoffset = endoff; static int xfs_adjust_extent_unmap_boundaries( struct xfs_inode *ip, xfs_fileoff_t *startoffset_fsb, xfs_fileoff_t *endoffset_fsb) { struct xfs_mount *mp = ip->i_mount; struct xfs_bmbt_irec imap; int nimap, error; xfs_extlen_t mod = 0; /* DAX can just zero the backing device directly */ if (IS_DAX(VFS_I(ip))) { error = dax_zero_page_range(VFS_I(ip), offset, lastoffset - offset + 1, 
xfs_get_blocks_direct); nimap = 1; error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0); if (error) return error; continue; if (nimap && imap.br_startblock != HOLESTARTBLOCK) { xfs_daddr_t block; ASSERT(imap.br_startblock != DELAYSTARTBLOCK); block = imap.br_startblock; mod = do_div(block, mp->m_sb.sb_rextsize); if (mod) *startoffset_fsb += mp->m_sb.sb_rextsize - mod; } error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp, xfs_fsb_to_db(ip, imap.br_startblock), BTOBB(mp->m_sb.sb_blocksize), 0, &bp, NULL); nimap = 1; error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0); if (error) return error; memset(bp->b_addr + (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), 0, lastoffset - offset + 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { ASSERT(imap.br_startblock != DELAYSTARTBLOCK); mod++; if (mod && mod != mp->m_sb.sb_rextsize) *endoffset_fsb -= mod; } return 0; } error = xfs_bwrite(bp); xfs_buf_relse(bp); static int xfs_flush_unmap_range( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len) { struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); xfs_off_t rounding, start, end; int error; /* wait for the completion of any pending DIOs */ inode_dio_wait(inode); rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE); start = round_down(offset, rounding); end = round_up(offset + len, rounding) - 1; error = filemap_write_and_wait_range(inode->i_mapping, start, end); if (error) return error; } return error; truncate_pagecache_range(inode, start, end); return 0; } int Loading @@ -1188,24 +1209,10 @@ xfs_free_file_space( xfs_off_t offset, xfs_off_t len) { int done; xfs_fileoff_t endoffset_fsb; int error; xfs_fsblock_t firstfsb; xfs_bmap_free_t free_list; xfs_bmbt_irec_t imap; xfs_off_t ioffset; xfs_off_t iendoffset; xfs_extlen_t mod=0; xfs_mount_t *mp; int nimap; uint resblks; xfs_off_t rounding; int rt; struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t startoffset_fsb; 
xfs_trans_t *tp; mp = ip->i_mount; xfs_fileoff_t endoffset_fsb; int done = 0, error; trace_xfs_free_file_space(ip); Loading @@ -1213,135 +1220,45 @@ xfs_free_file_space( if (error) return error; error = 0; if (len <= 0) /* if nothing being freed */ return 0; error = xfs_flush_unmap_range(ip, offset, len); if (error) return error; rt = XFS_IS_REALTIME_INODE(ip); startoffset_fsb = XFS_B_TO_FSB(mp, offset); endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); /* wait for the completion of any pending DIOs */ inode_dio_wait(VFS_I(ip)); rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE); ioffset = round_down(offset, rounding); iendoffset = round_up(offset + len, rounding) - 1; error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset, iendoffset); if (error) goto out; truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset); /* * Need to zero the stuff we're not freeing, on disk. * If it's a realtime file & can't use unwritten extents then we * actually need to zero the extent edges. Otherwise xfs_bunmapi * will take care of it for us. * Need to zero the stuff we're not freeing, on disk. If it's a RT file * and we can't use unwritten extents then we actually need to ensure * to zero the whole extent, otherwise we just need to take care of block * boundaries, and xfs_bunmapi will handle the rest. 
*/ if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) { nimap = 1; error = xfs_bmapi_read(ip, startoffset_fsb, 1, &imap, &nimap, 0); if (XFS_IS_REALTIME_INODE(ip) && !xfs_sb_version_hasextflgbit(&mp->m_sb)) { error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb, &endoffset_fsb); if (error) goto out; ASSERT(nimap == 0 || nimap == 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { xfs_daddr_t block; ASSERT(imap.br_startblock != DELAYSTARTBLOCK); block = imap.br_startblock; mod = do_div(block, mp->m_sb.sb_rextsize); if (mod) startoffset_fsb += mp->m_sb.sb_rextsize - mod; return error; } nimap = 1; error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1, &imap, &nimap, 0); if (endoffset_fsb > startoffset_fsb) { while (!done) { error = xfs_unmap_extent(ip, startoffset_fsb, endoffset_fsb - startoffset_fsb, &done); if (error) goto out; ASSERT(nimap == 0 || nimap == 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { ASSERT(imap.br_startblock != DELAYSTARTBLOCK); mod++; if (mod && (mod != mp->m_sb.sb_rextsize)) endoffset_fsb -= mod; } } if ((done = (endoffset_fsb <= startoffset_fsb))) /* * One contiguous piece to clear */ error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1); else { /* * Some full blocks, possibly two pieces to clear */ if (offset < XFS_FSB_TO_B(mp, startoffset_fsb)) error = xfs_zero_remaining_bytes(ip, offset, XFS_FSB_TO_B(mp, startoffset_fsb) - 1); if (!error && XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len) error = xfs_zero_remaining_bytes(ip, XFS_FSB_TO_B(mp, endoffset_fsb), offset + len - 1); return error; } /* * free file space until done or until there is an error */ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); while (!error && !done) { /* * allocate and setup the transaction. Allow this * transaction to dip into the reserve blocks to ensure * the freeing of the space succeeds at ENOSPC. 
*/ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); if (error) { ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); break; } xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot, ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS); if (error) goto error1; xfs_trans_ijoin(tp, ip, 0); /* * issue the bunmapi() call to free the blocks * Now that we've unmapped all full blocks we'll have to zero out any * partial block at the beginning and/or end. xfs_zero_range is * smart enough to skip any holes, including those we just created. */ xfs_bmap_init(&free_list, &firstfsb); error = xfs_bunmapi(tp, ip, startoffset_fsb, endoffset_fsb - startoffset_fsb, 0, 2, &firstfsb, &free_list, &done); if (error) goto error0; /* * complete the transaction */ error = xfs_bmap_finish(&tp, &free_list, NULL); if (error) goto error0; error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); } out: return error; error0: xfs_bmap_cancel(&free_list); error1: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); goto out; return xfs_zero_range(ip, offset, len, NULL); } /* Loading
fs/xfs/xfs_inode.h +2 −1 Original line number Diff line number Diff line Loading @@ -427,7 +427,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip, enum xfs_prealloc_flags flags); int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, xfs_fsize_t isize, bool *did_zeroing); int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count); int xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count, bool *did_zero); loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start, loff_t eof, int whence); Loading