Loading fs/orangefs/file.c +6 −276 Original line number Diff line number Diff line Loading @@ -256,168 +256,6 @@ static ssize_t wait_for_direct_io(enum PVFS_io_type type, struct inode *inode, return ret; } /* * The reason we need to do this is to be able to support readv and writev * that are larger than (pvfs_bufmap_size_query()) Default is * PVFS2_BUFMAP_DEFAULT_DESC_SIZE MB. What that means is that we will * create a new io vec descriptor for those memory addresses that * go beyond the limit. Return value for this routine is negative in case * of errors and 0 in case of success. * * Further, the new_nr_segs pointer is updated to hold the new value * of number of iovecs, the new_vec pointer is updated to hold the pointer * to the new split iovec, and the size array is an array of integers holding * the number of iovecs that straddle pvfs_bufmap_size_query(). * The max_new_nr_segs value is computed by the caller and returned. * (It will be (count of all iov_len/ block_size) + 1). */ static int split_iovecs(unsigned long max_new_nr_segs, /* IN */ unsigned long nr_segs, /* IN */ const struct iovec *original_iovec, /* IN */ unsigned long *new_nr_segs, /* OUT */ struct iovec **new_vec, /* OUT */ unsigned long *seg_count, /* OUT */ unsigned long **seg_array) /* OUT */ { unsigned long seg; unsigned long count = 0; unsigned long begin_seg; unsigned long tmpnew_nr_segs = 0; struct iovec *new_iovec = NULL; struct iovec *orig_iovec; unsigned long *sizes = NULL; unsigned long sizes_count = 0; if (nr_segs <= 0 || original_iovec == NULL || new_nr_segs == NULL || new_vec == NULL || seg_count == NULL || seg_array == NULL || max_new_nr_segs <= 0) { gossip_err("Invalid parameters to split_iovecs\n"); return -EINVAL; } *new_nr_segs = 0; *new_vec = NULL; *seg_count = 0; *seg_array = NULL; /* copy the passed in iovec descriptor to a temp structure */ orig_iovec = kmalloc_array(nr_segs, sizeof(*orig_iovec), PVFS2_BUFMAP_GFP_FLAGS); if (orig_iovec == NULL) { gossip_err( "split_iovecs: Could not allocate memory for %lu bytes!\n", (unsigned long)(nr_segs * sizeof(*orig_iovec))); return -ENOMEM; } new_iovec = kcalloc(max_new_nr_segs, sizeof(*new_iovec), PVFS2_BUFMAP_GFP_FLAGS); if (new_iovec == NULL) { kfree(orig_iovec); gossip_err( "split_iovecs: Could not allocate memory for %lu bytes!\n", (unsigned long)(max_new_nr_segs * sizeof(*new_iovec))); return -ENOMEM; } sizes = kcalloc(max_new_nr_segs, sizeof(*sizes), PVFS2_BUFMAP_GFP_FLAGS); if (sizes == NULL) { kfree(new_iovec); kfree(orig_iovec); gossip_err( "split_iovecs: Could not allocate memory for %lu bytes!\n", (unsigned long)(max_new_nr_segs * sizeof(*sizes))); return -ENOMEM; } /* copy the passed in iovec to a temp structure */ memcpy(orig_iovec, original_iovec, nr_segs * sizeof(*orig_iovec)); begin_seg = 0; repeat: for (seg = begin_seg; seg < nr_segs; seg++) { if (tmpnew_nr_segs >= max_new_nr_segs || sizes_count >= max_new_nr_segs) { kfree(sizes); kfree(orig_iovec); kfree(new_iovec); gossip_err ("split_iovecs: exceeded the index limit (%lu)\n", tmpnew_nr_segs); return -EINVAL; } if (count + orig_iovec[seg].iov_len < pvfs_bufmap_size_query()) { count += orig_iovec[seg].iov_len; memcpy(&new_iovec[tmpnew_nr_segs], &orig_iovec[seg], sizeof(*new_iovec)); tmpnew_nr_segs++; sizes[sizes_count]++; } else { new_iovec[tmpnew_nr_segs].iov_base = orig_iovec[seg].iov_base; new_iovec[tmpnew_nr_segs].iov_len = (pvfs_bufmap_size_query() - count); tmpnew_nr_segs++; sizes[sizes_count]++; sizes_count++; begin_seg = seg; orig_iovec[seg].iov_base += (pvfs_bufmap_size_query() - count); orig_iovec[seg].iov_len -= (pvfs_bufmap_size_query() - count); count = 0; break; } } if (seg != nr_segs) goto repeat; else sizes_count++; *new_nr_segs = tmpnew_nr_segs; /* new_iovec is freed by the caller */ *new_vec = new_iovec; *seg_count = sizes_count; /* seg_array is also freed by the caller */ *seg_array = sizes; kfree(orig_iovec); return 0; } static long bound_max_iovecs(const struct iovec *curr, unsigned long nr_segs, ssize_t *total_count) { unsigned long i; long max_nr_iovecs; ssize_t total; ssize_t count; total = 0; count = 0; max_nr_iovecs = 0; for (i = 0; i < nr_segs; i++) { const struct iovec *iv = &curr[i]; count += iv->iov_len; if (unlikely((ssize_t) (count | iv->iov_len) < 0)) return -EINVAL; if (total + iv->iov_len < pvfs_bufmap_size_query()) { total += iv->iov_len; max_nr_iovecs++; } else { total = (total + iv->iov_len - pvfs_bufmap_size_query()); max_nr_iovecs += (total / pvfs_bufmap_size_query() + 2); } } *total_count = count; return max_nr_iovecs; } /* * Common entry point for read/write/readv/writev * This function will dispatch it to either the direct I/O Loading @@ -431,25 +269,10 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, struct inode *inode = file->f_mapping->host; struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle; ssize_t ret; ssize_t total_count; unsigned int to_free; size_t count; unsigned long seg; unsigned long new_nr_segs; unsigned long max_new_nr_segs; unsigned long seg_count; unsigned long *seg_array; struct iovec *iovecptr; struct iovec *ptr; total_count = 0; ret = -EINVAL; count = 0; to_free = 0; /* Compute total and max number of segments after split */ max_new_nr_segs = bound_max_iovecs(iov, nr_segs, &count); struct iov_iter iter; size_t count = iov_length(iov, nr_segs); ssize_t total_count = 0; ssize_t ret = -EINVAL; gossip_debug(GOSSIP_FILE_DEBUG, "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n", Loading @@ -472,93 +295,10 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, goto out; } /* * if the total size of data transfer requested is greater than * the kernel-set blocksize of PVFS2, then we split the iovecs * such that no iovec description straddles a block size limit */ gossip_debug(GOSSIP_FILE_DEBUG, "%s: pvfs_bufmap_size:%d\n", __func__, pvfs_bufmap_size_query()); if (count > pvfs_bufmap_size_query()) { /* * Split up the given iovec description such that * no iovec descriptor straddles over the block-size limitation. * This makes us our job easier to stage the I/O. * In addition, this function will also compute an array * with seg_count entries that will store the number of * segments that straddle the block-size boundaries. */ ret = split_iovecs(max_new_nr_segs, /* IN */ nr_segs, /* IN */ iov, /* IN */ &new_nr_segs, /* OUT */ &iovecptr, /* OUT */ &seg_count, /* OUT */ &seg_array); /* OUT */ if (ret < 0) { gossip_err("%s: Failed to split iovecs to satisfy larger than blocksize readv/writev request %zd\n", __func__, ret); goto out; } gossip_debug(GOSSIP_FILE_DEBUG, "%s: Splitting iovecs from %lu to %lu" " [max_new %lu]\n", __func__, nr_segs, new_nr_segs, max_new_nr_segs); /* We must free seg_array and iovecptr */ to_free = 1; } else { new_nr_segs = nr_segs; /* use the given iovec description */ iovecptr = (struct iovec *)iov; /* There is only 1 element in the seg_array */ seg_count = 1; /* and its value is the number of segments passed in */ seg_array = &nr_segs; /* We dont have to free up anything */ to_free = 0; } ptr = iovecptr; gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU) %zd@%llu\n", __func__, handle, count, llu(*offset)); gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): new_nr_segs: %lu, seg_count: %lu\n", __func__, handle, new_nr_segs, seg_count); iov_iter_init(&iter, type == PVFS_IO_READ ? READ : WRITE, iov, nr_segs, count); /* PVFS2_KERNEL_DEBUG is a CFLAGS define. */ #ifdef PVFS2_KERNEL_DEBUG for (seg = 0; seg < new_nr_segs; seg++) gossip_debug(GOSSIP_FILE_DEBUG, "%s: %d) %p to %p [%d bytes]\n", __func__, (int)seg + 1, iovecptr[seg].iov_base, iovecptr[seg].iov_base + iovecptr[seg].iov_len, (int)iovecptr[seg].iov_len); for (seg = 0; seg < seg_count; seg++) gossip_debug(GOSSIP_FILE_DEBUG, "%s: %zd) %lu\n", __func__, seg + 1, seg_array[seg]); #endif seg = 0; while (total_count < count) { struct iov_iter iter; size_t each_count; size_t amt_complete; Loading @@ -579,9 +319,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, handle, (int)*offset); iov_iter_init(&iter, type == PVFS_IO_READ ? READ : WRITE, ptr, seg_array[seg], each_count); ret = wait_for_direct_io(type, inode, offset, &iter, each_count, 0); gossip_debug(GOSSIP_FILE_DEBUG, Loading @@ -593,9 +330,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, if (ret < 0) goto out; /* advance the iovec pointer */ ptr += seg_array[seg]; seg++; *offset += ret; total_count += ret; amt_complete = ret; Loading @@ -617,10 +351,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, if (total_count > 0) ret = total_count; out: if (to_free) { kfree(iovecptr); kfree(seg_array); } if (ret > 0) { if (type == PVFS_IO_READ) { file_accessed(file); Loading Loading
fs/orangefs/file.c +6 −276 Original line number Diff line number Diff line Loading @@ -256,168 +256,6 @@ static ssize_t wait_for_direct_io(enum PVFS_io_type type, struct inode *inode, return ret; } /* * The reason we need to do this is to be able to support readv and writev * that are larger than (pvfs_bufmap_size_query()) Default is * PVFS2_BUFMAP_DEFAULT_DESC_SIZE MB. What that means is that we will * create a new io vec descriptor for those memory addresses that * go beyond the limit. Return value for this routine is negative in case * of errors and 0 in case of success. * * Further, the new_nr_segs pointer is updated to hold the new value * of number of iovecs, the new_vec pointer is updated to hold the pointer * to the new split iovec, and the size array is an array of integers holding * the number of iovecs that straddle pvfs_bufmap_size_query(). * The max_new_nr_segs value is computed by the caller and returned. * (It will be (count of all iov_len/ block_size) + 1). */ static int split_iovecs(unsigned long max_new_nr_segs, /* IN */ unsigned long nr_segs, /* IN */ const struct iovec *original_iovec, /* IN */ unsigned long *new_nr_segs, /* OUT */ struct iovec **new_vec, /* OUT */ unsigned long *seg_count, /* OUT */ unsigned long **seg_array) /* OUT */ { unsigned long seg; unsigned long count = 0; unsigned long begin_seg; unsigned long tmpnew_nr_segs = 0; struct iovec *new_iovec = NULL; struct iovec *orig_iovec; unsigned long *sizes = NULL; unsigned long sizes_count = 0; if (nr_segs <= 0 || original_iovec == NULL || new_nr_segs == NULL || new_vec == NULL || seg_count == NULL || seg_array == NULL || max_new_nr_segs <= 0) { gossip_err("Invalid parameters to split_iovecs\n"); return -EINVAL; } *new_nr_segs = 0; *new_vec = NULL; *seg_count = 0; *seg_array = NULL; /* copy the passed in iovec descriptor to a temp structure */ orig_iovec = kmalloc_array(nr_segs, sizeof(*orig_iovec), PVFS2_BUFMAP_GFP_FLAGS); if (orig_iovec == NULL) { gossip_err( "split_iovecs: Could not allocate memory for %lu bytes!\n", (unsigned long)(nr_segs * sizeof(*orig_iovec))); return -ENOMEM; } new_iovec = kcalloc(max_new_nr_segs, sizeof(*new_iovec), PVFS2_BUFMAP_GFP_FLAGS); if (new_iovec == NULL) { kfree(orig_iovec); gossip_err( "split_iovecs: Could not allocate memory for %lu bytes!\n", (unsigned long)(max_new_nr_segs * sizeof(*new_iovec))); return -ENOMEM; } sizes = kcalloc(max_new_nr_segs, sizeof(*sizes), PVFS2_BUFMAP_GFP_FLAGS); if (sizes == NULL) { kfree(new_iovec); kfree(orig_iovec); gossip_err( "split_iovecs: Could not allocate memory for %lu bytes!\n", (unsigned long)(max_new_nr_segs * sizeof(*sizes))); return -ENOMEM; } /* copy the passed in iovec to a temp structure */ memcpy(orig_iovec, original_iovec, nr_segs * sizeof(*orig_iovec)); begin_seg = 0; repeat: for (seg = begin_seg; seg < nr_segs; seg++) { if (tmpnew_nr_segs >= max_new_nr_segs || sizes_count >= max_new_nr_segs) { kfree(sizes); kfree(orig_iovec); kfree(new_iovec); gossip_err ("split_iovecs: exceeded the index limit (%lu)\n", tmpnew_nr_segs); return -EINVAL; } if (count + orig_iovec[seg].iov_len < pvfs_bufmap_size_query()) { count += orig_iovec[seg].iov_len; memcpy(&new_iovec[tmpnew_nr_segs], &orig_iovec[seg], sizeof(*new_iovec)); tmpnew_nr_segs++; sizes[sizes_count]++; } else { new_iovec[tmpnew_nr_segs].iov_base = orig_iovec[seg].iov_base; new_iovec[tmpnew_nr_segs].iov_len = (pvfs_bufmap_size_query() - count); tmpnew_nr_segs++; sizes[sizes_count]++; sizes_count++; begin_seg = seg; orig_iovec[seg].iov_base += (pvfs_bufmap_size_query() - count); orig_iovec[seg].iov_len -= (pvfs_bufmap_size_query() - count); count = 0; break; } } if (seg != nr_segs) goto repeat; else sizes_count++; *new_nr_segs = tmpnew_nr_segs; /* new_iovec is freed by the caller */ *new_vec = new_iovec; *seg_count = sizes_count; /* seg_array is also freed by the caller */ *seg_array = sizes; kfree(orig_iovec); return 0; } static long bound_max_iovecs(const struct iovec *curr, unsigned long nr_segs, ssize_t *total_count) { unsigned long i; long max_nr_iovecs; ssize_t total; ssize_t count; total = 0; count = 0; max_nr_iovecs = 0; for (i = 0; i < nr_segs; i++) { const struct iovec *iv = &curr[i]; count += iv->iov_len; if (unlikely((ssize_t) (count | iv->iov_len) < 0)) return -EINVAL; if (total + iv->iov_len < pvfs_bufmap_size_query()) { total += iv->iov_len; max_nr_iovecs++; } else { total = (total + iv->iov_len - pvfs_bufmap_size_query()); max_nr_iovecs += (total / pvfs_bufmap_size_query() + 2); } } *total_count = count; return max_nr_iovecs; } /* * Common entry point for read/write/readv/writev * This function will dispatch it to either the direct I/O Loading @@ -431,25 +269,10 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, struct inode *inode = file->f_mapping->host; struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle; ssize_t ret; ssize_t total_count; unsigned int to_free; size_t count; unsigned long seg; unsigned long new_nr_segs; unsigned long max_new_nr_segs; unsigned long seg_count; unsigned long *seg_array; struct iovec *iovecptr; struct iovec *ptr; total_count = 0; ret = -EINVAL; count = 0; to_free = 0; /* Compute total and max number of segments after split */ max_new_nr_segs = bound_max_iovecs(iov, nr_segs, &count); struct iov_iter iter; size_t count = iov_length(iov, nr_segs); ssize_t total_count = 0; ssize_t ret = -EINVAL; gossip_debug(GOSSIP_FILE_DEBUG, "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n", Loading @@ -472,93 +295,10 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, goto out; } /* * if the total size of data transfer requested is greater than * the kernel-set blocksize of PVFS2, then we split the iovecs * such that no iovec description straddles a block size limit */ gossip_debug(GOSSIP_FILE_DEBUG, "%s: pvfs_bufmap_size:%d\n", __func__, pvfs_bufmap_size_query()); if (count > pvfs_bufmap_size_query()) { /* * Split up the given iovec description such that * no iovec descriptor straddles over the block-size limitation. * This makes us our job easier to stage the I/O. * In addition, this function will also compute an array * with seg_count entries that will store the number of * segments that straddle the block-size boundaries. */ ret = split_iovecs(max_new_nr_segs, /* IN */ nr_segs, /* IN */ iov, /* IN */ &new_nr_segs, /* OUT */ &iovecptr, /* OUT */ &seg_count, /* OUT */ &seg_array); /* OUT */ if (ret < 0) { gossip_err("%s: Failed to split iovecs to satisfy larger than blocksize readv/writev request %zd\n", __func__, ret); goto out; } gossip_debug(GOSSIP_FILE_DEBUG, "%s: Splitting iovecs from %lu to %lu" " [max_new %lu]\n", __func__, nr_segs, new_nr_segs, max_new_nr_segs); /* We must free seg_array and iovecptr */ to_free = 1; } else { new_nr_segs = nr_segs; /* use the given iovec description */ iovecptr = (struct iovec *)iov; /* There is only 1 element in the seg_array */ seg_count = 1; /* and its value is the number of segments passed in */ seg_array = &nr_segs; /* We dont have to free up anything */ to_free = 0; } ptr = iovecptr; gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU) %zd@%llu\n", __func__, handle, count, llu(*offset)); gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): new_nr_segs: %lu, seg_count: %lu\n", __func__, handle, new_nr_segs, seg_count); iov_iter_init(&iter, type == PVFS_IO_READ ? READ : WRITE, iov, nr_segs, count); /* PVFS2_KERNEL_DEBUG is a CFLAGS define. */ #ifdef PVFS2_KERNEL_DEBUG for (seg = 0; seg < new_nr_segs; seg++) gossip_debug(GOSSIP_FILE_DEBUG, "%s: %d) %p to %p [%d bytes]\n", __func__, (int)seg + 1, iovecptr[seg].iov_base, iovecptr[seg].iov_base + iovecptr[seg].iov_len, (int)iovecptr[seg].iov_len); for (seg = 0; seg < seg_count; seg++) gossip_debug(GOSSIP_FILE_DEBUG, "%s: %zd) %lu\n", __func__, seg + 1, seg_array[seg]); #endif seg = 0; while (total_count < count) { struct iov_iter iter; size_t each_count; size_t amt_complete; Loading @@ -579,9 +319,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, handle, (int)*offset); iov_iter_init(&iter, type == PVFS_IO_READ ? READ : WRITE, ptr, seg_array[seg], each_count); ret = wait_for_direct_io(type, inode, offset, &iter, each_count, 0); gossip_debug(GOSSIP_FILE_DEBUG, Loading @@ -593,9 +330,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, if (ret < 0) goto out; /* advance the iovec pointer */ ptr += seg_array[seg]; seg++; *offset += ret; total_count += ret; amt_complete = ret; Loading @@ -617,10 +351,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file, if (total_count > 0) ret = total_count; out: if (to_free) { kfree(iovecptr); kfree(seg_array); } if (ret > 0) { if (type == PVFS_IO_READ) { file_accessed(file); Loading