Commit aeb96447 authored by yu kuai's avatar yu kuai Committed by Chen Jun
Browse files

fs/dirty_pages: dump the number of dirty pages for each inode

euler inclusion
category: feature
bugzilla: 46858
CVE: NA

---------------------------

In order to analyse IO performance when using buffered IO, it's
useful to obtain the number of dirty pages for an inode in the filesystem.

This feature is migrated from redhat-7.2. It creates 3 interfaces by using
procfs: /proc/dirty/buffer_size for buffer allocation and release;
/proc/dirty/page_threshold to filter the result; /proc/dirty/dirty_list
to get the dirty pages.

Visit http://openeuler.huawei.com/bugzilla/show_bug.cgi?id=23941 for
details about modifications and implementations.

Signed-off-by: yu kuai <yukuai3@huawei.com>
Reviewed-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: Dianfang Zhang <zhangdianfang@huawei.com>
Acked-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
parent bf5b10b3
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -52,6 +52,7 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
obj-$(CONFIG_NFS_COMMON)	+= nfs_common/
obj-$(CONFIG_NFS_COMMON)	+= nfs_common/
obj-$(CONFIG_COREDUMP)		+= coredump.o
obj-$(CONFIG_COREDUMP)		+= coredump.o
obj-$(CONFIG_SYSCTL)		+= drop_caches.o
obj-$(CONFIG_SYSCTL)		+= drop_caches.o
obj-$(CONFIG_SYSCTL)		+= dirty_pages.o


obj-$(CONFIG_FHANDLE)		+= fhandle.o
obj-$(CONFIG_FHANDLE)		+= fhandle.o
obj-y				+= iomap/
obj-y				+= iomap/

fs/dirty_pages.c

0 → 100644
+474 −0
Original line number Original line Diff line number Diff line
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/kdev_t.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include "internal.h"

static char *buf_dirty;	/* buffer to store number of dirty pages */
static unsigned long buf_size;	/* size of buffer in bytes */
static long buff_num;	/* size of buffer in number of pages */
static int buff_limit;	/* filter threshold of dirty pages*/
static spinlock_t inode_sb_list_lock;

static struct proc_dir_entry *dirty_dir;

static bool warn_once;	/* print warn message once */
static bool buff_used;	/* buffer is in used */
static struct mutex buff_lock;	/* lock when buffer is changed */

/* proc root directory */
#define DIRTY_ROOT "dirty"
/* proc file for buffer allocation and release */
#define DIRTY_SWITCH "buffer_size"
/* proc file to obtain the dirty pages of each inode */
#define DIRTY_PAGES "dirty_list"
/* proc file to filter result */
#define DIRTY_LIMIT "page_threshold"

/*
 * Mark the seq_file buffer as completely filled by setting the number of
 * bytes used (m->count) to the buffer capacity (m->size).
 */
static void seq_set_overflow(struct seq_file *m)
{
	m->count = m->size;
}

static unsigned long dump_dirtypages_inode(struct inode *inode)
{
	struct pagevec pvec;
	unsigned long nr_dirtys = 0;
	unsigned int nr_pages;
	pgoff_t index = 0;

	pagevec_init(&pvec);

	while (1) {
		nr_pages = pagevec_lookup_range_tag(&pvec, inode->i_mapping,
				&index, (pgoff_t)-1, PAGECACHE_TAG_DIRTY);
		if (!nr_pages)
			break;

		pagevec_release(&pvec);
		cond_resched();

		nr_dirtys += nr_pages;
	}

	return nr_dirtys;
}

static char *inode_filename(struct inode *inode, char *tmpname)
{
	struct dentry *dentry;
	char *filename;

	dentry = d_find_alias(inode);
	if (!dentry)
		return ERR_PTR(-ENOENT);

	tmpname[PATH_MAX-1] = '\0';
	filename = dentry_path_raw(dentry, tmpname, PATH_MAX);

	dput(dentry);

	return filename;
}

/*
 * A super block is considered writable when it is not mounted read-only
 * and its freeze state has not reached SB_FREEZE_COMPLETE.
 */
static inline bool is_sb_writable(struct super_block *sb)
{
	return !sb_rdonly(sb) &&
	       sb->s_writers.frozen != SB_FREEZE_COMPLETE;
}

/*
 * dump_dirtypages_sb - dump the dirty pages of each inode in the sb
 * @sb: the super block
 * @m: the seq_file which is initialized in proc_dpages_open
 *
 * For each inode in the sb, call dump_dirtypages_inode() to get the number
 * of dirty pages, and use seq_printf() to store the result in the buffer
 * if it is not less than the threshold (buff_limit, sampled once on entry).
 * Inodes in an unusual state are skipped, as are super blocks that are not
 * writable.
 *
 * The inode list is walked under sb->s_inode_list_lock, the lock the VFS
 * uses for sb->s_inodes. The lock is dropped while an inode is examined;
 * the inode is pinned with __iget() so that it stays on the list, and the
 * previously pinned inode is only released after the lock has been dropped
 * (iput() may sleep).
 *
 * Once the output buffer is exhausted, "terminated\n" is written over its
 * tail, warn_once is set, and the walk (and every later call until the file
 * is reopened) stops early.
 */
static void dump_dirtypages_sb(struct super_block *sb, struct seq_file *m)
{
	/* 11 characters plus the terminating NUL: 12 bytes in total */
	static const char overflow_mark[] = "terminated\n";
	struct inode *inode, *toput_inode = NULL;
	unsigned long nr_dirtys;
	const char *fstype;
	const char *filename;
	char *tmpname;
	int limit = READ_ONCE(buff_limit);

	if (warn_once)
		return;

	if (!is_sb_writable(sb))
		return;

	tmpname = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!tmpname)
		return;

	/* The filesystem type does not change while walking one sb. */
	if (sb->s_type && sb->s_type->name)
		fstype = sb->s_type->name;
	else
		fstype = "unknown";

	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		/*
		 * We must skip inodes in unusual state. We may also skip
		 * inodes without pages but we deliberately won't in case
		 * we need to reschedule to avoid softlockups.
		 */
		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
		    (inode->i_mapping->nrpages == 0 && !need_resched())) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&sb->s_inode_list_lock);

		cond_resched();

		nr_dirtys = dump_dirtypages_inode(inode);
		if (!nr_dirtys || nr_dirtys < limit)
			goto skip;

		/*
		 * seq_printf return nothing, if the buffer is exhausted
		 * (m->size <= m->count), seq_printf will not store
		 * anything, just set m->count = m->size and return. In
		 * that case, log a warn message in buffer to remind users.
		 */
		if (!warn_once && m->size <= m->count) {
			warn_once = true;
			seq_set_overflow(m);
			if (m->count >= sizeof(overflow_mark))
				memcpy(m->buf + m->count - sizeof(overflow_mark),
				       overflow_mark, sizeof(overflow_mark));
			/*
			 * We leave the loop without going through "skip",
			 * so the reference taken by __iget() above must be
			 * dropped here or this inode would stay pinned.
			 */
			iput(inode);
			goto done;
		}

		filename = inode_filename(inode, tmpname);
		if (IS_ERR_OR_NULL(filename))
			filename = "unknown";

		seq_printf(m, "FSType: %s, Dev ID: %u(%u:%u) ino %lu, dirty pages %lu, path %s\n",
			fstype, sb->s_dev, MAJOR(sb->s_dev),
			MINOR(sb->s_dev), inode->i_ino,
			nr_dirtys, filename);
skip:
		/* Release the previous cursor only while the lock is dropped. */
		iput(toput_inode);
		toput_inode = inode;
		spin_lock(&sb->s_inode_list_lock);
	}
	spin_unlock(&sb->s_inode_list_lock);
done:
	iput(toput_inode);
	kfree(tmpname);
}

/*
 * Adapter with the exact callback signature iterate_supers() expects.
 * Calling dump_dirtypages_sb() through a cast function pointer of a
 * different type is undefined behaviour and is rejected by control-flow
 * integrity checking, so the void * argument is converted here instead.
 */
static void dump_dirtypages_sb_cb(struct super_block *sb, void *arg)
{
	dump_dirtypages_sb(sb, arg);
}

/*
 * seq_file show callback for the dirty_list file: runs the dirty page dump
 * over every super block, writing into @m. Always returns 0.
 */
static int proc_dpages_show(struct seq_file *m, void *v)
{
	iterate_supers(dump_dirtypages_sb_cb, m);
	return 0;
}


/*
 * seq_read_dirty - read handler for the dirty_list proc file
 * @file: opened proc file; file->private_data is the seq_file
 * @buf: user buffer to fill
 * @size: maximum number of bytes to copy
 * @ppos: file position, advanced by the number of bytes copied
 *
 * On the first read after open (m->count == 0) the show callback is run to
 * fill m->buf. The data is then handed out from m->buf, with m->from
 * tracking how much has already been returned.
 *
 * buff_used is set for the duration of the call and cleared on exit.
 * NOTE(review): the flag is written here without holding buff_lock.
 *
 * Return: number of bytes copied, 0 at end of data, a negative value from
 * the show callback, or -EFAULT when copying to user space fails.
 */
static ssize_t seq_read_dirty(
	struct file *file,
	char __user *buf,
	size_t size,
	loff_t *ppos)
{
	struct seq_file *m = (struct seq_file *)file->private_data;
	loff_t from;
	size_t n;
	ssize_t ret = 0;

	buff_used = true;
	if (m->count == 0) {
		int err = m->op->show(m, NULL);

		if (err < 0) {
			ret = err;
			goto done;
		}
	}

	n = min(m->count - m->from, size);
	/*
	 * m->from is a size_t while simple_read_from_buffer() updates a
	 * loff_t; go through a local of the right type rather than casting
	 * the pointer, which would overrun the field where the two types
	 * differ in size.
	 */
	from = m->from;
	ret = simple_read_from_buffer(buf, n, &from, m->buf, m->count);
	if (ret < 0) {
		ret = -EFAULT;
		goto done;
	}
	m->from = from;
	/* Report what was really copied, which may be less than n. */
	*ppos += ret;
done:
	buff_used = false;
	return ret;
}

/*
 * Release the result buffer, if one is allocated, and reset buf_dirty and
 * buf_size. Does nothing when no buffer is allocated.
 */
static void free_buf_dirty(void)
{
	if (!buf_dirty)
		return;

	vfree(buf_dirty);
	buf_size = 0;
	buf_dirty = NULL;
}
static ssize_t write_proc(
	struct file *filp,
	const char *buf,
	size_t count,
	loff_t *offp)
{
	char *msg;
	int ret = 0;
	long old_buff_num;

	msg = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	if (count > PAGE_SIZE) {
		ret = -EINVAL;
		goto error;
	}

	msg[count] = '\0';

	if (copy_from_user(msg, buf, count)) {
		ret = -EINVAL;
		goto error;
	}

	old_buff_num = buff_num;
	ret = kstrtol(msg, 10, &buff_num);
	if (ret != 0 || buff_num < 0 || buff_num > 102400) {
		buff_num = 0;
		ret = -EINVAL;
		goto error;
	}

	mutex_lock(&buff_lock);

	if (buff_used) {
		ret = -EBUSY;
		goto out;
	}

	buff_used = true;

	ret = count;
	if (buff_num == 0) {
		free_buf_dirty();
		goto out;
	}
	if (buff_num == old_buff_num)
		goto out;

	free_buf_dirty();
	buf_size = PAGE_SIZE * buff_num;
	buf_dirty = vmalloc(buf_size);

	if (!buf_dirty) {
		ret = -ENOMEM;
		goto out;
	}
out:
	buff_used = false;
	mutex_unlock(&buff_lock);
error:
	kfree(msg);
	return ret;
}

/*
 * proc_dpages_open - open handler for the dirty_list proc file
 * @inode: proc inode
 * @filp: file being opened
 *
 * Sets up a single_open() seq_file and points its buffer at the
 * preallocated result buffer (buf_dirty / buf_size), which is zeroed first.
 * warn_once is reset so that a new dump can report an overflow again.
 *
 * When the open is refused after single_open() succeeded, the seq_file is
 * torn down again here, because the release handler is not called for a
 * failed open. m->buf is still NULL on those paths, so single_release()
 * does not touch the shared buffer.
 *
 * Return: 0 on success, the single_open() error, -EBUSY when the buffer is
 * in use, -ENOMEM when no buffer has been allocated yet.
 */
static int proc_dpages_open(struct inode *inode, struct file *filp)
{
	int ret;
	struct seq_file *m;

	ret = single_open(filp, proc_dpages_show, NULL);
	if (ret)
		return ret;

	m = filp->private_data;
	mutex_lock(&buff_lock);
	if (buff_used) {
		ret = -EBUSY;
		goto out;
	}
	if (buf_dirty == NULL || buf_size == 0) {
		pr_info("please allocate buffer before getting dirty pages\n");
		ret = -ENOMEM;
		goto out;
	}

	warn_once = false;
	memset(buf_dirty, 0, buf_size);
	if (!m->buf) {
		m->size = buf_size;
		m->buf = buf_dirty;
	}
out:
	mutex_unlock(&buff_lock);
	if (ret)
		single_release(inode, filp);
	return ret;
}

/*
 * Release handler for the dirty_list proc file.
 *
 * The seq_file buffer is the shared buf_dirty buffer installed by
 * proc_dpages_open(), so it is detached from the seq_file before
 * single_release() runs; the buffer must outlive this open file.
 * Always returns 0.
 */
static int seq_release_dirty(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;

	buff_used = false;

	/* we don't want to free the buf */
	seq->buf = NULL;

	(void)single_release(inode, file);

	return 0;
}

/*
 * Operations of the dirty_list proc file. It uses its own read and release
 * handlers because the seq_file buffer is the shared buf_dirty buffer
 * rather than one owned by the seq_file.
 */
static const struct proc_ops proc_dpages_operations = {
	.proc_open           = proc_dpages_open,
	.proc_read           = seq_read_dirty,
	.proc_release        = seq_release_dirty,
};

/*
 * seq_file show callback for the buffer_size file: prints buff_num, the
 * buffer size in pages, followed by a newline. Always returns 0.
 */
static int proc_switch_show(struct seq_file *m, void *v)
{
	const long pages = buff_num;

	seq_printf(m, "%ld\n", pages);

	return 0;
}

/*
 * seq_file show callback for the page_threshold file: prints the current
 * dirty page threshold (buff_limit) followed by a newline. Always returns 0.
 */
static int proc_limit_show(struct seq_file *m, void *v)
{
	const int threshold = READ_ONCE(buff_limit);

	seq_printf(m, "%d\n", threshold);

	return 0;
}

/* Open handler for the buffer_size file; output comes from proc_switch_show. */
static int proc_switch_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, proc_switch_show, NULL);
}

/* Open handler for the page_threshold file; output comes from proc_limit_show. */
static int proc_limit_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, proc_limit_show, NULL);
}

static ssize_t write_limit_proc(
	struct file *filp,
	const char *buf,
	size_t count,
	loff_t *offp)
{
	char *msg;
	int ret = 0;
	long temp;

	msg = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	if (count > PAGE_SIZE) {
		ret = -EINVAL;
		goto error;
	}

	msg[count] = '\0';
	if (copy_from_user(msg, buf, count)) {
		ret = -EINVAL;
		goto error;
	}
	ret = kstrtol(msg, 10, &temp);
	if (ret != 0 || temp < 0) {
	ret = -EINVAL;
	goto error;
}

	WRITE_ONCE(buff_limit, temp);
	ret = count;

error:
	kfree(msg);
	return ret;
}


/* Operations of the buffer_size proc file: seq_file reads, write_proc writes. */
static const struct proc_ops proc_switch_operations = {
	.proc_open           = proc_switch_open,
	.proc_read           = seq_read,
	.proc_write          = write_proc,
	.proc_lseek          = seq_lseek,
	.proc_release        = single_release,
};

/* Operations of the page_threshold proc file: seq_file reads, write_limit_proc writes. */
static const struct proc_ops proc_limit_operations = {
	.proc_open           = proc_limit_open,
	.proc_read           = seq_read,
	.proc_write          = write_limit_proc,
	.proc_lseek          = seq_lseek,
	.proc_release        = single_release,
};


/*
 * dpages_proc_init - create the /proc/dirty directory and its three files
 *
 * buff_lock is initialised before any proc entry is created: the open and
 * write handlers take it, and they can be called as soon as the entries
 * are visible.
 *
 * Return: 0 on success, -ENOMEM when any proc entry cannot be created (the
 * entries created so far are removed again).
 */
static int __init dpages_proc_init(void)
{
	struct proc_dir_entry *proc_file;

	mutex_init(&buff_lock);

	dirty_dir = proc_mkdir(DIRTY_ROOT, NULL);
	if (!dirty_dir)
		goto fail_dir;

	proc_file = proc_create(DIRTY_PAGES, 0440,
					dirty_dir, &proc_dpages_operations);
	if (!proc_file)
		goto fail_pages;

	proc_file = proc_create(DIRTY_SWITCH, 0640,
					dirty_dir, &proc_switch_operations);
	if (!proc_file)
		goto fail_switch;

	proc_file = proc_create(DIRTY_LIMIT, 0640,
					dirty_dir, &proc_limit_operations);
	if (!proc_file)
		goto fail_limit;

	return 0;

	/* Unwind in reverse order of creation. */
fail_limit:
	remove_proc_entry(DIRTY_SWITCH, dirty_dir);
fail_switch:
	remove_proc_entry(DIRTY_PAGES, dirty_dir);
fail_pages:
	remove_proc_entry(DIRTY_ROOT, NULL);
fail_dir:
	return -ENOMEM;
}

static void dpages_proc_exit(void)
{
	mutex_lock(&buff_lock);
	free_buf_dirty();
	mutex_unlock(&buff_lock);
	remove_proc_entry(DIRTY_PAGES, dirty_dir);
	remove_proc_entry(DIRTY_SWITCH, dirty_dir);
	remove_proc_entry(DIRTY_LIMIT, dirty_dir);
	remove_proc_entry(DIRTY_ROOT, NULL);
}

/* Register the init and exit entry points of the dirty pages interface. */
module_init(dpages_proc_init);
module_exit(dpages_proc_exit);