Commit 6e52a9f0 authored by Alexey Gladkov's avatar Alexey Gladkov Committed by Eric W. Biederman
Browse files

Reimplement RLIMIT_MSGQUEUE on top of ucounts



The rlimit counter is tied to uid in the user_namespace. This allows
rlimit values to be specified in userns even if they are already
globally exceeded by the user. However, the value of the previous
user_namespaces cannot be exceeded.

Signed-off-by: default avatarAlexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/2531f42f7884bbfee56a978040b3e0d25cdf6cde.1619094428.git.legion@kernel.org


Signed-off-by: default avatarEric W. Biederman <ebiederm@xmission.com>
parent 21d1c5e3
Loading
Loading
Loading
Loading
+0 −4
Original line number Diff line number Diff line
@@ -18,10 +18,6 @@ struct user_struct {
#endif
#ifdef CONFIG_EPOLL
	atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
#endif
#ifdef CONFIG_POSIX_MQUEUE
	/* protected by mq_lock	*/
	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
#endif
	unsigned long locked_shm; /* How many pages of mlocked shm ? */
	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
+1 −0
Original line number Diff line number Diff line
@@ -51,6 +51,7 @@ enum ucount_type {
	UCOUNT_INOTIFY_WATCHES,
#endif
	UCOUNT_RLIMIT_NPROC,
	UCOUNT_RLIMIT_MSGQUEUE,
	UCOUNT_COUNTS,
};

+21 −19
Original line number Diff line number Diff line
@@ -144,7 +144,7 @@ struct mqueue_inode_info {
	struct pid *notify_owner;
	u32 notify_self_exec_id;
	struct user_namespace *notify_user_ns;
	struct user_struct *user;	/* user who created, for accounting */
	struct ucounts *ucounts;	/* user who created, for accounting */
	struct sock *notify_sock;
	struct sk_buff *notify_cookie;

@@ -292,7 +292,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
		struct ipc_namespace *ipc_ns, umode_t mode,
		struct mq_attr *attr)
{
	struct user_struct *u = current_user();
	struct inode *inode;
	int ret = -ENOMEM;

@@ -321,7 +320,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
		info->notify_owner = NULL;
		info->notify_user_ns = NULL;
		info->qsize = 0;
		info->user = NULL;	/* set when all is ok */
		info->ucounts = NULL;	/* set when all is ok */
		info->msg_tree = RB_ROOT;
		info->msg_tree_rightmost = NULL;
		info->node_cache = NULL;
@@ -371,19 +370,23 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
		if (mq_bytes + mq_treesize < mq_bytes)
			goto out_inode;
		mq_bytes += mq_treesize;
		info->ucounts = get_ucounts(current_ucounts());
		if (info->ucounts) {
			long msgqueue;

			spin_lock(&mq_lock);
		if (u->mq_bytes + mq_bytes < u->mq_bytes ||
		    u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
			msgqueue = inc_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
			if (msgqueue == LONG_MAX || msgqueue > rlimit(RLIMIT_MSGQUEUE)) {
				dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
				spin_unlock(&mq_lock);
				put_ucounts(info->ucounts);
				info->ucounts = NULL;
				/* mqueue_evict_inode() releases info->messages */
				ret = -EMFILE;
				goto out_inode;
			}
		u->mq_bytes += mq_bytes;
			spin_unlock(&mq_lock);

		/* all is ok */
		info->user = get_uid(u);
		}
	} else if (S_ISDIR(mode)) {
		inc_nlink(inode);
		/* Some things misbehave if size == 0 on a directory */
@@ -497,7 +500,6 @@ static void mqueue_free_inode(struct inode *inode)
static void mqueue_evict_inode(struct inode *inode)
{
	struct mqueue_inode_info *info;
	struct user_struct *user;
	struct ipc_namespace *ipc_ns;
	struct msg_msg *msg, *nmsg;
	LIST_HEAD(tmp_msg);
@@ -520,8 +522,7 @@ static void mqueue_evict_inode(struct inode *inode)
		free_msg(msg);
	}

	user = info->user;
	if (user) {
	if (info->ucounts) {
		unsigned long mq_bytes, mq_treesize;

		/* Total amount of bytes accounted for the mqueue */
@@ -533,7 +534,7 @@ static void mqueue_evict_inode(struct inode *inode)
					  info->attr.mq_msgsize);

		spin_lock(&mq_lock);
		user->mq_bytes -= mq_bytes;
		dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
		/*
		 * get_ns_from_inode() ensures that the
		 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
@@ -543,7 +544,8 @@ static void mqueue_evict_inode(struct inode *inode)
		if (ipc_ns)
			ipc_ns->mq_queues_count--;
		spin_unlock(&mq_lock);
		free_uid(user);
		put_ucounts(info->ucounts);
		info->ucounts = NULL;
	}
	if (ipc_ns)
		put_ipc_ns(ipc_ns);
+1 −0
Original line number Diff line number Diff line
@@ -823,6 +823,7 @@ void __init fork_init(void)
		init_user_ns.ucount_max[i] = max_threads/2;

	init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC);
	init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE);

#ifdef CONFIG_VMAP_STACK
	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
+1 −0
Original line number Diff line number Diff line
@@ -80,6 +80,7 @@ static struct ctl_table user_table[] = {
	UCOUNT_ENTRY("max_inotify_instances"),
	UCOUNT_ENTRY("max_inotify_watches"),
#endif
	{ },
	{ },
	{ }
};
Loading