Commit 00833408 authored by Linus Torvalds
Browse files

Merge tag '9p-for-6.1' of https://github.com/martinetd/linux

Pull 9p updates from Dominique Martinet:
 "Smaller buffers for small messages and fixes.

  The highlight of this is Christian's patch to allocate smaller buffers
  for most metadata requests: 9p with a big msize would try to allocate
  large buffers when just 4 or 8k would be more than enough; this brings
  in nice performance improvements.

  There are also a few fixes for problems reported by syzkaller (thanks
  to Schspa Shi and Tetsuo Handa for tests and feedback/patches) as well
  as some minor cleanup"

* tag '9p-for-6.1' of https://github.com/martinetd/linux:
  net/9p: clarify trans_fd parse_opt failure handling
  net/9p: add __init/__exit annotations to module init/exit funcs
  net/9p: use a dedicated spinlock for trans_fd
  9p/trans_fd: always use O_NONBLOCK read/write
  net/9p: allocate appropriate reduced message buffers
  net/9p: add 'pooled_rbuffers' flag to struct p9_trans_module
  net/9p: add p9_msg_buf_size()
  9p: add P9_ERRMAX for 9p2000 and 9p2000.u
  net/9p: split message size argument into 't_size' and 'r_size' pair
  9p: trans_fd/p9_conn_cancel: drop client lock earlier
parents 288fc860 a8e633c6
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -331,6 +331,9 @@ enum p9_qid_t {
/* size of header for zero copy read/write */
#define P9_ZC_HDR_SZ 4096

/* maximum length of an error string */
#define P9_ERRMAX 128

/**
 * struct p9_qid - file system entity information
 * @type: 8-bit type &p9_qid_t
+5 −0
Original line number Diff line number Diff line
@@ -19,6 +19,10 @@
 * @list: used to maintain a list of currently available transports
 * @name: the human-readable name of the transport
 * @maxsize: transport provided maximum packet size
 * @pooled_rbuffers: currently only set for RDMA transport which pulls the
 *                   response buffers from a shared pool, and accordingly
 *                   we're less flexible when choosing the response message
 *                   size in this case
 * @def: set if this transport should be considered the default
 * @create: member function to create a new connection on this transport
 * @close: member function to discard a connection on this transport
@@ -38,6 +42,7 @@ struct p9_trans_module {
	struct list_head list;
	char *name;		/* name of transport */
	int maxsize;		/* max message size of transport */
	bool pooled_rbuffers;
	int def;		/* this transport should be default */
	struct module *owner;
	int (*create)(struct p9_client *client,
+39 −9
Original line number Diff line number Diff line
@@ -255,24 +255,42 @@ static struct kmem_cache *p9_req_cache;
 * p9_tag_alloc - Allocate a new request.
 * @c: Client session.
 * @type: Transaction type.
 * @max_size: Maximum packet size for this request.
 * @t_size: Buffer size for holding this request
 * (automatic calculation by format template if 0).
 * @r_size: Buffer size for holding server's reply on this request
 * (automatic calculation by format template if 0).
 * @fmt: Format template for assembling 9p request message
 * (see p9pdu_vwritef).
 * @ap: Variable arguments to be fed to passed format template
 * (see p9pdu_vwritef).
 *
 * Context: Process context.
 * Return: Pointer to new request.
 */
static struct p9_req_t *
p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
p9_tag_alloc(struct p9_client *c, int8_t type, uint t_size, uint r_size,
	      const char *fmt, va_list ap)
{
	struct p9_req_t *req = kmem_cache_alloc(p9_req_cache, GFP_NOFS);
	int alloc_msize = min(c->msize, max_size);
	int alloc_tsize;
	int alloc_rsize;
	int tag;
	va_list apc;

	va_copy(apc, ap);
	alloc_tsize = min_t(size_t, c->msize,
			    t_size ?: p9_msg_buf_size(c, type, fmt, apc));
	va_end(apc);

	alloc_rsize = min_t(size_t, c->msize,
			    r_size ?: p9_msg_buf_size(c, type + 1, fmt, ap));

	if (!req)
		return ERR_PTR(-ENOMEM);

	if (p9_fcall_init(c, &req->tc, alloc_msize))
	if (p9_fcall_init(c, &req->tc, alloc_tsize))
		goto free_req;
	if (p9_fcall_init(c, &req->rc, alloc_msize))
	if (p9_fcall_init(c, &req->rc, alloc_rsize))
		goto free;

	p9pdu_reset(&req->tc);
@@ -592,11 +610,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
}

static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
					      int8_t type, int req_size,
					      int8_t type, uint t_size, uint r_size,
					      const char *fmt, va_list ap)
{
	int err;
	struct p9_req_t *req;
	va_list apc;

	p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type);

@@ -608,7 +627,9 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
	if (c->status == BeginDisconnect && type != P9_TCLUNK)
		return ERR_PTR(-EIO);

	req = p9_tag_alloc(c, type, req_size);
	va_copy(apc, ap);
	req = p9_tag_alloc(c, type, t_size, r_size, fmt, apc);
	va_end(apc);
	if (IS_ERR(req))
		return req;

@@ -643,9 +664,18 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
	int sigpending, err;
	unsigned long flags;
	struct p9_req_t *req;
	/* Passing zero for tsize/rsize to p9_client_prepare_req() tells it to
	 * auto determine an appropriate (small) request/response size
	 * according to actual message data being sent. Currently RDMA
	 * transport is excluded from this response message size optimization,
	 * as it would not cope with it, due to its pooled response buffers
	 * (using an optimized request size for RDMA as well though).
	 */
	const uint tsize = 0;
	const uint rsize = c->trans_mod->pooled_rbuffers ? c->msize : 0;

	va_start(ap, fmt);
	req = p9_client_prepare_req(c, type, c->msize, fmt, ap);
	req = p9_client_prepare_req(c, type, tsize, rsize, fmt, ap);
	va_end(ap);
	if (IS_ERR(req))
		return req;
@@ -743,7 +773,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
	/* We allocate inline protocol data of only 4k bytes.
	 * The actual content is passed in a zero-copy fashion.
	 */
	req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap);
	req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, P9_ZC_HDR_SZ, fmt, ap);
	va_end(ap);
	if (IS_ERR(req))
		return req;
+167 −0
Original line number Diff line number Diff line
@@ -23,6 +23,173 @@

#include <trace/events/9p.h>

/*
 * Encoded size of a 9p string: len[2] text[len].
 * A NULL pointer is counted as an empty string and the text length is
 * capped at USHRT_MAX, the largest value the 2-byte length field can hold.
 * Note: @s is evaluated more than once, so it must be free of side effects.
 */
#define P9_STRLEN(s) \
	(2 + min_t(size_t, (s) ? strlen(s) : 0, USHRT_MAX))

/**
 * p9_msg_buf_size - Returns a buffer size sufficiently large to hold the
 * intended 9p message.
 * @c: client
 * @type: message type
 * @fmt: format template for assembling request message
 * (see p9pdu_vwritef)
 * @ap: variable arguments to be fed to passed format template
 * (see p9pdu_vwritef)
 *
 * Note: Even for response types (P9_R*) the format template and variable
 * arguments must always be for the originating request type (P9_T*).
 *
 * The arguments in @ap are consumed with va_arg(); a caller that still needs
 * them afterwards has to hand in a va_copy() of its list.
 *
 * Return: for the variable length message types, the encoded message size
 * computed from the passed arguments (for the handled response types never
 * less than the size of an error reply of the client's protocol version);
 * 8k for the "small" message types; 4k for all remaining message types.
 * Message types that are not used at all and a @fmt that does not match the
 * one expected for @type trigger BUG().
 */
size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type,
			const char *fmt, va_list ap)
{
	/* size[4] type[1] tag[2] */
	const int hdr = 4 + 1 + 2;
	/* ename[s] errno[4], with ename[s] being len[2] text[P9_ERRMAX] */
	const int rerror_size = hdr + 2 + P9_ERRMAX + 4;
	/* ecode[4] */
	const int rlerror_size = hdr + 4;
	/* a reply buffer must always be able to take an error response */
	const int err_size =
		c->proto_version == p9_proto_2000L ? rlerror_size : rerror_size;

	static_assert(NAME_MAX <= 4*1024, "p9_msg_buf_size() currently assumes "
				  "a max. allowed directory entry name length of 4k");

	switch (type) {

	/* message types not used at all */
	case P9_TERROR:
	case P9_TLERROR:
	case P9_TAUTH:
	case P9_RAUTH:
		BUG();

	/* variable length & potentially large message types */
	case P9_TATTACH:
		BUG_ON(strcmp("ddss?u", fmt));
		va_arg(ap, int32_t);	/* fid */
		va_arg(ap, int32_t);	/* afid */
		{
			const char *uname = va_arg(ap, const char *);
			const char *aname = va_arg(ap, const char *);
			/* fid[4] afid[4] uname[s] aname[s] n_uname[4] */
			return hdr + 4 + 4 + P9_STRLEN(uname) + P9_STRLEN(aname) + 4;
		}
	case P9_TWALK:
		BUG_ON(strcmp("ddT", fmt));
		va_arg(ap, int32_t);	/* fid */
		va_arg(ap, int32_t);	/* newfid */
		{
			uint i, nwname = va_arg(ap, int);
			size_t wname_all;
			const char **wnames = va_arg(ap, const char **);

			/* sum up the encoded size of every path element */
			for (i = 0, wname_all = 0; i < nwname; ++i)
				wname_all += P9_STRLEN(wnames[i]);
			/* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
			return hdr + 4 + 4 + 2 + wname_all;
		}
	case P9_RWALK:
		/* sized from the arguments of the originating Twalk */
		BUG_ON(strcmp("ddT", fmt));
		va_arg(ap, int32_t);
		va_arg(ap, int32_t);
		{
			uint nwname = va_arg(ap, int);
			/* nwqid[2] nwqid*(wqid[13]) */
			return max_t(size_t, hdr + 2 + nwname * 13, err_size);
		}
	case P9_TCREATE:
		BUG_ON(strcmp("dsdb?s", fmt));
		va_arg(ap, int32_t);	/* fid */
		{
			const char *name = va_arg(ap, const char *);
			if (c->proto_version == p9_proto_legacy) {
				/* fid[4] name[s] perm[4] mode[1] */
				return hdr + 4 + P9_STRLEN(name) + 4 + 1;
			} else {
				va_arg(ap, int32_t);	/* perm */
				/* mode[1] is promoted to int in varargs */
				va_arg(ap, int);
				{
					const char *ext = va_arg(ap, const char *);
					/* fid[4] name[s] perm[4] mode[1] extension[s] */
					return hdr + 4 + P9_STRLEN(name) + 4 + 1 + P9_STRLEN(ext);
				}
			}
		}
	case P9_TLCREATE:
		BUG_ON(strcmp("dsddg", fmt));
		va_arg(ap, int32_t);	/* fid */
		{
			const char *name = va_arg(ap, const char *);
			/* fid[4] name[s] flags[4] mode[4] gid[4] */
			return hdr + 4 + P9_STRLEN(name) + 4 + 4 + 4;
		}
	case P9_RREAD:
	case P9_RREADDIR:
		/* sized from the count argument of the originating request */
		BUG_ON(strcmp("dqd", fmt));
		va_arg(ap, int32_t);
		va_arg(ap, int64_t);
		{
			const int32_t count = va_arg(ap, int32_t);
			/* count[4] data[count] */
			return max_t(size_t, hdr + 4 + count, err_size);
		}
	case P9_TWRITE:
		BUG_ON(strcmp("dqV", fmt));
		va_arg(ap, int32_t);	/* fid */
		va_arg(ap, int64_t);	/* offset */
		{
			const int32_t count = va_arg(ap, int32_t);
			/* fid[4] offset[8] count[4] data[count] */
			return hdr + 4 + 8 + 4 + count;
		}
	case P9_TRENAMEAT:
		BUG_ON(strcmp("dsds", fmt));
		va_arg(ap, int32_t);	/* olddirfid */
		{
			const char *oldname, *newname;
			oldname = va_arg(ap, const char *);
			va_arg(ap, int32_t);	/* newdirfid */
			newname = va_arg(ap, const char *);
			/* olddirfid[4] oldname[s] newdirfid[4] newname[s] */
			return hdr + 4 + P9_STRLEN(oldname) + 4 + P9_STRLEN(newname);
		}
	case P9_TSYMLINK:
		BUG_ON(strcmp("dssg", fmt));
		va_arg(ap, int32_t);	/* fid */
		{
			const char *name = va_arg(ap, const char *);
			const char *symtgt = va_arg(ap, const char *);
			/* fid[4] name[s] symtgt[s] gid[4] */
			return hdr + 4 + P9_STRLEN(name) + P9_STRLEN(symtgt) + 4;
		}

	case P9_RERROR:
		return rerror_size;
	case P9_RLERROR:
		return rlerror_size;

	/* small message types */
	case P9_TWSTAT:
	case P9_RSTAT:
	case P9_RREADLINK:
	case P9_TXATTRWALK:
	case P9_TXATTRCREATE:
	case P9_TLINK:
	case P9_TMKDIR:
	case P9_TMKNOD:
	case P9_TRENAME:
	case P9_TUNLINKAT:
	case P9_TLOCK:
		return 8 * 1024;

	/* tiny message types */
	default:
		return 4 * 1024;

	}
}

static int
p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);

+2 −0
Original line number Diff line number Diff line
@@ -8,6 +8,8 @@
 *  Copyright (C) 2008 by IBM, Corp.
 */

/* Buffer size sufficiently large to hold the 9p message of the given type;
 * fmt and ap are the format template and arguments of the request message.
 */
size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type,
			const char *fmt, va_list ap);
int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
		  va_list ap);
int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
Loading