Commit ffcddcae authored by Jakub Kicinski
Browse files
Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

1) Reduce jumpstack footprint: Stash chain in last rule marker in blob for
   tracing. Remove last rule and chain from jumpstack. From Florian Westphal.

2) nf_tables validates all tables before committing the new rules.
   Unfortunately, this has two drawbacks:

   - Since the addition of the transaction mutex, pernet state gets written
     outside of the locked section from the cleanup callback. This is
     wrong, so do this cleanup directly after the table has passed all checks.

   - Tables that saw no changes are revalidated. This can be avoided by
     keeping the validation state per table, not per netns.

   From Florian Westphal.

3) Get rid of a few redundant pointers in the traceinfo structure.
   The three removed pointers are used in the expression evaluation loop,
   so gcc keeps them in registers. Passing them to the (inlined) helpers
   thus doesn't increase nft_do_chain text size, while stack is reduced
   by another 24 bytes on 64bit arches. From Florian Westphal.

4) IPVS cleanups in several ways without implementing any functional
   changes, aside from removing some debugging output:

   - Update width of source for ip_vs_sync_conn_options
     The operation is safe, use an annotation to describe it properly.

   - Consistently use array_size() in ip_vs_conn_init()
     It seems better to use helpers consistently.

   - Remove {Enter,Leave}Function. These seem to be well past their
     use-by date.

   - Correct spelling in comments.

   From Simon Horman.

5) Extended netlink error reporting for netdevices in flowtables and
   netdev chains. Allow incrementally adding/deleting devices to/from a
   netdev basechain. Allow creating a netdev chain without a device.

* tag 'nf-next-23-04-22' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: nf_tables: allow to create netdev chain without device
  netfilter: nf_tables: support for deleting devices in an existing netdev chain
  netfilter: nf_tables: support for adding new devices to an existing netdev chain
  netfilter: nf_tables: rename function to destroy hook list
  netfilter: nf_tables: do not send complete notification of deletions
  netfilter: nf_tables: extended netlink error reporting for netdevice
  ipvs: Correct spelling in comments
  ipvs: Remove {Enter,Leave}Function
  ipvs: Consistently use array_size() in ip_vs_conn_init()
  ipvs: Update width of source for ip_vs_sync_conn_options
  netfilter: nf_tables: do not store rule in traceinfo structure
  netfilter: nf_tables: do not store verdict in traceinfo structure
  netfilter: nf_tables: do not store pktinfo in traceinfo structure
  netfilter: nf_tables: remove unneeded conditional
  netfilter: nf_tables: make validation state per table
  netfilter: nf_tables: don't write table validation state without mutex
  netfilter: nf_tables: don't store chain address on jump
  netfilter: nf_tables: don't store address of last rule on jump
  netfilter: nf_tables: merge nft_rules_old structure and end of ruleblob marker
====================

Link: https://lore.kernel.org/r/20230421235021.216950-1-pablo@netfilter.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 2efb07b5 207296f1
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -45,7 +45,6 @@ struct nfnetlink_subsystem {
	int (*commit)(struct net *net, struct sk_buff *skb);
	int (*abort)(struct net *net, struct sk_buff *skb,
		     enum nfnl_abort_action action);
	void (*cleanup)(struct net *net);
	bool (*valid_genid)(struct net *net, u32 genid);
};

+7 −25
Original line number Diff line number Diff line
@@ -265,26 +265,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
			pr_err(msg, ##__VA_ARGS__);			\
	} while (0)

/* Function entry/exit tracing helpers.
 *
 * With CONFIG_IP_VS_DEBUG defined, each macro prints a KERN_DEBUG message
 * naming the current function, file and line when @level does not exceed
 * the value returned by ip_vs_get_debug_level(). Without it, both macros
 * expand to an empty statement and @level is not evaluated.
 *
 * @level is parenthesised in the comparison so that an expression argument
 * (e.g. "a ? b : c") is compared as a whole.
 */
#ifdef CONFIG_IP_VS_DEBUG
#define EnterFunction(level)						\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG				\
			       pr_fmt("Enter: %s, %s line %i\n"),	\
			       __func__, __FILE__, __LINE__);		\
	} while (0)
#define LeaveFunction(level)						\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG				\
			       pr_fmt("Leave: %s, %s line %i\n"),	\
			       __func__, __FILE__, __LINE__);		\
	} while (0)
#else
#define EnterFunction(level)   do {} while (0)
#define LeaveFunction(level)   do {} while (0)
#endif

/* The port number of FTP service (in network order). */
#define FTPPORT  cpu_to_be16(21)
#define FTPDATA  cpu_to_be16(20)
@@ -604,7 +584,7 @@ struct ip_vs_conn {
	spinlock_t              lock;           /* lock for state transition */
	volatile __u16          state;          /* state info */
	volatile __u16          old_state;      /* old state, to be used for
						 * state transition triggerd
						 * state transition triggered
						 * synchronization
						 */
	__u32			fwmark;		/* Fire wall mark from skb */
@@ -630,8 +610,10 @@ struct ip_vs_conn {
	 */
	struct ip_vs_app        *app;           /* bound ip_vs_app object */
	void                    *app_data;      /* Application private data */
	struct_group(sync_conn_opt,
		struct ip_vs_seq  in_seq;       /* incoming seq. struct */
		struct ip_vs_seq  out_seq;      /* outgoing seq. struct */
	);

	const struct ip_vs_pe	*pe;
	char			*pe_data;
@@ -653,7 +635,7 @@ struct ip_vs_service_user_kern {
	u16			protocol;
	union nf_inet_addr	addr;		/* virtual ip address */
	__be16			port;
	u32			fwmark;		/* firwall mark of service */
	u32			fwmark;		/* firewall mark of service */

	/* virtual service options */
	char			*sched_name;
@@ -1054,7 +1036,7 @@ struct netns_ipvs {
	struct ipvs_sync_daemon_cfg	bcfg;	/* Backup Configuration */
	/* net name space ptr */
	struct net		*net;            /* Needed by timer routines */
	/* Number of heterogeneous destinations, needed becaus heterogeneous
	/* Number of heterogeneous destinations, needed because heterogeneous
	 * are not supported when synchronization is enabled.
	 */
	unsigned int		mixed_address_family_dests;
+24 −11
Original line number Diff line number Diff line
@@ -1046,6 +1046,18 @@ struct nft_rule_dp {
		__attribute__((aligned(__alignof__(struct nft_expr))));
};

/**
 *	struct nft_rule_dp_last - end-of-blob marker with trailing bookkeeping
 *
 *	@end: end of nft_rule_blob marker
 *	@h: call_rcu head
 *	@blob: rule blob to free via call_rcu
 *	@chain: chain reference kept for nftables tracing
 */
struct nft_rule_dp_last {
	struct nft_rule_dp end;		/* end of nft_rule_blob marker */
	struct rcu_head h;		/* call_rcu head */
	struct nft_rule_blob *blob;	/* ptr to free via call_rcu */
	const struct nft_chain *chain;	/* for nftables tracing */
};

/* Return the rule located directly behind @rule: the address of @rule
 * advanced by the size of the rule header plus the rule's dlen bytes.
 */
static inline const struct nft_rule_dp *nft_rule_next(const struct nft_rule_dp *rule)
{
	const char *pos = (const char *)rule;

	pos += sizeof(*rule);	/* step over the header */
	pos += rule->dlen;	/* step over the dlen bytes that follow it */

	return (const struct nft_rule_dp *)pos;
}

struct nft_rule_blob {
	unsigned long			size;
	unsigned char			data[]
@@ -1197,6 +1209,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
 *	@genmask: generation mask
 *	@afinfo: address family info
 *	@name: name of the table
 *	@validate_state: internal, set when transaction adds jumps
 */
struct nft_table {
	struct list_head		list;
@@ -1215,6 +1228,7 @@ struct nft_table {
	char				*name;
	u16				udlen;
	u8				*udata;
	u8				validate_state;
};

static inline bool nft_table_has_owner(const struct nft_table *table)
@@ -1394,11 +1408,7 @@ void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
 *	@type: event type (enum nft_trace_types)
 *	@skbid: hash of skb to be used as trace id
 *	@packet_dumped: packet headers sent in a previous traceinfo message
 *	@pkt: pktinfo currently processed
 *	@basechain: base chain currently processed
 *	@chain: chain currently processed
 *	@rule:  rule that was evaluated
 *	@verdict: verdict given by rule
 */
struct nft_traceinfo {
	bool				trace;
@@ -1406,18 +1416,16 @@ struct nft_traceinfo {
	bool				packet_dumped;
	enum nft_trace_types		type:8;
	u32				skbid;
	const struct nft_pktinfo	*pkt;
	const struct nft_base_chain	*basechain;
	const struct nft_chain		*chain;
	const struct nft_rule_dp	*rule;
	const struct nft_verdict	*verdict;
};

void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
		    const struct nft_verdict *verdict,
		    const struct nft_chain *basechain);

void nft_trace_notify(struct nft_traceinfo *info);
void nft_trace_notify(const struct nft_pktinfo *pkt,
		      const struct nft_verdict *verdict,
		      const struct nft_rule_dp *rule,
		      struct nft_traceinfo *info);

#define MODULE_ALIAS_NFT_CHAIN(family, name) \
	MODULE_ALIAS("nft-chain-" __stringify(family) "-" name)
@@ -1601,6 +1609,8 @@ struct nft_trans_chain {
	struct nft_stats __percpu	*stats;
	u8				policy;
	u32				chain_id;
	struct nft_base_chain		*basechain;
	struct list_head		hook_list;
};

#define nft_trans_chain_update(trans)	\
@@ -1613,6 +1623,10 @@ struct nft_trans_chain {
	(((struct nft_trans_chain *)trans->data)->policy)
#define nft_trans_chain_id(trans)	\
	(((struct nft_trans_chain *)trans->data)->chain_id)
/* Accessors for the basechain and hook_list members of the
 * struct nft_trans_chain stored in trans->data.
 */
#define nft_trans_basechain(trans)	\
	(((struct nft_trans_chain *)trans->data)->basechain)
#define nft_trans_chain_hooks(trans)	\
	(((struct nft_trans_chain *)trans->data)->hook_list)

struct nft_trans_table {
	bool				update;
@@ -1688,7 +1702,6 @@ struct nftables_pernet {
	struct mutex		commit_mutex;
	u64			table_handle;
	unsigned int		base_seq;
	u8			validate_state;
};

extern unsigned int nf_tables_net_id;
+6 −6
Original line number Diff line number Diff line
@@ -1481,6 +1481,7 @@ void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs)

int __init ip_vs_conn_init(void)
{
	size_t tab_array_size;
	int idx;

	/* Compute size and mask */
@@ -1494,8 +1495,9 @@ int __init ip_vs_conn_init(void)
	/*
	 * Allocate the connection hash table and initialize its list heads
	 */
	ip_vs_conn_tab = vmalloc(array_size(ip_vs_conn_tab_size,
					    sizeof(*ip_vs_conn_tab)));
	tab_array_size = array_size(ip_vs_conn_tab_size,
				    sizeof(*ip_vs_conn_tab));
	ip_vs_conn_tab = vmalloc(tab_array_size);
	if (!ip_vs_conn_tab)
		return -ENOMEM;

@@ -1508,10 +1510,8 @@ int __init ip_vs_conn_init(void)
		return -ENOMEM;
	}

	pr_info("Connection hash table configured "
		"(size=%d, memory=%ldKbytes)\n",
		ip_vs_conn_tab_size,
		(long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024);
	pr_info("Connection hash table configured (size=%d, memory=%zdKbytes)\n",
		ip_vs_conn_tab_size, tab_array_size / 1024);
	IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n",
		  sizeof(struct ip_vs_conn));

+0 −8
Original line number Diff line number Diff line
@@ -1140,7 +1140,6 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
	__be16 vport;
	unsigned int flags;

	EnterFunction(12);
	vaddr = &svc->addr;
	vport = svc->port;
	daddr = &iph->saddr;
@@ -1208,7 +1207,6 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
		      IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
		      cp->flags, refcount_read(&cp->refcnt));
	LeaveFunction(12);
	return cp;
}

@@ -1316,13 +1314,11 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
		ip_vs_update_conntrack(skb, cp, 0);
	ip_vs_conn_put(cp);

	LeaveFunction(11);
	return NF_ACCEPT;

drop:
	ip_vs_conn_put(cp);
	kfree_skb(skb);
	LeaveFunction(11);
	return NF_STOLEN;
}

@@ -1341,8 +1337,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
	int af = state->pf;
	struct sock *sk;

	EnterFunction(11);

	/* Already marked as IPVS request or reply? */
	if (skb->ipvs_property)
		return NF_ACCEPT;
@@ -2365,7 +2359,6 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
	struct netns_ipvs *ipvs;
	struct net *net;

	EnterFunction(2);
	list_for_each_entry(net, net_list, exit_list) {
		ipvs = net_ipvs(net);
		ip_vs_unregister_hooks(ipvs, AF_INET);
@@ -2374,7 +2367,6 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
		smp_wmb();
		ip_vs_sync_net_cleanup(ipvs);
	}
	LeaveFunction(2);
}

static struct pernet_operations ipvs_core_ops = {
Loading