Commit 8e8b6c63 authored by David S. Miller
Browse files

Merge branch 'rmnet-tx-pkt-aggregation'

Daniele Palmas says:

====================
net: add tx packets aggregation to ethtool and rmnet

Hello maintainers and all,

this patchset implements tx qmap packets aggregation in rmnet and generic
ethtool support for that.

Some low-cat ThreadX-based modems cannot reach the maximum allowed
throughput in both tx and rx during a bidirectional test unless tx packet
aggregation is enabled.

I verified this problem with rmnet + qmi_wwan by using a MDM9207 Cat. 4 based modem
(50Mbps/150Mbps max throughput). What is actually happening is pictured at
https://drive.google.com/file/d/1gSbozrtd9h0X63i6vdkNpN68d-9sg8f9/view

Testing with iperf TCP, when rx and tx flows are tested individually there's no issue
in tx and minor issues in rx (not able to reach max throughput). When there are concurrent
tx and rx flows, tx throughput has a huge drop; rx has a minor one, but it is still present.

The same scenario with tx aggregation enabled is pictured at
https://drive.google.com/file/d/1jcVIKNZD7K3lHtwKE5W02mpaloudYYih/view
showing a regular graph.

This issue does not happen with high-cat modems (e.g. SDX20), or at least it
does not happen at the throughputs I'm able to test currently: maybe the same
could happen when moving close to the maximum rates supported by those modems.
Anyway, having the tx aggregation enabled should not hurt.

The first attempt to solve this issue was in qmi_wwan qmap implementation,
see the discussion at https://lore.kernel.org/netdev/20221019132503.6783-1-dnlplm@gmail.com/



However, it turned out that rmnet was a better candidate for the implementation.

Moreover, Greg and Jakub also suggested using ethtool for the configuration:
I'm not sure I got their advice right, but this patchset also adds generic ethtool
support for tx aggregation.

The patches have been tested mainly against an MDM9207 based modem through USB
and SDX55 through PCI (MHI).

v2 should address the comments highlighted in the review: the implementation is
still in rmnet, due to Subash's request of keeping tx aggregation there.

v3 fixes ethtool-netlink.rst content out of table bounds and a W=1 build warning
for patch 2.

v4 solves a race related to egress_agg_params.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 9a06cce6 db8a563a
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -1004,6 +1004,9 @@ Kernel response contents:
  ``ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL``  u32     rate sampling interval
  ``ETHTOOL_A_COALESCE_USE_CQE_TX``            bool    timer reset mode, Tx
  ``ETHTOOL_A_COALESCE_USE_CQE_RX``            bool    timer reset mode, Rx
  ``ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES``     u32     max aggr size, Tx
  ``ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES``    u32     max aggr packets, Tx
  ``ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS``    u32     time (us), aggr, Tx
  ===========================================  ======  =======================

Attributes are only included in reply if their value is not zero or the
@@ -1022,6 +1025,17 @@ each packet event resets the timer. In this mode timer is used to force
the interrupt if queue goes idle, while busy queues depend on the packet
limit to trigger interrupts.

Tx aggregation consists of copying frames into a contiguous buffer so that they
can be submitted as a single IO operation. ``ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES``
describes the maximum size in bytes for the submitted buffer.
``ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES`` describes the maximum number of frames
that can be aggregated into a single buffer.
``ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS`` describes the amount of time in usecs,
counted since the first packet arrival in an aggregated block, after which the
block should be sent.
This feature is mainly of interest for specific USB devices which do not cope
well with frequent small-sized URB transmissions.

COALESCE_SET
============

@@ -1055,6 +1069,9 @@ Request contents:
  ``ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL``  u32     rate sampling interval
  ``ETHTOOL_A_COALESCE_USE_CQE_TX``            bool    timer reset mode, Tx
  ``ETHTOOL_A_COALESCE_USE_CQE_RX``            bool    timer reset mode, Rx
  ``ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES``     u32     max aggr size, Tx
  ``ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES``    u32     max aggr packets, Tx
  ``ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS``    u32     time (us), aggr, Tx
  ===========================================  ======  =======================

Request is rejected if it contains attributes declared as unsupported by driver (i.e.
+5 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include "rmnet_handlers.h"
#include "rmnet_vnd.h"
#include "rmnet_private.h"
#include "rmnet_map.h"

/* Local Definitions and Declarations */

@@ -39,6 +40,8 @@ static int rmnet_unregister_real_device(struct net_device *real_dev)
	if (port->nr_rmnet_devs)
		return -EINVAL;

	rmnet_map_tx_aggregate_exit(port);

	netdev_rx_handler_unregister(real_dev);

	kfree(port);
@@ -79,6 +82,8 @@ static int rmnet_register_real_device(struct net_device *real_dev,
	for (entry = 0; entry < RMNET_MAX_LOGICAL_EP; entry++)
		INIT_HLIST_HEAD(&port->muxed_ep[entry]);

	rmnet_map_tx_aggregate_init(port);

	netdev_dbg(real_dev, "registered with rmnet\n");
	return 0;
}
+20 −0
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
 */

#include <linux/skbuff.h>
#include <linux/time.h>
#include <net/gro_cells.h>

#ifndef _RMNET_CONFIG_H_
@@ -19,6 +20,12 @@ struct rmnet_endpoint {
	struct hlist_node hlnode;
};

/* Egress (tx) aggregation parameters; one instance is embedded in each
 * struct rmnet_port.
 * NOTE(review): the meanings of @bytes and @time_nsec below are inferred from
 * the field names — confirm against the aggregation implementation.
 */
struct rmnet_egress_agg_params {
	/* presumably the maximum size in bytes of an aggregated buffer — confirm */
	u32 bytes;
	/* the MAP egress handler only aggregates when this is greater than 1 */
	u32 count;
	/* presumably the aggregation timeout in nanoseconds — confirm */
	u64 time_nsec;
};

/* One instance of this structure is instantiated for each real_dev associated
 * with rmnet.
 */
@@ -30,6 +37,19 @@ struct rmnet_port {
	struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP];
	struct net_device *bridge_ep;
	struct net_device *rmnet_dev;

	/* Egress aggregation information */
	struct rmnet_egress_agg_params egress_agg_params;
	/* Protect aggregation related elements */
	spinlock_t agg_lock;
	struct sk_buff *skbagg_head;
	struct sk_buff *skbagg_tail;
	int agg_state;
	u8 agg_count;
	struct timespec64 agg_time;
	struct timespec64 agg_last;
	struct hrtimer hrtimer;
	struct work_struct agg_wq;
};

extern struct rtnl_link_ops rmnet_link_ops;
+16 −2
Original line number Diff line number Diff line
@@ -164,8 +164,18 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,

	map_header->mux_id = mux_id;

	skb->protocol = htons(ETH_P_MAP);
	if (READ_ONCE(port->egress_agg_params.count) > 1) {
		unsigned int len;

		len = rmnet_map_tx_aggregate(skb, port, orig_dev);
		if (likely(len)) {
			rmnet_vnd_tx_fixup_len(len, orig_dev);
			return -EINPROGRESS;
		}
		return -ENOMEM;
	}

	skb->protocol = htons(ETH_P_MAP);
	return 0;
}

@@ -235,6 +245,7 @@ void rmnet_egress_handler(struct sk_buff *skb)
	struct rmnet_port *port;
	struct rmnet_priv *priv;
	u8 mux_id;
	int err;

	sk_pacing_shift_update(skb->sk, 8);

@@ -247,8 +258,11 @@ void rmnet_egress_handler(struct sk_buff *skb)
	if (!port)
		goto drop;

	if (rmnet_map_egress_handler(skb, port, mux_id, orig_dev))
	err = rmnet_map_egress_handler(skb, port, mux_id, orig_dev);
	if (err == -ENOMEM)
		goto drop;
	else if (err == -EINPROGRESS)
		return;

	rmnet_vnd_tx_fixup(skb, orig_dev);

+6 −0
Original line number Diff line number Diff line
@@ -53,5 +53,11 @@ void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
				      struct net_device *orig_dev,
				      int csum_type);
int rmnet_map_process_next_hdr_packet(struct sk_buff *skb, u16 len);
/* Tx aggregation entry point used by the MAP egress handler: a non-zero
 * return is passed to rmnet_vnd_tx_fixup_len() as a length, and a zero return
 * is reported by the caller as -ENOMEM.
 */
unsigned int rmnet_map_tx_aggregate(struct sk_buff *skb, struct rmnet_port *port,
				    struct net_device *orig_dev);
/* Called on a port while its real device is being registered with rmnet. */
void rmnet_map_tx_aggregate_init(struct rmnet_port *port);
/* Called on a port while its real device is being unregistered, before the
 * rx handler is unregistered and the port is freed.
 */
void rmnet_map_tx_aggregate_exit(struct rmnet_port *port);
/* NOTE(review): presumably updates the port's egress aggregation parameters;
 * the unit of @time is not visible from this header — confirm.
 */
void rmnet_map_update_ul_agg_config(struct rmnet_port *port, u32 size,
				    u32 count, u32 time);

#endif /* _RMNET_MAP_H_ */
Loading