Commit eb441289 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files
Tony Nguyen says:

====================
igc: TX timestamping fixes

This is the fixes part of the series intended to add support for using
the 4 timestamp registers present in i225/i226.

Moving the timestamp handling to be inline with the interrupt handling
has the advantage of improving the TX timestamping retrieval latency,
here are some numbers using ntpperf:

Before:

$ sudo ./ntpperf -i enp3s0 -m 10:22:22:22:22:21 -d 192.168.1.3 -s 172.18.0.0/16 -I -H -o -37
               |          responses            |     TX timestamp offset (ns)
rate   clients |  lost invalid   basic  xleave |    min    mean     max stddev
1000       100   0.00%   0.00%   0.00% 100.00%      -56      +9     +52     19
1500       150   0.00%   0.00%   0.00% 100.00%      -40     +30     +75     22
2250       225   0.00%   0.00%   0.00% 100.00%      -11     +29     +72     15
3375       337   0.00%   0.00%   0.00% 100.00%      -18     +40     +88     22
5062       506   0.00%   0.00%   0.00% 100.00%      -19     +23     +77     15
7593       759   0.00%   0.00%   0.00% 100.00%       +7     +47   +5168     43
11389     1138   0.00%   0.00%   0.00% 100.00%      -11     +41   +5240     39
17083     1708   0.00%   0.00%   0.00% 100.00%      +19     +60   +5288     50
25624     2562   0.00%   0.00%   0.00% 100.00%       +1     +56   +5368     58
38436     3843   0.00%   0.00%   0.00% 100.00%      -84     +12   +8847     66
57654     5765   0.00%   0.00% 100.00%   0.00%
86481     8648   0.00%   0.00% 100.00%   0.00%
129721   12972   0.00%   0.00% 100.00%   0.00%
194581   16384   0.00%   0.00% 100.00%   0.00%
291871   16384  27.35%   0.00%  72.65%   0.00%
437806   16384  50.05%   0.00%  49.95%   0.00%

After:

$ sudo ./ntpperf -i enp3s0 -m 10:22:22:22:22:21 -d 192.168.1.3 -s 172.18.0.0/16 -I -H -o -37
               |          responses            |     TX timestamp offset (ns)
rate   clients |  lost invalid   basic  xleave |    min    mean     max stddev
1000       100   0.00%   0.00%   0.00% 100.00%      -44      +0     +61     19
1500       150   0.00%   0.00%   0.00% 100.00%       -6     +39     +81     16
2250       225   0.00%   0.00%   0.00% 100.00%      -22     +25     +69     15
3375       337   0.00%   0.00%   0.00% 100.00%      -28     +15     +56     14
5062       506   0.00%   0.00%   0.00% 100.00%       +7     +78    +143     27
7593       759   0.00%   0.00%   0.00% 100.00%      -54     +24    +144     47
11389     1138   0.00%   0.00%   0.00% 100.00%      -90     -33     +28     21
17083     1708   0.00%   0.00%   0.00% 100.00%      -50      -2     +35     14
25624     2562   0.00%   0.00%   0.00% 100.00%      -62      +7     +66     23
38436     3843   0.00%   0.00%   0.00% 100.00%      -33     +30   +5395     36
57654     5765   0.00%   0.00% 100.00%   0.00%
86481     8648   0.00%   0.00% 100.00%   0.00%
129721   12972   0.00%   0.00% 100.00%   0.00%
194581   16384  19.50%   0.00%  80.50%   0.00%
291871   16384  35.81%   0.00%  64.19%   0.00%
437806   16384  55.40%   0.00%  44.60%   0.00%

During this series, and to show that as is always the case, things are
never easy as they should be, a hardware issue was found, and it took
some time to find the workaround(s). The bug and workaround are better
explained in patch 4/4.

Note: the workaround has a simpler alternative, but it would involve
adding support for the other timestamp registers, and only using the
TXSTMP{H/L}_0 as a way to clear the interrupt. But I feel bad about
throwing this kind of resources away. Didn't test this extensively but
it should work.

Also, as Marc Kleine-Budde suggested, after some consensus is reached
on this series, most parts of it will be proposed for igb.

* '1GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  igc: Work around HW bug causing missing timestamps
  igc: Retrieve TX timestamp during interrupt handling
  igc: Check if hardware TX timestamping is enabled earlier
  igc: Fix race condition in PTP tx code
====================

Link: https://lore.kernel.org/r/20230622165244.2202786-1-anthony.l.nguyen@intel.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 6a940abd c789ad7c
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -228,7 +228,10 @@ struct igc_adapter {

	struct ptp_clock *ptp_clock;
	struct ptp_clock_info ptp_caps;
	struct work_struct ptp_tx_work;
	/* Access to ptp_tx_skb and ptp_tx_start are protected by the
	 * ptp_tx_lock.
	 */
	spinlock_t ptp_tx_lock;
	struct sk_buff *ptp_tx_skb;
	struct hwtstamp_config tstamp_config;
	unsigned long ptp_tx_start;
@@ -401,7 +404,6 @@ enum igc_state_t {
	__IGC_TESTING,
	__IGC_RESETTING,
	__IGC_DOWN,
	__IGC_PTP_TX_IN_PROGRESS,
};

enum igc_tx_flags {
@@ -578,6 +580,7 @@ enum igc_ring_flags_t {
	IGC_RING_FLAG_TX_CTX_IDX,
	IGC_RING_FLAG_TX_DETECT_HANG,
	IGC_RING_FLAG_AF_XDP_ZC,
	IGC_RING_FLAG_TX_HWTSTAMP,
};

#define ring_uses_large_buffer(ring) \
@@ -634,6 +637,7 @@ int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
void igc_ptp_tx_hang(struct igc_adapter *adapter);
void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts);
void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter);

#define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))

+9 −5
Original line number Diff line number Diff line
@@ -1585,14 +1585,16 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
		}
	}

	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
	if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) &&
		     skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
		/* FIXME: add support for retrieving timestamps from
		 * the other timer registers before skipping the
		 * timestamping request.
		 */
		if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
		    !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
					   &adapter->state)) {
		unsigned long flags;

		spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
		if (!adapter->ptp_tx_skb) {
			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
			tx_flags |= IGC_TX_FLAGS_TSTAMP;

@@ -1601,6 +1603,8 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
		} else {
			adapter->tx_hwtstamp_skipped++;
		}

		spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
	}

	if (skb_vlan_tag_present(skb)) {
@@ -5219,7 +5223,7 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter)

	if (tsicr & IGC_TSICR_TXTS) {
		/* retrieve hardware timestamp */
		schedule_work(&adapter->ptp_tx_work);
		igc_ptp_tx_tstamp_event(adapter);
		ack |= IGC_TSICR_TXTS;
	}

+102 −40
Original line number Diff line number Diff line
@@ -536,9 +536,34 @@ static void igc_ptp_enable_rx_timestamp(struct igc_adapter *adapter)
	wr32(IGC_TSYNCRXCTL, val);
}

static void igc_ptp_clear_tx_tstamp(struct igc_adapter *adapter)
{
	unsigned long flags;

	spin_lock_irqsave(&adapter->ptp_tx_lock, flags);

	dev_kfree_skb_any(adapter->ptp_tx_skb);
	adapter->ptp_tx_skb = NULL;

	spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
}

static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
	int i;

	/* Clear the flags first to avoid new packets to be enqueued
	 * for TX timestamping.
	 */
	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igc_ring *tx_ring = adapter->tx_ring[i];

		clear_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags);
	}

	/* Now we can clean the pending TX timestamp requests. */
	igc_ptp_clear_tx_tstamp(adapter);

	wr32(IGC_TSYNCTXCTL, 0);
}
@@ -546,12 +571,23 @@ static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter)
static void igc_ptp_enable_tx_timestamp(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
	int i;

	wr32(IGC_TSYNCTXCTL, IGC_TSYNCTXCTL_ENABLED | IGC_TSYNCTXCTL_TXSYNSIG);

	/* Read TXSTMP registers to discard any timestamp previously stored. */
	rd32(IGC_TXSTMPL);
	rd32(IGC_TXSTMPH);

	/* The hardware is ready to accept TX timestamp requests,
	 * notify the transmit path.
	 */
	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igc_ring *tx_ring = adapter->tx_ring[i];

		set_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags);
	}

}

/**
@@ -603,6 +639,7 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter,
	return 0;
}

/* Requires adapter->ptp_tx_lock held by caller. */
static void igc_ptp_tx_timeout(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
@@ -610,7 +647,6 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter)
	dev_kfree_skb_any(adapter->ptp_tx_skb);
	adapter->ptp_tx_skb = NULL;
	adapter->tx_hwtstamp_timeouts++;
	clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
	/* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. */
	rd32(IGC_TXSTMPH);
	netdev_warn(adapter->netdev, "Tx timestamp timeout\n");
@@ -618,20 +654,20 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter)

void igc_ptp_tx_hang(struct igc_adapter *adapter)
{
	bool timeout = time_is_before_jiffies(adapter->ptp_tx_start +
					      IGC_PTP_TX_TIMEOUT);
	unsigned long flags;

	if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state))
		return;
	spin_lock_irqsave(&adapter->ptp_tx_lock, flags);

	if (!adapter->ptp_tx_skb)
		goto unlock;

	if (time_is_after_jiffies(adapter->ptp_tx_start + IGC_PTP_TX_TIMEOUT))
		goto unlock;

	/* If we haven't received a timestamp within the timeout, it is
	 * reasonable to assume that it will never occur, so we can unlock the
	 * timestamp bit when this occurs.
	 */
	if (timeout) {
		cancel_work_sync(&adapter->ptp_tx_work);
	igc_ptp_tx_timeout(adapter);
	}

unlock:
	spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
}

/**
@@ -641,20 +677,57 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter)
 * If we were asked to do hardware stamping and such a time stamp is
 * available, then it must have been for this skb here because we only
 * allow only one such packet into the queue.
 *
 * Context: Expects adapter->ptp_tx_lock to be held by caller.
 */
static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
{
	struct sk_buff *skb = adapter->ptp_tx_skb;
	struct skb_shared_hwtstamps shhwtstamps;
	struct igc_hw *hw = &adapter->hw;
	u32 tsynctxctl;
	int adjust = 0;
	u64 regval;

	if (WARN_ON_ONCE(!skb))
		return;

	tsynctxctl = rd32(IGC_TSYNCTXCTL);
	tsynctxctl &= IGC_TSYNCTXCTL_TXTT_0;
	if (tsynctxctl) {
		regval = rd32(IGC_TXSTMPL);
		regval |= (u64)rd32(IGC_TXSTMPH) << 32;
	} else {
		/* There's a bug in the hardware that could cause
		 * missing interrupts for TX timestamping. The issue
		 * is that for new interrupts to be triggered, the
		 * IGC_TXSTMPH_0 register must be read.
		 *
		 * To avoid discarding a valid timestamp that just
		 * happened at the "wrong" time, we need to confirm
		 * that there was no timestamp captured, we do that by
		 * assuming that no two timestamps in sequence have
		 * the same nanosecond value.
		 *
		 * So, we read the "low" register, read the "high"
		 * register (to latch a new timestamp) and read the
		 * "low" register again, if "old" and "new" versions
		 * of the "low" register are different, a valid
		 * timestamp was captured, we can read the "high"
		 * register again.
		 */
		u32 txstmpl_old, txstmpl_new;

		txstmpl_old = rd32(IGC_TXSTMPL);
		rd32(IGC_TXSTMPH);
		txstmpl_new = rd32(IGC_TXSTMPL);

		if (txstmpl_old == txstmpl_new)
			return;

		regval = txstmpl_new;
		regval |= (u64)rd32(IGC_TXSTMPH) << 32;
	}
	if (igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval))
		return;

@@ -676,13 +749,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
	shhwtstamps.hwtstamp =
		ktime_add_ns(shhwtstamps.hwtstamp, adjust);

	/* Clear the lock early before calling skb_tstamp_tx so that
	 * applications are not woken up before the lock bit is clear. We use
	 * a copy of the skb pointer to ensure other threads can't change it
	 * while we're notifying the stack.
	 */
	adapter->ptp_tx_skb = NULL;
	clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);

	/* Notify the stack and free the skb after we've unlocked */
	skb_tstamp_tx(skb, &shhwtstamps);
@@ -690,27 +757,25 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
}

/**
 * igc_ptp_tx_work
 * @work: pointer to work struct
 * igc_ptp_tx_tstamp_event
 * @adapter: board private structure
 *
 * This work function polls the TSYNCTXCTL valid bit to determine when a
 * timestamp has been taken for the current stored skb.
 * Called when a TX timestamp interrupt happens to retrieve the
 * timestamp and send it up to the socket.
 */
static void igc_ptp_tx_work(struct work_struct *work)
void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter)
{
	struct igc_adapter *adapter = container_of(work, struct igc_adapter,
						   ptp_tx_work);
	struct igc_hw *hw = &adapter->hw;
	u32 tsynctxctl;
	unsigned long flags;

	if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state))
		return;
	spin_lock_irqsave(&adapter->ptp_tx_lock, flags);

	tsynctxctl = rd32(IGC_TSYNCTXCTL);
	if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0)))
		return;
	if (!adapter->ptp_tx_skb)
		goto unlock;

	igc_ptp_tx_hwtstamp(adapter);

unlock:
	spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
}

/**
@@ -959,8 +1024,8 @@ void igc_ptp_init(struct igc_adapter *adapter)
		return;
	}

	spin_lock_init(&adapter->ptp_tx_lock);
	spin_lock_init(&adapter->tmreg_lock);
	INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work);

	adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
	adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
@@ -1020,10 +1085,7 @@ void igc_ptp_suspend(struct igc_adapter *adapter)
	if (!(adapter->ptp_flags & IGC_PTP_ENABLED))
		return;

	cancel_work_sync(&adapter->ptp_tx_work);
	dev_kfree_skb_any(adapter->ptp_tx_skb);
	adapter->ptp_tx_skb = NULL;
	clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
	igc_ptp_clear_tx_tstamp(adapter);

	if (pci_device_is_present(adapter->pdev)) {
		igc_ptp_time_save(adapter);