author     Linus Torvalds <torvalds@linux-foundation.org>  2025-10-16 09:41:21 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2025-10-16 09:41:21 -0700
commit     634ec1fc7982efeeeeed4a7688b0004827b43a21 (patch)
tree       a2aaba8e2a7c3fec162b30612d9e369ffe50d25b /net
parent     ef25485516b09db57493f5e78b3358db7cbdcaa0 (diff)
parent     6de1dec1c166c7f7324ce52ccfdf43e2fa743b19 (diff)
Merge tag 'net-6.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni:
 "Including fixes from CAN.

  Current release - regressions:

   - udp: do not use skb_release_head_state() before
     skb_attempt_defer_free()

   - gro_cells: use nested-BH locking for gro_cell

   - dpll: zl3073x: increase maximum size of flash utility

  Previous releases - regressions:

   - core: fix lockdep splat on device unregister

   - tcp: fix tcp_tso_should_defer() vs large RTT

   - tls:
       - don't rely on tx_work during send()
       - wait for pending async decryptions if tls_strp_msg_hold fails

   - can: j1939: add missing calls in NETDEV_UNREGISTER notification
     handler

   - eth: lan78xx: fix lost EEPROM write timeout in
     lan78xx_write_raw_eeprom

  Previous releases - always broken:

   - ip6_tunnel: prevent perpetual tunnel growth

   - dpll: zl3073x: handle missing or corrupted flash configuration

   - can: m_can: fix pm_runtime and CAN state handling

   - eth:
       - ixgbe: fix too early devlink_free() in ixgbe_remove()
       - ixgbevf: fix mailbox API compatibility
       - gve: Check valid ts bit on RX descriptor before hw timestamping
       - idpf: cleanup remaining SKBs in PTP flows
       - r8169: fix packet truncation after S4 resume on
         RTL8168H/RTL8111H"

* tag 'net-6.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (50 commits)
  udp: do not use skb_release_head_state() before skb_attempt_defer_free()
  net: usb: lan78xx: fix use of improperly initialized dev->chipid in lan78xx_reset
  netdevsim: set the carrier when the device goes up
  selftests: tls: add test for short splice due to full skmsg
  selftests: net: tls: add tests for cmsg vs MSG_MORE
  tls: don't rely on tx_work during send()
  tls: wait for pending async decryptions if tls_strp_msg_hold fails
  tls: always set record_type in tls_process_cmsg
  tls: wait for async encrypt in case of error during latter iterations of sendmsg
  tls: trim encrypted message to match the plaintext on short splice
  tg3: prevent use of uninitialized remote_adv and local_adv variables
  MAINTAINERS: new entry for IPv6 IOAM
  gve: Check valid ts bit on RX descriptor before hw timestamping
  net: core: fix lockdep splat on device unregister
  MAINTAINERS: add myself as maintainer for b53
  selftests: net: check jq command is supported
  net: airoha: Take into account out-of-order tx completions in airoha_dev_xmit()
  tcp: fix tcp_tso_should_defer() vs large RTT
  r8152: add error handling in rtl8152_driver_init
  usbnet: Fix using smp_processor_id() in preemptible code warnings
  ...
Diffstat (limited to 'net')
 net/can/j1939/main.c  |  2
 net/core/dev.c        | 40
 net/core/gro_cells.c  | 10
 net/core/skbuff.c     |  1
 net/ipv4/ip_tunnel.c  | 14
 net/ipv4/tcp_output.c | 19
 net/ipv4/udp.c        |  2
 net/ipv6/ip6_tunnel.c |  3
 net/tls/tls_main.c    |  7
 net/tls/tls_sw.c      | 31
 10 files changed, 91 insertions(+), 38 deletions(-)
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index 3706a872ecaf..a93af55df5fd 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -378,6 +378,8 @@ static int j1939_netdev_notify(struct notifier_block *nb,
j1939_ecu_unmap_all(priv);
break;
case NETDEV_UNREGISTER:
+ j1939_cancel_active_session(priv, NULL);
+ j1939_sk_netdev_event_netdown(priv);
j1939_sk_netdev_event_unregister(priv);
break;
}
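
A NETDEV_UNREGISTER event is not guaranteed to be preceded by NETDEV_DOWN, so the unregister branch now performs the same teardown first: cancel active sessions and notify sockets of the link going down, then deliver the unregister event. A minimal standalone sketch of that ordering (stub types; the helper names mirror but are not the kernel's):

#include <stdio.h>

enum { NETDEV_DOWN, NETDEV_UNREGISTER };
struct priv { const char *ifname; };

static void cancel_active_sessions(struct priv *p) { printf("%s: cancel sessions\n", p->ifname); }
static void notify_sockets_netdown(struct priv *p) { printf("%s: notify sockets (down)\n", p->ifname); }
static void notify_sockets_unregister(struct priv *p) { printf("%s: notify sockets (unregister)\n", p->ifname); }

static void netdev_notify(struct priv *p, int event)
{
	switch (event) {
	case NETDEV_DOWN:
		cancel_active_sessions(p);
		notify_sockets_netdown(p);
		break;
	case NETDEV_UNREGISTER:
		/* An unregister need not follow a NETDEV_DOWN, so run
		 * the down-style teardown before the unregister event. */
		cancel_active_sessions(p);
		notify_sockets_netdown(p);
		notify_sockets_unregister(p);
		break;
	}
}

int main(void)
{
	struct priv p = { "can0" };
	netdev_notify(&p, NETDEV_UNREGISTER);
	return 0;
}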
diff --git a/net/core/dev.c b/net/core/dev.c
index a64cef2c537e..2acfa44927da 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -12176,6 +12176,35 @@ static void dev_memory_provider_uninstall(struct net_device *dev)
}
}
+/* devices must be UP and netdev_lock()'d */
+static void netif_close_many_and_unlock(struct list_head *close_head)
+{
+ struct net_device *dev, *tmp;
+
+ netif_close_many(close_head, false);
+
+ /* ... now unlock them */
+ list_for_each_entry_safe(dev, tmp, close_head, close_list) {
+ netdev_unlock(dev);
+ list_del_init(&dev->close_list);
+ }
+}
+
+static void netif_close_many_and_unlock_cond(struct list_head *close_head)
+{
+#ifdef CONFIG_LOCKDEP
+ /* We can only track up to MAX_LOCK_DEPTH locks per task.
+ *
+ * Reserve half the available slots for additional locks possibly
+ * taken by notifiers and (soft)irqs.
+ */
+ unsigned int limit = MAX_LOCK_DEPTH / 2;
+
+ if (lockdep_depth(current) > limit)
+ netif_close_many_and_unlock(close_head);
+#endif
+}
+
void unregister_netdevice_many_notify(struct list_head *head,
u32 portid, const struct nlmsghdr *nlh)
{
@@ -12208,17 +12237,18 @@ void unregister_netdevice_many_notify(struct list_head *head,
/* If device is running, close it first. Start with ops locked... */
list_for_each_entry(dev, head, unreg_list) {
+ if (!(dev->flags & IFF_UP))
+ continue;
if (netdev_need_ops_lock(dev)) {
list_add_tail(&dev->close_list, &close_head);
netdev_lock(dev);
}
+ netif_close_many_and_unlock_cond(&close_head);
}
- netif_close_many(&close_head, true);
- /* ... now unlock them and go over the rest. */
+ netif_close_many_and_unlock(&close_head);
+ /* ... now go over the rest. */
list_for_each_entry(dev, head, unreg_list) {
- if (netdev_need_ops_lock(dev))
- netdev_unlock(dev);
- else
+ if (!netdev_need_ops_lock(dev))
list_add_tail(&dev->close_list, &close_head);
}
netif_close_many(&close_head, true);
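
The unregister path used to ops-lock every device in the batch before closing any of them, which can exceed lockdep's per-task tracking limit on large batches and trigger a splat. With lockdep enabled, the batch is now flushed (closed and unlocked) as soon as the task holds more than half the trackable locks, keeping headroom for locks taken by notifiers and (soft)irqs. A minimal userspace sketch of the batching pattern (constant and names illustrative, not the kernel's):

#include <stdio.h>

#define MAX_LOCK_DEPTH 48	/* illustrative stand-in for the lockdep limit */

/* Flush the batch: close every device in it, then drop its lock. */
static void close_and_unlock_batch(int *batch, int *n)
{
	for (int i = 0; i < *n; i++)
		printf("close+unlock dev%d\n", batch[i]);
	*n = 0;
}

int main(void)
{
	int batch[MAX_LOCK_DEPTH], n = 0;

	for (int dev = 0; dev < 100; dev++) {
		batch[n++] = dev;	/* netdev_lock() + add to close_list */
		/* Reserve half the slots for additional locks possibly
		 * taken while the batch is being closed. */
		if (n > MAX_LOCK_DEPTH / 2)
			close_and_unlock_batch(batch, &n);
	}
	close_and_unlock_batch(batch, &n);	/* final partial batch */
	return 0;
}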
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index ff8e5b64bf6b..b43911562f4d 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -8,11 +8,13 @@
struct gro_cell {
struct sk_buff_head napi_skbs;
struct napi_struct napi;
+ local_lock_t bh_lock;
};
int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
+ bool have_bh_lock = false;
struct gro_cell *cell;
int res;
@@ -25,6 +27,8 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
goto unlock;
}
+ local_lock_nested_bh(&gcells->cells->bh_lock);
+ have_bh_lock = true;
cell = this_cpu_ptr(gcells->cells);
if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(net_hotdata.max_backlog)) {
@@ -39,6 +43,9 @@ drop:
if (skb_queue_len(&cell->napi_skbs) == 1)
napi_schedule(&cell->napi);
+ if (have_bh_lock)
+ local_unlock_nested_bh(&gcells->cells->bh_lock);
+
res = NET_RX_SUCCESS;
unlock:
@@ -54,6 +61,7 @@ static int gro_cell_poll(struct napi_struct *napi, int budget)
struct sk_buff *skb;
int work_done = 0;
+ __local_lock_nested_bh(&cell->bh_lock);
while (work_done < budget) {
skb = __skb_dequeue(&cell->napi_skbs);
if (!skb)
@@ -64,6 +72,7 @@ static int gro_cell_poll(struct napi_struct *napi, int budget)
if (work_done < budget)
napi_complete_done(napi, work_done);
+ __local_unlock_nested_bh(&cell->bh_lock);
return work_done;
}
@@ -79,6 +88,7 @@ int gro_cells_init(struct gro_cells *gcells, struct net_device *dev)
struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
__skb_queue_head_init(&cell->napi_skbs);
+ local_lock_init(&cell->bh_lock);
set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state);
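
On PREEMPT_RT, disabling bottom halves no longer guarantees per-CPU exclusion, so each cell's queue needs an explicit lock shared by the producer (gro_cells_receive()) and the consumer (gro_cell_poll()); local_lock_nested_bh() compiles away on !RT builds and becomes a real lock on RT. A rough userspace analogue with a pthread mutex standing in for the local lock (names and queue shape illustrative):

#include <pthread.h>
#include <stdio.h>

struct cell {
	pthread_mutex_t bh_lock;	/* plays the role of local_lock_t */
	int queue[64];
	int len;
};

static void cell_receive(struct cell *c, int pkt)
{
	pthread_mutex_lock(&c->bh_lock);	/* local_lock_nested_bh() */
	c->queue[c->len++] = pkt;
	pthread_mutex_unlock(&c->bh_lock);	/* local_unlock_nested_bh() */
}

static void cell_poll(struct cell *c)
{
	pthread_mutex_lock(&c->bh_lock);	/* __local_lock_nested_bh() */
	for (int i = 0; i < c->len; i++)
		printf("deliver %d\n", c->queue[i]);
	c->len = 0;
	pthread_mutex_unlock(&c->bh_lock);	/* __local_unlock_nested_bh() */
}

int main(void)
{
	struct cell c = { .bh_lock = PTHREAD_MUTEX_INITIALIZER };
	cell_receive(&c, 1);
	cell_receive(&c, 2);
	cell_poll(&c);
	return 0;
}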
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bc12790017b0..6be01454f262 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -7200,6 +7200,7 @@ nodefer: kfree_skb_napi_cache(skb);
DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
DEBUG_NET_WARN_ON_ONCE(skb->destructor);
+ DEBUG_NET_WARN_ON_ONCE(skb_nfct(skb));
sdn = per_cpu_ptr(net_hotdata.skb_defer_nodes, cpu) + numa_node_id();
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index aaeb5d16f0c9..158a30ae7c5f 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -568,20 +568,6 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
return 0;
}
-static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
-{
- /* we must cap headroom to some upperlimit, else pskb_expand_head
- * will overflow header offsets in skb_headers_offset_update().
- */
- static const unsigned int max_allowed = 512;
-
- if (headroom > max_allowed)
- headroom = max_allowed;
-
- if (headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, headroom);
-}
-
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
u8 proto, int tunnel_hlen)
{
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bb3576ac0ad7..b94efb3050d2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2369,7 +2369,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
u32 max_segs)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- u32 send_win, cong_win, limit, in_flight;
+ u32 send_win, cong_win, limit, in_flight, threshold;
+ u64 srtt_in_ns, expected_ack, how_far_is_the_ack;
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *head;
int win_divisor;
@@ -2431,9 +2432,19 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
head = tcp_rtx_queue_head(sk);
if (!head)
goto send_now;
- delta = tp->tcp_clock_cache - head->tstamp;
- /* If next ACK is likely to come too late (half srtt), do not defer */
- if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0)
+
+ srtt_in_ns = (u64)(NSEC_PER_USEC >> 3) * tp->srtt_us;
+ /* When is the ACK expected ? */
+ expected_ack = head->tstamp + srtt_in_ns;
+ /* How far from now is the ACK expected ? */
+ how_far_is_the_ack = expected_ack - tp->tcp_clock_cache;
+
+ /* If next ACK is likely to come too late,
+ * ie in more than min(1ms, half srtt), do not defer.
+ */
+ threshold = min(srtt_in_ns >> 1, NSEC_PER_MSEC);
+
+ if ((s64)(how_far_is_the_ack - threshold) > 0)
goto send_now;
/* Ok, it looks like it is advisable to defer.
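
tp->srtt_us stores the smoothed RTT left-shifted by 3 in microseconds, so (NSEC_PER_USEC >> 3) * srtt_us recovers nanoseconds. The old test deferred whenever the pending ACK was within half an srtt, which on a large-RTT path can stall transmission for tens of milliseconds; the new threshold caps the acceptable wait at 1 ms. A standalone worked example with a 200 ms RTT (numbers illustrative):

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_USEC 1000ULL
#define NSEC_PER_MSEC 1000000ULL

int main(void)
{
	/* tp->srtt_us keeps srtt << 3 in usec: a 200 ms RTT. */
	uint32_t srtt_us = 200000 << 3;
	uint64_t now = 5000000000ULL;			/* tp->tcp_clock_cache */
	uint64_t head_tstamp = now - 150 * NSEC_PER_MSEC;	/* sent 150 ms ago */

	uint64_t srtt_in_ns = (NSEC_PER_USEC >> 3) * srtt_us;	/* 200 ms */
	uint64_t expected_ack = head_tstamp + srtt_in_ns;
	uint64_t how_far = expected_ack - now;			/* 50 ms */
	uint64_t threshold = srtt_in_ns / 2 < NSEC_PER_MSEC ?
			     srtt_in_ns / 2 : NSEC_PER_MSEC;	/* 1 ms */

	/* New rule: the ACK is 50 ms out, far beyond the 1 ms cap, so
	 * send now.  The old half-srtt rule (50 ms < 100 ms) would have
	 * deferred, stalling transmission on this large-RTT path. */
	printf("%s\n", (int64_t)(how_far - threshold) > 0 ? "send now" : "defer");
	return 0;
}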
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 95241093b7f0..30dfbf73729d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1851,8 +1851,6 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
sk_peek_offset_bwd(sk, len);
if (!skb_shared(skb)) {
- if (unlikely(udp_skb_has_head_state(skb)))
- skb_release_head_state(skb);
skb_attempt_defer_free(skb);
return;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3262e81223df..6405072050e0 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1257,8 +1257,7 @@ route_lookup:
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr)
+ dst->header_len + t->hlen;
- if (max_headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, max_headroom);
+ ip_tunnel_adj_headroom(dev, max_headroom);
err = ip6_tnl_encap(skb, t, &proto, fl6);
if (err)
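
Unlike the IPv4 path, ip6_tnl_xmit() raised dev->needed_headroom with no upper bound; since the headroom it computes feeds back through LL_RESERVED_SPACE() of the underlying device, stacked or self-routed tunnels could ratchet the value up forever. The fix reuses the capped helper deleted from net/ipv4/ip_tunnel.c above, now shared between both address families. A standalone sketch of the feedback loop and the cap (per-hop overhead illustrative):

#include <stdio.h>

static unsigned int headroom;	/* stands in for dev->needed_headroom */

static void adj_headroom_capped(unsigned int want)
{
	/* Cap as in ip_tunnel_adj_headroom(): beyond 512 bytes,
	 * pskb_expand_head() could overflow header offsets. */
	const unsigned int max_allowed = 512;

	if (want > max_allowed)
		want = max_allowed;
	if (want > headroom)
		headroom = want;
}

int main(void)
{
	/* Each xmit asks for "current headroom + per-tunnel overhead";
	 * without the cap this grows by 40 on every pass. */
	for (int i = 0; i < 100; i++)
		adj_headroom_capped(headroom + 40);
	printf("needed_headroom settles at %u\n", headroom);	/* 512 */
	return 0;
}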
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index a3ccb3135e51..39a2ab47fe72 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -255,12 +255,9 @@ int tls_process_cmsg(struct sock *sk, struct msghdr *msg,
if (msg->msg_flags & MSG_MORE)
return -EINVAL;
- rc = tls_handle_open_record(sk, msg->msg_flags);
- if (rc)
- return rc;
-
*record_type = *(unsigned char *)CMSG_DATA(cmsg);
- rc = 0;
+
+ rc = tls_handle_open_record(sk, msg->msg_flags);
break;
default:
return -EINVAL;
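
With the assignment hoisted above tls_handle_open_record(), *record_type is set even when flushing the open record fails, which the caller relies on ("tls: always set record_type in tls_process_cmsg"). For reference, the userspace side of this interface: on a kTLS socket (TLS ULP attached and TX keys configured via setsockopt(SOL_TLS, TLS_TX, ...)), a non-data record is sent by attaching the one-byte record type as a control message, a sketch:

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/tls.h>

/* Send one TLS record of the given content type (e.g. 21 = alert) on a
 * kTLS socket fd, via the SOL_TLS/TLS_SET_RECORD_TYPE cmsg that
 * tls_process_cmsg() parses on the kernel side. */
static ssize_t tls_send_record_type(int fd, unsigned char record_type,
				    const void *data, size_t len)
{
	char cbuf[CMSG_SPACE(sizeof(record_type))];
	struct iovec iov = { .iov_base = (void *)data, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_TLS;
	cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
	cmsg->cmsg_len = CMSG_LEN(sizeof(record_type));
	*CMSG_DATA(cmsg) = record_type;

	/* MSG_MORE is rejected with such a cmsg, per the hunk above. */
	return sendmsg(fd, &msg, 0);
}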
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index daac9fd4be7e..d17135369980 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1054,7 +1054,7 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
if (ret == -EINPROGRESS)
num_async++;
else if (ret != -EAGAIN)
- goto send_end;
+ goto end;
}
}
@@ -1112,8 +1112,11 @@ alloc_encrypted:
goto send_end;
tls_ctx->pending_open_record_frags = true;
- if (sk_msg_full(msg_pl))
+ if (sk_msg_full(msg_pl)) {
full_record = true;
+ sk_msg_trim(sk, msg_en,
+ msg_pl->sg.size + prot->overhead_size);
+ }
if (full_record || eor)
goto copied;
@@ -1149,6 +1152,13 @@ alloc_encrypted:
} else if (ret != -EAGAIN)
goto send_end;
}
+
+ /* Transmit if any encryptions have completed */
+ if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
+ cancel_delayed_work(&ctx->tx_work.work);
+ tls_tx_records(sk, msg->msg_flags);
+ }
+
continue;
rollback_iter:
copied -= try_to_copy;
@@ -1204,6 +1214,12 @@ copied:
goto send_end;
}
}
+
+ /* Transmit if any encryptions have completed */
+ if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
+ cancel_delayed_work(&ctx->tx_work.work);
+ tls_tx_records(sk, msg->msg_flags);
+ }
}
continue;
@@ -1223,8 +1239,9 @@ trim_sgl:
goto alloc_encrypted;
}
+send_end:
if (!num_async) {
- goto send_end;
+ goto end;
} else if (num_zc || eor) {
int err;
@@ -1242,7 +1259,7 @@ trim_sgl:
tls_tx_records(sk, msg->msg_flags);
}
-send_end:
+end:
ret = sk_stream_error(sk, msg->msg_flags, ret);
return copied > 0 ? copied : ret;
}
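
Per "tls: don't rely on tx_work during send()", the send loop now transmits records whose async encryption has already completed inline, instead of depending on the delayed tx_work to flush them later. A toy sketch of that pattern (all names illustrative, not the kernel's):

#include <stdio.h>
#include <stdbool.h>

static bool tx_scheduled;	/* stands in for BIT_TX_SCHEDULED */

static void tls_tx_records_stub(void) { printf("transmit completed records\n"); }

static void send_one_chunk(int i)
{
	printf("queue chunk %d for async encrypt\n", i);
	tx_scheduled = true;	/* encryption completion scheduled tx_work */
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		send_one_chunk(i);
		/* Transmit if any encryptions have completed, rather
		 * than waiting for the delayed work to fire. */
		if (tx_scheduled) {
			tx_scheduled = false;	/* cancel_delayed_work() */
			tls_tx_records_stub();
		}
	}
	return 0;
}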
@@ -1637,8 +1654,10 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
if (unlikely(darg->async)) {
err = tls_strp_msg_hold(&ctx->strp, &ctx->async_hold);
- if (err)
- __skb_queue_tail(&ctx->async_hold, darg->skb);
+ if (err) {
+ err = tls_decrypt_async_wait(ctx);
+ darg->async = false;
+ }
return err;
}
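
If tls_strp_msg_hold() fails, the stream parser cannot keep the skb alive until the async decrypt completes, so instead of queuing it anyway the code now waits for all pending decryptions and downgrades the operation to synchronous completion. A toy sketch of the fallback (names illustrative, not the kernel's):

#include <stdio.h>

struct ctx { int pending; };

static int hold_msg_for_async(struct ctx *c)
{
	(void)c;
	return -1;	/* simulate tls_strp_msg_hold() failing */
}

static int wait_async_decrypt(struct ctx *c)
{
	while (c->pending)	/* tls_decrypt_async_wait() equivalent */
		c->pending--;
	return 0;
}

static int decrypt(struct ctx *c, int *async)
{
	*async = 1;
	c->pending++;
	if (hold_msg_for_async(c) != 0) {
		int err = wait_async_decrypt(c);
		*async = 0;	/* completion is now synchronous */
		return err;
	}
	return 0;
}

int main(void)
{
	struct ctx c = { 0 };
	int async;

	printf("decrypt -> %d, async=%d\n", decrypt(&c, &async), async);
	return 0;
}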