summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS3
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c2
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c12
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.h4
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_txrx.c28
-rw-r--r--drivers/net/usb/qmi_wwan.c2
-rw-r--r--include/net/inet_connection_sock.h2
-rw-r--r--include/trace/events/qdisc.h2
-rw-r--r--kernel/bpf/arena.c16
-rw-r--r--kernel/bpf/ringbuf.c31
-rw-r--r--kernel/bpf/verifier.c61
-rw-r--r--net/core/xdp.c4
-rw-r--r--net/dccp/ipv4.c7
-rw-r--r--net/dccp/ipv6.c7
-rw-r--r--net/ipv4/inet_connection_sock.c17
-rw-r--r--net/ipv4/tcp_input.c45
-rw-r--r--net/unix/af_unix.c37
-rw-r--r--tools/testing/selftests/bpf/Makefile2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c56
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_write.c46
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c146
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_movsx.c63
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c734
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c436
28 files changed, 1274 insertions, 496 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 972e1c8fec86..b24bb64604f7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4083,12 +4083,13 @@ F: kernel/bpf/ringbuf.c
BPF [SECURITY & LSM] (Security Audit and Enforcement using BPF)
M: KP Singh <kpsingh@kernel.org>
-R: Matt Bobrowski <mattbobrowski@google.com>
+M: Matt Bobrowski <mattbobrowski@google.com>
L: bpf@vger.kernel.org
S: Maintained
F: Documentation/bpf/prog_lsm.rst
F: include/linux/bpf_lsm.h
F: kernel/bpf/bpf_lsm.c
+F: kernel/trace/bpf_trace.c
F: security/bpf/
BPF [SELFTESTS] (Test Runners & Infrastructure)
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 0433109b42e5..0580b2fee21c 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -2196,7 +2196,7 @@ static void ksz_irq_bus_sync_unlock(struct irq_data *d)
struct ksz_device *dev = kirq->dev;
int ret;
- ret = ksz_write32(dev, kirq->reg_mask, kirq->masked);
+ ret = ksz_write8(dev, kirq->reg_mask, kirq->masked);
if (ret)
dev_err(dev->dev, "failed to change IRQ mask\n");
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5490f0f9c112..23ebeb143987 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2482,6 +2482,18 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
(tx_pool->consumer_index + 1) % tx_pool->num_buffers;
tx_buff = &tx_pool->tx_buff[bufidx];
+
+ /* Sanity checks on our free map to make sure it points to an index
+ * that is not being occupied by another skb. If skb memory is
+ * not freed then we see congestion control kick in and halt tx.
+ */
+ if (unlikely(tx_buff->skb)) {
+ dev_warn_ratelimited(dev, "TX free map points to untracked skb (%s %d idx=%d)\n",
+ skb_is_gso(skb) ? "tso_pool" : "tx_pool",
+ queue_num, bufidx);
+ dev_kfree_skb_any(tx_buff->skb);
+ }
+
tx_buff->skb = skb;
tx_buff->index = bufidx;
tx_buff->pool_index = queue_num;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index d087cf954f75..608ad31a9702 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -2798,6 +2798,8 @@ static int add_adev(struct gdma_dev *gd)
if (ret)
goto init_fail;
+ /* madev is owned by the auxiliary device */
+ madev = NULL;
ret = auxiliary_device_add(adev);
if (ret)
goto add_fail;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index f30eee4a5a80..b6c01a88098d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -375,7 +375,9 @@ typedef void (*ionic_cq_done_cb)(void *done_arg);
unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do,
ionic_cq_cb cb, ionic_cq_done_cb done_cb,
void *done_arg);
-unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do);
+unsigned int ionic_tx_cq_service(struct ionic_cq *cq,
+ unsigned int work_to_do,
+ bool in_napi);
int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev,
struct ionic_queue *q, unsigned int index, const char *name,
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 1934e9d6d9e4..1837a30ba08a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1189,7 +1189,7 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
ionic_rx_service, NULL, NULL);
if (lif->hwstamp_txq)
- tx_work = ionic_tx_cq_service(&lif->hwstamp_txq->cq, budget);
+ tx_work = ionic_tx_cq_service(&lif->hwstamp_txq->cq, budget, !!budget);
work_done = max(max(n_work, a_work), max(rx_work, tx_work));
if (work_done < budget && napi_complete_done(napi, work_done)) {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index aed7d9cbce03..9fdd7cd3ef19 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -23,7 +23,8 @@ static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
static void ionic_tx_clean(struct ionic_queue *q,
struct ionic_tx_desc_info *desc_info,
- struct ionic_txq_comp *comp);
+ struct ionic_txq_comp *comp,
+ bool in_napi);
static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell)
{
@@ -944,7 +945,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget)
u32 work_done = 0;
u32 flags = 0;
- work_done = ionic_tx_cq_service(cq, budget);
+ work_done = ionic_tx_cq_service(cq, budget, !!budget);
if (unlikely(!budget))
return budget;
@@ -1028,7 +1029,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
txqcq = lif->txqcqs[qi];
txcq = &lif->txqcqs[qi]->cq;
- tx_work_done = ionic_tx_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT);
+ tx_work_done = ionic_tx_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT, !!budget);
if (unlikely(!budget))
return budget;
@@ -1161,7 +1162,8 @@ static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
static void ionic_tx_clean(struct ionic_queue *q,
struct ionic_tx_desc_info *desc_info,
- struct ionic_txq_comp *comp)
+ struct ionic_txq_comp *comp,
+ bool in_napi)
{
struct ionic_tx_stats *stats = q_to_tx_stats(q);
struct ionic_qcq *qcq = q_to_qcq(q);
@@ -1213,11 +1215,13 @@ static void ionic_tx_clean(struct ionic_queue *q,
desc_info->bytes = skb->len;
stats->clean++;
- napi_consume_skb(skb, 1);
+ napi_consume_skb(skb, likely(in_napi) ? 1 : 0);
}
static bool ionic_tx_service(struct ionic_cq *cq,
- unsigned int *total_pkts, unsigned int *total_bytes)
+ unsigned int *total_pkts,
+ unsigned int *total_bytes,
+ bool in_napi)
{
struct ionic_tx_desc_info *desc_info;
struct ionic_queue *q = cq->bound_q;
@@ -1239,7 +1243,7 @@ static bool ionic_tx_service(struct ionic_cq *cq,
desc_info->bytes = 0;
index = q->tail_idx;
q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
- ionic_tx_clean(q, desc_info, comp);
+ ionic_tx_clean(q, desc_info, comp, in_napi);
if (desc_info->skb) {
pkts++;
bytes += desc_info->bytes;
@@ -1253,7 +1257,9 @@ static bool ionic_tx_service(struct ionic_cq *cq,
return true;
}
-unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do)
+unsigned int ionic_tx_cq_service(struct ionic_cq *cq,
+ unsigned int work_to_do,
+ bool in_napi)
{
unsigned int work_done = 0;
unsigned int bytes = 0;
@@ -1262,7 +1268,7 @@ unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do)
if (work_to_do == 0)
return 0;
- while (ionic_tx_service(cq, &pkts, &bytes)) {
+ while (ionic_tx_service(cq, &pkts, &bytes, in_napi)) {
if (cq->tail_idx == cq->num_descs - 1)
cq->done_color = !cq->done_color;
cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1);
@@ -1288,7 +1294,7 @@ void ionic_tx_flush(struct ionic_cq *cq)
{
u32 work_done;
- work_done = ionic_tx_cq_service(cq, cq->num_descs);
+ work_done = ionic_tx_cq_service(cq, cq->num_descs, false);
if (work_done)
ionic_intr_credits(cq->idev->intr_ctrl, cq->bound_intr->index,
work_done, IONIC_INTR_CRED_RESET_COALESCE);
@@ -1305,7 +1311,7 @@ void ionic_tx_empty(struct ionic_queue *q)
desc_info = &q->tx_info[q->tail_idx];
desc_info->bytes = 0;
q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
- ionic_tx_clean(q, desc_info, NULL);
+ ionic_tx_clean(q, desc_info, NULL, false);
if (desc_info->skb) {
pkts++;
bytes += desc_info->bytes;
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 663e46348ce3..386d62769ded 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1372,6 +1372,8 @@ static const struct usb_device_id products[] = {
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1260, 2)}, /* Telit LE910Cx */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1261, 2)}, /* Telit LE910Cx */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x3000, 0)}, /* Telit FN912 series */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x3001, 0)}, /* Telit FN912 series */
{QMI_FIXED_INTF(0x1c9e, 0x9801, 3)}, /* Telewell TW-3G HSPA+ */
{QMI_FIXED_INTF(0x1c9e, 0x9803, 4)}, /* Telewell TW-3G HSPA+ */
{QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 7d6b1254c92d..c0deaafebfdc 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -263,7 +263,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
struct request_sock *req,
struct sock *child);
-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
unsigned long timeout);
struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
struct request_sock *req,
diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
index f1b5e816e7e5..ff33f41a9db7 100644
--- a/include/trace/events/qdisc.h
+++ b/include/trace/events/qdisc.h
@@ -81,7 +81,7 @@ TRACE_EVENT(qdisc_reset,
TP_ARGS(q),
TP_STRUCT__entry(
- __string( dev, qdisc_dev(q)->name )
+ __string( dev, qdisc_dev(q) ? qdisc_dev(q)->name : "(null)" )
__string( kind, q->ops->id )
__field( u32, parent )
__field( u32, handle )
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 583ee4fe48ef..e52b3ad231b9 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -212,6 +212,7 @@ static u64 arena_map_mem_usage(const struct bpf_map *map)
struct vma_list {
struct vm_area_struct *vma;
struct list_head head;
+ atomic_t mmap_count;
};
static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
@@ -221,20 +222,30 @@ static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
vml = kmalloc(sizeof(*vml), GFP_KERNEL);
if (!vml)
return -ENOMEM;
+ atomic_set(&vml->mmap_count, 1);
vma->vm_private_data = vml;
vml->vma = vma;
list_add(&vml->head, &arena->vma_list);
return 0;
}
+static void arena_vm_open(struct vm_area_struct *vma)
+{
+ struct vma_list *vml = vma->vm_private_data;
+
+ atomic_inc(&vml->mmap_count);
+}
+
static void arena_vm_close(struct vm_area_struct *vma)
{
struct bpf_map *map = vma->vm_file->private_data;
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
- struct vma_list *vml;
+ struct vma_list *vml = vma->vm_private_data;
+ if (!atomic_dec_and_test(&vml->mmap_count))
+ return;
guard(mutex)(&arena->lock);
- vml = vma->vm_private_data;
+ /* update link list under lock */
list_del(&vml->head);
vma->vm_private_data = NULL;
kfree(vml);
@@ -287,6 +298,7 @@ out:
}
static const struct vm_operations_struct arena_vm_ops = {
+ .open = arena_vm_open,
.close = arena_vm_close,
.fault = arena_vm_fault,
};
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 0ee653a936ea..e20b90c36131 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -51,7 +51,8 @@ struct bpf_ringbuf {
* This prevents a user-space application from modifying the
* position and ruining in-kernel tracking. The permissions of the
* pages depend on who is producing samples: user-space or the
- * kernel.
+ * kernel. Note that the pending counter is placed in the same
+ * page as the producer, so that it shares the same cache line.
*
* Kernel-producer
* ---------------
@@ -70,6 +71,7 @@ struct bpf_ringbuf {
*/
unsigned long consumer_pos __aligned(PAGE_SIZE);
unsigned long producer_pos __aligned(PAGE_SIZE);
+ unsigned long pending_pos;
char data[] __aligned(PAGE_SIZE);
};
@@ -179,6 +181,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
rb->mask = data_sz - 1;
rb->consumer_pos = 0;
rb->producer_pos = 0;
+ rb->pending_pos = 0;
return rb;
}
@@ -404,9 +407,9 @@ bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr)
static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
{
- unsigned long cons_pos, prod_pos, new_prod_pos, flags;
- u32 len, pg_off;
+ unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, flags;
struct bpf_ringbuf_hdr *hdr;
+ u32 len, pg_off, tmp_size, hdr_len;
if (unlikely(size > RINGBUF_MAX_RECORD_SZ))
return NULL;
@@ -424,13 +427,29 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
spin_lock_irqsave(&rb->spinlock, flags);
}
+ pend_pos = rb->pending_pos;
prod_pos = rb->producer_pos;
new_prod_pos = prod_pos + len;
- /* check for out of ringbuf space by ensuring producer position
- * doesn't advance more than (ringbuf_size - 1) ahead
+ while (pend_pos < prod_pos) {
+ hdr = (void *)rb->data + (pend_pos & rb->mask);
+ hdr_len = READ_ONCE(hdr->len);
+ if (hdr_len & BPF_RINGBUF_BUSY_BIT)
+ break;
+ tmp_size = hdr_len & ~BPF_RINGBUF_DISCARD_BIT;
+ tmp_size = round_up(tmp_size + BPF_RINGBUF_HDR_SZ, 8);
+ pend_pos += tmp_size;
+ }
+ rb->pending_pos = pend_pos;
+
+ /* check for out of ringbuf space:
+ * - by ensuring producer position doesn't advance more than
+ * (ringbuf_size - 1) ahead
+ * - by ensuring oldest not yet committed record until newest
+ * record does not span more than (ringbuf_size - 1)
*/
- if (new_prod_pos - cons_pos > rb->mask) {
+ if (new_prod_pos - cons_pos > rb->mask ||
+ new_prod_pos - pend_pos > rb->mask) {
spin_unlock_irqrestore(&rb->spinlock, flags);
return NULL;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 010cfee7ffe9..214a9fa8c6fb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6236,6 +6236,7 @@ static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
}
reg->u32_min_value = 0;
reg->u32_max_value = U32_MAX;
+ reg->var_off = tnum_subreg(tnum_unknown);
}
static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
@@ -6280,6 +6281,7 @@ static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
reg->s32_max_value = s32_max;
reg->u32_min_value = (u32)s32_min;
reg->u32_max_value = (u32)s32_max;
+ reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
return;
}
@@ -12719,6 +12721,16 @@ static bool signed_add32_overflows(s32 a, s32 b)
return res < a;
}
+static bool signed_add16_overflows(s16 a, s16 b)
+{
+ /* Do the add in u16, where overflow is well-defined */
+ s16 res = (s16)((u16)a + (u16)b);
+
+ if (b < 0)
+ return res > a;
+ return res < a;
+}
+
static bool signed_sub_overflows(s64 a, s64 b)
{
/* Do the sub in u64, where overflow is well-defined */
@@ -17448,11 +17460,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
goto skip_inf_loop_check;
}
if (is_may_goto_insn_at(env, insn_idx)) {
- if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+ if (sl->state.may_goto_depth != cur->may_goto_depth &&
+ states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
update_loop_entry(cur, &sl->state);
goto hit;
}
- goto skip_inf_loop_check;
}
if (calls_callback(env, insn_idx)) {
if (states_equal(env, &sl->state, cur, RANGE_WITHIN))
@@ -18730,6 +18742,39 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
return new_prog;
}
+/*
+ * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
+ * jump offset by 'delta'.
+ */
+static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
+{
+ struct bpf_insn *insn = prog->insnsi;
+ u32 insn_cnt = prog->len, i;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ u8 code = insn->code;
+
+ if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
+ BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
+ continue;
+
+ if (insn->code == (BPF_JMP32 | BPF_JA)) {
+ if (i + 1 + insn->imm != tgt_idx)
+ continue;
+ if (signed_add32_overflows(insn->imm, delta))
+ return -ERANGE;
+ insn->imm += delta;
+ } else {
+ if (i + 1 + insn->off != tgt_idx)
+ continue;
+ if (signed_add16_overflows(insn->imm, delta))
+ return -ERANGE;
+ insn->off += delta;
+ }
+ }
+ return 0;
+}
+
static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
u32 off, u32 cnt)
{
@@ -20004,7 +20049,10 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
stack_depth_extra = 8;
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
- insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+ if (insn->off >= 0)
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+ else
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
cnt = 4;
@@ -20546,6 +20594,13 @@ next_insn:
if (!new_prog)
return -ENOMEM;
env->prog = prog = new_prog;
+ /*
+ * If may_goto is a first insn of a prog there could be a jmp
+ * insn that points to it, hence adjust all such jmps to point
+ * to insn after BPF_ST that inits may_goto count.
+ * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
+ */
+ WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1));
}
/* Since poke tab is now finalized, publish aux to tracker. */
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 41693154e426..022c12059cf2 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -295,10 +295,8 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
mutex_lock(&mem_id_lock);
ret = __mem_id_init_hash_table();
mutex_unlock(&mem_id_lock);
- if (ret < 0) {
- WARN_ON(1);
+ if (ret < 0)
return ERR_PTR(ret);
- }
}
xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ff41bd6f99c3..5926159a6f20 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -657,8 +657,11 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_v4_send_response(sk, req))
goto drop_and_free;
- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
- reqsk_put(req);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
+ reqsk_free(req);
+ else
+ reqsk_put(req);
+
return 0;
drop_and_free:
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 85f4b8fdbe5e..da5dba120bc9 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -400,8 +400,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_v6_send_response(sk, req))
goto drop_and_free;
- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
- reqsk_put(req);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
+ reqsk_free(req);
+ else
+ reqsk_put(req);
+
return 0;
drop_and_free:
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d81f74ce0f02..d4f0eff8b20f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1122,25 +1122,34 @@ drop:
inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
}
-static void reqsk_queue_hash_req(struct request_sock *req,
+static bool reqsk_queue_hash_req(struct request_sock *req,
unsigned long timeout)
{
+ bool found_dup_sk = false;
+
+ if (!inet_ehash_insert(req_to_sk(req), NULL, &found_dup_sk))
+ return false;
+
+ /* The timer needs to be setup after a successful insertion. */
timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
mod_timer(&req->rsk_timer, jiffies + timeout);
- inet_ehash_insert(req_to_sk(req), NULL, NULL);
/* before letting lookups find us, make sure all req fields
* are committed to memory and refcnt initialized.
*/
smp_wmb();
refcount_set(&req->rsk_refcnt, 2 + 1);
+ return true;
}
-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
unsigned long timeout)
{
- reqsk_queue_hash_req(req, timeout);
+ if (!reqsk_queue_hash_req(req, timeout))
+ return false;
+
inet_csk_reqsk_queue_added(sk);
+ return true;
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 01d208e0eef3..2e39cb881e20 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2782,13 +2782,37 @@ static void tcp_mtup_probe_success(struct sock *sk)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
}
+/* Sometimes we deduce that packets have been dropped due to reasons other than
+ * congestion, like path MTU reductions or failed client TFO attempts. In these
+ * cases we call this function to retransmit as many packets as cwnd allows,
+ * without reducing cwnd. Given that retransmits will set retrans_stamp to a
+ * non-zero value (and may do so in a later calling context due to TSQ), we
+ * also enter CA_Loss so that we track when all retransmitted packets are ACKed
+ * and clear retrans_stamp when that happens (to ensure later recurring RTOs
+ * are using the correct retrans_stamp and don't declare ETIMEDOUT
+ * prematurely).
+ */
+static void tcp_non_congestion_loss_retransmit(struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (icsk->icsk_ca_state != TCP_CA_Loss) {
+ tp->high_seq = tp->snd_nxt;
+ tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ tp->prior_ssthresh = 0;
+ tp->undo_marker = 0;
+ tcp_set_ca_state(sk, TCP_CA_Loss);
+ }
+ tcp_xmit_retransmit_queue(sk);
+}
+
/* Do a simple retransmit without using the backoff mechanisms in
* tcp_timer. This is used for path mtu discovery.
* The socket is already locked here.
*/
void tcp_simple_retransmit(struct sock *sk)
{
- const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
int mss;
@@ -2828,14 +2852,7 @@ void tcp_simple_retransmit(struct sock *sk)
* in network, but units changed and effective
* cwnd/ssthresh really reduced now.
*/
- if (icsk->icsk_ca_state != TCP_CA_Loss) {
- tp->high_seq = tp->snd_nxt;
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
- tp->prior_ssthresh = 0;
- tp->undo_marker = 0;
- tcp_set_ca_state(sk, TCP_CA_Loss);
- }
- tcp_xmit_retransmit_queue(sk);
+ tcp_non_congestion_loss_retransmit(sk);
}
EXPORT_SYMBOL(tcp_simple_retransmit);
@@ -6295,8 +6312,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
skb_rbtree_walk_from(data)
tcp_mark_skb_lost(sk, data);
- tcp_xmit_retransmit_queue(sk);
- tp->retrans_stamp = 0;
+ tcp_non_congestion_loss_retransmit(sk);
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPFASTOPENACTIVEFAIL);
return true;
@@ -7257,7 +7273,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->tfo_listener = false;
if (!want_cookie) {
req->timeout = tcp_timeout_init((struct sock *)req);
- inet_csk_reqsk_queue_hash_add(sk, req, req->timeout);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req,
+ req->timeout))) {
+ reqsk_free(req);
+ return 0;
+ }
+
}
af_ops->send_synack(sk, dst, &fl, req, &foc,
!want_cookie ? TCP_SYNACK_NORMAL :
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5e695a9a609c..142f56770b77 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2613,10 +2613,24 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
{
struct unix_sock *u = unix_sk(sk);
- if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
- skb_unlink(skb, &sk->sk_receive_queue);
- consume_skb(skb);
- skb = NULL;
+ if (!unix_skb_len(skb)) {
+ struct sk_buff *unlinked_skb = NULL;
+
+ spin_lock(&sk->sk_receive_queue.lock);
+
+ if (copied && (!u->oob_skb || skb == u->oob_skb)) {
+ skb = NULL;
+ } else if (flags & MSG_PEEK) {
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ } else {
+ unlinked_skb = skb;
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ __skb_unlink(unlinked_skb, &sk->sk_receive_queue);
+ }
+
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ consume_skb(unlinked_skb);
} else {
struct sk_buff *unlinked_skb = NULL;
@@ -3093,12 +3107,23 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
case SIOCATMARK:
{
+ struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb;
int answ = 0;
+ mutex_lock(&u->iolock);
+
skb = skb_peek(&sk->sk_receive_queue);
- if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
- answ = 1;
+ if (skb) {
+ struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
+
+ if (skb == oob_skb ||
+ (!oob_skb && !unix_skb_len(skb)))
+ answ = 1;
+ }
+
+ mutex_unlock(&u->iolock);
+
err = put_user(answ, (int __user *)arg);
}
break;
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e0b3887b3d2d..dd49c1d23a60 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -457,7 +457,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
trace_printk.c trace_vprintk.c map_ptr_kern.c \
core_kern.c core_kern_overflow.c test_ringbuf.c \
- test_ringbuf_n.c test_ringbuf_map_key.c
+ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index 4c6f42dae409..da430df45aa4 100644
--- a/