From 61f9e2924f4981d626b3a931fed935f2fa3cb4de Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 22 Oct 2016 18:51:25 +0800 Subject: netfilter: nf_tables: fix *leak* when expr clone fail When nft_expr_clone failed, a series of problems will happen: 1. module refcnt will leak, we call __module_get at the beginning but we forget to put it back if ops->clone returns fail 2. memory will be leaked, if clone fail, we just return NULL and forget to free the alloced element 3. set->nelems will become incorrect when set->size is specified. If clone fail, we should decrease the set->nelems Now this patch fixes these problems. And fortunately, clone fail will only happen on counter expression when memory is exhausted. Fixes: 086f332167d6 ("netfilter: nf_tables: add clone interface to expression operations") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5031e072567b..741dcded5b4f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -542,7 +542,8 @@ void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *data, u64 timeout, gfp_t gfp); -void nft_set_elem_destroy(const struct nft_set *set, void *elem); +void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr); /** * struct nft_set_gc_batch_head - nf_tables set garbage collection batch @@ -693,7 +694,6 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) { int err; - __module_get(src->ops->type->owner); if (src->ops->clone) { dst->ops = src->ops; err = src->ops->clone(dst, src); @@ -702,6 +702,8 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) } else { memcpy(dst, src, src->ops->size); } + + __module_get(src->ops->type->owner); 
return 0; } -- cgit v1.2.3 From f1d505bb762e30bf316ff5d3b604914649d6aed3 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 25 Oct 2016 15:56:39 -0400 Subject: netfilter: nf_tables: fix type mismatch with error return from nft_parse_u32_check Commit 36b701fae12ac ("netfilter: nf_tables: validate maximum value of u32 netlink attributes") introduced nft_parse_u32_check with a return value of "unsigned int", yet on error it returns "-ERANGE". This patch corrects the mismatch by changing the return value to "int", which happens to match the actual users of nft_parse_u32_check already. Found by Coverity, CID 1373930. Note that commit 21a9e0f1568ea ("netfilter: nft_exthdr: fix error handling in nft_exthdr_init()) attempted to address the issue, but did not address the return type of nft_parse_u32_check. Signed-off-by: John W. Linville Cc: Laura Garcia Liebana Cc: Pablo Neira Ayuso Cc: Dan Carpenter Fixes: 36b701fae12ac ("netfilter: nf_tables: validate maximum value...") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 741dcded5b4f..d79d1e9b9546 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -145,7 +145,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) return type == NFT_DATA_VERDICT ? 
NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } -unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); +int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); -- cgit v1.2.3 From cdb436d181d21af4d273b49ec7734eecd6a37fe9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Oct 2016 23:46:17 +0200 Subject: netfilter: conntrack: avoid excess memory allocation This is now a fixed-size extension, so we don't need to pass a variable alloc size. This (harmless) error results in allocating 32 instead of the needed 16 bytes for this extension as the size gets passed twice. Fixes: 23014011ba420 ("netfilter: conntrack: support a fixed size of 128 distinct labels") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 498814626e28..1723a67c0b0a 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -30,8 +30,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) if (net->ct.labels_used == 0) return NULL; - return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, - sizeof(struct nf_conn_labels), GFP_ATOMIC); + return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC); #else return NULL; #endif -- cgit v1.2.3 From 723c038475b78edc9327eb952f95f9881cc9d79d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 30 Oct 2016 11:42:02 -0500 Subject: fs: remove the never implemented aio_fsync file operation Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git 
a/include/linux/fs.h b/include/linux/fs.h index 16d2b6e874d6..ff7bcd9e8398 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1709,7 +1709,6 @@ struct file_operations { int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); int (*fsync) (struct file *, loff_t, loff_t, int datasync); - int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); -- cgit v1.2.3 From 70fe2f48152e60664809e2fed76bbb50c9fa2aa3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 30 Oct 2016 11:42:04 -0500 Subject: aio: fix freeze protection of aio writes Currently we dropped freeze protection of aio writes just after IO was submitted. Thus aio write could be in flight while the filesystem was frozen and that could result in unexpected situation like aio completion wanting to convert extent type on frozen filesystem. Testcase from Dmitry triggering this is like: for ((i=0;i<60;i++));do fsfreeze -f /mnt ;sleep 1;fsfreeze -u /mnt;done & fio --bs=4k --ioengine=libaio --iodepth=128 --size=1g --direct=1 \ --runtime=60 --filename=/mnt/file --name=rand-write --rw=randwrite Fix the problem by dropping freeze protection only once IO is completed in aio_complete(). 
Reported-by: Dmitry Monakhov Signed-off-by: Jan Kara [hch: forward ported on top of various VFS and aio changes] Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index ff7bcd9e8398..dc0478c07b2a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -321,6 +321,7 @@ struct writeback_control; #define IOCB_HIPRI (1 << 3) #define IOCB_DSYNC (1 << 4) #define IOCB_SYNC (1 << 5) +#define IOCB_WRITE (1 << 6) struct kiocb { struct file *ki_filp; -- cgit v1.2.3 From dae399d7fdee84d8f5227a9711d95bb4e9a05d4e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 31 Oct 2016 20:32:33 +0800 Subject: sctp: hold transport instead of assoc when lookup assoc in rx path Prior to this patch, in rx path, before calling lock_sock, it needed to hold assoc when got it by __sctp_lookup_association, in case other place would free/put assoc. But in __sctp_lookup_association, it lookup and hold transport, then got assoc by transport->assoc, then hold assoc and put transport. It means it didn't hold transport, yet it was returned and later on directly assigned to chunk->transport. Without the protection of sock lock, the transport may be freed/put by other places, which would cause a use-after-free issue. This patch is to fix this issue by holding transport instead of assoc. As holding transport can make sure to access assoc is also safe, and actually it looks up assoc by searching transport rhashtable, to hold transport here makes more sense. Note that the function will be renamed later on in another patch. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/sctp/sctp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 87a7f42e7639..31acc3f4f132 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -152,7 +152,7 @@ void sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *, struct sctphdr *, struct sctp_association **, struct sctp_transport **); -void sctp_err_finish(struct sock *, struct sctp_association *); +void sctp_err_finish(struct sock *, struct sctp_transport *); void sctp_icmp_frag_needed(struct sock *, struct sctp_association *, struct sctp_transport *t, __u32 pmtu); void sctp_icmp_redirect(struct sock *, struct sctp_transport *, -- cgit v1.2.3 From 23f4ffedb7d751c7e298732ba91ca75d224bc1a6 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Tue, 1 Nov 2016 23:45:12 +0800 Subject: ip6_tunnel: Clear IP6CB in ip6tunnel_xmit() skb->cb may contain data from previous layers. In the observed scenario, the garbage data were misinterpreted as IP6CB(skb)->frag_max_size, so that small packets sent through the tunnel are mistakenly fragmented. This patch unconditionally clears the control buffer in ip6tunnel_xmit(), which affects ip6_tunnel, ip6_udp_tunnel and ip6_gre. Currently none of these tunnels set IP6CB(skb)->flags, otherwise it needs to be done earlier. Cc: stable@vger.kernel.org Signed-off-by: Eli Cooper Signed-off-by: David S. 
Miller --- include/net/ip6_tunnel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 20ed9699fcd4..1b1cf33cbfb0 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -146,6 +146,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, { int pkt_len, err; + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); if (unlikely(net_xmit_eval(err))) -- cgit v1.2.3 From da96786e26c3ae47316db2b92046b11268c4379c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 2 Nov 2016 12:08:25 -0700 Subject: net: tcp: check skb is non-NULL for exact match on lookups Andrey reported the following error report while running the syzkaller fuzzer: general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 648 Comm: syz-executor Not tainted 4.9.0-rc3+ #333 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8800398c4480 task.stack: ffff88003b468000 RIP: 0010:[] [< inline >] inet_exact_dif_match include/net/tcp.h:808 RIP: 0010:[] [] __inet_lookup_listener+0xb6/0x500 net/ipv4/inet_hashtables.c:219 RSP: 0018:ffff88003b46f270 EFLAGS: 00010202 RAX: 0000000000000004 RBX: 0000000000004242 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffc90000e3c000 RDI: 0000000000000054 RBP: ffff88003b46f2d8 R08: 0000000000004000 R09: ffffffff830910e7 R10: 0000000000000000 R11: 000000000000000a R12: ffffffff867fa0c0 R13: 0000000000004242 R14: 0000000000000003 R15: dffffc0000000000 FS: 00007fb135881700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020cc3000 CR3: 000000006d56a000 CR4: 00000000000006f0 Stack: 0000000000000000 000000000601a8c0 0000000000000000 ffffffff00004242 424200003b9083c2 ffff88003def4041 
ffffffff84e7e040 0000000000000246 ffff88003a0911c0 0000000000000000 ffff88003a091298 ffff88003b9083ae Call Trace: [] tcp_v4_send_reset+0x584/0x1700 net/ipv4/tcp_ipv4.c:643 [] tcp_v4_rcv+0x198b/0x2e50 net/ipv4/tcp_ipv4.c:1718 [] ip_local_deliver_finish+0x332/0xad0 net/ipv4/ip_input.c:216 ... MD5 has a code path that calls __inet_lookup_listener with a null skb, so inet{6}_exact_dif_match needs to check skb against null before pulling the flag. Fixes: a04a480d4392 ("net: Require exact match for TCP socket lookups if dif is l3mdev") Reported-by: Andrey Konovalov Signed-off-by: David Ahern Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/tcp.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ca1ad9ebbc92..a0649973ee5b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -149,7 +149,7 @@ static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) { #if defined(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - ipv6_l3mdev_skb(IP6CB(skb)->flags)) + skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) return true; #endif return false; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5b82d4d94834..304a8e17bc87 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -805,7 +805,7 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) return true; #endif return false; -- cgit v1.2.3 From 9ee6c5dc816aa8256257f2cd4008a9291ec7e985 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 2 Nov 2016 16:36:17 -0400 Subject: ipv4: allow local fragmentation in ip_finish_output_gso() Some configurations (e.g. 
geneve interface with default MTU of 1500 over an ethernet interface with 1500 MTU) result in the transmission of packets that exceed the configured MTU. While this should be considered to be a "bad" configuration, it is still allowed and should not result in the sending of packets that exceed the configured MTU. Fix by dropping the assumption in ip_finish_output_gso() that locally originated gso packets will never need fragmentation. Basic testing using iperf (observing CPU usage and bandwidth) have shown no measurable performance impact for traffic not requiring fragmentation. Fixes: c7ba65d7b649 ("net: ip: push gso skb forwarding handling down the stack") Reported-by: Jan Tluka Signed-off-by: Lance Richardson Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 5413883ac47f..d3a107850a41 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -47,8 +47,7 @@ struct inet_skb_parm { #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) -#define IPSKB_FRAG_SEGS BIT(7) -#define IPSKB_L3SLAVE BIT(8) +#define IPSKB_L3SLAVE BIT(7) u16 frag_max_size; }; -- cgit v1.2.3 From c3f24cfb3e508c70c26ee8569d537c8ca67a36c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 17:14:41 -0700 Subject: dccp: do not release listeners too soon Andrey Konovalov reported following error while fuzzing with syzkaller : IPv4: Attempt to release alive inet socket ffff880068e98940 kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 1 PID: 3905 Comm: a.out Not tainted 4.9.0-rc3+ #333 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88006b9e0000 task.stack: ffff880068770000 RIP: 0010:[] [] selinux_socket_sock_rcv_skb+0xff/0x6a0 
security/selinux/hooks.c:4639 RSP: 0018:ffff8800687771c8 EFLAGS: 00010202 RAX: ffff88006b9e0000 RBX: 1ffff1000d0eee3f RCX: 1ffff1000d1d312a RDX: 1ffff1000d1d31a6 RSI: dffffc0000000000 RDI: 0000000000000010 RBP: ffff880068777360 R08: 0000000000000000 R09: 0000000000000002 R10: dffffc0000000000 R11: 0000000000000006 R12: ffff880068e98940 R13: 0000000000000002 R14: ffff880068777338 R15: 0000000000000000 FS: 00007f00ff760700(0000) GS:ffff88006cd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020008000 CR3: 000000006a308000 CR4: 00000000000006e0 Stack: ffff8800687771e0 ffffffff812508a5 ffff8800686f3168 0000000000000007 ffff88006ac8cdfc ffff8800665ea500 0000000041b58ab3 ffffffff847b5480 ffffffff819eac60 ffff88006b9e0860 ffff88006b9e0868 ffff88006b9e07f0 Call Trace: [] security_sock_rcv_skb+0x75/0xb0 security/security.c:1317 [] sk_filter_trim_cap+0x67/0x10e0 net/core/filter.c:81 [] __sk_receive_skb+0x30/0xa00 net/core/sock.c:460 [] dccp_v4_rcv+0xdb2/0x1910 net/dccp/ipv4.c:873 [] ip_local_deliver_finish+0x332/0xad0 net/ipv4/ip_input.c:216 [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 [< inline >] NF_HOOK ./include/linux/netfilter.h:255 [] ip_local_deliver+0x1c2/0x4b0 net/ipv4/ip_input.c:257 [< inline >] dst_input ./include/net/dst.h:507 [] ip_rcv_finish+0x750/0x1c40 net/ipv4/ip_input.c:396 [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 [< inline >] NF_HOOK ./include/linux/netfilter.h:255 [] ip_rcv+0x96f/0x12f0 net/ipv4/ip_input.c:487 [] __netif_receive_skb_core+0x1897/0x2a50 net/core/dev.c:4213 [] __netif_receive_skb+0x2a/0x170 net/core/dev.c:4251 [] netif_receive_skb_internal+0x1b3/0x390 net/core/dev.c:4279 [] netif_receive_skb+0x48/0x250 net/core/dev.c:4303 [] tun_get_user+0xbd5/0x28a0 drivers/net/tun.c:1308 [] tun_chr_write_iter+0xda/0x190 drivers/net/tun.c:1332 [< inline >] new_sync_write fs/read_write.c:499 [] __vfs_write+0x334/0x570 fs/read_write.c:512 [] vfs_write+0x17b/0x500 
fs/read_write.c:560 [< inline >] SYSC_write fs/read_write.c:607 [] SyS_write+0xd4/0x1a0 fs/read_write.c:599 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 It turns out DCCP calls __sk_receive_skb(), and this broke when lookups no longer took a reference on listeners. Fix this issue by adding a @refcounted parameter to __sk_receive_skb(), so that sock_put() is used only when needed. Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 73c6b008f1b7..92b269709b9a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1596,11 +1596,11 @@ static inline void sock_put(struct sock *sk) void sock_gen_put(struct sock *sk); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, - unsigned int trim_cap); + unsigned int trim_cap, bool refcounted); static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) { - return __sk_receive_skb(sk, skb, nested, 1); + return __sk_receive_skb(sk, skb, nested, 1, true); } static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) -- cgit v1.2.3 From 98430c7aad6a3fdedcc78a0d6780dabb6580dc38 Mon Sep 17 00:00:00 2001 From: Randy Li Date: Tue, 25 Oct 2016 22:15:34 +0800 Subject: phy: Add reset callback for not generic phy Add a dummy function for phy_reset in case the CONFIG_GENERIC_PHY is disabled. 
Signed-off-by: Randy Li Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/phy.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index ee1bed7dbfc6..78bb0d7f6b11 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -253,6 +253,13 @@ static inline int phy_set_mode(struct phy *phy, enum phy_mode mode) return -ENOSYS; } +static inline int phy_reset(struct phy *phy) +{ + if (!phy) + return 0; + return -ENOSYS; +} + static inline int phy_get_bus_width(struct phy *phy) { return -ENOSYS; -- cgit v1.2.3 From 1571875beecd5de9657f73931449bda1b1329b6f Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 3 Nov 2016 16:21:26 +0200 Subject: ACPI / platform: Add support for build-in properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have a couple of drivers, acpi_apd.c and acpi_lpss.c, that need to pass extra built-in properties to the devices they create. Previously the drivers added those properties to the struct device which is a member of the struct acpi_device, but that does not work. Those properties need to be assigned to the struct device of the platform device instead in order for them to become available to the drivers. To fix this, this patch changes acpi_create_platform_device function to take struct property_entry pointer as parameter. Fixes: 20a875e2e86e (serial: 8250_dw: Add quirk for APM X-Gene SoC) Signed-off-by: Heikki Krogerus Tested-by: Yazen Ghannam Tested-by: Jérôme de Bretagne Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. 
Wysocki --- include/linux/acpi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 632ec16a855e..c09936f55166 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -546,7 +546,8 @@ int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *); int acpi_device_modalias(struct device *, char *, int); void acpi_walk_dep_device_list(acpi_handle handle); -struct platform_device *acpi_create_platform_device(struct acpi_device *); +struct platform_device *acpi_create_platform_device(struct acpi_device *, + struct property_entry *); #define ACPI_PTR(_ptr) (_ptr) static inline void acpi_device_set_enumerated(struct acpi_device *adev) -- cgit v1.2.3 From 264048afab27d7c27eedf5394714e0b396d787f7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 8 Nov 2016 15:15:24 +0100 Subject: libceph: initialize last_linger_id with a large integer osdc->last_linger_id is a counter for lreq->linger_id, which is used for watch cookies. Starting with a large integer should ease the task of telling apart kernel and userspace clients. 
Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 96337b15a60d..a8e66344bacc 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -258,6 +258,8 @@ struct ceph_watch_item { struct ceph_entity_addr addr; }; +#define CEPH_LINGER_ID_START 0xffff000000000000ULL + struct ceph_osd_client { struct ceph_client *client; -- cgit v1.2.3 From 5e322beefc8699b5747cfb35539a9496034e4296 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 10 Nov 2016 10:46:07 -0800 Subject: mm, frontswap: make sure allocated frontswap map is assigned Christian Borntraeger reports: With commit 8ea1d2a1985a ("mm, frontswap: convert frontswap_enabled to static key") kmemleak complains about a memory leak in swapon unreferenced object 0x3e09ba56000 (size 32112640): comm "swapon", pid 7852, jiffies 4294968787 (age 1490.770s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: __vmalloc_node_range+0x194/0x2d8 vzalloc+0x58/0x68 SyS_swapon+0xd60/0x12f8 system_call+0xd6/0x270 Turns out kmemleak is right. We now allocate the frontswap map depending on the kernel config (and no longer on the enablement) swapfile.c: [...] if (IS_ENABLED(CONFIG_FRONTSWAP)) frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long)); but later on this is passed along --> enable_swap_info(p, prio, swap_map, cluster_info, frontswap_map); and ignored if frontswap is disabled --> frontswap_init(p->type, frontswap_map); static inline void frontswap_init(unsigned type, unsigned long *map) { if (frontswap_enabled()) __frontswap_init(type, map); } Thing is, that frontswap map is never freed. The leakage is relatively not that bad, because swapon is an infrequent and privileged operation. 
However, if the first frontswap backend is registered after a swap type has been already enabled, it will WARN_ON in frontswap_register_ops() and frontswap will not be available for the swap type. Fix this by making sure the map is assigned by frontswap_init() as long as CONFIG_FRONTSWAP is enabled. Fixes: 8ea1d2a1985a ("mm, frontswap: convert frontswap_enabled to static key") Link: http://lkml.kernel.org/r/20161026134220.2566-1-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reported-by: Christian Borntraeger Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Vrabel Cc: Juergen Gross Cc: "Kirill A. Shutemov" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index c46d2aa16d81..1d18af034554 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -106,8 +106,9 @@ static inline void frontswap_invalidate_area(unsigned type) static inline void frontswap_init(unsigned type, unsigned long *map) { - if (frontswap_enabled()) - __frontswap_init(type, map); +#ifdef CONFIG_FRONTSWAP + __frontswap_init(type, map); +#endif } #endif /* _LINUX_FRONTSWAP_H */ -- cgit v1.2.3 From c6c7d83b9c9e6a8b3e6d84c820ac61fbffc9e396 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 10 Nov 2016 10:46:26 -0800 Subject: Revert "console: don't prefer first registered if DT specifies stdout-path" This reverts commit 05fd007e4629 ("console: don't prefer first registered if DT specifies stdout-path"). The reverted commit changes existing behavior on which many ARM boards rely. Many ARM small-board-computers, like e.g. the Raspberry Pi have both a video output and a serial console. Depending on whether the user is using the device as a more regular computer; or as a headless device we need to have the console on either one or the other. 
Many users rely on the kernel behavior of the console being present on both outputs. Before the reverted commit, the console setup with no console= kernel arguments on an ARM board which sets stdout-path in dt would look like this: [root@localhost ~]# cat /proc/consoles ttyS0 -W- (EC p a) 4:64 tty0 -WU (E p ) 4:1 Whereas after the reverted commit, it looks like this: [root@localhost ~]# cat /proc/consoles ttyS0 -W- (EC p a) 4:64 This commit reverts commit 05fd007e4629 ("console: don't prefer first registered if DT specifies stdout-path") restoring the original behavior. Fixes: 05fd007e4629 ("console: don't prefer first registered if DT specifies stdout-path") Link: http://lkml.kernel.org/r/20161104121135.4780-2-hdegoede@redhat.com Signed-off-by: Hans de Goede Cc: Paul Burton Cc: Rob Herring Cc: Frank Rowand Cc: Thorsten Leemhuis Cc: Greg Kroah-Hartman Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/console.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 3672809234a7..d530c4627e54 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -173,12 +173,6 @@ static inline void console_sysfs_notify(void) #endif extern bool console_suspend_enabled; -#ifdef CONFIG_OF -extern void console_set_by_of(void); -#else -static inline void console_set_by_of(void) {} -#endif - /* Suspend and resume console messages over PM events */ extern void suspend_console(void); extern void resume_console(void); -- cgit v1.2.3 From d7c19b066dcf4bd19c4385e8065558d4e74f9e73 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 10 Nov 2016 10:46:44 -0800 Subject: mm: kmemleak: scan .data.ro_after_init Limit the number of kmemleak false positives by including .data.ro_after_init in memory scanning. To achieve this we need to add symbols for start and end of the section to the linker scripts. 
The problem was uncovered by commit 56989f6d8568 ("genetlink: mark families as __ro_after_init"). Link: http://lkml.kernel.org/r/1478274173-15218-1-git-send-email-jakub.kicinski@netronome.com Reviewed-by: Catalin Marinas Signed-off-by: Jakub Kicinski Cc: Arnd Bergmann Cc: Cong Wang Cc: Johannes Berg Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/sections.h | 3 +++ include/asm-generic/vmlinux.lds.h | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index af0254c09424..4df64a1fc09e 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -14,6 +14,8 @@ * [_sdata, _edata]: contains .data.* sections, may also contain .rodata.* * and/or .init.* sections. * [__start_rodata, __end_rodata]: contains .rodata.* sections + * [__start_data_ro_after_init, __end_data_ro_after_init]: + * contains data.ro_after_init section * [__init_begin, __init_end]: contains .init.* sections, but .init.text.* * may be out of this range on some architectures. * [_sinittext, _einittext]: contains .init.text.* sections @@ -31,6 +33,7 @@ extern char _data[], _sdata[], _edata[]; extern char __bss_start[], __bss_stop[]; extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; +extern char __start_data_ro_after_init[], __end_data_ro_after_init[]; extern char _end[]; extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 30747960bc54..31e1d639abed 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -259,7 +259,10 @@ * own by defining an empty RO_AFTER_INIT_DATA. 
*/ #ifndef RO_AFTER_INIT_DATA -#define RO_AFTER_INIT_DATA *(.data..ro_after_init) +#define RO_AFTER_INIT_DATA \ + __start_data_ro_after_init = .; \ + *(.data..ro_after_init) \ + __end_data_ro_after_init = .; #endif /* -- cgit v1.2.3 From 4e3264d21b90984c2165e8fe5a7b64cf25bc2c2d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 9 Nov 2016 15:36:33 -0800 Subject: bpf: Fix bpf_redirect to an ipip/ip6tnl dev If the bpf program calls bpf_redirect(dev, 0) and dev is an ipip/ip6tnl, it currently includes the mac header. e.g. If dev is ipip, the end result is IP-EthHdr-IP instead of IP-IP. The fix is to pull the mac header. At ingress, skb_postpull_rcsum() is not needed because the ethhdr should have been pulled once already and then got pushed back just before calling the bpf_prog. At egress, this patch calls skb_postpull_rcsum(). If bpf_redirect(dev, BPF_F_INGRESS) is called, it also fails now because it calls dev_forward_skb() which eventually calls eth_type_trans(skb, dev). The eth_type_trans() will set skb->type = PACKET_OTHERHOST because the mac address does not match the redirecting dev->dev_addr. The PACKET_OTHERHOST will eventually cause the ip_rcv() errors out. To fix this, ____dev_forward_skb() is added. Joint work with Daniel Borkmann. Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel") Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 91ee3643ccc8..bf04a46f6d5b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3354,6 +3354,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb); +static __always_inline int ____dev_forward_skb(struct net_device *dev, + struct sk_buff *skb) +{ + if (skb_orphan_frags(skb, GFP_ATOMIC) || + unlikely(!is_skb_forwardable(dev, skb))) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + skb_scrub_packet(skb, true); + skb->priority = 0; + return 0; +} + void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; -- cgit v1.2.3 From 10b217681ddec4fa3ddb375bb188fec504523da4 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 10 Nov 2016 13:21:42 +0200 Subject: net: bpqether.h: remove if_ether.h guard __LINUX_IF_ETHER_H is not defined anywhere, and if_ether.h can keep itself from double inclusion, though it uses a single underscore prefix. Signed-off-by: Baruch Siach Signed-off-by: David S. 
Miller --- include/uapi/linux/bpqether.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpqether.h b/include/uapi/linux/bpqether.h index a6c35e1a89ad..05865edaefda 100644 --- a/include/uapi/linux/bpqether.h +++ b/include/uapi/linux/bpqether.h @@ -5,9 +5,7 @@ * Defines for the BPQETHER pseudo device driver */ -#ifndef __LINUX_IF_ETHER_H #include <linux/if_ether.h> -#endif #define SIOCSBPQETHOPT (SIOCDEVPRIVATE+0) /* reserved */ #define SIOCSBPQETHADDR (SIOCDEVPRIVATE+1) -- cgit v1.2.3 From ac6e780070e30e4c35bd395acfe9191e6268bdd3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Nov 2016 13:12:35 -0800 Subject: tcp: take care of truncations done by sk_filter() With syzkaller help, Marco Grassi found a bug in TCP stack, crashing in tcp_collapse() Root cause is that sk_filter() can truncate the incoming skb, but TCP stack was not really expecting this to happen. It probably was expecting a simple DROP or ACCEPT behavior. We first need to make sure no part of TCP header could be removed. Then we need to adjust TCP_SKB_CB(skb)->end_seq Many thanks to syzkaller team and Marco for giving us a reproducer. Signed-off-by: Eric Dumazet Reported-by: Marco Grassi Reported-by: Vladis Dronov Signed-off-by: David S. 
Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 304a8e17bc87..123979fe12bf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1220,6 +1220,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); +int tcp_filter(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE -- cgit v1.2.3 From 7b5b74efcca00f15c2aec1dc7175bfe34b6ec643 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 10 Nov 2016 19:08:39 -0500 Subject: Revert "include/uapi/linux/atm_zatm.h: include linux/time.h" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit cf00713a655d ("include/uapi/linux/atm_zatm.h: include linux/time.h"). This attempted to fix userspace breakage that no longer existed when the patch was merged. Almost one year earlier, commit 70ba07b675b5 ("atm: remove 'struct zatm_t_hist'") deleted the struct in question. After this patch was merged, we now have to deal with people being unable to include this header in conjunction with standard C library headers like stdlib.h (which linux-atm does). Example breakage: x86_64-pc-linux-gnu-gcc -DHAVE_CONFIG_H -I. -I../.. -I./../q2931 -I./../saal \ -I. 
-DCPPFLAGS_TEST -I../../src/include -O2 -march=native -pipe -g \ -frecord-gcc-switches -freport-bug -Wimplicit-function-declaration \ -Wnonnull -Wstrict-aliasing -Wparentheses -Warray-bounds \ -Wfree-nonheap-object -Wreturn-local-addr -fno-strict-aliasing -Wall \ -Wshadow -Wpointer-arith -Wwrite-strings -Wstrict-prototypes -c zntune.c In file included from /usr/include/linux/atm_zatm.h:17:0, from zntune.c:17: /usr/include/linux/time.h:9:8: error: redefinition of ‘struct timespec’ struct timespec { ^ In file included from /usr/include/sys/select.h:43:0, from /usr/include/sys/types.h:219, from /usr/include/stdlib.h:314, from zntune.c:9: /usr/include/time.h:120:8: note: originally defined here struct timespec ^ Signed-off-by: Mike Frysinger Acked-by: Mikko Rapeli Signed-off-by: David S. Miller --- include/uapi/linux/atm_zatm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h index 5cd4d4d2dd1d..9c9c6ad55f14 100644 --- a/include/uapi/linux/atm_zatm.h +++ b/include/uapi/linux/atm_zatm.h @@ -14,7 +14,6 @@ #include #include -#include #define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc) /* get pool statistics */ -- cgit v1.2.3 From ea08e39230e898844d9de5b60cdbb30067cebfe7 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 11 Nov 2016 13:16:22 -0500 Subject: sunrpc: svc_age_temp_xprts_now should not call setsockopt non-tcp transports This fixes the following panic that can occur with NFSoRDMA. 
general protection fault: 0000 [#1] SMP Modules linked in: rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm mlx5_ib ib_core intel_powerclamp coretemp kvm_intel kvm sg ioatdma ipmi_devintf ipmi_ssif dcdbas iTCO_wdt iTCO_vendor_support pcspkr irqbypass sb_edac shpchp dca crc32_pclmul ghash_clmulni_intel edac_core lpc_ich aesni_intel lrw gf128mul glue_helper ablk_helper mei_me mei ipmi_si cryptd wmi ipmi_msghandler acpi_pad acpi_power_meter nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt ahci fb_sys_fops ttm libahci mlx5_core tg3 crct10dif_pclmul drm crct10dif_common ptp i2c_core libata crc32c_intel pps_core fjes dm_mirror dm_region_hash dm_log dm_mod CPU: 1 PID: 120 Comm: kworker/1:1 Not tainted 3.10.0-514.el7.x86_64 #1 Hardware name: Dell Inc. 
PowerEdge R320/0KM5PX, BIOS 2.4.2 01/29/2015 Workqueue: events check_lifetime task: ffff88031f506dd0 ti: ffff88031f584000 task.ti: ffff88031f584000 RIP: 0010:[] [] _raw_spin_lock_bh+0x17/0x50 RSP: 0018:ffff88031f587ba8 EFLAGS: 00010206 RAX: 0000000000020000 RBX: 20041fac02080072 RCX: ffff88031f587fd8 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 20041fac02080072 RBP: ffff88031f587bb0 R08: 0000000000000008 R09: ffffffff8155be77 R10: ffff880322a59b00 R11: ffffea000bf39f00 R12: 20041fac02080072 R13: 000000000000000d R14: ffff8800c4fbd800 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff880322a40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3c52d4547e CR3: 00000000019ba000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: 20041fac02080002 ffff88031f587bd0 ffffffff81557830 20041fac02080002 ffff88031f587c78 ffff88031f587c40 ffffffff8155ae08 000000010157df32 0000000800000001 ffff88031f587c20 ffffffff81096acb ffffffff81aa37d0 Call Trace: [] lock_sock_nested+0x20/0x50 [] sock_setsockopt+0x78/0x940 [] ? lock_timer_base.isra.33+0x2b/0x50 [] kernel_setsockopt+0x4d/0x50 [] svc_age_temp_xprts_now+0x174/0x1e0 [sunrpc] [] nfsd_inetaddr_event+0x9d/0xd0 [nfsd] [] notifier_call_chain+0x4c/0x70 [] __blocking_notifier_call_chain+0x4d/0x70 [] blocking_notifier_call_chain+0x16/0x20 [] __inet_del_ifa+0x168/0x2d0 [] check_lifetime+0x25f/0x270 [] process_one_work+0x17b/0x470 [] worker_thread+0x126/0x410 [] ? rescuer_thread+0x460/0x460 [] kthread+0xcf/0xe0 [] ? kthread_create_on_node+0x140/0x140 [] ret_from_fork+0x58/0x90 [] ? 
kthread_create_on_node+0x140/0x140 Code: ca 75 f1 5d c3 0f 1f 80 00 00 00 00 eb d9 66 0f 1f 44 00 00 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb e8 7e 04 a0 ff b8 00 00 02 00 0f c1 03 89 c2 c1 ea 10 66 39 c2 75 03 5b 5d c3 83 e2 fe 0f RIP [] _raw_spin_lock_bh+0x17/0x50 RSP Signed-off-by: Scott Mayhew Fixes: c3d4879e ("sunrpc: Add a function to close temporary transports immediately") Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index ab02a457da1f..e5d193440374 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -25,6 +25,7 @@ struct svc_xprt_ops { void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); int (*xpo_secure_port)(struct svc_rqst *); + void (*xpo_kill_temp_xprt)(struct svc_xprt *); }; struct svc_xprt_class { -- cgit v1.2.3 From e2174b0c24caca170ca61eda2ae49c9561ff8896 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Nov 2016 20:56:17 +0100 Subject: Revert "ACPICA: FADT support cleanup" Pavel Machek reports that commit 6ea8c546f365 (ACPICA: FADT support cleanup) breaks thermal management on his Thinkpad X60 and T40p, so revert it. Link: https://bugzilla.kernel.org/show_bug.cgi?id=187311 Fixes: 6ea8c546f365 (ACPICA: FADT support cleanup) Reported-by: Pavel Machek Signed-off-by: Rafael J. 
Wysocki --- include/acpi/actbl.h | 164 ++++++++++++++++++++++----------------------------- 1 file changed, 70 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 1b949e08015c..c19700e2a2fe 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -230,72 +230,62 @@ struct acpi_table_facs { /* Fields common to all versions of the FADT */ struct acpi_table_fadt { - struct acpi_table_header header; /* [V1] Common ACPI table header */ - u32 facs; /* [V1] 32-bit physical address of FACS */ - u32 dsdt; /* [V1] 32-bit physical address of DSDT */ - u8 model; /* [V1] System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */ - u8 preferred_profile; /* [V1] Conveys preferred power management profile to OSPM. */ - u16 sci_interrupt; /* [V1] System vector of SCI interrupt */ - u32 smi_command; /* [V1] 32-bit Port address of SMI command port */ - u8 acpi_enable; /* [V1] Value to write to SMI_CMD to enable ACPI */ - u8 acpi_disable; /* [V1] Value to write to SMI_CMD to disable ACPI */ - u8 s4_bios_request; /* [V1] Value to write to SMI_CMD to enter S4BIOS state */ - u8 pstate_control; /* [V1] Processor performance state control */ - u32 pm1a_event_block; /* [V1] 32-bit port address of Power Mgt 1a Event Reg Blk */ - u32 pm1b_event_block; /* [V1] 32-bit port address of Power Mgt 1b Event Reg Blk */ - u32 pm1a_control_block; /* [V1] 32-bit port address of Power Mgt 1a Control Reg Blk */ - u32 pm1b_control_block; /* [V1] 32-bit port address of Power Mgt 1b Control Reg Blk */ - u32 pm2_control_block; /* [V1] 32-bit port address of Power Mgt 2 Control Reg Blk */ - u32 pm_timer_block; /* [V1] 32-bit port address of Power Mgt Timer Ctrl Reg Blk */ - u32 gpe0_block; /* [V1] 32-bit port address of General Purpose Event 0 Reg Blk */ - u32 gpe1_block; /* [V1] 32-bit port address of General Purpose Event 1 Reg Blk */ - u8 pm1_event_length; /* [V1] Byte Length of ports at pm1x_event_block */ - u8 pm1_control_length; /* 
[V1] Byte Length of ports at pm1x_control_block */ - u8 pm2_control_length; /* [V1] Byte Length of ports at pm2_control_block */ - u8 pm_timer_length; /* [V1] Byte Length of ports at pm_timer_block */ - u8 gpe0_block_length; /* [V1] Byte Length of ports at gpe0_block */ - u8 gpe1_block_length; /* [V1] Byte Length of ports at gpe1_block */ - u8 gpe1_base; /* [V1] Offset in GPE number space where GPE1 events start */ - u8 cst_control; /* [V1] Support for the _CST object and C-States change notification */ - u16 c2_latency; /* [V1] Worst case HW latency to enter/exit C2 state */ - u16 c3_latency; /* [V1] Worst case HW latency to enter/exit C3 state */ - u16 flush_size; /* [V1] Processor memory cache line width, in bytes */ - u16 flush_stride; /* [V1] Number of flush strides that need to be read */ - u8 duty_offset; /* [V1] Processor duty cycle index in processor P_CNT reg */ - u8 duty_width; /* [V1] Processor duty cycle value bit width in P_CNT register */ - u8 day_alarm; /* [V1] Index to day-of-month alarm in RTC CMOS RAM */ - u8 month_alarm; /* [V1] Index to month-of-year alarm in RTC CMOS RAM */ - u8 century; /* [V1] Index to century in RTC CMOS RAM */ - u16 boot_flags; /* [V3] IA-PC Boot Architecture Flags (see below for individual flags) */ - u8 reserved; /* [V1] Reserved, must be zero */ - u32 flags; /* [V1] Miscellaneous flag bits (see below for individual flags) */ - /* End of Version 1 FADT fields (ACPI 1.0) */ - - struct acpi_generic_address reset_register; /* [V3] 64-bit address of the Reset register */ - u8 reset_value; /* [V3] Value to write to the reset_register port to reset the system */ - u16 arm_boot_flags; /* [V5] ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */ - u8 minor_revision; /* [V5] FADT Minor Revision (ACPI 5.1) */ - u64 Xfacs; /* [V3] 64-bit physical address of FACS */ - u64 Xdsdt; /* [V3] 64-bit physical address of DSDT */ - struct acpi_generic_address xpm1a_event_block; /* [V3] 64-bit Extended Power Mgt 1a Event Reg 
Blk address */ - struct acpi_generic_address xpm1b_event_block; /* [V3] 64-bit Extended Power Mgt 1b Event Reg Blk address */ - struct acpi_generic_address xpm1a_control_block; /* [V3] 64-bit Extended Power Mgt 1a Control Reg Blk address */ - struct acpi_generic_address xpm1b_control_block; /* [V3] 64-bit Extended Power Mgt 1b Control Reg Blk address */ - struct acpi_generic_address xpm2_control_block; /* [V3] 64-bit Extended Power Mgt 2 Control Reg Blk address */ - struct acpi_generic_address xpm_timer_block; /* [V3] 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */ - struct acpi_generic_address xgpe0_block; /* [V3] 64-bit Extended General Purpose Event 0 Reg Blk address */ - struct acpi_generic_address xgpe1_block; /* [V3] 64-bit Extended General Purpose Event 1 Reg Blk address */ - /* End of Version 3 FADT fields (ACPI 2.0) */ - - struct acpi_generic_address sleep_control; /* [V4] 64-bit Sleep Control register (ACPI 5.0) */ - /* End of Version 4 FADT fields (ACPI 3.0 and ACPI 4.0) (Field was originally reserved in ACPI 3.0) */ - - struct acpi_generic_address sleep_status; /* [V5] 64-bit Sleep Status register (ACPI 5.0) */ - /* End of Version 5 FADT fields (ACPI 5.0) */ - - u64 hypervisor_id; /* [V6] Hypervisor Vendor ID (ACPI 6.0) */ - /* End of Version 6 FADT fields (ACPI 6.0) */ - + struct acpi_table_header header; /* Common ACPI table header */ + u32 facs; /* 32-bit physical address of FACS */ + u32 dsdt; /* 32-bit physical address of DSDT */ + u8 model; /* System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */ + u8 preferred_profile; /* Conveys preferred power management profile to OSPM. 
*/ + u16 sci_interrupt; /* System vector of SCI interrupt */ + u32 smi_command; /* 32-bit Port address of SMI command port */ + u8 acpi_enable; /* Value to write to SMI_CMD to enable ACPI */ + u8 acpi_disable; /* Value to write to SMI_CMD to disable ACPI */ + u8 s4_bios_request; /* Value to write to SMI_CMD to enter S4BIOS state */ + u8 pstate_control; /* Processor performance state control */ + u32 pm1a_event_block; /* 32-bit port address of Power Mgt 1a Event Reg Blk */ + u32 pm1b_event_block; /* 32-bit port address of Power Mgt 1b Event Reg Blk */ + u32 pm1a_control_block; /* 32-bit port address of Power Mgt 1a Control Reg Blk */ + u32 pm1b_control_block; /* 32-bit port address of Power Mgt 1b Control Reg Blk */ + u32 pm2_control_block; /* 32-bit port address of Power Mgt 2 Control Reg Blk */ + u32 pm_timer_block; /* 32-bit port address of Power Mgt Timer Ctrl Reg Blk */ + u32 gpe0_block; /* 32-bit port address of General Purpose Event 0 Reg Blk */ + u32 gpe1_block; /* 32-bit port address of General Purpose Event 1 Reg Blk */ + u8 pm1_event_length; /* Byte Length of ports at pm1x_event_block */ + u8 pm1_control_length; /* Byte Length of ports at pm1x_control_block */ + u8 pm2_control_length; /* Byte Length of ports at pm2_control_block */ + u8 pm_timer_length; /* Byte Length of ports at pm_timer_block */ + u8 gpe0_block_length; /* Byte Length of ports at gpe0_block */ + u8 gpe1_block_length; /* Byte Length of ports at gpe1_block */ + u8 gpe1_base; /* Offset in GPE number space where GPE1 events start */ + u8 cst_control; /* Support for the _CST object and C-States change notification */ + u16 c2_latency; /* Worst case HW latency to enter/exit C2 state */ + u16 c3_latency; /* Worst case HW latency to enter/exit C3 state */ + u16 flush_size; /* Processor memory cache line width, in bytes */ + u16 flush_stride; /* Number of flush strides that need to be read */ + u8 duty_offset; /* Processor duty cycle index in processor P_CNT reg */ + u8 duty_width; /* Processor 
duty cycle value bit width in P_CNT register */ + u8 day_alarm; /* Index to day-of-month alarm in RTC CMOS RAM */ + u8 month_alarm; /* Index to month-of-year alarm in RTC CMOS RAM */ + u8 century; /* Index to century in RTC CMOS RAM */ + u16 boot_flags; /* IA-PC Boot Architecture Flags (see below for individual flags) */ + u8 reserved; /* Reserved, must be zero */ + u32 flags; /* Miscellaneous flag bits (see below for individual flags) */ + struct acpi_generic_address reset_register; /* 64-bit address of the Reset register */ + u8 reset_value; /* Value to write to the reset_register port to reset the system */ + u16 arm_boot_flags; /* ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */ + u8 minor_revision; /* FADT Minor Revision (ACPI 5.1) */ + u64 Xfacs; /* 64-bit physical address of FACS */ + u64 Xdsdt; /* 64-bit physical address of DSDT */ + struct acpi_generic_address xpm1a_event_block; /* 64-bit Extended Power Mgt 1a Event Reg Blk address */ + struct acpi_generic_address xpm1b_event_block; /* 64-bit Extended Power Mgt 1b Event Reg Blk address */ + struct acpi_generic_address xpm1a_control_block; /* 64-bit Extended Power Mgt 1a Control Reg Blk address */ + struct acpi_generic_address xpm1b_control_block; /* 64-bit Extended Power Mgt 1b Control Reg Blk address */ + struct acpi_generic_address xpm2_control_block; /* 64-bit Extended Power Mgt 2 Control Reg Blk address */ + struct acpi_generic_address xpm_timer_block; /* 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */ + struct acpi_generic_address xgpe0_block; /* 64-bit Extended General Purpose Event 0 Reg Blk address */ + struct acpi_generic_address xgpe1_block; /* 64-bit Extended General Purpose Event 1 Reg Blk address */ + struct acpi_generic_address sleep_control; /* 64-bit Sleep Control register (ACPI 5.0) */ + struct acpi_generic_address sleep_status; /* 64-bit Sleep Status register (ACPI 5.0) */ + u64 hypervisor_id; /* Hypervisor Vendor ID (ACPI 6.0) */ }; /* Masks for FADT IA-PC 
Boot Architecture Flags (boot_flags) [Vx]=Introduced in this FADT revision */ @@ -311,8 +301,8 @@ struct acpi_table_fadt { /* Masks for FADT ARM Boot Architecture Flags (arm_boot_flags) ACPI 5.1 */ -#define ACPI_FADT_PSCI_COMPLIANT (1) /* 00: [V5] PSCI 0.2+ is implemented */ -#define ACPI_FADT_PSCI_USE_HVC (1<<1) /* 01: [V5] HVC must be used instead of SMC as the PSCI conduit */ +#define ACPI_FADT_PSCI_COMPLIANT (1) /* 00: [V5+] PSCI 0.2+ is implemented */ +#define ACPI_FADT_PSCI_USE_HVC (1<<1) /* 01: [V5+] HVC must be used instead of SMC as the PSCI conduit */ /* Masks for FADT flags */ @@ -409,34 +399,20 @@ struct acpi_table_desc { * match the expected length. In other words, the length of the * FADT is the bottom line as to what the version really is. * - * NOTE: There is no officialy released V2 of the FADT. This - * version was used only for prototyping and testing during the - * 32-bit to 64-bit transition. V3 was the first official 64-bit - * version of the FADT. - * - * Update this list of defines when a new version of the FADT is - * added to the ACPI specification. Note that the FADT version is - * only incremented when new fields are appended to the existing - * version. Therefore, the FADT version is competely independent - * from the version of the ACPI specification where it is - * defined. 
- * - * For reference, the various FADT lengths are as follows: - * FADT V1 size: 0x074 ACPI 1.0 - * FADT V3 size: 0x0F4 ACPI 2.0 - * FADT V4 size: 0x100 ACPI 3.0 and ACPI 4.0 - * FADT V5 size: 0x10C ACPI 5.0 - * FADT V6 size: 0x114 ACPI 6.0 + * For reference, the values below are as follows: + * FADT V1 size: 0x074 + * FADT V2 size: 0x084 + * FADT V3 size: 0x0F4 + * FADT V4 size: 0x0F4 + * FADT V5 size: 0x10C + * FADT V6 size: 0x114 */ -#define ACPI_FADT_V1_SIZE (u32) (ACPI_FADT_OFFSET (flags) + 4) /* ACPI 1.0 */ -#define ACPI_FADT_V3_SIZE (u32) (ACPI_FADT_OFFSET (sleep_control)) /* ACPI 2.0 */ -#define ACPI_FADT_V4_SIZE (u32) (ACPI_FADT_OFFSET (sleep_status)) /* ACPI 3.0 and ACPI 4.0 */ -#define ACPI_FADT_V5_SIZE (u32) (ACPI_FADT_OFFSET (hypervisor_id)) /* ACPI 5.0 */ -#define ACPI_FADT_V6_SIZE (u32) (sizeof (struct acpi_table_fadt)) /* ACPI 6.0 */ - -/* Update these when new FADT versions are added */ +#define ACPI_FADT_V1_SIZE (u32) (ACPI_FADT_OFFSET (flags) + 4) +#define ACPI_FADT_V2_SIZE (u32) (ACPI_FADT_OFFSET (minor_revision) + 1) +#define ACPI_FADT_V3_SIZE (u32) (ACPI_FADT_OFFSET (sleep_control)) +#define ACPI_FADT_V5_SIZE (u32) (ACPI_FADT_OFFSET (hypervisor_id)) +#define ACPI_FADT_V6_SIZE (u32) (sizeof (struct acpi_table_fadt)) -#define ACPI_FADT_MAX_VERSION 6 #define ACPI_FADT_CONFORMANCE "ACPI 6.1 (FADT version 6)" #endif /* __ACTBL_H__ */ -- cgit v1.2.3 From e88a2766143a27bfe6704b4493b214de4094cf29 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Nov 2016 16:28:42 -0800 Subject: gro_cells: mark napi struct as not busy poll candidates Rolf Neugebauer reported very long delays at netns dismantle. Eric W. Biederman was kind enough to look at this problem and noticed synchronize_net() occurring from netif_napi_del() that was added in linux-4.5 Busy polling makes no sense for tunnels NAPI. If busy poll is used for sessions over tunnels, the poller will need to poll the physical device queue anyway. 
netif_tx_napi_add() could be used here, but function name is misleading, and renaming it is not stable material, so set NAPI_STATE_NO_BUSY_POLL bit directly. This will avoid inserting gro_cells napi structures in napi_hash[] and avoid the problematic synchronize_net() (per possible cpu) that Rolf reported. Fixes: 93d05d4a320c ("net: provide generic busy polling to all NAPI drivers") Signed-off-by: Eric Dumazet Reported-by: Rolf Neugebauer Reported-by: Eric W. Biederman Acked-by: Cong Wang Tested-by: Rolf Neugebauer Signed-off-by: David S. Miller --- include/net/gro_cells.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index d15214d673b2..2a1abbf8da74 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -68,6 +68,9 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); __skb_queue_head_init(&cell->napi_skbs); + + set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); + netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); napi_enable(&cell->napi); } -- cgit v1.2.3 From f23cc643f9baec7f71f2b74692da3cf03abbbfda Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 14 Nov 2016 15:45:36 -0500 Subject: bpf: fix range arithmetic for bpf map access I made some invalid assumptions with BPF_AND and BPF_MOD that could result in invalid accesses to bpf map entries. Fix this up by doing a few things 1) Kill BPF_MOD support. This doesn't actually get used by the compiler in real life and just adds extra complexity. 2) Fix the logic for BPF_AND, don't allow AND of negative numbers and set the minimum value to 0 for positive AND's. 3) Don't do operations on the ranges if they are set to the limits, as they are by definition undefined, and allowing arithmetic operations on those values could make them appear valid when they really aren't. 
This fixes the testcase provided by Jann as well as a few other theoretical problems. Reported-by: Jann Horn Signed-off-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7035b997aaa5..6aaf425cebc3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -14,7 +14,7 @@ * are obviously wrong for any sort of memory access. */ #define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024) -#define BPF_REGISTER_MIN_RANGE -(1024 * 1024 * 1024) +#define BPF_REGISTER_MIN_RANGE -1 struct bpf_reg_state { enum bpf_reg_type type; @@ -22,7 +22,8 @@ struct bpf_reg_state { * Used to determine if any memory access using this register will * result in a bad access. */ - u64 min_value, max_value; + s64 min_value; + u64 max_value; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; -- cgit v1.2.3 From 3b7093346b326e5d3590c7d49f6aefe6fa5b2c9a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 15 Nov 2016 05:46:06 -0500 Subject: ipv4: Restore fib_table_flush_external function and fix call ordering The patch that removed the FIB offload infrastructure was a bit too aggressive and also removed code needed to clean up after we split the table when additional rules are added. Specifically the function fib_table_flush_external was called at the end of a new rule being added to flush the foreign trie entries from the main trie. I updated the code so that we only call fib_table_flush_external on the main table so that we flush the entries for local from main. This way we don't call it for every rule change which is what was happening previously. Fixes: 347e3b28c1ba2 ("switchdev: remove FIB offload infrastructure") Reported-by: Eric Dumazet Cc: Jiri Pirko Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/ip_fib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b9314b48e39f..f390c3bb05c5 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -243,6 +243,7 @@ int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); +void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES -- cgit v1.2.3 From d5a4b1a540b8a9a44888b383472a80b84765aaa0 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 16 Nov 2016 17:27:34 +0800 Subject: tools/power/acpi: Remove direct kernel source include reference Avoid breaking cross-compiled ACPI tools builds by rearranging the handling of kernel header files. This patch also contains OUTPUT/srctree cleanups in order to make above fix working for various build environments. Fixes: e323c02dee59 (ACPICA: MSVC9: Fix inclusion order issue) Reported-and-tested-by: Yisheng Xie Reported-by: Andy Shevchenko Signed-off-by: Lv Zheng [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- include/acpi/platform/aclinux.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index a5d98d171866..e861a24f06f2 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -191,6 +191,9 @@ #ifndef __init #define __init #endif +#ifndef __iomem +#define __iomem +#endif /* Host-dependent types and defines for user-space ACPICA */ -