From da96786e26c3ae47316db2b92046b11268c4379c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 2 Nov 2016 12:08:25 -0700 Subject: net: tcp: check skb is non-NULL for exact match on lookups Andrey reported the following error report while running the syzkaller fuzzer: general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 648 Comm: syz-executor Not tainted 4.9.0-rc3+ #333 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8800398c4480 task.stack: ffff88003b468000 RIP: 0010:[] [< inline >] inet_exact_dif_match include/net/tcp.h:808 RIP: 0010:[] [] __inet_lookup_listener+0xb6/0x500 net/ipv4/inet_hashtables.c:219 RSP: 0018:ffff88003b46f270 EFLAGS: 00010202 RAX: 0000000000000004 RBX: 0000000000004242 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffc90000e3c000 RDI: 0000000000000054 RBP: ffff88003b46f2d8 R08: 0000000000004000 R09: ffffffff830910e7 R10: 0000000000000000 R11: 000000000000000a R12: ffffffff867fa0c0 R13: 0000000000004242 R14: 0000000000000003 R15: dffffc0000000000 FS: 00007fb135881700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020cc3000 CR3: 000000006d56a000 CR4: 00000000000006f0 Stack: 0000000000000000 000000000601a8c0 0000000000000000 ffffffff00004242 424200003b9083c2 ffff88003def4041 ffffffff84e7e040 0000000000000246 ffff88003a0911c0 0000000000000000 ffff88003a091298 ffff88003b9083ae Call Trace: [] tcp_v4_send_reset+0x584/0x1700 net/ipv4/tcp_ipv4.c:643 [] tcp_v4_rcv+0x198b/0x2e50 net/ipv4/tcp_ipv4.c:1718 [] ip_local_deliver_finish+0x332/0xad0 net/ipv4/ip_input.c:216 ... MD5 has a code path that calls __inet_lookup_listener with a null skb, so inet{6}_exact_dif_match needs to check skb against null before pulling the flag. Fixes: a04a480d4392 ("net: Require exact match for TCP socket lookups if dif is l3mdev") Reported-by: Andrey Konovalov Signed-off-by: David Ahern Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ca1ad9ebbc92..a0649973ee5b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -149,7 +149,7 @@ static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) { #if defined(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - ipv6_l3mdev_skb(IP6CB(skb)->flags)) + skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) return true; #endif return false; -- cgit v1.2.3 From 4e3264d21b90984c2165e8fe5a7b64cf25bc2c2d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 9 Nov 2016 15:36:33 -0800 Subject: bpf: Fix bpf_redirect to an ipip/ip6tnl dev If the bpf program calls bpf_redirect(dev, 0) and dev is an ipip/ip6tnl, it currently includes the mac header. e.g. If dev is ipip, the end result is IP-EthHdr-IP instead of IP-IP. The fix is to pull the mac header. At ingress, skb_postpull_rcsum() is not needed because the ethhdr should have been pulled once already and then got pushed back just before calling the bpf_prog. At egress, this patch calls skb_postpull_rcsum(). If bpf_redirect(dev, BPF_F_INGRESS) is called, it also fails now because it calls dev_forward_skb() which eventually calls eth_type_trans(skb, dev). The eth_type_trans() will set skb->type = PACKET_OTHERHOST because the mac address does not match the redirecting dev->dev_addr. The PACKET_OTHERHOST will eventually cause the ip_rcv() errors out. To fix this, ____dev_forward_skb() is added. Joint work with Daniel Borkmann. Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel") Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 91ee3643ccc8..bf04a46f6d5b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3354,6 +3354,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb); +static __always_inline int ____dev_forward_skb(struct net_device *dev, + struct sk_buff *skb) +{ + if (skb_orphan_frags(skb, GFP_ATOMIC) || + unlikely(!is_skb_forwardable(dev, skb))) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + skb_scrub_packet(skb, true); + skb->priority = 0; + return 0; +} + void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; -- cgit v1.2.3 From ea08e39230e898844d9de5b60cdbb30067cebfe7 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 11 Nov 2016 13:16:22 -0500 Subject: sunrpc: svc_age_temp_xprts_now should not call setsockopt non-tcp transports This fixes the following panic that can occur with NFSoRDMA. general protection fault: 0000 [#1] SMP Modules linked in: rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm mlx5_ib ib_core intel_powerclamp coretemp kvm_intel kvm sg ioatdma ipmi_devintf ipmi_ssif dcdbas iTCO_wdt iTCO_vendor_support pcspkr irqbypass sb_edac shpchp dca crc32_pclmul ghash_clmulni_intel edac_core lpc_ich aesni_intel lrw gf128mul glue_helper ablk_helper mei_me mei ipmi_si cryptd wmi ipmi_msghandler acpi_pad acpi_power_meter nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt ahci fb_sys_fops ttm libahci mlx5_core tg3 crct10dif_pclmul drm crct10dif_common ptp i2c_core libata crc32c_intel pps_core fjes dm_mirror dm_region_hash dm_log dm_mod CPU: 1 PID: 120 Comm: kworker/1:1 Not tainted 3.10.0-514.el7.x86_64 #1 Hardware name: Dell Inc. PowerEdge R320/0KM5PX, BIOS 2.4.2 01/29/2015 Workqueue: events check_lifetime task: ffff88031f506dd0 ti: ffff88031f584000 task.ti: ffff88031f584000 RIP: 0010:[] [] _raw_spin_lock_bh+0x17/0x50 RSP: 0018:ffff88031f587ba8 EFLAGS: 00010206 RAX: 0000000000020000 RBX: 20041fac02080072 RCX: ffff88031f587fd8 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 20041fac02080072 RBP: ffff88031f587bb0 R08: 0000000000000008 R09: ffffffff8155be77 R10: ffff880322a59b00 R11: ffffea000bf39f00 R12: 20041fac02080072 R13: 000000000000000d R14: ffff8800c4fbd800 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff880322a40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3c52d4547e CR3: 00000000019ba000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: 20041fac02080002 ffff88031f587bd0 ffffffff81557830 20041fac02080002 ffff88031f587c78 ffff88031f587c40 ffffffff8155ae08 000000010157df32 0000000800000001 ffff88031f587c20 ffffffff81096acb ffffffff81aa37d0 Call Trace: [] lock_sock_nested+0x20/0x50 [] sock_setsockopt+0x78/0x940 [] ? lock_timer_base.isra.33+0x2b/0x50 [] kernel_setsockopt+0x4d/0x50 [] svc_age_temp_xprts_now+0x174/0x1e0 [sunrpc] [] nfsd_inetaddr_event+0x9d/0xd0 [nfsd] [] notifier_call_chain+0x4c/0x70 [] __blocking_notifier_call_chain+0x4d/0x70 [] blocking_notifier_call_chain+0x16/0x20 [] __inet_del_ifa+0x168/0x2d0 [] check_lifetime+0x25f/0x270 [] process_one_work+0x17b/0x470 [] worker_thread+0x126/0x410 [] ? rescuer_thread+0x460/0x460 [] kthread+0xcf/0xe0 [] ? kthread_create_on_node+0x140/0x140 [] ret_from_fork+0x58/0x90 [] ? kthread_create_on_node+0x140/0x140 Code: ca 75 f1 5d c3 0f 1f 80 00 00 00 00 eb d9 66 0f 1f 44 00 00 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb e8 7e 04 a0 ff b8 00 00 02 00 0f c1 03 89 c2 c1 ea 10 66 39 c2 75 03 5b 5d c3 83 e2 fe 0f RIP [] _raw_spin_lock_bh+0x17/0x50 RSP Signed-off-by: Scott Mayhew Fixes: c3d4879e ("sunrpc: Add a function to close temporary transports immediately") Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index ab02a457da1f..e5d193440374 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -25,6 +25,7 @@ struct svc_xprt_ops { void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); int (*xpo_secure_port)(struct svc_rqst *); + void (*xpo_kill_temp_xprt)(struct svc_xprt *); }; struct svc_xprt_class { -- cgit v1.2.3 From f23cc643f9baec7f71f2b74692da3cf03abbbfda Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 14 Nov 2016 15:45:36 -0500 Subject: bpf: fix range arithmetic for bpf map access I made some invalid assumptions with BPF_AND and BPF_MOD that could result in invalid accesses to bpf map entries. Fix this up by doing a few things 1) Kill BPF_MOD support. This doesn't actually get used by the compiler in real life and just adds extra complexity. 2) Fix the logic for BPF_AND, don't allow AND of negative numbers and set the minimum value to 0 for positive AND's. 3) Don't do operations on the ranges if they are set to the limits, as they are by definition undefined, and allowing arithmetic operations on those values could make them appear valid when they really aren't. This fixes the testcase provided by Jann as well as a few other theoretical problems. Reported-by: Jann Horn Signed-off-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7035b997aaa5..6aaf425cebc3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -14,7 +14,7 @@ * are obviously wrong for any sort of memory access. */ #define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024) -#define BPF_REGISTER_MIN_RANGE -(1024 * 1024 * 1024) +#define BPF_REGISTER_MIN_RANGE -1 struct bpf_reg_state { enum bpf_reg_type type; @@ -22,7 +22,8 @@ struct bpf_reg_state { * Used to determine if any memory access using this register will * result in a bad access. */ - u64 min_value, max_value; + s64 min_value; + u64 max_value; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; -- cgit v1.2.3 From 5d1904204c99596b50a700f092fe49d78edba400 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Thu, 10 Nov 2016 17:16:33 +0800 Subject: mremap: fix race between mremap() and page cleanning Prior to 3.15, there was a race between zap_pte_range() and page_mkclean() where writes to a page could be lost. Dave Hansen discovered by inspection that there is a similar race between move_ptes() and page_mkclean(). We've been able to reproduce the issue by enlarging the race window with a msleep(), but have not been able to hit it without modifying the code. So, we think it's a real issue, but is difficult or impossible to hit in practice. The zap_pte_range() issue is fixed by commit 1cf35d47712d("mm: split 'tlb_flush_mmu()' into tlb flushing and memory freeing parts"). And this patch is to fix the race between page_mkclean() and mremap(). Here is one possible way to hit the race: suppose a process mmapped a file with READ | WRITE and SHARED, it has two threads and they are bound to 2 different CPUs, e.g. CPU1 and CPU2. mmap returned X, then thread 1 did a write to addr X so that CPU1 now has a writable TLB for addr X on it. Thread 2 starts mremaping from addr X to Y while thread 1 cleaned the page and then did another write to the old addr X again. The 2nd write from thread 1 could succeed but the value will get lost. thread 1 thread 2 (bound to CPU1) (bound to CPU2) 1: write 1 to addr X to get a writeable TLB on this CPU 2: mremap starts 3: move_ptes emptied PTE for addr X and setup new PTE for addr Y and then dropped PTL for X and Y 4: page laundering for N by doing fadvise FADV_DONTNEED. When done, pageframe N is deemed clean. 5: *write 2 to addr X 6: tlb flush for addr X 7: munmap (Y, pagesize) to make the page unmapped 8: fadvise with FADV_DONTNEED again to kick the page off the pagecache 9: pread the page from file to verify the value. If 1 is there, it means we have lost the written 2. *the write may or may not cause segmentation fault, it depends on if the TLB is still on the CPU. Please note that this is only one specific way of how the race could occur, it didn't mean that the race could only occur in exact the above config, e.g. more than 2 threads could be involved and fadvise() could be done in another thread, etc. For anonymous pages, they could race between mremap() and page reclaim: THP: a huge PMD is moved by mremap to a new huge PMD, then the new huge PMD gets unmapped/splitted/pagedout before the flush tlb happened for the old huge PMD in move_page_tables() and we could still write data to it. The normal anonymous page has similar situation. To fix this, check for any dirty PTE in move_ptes()/move_huge_pmd() and if any, did the flush before dropping the PTL. If we did the flush for every move_ptes()/move_huge_pmd() call then we do not need to do the flush in move_pages_tables() for the whole range. But if we didn't, we still need to do the whole range flush. Alternatively, we can track which part of the range is flushed in move_ptes()/move_huge_pmd() and which didn't to avoid flushing the whole range in move_page_tables(). But that would require multiple tlb flushes for the different sub-ranges and should be less efficient than the single whole range flush. KBuild test on my Sandybridge desktop doesn't show any noticeable change. v4.9-rc4: real 5m14.048s user 32m19.800s sys 4m50.320s With this commit: real 5m13.888s user 32m19.330s sys 4m51.200s Reported-by: Dave Hansen Signed-off-by: Aaron Lu Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9b9f65d99873..e35e6de633b9 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -22,7 +22,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned char *vec); extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd); + pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); -- cgit v1.2.3 From 910170442944e1f8674fd5ddbeeb8ccd1877ea98 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 12 Sep 2016 10:49:11 +0800 Subject: iommu/vt-d: Fix PASID table allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Somehow I ended up with an off-by-three error in calculating the size of the PASID and PASID State tables, which triggers allocations failures as those tables unfortunately have to be physically contiguous. In fact, even the *correct* maximum size of 8MiB is problematic and is wont to lead to allocation failures. Since I have extracted a promise that this *will* be fixed in hardware, I'm happy to limit it on the current hardware to a maximum of 0x20000 PASIDs, which gives us 1MiB tables — still not ideal, but better than before. Reported by Mika Kuoppala and also by Xunlei Pang who submitted a simpler patch to fix only the allocation (and not the free) to the "correct" limit... which was still problematic. Signed-off-by: David Woodhouse Cc: stable@vger.kernel.org --- include/linux/intel-iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 2d9b650047a5..d49e26c6cdc7 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -429,6 +429,7 @@ struct intel_iommu { struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ struct idr pasid_idr; + u32 pasid_max; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ -- cgit v1.2.3 From 8e5bfa8c1f8471aa4a2d30be631ef2b50e10abaf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 14 Nov 2016 19:46:12 +0100 Subject: sched/autogroup: Do not use autogroup->tg in zombie threads Exactly because for_each_thread() in autogroup_move_group() can't see it and update its ->sched_task_group before _put() and possibly free(). So the exiting task needs another sched_move_task() before exit_notify() and we need to re-introduce the PF_EXITING (or similar) check removed by the previous change for another reason. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: hartsjc@redhat.com Cc: vbendel@redhat.com Cc: vlovejoy@redhat.com Link: http://lkml.kernel.org/r/20161114184612.GA15968@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 348f51b0ec92..e9c009dc3a4a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2567,6 +2567,7 @@ extern void sched_autogroup_create_attach(struct task_struct *p); extern void sched_autogroup_detach(struct task_struct *p); extern void sched_autogroup_fork(struct signal_struct *sig); extern void sched_autogroup_exit(struct signal_struct *sig); +extern void sched_autogroup_exit_task(struct task_struct *p); #ifdef CONFIG_PROC_FS extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); @@ -2576,6 +2577,7 @@ static inline void sched_autogroup_create_attach(struct task_struct *p) { } static inline void sched_autogroup_detach(struct task_struct *p) { } static inline void sched_autogroup_fork(struct signal_struct *sig) { } static inline void sched_autogroup_exit(struct signal_struct *sig) { } +static inline void sched_autogroup_exit_task(struct task_struct *p) { } #endif extern int yield_to(struct task_struct *p, bool preempt); -- cgit v1.2.3 From e784930bd645e7df78c66e7872fec282b0620075 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 2 Nov 2016 16:35:51 -0600 Subject: PCI: Export pcie_find_root_port Export pcie_find_root_port() so we can use it outside of PCIe-AER error injection. Signed-off-by: Johannes Thumshirn Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e49f70dbd9b..a38772a85588 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1928,6 +1928,20 @@ static inline int pci_pcie_type(const struct pci_dev *dev) return (pcie_caps_reg(dev) & PCI_EXP_FLAGS_TYPE) >> 4; } +static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) +{ + while (1) { + if (!pci_is_pcie(dev)) + break; + if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) + return dev; + if (!dev->bus->self) + break; + dev = dev->bus->self; + } + return NULL; +} + void pci_request_acs(void); bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags); bool pci_acs_path_enabled(struct pci_dev *start, -- cgit v1.2.3 From 920c1cd36642ac21a7b2fdc47ab44b9634d570f9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 21 Nov 2016 18:28:36 -0800 Subject: netdevice.h: fix kernel-doc warning Fix kernel-doc warning in (missing ':'): ..//include/linux/netdevice.h:1904: warning: No description found for parameter 'prio_tc_map[TC_BITMASK + 1]' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf04a46f6d5b..e16a2a980ea8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1619,7 +1619,7 @@ enum netdev_priv_flags { * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device * @tc_to_txq: XXX: need comments on this one - * @prio_tc_map XXX: need comments on this one + * @prio_tc_map: XXX: need comments on this one * * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * -- cgit v1.2.3 From b4353708f5a1c084fd73f1b6fd243b142157b173 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 27 Nov 2016 19:20:51 +0200 Subject: Revert "net/mlx4_en: Avoid unregister_netdev at shutdown flow" This reverts commit 9d76931180557270796f9631e2c79b9c7bb3c9fb. Using unregister_netdev at shutdown flow prevents calling the netdev's ndos or trying to access its freed resources. This fixes crashes like the following: Call Trace: [] dev_get_phys_port_id+0x1e/0x30 [] rtnl_fill_ifinfo+0x4be/0xff0 [] rtmsg_ifinfo_build_skb+0x73/0xe0 [] rtmsg_ifinfo.part.27+0x16/0x50 [] rtmsg_ifinfo+0x18/0x20 [] netdev_state_change+0x46/0x50 [] linkwatch_do_dev+0x38/0x50 [] __linkwatch_run_queue+0xf5/0x170 [] linkwatch_event+0x25/0x30 [] process_one_work+0x152/0x400 [] worker_thread+0x125/0x4b0 [] ? rescuer_thread+0x350/0x350 [] kthread+0xca/0xe0 [] ? kthread_park+0x60/0x60 [] ret_from_fork+0x25/0x30 Fixes: 9d7693118055 ("net/mlx4_en: Avoid unregister_netdev at shutdown flow") Signed-off-by: Tariq Toukan Reported-by: Sebastian Ott Reported-by: Steve Wise Cc: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 3be7abd6e722..c9f379689dd0 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -476,7 +476,6 @@ enum { enum { MLX4_INTERFACE_STATE_UP = 1 << 0, MLX4_INTERFACE_STATE_DELETION = 1 << 1, - MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ -- cgit v1.2.3 From 3f65047c853a2a5abcd8ac1984af3452b5df4ada Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 28 Nov 2016 19:24:55 +0100 Subject: of_mdio: add helper to deregister fixed-link PHYs Add helper to deregister fixed-link PHYs registered using of_phy_register_fixed_link(). Convert the two drivers that care to deregister their fixed-link PHYs to use the new helper, but note that most drivers currently fail to do so. Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 2ab233661ae5..a58cca8bcb29 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -29,6 +29,7 @@ struct phy_device *of_phy_attach(struct net_device *dev, extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); extern int of_mdio_parse_addr(struct device *dev, const struct device_node *np); extern int of_phy_register_fixed_link(struct device_node *np); +extern void of_phy_deregister_fixed_link(struct device_node *np); extern bool of_phy_is_fixed_link(struct device_node *np); #else /* CONFIG_OF */ @@ -83,6 +84,9 @@ static inline int of_phy_register_fixed_link(struct device_node *np) { return -ENOSYS; } +static inline void of_phy_deregister_fixed_link(struct device_node *np) +{ +} static inline bool of_phy_is_fixed_link(struct device_node *np) { return false; -- cgit v1.2.3 From 045d599a286bc01daa3510d59272440a17b23c2e Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 30 Nov 2016 15:54:13 -0800 Subject: kasan: update kasan_global for gcc 7 kasan_global struct is part of compiler/runtime ABI. gcc revision 241983 has added a new field to kasan_global struct. Update kernel definition of kasan_global struct to include the new field. Without this patch KASAN is broken with gcc 7. Link: http://lkml.kernel.org/r/1479219743-28682-1-git-send-email-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: [4.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 432f5c97e18f..928e5ca0caee 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -263,7 +263,9 @@ #endif #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP && !__CHECKER__ */ -#if GCC_VERSION >= 50000 +#if GCC_VERSION >= 70000 +#define KASAN_ABI_VERSION 5 +#elif GCC_VERSION >= 50000 #define KASAN_ABI_VERSION 4 #elif GCC_VERSION >= 40902 #define KASAN_ABI_VERSION 3 -- cgit v1.2.3 From 5cbc198ae08d84bd416b672ad8bd1222acd0855c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 30 Nov 2016 15:54:19 -0800 Subject: mm: fix false-positive WARN_ON() in truncate/invalidate for hugetlb Hugetlb pages have ->index in size of the huge pages (PMD_SIZE or PUD_SIZE), not in PAGE_SIZE as other types of pages. This means we cannot user page_to_pgoff() to check whether we've got the right page for the radix-tree index. Let's introduce page_to_index() which would return radix-tree index for given page. We will be able to get rid of this once hugetlb will be switched to multi-order entries. Fixes: fc127da085c2 ("truncate: handle file thp") Link: http://lkml.kernel.org/r/20161123093053.mjbnvn5zwxw5e6lk@black.fi.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: Doug Nelson Tested-by: Doug Nelson Reviewed-by: Naoya Horiguchi Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index dd15d39e1985..7dbe9148b2f8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -374,16 +374,13 @@ static inline struct page *read_mapping_page(struct address_space *mapping, } /* - * Get the offset in PAGE_SIZE. - * (TODO: hugepage should have ->index in PAGE_SIZE) + * Get index of the page with in radix-tree + * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) */ -static inline pgoff_t page_to_pgoff(struct page *page) +static inline pgoff_t page_to_index(struct page *page) { pgoff_t pgoff; - if (unlikely(PageHeadHuge(page))) - return page->index << compound_order(page); - if (likely(!PageTransTail(page))) return page->index; @@ -396,6 +393,18 @@ static inline pgoff_t page_to_pgoff(struct page *page) return pgoff; } +/* + * Get the offset in PAGE_SIZE. + * (TODO: hugepage should have ->index in PAGE_SIZE) + */ +static inline pgoff_t page_to_pgoff(struct page *page) +{ + if (unlikely(PageHeadHuge(page))) + return page->index << compound_order(page); + + return page_to_index(page); +} + /* * Return byte-offset into filesystem object for page. */ -- cgit v1.2.3