From 47376ceba54600cec4dd9e7c4fe8b98e4269633a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Dec 2009 23:25:50 +0100 Subject: reiserfs: Fix reiserfs lock <-> inode mutex dependency inversion The reiserfs lock -> inode mutex dependency gets inverted when we relax the lock while walking to the tree. To fix this, use a specialized version of reiserfs_mutex_lock_safe that takes care of mutex subclasses. Then we can grab the inode mutex with I_MUTEX_XATTR subclass without any reiserfs lock dependency. This fixes the following report: [ INFO: possible circular locking dependency detected ] 2.6.32-06793-gf405425-dirty #2 ------------------------------------------------------- mv/18566 is trying to acquire lock: (&REISERFS_SB(s)->lock){+.+.+.}, at: [] reiserfs_write_lock+0x28= /0x40 but task is already holding lock: (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [] reiserfs_for_each_xattr+0x10c/0x380 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&sb->s_type->i_mutex_key#5/3){+.+.+.}: [] validate_chain+0xa23/0xf70 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] mutex_lock_nested+0x5f/0x2b0 [] reiserfs_for_each_xattr+0x84/0x380 [] reiserfs_delete_xattrs+0x15/0x50 [] reiserfs_delete_inode+0x8f/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] do_unlinkat+0xdb/0x160 [] sys_unlink+0x10/0x20 [] sysenter_do_call+0x12/0x36 -> #0 (&REISERFS_SB(s)->lock){+.+.+.}: [] validate_chain+0xf68/0xf70 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] mutex_lock_nested+0x5f/0x2b0 [] reiserfs_write_lock+0x28/0x40 [] search_by_key+0x1f7b/0x21b0 [] search_by_entry_key+0x1f/0x3b0 [] reiserfs_find_entry+0x77/0x400 [] reiserfs_lookup+0x85/0x130 [] __lookup_hash+0xb4/0x110 [] lookup_one_len+0xb3/0x100 [] reiserfs_for_each_xattr+0x120/0x380 [] reiserfs_delete_xattrs+0x15/0x50 [] reiserfs_delete_inode+0x8f/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] dentry_iput+0x6f/0xf0 [] d_kill+0x24/0x50 [] dput+0x5b/0x120 [] sys_renameat+0x1b9/0x230 [] sys_rename+0x28/0x30 [] sysenter_do_call+0x12/0x36 other info that might help us debug this: 2 locks held by mv/18566: #0: (&sb->s_type->i_mutex_key#5/1){+.+.+.}, at: [] lock_rename+0xcc/0xd0 #1: (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [] reiserfs_for_each_xattr+0x10c/0x380 stack backtrace: Pid: 18566, comm: mv Tainted: G C 2.6.32-06793-gf405425-dirty #2 Call Trace: [] ? printk+0x18/0x1e [] print_circular_bug+0xc0/0xd0 [] validate_chain+0xf68/0xf70 [] ? trace_hardirqs_off+0xb/0x10 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] ? reiserfs_write_lock+0x28/0x40 [] mutex_lock_nested+0x5f/0x2b0 [] ? reiserfs_write_lock+0x28/0x40 [] ? reiserfs_write_lock+0x28/0x40 [] ? schedule+0x27a/0x440 [] reiserfs_write_lock+0x28/0x40 [] search_by_key+0x1f7b/0x21b0 [] ? __lock_acquire+0x506/0xa70 [] ? lock_release_non_nested+0x1e7/0x340 [] ? reiserfs_write_lock+0x28/0x40 [] ? trace_hardirqs_on_caller+0x124/0x170 [] ? trace_hardirqs_on+0xb/0x10 [] ? T.316+0x15/0x1a0 [] ? sched_clock_cpu+0x9d/0x100 [] search_by_entry_key+0x1f/0x3b0 [] ? __mutex_unlock_slowpath+0x9a/0x120 [] ? trace_hardirqs_on_caller+0x124/0x170 [] reiserfs_find_entry+0x77/0x400 [] reiserfs_lookup+0x85/0x130 [] ? sched_clock_cpu+0x9d/0x100 [] __lookup_hash+0xb4/0x110 [] lookup_one_len+0xb3/0x100 [] reiserfs_for_each_xattr+0x120/0x380 [] ? delete_one_xattr+0x0/0x1c0 [] ? math_error+0x22/0x150 [] ? reiserfs_write_lock+0x28/0x40 [] reiserfs_delete_xattrs+0x15/0x50 [] ? reiserfs_write_lock+0x28/0x40 [] reiserfs_delete_inode+0x8f/0x140 [] ? generic_delete_inode+0x5f/0x150 [] ? reiserfs_delete_inode+0x0/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] dentry_iput+0x6f/0xf0 [] d_kill+0x24/0x50 [] dput+0x5b/0x120 [] sys_renameat+0x1b9/0x230 [] ? sched_clock_cpu+0x9d/0x100 [] ? trace_hardirqs_off+0xb/0x10 [] ? cpu_clock+0x4e/0x60 [] ? do_page_fault+0x155/0x370 [] ? up_read+0x16/0x30 [] ? do_page_fault+0x155/0x370 [] sys_rename+0x28/0x30 [] sysenter_do_call+0x12/0x36 Reported-by: Alexander Beregalov Signed-off-by: Frederic Weisbecker Cc: Chris Mason Cc: Ingo Molnar Cc: Thomas Gleixner --- include/linux/reiserfs_fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index a05b4a20768d..4351b49e2b1e 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -97,6 +97,15 @@ static inline void reiserfs_mutex_lock_safe(struct mutex *m, reiserfs_write_lock(s); } +static inline void +reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, + struct super_block *s) +{ + reiserfs_write_unlock(s); + mutex_lock_nested(m, subclass); + reiserfs_write_lock(s); +} + /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. -- cgit v1.2.3 From 2d1c861871d767153538a77c498752b36d4bb4b8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 9 Dec 2009 17:52:13 +1100 Subject: PCI/cardbus: Add a fixup hook and fix powerpc The cardbus code creates PCI devices without ever going through the necessary fixup bits and pieces that normal PCI devices go through. There's in fact a commented out call to pcibios_fixup_bus() in there, it's commented because ... it doesn't work. I could make pcibios_fixup_bus() do the right thing on powerpc easily but I felt it cleaner instead to provide a specific hook pci_fixup_cardbus for which a weak empty implementation is provided by the PCI core. This fixes cardbus on powerbooks and probably all other PowerPC platforms which was broken completely for ever on some platforms and since 2.6.31 on others such as PowerBooks when we made the DMA ops mandatory (since those are setup by the fixups). Acked-by: Dominik Brodowski Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Jesse Barnes --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index bf1e67080849..5da0690d9cee 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -566,6 +566,9 @@ void pcibios_align_resource(void *, struct resource *, resource_size_t, resource_size_t); void pcibios_update_irq(struct pci_dev *, int irq); +/* Weak but can be overriden by arch */ +void pci_fixup_cardbus(struct pci_bus *); + /* Generic PCI functions used internally */ extern struct pci_bus *pci_find_bus(int domain, int busnr); -- cgit v1.2.3 From 9a418af5df03ad133cd8c8f6742b75e542db6392 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 17 Dec 2009 13:55:48 +0100 Subject: mac80211: fix peer HT capabilities I noticed yesterday, because Jeff had noticed a speed regression, cf. bug http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2138 that the SM PS settings for peers were wrong. Instead of overwriting the SM PS settings with the local bits, we need to keep the remote bits. The bug was part of the original HT code from over two years ago, but unfortunately nobody noticed that it makes no sense -- we shouldn't be overwriting the peer's setting with our own but rather keep it intact when masking the peer capabilities with our own. While fixing that, I noticed that the masking of capabilities is completely useless for most of the bits, so also fix those other bits. Finally, I also noticed that PSMP_SUPPORT no longer exists in the final 802.11n version, so also remove that. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d9724a28c0c2..163c840437d6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -832,7 +832,7 @@ struct ieee80211_ht_cap { #define IEEE80211_HT_CAP_DELAY_BA 0x0400 #define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 #define IEEE80211_HT_CAP_DSSSCCK40 0x1000 -#define IEEE80211_HT_CAP_PSMP_SUPPORT 0x2000 +#define IEEE80211_HT_CAP_RESERVED 0x2000 #define IEEE80211_HT_CAP_40MHZ_INTOLERANT 0x4000 #define IEEE80211_HT_CAP_LSIG_TXOP_PROT 0x8000 -- cgit v1.2.3 From cc3e1bea5d87635c519da657303690f5538bb4eb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 23 Dec 2009 06:52:08 -0500 Subject: ext4, jbd2: Add barriers for file systems with exernal journals This is a bit complicated because we are trying to optimize when we send barriers to the fs data disk. We could just throw in an extra barrier to the data disk whenever we send a barrier to the journal disk, but that's not always strictly necessary. We only need to send a barrier during a commit when there are data blocks which are must be written out due to an inode written in ordered mode, or if fsync() depends on the commit to force data blocks to disk. Finally, before we drop transactions from the beginning of the journal during a checkpoint operation, we need to guarantee that any blocks that were flushed out to the data disk are firmly on the rust platter before we drop the transaction from the journal. Thanks to Oleg Drokin for pointing out this flaw in ext3/ext4. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f1011f7f3d41..638ce4554c76 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -653,6 +653,7 @@ struct transaction_s * waiting for it to finish. */ unsigned int t_synchronous_commit:1; + unsigned int t_flushed_data_blocks:1; /* * For use by the filesystem to store fs-specific data -- cgit v1.2.3 From 17bd55d037a02b04d9119511cfd1a4b985d20f63 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 23 Dec 2009 07:57:07 -0500 Subject: fs-writeback: Add helper function to start writeback if idle ext4, at least, would like to start pushing on writeback if it starts to get close to ENOSPC when reserving worst-case blocks for delalloc writes. Writing out delalloc data will convert those worst-case predictions into usually smaller actual usage, freeing up space before we hit ENOSPC based on this speculation. Thanks to Jens for the suggestion for the helper function, & the naming help. I've made the helper return status on whether writeback was started even though I don't plan to use it in the ext4 patch; it seems like it would be potentially useful to test this in some cases. Signed-off-by: Eric Sandeen Acked-by: Jan Kara --- include/linux/writeback.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c18c008f4bbf..76e8903cd204 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -70,6 +70,7 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); -- cgit v1.2.3 From 28f6aeea3f12d37bd258b2c0d5ba891bff4ec479 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 25 Dec 2009 17:30:22 -0800 Subject: net: restore ip source validation when using policy routing and the skb mark: there are cases where a back path validation requires us to use a different routing table for src ip validation than the one used for mapping ingress dst ip. One such a case is transparent proxying where we pretend to be the destination system and therefore the local table is used for incoming packets but possibly a main table would be used on outbound. Make the default behavior to allow the above and if users need to turn on the symmetry via sysctl src_valid_mark Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + include/linux/sysctl.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 699e85c01a4d..b2304929434e 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -81,6 +81,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) #define IN_DEV_MFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), MC_FORWARDING) #define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER) +#define IN_DEV_SRC_VMARK(in_dev) IN_DEV_ORCONF((in_dev), SRC_VMARK) #define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ ACCEPT_SOURCE_ROUTE) #define IN_DEV_ACCEPT_LOCAL(in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 877ba039e6a4..bd27fbc9db62 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -482,6 +482,7 @@ enum NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, NET_IPV4_CONF_ACCEPT_LOCAL=23, + NET_IPV4_CONF_SRC_VMARK=24, __NET_IPV4_CONF_MAX }; -- cgit v1.2.3 From 81744ee44ab2845c16ffd7d6f762f7b4a49a4750 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 29 Dec 2009 08:35:35 +0100 Subject: block: Fix incorrect alignment offset reporting and update documentation queue_sector_alignment_offset returned the wrong value which caused partitions to report an incorrect alignment_offset. Since offset alignment calculation is needed several places it has been split into a separate helper function. The topology stacking function has been updated accordingly. Furthermore, comments have been added to clarify how the stacking function works. Signed-off-by: Martin K. Petersen Tested-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 784a919aa0d0..59b832be3044 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1116,11 +1116,18 @@ static inline int queue_alignment_offset(struct request_queue *q) return q->limits.alignment_offset; } +static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t offset) +{ + unsigned int granularity = max(lim->physical_block_size, lim->io_min); + + offset &= granularity - 1; + return (granularity + lim->alignment_offset - offset) & (granularity - 1); +} + static inline int queue_sector_alignment_offset(struct request_queue *q, sector_t sector) { - return ((sector << 9) - q->limits.alignment_offset) - & (q->limits.io_min - 1); + return queue_limit_alignment_offset(&q->limits, sector << 9); } static inline int bdev_alignment_offset(struct block_device *bdev) -- cgit v1.2.3 From 9bd3f98821a83041e77ee25158b80b535d02d7b4 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Wed, 30 Dec 2009 08:41:07 +0100 Subject: block: blk_rq_err_sectors cleanup blk_rq_err_sectors() seems useless, get rid of it. Signed-off-by: Gui Jianfeng Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 59b832be3044..9b98173a8184 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -845,7 +845,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) * blk_rq_err_bytes() : bytes left till the next error boundary * blk_rq_sectors() : sectors left in the entire request * blk_rq_cur_sectors() : sectors left in the current segment - * blk_rq_err_sectors() : sectors left till the next error boundary */ static inline sector_t blk_rq_pos(const struct request *rq) { @@ -874,11 +873,6 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } -static inline unsigned int blk_rq_err_sectors(const struct request *rq) -{ - return blk_rq_err_bytes(rq) >> 9; -} - /* * Request issue related functions. */ -- cgit v1.2.3 From e96dc9674cb597de4fee757ed005c8465072d13f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:26 +0800 Subject: tracing/syscalls: Fix typo in SYSCALL_DEFINE0 The struct syscall_metadata variable name in SYSCALL_DEFINE0 should be __syscall_meta__##sname instead of __syscall_meta_##sname to match the name that is in SYSCALL_DEFINE1/2/3/4/5/6. This error causes event_enter_##sname->data to point to the wrong location, which causes syscalls which are defined by SYSCALL_DEFINE0() not to be traced. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D2E.1010807@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 65793e90d6f6..207466a49f3d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -195,7 +195,7 @@ struct perf_event_attr; static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ - __syscall_meta_##sname = { \ + __syscall_meta__##sname = { \ .name = "sys_"#sname, \ .nb_args = 0, \ .enter_event = &event_enter__##sname, \ -- cgit v1.2.3 From ed656d8deccc5669afa33387568e7ec6f14e3e94 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Sat, 26 Dec 2009 17:58:11 +0100 Subject: kfifo: Fix typo in comment It's DECLARE_KFIFO, not DECLARED_KFIFO. Signed-off-by: Rolf Eike Beer Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 3d44e9c65a8e..7c6b32a1421c 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -81,7 +81,7 @@ union { \ } /** - * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO + * INIT_KFIFO - Initialize a kfifo declared by DECLARE_KFIFO * @name: name of the declared kfifo datatype */ #define INIT_KFIFO(name) \ -- cgit v1.2.3 From d7f0eea9e431e1b8b0742a74db1a9490730b2a25 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 30 Dec 2009 15:36:42 +0800 Subject: ACPI: introduce kernel parameter acpi_sleep=sci_force_enable Introduce kernel parameter acpi_sleep=sci_force_enable some laptop requires SCI_EN being set directly on resume, or else they hung somewhere in the resume code path. We already have a blacklist for these laptops but we still need this option, especially when debugging some suspend/resume problems, in case there are systems that need this workaround and are not yet in the blacklist. Signed-off-by: Zhang Rui Acked-by: Rafael J. Wysocki Signed-off-by: Len Brown --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ce945d4845fc..36924255c0d5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -251,6 +251,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_s4_no_nvs(void); +void __init acpi_set_sci_en_on_resume(void); #endif /* CONFIG_PM_SLEEP */ struct acpi_osc_context { -- cgit v1.2.3 From 2f5cb43406d0b29b96248f5328a14a6f6abf8ae6 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 30 Dec 2009 08:23:30 +0000 Subject: phylib: Properly reinitialize PHYs after hibernation Since hibernation assumes power loss, we should fully reinitialize PHYs (including platform fixups), as if PHYs were just attached. This patch factors phy_init_hw() out of phy_attach_direct(), then converts mdio_bus to dev_pm_ops and adds an appropriate restore() callback. Signed-off-by: Anton Vorontsov Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index b1368b8f6572..7968defd2fa7 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -447,6 +447,7 @@ struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_device_register(struct phy_device *phy); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); +int phy_init_hw(struct phy_device *phydev); int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface); struct phy_device * phy_attach(struct net_device *dev, -- cgit v1.2.3 From 0f4bd46ec252887f44f1f065b41867cac8f70dfb Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 22 Dec 2009 03:15:43 +0000 Subject: kmsg_dump: Dump on crash_kexec as well crash_kexec gets called before kmsg_dump(KMSG_DUMP_OOPS) if panic_on_oops is set, so the kernel log buffer is not stored for this case. This patch adds a KMSG_DUMP_KEXEC dump type which gets called when crash_kexec() is invoked. To avoid getting double dumps, the old KMSG_DUMP_PANIC is moved below crash_kexec(). The mtdoops driver is modified to handle KMSG_DUMP_KEXEC in the same way as a panic. Signed-off-by: KOSAKI Motohiro Acked-by: Simon Kagstrom Signed-off-by: David Woodhouse --- include/linux/kmsg_dump.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index e32aa268efac..24b44145a886 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -17,6 +17,7 @@ enum kmsg_dump_reason { KMSG_DUMP_OOPS, KMSG_DUMP_PANIC, + KMSG_DUMP_KEXEC, }; /** -- cgit v1.2.3 From 0719d3434747889b314a1e8add776418c4148bcf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Dec 2009 00:39:22 +0100 Subject: reiserfs: Fix reiserfs lock <-> i_xattr_sem dependency inversion i_xattr_sem depends on the reiserfs lock. But after we grab i_xattr_sem, we may relax/relock the reiserfs lock while waiting on a freezed filesystem, creating a dependency inversion between the two locks. In order to avoid the i_xattr_sem -> reiserfs lock dependency, let's create a reiserfs_down_read_safe() that acts like reiserfs_mutex_lock_safe(): relax the reiserfs lock while grabbing another lock to avoid undesired dependencies induced by the heivyweight reiserfs lock. This fixes the following warning: [ 990.005931] ======================================================= [ 990.012373] [ INFO: possible circular locking dependency detected ] [ 990.013233] 2.6.33-rc1 #1 [ 990.013233] ------------------------------------------------------- [ 990.013233] dbench/1891 is trying to acquire lock: [ 990.013233] (&REISERFS_SB(s)->lock){+.+.+.}, at: [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [ 990.013233] but task is already holding lock: [ 990.013233] (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [ 990.013233] which lock already depends on the new lock. [ 990.013233] [ 990.013233] [ 990.013233] the existing dependency chain (in reverse order) is: [ 990.013233] [ 990.013233] -> #1 (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}: [ 990.013233] [] __lock_acquire+0xf9c/0x1560 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] down_write+0x44/0x80 [ 990.013233] [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b [ 990.013233] [ 990.013233] -> #0 (&REISERFS_SB(s)->lock){+.+.+.}: [ 990.013233] [] __lock_acquire+0x12d0/0x1560 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] __mutex_lock_common+0x47/0x3b0 [ 990.013233] [] mutex_lock_nested+0x3e/0x50 [ 990.013233] [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [] reiserfs_prepare_write+0x45/0x180 [ 990.013233] [] reiserfs_xattr_set_handle+0x2a6/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b [ 990.013233] [ 990.013233] other info that might help us debug this: [ 990.013233] [ 990.013233] 2 locks held by dbench/1891: [ 990.013233] #0: (&sb->s_type->i_mutex_key#12){+.+.+.}, at: [] vfs_setxattr+0x78/0xc0 [ 990.013233] #1: (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [ 990.013233] stack backtrace: [ 990.013233] Pid: 1891, comm: dbench Not tainted 2.6.33-rc1 #1 [ 990.013233] Call Trace: [ 990.013233] [] print_circular_bug+0xe9/0xf0 [ 990.013233] [] __lock_acquire+0x12d0/0x1560 [ 990.013233] [] ? reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] __mutex_lock_common+0x47/0x3b0 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? mark_held_locks+0x72/0xa0 [ 990.013233] [] ? __mutex_unlock_slowpath+0xbd/0x140 [ 990.013233] [] ? trace_hardirqs_on_caller+0x14d/0x1a0 [ 990.013233] [] mutex_lock_nested+0x3e/0x50 [ 990.013233] [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [] reiserfs_prepare_write+0x45/0x180 [ 990.013233] [] reiserfs_xattr_set_handle+0x2a6/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] ? __mutex_lock_common+0x284/0x3b0 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] ? sched_clock_cpu+0xb8/0x100 [ 990.013233] [] ? trace_hardirqs_off+0xd/0x10 [ 990.013233] [] ? cpu_clock+0x43/0x50 [ 990.013233] [] ? fget+0xb0/0x110 [ 990.013233] [] ? fget+0x0/0x110 [ 990.013233] [] ? sysret_check+0x27/0x62 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b Reported-and-tested-by: Christian Kujau Signed-off-by: Frederic Weisbecker Cc: Alexander Beregalov Cc: Chris Mason Cc: Ingo Molnar --- include/linux/reiserfs_fs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 4351b49e2b1e..35d3f459b0ac 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -106,6 +106,14 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, reiserfs_write_lock(s); } +static inline void +reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) +{ + reiserfs_write_unlock(s); + down_read(sem); + reiserfs_write_lock(s); +} + /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. -- cgit v1.2.3 From c4a62ca362258d98f42efb282cfbf9b61caffdbe Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Dec 2009 03:20:19 +0100 Subject: reiserfs: Warn on lock relax if taken recursively When we relax the reiserfs lock to avoid creating unwanted dependencies against others locks while grabbing these, we want to ensure it has not been taken recursively, otherwise the lock won't be really relaxed. Only its depth will be decreased. The unwanted dependency would then actually happen. To prevent from that, add a reiserfs_lock_check_recursive() call in the places that need it. Signed-off-by: Frederic Weisbecker Cc: Alexander Beregalov Cc: Chris Mason Cc: Ingo Molnar --- include/linux/reiserfs_fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 35d3f459b0ac..793bf8351ab8 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -62,6 +62,12 @@ void reiserfs_write_unlock(struct super_block *s); int reiserfs_write_lock_once(struct super_block *s); void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); +#ifdef CONFIG_REISERFS_CHECK +void reiserfs_lock_check_recursive(struct super_block *s); +#else +static inline void reiserfs_lock_check_recursive(struct super_block *s) { } +#endif + /* * Several mutexes depend on the write lock. * However sometimes we want to relax the write lock while we hold @@ -92,6 +98,7 @@ void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); static inline void reiserfs_mutex_lock_safe(struct mutex *m, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); mutex_lock(m); reiserfs_write_lock(s); @@ -101,6 +108,7 @@ static inline void reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); mutex_lock_nested(m, subclass); reiserfs_write_lock(s); @@ -109,6 +117,7 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, static inline void reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); down_read(sem); reiserfs_write_lock(s); -- cgit v1.2.3 From 96d07d211739fd2450ac54e81d00fa40fcd4b1bd Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 20 Nov 2009 14:16:33 +0100 Subject: resource: move kernel function inside __KERNEL__ It is an internal function. Move it inside __KERNEL__ ifdef, along with task_struct declaration. Then we get: --- /usr/include/linux/resource.h 2009-09-14 15:09:29.000000000 +0200 +++ usr/include/linux/resource.h 2010-01-04 11:30:54.000000000 +0100 @@ -3,8 +3,6 @@ #include -struct task_struct; - /* * Resource control/accounting header file for linux */ @@ -70,6 +68,5 @@ */ #include -int getrusage(struct task_struct *p, int who, struct rusage *ru); #endif *********** include/linux/Kbuild is untouched, since unifdef is run even on headers-y nowadays. Signed-off-by: Jiri Slaby --- include/linux/resource.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/resource.h b/include/linux/resource.h index 40fc7e626082..f1e914eefeab 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -3,8 +3,6 @@ #include -struct task_struct; - /* * Resource control/accounting header file for linux */ @@ -70,6 +68,12 @@ struct rlimit { */ #include +#ifdef __KERNEL__ + +struct task_struct; + int getrusage(struct task_struct *p, int who, struct rusage __user *ru); +#endif /* __KERNEL__ */ + #endif -- cgit v1.2.3 From 3e10e716abf3c71bdb5d86b8f507f9e72236c9cd Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 19 Nov 2009 17:16:37 +0100 Subject: resource: add helpers for fetching rlimits We want to be sure that compiler fetches the limit variable only once, so add helpers for fetching current and maximal resource limits which do that. Add them to sched.h (instead of resource.h) due to circular dependency sched.h->resource.h->task_struct Alternative would be to create a separate res_access.h or similar. Signed-off-by: Jiri Slaby Cc: James Morris Cc: Heiko Carstens Cc: Andrew Morton Cc: Ingo Molnar --- include/linux/sched.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f2f842db03ce..8d4991be9d53 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2601,6 +2601,28 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ +static inline unsigned long task_rlimit(const struct task_struct *tsk, + unsigned int limit) +{ + return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur); +} + +static inline unsigned long task_rlimit_max(const struct task_struct *tsk, + unsigned int limit) +{ + return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max); +} + +static inline unsigned long rlimit(unsigned int limit) +{ + return task_rlimit(current, limit); +} + +static inline unsigned long rlimit_max(unsigned int limit) +{ + return task_rlimit_max(current, limit); +} + #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 1ae861e652b5457e7fa98ccbc55abea1e207916e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 31 Dec 2009 12:15:54 +0100 Subject: PCI/PM: Use per-device D3 delays It turns out that some PCI devices require extra delays when changing power state from D3 to D0 (and the other way around). Although this is against the PCI specification, we can handle it quite easily by allowing drivers to define arbitrary D3 delays for devices known to require extra time for switching power states. Introduce additional field d3_delay in struct pci_dev and use it to store the value of the device's D0->D3 delay, in miliseconds. Make the PCI PM core code use the per-device d3_delay unless pci_pm_d3_delay is greater (in which case the latter is used). [This also allows the driver to specify d3_delay shorter than the 10 ms required by the PCI standard if the device is known to be able to handle that.] Make the sky2 driver set d3_delay to 150 for devices handled by it. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=14730 which is a listed regression from 2.6.30. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 5da0690d9cee..174e5392e51e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -243,6 +243,7 @@ struct pci_dev { unsigned int d2_support:1; /* Low power state D2 is supported */ unsigned int no_d1d2:1; /* Only allow D0 and D3 */ unsigned int wakeup_prepared:1; + unsigned int d3_delay; /* D3->D0 transition time in ms */ #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state. */ -- cgit v1.2.3 From 59b015133cd0034f5904a76969d73476380aac46 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 5 Jan 2010 17:56:02 -0800 Subject: Input: serio - fix potential deadlock when unbinding drivers sysfs_remove_group() waits for sysfs attributes to be removed, therefore we do not need to worry about driver-specific attributes being accessed after driver has been detached from the device. In fact, attempts to take serio->drv_mutex in attribute methods may lead to the following deadlock: sysfs_read_file() fill_read_buffer() sysfs_get_active_two() psmouse_attr_show_helper() serio_pin_driver() serio_disconnect_driver() mutex_lock(&serio->drv_mutex); <--------> mutex_lock(&serio_drv_mutex); psmouse_disconnect() sysfs_remove_group(... psmouse_attr_group); .... sysfs_deactivate(); wait_for_completion(); Fix this by removing calls to serio_[un]pin_driver() and functions themselves and using driver-private mutexes to serialize access to attribute's set() methods that may change device state. Signed-off-by: Eric W. Biederman Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serio.h b/include/linux/serio.h index e2f3044d4a4a..813d26c247ec 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -136,25 +136,6 @@ static inline void serio_continue_rx(struct serio *serio) spin_unlock_irq(&serio->lock); } -/* - * Use the following functions to pin serio's driver in process context - */ -static inline int serio_pin_driver(struct serio *serio) -{ - return mutex_lock_interruptible(&serio->drv_mutex); -} - -static inline void serio_pin_driver_uninterruptible(struct serio *serio) -{ - mutex_lock(&serio->drv_mutex); -} - -static inline void serio_unpin_driver(struct serio *serio) -{ - mutex_unlock(&serio->drv_mutex); -} - - #endif /* -- cgit v1.2.3 From 8558e3943df1c51c3377cb4e8a52ea484d6f357d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 6 Jan 2010 16:11:06 -0500 Subject: x86, ACPI: delete acpi_boot_table_init() return value cleanup only. setup_arch(), doesn't care care if ACPI initialization succeeded or failed, so delete acpi_boot_table_init()'s return value. Signed-off-by: Len Brown --- include/linux/acpi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 36924255c0d5..b926afe8c03e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -80,7 +80,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size); void __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); -int acpi_boot_table_init (void); +void acpi_boot_table_init (void); int acpi_mps_check (void); int acpi_numa_init (void); @@ -321,9 +321,9 @@ static inline int acpi_boot_init(void) return 0; } -static inline int acpi_boot_table_init(void) +static inline void acpi_boot_table_init(void) { - return 0; + return; } static inline int acpi_mps_check(void) -- cgit v1.2.3 From cfe79c00a2f4f687eed8b7534d1d3d3d35540c29 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 6 Jan 2010 17:23:23 +0000 Subject: NOMMU: Avoiding duplicate icache flushes of shared maps When working with FDPIC, there are many shared mappings of read-only code regions between applications (the C library, applet packages like busybox, etc.), but the current do_mmap_pgoff() function will issue an icache flush whenever a VMA is added to an MM instead of only doing it when the map is initially created. The flush can instead be done when a region is first mmapped PROT_EXEC. Note that we may not rely on the first mapping of a region being executable - it's possible for it to be PROT_READ only, so we have to remember whether we've flushed the region or not, and then flush the entire region when a bit of it is made executable. However, this also affects the brk area. That will no longer be executable. We can mprotect() it to PROT_EXEC on MPU-mode kernels, but for NOMMU mode kernels, when it increases the brk allocation, making sys_brk() flush the extra from the icache should suffice. The brk area probably isn't used by NOMMU programs since the brk area can only use up the leavings from the stack allocation, where the stack allocation is larger than requested. Signed-off-by: David Howells Signed-off-by: Mike Frysinger Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 84a524afb3dc..84d020bed083 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -123,6 +123,8 @@ struct vm_region { struct file *vm_file; /* the backing file or NULL */ atomic_t vm_usage; /* region usage count */ + bool vm_icache_flushed : 1; /* true if the icache has been flushed for + * this region */ }; /* -- cgit v1.2.3 From 6144a85a0e018c19bc4b24f7eb6c1f3f7431813d Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 7 Jan 2010 11:58:36 -0600 Subject: maccess,probe_kernel: Allow arch specific override probe_kernel_(read|write) Some archs such as blackfin, would like to have an arch specific probe_kernel_read() and probe_kernel_write() implementation which can fall back to the generic implementation if no special operations are needed. CC: Thomas Gleixner CC: Ingo Molnar Signed-off-by: Jason Wessel Signed-off-by: Mike Frysinger --- include/linux/uaccess.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6b58367d145e..d512d98dfb7d 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -94,6 +94,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, * happens, handle that and return -EFAULT. */ extern long probe_kernel_read(void *dst, void *src, size_t size); +extern long __probe_kernel_read(void *dst, void *src, size_t size); /* * probe_kernel_write(): safely attempt to write to a location @@ -104,6 +105,7 @@ extern long probe_kernel_read(void *dst, void *src, size_t size); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -extern long probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace __probe_kernel_write(void *dst, void *src, size_t size); #endif /* __LINUX_UACCESS_H__ */ -- cgit v1.2.3 From b11e1eca7ed9c0b5dab21a62c11acc711d9bdda0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 7 Jan 2010 11:58:37 -0600 Subject: kgdb: Fix kernel-doc format error in kgdb.h linux-next-20081022//include/linux/kgdb.h:308): duplicate section name 'Description' and fix typos in that file's kernel-doc comments. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Jason Wessel --- include/linux/kgdb.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 6adcc297e354..19ec41a183f5 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -29,8 +29,7 @@ struct pt_regs; * * On some architectures it is required to skip a breakpoint * exception when it occurs after a breakpoint has been removed. - * This can be implemented in the architecture specific portion of - * for kgdb. + * This can be implemented in the architecture specific portion of kgdb. */ extern int kgdb_skipexception(int exception, struct pt_regs *regs); @@ -65,7 +64,7 @@ struct uart_port; /** * kgdb_breakpoint - compiled in breakpoint * - * This will be impelmented a static inline per architecture. This + * This will be implemented as a static inline per architecture. This * function is called by the kgdb core to execute an architecture * specific trap to cause kgdb to enter the exception processing. * @@ -190,7 +189,7 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code, * @flags: Current IRQ state * * On SMP systems, we need to get the attention of the other CPUs - * and get them be in a known state. This should do what is needed + * and get them into a known state. This should do what is needed * to get the other CPUs to call kgdb_wait(). Note that on some arches, * the NMI approach is not used for rounding up all the CPUs. For example, * in case of MIPS, smp_call_function() is used to roundup CPUs. In -- cgit v1.2.3 From dd3d145d49c5816b79acc6761ebbd842bc50b0ee Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 11 Jan 2010 03:21:48 -0500 Subject: block: Fix discard alignment calculation and printing Discard alignment reporting for partitions was incorrect. Update to match the algorithm used elsewhere. The alignment can be negative (misaligned). Fix format string accordingly. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9b98173a8184..a41bcc8e140f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1148,8 +1148,11 @@ static inline int queue_discard_alignment(struct request_queue *q) static inline int queue_sector_discard_alignment(struct request_queue *q, sector_t sector) { - return ((sector << 9) - q->limits.discard_alignment) - & (q->limits.discard_granularity - 1); + struct queue_limits *lim = &q->limits; + unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); + + return (lim->discard_granularity + lim->discard_alignment - alignment) + & (lim->discard_granularity - 1); } static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) -- cgit v1.2.3 From 17be8c245054b9c7786545af3ba3ca4e54cd4ad9 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 11 Jan 2010 03:21:49 -0500 Subject: block: bdev_stack_limits wrapper DM does not want to know about partition offsets. Add a partition-aware wrapper that DM can use when stacking block devices. Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Reviewed-by: Alasdair G Kergon Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a41bcc8e140f..5c8018977efa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -938,6 +938,8 @@ extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_default_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); +extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, + sector_t offset); extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); -- cgit v1.2.3 From ce289321b7dc1eb108e3df0dec872b7429ef49f7 Mon Sep 17 00:00:00 2001 From: Kirill Afonshin Date: Fri, 8 Jan 2010 22:09:59 +0300 Subject: block: removed unused as_io_context It isn't used anymore, since AS was deleted. Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index a63235996309..78ef023227d4 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -4,32 +4,6 @@ #include #include -/* - * This is the per-process anticipatory I/O scheduler state. - */ -struct as_io_context { - spinlock_t lock; - - void (*dtor)(struct as_io_context *aic); /* destructor */ - void (*exit)(struct as_io_context *aic); /* called on task exit */ - - unsigned long state; - atomic_t nr_queued; /* queued reads & sync writes */ - atomic_t nr_dispatched; /* number of requests gone to the drivers */ - - /* IO History tracking */ - /* Thinktime */ - unsigned long last_end_request; - unsigned long ttime_total; - unsigned long ttime_samples; - unsigned long ttime_mean; - /* Layout pattern */ - unsigned int seek_samples; - sector_t last_request_pos; - u64 seek_total; - sector_t seek_mean; -}; - struct cfq_queue; struct cfq_io_context { void *key; @@ -78,7 +52,6 @@ struct io_context { unsigned long last_waited; /* Time last woken after wait for request */ int nr_batch_requests; /* Number of requests left in the batch */ - struct as_io_context *aic; struct radix_tree_root radix_root; struct hlist_head cic_list; void *ioc_data; -- cgit v1.2.3 From 7af92f8754b87bc78cbfd447d5f4096b25c46682 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 6 Jan 2010 15:45:55 -0800 Subject: genhd: overlapping variable definition This fixes the sparse warning: fs/ext4/super.c:2390:40: warning: symbol 'i' shadows an earlier one fs/ext4/super.c:2368:22: originally declared here Using 'i' in a macro is dubious practice. Signed-off-by: Stephen Hemminger Signed-off-by: Jens Axboe --- include/linux/genhd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c6c0c41af35f..9717081c75ad 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -256,9 +256,9 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, #define part_stat_read(part, field) \ ({ \ typeof((part)->dkstats->field) res = 0; \ - int i; \ - for_each_possible_cpu(i) \ - res += per_cpu_ptr((part)->dkstats, i)->field; \ + unsigned int _cpu; \ + for_each_possible_cpu(_cpu) \ + res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ res; \ }) -- cgit v1.2.3 From 4b529401c5089cf33f7165607cbc2fde43357bfb Mon Sep 17 00:00:00 2001 From: Andreas Fenkart Date: Fri, 8 Jan 2010 14:42:31 -0800 Subject: mm: make totalhigh_pages unsigned long Makes it consistent with the extern declaration, used when CONFIG_HIGHMEM is set Removes redundant casts in printout messages Signed-off-by: Andreas Fenkart Acked-by: Russell King Cc: Ralf Baechle Cc: David Howells Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 211ff4497269..ab2cc20e21a5 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -46,7 +46,7 @@ void kmap_flush_unused(void); static inline unsigned int nr_free_highpages(void) { return 0; } -#define totalhigh_pages 0 +#define totalhigh_pages 0UL #ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) -- cgit v1.2.3 From e992cd9b72a18122bd5c958715623057f110793f Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 8 Jan 2010 14:42:35 -0800 Subject: kmemcheck: make bitfield annotations truly no-ops when disabled It turns out that even zero-sized struct members (int foo[0];) will affect the struct layout, causing us in particular to lose 4 bytes in struct sock. This patch fixes the regression in CONFIG_KMEMCHECK=n case. Reported-by: Eric Dumazet Signed-off-by: Vegard Nossum Acked-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmemcheck.h | 110 ++++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index e880d4cf9e22..08d7dc4ddf40 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -36,6 +36,56 @@ int kmemcheck_hide_addr(unsigned long address); bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size); +/* + * Bitfield annotations + * + * How to use: If you have a struct using bitfields, for example + * + * struct a { + * int x:8, y:8; + * }; + * + * then this should be rewritten as + * + * struct a { + * kmemcheck_bitfield_begin(flags); + * int x:8, y:8; + * kmemcheck_bitfield_end(flags); + * }; + * + * Now the "flags_begin" and "flags_end" members may be used to refer to the + * beginning and end, respectively, of the bitfield (and things like + * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- + * fields should be annotated: + * + * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); + * kmemcheck_annotate_bitfield(a, flags); + */ +#define kmemcheck_bitfield_begin(name) \ + int name##_begin[0]; + +#define kmemcheck_bitfield_end(name) \ + int name##_end[0]; + +#define kmemcheck_annotate_bitfield(ptr, name) \ + do { \ + int _n; \ + \ + if (!ptr) \ + break; \ + \ + _n = (long) &((ptr)->name##_end) \ + - (long) &((ptr)->name##_begin); \ + MAYBE_BUILD_BUG_ON(_n < 0); \ + \ + kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ + } while (0) + +#define kmemcheck_annotate_variable(var) \ + do { \ + kmemcheck_mark_initialized(&(var), sizeof(var)); \ + } while (0) \ + #else #define kmemcheck_enabled 0 @@ -106,60 +156,16 @@ static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) return true; } -#endif /* CONFIG_KMEMCHECK */ - -/* - * Bitfield annotations - * - * How to use: If you have a struct using bitfields, for example - * - * struct a { - * int x:8, y:8; - * }; - * - * then this should be rewritten as - * - * struct a { - * kmemcheck_bitfield_begin(flags); - * int x:8, y:8; - * kmemcheck_bitfield_end(flags); - * }; - * - * Now the "flags_begin" and "flags_end" members may be used to refer to the - * beginning and end, respectively, of the bitfield (and things like - * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- - * fields should be annotated: - * - * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); - * kmemcheck_annotate_bitfield(a, flags); - * - * Note: We provide the same definitions for both kmemcheck and non- - * kmemcheck kernels. This makes it harder to introduce accidental errors. It - * is also allowed to pass NULL pointers to kmemcheck_annotate_bitfield(). - */ -#define kmemcheck_bitfield_begin(name) \ - int name##_begin[0]; - -#define kmemcheck_bitfield_end(name) \ - int name##_end[0]; +#define kmemcheck_bitfield_begin(name) +#define kmemcheck_bitfield_end(name) +#define kmemcheck_annotate_bitfield(ptr, name) \ + do { \ + } while (0) -#define kmemcheck_annotate_bitfield(ptr, name) \ - do { \ - int _n; \ - \ - if (!ptr) \ - break; \ - \ - _n = (long) &((ptr)->name##_end) \ - - (long) &((ptr)->name##_begin); \ - MAYBE_BUILD_BUG_ON(_n < 0); \ - \ - kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ +#define kmemcheck_annotate_variable(var) \ + do { \ } while (0) -#define kmemcheck_annotate_variable(var) \ - do { \ - kmemcheck_mark_initialized(&(var), sizeof(var)); \ - } while (0) \ +#endif /* CONFIG_KMEMCHECK */ #endif /* LINUX_KMEMCHECK_H */ -- cgit v1.2.3 From 7dd65feb6c603e13eba501c34c662259ab38e70e Mon Sep 17 00:00:00 2001 From: Albin Tonnerre Date: Fri, 8 Jan 2010 14:42:42 -0800 Subject: lib: add support for LZO-compressed kernels This patch series adds generic support for creating and extracting LZO-compressed kernel images, as well as support for using such images on the x86 and ARM architectures, and support for creating and using LZO-compressed initrd and initramfs images. Russell King said: : Testing on a Cortex A9 model: : - lzo decompressor is 65% of the time gzip takes to decompress a kernel : - lzo kernel is 9% larger than a gzip kernel : : which I'm happy to say confirms your figures when comparing the two. : : However, when comparing your new gzip code to the old gzip code: : - new is 99% of the size of the old code : - new takes 42% of the time to decompress than the old code : : What this means is that for a proper comparison, the results get even better: : - lzo is 7.5% larger than the old gzip'd kernel image : - lzo takes 28% of the time that the old gzip code took : : So the expense seems definitely worth the effort. The only reason I : can think of ever using gzip would be if you needed the additional : compression (eg, because you have limited flash to store the image.) : : I would argue that the default for ARM should therefore be LZO. This patch: The lzo compressor is worse than gzip at compression, but faster at extraction. Here are some figures for an ARM board I'm working on: Uncompressed size: 3.24Mo gzip 1.61Mo 0.72s lzo 1.75Mo 0.48s So for a compression ratio that is still relatively close to gzip, it's much faster to extract, at least in that case. This part contains: - Makefile routine to support lzo compression - Fixes to the existing lzo compressor so that it can be used in compressed kernels - wrapper around the existing lzo1x_decompress, as it only extracts one block at a time, while we need to extract a whole file here - config dialog for kernel compression [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: cleanup] Signed-off-by: Albin Tonnerre Tested-by: Wu Zhangjin Acked-by: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Tested-by: Russell King Acked-by: Russell King Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/decompress/unlzo.h | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 include/linux/decompress/unlzo.h (limited to 'include/linux') diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h new file mode 100644 index 000000000000..987229752519 --- /dev/null +++ b/include/linux/decompress/unlzo.h @@ -0,0 +1,10 @@ +#ifndef DECOMPRESS_UNLZO_H +#define DECOMPRESS_UNLZO_H + +int unlzo(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error)(char *x)); +#endif -- cgit v1.2.3 From 80884094e34456887ecdbd107d40e72c4a40f9c9 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Fri, 8 Jan 2010 14:43:08 -0800 Subject: gpio: adp5588-gpio: new driver for ADP5588 GPIO expanders Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Cc: Jean Delvare Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/adp5588.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index fc5db826b48e..02c9af374741 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -89,4 +89,16 @@ struct adp5588_kpad_platform_data { unsigned short unlock_key2; /* Unlock Key 2 */ }; +struct adp5588_gpio_platform_data { + unsigned gpio_start; /* GPIO Chip base # */ + unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ + int (*setup)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + int (*teardown)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + void *context; +}; + #endif -- cgit v1.2.3 From a29815a333c6c6e677294bbe5958e771d0aad3fd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 10 Jan 2010 16:28:09 +0200 Subject: core, x86: make LIST_POISON less deadly The list macros use LIST_POISON1 and LIST_POISON2 as undereferencable pointers in order to trap erronous use of freed list_heads. Unfortunately userspace can arrange for those pointers to actually be dereferencable, potentially turning an oops to an expolit. To avoid this allow architectures