From b35250c0816c7cf7d0a8de92f5fafb6a7508a708 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 10 Jun 2020 17:36:03 +0200 Subject: writeback: Protect inode->i_io_list with inode->i_lock Currently, operations on inode->i_io_list are protected by wb->list_lock. In the following patches we'll need to maintain consistency between inode->i_state and inode->i_io_list so change the code so that inode->i_lock protects also all inode's i_io_list handling. Reviewed-by: Martijn Coenen Reviewed-by: Christoph Hellwig CC: stable@vger.kernel.org # Prerequisite for "writeback: Avoid skipping inode writeback" Signed-off-by: Jan Kara --- fs/fs-writeback.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index a605c3dddabc..ff0b18331590 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -144,6 +144,7 @@ static void inode_io_list_del_locked(struct inode *inode, struct bdi_writeback *wb) { assert_spin_locked(&wb->list_lock); + assert_spin_locked(&inode->i_lock); list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); @@ -1122,7 +1123,9 @@ void inode_io_list_del(struct inode *inode) struct bdi_writeback *wb; wb = inode_to_wb_and_lock_list(inode); + spin_lock(&inode->i_lock); inode_io_list_del_locked(inode, wb); + spin_unlock(&inode->i_lock); spin_unlock(&wb->list_lock); } EXPORT_SYMBOL(inode_io_list_del); @@ -1172,8 +1175,10 @@ void sb_clear_inode_writeback(struct inode *inode) * the case then the inode must have been redirtied while it was being written * out and we don't reset its dirtied_when. */ -static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) +static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb) { + assert_spin_locked(&inode->i_lock); + if (!list_empty(&wb->b_dirty)) { struct inode *tail; @@ -1184,6 +1189,13 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) inode_io_list_move_locked(inode, wb, &wb->b_dirty); } +static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) +{ + spin_lock(&inode->i_lock); + redirty_tail_locked(inode, wb); + spin_unlock(&inode->i_lock); +} + /* * requeue inode for re-scanning after bdi->b_io list is exhausted. */ @@ -1394,7 +1406,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, * writeback is not making progress due to locked * buffers. Skip this inode for now. */ - redirty_tail(inode, wb); + redirty_tail_locked(inode, wb); return; } @@ -1414,7 +1426,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, * retrying writeback of the dirty page/inode * that cannot be performed immediately. */ - redirty_tail(inode, wb); + redirty_tail_locked(inode, wb); } } else if (inode->i_state & I_DIRTY) { /* @@ -1422,7 +1434,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, * such as delayed allocation during submission or metadata * updates after data IO completion. */ - redirty_tail(inode, wb); + redirty_tail_locked(inode, wb); } else if (inode->i_state & I_DIRTY_TIME) { inode->dirtied_when = jiffies; inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); @@ -1669,8 +1681,8 @@ static long writeback_sb_inodes(struct super_block *sb, */ spin_lock(&inode->i_lock); if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { + redirty_tail_locked(inode, wb); spin_unlock(&inode->i_lock); - redirty_tail(inode, wb); continue; } if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) { -- cgit v1.2.3 From 5afced3bf28100d81fb2fe7e98918632a08feaf5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 29 May 2020 15:05:22 +0200 Subject: writeback: Avoid skipping inode writeback Inode's i_io_list list head is used to attach inode to several different lists - wb->{b_dirty, b_dirty_time, b_io, b_more_io}. When flush worker prepares a list of inodes to writeback e.g. for sync(2), it moves inodes to b_io list. Thus it is critical for sync(2) data integrity guarantees that inode is not requeued to any other writeback list when inode is queued for processing by flush worker. That's the reason why writeback_single_inode() does not touch i_io_list (unless the inode is completely clean) and why __mark_inode_dirty() does not touch i_io_list if I_SYNC flag is set. However there are two flaws in the current logic: 1) When inode has only I_DIRTY_TIME set but it is already queued in b_io list due to sync(2), concurrent __mark_inode_dirty(inode, I_DIRTY_SYNC) can still move inode back to b_dirty list resulting in skipping writeback of inode time stamps during sync(2). 2) When inode is on b_dirty_time list and writeback_single_inode() races with __mark_inode_dirty() like: writeback_single_inode() __mark_inode_dirty(inode, I_DIRTY_PAGES) inode->i_state |= I_SYNC __writeback_single_inode() inode->i_state |= I_DIRTY_PAGES; if (inode->i_state & I_SYNC) bail if (!(inode->i_state & I_DIRTY_ALL)) - not true so nothing done We end up with I_DIRTY_PAGES inode on b_dirty_time list and thus standard background writeback will not writeback this inode leading to possible dirty throttling stalls etc. (thanks to Martijn Coenen for this analysis). Fix these problems by tracking whether inode is queued in b_io or b_more_io lists in a new I_SYNC_QUEUED flag. When this flag is set, we know flush worker has queued inode and we should not touch i_io_list. On the other hand we also know that once flush worker is done with the inode it will requeue the inode to appropriate dirty list. When I_SYNC_QUEUED is not set, __mark_inode_dirty() can (and must) move inode to appropriate dirty list. Reported-by: Martijn Coenen Reviewed-by: Martijn Coenen Tested-by: Martijn Coenen Reviewed-by: Christoph Hellwig Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option") CC: stable@vger.kernel.org Signed-off-by: Jan Kara --- fs/fs-writeback.c | 17 ++++++++++++----- include/linux/fs.h | 8 ++++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ff0b18331590..f470c10641c5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -146,6 +146,7 @@ static void inode_io_list_del_locked(struct inode *inode, assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); + inode->i_state &= ~I_SYNC_QUEUED; list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); } @@ -1187,6 +1188,7 @@ static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb) inode->dirtied_when = jiffies; } inode_io_list_move_locked(inode, wb, &wb->b_dirty); + inode->i_state &= ~I_SYNC_QUEUED; } static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) @@ -1262,8 +1264,11 @@ static int move_expired_inodes(struct list_head *delaying_queue, break; list_move(&inode->i_io_list, &tmp); moved++; + spin_lock(&inode->i_lock); if (flags & EXPIRE_DIRTY_ATIME) - set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state); + inode->i_state |= I_DIRTY_TIME_EXPIRED; + inode->i_state |= I_SYNC_QUEUED; + spin_unlock(&inode->i_lock); if (sb_is_blkdev_sb(inode->i_sb)) continue; if (sb && sb != inode->i_sb) @@ -1438,6 +1443,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, } else if (inode->i_state & I_DIRTY_TIME) { inode->dirtied_when = jiffies; inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); + inode->i_state &= ~I_SYNC_QUEUED; } else { /* The inode is clean. Remove from writeback lists. */ inode_io_list_del_locked(inode, wb); @@ -2301,11 +2307,12 @@ void __mark_inode_dirty(struct inode *inode, int flags) inode->i_state |= flags; /* - * If the inode is being synced, just update its dirty state. - * The unlocker will place the inode on the appropriate - * superblock list, based upon its state. + * If the inode is queued for writeback by flush worker, just + * update its dirty state. Once the flush worker is done with + * the inode it will place it on the appropriate superblock + * list, based upon its state. */ - if (inode->i_state & I_SYNC) + if (inode->i_state & I_SYNC_QUEUED) goto out_unlock_inode; /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 19ef6c88c152..48556efcdcf0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2157,6 +2157,10 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_DONTCACHE Evict inode as soon as it is not used anymore. * + * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists. + * Used to detect that mark_inode_dirty() should not move + * inode between dirty lists. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2174,12 +2178,12 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) -#define __I_DIRTY_TIME_EXPIRED 12 -#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) +#define I_DIRTY_TIME_EXPIRED (1 << 12) #define I_WB_SWITCH (1 << 13) #define I_OVL_INUSE (1 << 14) #define I_CREATING (1 << 15) #define I_DONTCACHE (1 << 16) +#define I_SYNC_QUEUED (1 << 17) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) -- cgit v1.2.3 From f9cae926f35e8230330f28c7b743ad088611a8de Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 29 May 2020 16:08:58 +0200 Subject: writeback: Fix sync livelock due to b_dirty_time processing When we are processing writeback for sync(2), move_expired_inodes() didn't set any inode expiry value (older_than_this). This can result in writeback never completing if there's steady stream of inodes added to b_dirty_time list as writeback rechecks dirty lists after each writeback round whether there's more work to be done. Fix the problem by using sync(2) start time is inode expiry value when processing b_dirty_time list similarly as for ordinarily dirtied inodes. This requires some refactoring of older_than_this handling which simplifies the code noticeably as a bonus. Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option") CC: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/fs-writeback.c | 44 ++++++++++++++++------------------------ include/trace/events/writeback.h | 13 ++++++------ 2 files changed, 23 insertions(+), 34 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f470c10641c5..ae17d64a3e18 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -42,7 +42,6 @@ struct wb_writeback_work { long nr_pages; struct super_block *sb; - unsigned long *older_than_this; enum writeback_sync_modes sync_mode; unsigned int tagged_writepages:1; unsigned int for_kupdate:1; @@ -1234,16 +1233,13 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) #define EXPIRE_DIRTY_ATIME 0x0001 /* - * Move expired (dirtied before work->older_than_this) dirty inodes from + * Move expired (dirtied before dirtied_before) dirty inodes from * @delaying_queue to @dispatch_queue. */ static int move_expired_inodes(struct list_head *delaying_queue, struct list_head *dispatch_queue, - int flags, - struct wb_writeback_work *work) + int flags, unsigned long dirtied_before) { - unsigned long *older_than_this = NULL; - unsigned long expire_time; LIST_HEAD(tmp); struct list_head *pos, *node; struct super_block *sb = NULL; @@ -1251,16 +1247,9 @@ static int move_expired_inodes(struct list_head *delaying_queue, int do_sb_sort = 0; int moved = 0; - if ((flags & EXPIRE_DIRTY_ATIME) == 0) - older_than_this = work->older_than_this; - else if (!work->for_sync) { - expire_time = jiffies - (dirtytime_expire_interval * HZ); - older_than_this = &expire_time; - } while (!list_empty(delaying_queue)) { inode = wb_inode(delaying_queue->prev); - if (older_than_this && - inode_dirtied_after(inode, *older_than_this)) + if (inode_dirtied_after(inode, dirtied_before)) break; list_move(&inode->i_io_list, &tmp); moved++; @@ -1306,18 +1295,22 @@ out: * | * +--> dequeue for IO */ -static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work) +static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work, + unsigned long dirtied_before) { int moved; + unsigned long time_expire_jif = dirtied_before; assert_spin_locked(&wb->list_lock); list_splice_init(&wb->b_more_io, &wb->b_io); - moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work); + moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, dirtied_before); + if (!work->for_sync) + time_expire_jif = jiffies - dirtytime_expire_interval * HZ; moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io, - EXPIRE_DIRTY_ATIME, work); + EXPIRE_DIRTY_ATIME, time_expire_jif); if (moved) wb_io_lists_populated(wb); - trace_writeback_queue_io(wb, work, moved); + trace_writeback_queue_io(wb, work, dirtied_before, moved); } static int write_inode(struct inode *inode, struct writeback_control *wbc) @@ -1829,7 +1822,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, blk_start_plug(&plug); spin_lock(&wb->list_lock); if (list_empty(&wb->b_io)) - queue_io(wb, &work); + queue_io(wb, &work, jiffies); __writeback_inodes_wb(wb, &work); spin_unlock(&wb->list_lock); blk_finish_plug(&plug); @@ -1849,7 +1842,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, * takes longer than a dirty_writeback_interval interval, then leave a * one-second gap. * - * older_than_this takes precedence over nr_to_write. So we'll only write back + * dirtied_before takes precedence over nr_to_write. So we'll only write back * all dirty pages if they are all attached to "old" mappings. */ static long wb_writeback(struct bdi_writeback *wb, @@ -1857,14 +1850,11 @@ static long wb_writeback(struct bdi_writeback *wb, { unsigned long wb_start = jiffies; long nr_pages = work->nr_pages; - unsigned long oldest_jif; + unsigned long dirtied_before = jiffies; struct inode *inode; long progress; struct blk_plug plug; - oldest_jif = jiffies; - work->older_than_this = &oldest_jif; - blk_start_plug(&plug); spin_lock(&wb->list_lock); for (;;) { @@ -1898,14 +1888,14 @@ static long wb_writeback(struct bdi_writeback *wb, * safe. */ if (work->for_kupdate) { - oldest_jif = jiffies - + dirtied_before = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); } else if (work->for_background) - oldest_jif = jiffies; + dirtied_before = jiffies; trace_writeback_start(wb, work); if (list_empty(&wb->b_io)) - queue_io(wb, work); + queue_io(wb, work, dirtied_before); if (work->sb) progress = writeback_sb_inodes(work->sb, wb, work); else diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 10f5d1fa7347..7565dcd59697 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -498,8 +498,9 @@ DEFINE_WBC_EVENT(wbc_writepage); TRACE_EVENT(writeback_queue_io, TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work, + unsigned long dirtied_before, int moved), - TP_ARGS(wb, work, moved), + TP_ARGS(wb, work, dirtied_before, moved), TP_STRUCT__entry( __array(char, name, 32) __field(unsigned long, older) @@ -509,19 +510,17 @@ TRACE_EVENT(writeback_queue_io, __field(ino_t, cgroup_ino) ), TP_fast_assign( - unsigned long *older_than_this = work->older_than_this; strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); - __entry->older = older_than_this ? *older_than_this : 0; - __entry->age = older_than_this ? - (jiffies - *older_than_this) * 1000 / HZ : -1; + __entry->older = dirtied_before; + __entry->age = (jiffies - dirtied_before) * 1000 / HZ; __entry->moved = moved; __entry->reason = work->reason; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%lu", __entry->name, - __entry->older, /* older_than_this in jiffies */ - __entry->age, /* older_than_this in relative milliseconds */ + __entry->older, /* dirtied_before in jiffies */ + __entry->age, /* dirtied_before in relative milliseconds */ __entry->moved, __print_symbolic(__entry->reason, WB_WORK_REASON), (unsigned long)__entry->cgroup_ino -- cgit v1.2.3 From 5fcd57505c002efc5823a7355e21f48dd02d5a51 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 29 May 2020 16:24:43 +0200 Subject: writeback: Drop I_DIRTY_TIME_EXPIRE The only use of I_DIRTY_TIME_EXPIRE is to detect in __writeback_single_inode() that inode got there because flush worker decided it's time to writeback the dirty inode time stamps (either because we are syncing or because of age). However we can detect this directly in __writeback_single_inode() and there's no need for the strange propagation with I_DIRTY_TIME_EXPIRE flag. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext4/inode.c | 2 +- fs/fs-writeback.c | 28 +++++++++++----------------- fs/xfs/libxfs/xfs_trans_inode.c | 4 ++-- include/linux/fs.h | 1 - include/trace/events/writeback.h | 1 - 5 files changed, 14 insertions(+), 22 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 40ec5c7ef0d3..4db497f02ffb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4887,7 +4887,7 @@ static void __ext4_update_other_inode_time(struct super_block *sb, (inode->i_state & I_DIRTY_TIME)) { struct ext4_inode_info *ei = EXT4_I(inode); - inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED); + inode->i_state &= ~I_DIRTY_TIME; spin_unlock(&inode->i_lock); spin_lock(&ei->i_raw_lock); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ae17d64a3e18..149227160ff0 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1238,7 +1238,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) */ static int move_expired_inodes(struct list_head *delaying_queue, struct list_head *dispatch_queue, - int flags, unsigned long dirtied_before) + unsigned long dirtied_before) { LIST_HEAD(tmp); struct list_head *pos, *node; @@ -1254,8 +1254,6 @@ static int move_expired_inodes(struct list_head *delaying_queue, list_move(&inode->i_io_list, &tmp); moved++; spin_lock(&inode->i_lock); - if (flags & EXPIRE_DIRTY_ATIME) - inode->i_state |= I_DIRTY_TIME_EXPIRED; inode->i_state |= I_SYNC_QUEUED; spin_unlock(&inode->i_lock); if (sb_is_blkdev_sb(inode->i_sb)) @@ -1303,11 +1301,11 @@ static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work, assert_spin_locked(&wb->list_lock); list_splice_init(&wb->b_more_io, &wb->b_io); - moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, dirtied_before); + moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before); if (!work->for_sync) time_expire_jif = jiffies - dirtytime_expire_interval * HZ; moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io, - EXPIRE_DIRTY_ATIME, time_expire_jif); + time_expire_jif); if (moved) wb_io_lists_populated(wb); trace_writeback_queue_io(wb, work, dirtied_before, moved); @@ -1483,18 +1481,14 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_lock(&inode->i_lock); dirty = inode->i_state & I_DIRTY; - if (inode->i_state & I_DIRTY_TIME) { - if ((dirty & I_DIRTY_INODE) || - wbc->sync_mode == WB_SYNC_ALL || - unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) || - unlikely(time_after(jiffies, - (inode->dirtied_time_when + - dirtytime_expire_interval * HZ)))) { - dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; - trace_writeback_lazytime(inode); - } - } else - inode->i_state &= ~I_DIRTY_TIME_EXPIRED; + if ((inode->i_state & I_DIRTY_TIME) && + ((dirty & I_DIRTY_INODE) || + wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync || + time_after(jiffies, inode->dirtied_time_when + + dirtytime_expire_interval * HZ))) { + dirty |= I_DIRTY_TIME; + trace_writeback_lazytime(inode); + } inode->i_state &= ~dirty; /* diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index b5dfb6654842..1b4df6636944 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -96,9 +96,9 @@ xfs_trans_log_inode( * to log the timestamps, or will clear already cleared fields in the * worst case. */ - if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) { + if (inode->i_state & I_DIRTY_TIME) { spin_lock(&inode->i_lock); - inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED); + inode->i_state &= ~I_DIRTY_TIME; spin_unlock(&inode->i_lock); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 48556efcdcf0..45eadf5bea5d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2178,7 +2178,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) -#define I_DIRTY_TIME_EXPIRED (1 << 12) #define I_WB_SWITCH (1 << 13) #define I_OVL_INUSE (1 << 14) #define I_CREATING (1 << 15) diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 7565dcd59697..e7cbccc7c14c 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -20,7 +20,6 @@ {I_CLEAR, "I_CLEAR"}, \ {I_SYNC, "I_SYNC"}, \ {I_DIRTY_TIME, "I_DIRTY_TIME"}, \ - {I_DIRTY_TIME_EXPIRED, "I_DIRTY_TIME_EXPIRED"}, \ {I_REFERENCED, "I_REFERENCED"} \ ) -- cgit v1.2.3 From 34b09af4f54e6485e28f138ccad159611a240cc1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 5 Aug 2020 15:10:11 -0400 Subject: nfsd: fix oops on mixed NFSv4/NFSv3 client access If an NFSv2/v3 client breaks an NFSv4 client's delegation, it will hit a NULL dereference in nfsd_breaker_owns_lease(). Easily reproduceable with for example mount -overs=4.2 server:/export /mnt/ sleep 1h Fixes: 28df3d1539de50 ("nfsd: clients don't need to break their own delegations") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=208807 Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 81ed8e8bab3f..1ea9bbcc7c24 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4597,6 +4597,8 @@ static bool nfsd_breaker_owns_lease(struct file_lock *fl) if (!i_am_nfsd()) return NULL; rqst = kthread_data(current); + if (!rqst->rq_lease_breaker) + return NULL; clp = *(rqst->rq_lease_breaker); return dl->dl_stid.sc_client == clp; } -- cgit v1.2.3 From 4473171db68fe9e3de3f2bc68a00527f23852ad8 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:14 +0530 Subject: firmware: ti_sci: Drop the device id to resource type translation With ABI 3.0, sysfw deprecated special resource types used for AM65x SoC. Instead started using device id as resource type similar to the convention used in J721E SOC. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20200806074826.24607-2-lokeshvutla@ti.com --- drivers/firmware/ti_sci.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index 53cee17d0115..81e4d7797ac0 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -3355,16 +3355,6 @@ static const struct ti_sci_desc ti_sci_pmmc_k2g_desc = { .rm_type_map = NULL, }; -static struct ti_sci_rm_type_map ti_sci_am654_rm_type_map[] = { - {.dev_id = 56, .type = 0x00b}, /* GIC_IRQ */ - {.dev_id = 179, .type = 0x000}, /* MAIN_NAV_UDMASS_IA0 */ - {.dev_id = 187, .type = 0x009}, /* MAIN_NAV_RA */ - {.dev_id = 188, .type = 0x006}, /* MAIN_NAV_UDMAP */ - {.dev_id = 194, .type = 0x007}, /* MCU_NAV_UDMAP */ - {.dev_id = 195, .type = 0x00a}, /* MCU_NAV_RA */ - {.dev_id = 0, .type = 0x000}, /* end of table */ -}; - /* Description for AM654 */ static const struct ti_sci_desc ti_sci_pmmc_am654_desc = { .default_host_id = 12, @@ -3373,7 +3363,7 @@ static const struct ti_sci_desc ti_sci_pmmc_am654_desc = { /* Limited by MBOX_TX_QUEUE_LEN. K2G can handle upto 128 messages! */ .max_msgs = 20, .max_msg_size = 60, - .rm_type_map = ti_sci_am654_rm_type_map, + .rm_type_map = NULL, }; static const struct of_device_id ti_sci_of_match[] = { -- cgit v1.2.3 From 9b98e02a3d369c5d0875338ea0717030471b5210 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:15 +0530 Subject: firmware: ti_sci: Drop unused structure ti_sci_rm_type_map struct ti_sci_rm_type_map is no longer used. Drop its definition and its declarations. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20200806074826.24607-3-lokeshvutla@ti.com --- drivers/firmware/ti_sci.c | 56 +---------------------------------------------- 1 file changed, 1 insertion(+), 55 deletions(-) diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index 81e4d7797ac0..03bd01ba5fe7 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -64,22 +64,6 @@ struct ti_sci_xfers_info { spinlock_t xfer_lock; }; -/** - * struct ti_sci_rm_type_map - Structure representing TISCI Resource - * management representation of dev_ids. - * @dev_id: TISCI device ID - * @type: Corresponding id as identified by TISCI RM. - * - * Note: This is used only as a work around for using RM range apis - * for AM654 SoC. For future SoCs dev_id will be used as type - * for RM range APIs. In order to maintain ABI backward compatibility - * type is not being changed for AM654 SoC. - */ -struct ti_sci_rm_type_map { - u32 dev_id; - u16 type; -}; - /** * struct ti_sci_desc - Description of SoC integration * @default_host_id: Host identifier representing the compute entity @@ -87,14 +71,12 @@ struct ti_sci_rm_type_map { * @max_msgs: Maximum number of messages that can be pending * simultaneously in the system * @max_msg_size: Maximum size of data per message that can be handled. - * @rm_type_map: RM resource type mapping structure. */ struct ti_sci_desc { u8 default_host_id; int max_rx_timeout_ms; int max_msgs; int max_msg_size; - struct ti_sci_rm_type_map *rm_type_map; }; /** @@ -1710,33 +1692,6 @@ fail: return ret; } -static int ti_sci_get_resource_type(struct ti_sci_info *info, u16 dev_id, - u16 *type) -{ - struct ti_sci_rm_type_map *rm_type_map = info->desc->rm_type_map; - bool found = false; - int i; - - /* If map is not provided then assume dev_id is used as type */ - if (!rm_type_map) { - *type = dev_id; - return 0; - } - - for (i = 0; rm_type_map[i].dev_id; i++) { - if (rm_type_map[i].dev_id == dev_id) { - *type = rm_type_map[i].type; - found = true; - break; - } - } - - if (!found) - return -EINVAL; - - return 0; -} - /** * ti_sci_get_resource_range - Helper to get a range of resources assigned * to a host. Resource is uniquely identified by @@ -1760,7 +1715,6 @@ static int ti_sci_get_resource_range(const struct ti_sci_handle *handle, struct ti_sci_xfer *xfer; struct ti_sci_info *info; struct device *dev; - u16 type; int ret = 0; if (IS_ERR(handle)) @@ -1780,15 +1734,9 @@ static int ti_sci_get_resource_range(const struct ti_sci_handle *handle, return ret; } - ret = ti_sci_get_resource_type(info, dev_id, &type); - if (ret) { - dev_err(dev, "rm type lookup failed for %u\n", dev_id); - goto fail; - } - req = (struct ti_sci_msg_req_get_resource_range *)xfer->xfer_buf; req->secondary_host = s_host; - req->type = type & MSG_RM_RESOURCE_TYPE_MASK; + req->type = dev_id & MSG_RM_RESOURCE_TYPE_MASK; req->subtype = subtype & MSG_RM_RESOURCE_SUBTYPE_MASK; ret = ti_sci_do_xfer(info, xfer); @@ -3352,7 +3300,6 @@ static const struct ti_sci_desc ti_sci_pmmc_k2g_desc = { /* Limited by MBOX_TX_QUEUE_LEN. K2G can handle upto 128 messages! */ .max_msgs = 20, .max_msg_size = 64, - .rm_type_map = NULL, }; /* Description for AM654 */ @@ -3363,7 +3310,6 @@ static const struct ti_sci_desc ti_sci_pmmc_am654_desc = { /* Limited by MBOX_TX_QUEUE_LEN. K2G can handle upto 128 messages! */ .max_msgs = 20, .max_msg_size = 60, - .rm_type_map = NULL, }; static const struct of_device_id ti_sci_of_match[] = { -- cgit v1.2.3 From 53bf2b0e4e4c1ff0a957474237f9dcd20036ca54 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:16 +0530 Subject: firmware: ti_sci: Add support for getting resource with subtype With SYSFW ABI 3.0 changes, interrupts coming out of an interrupt controller is identified by a type and it is consistent across SoCs. Similarly global events for Interrupt aggregator. So add an API to get resource range using a resource type. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20200806074826.24607-4-lokeshvutla@ti.com --- drivers/firmware/ti_sci.c | 89 +++++++++++++++++++++++++--------- include/linux/soc/ti/ti_sci_protocol.h | 13 +++++ 2 files changed, 80 insertions(+), 22 deletions(-) diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index 03bd01ba5fe7..722af9ee53d6 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -3208,61 +3208,50 @@ u32 ti_sci_get_num_resources(struct ti_sci_resource *res) EXPORT_SYMBOL_GPL(ti_sci_get_num_resources); /** - * devm_ti_sci_get_of_resource() - Get a TISCI resource assigned to a device + * devm_ti_sci_get_resource_sets() - Get a TISCI resources assigned to a device * @handle: TISCI handle * @dev: Device pointer to which the resource is assigned * @dev_id: TISCI device id to which the resource is assigned - * @of_prop: property name by which the resource are represented + * @sub_types: Array of sub_types assigned corresponding to device + * @sets: Number of sub_types * * Return: Pointer to ti_sci_resource if all went well else appropriate * error pointer. */ -struct ti_sci_resource * -devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, - struct device *dev, u32 dev_id, char *of_prop) +static struct ti_sci_resource * +devm_ti_sci_get_resource_sets(const struct ti_sci_handle *handle, + struct device *dev, u32 dev_id, u32 *sub_types, + u32 sets) { struct ti_sci_resource *res; bool valid_set = false; - u32 resource_subtype; int i, ret; res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL); if (!res) return ERR_PTR(-ENOMEM); - ret = of_property_count_elems_of_size(dev_of_node(dev), of_prop, - sizeof(u32)); - if (ret < 0) { - dev_err(dev, "%s resource type ids not available\n", of_prop); - return ERR_PTR(ret); - } - res->sets = ret; - + res->sets = sets; res->desc = devm_kcalloc(dev, res->sets, sizeof(*res->desc), GFP_KERNEL); if (!res->desc) return ERR_PTR(-ENOMEM); for (i = 0; i < res->sets; i++) { - ret = of_property_read_u32_index(dev_of_node(dev), of_prop, i, - &resource_subtype); - if (ret) - return ERR_PTR(-EINVAL); - ret = handle->ops.rm_core_ops.get_range(handle, dev_id, - resource_subtype, + sub_types[i], &res->desc[i].start, &res->desc[i].num); if (ret) { dev_dbg(dev, "dev = %d subtype %d not allocated for this host\n", - dev_id, resource_subtype); + dev_id, sub_types[i]); res->desc[i].start = 0; res->desc[i].num = 0; continue; } dev_dbg(dev, "dev = %d, subtype = %d, start = %d, num = %d\n", - dev_id, resource_subtype, res->desc[i].start, + dev_id, sub_types[i], res->desc[i].start, res->desc[i].num); valid_set = true; @@ -3280,6 +3269,62 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, return ERR_PTR(-EINVAL); } +/** + * devm_ti_sci_get_of_resource() - Get a TISCI resource assigned to a device + * @handle: TISCI handle + * @dev: Device pointer to which the resource is assigned + * @dev_id: TISCI device id to which the resource is assigned + * @of_prop: property name by which the resource are represented + * + * Return: Pointer to ti_sci_resource if all went well else appropriate + * error pointer. + */ +struct ti_sci_resource * +devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, + struct device *dev, u32 dev_id, char *of_prop) +{ + struct ti_sci_resource *res; + u32 *sub_types; + int sets; + + sets = of_property_count_elems_of_size(dev_of_node(dev), of_prop, + sizeof(u32)); + if (sets < 0) { + dev_err(dev, "%s resource type ids not available\n", of_prop); + return ERR_PTR(sets); + } + + sub_types = kcalloc(sets, sizeof(*sub_types), GFP_KERNEL); + if (!sub_types) + return ERR_PTR(-ENOMEM); + + of_property_read_u32_array(dev_of_node(dev), of_prop, sub_types, sets); + res = devm_ti_sci_get_resource_sets(handle, dev, dev_id, sub_types, + sets); + + kfree(sub_types); + return res; +} +EXPORT_SYMBOL_GPL(devm_ti_sci_get_of_resource); + +/** + * devm_ti_sci_get_resource() - Get a resource range assigned to the device + * @handle: TISCI handle + * @dev: Device pointer to which the resource is assigned + * @dev_id: TISCI device id to which the resource is assigned + * @suub_type: TISCI resource subytpe representing the resource. + * + * Return: Pointer to ti_sci_resource if all went well else appropriate + * error pointer. + */ +struct ti_sci_resource * +devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, + u32 dev_id, u32 sub_type) +{ + return devm_ti_sci_get_resource_sets(handle, dev, dev_id, &sub_type, 1); +} +EXPORT_SYMBOL_GPL(devm_ti_sci_get_resource); + static int tisci_reboot_handler(struct notifier_block *nb, unsigned long mode, void *cmd) { diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 49c5d29cd33c..cf27b080e148 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -220,6 +220,9 @@ struct ti_sci_rm_core_ops { u16 *range_start, u16 *range_num); }; +#define TI_SCI_RESASG_SUBTYPE_IR_OUTPUT 0 +#define TI_SCI_RESASG_SUBTYPE_IA_VINT 0xa +#define TI_SCI_RESASG_SUBTYPE_GLOBAL_EVENT_SEVT 0xd /** * struct ti_sci_rm_irq_ops: IRQ management operations * @set_irq: Set an IRQ route between the requested source @@ -556,6 +559,9 @@ u32 ti_sci_get_num_resources(struct ti_sci_resource *res); struct ti_sci_resource * devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, struct device *dev, u32 dev_id, char *of_prop); +struct ti_sci_resource * +devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, + u32 dev_id, u32 sub_type); #else /* CONFIG_TI_SCI_PROTOCOL */ @@ -609,6 +615,13 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, { return ERR_PTR(-EINVAL); } + +static inline struct ti_sci_resource * +devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, + u32 dev_id, u32 sub_type); +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_TI_SCI_PROTOCOL */ #endif /* __TISCI_PROTOCOL_H */ -- cgit v1.2.3 From 9a8e2ae71f3553f1b6cd4e3681f04e5d0f147387 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:17 +0530 Subject: dt-bindings: irqchip: ti, sci-intr: Update bindings to drop the usage of gic as parent Drop the firmware related dt-bindings and use the hardware specified interrupt numbers within Interrupt Router. This ensures interrupt router DT node need not assume any interrupt parent type. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200806074826.24607-5-lokeshvutla@ti.com --- .../bindings/interrupt-controller/ti,sci-intr.txt | 31 +++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt index 178fca08278f..c7046f3da201 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt @@ -44,15 +44,17 @@ Required Properties: 4: If intr supports level triggered interrupts. - interrupt-controller: Identifies the node as an interrupt controller - #interrupt-cells: Specifies the number of cells needed to encode an - interrupt source. The value should be 2. - First cell should contain the TISCI device ID of source - Second cell should contain the interrupt source offset - within the device. + interrupt source. The value should be 1. + First cell should contain interrupt router input number + as specified by hardware. - ti,sci: Phandle to TI-SCI compatible System controller node. -- ti,sci-dst-id: TISCI device ID of the destination IRQ controller. -- ti,sci-rm-range-girq: Array of TISCI subtype ids representing the host irqs - assigned to this interrupt router. Each subtype id - corresponds to a range of host irqs. +- ti,sci-dev-id: TISCI device id of interrupt controller. +- ti,interrupt-ranges: Set of triplets containing ranges that convert + the INTR output interrupt numbers to parent's + interrupt number. Each triplet has following entries: + - First entry specifies the base for intr output irq + - Second entry specifies the base for parent irqs + - Third entry specifies the limit For more details on TISCI IRQ resource management refer: https://downloads.ti.com/tisci/esd/latest/2_tisci_msgs/rm/rm_irq.html @@ -62,21 +64,20 @@ Example: The following example demonstrates both interrupt router node and the consumer node(main gpio) on the AM654 SoC: -main_intr: interrupt-controller0 { +main_gpio_intr: interrupt-controller0 { compatible = "ti,sci-intr"; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; - #interrupt-cells = <2>; + #interrupt-cells = <1>; ti,sci = <&dmsc>; - ti,sci-dst-id = <56>; - ti,sci-rm-range-girq = <0x1>; + ti,sci-dev-id = <131>; + ti,interrupt-ranges = <0 360 32>; }; main_gpio0: gpio@600000 { ... - interrupt-parent = <&main_intr>; - interrupts = <57 256>, <57 257>, <57 258>, - <57 259>, <57 260>, <57 261>; + interrupt-parent = <&main_gpio_intr>; + interrupts = <192>, <193>, <194>, <195>, <196>, <197>; ... }; -- cgit v1.2.3 From b8713af858997c3df5bc5b48e66ac1f1bfe19779 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:18 +0530 Subject: dt-bindings: irqchip: Convert ti, sci-intr bindings to yaml In order to automate the verification of DT nodes convert ti,sci-intr.txt ti,sci-intr.yaml. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200806074826.24607-6-lokeshvutla@ti.com --- .../bindings/interrupt-controller/ti,sci-intr.txt | 83 ----------------- .../bindings/interrupt-controller/ti,sci-intr.yaml | 102 +++++++++++++++++++++ MAINTAINERS | 2 +- 3 files changed, 103 insertions(+), 84 deletions(-) delete mode 100644 Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt create mode 100644 Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.yaml diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt deleted file mode 100644 index c7046f3da201..000000000000 --- a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt +++ /dev/null @@ -1,83 +0,0 @@ -Texas Instruments K3 Interrupt Router -===================================== - -The Interrupt Router (INTR) module provides a mechanism to mux M -interrupt inputs to N interrupt outputs, where all M inputs are selectable -to be driven per N output. An Interrupt Router can either handle edge triggered -or level triggered interrupts and that is fixed in hardware. - - Interrupt Router - +----------------------+ - | Inputs Outputs | - +-------+ | +------+ +-----+ | - | GPIO |----------->| | irq0 | | 0 | | Host IRQ - +-------+ | +------+ +-----+ | controller - | . . | +-------+ - +-------+ | . . |----->| IRQ | - | INTA |----------->| . . | +-------+ - +-------+ | . +-----+ | - | +------+ | N | | - | | irqM | +-----+ | - | +------+ | - | | - +----------------------+ - -There is one register per output (MUXCNTL_N) that controls the selection. -Configuration of these MUXCNTL_N registers is done by a system controller -(like the Device Memory and Security Controller on K3 AM654 SoC). System -controller will keep track of the used and unused registers within the Router. -Driver should request the system controller to get the range of GIC IRQs -assigned to the requesting hosts. It is the drivers responsibility to keep -track of Host IRQs. - -Communication between the host processor running an OS and the system -controller happens through a protocol called TI System Control Interface -(TISCI protocol). For more details refer: -Documentation/devicetree/bindings/arm/keystone/ti,sci.txt - -TISCI Interrupt Router Node: ----------------------------- -Required Properties: -- compatible: Must be "ti,sci-intr". -- ti,intr-trigger-type: Should be one of the following: - 1: If intr supports edge triggered interrupts. - 4: If intr supports level triggered interrupts. -- interrupt-controller: Identifies the node as an interrupt controller -- #interrupt-cells: Specifies the number of cells needed to encode an - interrupt source. The value should be 1. - First cell should contain interrupt router input number - as specified by hardware. -- ti,sci: Phandle to TI-SCI compatible System controller node. -- ti,sci-dev-id: TISCI device id of interrupt controller. -- ti,interrupt-ranges: Set of triplets containing ranges that convert - the INTR output interrupt numbers to parent's - interrupt number. Each triplet has following entries: - - First entry specifies the base for intr output irq - - Second entry specifies the base for parent irqs - - Third entry specifies the limit - -For more details on TISCI IRQ resource management refer: -https://downloads.ti.com/tisci/esd/latest/2_tisci_msgs/rm/rm_irq.html - -Example: --------- -The following example demonstrates both interrupt router node and the consumer -node(main gpio) on the AM654 SoC: - -main_gpio_intr: interrupt-controller0 { - compatible = "ti,sci-intr"; - ti,intr-trigger-type = <1>; - interrupt-controller; - interrupt-parent = <&gic500>; - #interrupt-cells = <1>; - ti,sci = <&dmsc>; - ti,sci-dev-id = <131>; - ti,interrupt-ranges = <0 360 32>; -}; - -main_gpio0: gpio@600000 { - ... - interrupt-parent = <&main_gpio_intr>; - interrupts = <192>, <193>, <194>, <195>, <196>, <197>; - ... -}; diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.yaml b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.yaml new file mode 100644 index 000000000000..cff6a956afb4 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.yaml @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/ti,sci-intr.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments K3 Interrupt Router + +maintainers: + - Lokesh Vutla + +allOf: + - $ref: /schemas/arm/keystone/ti,k3-sci-common.yaml# + +description: | + The Interrupt Router (INTR) module provides a mechanism to mux M + interrupt inputs to N interrupt outputs, where all M inputs are selectable + to be driven per N output. An Interrupt Router can either handle edge + triggered or level triggered interrupts and that is fixed in hardware. + + Interrupt Router + +----------------------+ + | Inputs Outputs | + +-------+ | +------+ +-----+ | + | GPIO |----------->| | irq0 | | 0 | | Host IRQ + +-------+ | +------+ +-----+ | controller + | . . | +-------+ + +-------+ | . . |----->| IRQ | + | INTA |----------->| . . | +-------+ + +-------+ | . +-----+ | + | +------+ | N | | + | | irqM | +-----+ | + | +------+ | + | | + +----------------------+ + + There is one register per output (MUXCNTL_N) that controls the selection. + Configuration of these MUXCNTL_N registers is done by a system controller + (like the Device Memory and Security Controller on K3 AM654 SoC). System + controller will keep track of the used and unused registers within the Router. + Driver should request the system controller to get the range of GIC IRQs + assigned to the requesting hosts. It is the drivers responsibility to keep + track of Host IRQs. + + Communication between the host processor running an OS and the system + controller happens through a protocol called TI System Control Interface + (TISCI protocol). + +properties: + compatible: + const: ti,sci-intr + + ti,intr-trigger-type: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [1, 4] + description: | + Should be one of the following. + 1 = If intr supports edge triggered interrupts. + 4 = If intr supports level triggered interrupts. + + interrupt-controller: true + + '#interrupt-cells': + const: 1 + description: | + The 1st cell should contain interrupt router input hw number. + + ti,interrupt-ranges: + $ref: /schemas/types.yaml#/definitions/uint32-matrix + description: | + Interrupt ranges that converts the INTR output hw irq numbers + to parents's input interrupt numbers. + items: + items: + - description: | + "output_irq" specifies the base for intr output irq + - description: | + "parent's input irq" specifies the base for parent irq + - description: | + "limit" specifies the limit for translation + +required: + - compatible + - ti,intr-trigger-type + - interrupt-controller + - '#interrupt-cells' + - ti,sci + - ti,sci-dev-id + - ti,interrupt-ranges + +examples: + - | + main_gpio_intr: interrupt-controller0 { + compatible = "ti,sci-intr"; + ti,intr-trigger-type = <1>; + interrupt-controller; + interrupt-parent = <&gic500>; + #interrupt-cells = <1>; + ti,sci = <&dmsc>; + ti,sci-dev-id = <131>; + ti,interrupt-ranges = <0 360 32>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index deaafb617361..e08405c2c22e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17117,7 +17117,7 @@ F: Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml F: Documentation/devicetree/bindings/arm/keystone/ti,sci.txt F: Documentation/devicetree/bindings/clock/ti,sci-clk.txt F: Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt -F: Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.txt +F: Documentation/devicetree/bindings/interrupt-controller/ti,sci-intr.yaml F: Documentation/devicetree/bindings/reset/ti,sci-reset.txt F: Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt F: drivers/clk/keystone/sci-clk.c -- cgit v1.2.3 From a5b659bd4bc7518a8e45fda5256c5e5e8d3b7c49 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:19 +0530 Subject: irqchip/ti-sci-intr: Add support for INTR being a parent to INTR Driver assumes that Interrupt parent to Interrupt router is always GIC. This is not true always and an Interrupt Router can be a parent to Interrupt Router. Update the driver to detect the parent and request the parent irqs accordingly. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200806074826.24607-7-lokeshvutla@ti.com --- drivers/irqchip/irq-ti-sci-intr.c | 152 +++++++++++++++++++++++--------------- 1 file changed, 93 insertions(+), 59 deletions(-) diff --git a/drivers/irqchip/irq-ti-sci-intr.c b/drivers/irqchip/irq-ti-sci-intr.c index 5ea148faf2ab..cbc1758228d9 100644 --- a/drivers/irqchip/irq-ti-sci-intr.c +++ b/drivers/irqchip/irq-ti-sci-intr.c @@ -17,29 +17,20 @@ #include #include -#define TI_SCI_DEV_ID_MASK 0xffff -#define TI_SCI_DEV_ID_SHIFT 16 -#define TI_SCI_IRQ_ID_MASK 0xffff -#define TI_SCI_IRQ_ID_SHIFT 0 -#define HWIRQ_TO_DEVID(hwirq) (((hwirq) >> (TI_SCI_DEV_ID_SHIFT)) & \ - (TI_SCI_DEV_ID_MASK)) -#define HWIRQ_TO_IRQID(hwirq) ((hwirq) & (TI_SCI_IRQ_ID_MASK)) -#define TO_HWIRQ(dev, index) ((((dev) & TI_SCI_DEV_ID_MASK) << \ - TI_SCI_DEV_ID_SHIFT) | \ - ((index) & TI_SCI_IRQ_ID_MASK)) - /** * struct ti_sci_intr_irq_domain - Structure representing a TISCI based * Interrupt Router IRQ domain. * @sci: Pointer to TISCI handle - * @dst_irq: TISCI resource pointer representing GIC irq controller. - * @dst_id: TISCI device ID of the GIC irq controller. + * @out_irqs: TISCI resource pointer representing INTR irqs. + * @dev: Struct device pointer. + * @ti_sci_id: TI-SCI device identifier * @type: Specifies the trigger type supported by this Interrupt Router */ struct ti_sci_intr_irq_domain { const struct ti_sci_handle *sci; - struct ti_sci_resource *dst_irq; - u32 dst_id; + struct ti_sci_resource *out_irqs; + struct device *dev; + u32 ti_sci_id; u32 type; }; @@ -70,15 +61,44 @@ static int ti_sci_intr_irq_domain_translate(struct irq_domain *domain, { struct ti_sci_intr_irq_domain *intr = domain->host_data; - if (fwspec->param_count != 2) + if (fwspec->param_count != 1) return -EINVAL; - *hwirq = TO_HWIRQ(fwspec->param[0], fwspec->param[1]); + *hwirq = fwspec->param[0]; *type = intr->type; return 0; } +/** + * ti_sci_intr_xlate_irq() - Translate hwirq to parent's hwirq. + * @intr: IRQ domain corresponding to Interrupt Router + * @irq: Hardware irq corresponding to the above irq domain + * + * Return parent irq number if translation is available else -ENOENT. + */ +static int ti_sci_intr_xlate_irq(struct ti_sci_intr_irq_domain *intr, u32 irq) +{ + struct device_node *np = dev_of_node(intr->dev); + u32 base, pbase, size, len; + const __be32 *range; + + range = of_get_property(np, "ti,interrupt-ranges", &len); + if (!range) + return irq; + + for (len /= sizeof(*range); len >= 3; len -= 3) { + base = be32_to_cpu(*range++); + pbase = be32_to_cpu(*range++); + size = be32_to_cpu(*range++); + + if (base <= irq && irq < base + size) + return irq - base + pbase; + } + + return -ENOENT; +} + /** * ti_sci_intr_irq_domain_free() - Free the specified IRQs from the domain. * @domain: Domain to which the irqs belong @@ -89,66 +109,76 @@ static void ti_sci_intr_irq_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { struct ti_sci_intr_irq_domain *intr = domain->host_data; - struct irq_data *data, *parent_data; - u16 dev_id, irq_index; + struct irq_data *data; + int out_irq; - parent_data = irq_domain_get_irq_data(domain->parent, virq); data = irq_domain_get_irq_data(domain, virq); - irq_index = HWIRQ_TO_IRQID(data->hwirq); - dev_id = HWIRQ_TO_DEVID(data->hwirq); + out_irq = (uintptr_t)data->chip_data; - intr->sci->ops.rm_irq_ops.free_irq(intr->sci, dev_id, irq_index, - intr->dst_id, parent_data->hwirq); - ti_sci_release_resource(intr->dst_irq, parent_data->hwirq); + intr->sci->ops.rm_irq_ops.free_irq(intr->sci, + intr->ti_sci_id, data->hwirq, + intr->ti_sci_id, out_irq); + ti_sci_release_resource(intr->out_irqs, out_irq); irq_domain_free_irqs_parent(domain, virq, 1); irq_domain_reset_irq_data(data); } /** - * ti_sci_intr_alloc_gic_irq() - Allocate GIC specific IRQ + * ti_sci_intr_alloc_parent_irq() - Allocate parent IRQ * @domain: Pointer to the interrupt router IRQ domain * @virq: Corresponding Linux virtual IRQ number * @hwirq: Corresponding hwirq for the IRQ within this IRQ domain * - * Returns 0 if all went well else appropriate error pointer. + * Returns parent irq if all went well else appropriate error pointer. */ -static int ti_sci_intr_alloc_gic_irq(struct irq_domain *domain, - unsigned int virq, u32 hwirq) +static int ti_sci_intr_alloc_parent_irq(struct irq_domain *domain, + unsigned int virq, u32 hwirq) { struct ti_sci_intr_irq_domain *intr = domain->host_data; + struct device_node *parent_node; struct irq_fwspec fwspec; - u16 dev_id, irq_index; - u16 dst_irq; - int err; - - dev_id = HWIRQ_TO_DEVID(hwirq); - irq_index = HWIRQ_TO_IRQID(hwirq); + u16 out_irq, p_hwirq; + int err = 0; - dst_irq = ti_sci_get_free_resource(intr->dst_irq); - if (dst_irq == TI_SCI_RESOURCE_NULL) + out_irq = ti_sci_get_free_resource(intr->out_irqs); + if (out_irq == TI_SCI_RESOURCE_NULL) return -EINVAL; - fwspec.fwnode = domain->parent->fwnode; - fwspec.param_count = 3; - fwspec.param[0] = 0; /* SPI */ - fwspec.param[1] = dst_irq - 32; /* SPI offset */ - fwspec.param[2] = intr->type; + p_hwirq = ti_sci_intr_xlate_irq(intr, out_irq); + if (p_hwirq < 0) + goto err_irqs; + + parent_node = of_irq_find_parent(dev_of_node(intr->dev)); + fwspec.fwnode = of_node_to_fwnode(parent_node); + + if (of_device_is_compatible(parent_node, "arm,gic-v3")) { + /* Parent is GIC */ + fwspec.param_count = 3; + fwspec.param[0] = 0; /* SPI */ + fwspec.param[1] = p_hwirq - 32; /* SPI offset */ + fwspec.param[2] = intr->type; + } else { + /* Parent is Interrupt Router */ + fwspec.param_count = 1; + fwspec.param[0] = p_hwirq; + } err = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec); if (err) goto err_irqs; - err = intr->sci->ops.rm_irq_ops.set_irq(intr->sci, dev_id, irq_index, - intr->dst_id, dst_irq); + err = intr->sci->ops.rm_irq_ops.set_irq(intr->sci, + intr->ti_sci_id, hwirq, + intr->ti_sci_id, out_irq); if (err) goto err_msg; - return 0; + return p_hwirq; err_msg: irq_domain_free_irqs_parent(domain, virq, 1); err_irqs: - ti_sci_release_resource(intr->dst_irq, dst_irq); + ti_sci_release_resource(intr->out_irqs, out_irq); return err; } @@ -168,18 +198,19 @@ static int ti_sci_intr_irq_domain_alloc(struct irq_domain *domain, struct irq_fwspec *fwspec = data; unsigned long hwirq; unsigned int flags; - int err; + int err, p_hwirq; err = ti_sci_intr_irq_domain_translate(domain, fwspec, &hwirq, &flags); if (err) return err; - err = ti_sci_intr_alloc_gic_irq(domain, virq, hwirq); - if (err) - return err; + p_hwirq = ti_sci_intr_alloc_parent_irq(domain, virq, hwirq); + if (p_hwirq < 0) + return p_hwirq; irq_domain_set_hwirq_and_chip(domain, virq, hwirq, - &ti_sci_intr_irq_chip, NULL); + &ti_sci_intr_irq_chip, + (void *)(uintptr_t)p_hwirq); return 0; } @@ -214,6 +245,7 @@ static int ti_sci_intr_irq_domain_probe(struct platform_device *pdev) if (!intr) return -ENOMEM; + intr->dev = dev; ret = of_property_read_u32(dev_of_node(dev), "ti,intr-trigger-type", &intr->type); if (ret) { @@ -230,19 +262,19 @@ static int ti_sci_intr_irq_domain_probe(struct platform_device *pdev) return ret; } - ret = of_property_read_u32(dev_of_node(dev), "ti,sci-dst-id", - &intr->dst_id); + ret = of_property_read_u32(dev_of_node(dev), "ti,sci-dev-id", + &intr->ti_sci_id); if (ret) { - dev_err(dev, "missing 'ti,sci-dst-id' property\n"); + dev_err(dev, "missing 'ti,sci-dev-id' property\n"); return -EINVAL; } - intr->dst_irq = devm_ti_sci_get_of_resource(intr->sci, dev, - intr->dst_id, - "ti,sci-rm-range-girq"); - if (IS_ERR(intr->dst_irq)) { + intr->out_irqs = devm_ti_sci_get_resource(intr->sci, dev, + intr->ti_sci_id, + TI_SCI_RESASG_SUBTYPE_IR_OUTPUT); + if (IS_ERR(intr->out_irqs)) { dev_err(dev, "Destination irq resource allocation failed\n"); - return PTR_ERR(intr->dst_irq); + return PTR_ERR(intr->out_irqs); } domain = irq_domain_add_hierarchy(parent_domain, 0, 0, dev_of_node(dev), @@ -252,6 +284,8 @@ static int ti_sci_intr_irq_domain_probe(struct platform_device *pdev) return -ENOMEM; } + dev_info(dev, "Interrupt Router %d domain created\n", intr->ti_sci_id); + return 0; } -- cgit v1.2.3 From 6dde29dc31aa265a79d9e6b3571987cfa4b179cc Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:20 +0530 Subject: dt-bindings: irqchip: ti, sci-inta: Update docs to support different parent. Drop the firmware related interrupt ranges and use the hardware specified interrupt numbers within Interrupt Aggregator. This ensures interrupt aggregator DT node need not assume any interrupt parent type. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200806074826.24607-8-lokeshvutla@ti.com --- .../bindings/interrupt-controller/ti,sci-inta.txt | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt index 7841cb099e13..5fd3ee0f7167 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt @@ -43,13 +43,14 @@ TISCI Interrupt Aggregator Node: - msi-controller: Identifies the node as an MSI controller. - interrupt-parent: phandle of irq parent. - ti,sci: Phandle to TI-SCI compatible System controller node. -- ti,sci-dev-id: TISCI device ID of the Interrupt Aggregator. -- ti,sci-rm-range-vint: Array of TISCI subtype ids representing vints(inta - outputs) range within this INTA, assigned to the - requesting host context. -- ti,sci-rm-range-global-event: Array of TISCI subtype ids representing the - global events range reaching this IA and are assigned - to the requesting host context. +- ti,sci-dev-id: TISCI device id of interrupt controller. +- ti,interrupt-ranges: Set of triplets containing ranges that convert + the INTA output interrupt numbers to parent's + interrupt number. Each triplet has following entries: + - First entry specifies the base for vint + - Second entry specifies the base for parent irqs + - Third entry specifies the limit + Example: -------- @@ -61,6 +62,5 @@ main_udmass_inta: interrupt-controller@33d00000 { interrupt-parent = <&main_navss_intr>; ti,sci = <&dmsc>; ti,sci-dev-id = <179>; - ti,sci-rm-range-vint = <0x0>; - ti,sci-rm-range-global-event = <0x1>; + ti,interrupt-ranges = <0 0 256>; }; -- cgit v1.2.3 From c4dff06e79d99691f18dfc8a61a1cb17c602a025 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:21 +0530 Subject: dt-bindings: irqchip: Convert ti, sci-inta bindings to yaml In order to automate the verification of DT nodes convert ti,sci-inta.txt ti,sci-inta.yaml. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200806074826.24607-9-lokeshvutla@ti.com --- .../bindings/interrupt-controller/ti,sci-inta.txt | 66 --------------- .../bindings/interrupt-controller/ti,sci-inta.yaml | 98 ++++++++++++++++++++++ MAINTAINERS | 2 +- 3 files changed, 99 insertions(+), 67 deletions(-) delete mode 100644 Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt create mode 100644 Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt deleted file mode 100644 index 5fd3ee0f7167..000000000000 --- a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.txt +++ /dev/null @@ -1,66 +0,0 @@ -Texas Instruments K3 Interrupt Aggregator -========================================= - -The Interrupt Aggregator (INTA) provides a centralized machine -which handles the termination of system events to that they can -be coherently processed by the host(s) in the system. A maximum -of 64 events can be mapped to a single interrupt. - - - Interrupt Aggregator - +-----------------------------------------+ - | Intmap VINT | - | +--------------+ +------------+ | - m ------>| | vint | bit | | 0 |.....|63| vint0 | - . | +--------------+ +------------+ | +------+ - . | . . | | HOST | -Globalevents ------>| . . |------>| IRQ | - . | . . | | CTRL | - . | . . | +------+ - n ------>| +--------------+ +------------+ | - | | vint | bit | | 0 |.....|63| vintx | - | +--------------+ +------------+ | - | | - +-----------------------------------------+ - -Configuration of these Intmap registers that maps global events to vint is done -by a system controller (like the Device Memory and Security Controller on K3 -AM654 SoC). Driver should request the system controller to get the range -of global events and vints assigned to the requesting host. Management -of these requested resources should be handled by driver and requests -system controller to map specific global event to vint, bit pair. - -Communication between the host processor running an OS and the system -controller happens through a protocol called TI System Control Interface -(TISCI protocol). For more details refer: -Documentation/devicetree/bindings/arm/keystone/ti,sci.txt - -TISCI Interrupt Aggregator Node: -------------------------------- -- compatible: Must be "ti,sci-inta". -- reg: Should contain registers location and length. -- interrupt-controller: Identifies the node as an interrupt controller -- msi-controller: Identifies the node as an MSI controller. -- interrupt-parent: phandle of irq parent. -- ti,sci: Phandle to TI-SCI compatible System controller node. -- ti,sci-dev-id: TISCI device id of interrupt controller. -- ti,interrupt-ranges: Set of triplets containing ranges that convert - the INTA output interrupt numbers to parent's - interrupt number. Each triplet has following entries: - - First entry specifies the base for vint - - Second entry specifies the base for parent irqs - - Third entry specifies the limit - - -