summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorTim Murray <timmurray@google.com>2022-01-07 12:48:44 -0800
committerJaegeuk Kim <jaegeuk@kernel.org>2022-01-24 17:40:04 -0800
commite4544b63a7ee49e7fbebf35ece0a6acd3b9617ae (patch)
treeb55709d5753a19b8beacfcab1fe12dc8128435d7 /fs
parentdd81e1c7d5fb126e5fbc5c9e334d7b3ec29a16a0 (diff)
downloadlinux-e4544b63a7ee49e7fbebf35ece0a6acd3b9617ae.tar.gz
linux-e4544b63a7ee49e7fbebf35ece0a6acd3b9617ae.tar.bz2
linux-e4544b63a7ee49e7fbebf35ece0a6acd3b9617ae.zip
f2fs: move f2fs to use reader-unfair rwsems
f2fs rw_semaphores work better if writers can starve readers, especially for the checkpoint thread, because writers are strictly more important than reader threads. This prevents significant priority inversion between low-priority readers that blocked while trying to acquire the read lock and a second acquisition of the write lock that might be blocking high priority work. Signed-off-by: Tim Murray <timmurray@google.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/f2fs/checkpoint.c34
-rw-r--r--fs/f2fs/compress.c6
-rw-r--r--fs/f2fs/data.c50
-rw-r--r--fs/f2fs/dir.c12
-rw-r--r--fs/f2fs/f2fs.h110
-rw-r--r--fs/f2fs/file.c112
-rw-r--r--fs/f2fs/gc.c46
-rw-r--r--fs/f2fs/inline.c4
-rw-r--r--fs/f2fs/namei.c34
-rw-r--r--fs/f2fs/node.c84
-rw-r--r--fs/f2fs/recovery.c4
-rw-r--r--fs/f2fs/segment.c44
-rw-r--r--fs/f2fs/super.c56
-rw-r--r--fs/f2fs/sysfs.c4
-rw-r--r--fs/f2fs/verity.c4
-rw-r--r--fs/f2fs/xattr.c12
16 files changed, 342 insertions, 274 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 982f0170639f..deeda95688f0 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -351,13 +351,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
goto skip_write;
/* if locked failed, cp will flush dirty pages instead */
- if (!down_write_trylock(&sbi->cp_global_sem))
+ if (!f2fs_down_write_trylock(&sbi->cp_global_sem))
goto skip_write;
trace_f2fs_writepages(mapping->host, wbc, META);
diff = nr_pages_to_write(sbi, META, wbc);
written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
- up_write(&sbi->cp_global_sem);
+ f2fs_up_write(&sbi->cp_global_sem);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
return 0;
@@ -1159,7 +1159,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
if (!is_journalled_quota(sbi))
return false;
- if (!down_write_trylock(&sbi->quota_sem))
+ if (!f2fs_down_write_trylock(&sbi->quota_sem))
return true;
if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
ret = false;
@@ -1171,7 +1171,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
} else if (get_pages(sbi, F2FS_DIRTY_QDATA)) {
ret = true;
}
- up_write(&sbi->quota_sem);
+ f2fs_up_write(&sbi->quota_sem);
return ret;
}
@@ -1228,10 +1228,10 @@ retry_flush_dents:
* POR: we should ensure that there are no dirty node pages
* until finishing nat/sit flush. inode->i_blocks can be updated.
*/
- down_write(&sbi->node_change);
+ f2fs_down_write(&sbi->node_change);
if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
- up_write(&sbi->node_change);
+ f2fs_up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
@@ -1241,15 +1241,15 @@ retry_flush_dents:
}
retry_flush_nodes:
- down_write(&sbi->node_write);
+ f2fs_down_write(&sbi->node_write);
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
- up_write(&sbi->node_write);
+ f2fs_up_write(&sbi->node_write);
atomic_inc(&sbi->wb_sync_req[NODE]);
err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
atomic_dec(&sbi->wb_sync_req[NODE]);
if (err) {
- up_write(&sbi->node_change);
+ f2fs_up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
return err;
}
@@ -1262,13 +1262,13 @@ retry_flush_nodes:
* dirty node blocks and some checkpoint values by block allocation.
*/
__prepare_cp_block(sbi);
- up_write(&sbi->node_change);
+ f2fs_up_write(&sbi->node_change);
return err;
}
static void unblock_operations(struct f2fs_sb_info *sbi)
{
- up_write(&sbi->node_write);
+ f2fs_up_write(&sbi->node_write);
f2fs_unlock_all(sbi);
}
@@ -1612,7 +1612,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_warn(sbi, "Start checkpoint disabled!");
}
if (cpc->reason != CP_RESIZE)
- down_write(&sbi->cp_global_sem);
+ f2fs_down_write(&sbi->cp_global_sem);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
@@ -1693,7 +1693,7 @@ stop:
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
if (cpc->reason != CP_RESIZE)
- up_write(&sbi->cp_global_sem);
+ f2fs_up_write(&sbi->cp_global_sem);
return err;
}
@@ -1741,9 +1741,9 @@ static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
struct cp_control cpc = { .reason = CP_SYNC, };
int err;
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
err = f2fs_write_checkpoint(sbi, &cpc);
- up_write(&sbi->gc_lock);
+ f2fs_up_write(&sbi->gc_lock);
return err;
}
@@ -1831,9 +1831,9 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi)
if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC) {
int ret;
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
ret = f2fs_write_checkpoint(sbi, &cpc);
- up_write(&sbi->gc_lock);
+ f2fs_up_write(&sbi->gc_lock);
return ret;
}
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index d0c3aeba5945..67bac2792e57 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1267,7 +1267,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
* checkpoint. This can only happen to quota writes which can cause
* the below discard race condition.
*/
- down_read(&sbi->node_write);
+ f2fs_down_read(&sbi->node_write);
} else if (!f2fs_trylock_op(sbi)) {
goto out_free;
}
@@ -1384,7 +1384,7 @@ unlock_continue:
f2fs_put_dnode(&dn);
if (IS_NOQUOTA(inode))
- up_read(&sbi->node_write);
+ f2fs_up_read(&sbi->node_write);
else
f2fs_unlock_op(sbi);
@@ -1410,7 +1410,7 @@ out_put_dnode:
f2fs_put_dnode(&dn);
out_unlock_op:
if (IS_NOQUOTA(inode))
- up_read(&sbi->node_write);
+ f2fs_up_read(&sbi->node_write);
else
f2fs_unlock_op(sbi);
out_free:
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8c417864c66a..0f124e8de1d4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -590,7 +590,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
enum page_type btype = PAGE_TYPE_OF_BIO(type);
struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
- down_write(&io->io_rwsem);
+ f2fs_down_write(&io->io_rwsem);
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
@@ -601,7 +601,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
}
__submit_merged_bio(io);
- up_write(&io->io_rwsem);
+ f2fs_up_write(&io->io_rwsem);
}
static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
@@ -616,9 +616,9 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
enum page_type btype = PAGE_TYPE_OF_BIO(type);
struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
- down_read(&io->io_rwsem);
+ f2fs_down_read(&io->io_rwsem);
ret = __has_merged_page(io->bio, inode, page, ino);
- up_read(&io->io_rwsem);
+ f2fs_up_read(&io->io_rwsem);
}
if (ret)
__f2fs_submit_merged_write(sbi, type, temp);
@@ -742,9 +742,9 @@ static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
f2fs_bug_on(sbi, 1);
- down_write(&io->bio_list_lock);
+ f2fs_down_write(&io->bio_list_lock);
list_add_tail(&be->list, &io->bio_list);
- up_write(&io->bio_list_lock);
+ f2fs_up_write(&io->bio_list_lock);
}
static void del_bio_entry(struct bio_entry *be)
@@ -766,7 +766,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
struct list_head *head = &io->bio_list;
struct bio_entry *be;
- down_write(&io->bio_list_lock);
+ f2fs_down_write(&io->bio_list_lock);
list_for_each_entry(be, head, list) {
if (be->bio != *bio)
continue;
@@ -790,7 +790,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
__submit_bio(sbi, *bio, DATA);
break;
}
- up_write(&io->bio_list_lock);
+ f2fs_up_write(&io->bio_list_lock);
}
if (ret) {
@@ -816,7 +816,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
if (list_empty(head))
continue;
- down_read(&io->bio_list_lock);
+ f2fs_down_read(&io->bio_list_lock);
list_for_each_entry(be, head, list) {
if (target)
found = (target == be->bio);
@@ -826,14 +826,14 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
if (found)
break;
}
- up_read(&io->bio_list_lock);
+ f2fs_up_read(&io->bio_list_lock);
if (!found)
continue;
found = false;
- down_write(&io->bio_list_lock);
+ f2fs_down_write(&io->bio_list_lock);
list_for_each_entry(be, head, list) {
if (target)
found = (target == be->bio);
@@ -846,7 +846,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
break;
}
}
- up_write(&io->bio_list_lock);
+ f2fs_up_write(&io->bio_list_lock);
}
if (found)
@@ -906,7 +906,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
f2fs_bug_on(sbi, is_read_io(fio->op));
- down_write(&io->io_rwsem);
+ f2fs_down_write(&io->io_rwsem);
next:
if (fio->in_list) {
spin_lock(&io->io_lock);
@@ -973,7 +973,7 @@ out:
if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
!f2fs_is_checkpoint_ready(sbi))
__submit_merged_bio(io);
- up_write(&io->io_rwsem);
+ f2fs_up_write(&io->io_rwsem);
}
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
@@ -1383,9 +1383,9 @@ void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (lock)
- down_read(&sbi->node_change);
+ f2fs_down_read(&sbi->node_change);
else
- up_read(&sbi->node_change);
+ f2fs_up_read(&sbi->node_change);
} else {
if (lock)
f2fs_lock_op(sbi);
@@ -2749,13 +2749,13 @@ write:
* the below discard race condition.
*/
if (IS_NOQUOTA(inode))
- down_read(&sbi->node_write);
+ f2fs_down_read(&sbi->node_write);
fio.need_lock = LOCK_DONE;
err = f2fs_do_write_data_page(&fio);
if (IS_NOQUOTA(inode))
- up_read(&sbi->node_write);
+ f2fs_up_read(&sbi->node_write);
goto done;
}
@@ -3213,14 +3213,14 @@ void f2fs_write_failed(struct inode *inode, loff_t to)
/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
if (to > i_size && !f2fs_verity_in_progress(inode)) {
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
truncate_pagecache(inode, i_size);
f2fs_truncate_blocks(inode, i_size, true);
filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -3721,13 +3721,13 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
unsigned int end_sec = secidx + blkcnt / blk_per_sec;
int ret = 0;
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
set_inode_flag(inode, FI_ALIGNED_WRITE);
for (; secidx < end_sec; secidx++) {
- down_write(&sbi->pin_sem);
+ f2fs_down_write(&sbi->pin_sem);
f2fs_lock_op(sbi);
f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
@@ -3741,7 +3741,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
page = f2fs_get_lock_data_page(inode, blkidx, true);
if (IS_ERR(page)) {
- up_write(&sbi->pin_sem);
+ f2fs_up_write(&sbi->pin_sem);
ret = PTR_ERR(page);
goto done;
}
@@ -3754,7 +3754,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
ret = filemap_fdatawrite(inode->i_mapping);
- up_write(&sbi->pin_sem);
+ f2fs_up_write(&sbi->pin_sem);
if (ret)
break;
@@ -3765,7 +3765,7 @@ done:
clear_inode_flag(inode, FI_ALIGNED_WRITE);
filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 1820e9c106f7..011df7058c42 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -766,7 +766,7 @@ add_dentry:
f2fs_wait_on_page_writeback(dentry_page, DATA, true, true);
if (inode) {
- down_write(&F2FS_I(inode)->i_sem);
+ f2fs_down_write(&F2FS_I(inode)->i_sem);
page = f2fs_init_inode_metadata(inode, dir, fname, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -793,7 +793,7 @@ add_dentry:
f2fs_update_parent_metadata(dir, inode, current_depth);
fail:
if (inode)
- up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&F2FS_I(inode)->i_sem);
f2fs_put_page(dentry_page, 1);
@@ -858,7 +858,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
struct page *page;
int err = 0;
- down_write(&F2FS_I(inode)->i_sem);
+ f2fs_down_write(&F2FS_I(inode)->i_sem);
page = f2fs_init_inode_metadata(inode, dir, NULL, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -869,7 +869,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
clear_inode_flag(inode, FI_NEW_INODE);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
fail:
- up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&F2FS_I(inode)->i_sem);
return err;
}
@@ -877,7 +877,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
- down_write(&F2FS_I(inode)->i_sem);
+ f2fs_down_write(&F2FS_I(inode)->i_sem);
if (S_ISDIR(inode->i_mode))
f2fs_i_links_write(dir, false);
@@ -888,7 +888,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
f2fs_i_links_write(inode, false);
f2fs_i_size_write(inode, 0);
}
- up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&F2FS_I(inode)->i_sem);
if (inode->i_nlink == 0)
f2fs_add_orphan_inode(inode);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index eb22fa91c2b2..8178a9152e49 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -123,6 +123,18 @@ typedef u32 nid_t;
#define COMPRESS_EXT_NUM 16
+/*
+ * An implementation of an rwsem that is explicitly unfair to readers. This
+ * prevents priority inversion when a low-priority reader acquires the read lock
+ * while sleeping on the write lock but the write lock is needed by
+ * higher-priority clients.
+ */
+
+struct f2fs_rwsem {
+ struct rw_semaphore internal_rwsem;
+ wait_queue_head_t read_waiters;
+};
+
struct f2fs_mount_info {
unsigned int opt;
int write_io_size_bits; /* Write IO size bits */
@@ -752,7 +764,7 @@ struct f2fs_inode_info {
/* Use below internally in f2fs*/
unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */
- struct rw_semaphore i_sem; /* protect fi info */
+ struct f2fs_rwsem i_sem; /* protect fi info */
atomic_t dirty_pages; /* # of dirty pages */
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
@@ -777,8 +789,8 @@ struct f2fs_inode_info {
struct extent_tree *extent_tree; /* cached extent_tree entry */
/* avoid racing between foreground op and gc */
- struct rw_semaphore i_gc_rwsem[2];
- struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */
+ struct f2fs_rwsem i_gc_rwsem[2];
+ struct f2fs_rwsem i_xattr_sem; /* avoid racing between reading and changing EAs */
int i_extra_isize; /* size of extra space located in i_addr */
kprojid_t i_projid; /* id for project quota */
@@ -904,7 +916,7 @@ struct f2fs_nm_info {
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
struct radix_tree_root nat_set_root;/* root of the nat set cache */
- struct rw_semaphore nat_tree_lock; /* protect nat entry tree */
+ struct f2fs_rwsem nat_tree_lock; /* protect nat entry tree */
struct list_head nat_entries; /* cached nat entry list (clean) */
spinlock_t nat_list_lock; /* protect clean nat entry list */
unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
@@ -1017,7 +1029,7 @@ struct f2fs_sm_info {
struct dirty_seglist_info *dirty_info; /* dirty segment information */
struct curseg_info *curseg_array; /* active segment information */
- struct rw_semaphore curseg_lock; /* for preventing curseg change */
+ struct f2fs_rwsem curseg_lock; /* for preventing curseg change */
block_t seg0_blkaddr; /* block address of 0'th segment */
block_t main_blkaddr; /* start block address of main area */
@@ -1201,11 +1213,11 @@ struct f2fs_bio_info {
struct bio *bio; /* bios to merge */
sector_t last_block_in_bio; /* last block number */
struct f2fs_io_info fio; /* store buffered io info. */
- struct rw_semaphore io_rwsem; /* blocking op for bio */
+ struct f2fs_rwsem io_rwsem; /* blocking op for bio */
spinlock_t io_lock; /* serialize DATA/NODE IOs */
struct list_head io_list; /* track fios */
struct list_head bio_list; /* bio entry list head */
- struct rw_semaphore bio_list_lock; /* lock to protect bio entry list */
+ struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */
};
#define FDEV(i) (sbi->devs[i])
@@ -1571,7 +1583,7 @@ struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
struct f2fs_super_block *raw_super; /* raw super block pointer */
- struct rw_semaphore sb_lock; /* lock for raw super block */
+ struct f2fs_rwsem sb_lock; /* lock for raw super block */
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
struct mutex writepages; /* mutex for writepages() */
@@ -1591,7 +1603,7 @@ struct f2fs_sb_info {
/* for bio operations */
struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */
/* keep migration IO order for LFS mode */
- struct rw_semaphore io_order_lock;
+ struct f2fs_rwsem io_order_lock;
mempool_t *write_io_dummy; /* Dummy pages */
/* for checkpoint */
@@ -1599,10 +1611,10 @@ struct f2fs_sb_info {
int cur_cp_pack; /* remain current cp pack */
spinlock_t cp_lock; /* for flag in ckpt */
struct inode *meta_inode; /* cache meta blocks */
- struct rw_semaphore cp_global_sem; /* checkpoint procedure lock */
- struct rw_semaphore cp_rwsem; /* blocking FS operations */
- struct rw_semaphore node_write; /* locking node writes */
- struct rw_semaphore node_change; /* locking node change */
+ struct f2fs_rwsem cp_global_sem; /* checkpoint procedure lock */
+ struct f2fs_rwsem cp_rwsem; /* blocking FS operations */
+ struct f2fs_rwsem node_write; /* locking node writes */
+ struct f2fs_rwsem node_change; /* locking node change */
wait_queue_head_t cp_wait;
unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
long interval_time[MAX_TIME]; /* to store thresholds */
@@ -1662,7 +1674,7 @@ struct f2fs_sb_info {
block_t unusable_block_count; /* # of blocks saved by last cp */
unsigned int nquota_files; /* # of quota sysfile */
- struct rw_semaphore quota_sem; /* blocking cp for flags */
+ struct f2fs_rwsem quota_sem; /* blocking cp for flags */
/* # of pages, see count_type */
atomic_t nr_pages[NR_COUNT_TYPE];
@@ -1678,7 +1690,7 @@ struct f2fs_sb_info {
struct f2fs_mount_info mount_opt; /* mount options */
/* for cleaning operations */
- struct rw_semaphore gc_lock; /*
+ struct f2fs_rwsem gc_lock; /*
* semaphore for GC, avoid
* race between GC and GC or CP
*/
@@ -1698,7 +1710,7 @@ struct f2fs_sb_info {
/* threshold for gc trials on pinned files */
u64 gc_pin_file_threshold;
- struct rw_semaphore pin_sem;
+ struct f2fs_rwsem pin_sem;
/* maximum # of trials to find a victim segment for SSR and GC */
unsigned int max_victim_search;
@@ -2092,9 +2104,65 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
spin_unlock_irqrestore(&sbi->cp_lock, flags);
}
+static inline void init_f2fs_rwsem(struct f2fs_rwsem *sem)
+{
+ init_rwsem(&sem->internal_rwsem);
+ init_waitqueue_head(&sem->read_waiters);
+}
+
+static inline int f2fs_rwsem_is_locked(struct f2fs_rwsem *sem)
+{
+ return rwsem_is_locked(&sem->internal_rwsem);
+}
+
+static inline int f2fs_rwsem_is_contended(struct f2fs_rwsem *sem)
+{
+ return rwsem_is_contended(&sem->internal_rwsem);
+}
+
+static inline void f2fs_down_read(struct f2fs_rwsem *sem)
+{
+ wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem));
+}
+
+static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem)
+{
+ return down_read_trylock(&sem->internal_rwsem);
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass)
+{
+ down_read_nested(&sem->internal_rwsem, subclass);
+}
+#else
+#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem)
+#endif
+
+static inline void f2fs_up_read(struct f2fs_rwsem *sem)
+{
+ up_read(&sem->internal_rwsem);
+}
+
+static inline void f2fs_down_write(struct f2fs_rwsem *sem)
+{
+ down_write(&sem->internal_rwsem);
+}
+
+static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem)
+{
+ return down_write_trylock(&sem->internal_rwsem);
+}
+
+static inline void f2fs_up_write(struct f2fs_rwsem *sem)
+{
+ up_write(&sem->internal_rwsem);
+ wake_up_all(&sem->read_waiters);
+}
+
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
- down_read(&sbi->cp_rwsem);
+ f2fs_down_read(&sbi->cp_rwsem);
}
static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
@@ -2103,22 +2171,22 @@ static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
f2fs_show_injection_info(sbi, FAULT_LOCK_OP);
return 0;
}
- return down_read_trylock(&sbi->cp_rwsem);
+ return f2fs_down_read_trylock(&sbi->cp_rwsem);
}
static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
{
- up_read(&sbi->cp_rwsem);
+ f2fs_up_read(&sbi->cp_rwsem);
}
static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
{
- down_write(&sbi->cp_rwsem);
+ f2fs_down_write(&sbi->cp_rwsem);
}
static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
{
- up_write(&sbi->cp_rwsem);
+ f2fs_up_write(&sbi->cp_rwsem);
}
static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3c98ef6af97d..6ccdd6e347e2 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -237,13 +237,13 @@ static void try_to_fix_pino(struct inode *inode)
struct f2fs_inode_info *fi = F2FS_I(inode);
nid_t pino;
- down_write(&fi->i_sem);
+ f2fs_down_write(&fi->i_sem);
if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
get_parent_ino(inode, &pino)) {
f2fs_i_pino_write(inode, pino);
file_got_pino(inode);
}
- up_write(&fi->i_sem);
+ f2fs_up_write(&fi->i_sem);
}
static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
@@ -318,9 +318,9 @@ go_write:
* Both of fdatasync() and fsync() are able to be recovered from
* sudden-power-off.
*/
- down_read(&F2FS_I(inode)->i_sem);
+ f2fs_down_read(&F2FS_I(inode)->i_sem);
cp_reason = need_do_checkpoint(inode);
- up_read(&F2FS_I(inode)->i_sem);
+ f2fs_up_read(&F2FS_I(inode)->i_sem);
if (cp_reason) {
/* all the dirty node pages should be flushed for POR */
@@ -958,7 +958,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return err;
}
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
truncate_setsize(inode, attr->ia_size);
@@ -970,7 +970,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
* larger than i_size.
*/
filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (err)
return err;
@@ -1112,7 +1112,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
blk_start = (loff_t)pg_start << PAGE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_SHIFT;
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
truncate_pagecache_range(inode, blk_start, blk_end - 1);
@@ -1122,7 +1122,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
f2fs_unlock_op(sbi);
filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -1355,7 +1355,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true);
/* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
f2fs_lock_op(sbi);
@@ -1365,7 +1365,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
f2fs_unlock_op(sbi);
filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
@@ -1500,7 +1500,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
unsigned int end_offset;
pgoff_t end;
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(mapping);
truncate_pagecache_range(inode,
@@ -1514,7 +1514,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret) {
f2fs_unlock_op(sbi);
filemap_invalidate_unlock(mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
@@ -1526,7 +1526,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
f2fs_unlock_op(sbi);
filemap_invalidate_unlock(mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_balance_fs(sbi, dn.node_changed);
@@ -1600,7 +1600,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
/* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(mapping);
truncate_pagecache(inode, offset);
@@ -1618,7 +1618,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_unlock_op(sbi);
}
filemap_invalidate_unlock(mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
/* write out all moved pages, if possible */
filemap_invalidate_lock(mapping);
@@ -1674,13 +1674,13 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
next_alloc:
if (has_not_enough_free_secs(sbi, 0,
GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
if (err && err != -ENODATA && err != -EAGAIN)
goto out_err;
}
- down_write(&sbi->pin_sem);
+ f2fs_down_write(&sbi->pin_sem);
f2fs_lock_op(sbi);
f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
@@ -1690,7 +1690,7 @@ next_alloc:
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
file_dont_truncate(inode);
- up_write(&sbi->pin_sem);
+ f2fs_up_write(&sbi->pin_sem);
expanded += map.m_len;
sec_len -= map.m_len;
@@ -2020,7 +2020,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (ret)
goto out;
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
/*
* Should wait end_io to count F2FS_WB_CP_DATA correctly by
@@ -2031,7 +2031,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret) {
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
@@ -2044,7 +2044,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
/* add inode in inmem_list first and set atomic_file */
set_inode_flag(inode, FI_ATOMIC_FILE);
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
F2FS_I(inode)->inmem_task = current;
@@ -2351,7 +2351,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
if (err)
return err;
- down_write(&sbi->sb_lock);
+ f2fs_down_write(&sbi->sb_lock);
if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
goto got_it;
@@ -2370,7 +2370,7 @@ got_it:
16))
err = -EFAULT;
out_err:
- up_write(&sbi->sb_lock);
+ f2fs_up_write(&sbi->sb_lock);
mnt_drop_write_file(filp);
return err;
}
@@ -2447,12 +2447,12 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
return ret;
if (!sync) {
- if (!down_write_trylock(&sbi->gc_lock)) {
+ if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
ret = -EBUSY;
goto out;
}
} else {
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
}
ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO);
@@ -2483,12 +2483,12 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
do_more:
if (!range->sync) {
- if (!down_write_trylock(&sbi->gc_lock)) {
+ if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
ret = -EBUSY;
goto out;
}
} else {
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
}
ret = f2fs_gc(sbi, range->sync, true, false,
@@ -2820,10 +2820,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
f2fs_balance_fs(sbi, true);
- down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
if (src != dst) {
ret = -EBUSY;
- if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
+ if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
goto out_src;
}
@@ -2841,9 +2841,9 @@ static int f2fs_move_file_range(str