summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2019-08-16 09:59:56 -0400
committer Kent Overstreet <kent.overstreet@linux.dev> 2023-10-22 17:08:25 -0400
commit 76426098e419c1732efc3f88166f3f3592c215c9 (patch)
tree 1cc431e32b7129a573116ce43307e9bc47d76b1d
parent 3c7f3b7aeb73f2155aec9d00567b70ef55ede465 (diff)
download linux-76426098e419c1732efc3f88166f3f3592c215c9.tar.gz
linux-76426098e419c1732efc3f88166f3f3592c215c9.tar.bz2
linux-76426098e419c1732efc3f88166f3f3592c215c9.zip
bcachefs: Reflink
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- fs/bcachefs/Makefile 1
-rw-r--r-- fs/bcachefs/bcachefs.h 4
-rw-r--r-- fs/bcachefs/bcachefs_format.h 26
-rw-r--r-- fs/bcachefs/bkey.h 2
-rw-r--r-- fs/bcachefs/bkey_methods.c 1
-rw-r--r-- fs/bcachefs/btree_types.h 9
-rw-r--r-- fs/bcachefs/btree_update_leaf.c 3
-rw-r--r-- fs/bcachefs/buckets.c 100
-rw-r--r-- fs/bcachefs/extents.c 50
-rw-r--r-- fs/bcachefs/extents.h 19
-rw-r--r-- fs/bcachefs/fs-io.c 218
-rw-r--r-- fs/bcachefs/fs-io.h 19
-rw-r--r-- fs/bcachefs/fs.c 42
-rw-r--r-- fs/bcachefs/fs.h 15
-rw-r--r-- fs/bcachefs/io.c 127
-rw-r--r-- fs/bcachefs/io.h 3
-rw-r--r-- fs/bcachefs/migrate.c 13
-rw-r--r-- fs/bcachefs/move.c 98
-rw-r--r-- fs/bcachefs/move.h 3
-rw-r--r-- fs/bcachefs/recovery.c 18
-rw-r--r-- fs/bcachefs/reflink.c 300
-rw-r--r-- fs/bcachefs/reflink.h 32
-rw-r--r-- fs/bcachefs/replicas.c 1
23 files changed, 945 insertions, 159 deletions
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index c29ccdb45965..4c2608409144 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -44,6 +44,7 @@ bcachefs-y := \
quota.o \
rebalance.o \
recovery.o \
+ reflink.o \
replicas.o \
siphash.o \
six.o \
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 68e2d3b1a9a6..410fce3ed8d4 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -361,6 +361,7 @@ enum gc_phase {
GC_PHASE_BTREE_XATTRS,
GC_PHASE_BTREE_ALLOC,
GC_PHASE_BTREE_QUOTAS,
+ GC_PHASE_BTREE_REFLINK,
GC_PHASE_PENDING_DELETE,
GC_PHASE_ALLOC,
@@ -750,6 +751,9 @@ struct bch_fs {
struct work_struct ec_stripe_delete_work;
struct llist_head ec_stripe_delete_list;
+ /* REFLINK */
+ u64 reflink_hint;
+
/* VFS IO PATH - fs-io.c */
struct bio_set writepage_bioset;
struct bio_set dio_write_bioset;
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index b8aafd2e283a..62afea1e7ec3 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -340,7 +340,9 @@ static inline void bkey_init(struct bkey *k)
x(xattr, 11) \
x(alloc, 12) \
x(quota, 13) \
- x(stripe, 14)
+ x(stripe, 14) \
+ x(reflink_p, 15) \
+ x(reflink_v, 16)
enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,
@@ -895,6 +897,24 @@ struct bch_stripe {
struct bch_extent_ptr ptrs[0];
} __attribute__((packed, aligned(8)));
+/* Reflink: */
+/*
+ * Value of a KEY_TYPE_reflink_p key.
+ *
+ * idx (plus an offset into the key) is used by bch2_trans_mark_reflink_p()
+ * as the offset of a lookup in BTREE_ID_REFLINK, and __bch2_cut_front()
+ * advances it when the front of the key is trimmed.
+ */
+struct bch_reflink_p {
+ struct bch_val v;
+ __le64 idx; /* read with le64_to_cpu(), updated with le64_add_cpu() */
+
+ __le32 reservation_generation; /* NOTE(review): no reader or writer visible in this excerpt */
+ __u8 nr_replicas; /* NOTE(review): likewise not referenced in the visible hunks */
+ __u8 pad[3]; /* presumably rounds the trailing fields up to 8 bytes -- TODO confirm */
+};
+/*
+ * Value of a KEY_TYPE_reflink_v key: a reference count followed by extent
+ * entries. bch2_bkey_ptrs_c() treats start up to the end of the value as
+ * the key's pointer entries.
+ */
+struct bch_reflink_v {
+ struct bch_val v;
+ __le64 refcount; /* adjusted by +1/-1 in __bch2_trans_mark_reflink_p(); the key is deleted at 0 */
+ union bch_extent_entry start[0]; /* first extent entry; zero-length trailing array */
+ __u64 _data[0]; /* NOTE(review): zero-length array at the same position as start; purpose not shown here */
+};
+
/* Optional/variable size superblock sections: */
struct bch_sb_field {
@@ -1297,6 +1317,7 @@ enum bch_sb_features {
BCH_FEATURE_ATOMIC_NLINK = 3, /* should have gone under compat */
BCH_FEATURE_EC = 4,
BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3 = 5,
+ BCH_FEATURE_REFLINK = 6,
BCH_FEATURE_NR,
};
@@ -1487,7 +1508,8 @@ LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
x(XATTRS, 3, "xattrs") \
x(ALLOC, 4, "alloc") \
x(QUOTAS, 5, "quotas") \
- x(EC, 6, "erasure_coding")
+ x(EC, 6, "erasure_coding") \
+ x(REFLINK, 7, "reflink")
enum btree_id {
#define x(kwd, val, name) BTREE_ID_##kwd = val,
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index b3a08e52e6b3..321fe6fe0b55 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -560,6 +560,8 @@ BKEY_VAL_ACCESSORS(xattr);
BKEY_VAL_ACCESSORS(alloc);
BKEY_VAL_ACCESSORS(quota);
BKEY_VAL_ACCESSORS(stripe);
+BKEY_VAL_ACCESSORS(reflink_p);
+BKEY_VAL_ACCESSORS(reflink_v);
/* byte order helpers */
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 8af16ca994e0..6fa6ac1fadc1 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -10,6 +10,7 @@
#include "extents.h"
#include "inode.h"
#include "quota.h"
+#include "reflink.h"
#include "xattr.h"
const char * const bch2_bkey_types[] = {
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index ec14e2deecb7..621cbfa22fc9 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -464,7 +464,13 @@ static inline enum btree_node_type btree_node_type(struct btree *b)
static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
- return type == BKEY_TYPE_EXTENTS;
+ switch (type) {
+ case BKEY_TYPE_EXTENTS:
+ case BKEY_TYPE_REFLINK:
+ return true;
+ default:
+ return false;
+ }
}
static inline bool btree_node_is_extents(struct btree *b)
@@ -480,6 +486,7 @@ static inline bool btree_node_type_needs_gc(enum btree_node_type type)
case BKEY_TYPE_EXTENTS:
case BKEY_TYPE_INODES:
case BKEY_TYPE_EC:
+ case BKEY_TYPE_REFLINK:
return true;
default:
return false;
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 5f94b6e9cf28..443ffb5c709d 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -521,7 +521,8 @@ static inline bool update_triggers_transactional(struct btree_trans *trans,
{
return likely(!(trans->flags & BTREE_INSERT_MARK_INMEM)) &&
(i->iter->btree_id == BTREE_ID_EXTENTS ||
- i->iter->btree_id == BTREE_ID_INODES);
+ i->iter->btree_id == BTREE_ID_INODES ||
+ i->iter->btree_id == BTREE_ID_REFLINK);
}
static inline bool update_has_triggers(struct btree_trans *trans,
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index baf9642d21ca..3d243f2d1095 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -972,7 +972,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
spin_unlock(&c->ec_stripes_heap_lock);
bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
(u64) p.idx);
- return -1;
+ return -EIO;
}
BUG_ON(m->r.e.data_type != data_type);
@@ -1144,6 +1144,7 @@ int bch2_mark_key_locked(struct bch_fs *c,
fs_usage, journal_seq, flags);
break;
case KEY_TYPE_extent:
+ case KEY_TYPE_reflink_v:
ret = bch2_mark_extent(c, k, offset, sectors, BCH_DATA_USER,
fs_usage, journal_seq, flags);
break;
@@ -1304,7 +1305,8 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
xchg(&warned_disk_usage, 1))
return;
- pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
+ bch_err(c, "disk usage increased more than %llu sectors reserved",
+ disk_res_sectors);
trans_for_each_update_iter(trans, i) {
struct btree_iter *iter = i->iter;
@@ -1319,7 +1321,7 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
node_iter = iter->l[0].iter;
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
- KEY_TYPE_discard))) {
+ KEY_TYPE_discard))) {
struct bkey unpacked;
struct bkey_s_c k;
@@ -1471,6 +1473,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
struct bch_extent_stripe_ptr p,
s64 sectors, enum bch_data_type data_type)
{
+ struct bch_fs *c = trans->c;
struct bch_replicas_padded r;
struct btree_iter *iter;
struct bkey_i *new_k;
@@ -1487,10 +1490,10 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
return ret;
if (k.k->type != KEY_TYPE_stripe) {
- bch_err_ratelimited(trans->c,
- "pointer to nonexistent stripe %llu",
- (u64) p.idx);
- ret = -1;
+ bch2_fs_inconsistent(c,
+ "pointer to nonexistent stripe %llu",
+ (u64) p.idx);
+ ret = -EIO;
goto out;
}
@@ -1578,6 +1581,84 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
return 0;
}
+static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
+ struct bkey_s_c_reflink_p p,
+ u64 idx, unsigned sectors,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter *iter;
+ struct bkey_i *new_k;
+ struct bkey_s_c k;
+ struct bkey_i_reflink_v *r_v;
+ s64 ret; /* NOTE(review): s64 here, but the function returns int, so the value is narrowed on return */
+ /*
+  * Adjust the refcount of the BTREE_ID_REFLINK key obtained for
+  * POS(0, idx) on behalf of reflink pointer p: +1 unless
+  * BCH_BUCKET_MARK_OVERWRITE is set in flags, -1 when it is.
+  *
+  * Returns the error from trans_get_key()/trans_update_key(), -EIO when
+  * the key is not a reflink_v, and otherwise k.k->p.offset - idx, i.e.
+  * the distance from idx to the end of the key that was found.
+  */
+ ret = trans_get_key(trans, BTREE_ID_REFLINK,
+ POS(0, idx), &iter, &k);
+ if (ret)
+ return ret;
+ /* Anything other than a reflink_v here is reported as a filesystem inconsistency. */
+ if (k.k->type != KEY_TYPE_reflink_v) {
+ bch2_fs_inconsistent(c,
+ "%llu:%llu len %u points to nonexistent indirect extent %llu",
+ p.k->p.inode, p.k->p.offset, p.k->size, idx);
+ ret = -EIO;
+ goto err;
+ }
+ /*
+  * On overwrite, leave the refcount alone when the found key extends
+  * outside [idx, idx + sectors); the jump to out still reports how far
+  * the key reaches past idx.
+  */
+ if ((flags & BCH_BUCKET_MARK_OVERWRITE) &&
+ (bkey_start_offset(k.k) < idx ||
+ k.k->p.offset > idx + sectors))
+ goto out;
+ /* Move the iterator to the start of the found key before calling trans_update_key(). */
+ bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
+ BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
+
+ new_k = trans_update_key(trans, iter, k.k->u64s);
+ ret = PTR_ERR_OR_ZERO(new_k);
+ if (ret)
+ goto err;
+ /* presumably copies k into new_k so the refcount can be edited there -- TODO confirm */
+ bkey_reassemble(new_k, k);
+ r_v = bkey_i_to_reflink_v(new_k);
+
+ le64_add_cpu(&r_v->v.refcount,
+ !(flags & BCH_BUCKET_MARK_OVERWRITE) ? 1 : -1);
+ /* A refcount of zero turns the key into a deleted key with an empty value. */
+ if (!r_v->v.refcount) {
+ r_v->k.type = KEY_TYPE_deleted;
+ set_bkey_val_u64s(&r_v->k, 0);
+ }
+out:
+ ret = k.k->p.offset - idx; /* the caller advances idx by this amount */
+err:
+ bch2_trans_iter_put(trans, iter); /* reached on every path after the initial lookup succeeded */
+ return ret;
+}
+/*
+ * Apply __bch2_trans_mark_reflink_p() across the part of reflink pointer p
+ * that begins offset sectors into the key and is abs(sectors) long. The
+ * walk starts at index le64_to_cpu(p.v->idx) + offset.
+ *
+ * Returns 0 once the whole range has been consumed, or the first negative
+ * value returned by the helper.
+ */
+static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
+ struct bkey_s_c_reflink_p p, unsigned offset,
+ s64 sectors, unsigned flags)
+{
+ u64 idx = le64_to_cpu(p.v->idx) + offset;
+ s64 ret = 0;
+
+ sectors = abs(sectors); /* only the magnitude is used; the helper picks +1/-1 from flags */
+ BUG_ON(offset + sectors > p.k->size); /* the range must lie inside the key */
+
+ while (sectors) {
+ ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors, flags);
+ if (ret < 0)
+ break;
+
+ idx += ret; /* helper returned how far the found key reaches past idx */
+ sectors = max_t(s64, 0LL, sectors - ret); /* clamp at 0: ret may exceed what is left */
+ ret = 0; /* so a completed loop returns 0 rather than the last step size */
+ }
+
+ return ret;
+}
+
int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
unsigned offset, s64 sectors, unsigned flags)
{
@@ -1593,6 +1674,7 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
return bch2_trans_mark_extent(trans, k, offset, sectors,
flags, BCH_DATA_BTREE);
case KEY_TYPE_extent:
+ case KEY_TYPE_reflink_v:
return bch2_trans_mark_extent(trans, k, offset, sectors,
flags, BCH_DATA_USER);
case KEY_TYPE_inode:
@@ -1616,6 +1698,10 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
d->fs_usage.persistent_reserved[replicas - 1] += sectors;
return 0;
}
+ case KEY_TYPE_reflink_p:
+ return bch2_trans_mark_reflink_p(trans,
+ bkey_s_c_to_reflink_p(k),
+ offset, sectors, flags);
default:
return 0;
}
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 11defa3d99a5..81ec55526ce9 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -744,7 +744,8 @@ void __bch2_cut_front(struct bpos where, struct bkey_s k)
case KEY_TYPE_error:
case KEY_TYPE_cookie:
break;
- case KEY_TYPE_extent: {
+ case KEY_TYPE_extent:
+ case KEY_TYPE_reflink_v: {
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry;
bool seen_crc = false;
@@ -774,6 +775,12 @@ void __bch2_cut_front(struct bpos where, struct bkey_s k)
break;
}
+ case KEY_TYPE_reflink_p: {
+ struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k);
+
+ le64_add_cpu(&p.v->idx, sub);
+ break;
+ }
case KEY_TYPE_reservation:
break;
default:
@@ -968,6 +975,33 @@ static int __bch2_extent_atomic_end(struct btree_trans *trans,
}
break;
+ case KEY_TYPE_reflink_p: {
+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+ u64 idx = le64_to_cpu(p.v->idx);
+ unsigned sectors = end->offset - bkey_start_offset(p.k);
+ struct btree_iter *iter;
+ struct bkey_s_c r_k;
+
+ for_each_btree_key(trans, iter,
+ BTREE_ID_REFLINK, POS(0, idx + offset),
+ BTREE_ITER_SLOTS, r_k, ret) {
+ if (bkey_cmp(bkey_start_pos(r_k.k),
+ POS(0, idx + sectors)) >= 0)
+ break;
+
+ *nr_iters += 1;
+ if (*nr_iters >= max_iters) {
+ struct bpos pos = bkey_start_pos(k.k);
+ pos.offset += r_k.k->p.offset - idx;
+
+ *end = bpos_min(*end, pos);
+ break;
+ }
+ }
+
+ bch2_trans_iter_put(trans, iter);
+ break;
+ }
}
return ret;
@@ -1561,17 +1595,17 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
return false;
}
-void bch2_extent_mark_replicas_cached(struct bch_fs *c,
- struct bkey_s_extent e,
- unsigned target,
- unsigned nr_desired_replicas)
+void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k,
+ unsigned target,
+ unsigned nr_desired_replicas)
{
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry;
struct extent_ptr_decoded p;
- int extra = bch2_bkey_durability(c, e.s_c) - nr_desired_replicas;
+ int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas;
if (target && extra > 0)
- extent_for_each_ptr_decode(e, p, entry) {
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
int n = bch2_extent_ptr_durability(c, p);
if (n && n <= extra &&
@@ -1582,7 +1616,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
}
if (extra > 0)
- extent_for_each_ptr_decode(e, p, entry) {
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
int n = bch2_extent_ptr_durability(c, p);
if (n && n <= extra) {
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 156d8e37045a..cef93af25858 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -306,6 +306,14 @@ static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
to_entry(&s.v->ptrs[s.v->nr_blocks]),
};
}
+ case KEY_TYPE_reflink_v: {
+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
+
+ return (struct bkey_ptrs_c) {
+ r.v->start,
+ bkey_val_end(r),
+ };
+ }
default:
return (struct bkey_ptrs_c) { NULL, NULL };
}
@@ -436,8 +444,8 @@ bch2_extent_can_insert(struct btree_trans *, struct btree_insert_entry *,
void bch2_insert_fixup_extent(struct btree_trans *,
struct btree_insert_entry *);
-void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
- unsigned, unsigned);
+void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s,
+ unsigned, unsigned);
const struct bch_extent_ptr *
bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
@@ -452,17 +460,24 @@ static inline bool bkey_extent_is_data(const struct bkey *k)
switch (k->type) {
case KEY_TYPE_btree_ptr:
case KEY_TYPE_extent:
+ case KEY_TYPE_reflink_p:
+ case KEY_TYPE_reflink_v:
return true;
default:
return false;
}
}
+/*
+ * Should extent be counted under inode->i_sectors?
+ */
static inline bool bkey_extent_is_allocation(const struct bkey *k)
{
switch (k->type) {
case KEY_TYPE_extent:
case KEY_TYPE_reservation:
+ case KEY_TYPE_reflink_p:
+ case KEY_TYPE_reflink_v:
return true;
default:
return false;
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index ef94aecaa7cb..771fb111550d 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -16,6 +16,7 @@
#include "io.h"
#include "keylist.h"
#include "quota.h"
+#include "reflink.h"
#include "trace.h"
#include <linux/aio.h>
@@ -201,9 +202,9 @@ static int inode_set_size(struct bch_inode_info *inode,
return 0;
}
-static int __must_check bch2_write_inode_size(struct bch_fs *c,
- struct bch_inode_info *inode,
- loff_t new_size, unsigned fields)
+int __must_check bch2_write_inode_size(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ loff_t new_size, unsigned fields)
{
struct inode_new_size s = {
.new_size = new_size,
@@ -936,15 +937,12 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
{
struct bvec_iter iter;
struct bio_vec bv;
- unsigned nr_ptrs = bch2_bkey_nr_ptrs_allocated(k);
+ unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
+ ? 0 : bch2_bkey_nr_ptrs_allocated(k);
unsigned state = k.k->type == KEY_TYPE_reservation
? SECTOR_RESERVED
: SECTOR_ALLOCATED;
- BUG_ON(bio->bi_iter.bi_sector < bkey_start_offset(k.k));
- BUG_ON(bio_end_sector(bio) > k.k->p.offset);
-
-
bio_for_each_segment(bv, bio, iter) {
struct bch_page_state *s = bch2_page_state(bv.bv_page);
unsigned i;
@@ -959,10 +957,11 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
}
static void readpage_bio_extend(struct readpages_iter *iter,
- struct bio *bio, u64 offset,
+ struct bio *bio,
+ unsigned sectors_this_extent,
bool get_more)
{
- while (bio_end_sector(bio) < offset &&
+ while (bio_sectors(bio) < sectors_this_extent &&
bio->bi_vcnt < bio->bi_max_vecs) {
pgoff_t page_offset = bio_end_sector(bio) >> PAGE_SECTOR_SHIFT;
struct page *page = readpage_iter_next(iter);
@@ -1012,35 +1011,39 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
struct bch_fs *c = trans->c;
int flags = BCH_READ_RETRY_IF_STALE|
BCH_READ_MAY_PROMOTE;
+ int ret = 0;
rbio->c = c;
rbio->start_time = local_clock();
-
+retry:
while (1) {
BKEY_PADDED(k) tmp;
struct bkey_s_c k;
- unsigned bytes, offset_into_extent;
+ unsigned bytes, sectors, offset_into_extent;
bch2_btree_iter_set_pos(iter,
POS(inum, rbio->bio.bi_iter.bi_sector));
k = bch2_btree_iter_peek_slot(iter);
- BUG_ON(!k.k);
-
- if (IS_ERR(k.k)) {
- int ret = btree_iter_err(iter);
- BUG_ON(!ret);
- bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
- bio_endio(&rbio->bio);
- return;
- }
+ ret = bkey_err(k);
+ if (ret)
+ break;
bkey_reassemble(&tmp.k, k);
- bch2_trans_unlock(trans);
k = bkey_i_to_s_c(&tmp.k);
offset_into_extent = iter->pos.offset -
bkey_start_offset(k.k);
+ sectors = k.k->size - offset_into_extent;
+
+ ret = bch2_read_indirect_extent(trans, iter,
+ &offset_into_extent, &tmp.k);
+ if (ret)
+ break;
+
+ sectors = min(sectors, k.k->size - offset_into_extent);
+
+ bch2_trans_unlock(trans);
if (readpages_iter) {
bool want_full_extent = false;
@@ -1055,13 +1058,11 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
(p.crc.compression_type != 0));
}
- readpage_bio_extend(readpages_iter,
- &rbio->bio, k.k->p.offset,
- want_full_extent);
+ readpage_bio_extend(readpages_iter, &rbio->bio,
+ sectors, want_full_extent);
}
- bytes = min_t(unsigned, bio_sectors(&rbio->bio),
- (k.k->size - offset_into_extent)) << 9;
+ bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
swap(rbio->bio.bi_iter.bi_size, bytes);
if (rbio->bio.bi_iter.bi_size == bytes)
@@ -1078,6 +1079,12 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
swap(rbio->bio.bi_iter.bi_size, bytes);
bio_advance(&rbio->bio, bytes);
}
+
+ if (ret == -EINTR)
+ goto retry;
+
+ bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
+ bio_endio(&rbio->bio);
}
void bch2_readahead(struct readahead_control *ractl)
@@ -2256,29 +2263,25 @@ out:
/* truncate: */
-static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode,
- u64 start_offset, u64 end_offset, u64 *journal_seq)
+int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
+ struct bpos end, struct bch_inode_info *inode,
+ u64 new_i_size)
{
- struct bpos start = POS(inode->v.i_ino, start_offset);
- struct bpos end = POS(inode->v.i_ino, end_offset);
+ struct bch_fs *c = trans->c;
unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
- struct btree_trans trans;
- struct btree_iter *iter;
struct bkey_s_c k;
- int ret = 0;
-
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
-
- iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, start,
- BTREE_ITER_INTENT);
+ int ret = 0, ret2 = 0;
while ((k = bch2_btree_iter_peek(iter)).k &&
- !(ret = bkey_err(k)) &&
bkey_cmp(iter->pos, end) < 0) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
struct bkey_i delete;
+ ret = bkey_err(k);
+ if (ret)
+ goto btree_err;
+
bkey_init(&delete.k);
delete.k.p = iter->pos;
@@ -2286,23 +2289,51 @@ static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode,
bch2_key_resize(&delete.k, max_sectors);
bch2_cut_back(end, &delete.k);
- bch2_trans_begin_updates(&trans);
+ bch2_trans_begin_updates(trans);
- ret = bch2_extent_update(&trans, inode,
+ ret = bch2_extent_update(trans, inode,
&disk_res, NULL, iter, &delete,
- 0, true, true, NULL);
+ new_i_size, false, true, NULL);
bch2_disk_reservation_put(c, &disk_res);
-
- if (ret == -EINTR)
+btree_err:
+ if (ret == -EINTR) {
+ ret2 = ret;
ret = 0;
+ }
if (ret)
break;
+ }
- bch2_trans_cond_resched(&trans);
+ if (bkey_cmp(iter->pos, end) > 0) {
+ bch2_btree_iter_set_pos(iter, end);
+ ret = bch2_btree_iter_traverse(iter);
}
+ return ret ?: ret2;
+}
+/*
+ * Run bch2_fpunch_at() from start_offset up to end_offset of inode's keys in
+ * BTREE_ID_EXTENTS, inside a btree transaction created and torn down here.
+ * new_i_size is passed as 0. The callers visible in this file pass offsets
+ * that were shifted right by 9, i.e. in 512-byte sectors.
+ *
+ * Returns the result of bch2_fpunch_at(), except that -EINTR is reported to
+ * the caller as 0.
+ */
+static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode,
+ u64 start_offset, u64 end_offset)
+{
+ struct btree_trans trans;
+ struct btree_iter *iter;
+ int ret = 0;
+
+ bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+ POS(inode->v.i_ino, start_offset),
+ BTREE_ITER_INTENT);
+
+ ret = bch2_fpunch_at(&trans, iter,
+ POS(inode->v.i_ino, end_offset),
+ inode, 0);
+
bch2_trans_exit(&trans);
+ if (ret == -EINTR) /* not propagated to callers of this wrapper */
+ ret = 0;
+
return ret;
}
@@ -2510,7 +2541,7 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
ret = __bch2_fpunch(c, inode,
round_up(iattr->ia_size, block_bytes(c)) >> 9,
- U64_MAX, &inode->ei_journal_seq);
+ U64_MAX);
if (unlikely(ret))
goto err;
@@ -2557,8 +2588,7 @@ static long bch2_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len)
truncate_pagecache_range(&inode->v, offset, offset + len - 1);
if (discard_start < discard_end)
- ret = __bch2_fpunch(c, inode, discard_start, discard_end,
- &inode->ei_journal_seq);
+ ret = __bch2_fpunch(c, inode, discard_start, discard_end);
err:
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
@@ -2670,7 +2700,7 @@ bkey_err:
ret = __bch2_fpunch(c, inode,
round_up(new_size, block_bytes(c)) >> 9,
- U64_MAX, &inode->ei_journal_seq);
+ U64_MAX);
if (ret)
goto err;
@@ -2853,6 +2883,94 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
return -EOPNOTSUPP;
}
+static void mark_range_unallocated(struct bch_inode_info *inode,
+ loff_t start, loff_t end)
+{
+ pgoff_t index = start >> PAGE_SHIFT;
+ pgoff_t end_index = (end - 1) >> PAGE_SHIFT; /* end is exclusive: page index of its last byte */
+ struct folio_batch fbatch;
+ unsigned i, j;
+ /*
+  * For every folio returned from inode's mapping for page indexes
+  * index..end_index (the pages covering bytes [start, end)), reset
+  * nr_replicas to 0 in each of the PAGE_SECTORS slots of its
+  * bch_page_state, when the folio has one.
+  */
+ folio_batch_init(&fbatch);
+
+ while (filemap_get_folios(inode->v.i_mapping,
+ &index, end_index, &fbatch)) { /* index is passed by address; presumably advanced per batch -- TODO confirm */
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
+ struct bch_page_state *s;
+
+ folio_lock(folio); /* page state is read and modified with the folio locked */
+ s = bch2_page_state(&folio->page);
+
+ if (s) /* folios without page state are left untouched */
+ for (j = 0; j < PAGE_SECTORS; j++)
+ s->s[j].nr_replicas = 0;
+
+ folio_unlock(folio);
+ }
+ folio_batch_release(&fbatch);
+ cond_resched();
+ }
+}
+/*
+ * Remap len bytes starting at pos_src in file_src to pos_dst in file_dst by
+ * way of bch2_remap_range().
+ *
+ * Returns -EINVAL for remap_flags outside REMAP_FILE_DEDUP and
+ * REMAP_FILE_ADVISORY, for positions that are not block aligned, and for
+ * ranges closer than len apart within the same inode. Returns -EOPNOTSUPP
+ * for REMAP_FILE_DEDUP. Otherwise returns whatever a failing step returned,
+ * 0, or a byte count capped at len.
+ */
+loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
+ struct file *file_dst, loff_t pos_dst,
+ loff_t len, unsigned remap_flags)
+{
+ struct bch_inode_info *src = file_bch_inode(file_src);
+ struct bch_inode_info *dst = file_bch_inode(file_dst);
+ struct bch_fs *c = src->v.i_sb->s_fs_info; /* filesystem taken from the source inode's superblock */
+ loff_t ret = 0;
+ loff_t aligned_len;
+
+ if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY))
+ return -EINVAL;
+
+ if (remap_flags & REMAP_FILE_DEDUP)
+ return -EOPNOTSUPP;
+ /* Both positions must be multiples of block_bytes(c); the mask test presumes a power-of-two block size. */
+ if ((pos_src & (block_bytes(c) - 1)) ||
+ (pos_dst & (block_bytes(c) - 1)))
+ return -EINVAL;
+ /* Same inode: reject when the two positions are less than len apart. */
+ if (src == dst &&
+ abs(pos_src - pos_dst) < len)
+ return -EINVAL;
+ /* Everything from here on leaves through out_unlock. */
+ bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
+
+ inode_dio_wait(&src->v);
+ inode_dio_wait(&dst->v);
+
+ ret = generic_remap_file_range_prep(file_src, pos_src,
+ file_dst, pos_dst,
+ &len, remap_flags); /* len is passed by address, so it may come back changed */
+ if (ret < 0 || len == 0)
+ goto out_unlock;
+
+ aligned_len = round_up(len, block_bytes(c)); /* whole blocks covering len */
+
+ ret = write_invalidate_inode_pages_range(dst->v.i_mapping,
+ pos_dst, pos_dst + aligned_len);
+ if (ret)
+ goto out_unlock;
+ /* Note this acts on the source range, while the invalidate above acted on the destination. */
+ mark_range_unallocated(src, pos_src, pos_src + aligned_len);
+
+ ret = bch2_remap_range(c, dst,
+ POS(dst->v.i_ino, pos_dst >> 9),
+ POS(src->v.i_ino, pos_src >> 9),
+ aligned_len >> 9,
+ pos_dst + len); /* byte positions and length are shifted to 512-byte sectors; the last argument stays in bytes */
+ if (ret > 0)
+ ret = min(ret << 9, len); /* positive result is shifted back to bytes, never exceeding the unrounded len */
+
+out_unlock:
+ bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
+
+ return ret;
+}
+
/* fseek: */
static int folio_data_offset(struct folio *folio, unsigned offset)
diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h
index e263b515e901..861ec25ab9ef 100644
--- a/fs/bcachefs/fs-io.h
+++ b/fs/bcachefs/fs-io.h
@@ -9,6 +9,22 @@
#include <linux/uio.h>
+struct quota_res;
+
+int bch2_extent_update(struct btree_trans *,
+ struct bch_inode_info *,
+ struct disk_reservation *,
+ struct quota_res *,
+ struct btree_iter *,
+ struct bkey_i *,
+ u64, bool, bool, s64 *);
+int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
+ struct bpos, struct bch_inode_info *, u64);
+
+int __must_check bch2_write_inode_size(struct bch_fs *,
+ struct bch_inode_info *,
+ loff_t, unsigned);
+
int bch2_writepage(struct page *, struct writeback_control *);
int bch2_read_folio(struct file *, struct folio *);
@@ -28,6 +44,9 @@ int bch2_fsync(struct file *, loff_t, loff_t, int);
int bch2_truncate(struct bch_inode_info *, struct iattr *);
long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t);
+loff_t bch2_remap_file_range(struct file *, loff_t, struct file *,
+ loff_t, loff_t, unsigned);
+
loff_t bch2_llseek(struct file *, loff_t, int);
vm_fault_t bch2_page_fault(struct vm_fault *);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 54e555fb4d5d..fad019d3c3f5 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1157,6 +1157,9 @@ static int bch2_fill_extent(struct bch_fs *c,
struct extent_ptr_decoded p;
int ret;
+ if (k.k->type == KEY_TYPE_reflink_v)
+ flags |= FIEMAP_EXTENT_SHARED;
+
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
int flags2 = 0;
u64 offset = p.ptr.offset;
@@ -1200,6 +1203,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
struct btree_iter *iter;
struct bkey_s_c k;
BKEY_PADDED(k) cur, prev;
+ unsigned offset_into_extent, sectors;
bool have_extent = false;
int ret = 0;
@@ -1212,15 +1216,36 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
bch2_trans_init(&trans, c, 0, 0);
- for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
- POS(ei->v.i_ino, start >> 9), 0, k, ret) {
- if (bkey_cmp(bkey_start_pos(k.k),
- POS(ei->v.i_ino, (start + len) >> 9)) >= 0)
- break;
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+ POS(ei->v.i_ino, start >> 9),
+ BTREE_ITER_SLOTS);
+
+ while (bkey_cmp(iter->pos, POS(ei->v.i_ino, (start + len) >> 9)) < 0) {
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
bkey_reassemble(&cur.k, k);
k = bkey_i_to_s_c(&cur.k);
+ offset_into_extent = iter->pos.offset -
+ bkey