From ea0213e0c7cc1c1b52badf27bd7db4f50a67baaa Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Thu, 9 Mar 2017 09:59:57 +0100 Subject: md: superblock changes for PPL Include information about PPL location and size in mdp_superblock_1 and copy it to/from rdev. Because PPL is mutually exclusive with bitmap, put it in place of 'bitmap_offset'. Add a new flag MD_FEATURE_PPL for 'feature_map', analogously to MD_FEATURE_BITMAP_OFFSET. Add MD_HAS_PPL to mddev->flags to indicate that PPL is enabled on an array. Signed-off-by: Artur Paszkiewicz Signed-off-by: Shaohua Li --- include/uapi/linux/raid/md_p.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 9930f3e9040f..fe2112810c43 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -242,10 +242,18 @@ struct mdp_superblock_1 { __le32 chunksize; /* in 512byte sectors */ __le32 raid_disks; - __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts - * NOTE: signed, so bitmap can be before superblock - * only meaningful of feature_map[0] is set. - */ + union { + __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts + * NOTE: signed, so bitmap can be before superblock + * only meaningful of feature_map[0] is set. 
+ */ + + /* only meaningful when feature_map[MD_FEATURE_PPL] is set */ + struct { + __le16 offset; /* sectors from start of superblock that ppl starts (signed) */ + __le16 size; /* ppl size in sectors */ + } ppl; + }; /* These are only valid with feature bit '4' */ __le32 new_level; /* new level we are reshaping to */ @@ -318,6 +326,7 @@ struct mdp_superblock_1 { */ #define MD_FEATURE_CLUSTERED 256 /* clustered MD */ #define MD_FEATURE_JOURNAL 512 /* support write cache */ +#define MD_FEATURE_PPL 1024 /* support PPL */ #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ |MD_FEATURE_RECOVERY_OFFSET \ |MD_FEATURE_RESHAPE_ACTIVE \ @@ -328,6 +337,7 @@ struct mdp_superblock_1 { |MD_FEATURE_RECOVERY_BITMAP \ |MD_FEATURE_CLUSTERED \ |MD_FEATURE_JOURNAL \ + |MD_FEATURE_PPL \ ) struct r5l_payload_header { -- cgit v1.2.3 From 3418d036c81dcb604b7c7c71b209d5890a8418aa Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Thu, 9 Mar 2017 09:59:59 +0100 Subject: raid5-ppl: Partial Parity Log write logging implementation Implement the calculation of partial parity for a stripe and PPL write logging functionality. The description of PPL is added to the documentation. More details can be found in the comments in raid5-ppl.c. Attach a page for holding the partial parity data to stripe_head. Allocate it only if mddev has the MD_HAS_PPL flag set. Partial parity is the xor of not modified data chunks of a stripe and is calculated as follows: - reconstruct-write case: xor data from all not updated disks in a stripe - read-modify-write case: xor old data and parity from all updated disks in a stripe Implement it using the async_tx API and integrate into raid_run_ops(). It must be called when we still have access to old data, so do it when STRIPE_OP_BIODRAIN is set, but before ops_run_prexor5(). The result is stored into sh->ppl_page. 
Partial parity is not meaningful for full stripe write and is not stored in the log or used for recovery, so don't attempt to calculate it when stripe has STRIPE_FULL_WRITE. Put the PPL metadata structures to md_p.h because userspace tools (mdadm) will also need to read/write PPL. Warn about using PPL with enabled disk volatile write-back cache for now. It can be removed once disk cache flushing before writing PPL is implemented. Signed-off-by: Artur Paszkiewicz Signed-off-by: Shaohua Li --- include/uapi/linux/raid/md_p.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index fe2112810c43..d9a1ead867b9 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -398,4 +398,31 @@ struct r5l_meta_block { #define R5LOG_VERSION 0x1 #define R5LOG_MAGIC 0x6433c509 + +struct ppl_header_entry { + __le64 data_sector; /* raid sector of the new data */ + __le32 pp_size; /* length of partial parity */ + __le32 data_size; /* length of data */ + __le32 parity_disk; /* member disk containing parity */ + __le32 checksum; /* checksum of partial parity data for this + * entry (~crc32c) */ +} __attribute__ ((__packed__)); + +#define PPL_HEADER_SIZE 4096 +#define PPL_HDR_RESERVED 512 +#define PPL_HDR_ENTRY_SPACE \ + (PPL_HEADER_SIZE - PPL_HDR_RESERVED - 4 * sizeof(u32) - sizeof(u64)) +#define PPL_HDR_MAX_ENTRIES \ + (PPL_HDR_ENTRY_SPACE / sizeof(struct ppl_header_entry)) + +struct ppl_header { + __u8 reserved[PPL_HDR_RESERVED];/* reserved space, fill with 0xff */ + __le32 signature; /* signature (family number of volume) */ + __le32 padding; /* zero pad */ + __le64 generation; /* generation number of the header */ + __le32 entries_count; /* number of entries in entry array */ + __le32 checksum; /* checksum of the header (~crc32c) */ + struct ppl_header_entry entries[PPL_HDR_MAX_ENTRIES]; +} __attribute__ ((__packed__)); + #endif -- cgit v1.2.3 
From 210f7cdcf088c304ee0533ffd33d6f71a8821862 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 15 Mar 2017 14:05:14 +1100 Subject: percpu-refcount: support synchronous switch to atomic mode. percpu_ref_switch_to_atomic_sync() schedules the switch to atomic mode, then waits for it to complete. Also export percpu_ref_switch_to_* so they can be used from modules. This will be used in md/raid to count the number of pending write requests to an array. We occasionally need to check if the count is zero, but most often we don't care. We always want updates to the counter to be fast, as in some cases we count every 4K page. Signed-off-by: NeilBrown Acked-by: Tejun Heo Signed-off-by: Shaohua Li --- include/linux/percpu-refcount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 3a481a49546e..c13dceb87b60 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -99,6 +99,7 @@ int __must_check percpu_ref_init(struct percpu_ref *ref, void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_switch_to_atomic(struct percpu_ref *ref, percpu_ref_func_t *confirm_switch); +void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref); void percpu_ref_switch_to_percpu(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); -- cgit v1.2.3 From 6f8802852f7e58a12177a86179803b9efaad98e2 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 17 Mar 2017 00:12:29 +0800 Subject: block: introduce bio_copy_data_partial It turns out we can use bio_copy_data() in raid1's write behind, and we can make alloc_behind_pages() cleaner and more efficient, but we need a partial version of bio_copy_data(). 
Signed-off-by: Ming Lei Reviewed-by: Jens Axboe Signed-off-by: Shaohua Li --- include/linux/bio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 8e521194f6fc..42b62a0288b0 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -468,6 +468,8 @@ static inline void bio_flush_dcache_pages(struct bio *bi) #endif extern void bio_copy_data(struct bio *dst, struct bio *src); +extern void bio_copy_data_partial(struct bio *dst, struct bio *src, + int offset, int size); extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); extern void bio_free_pages(struct bio *bio); -- cgit v1.2.3 From f45958756fef552436e4a63029a168495920026e Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 24 Mar 2017 10:34:43 -0700 Subject: block: remove bio_clone_bioset_partial() commit c18a1e0(block: introduce bio_clone_bioset_partial()) introduced bio_clone_bioset_partial() for raid1 write behind IO. Now the write behind is rewritten by Ming. We don't need the API any more, so revert the commit. 
Cc: Christoph Hellwig Reviewed-by: Jens Axboe Reviewed-by: Ming Lei Signed-off-by: Shaohua Li --- include/linux/bio.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 42b62a0288b0..fafef6343d1b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -183,7 +183,7 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) -static inline unsigned __bio_segments(struct bio *bio, struct bvec_iter *bvec) +static inline unsigned bio_segments(struct bio *bio) { unsigned segs = 0; struct bio_vec bv; @@ -205,17 +205,12 @@ static inline unsigned __bio_segments(struct bio *bio, struct bvec_iter *bvec) break; } - __bio_for_each_segment(bv, bio, iter, *bvec) + bio_for_each_segment(bv, bio, iter) segs++; return segs; } -static inline unsigned bio_segments(struct bio *bio) -{ - return __bio_segments(bio, &bio->bi_iter); -} - /* * get a reference to a bio, so it won't disappear. the intended use is * something like: @@ -389,8 +384,6 @@ extern void bio_put(struct bio *); extern void __bio_clone_fast(struct bio *, struct bio *); extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); -extern struct bio *bio_clone_bioset_partial(struct bio *, gfp_t, - struct bio_set *, int, int); extern struct bio_set *fs_bio_set; -- cgit v1.2.3 From 50512625da06c41517cb596f51b923ce15f401a4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 5 Apr 2017 14:05:50 +1000 Subject: Revert "block: introduce bio_copy_data_partial" This reverts commit 6f8802852f7e58a12177a86179803b9efaad98e2. bio_copy_data_partial() is no longer needed. 
Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- include/linux/bio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index fafef6343d1b..7cf8a6c70a3f 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -461,8 +461,6 @@ static inline void bio_flush_dcache_pages(struct bio *bi) #endif extern void bio_copy_data(struct bio *dst, struct bio *src); -extern void bio_copy_data_partial(struct bio *dst, struct bio *src, - int offset, int size); extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); extern void bio_free_pages(struct bio *bio); -- cgit v1.2.3 From fc6d2a3ca59d5656d5b0ac3b25ecf493e4614abd Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Wed, 19 Apr 2017 10:48:06 +0200 Subject: uapi: fix linux/raid/md_p.h userspace compilation error Use __le32 and __le64 instead of u32 and u64. This fixes klibc build error: In file included from /klibc/usr/klibc/../include/sys/md.h:30:0, from /klibc/usr/kinit/do_mounts_md.c:19: /linux-next/usr/include/linux/raid/md_p.h:414:51: error: 'u32' undeclared here (not in a function) (PPL_HEADER_SIZE - PPL_HDR_RESERVED - 4 * sizeof(u32) - sizeof(u64)) Reported-by: Greg Thelen Reported-by: Nigel Croxon Tested-by: Greg Thelen Signed-off-by: Artur Paszkiewicz Signed-off-by: Shaohua Li --- include/uapi/linux/raid/md_p.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index d9a1ead867b9..d500bd224979 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -411,7 +411,7 @@ struct ppl_header_entry { #define PPL_HEADER_SIZE 4096 #define PPL_HDR_RESERVED 512 #define PPL_HDR_ENTRY_SPACE \ - (PPL_HEADER_SIZE - PPL_HDR_RESERVED - 4 * sizeof(u32) - sizeof(u64)) + (PPL_HEADER_SIZE - PPL_HDR_RESERVED - 4 * sizeof(__le32) - sizeof(__le64)) #define PPL_HDR_MAX_ENTRIES \ (PPL_HDR_ENTRY_SPACE / sizeof(struct ppl_header_entry)) -- cgit 
v1.2.3