diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-08 11:51:05 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-08 11:51:05 -0700 |
commit | ebb37277796269da36a8bc5d72ed1e8e1fb7d34b (patch) | |
tree | 0ded627a62a5cec70b18d12825dd858855c135d3 /drivers | |
parent | 4de13d7aa8f4d02f4dc99d4609575659f92b3c5a (diff) | |
parent | f50efd2fdbd9b35b11f5778ed85beb764184bda9 (diff) | |
download | linux-ebb37277796269da36a8bc5d72ed1e8e1fb7d34b.tar.gz linux-ebb37277796269da36a8bc5d72ed1e8e1fb7d34b.tar.bz2 linux-ebb37277796269da36a8bc5d72ed1e8e1fb7d34b.zip |
Merge branch 'for-3.10/drivers' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
"It might look big in volume, but when categorized, not a lot of
drivers are touched. The pull request contains:
- mtip32xx fixes from Micron.
- A slew of drbd updates, this time in a nicer series.
- bcache, a flash/ssd caching framework from Kent.
- Fixes for cciss"
* 'for-3.10/drivers' of git://git.kernel.dk/linux-block: (66 commits)
bcache: Use bd_link_disk_holder()
bcache: Allocator cleanup/fixes
cciss: bug fix to prevent cciss from loading in kdump crash kernel
cciss: add cciss_allow_hpsa module parameter
drivers/block/mg_disk.c: add CONFIG_PM_SLEEP to suspend/resume functions
mtip32xx: Workaround for unaligned writes
bcache: Make sure blocksize isn't smaller than device blocksize
bcache: Fix merge_bvec_fn usage for when it modifies the bvm
bcache: Correctly check against BIO_MAX_PAGES
bcache: Hack around stuff that clones up to bi_max_vecs
bcache: Set ra_pages based on backing device's ra_pages
bcache: Take data offset from the bdev superblock.
mtip32xx: mtip32xx: Disable TRIM support
mtip32xx: fix a smatch warning
bcache: Disable broken btree fuzz tester
bcache: Fix a format string overflow
bcache: Fix a minor memory leak on device teardown
bcache: Documentation updates
bcache: Use WARN_ONCE() instead of __WARN()
bcache: Add missing #include <linux/prefetch.h>
...
Diffstat (limited to 'drivers')
46 files changed, 16713 insertions, 350 deletions
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 5efed089a702..fc803ecbbce4 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -920,16 +920,14 @@ bio_pagedec(struct bio *bio) static void bufinit(struct buf *buf, struct request *rq, struct bio *bio) { - struct bio_vec *bv; - memset(buf, 0, sizeof(*buf)); buf->rq = rq; buf->bio = bio; buf->resid = bio->bi_size; buf->sector = bio->bi_sector; bio_pageinc(bio); - buf->bv = bv = bio_iovec(bio); - buf->bv_resid = bv->bv_len; + buf->bv = bio_iovec(bio); + buf->bv_resid = buf->bv->bv_len; WARN_ON(buf->bv_resid == 0); } diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 94b51c5e0678..6374dc103521 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -75,6 +75,12 @@ module_param(cciss_simple_mode, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(cciss_simple_mode, "Use 'simple mode' rather than 'performant mode'"); +static int cciss_allow_hpsa; +module_param(cciss_allow_hpsa, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(cciss_allow_hpsa, + "Prevent cciss driver from accessing hardware known to be " + " supported by the hpsa driver"); + static DEFINE_MUTEX(cciss_mutex); static struct proc_dir_entry *proc_cciss; @@ -4115,9 +4121,13 @@ static int cciss_lookup_board_id(struct pci_dev *pdev, u32 *board_id) *board_id = ((subsystem_device_id << 16) & 0xffff0000) | subsystem_vendor_id; - for (i = 0; i < ARRAY_SIZE(products); i++) + for (i = 0; i < ARRAY_SIZE(products); i++) { + /* Stand aside for hpsa driver on request */ + if (cciss_allow_hpsa) + return -ENODEV; if (*board_id == products[i].board_id) return i; + } dev_warn(&pdev->dev, "unrecognized board ID: 0x%08x, ignoring.\n", *board_id); return -ENODEV; @@ -4959,6 +4969,16 @@ static int cciss_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ctlr_info_t *h; unsigned long flags; + /* + * By default the cciss driver is used for all older HP Smart Array + * controllers. There are module paramaters that allow a user to + * override this behavior and instead use the hpsa SCSI driver. If + * this is the case cciss may be loaded first from the kdump initrd + * image and cause a kernel panic. So if reset_devices is true and + * cciss_allow_hpsa is set just bail. + */ + if ((reset_devices) && (cciss_allow_hpsa == 1)) + return -ENODEV; rc = cciss_init_reset_devices(pdev); if (rc) { if (rc != -ENOTSUPP) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 92510f8ad013..6608076dc39e 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -104,7 +104,6 @@ struct update_al_work { int err; }; -static int al_write_transaction(struct drbd_conf *mdev); void *drbd_md_get_buffer(struct drbd_conf *mdev) { @@ -168,7 +167,11 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, bio->bi_end_io = drbd_md_io_complete; bio->bi_rw = rw; - if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */ + if (!(rw & WRITE) && mdev->state.disk == D_DISKLESS && mdev->ldev == NULL) + /* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */ + ; + else if (!get_ldev_if_state(mdev, D_ATTACHING)) { + /* Corresponding put_ldev in drbd_md_io_complete() */ dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); err = -ENODEV; goto out; @@ -199,9 +202,10 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, BUG_ON(!bdev->md_bdev); - dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n", + dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n", current->comm, current->pid, __func__, - (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); + (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", + (void*)_RET_IP_ ); if (sector < drbd_md_first_sector(bdev) || sector + 7 > drbd_md_last_sector(bdev)) @@ -209,7 +213,8 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); - err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE); + /* we do all our meta data IO in aligned 4k blocks. */ + err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, 4096); if (err) { dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n", (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err); @@ -217,44 +222,99 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, return err; } -static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) +static struct bm_extent *find_active_resync_extent(struct drbd_conf *mdev, unsigned int enr) { - struct lc_element *al_ext; struct lc_element *tmp; - int wake; - - spin_lock_irq(&mdev->al_lock); tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT); if (unlikely(tmp != NULL)) { struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); - if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { - wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags); - spin_unlock_irq(&mdev->al_lock); - if (wake) - wake_up(&mdev->al_wait); - return NULL; - } + if (test_bit(BME_NO_WRITES, &bm_ext->flags)) + return bm_ext; + } + return NULL; +} + +static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr, bool nonblock) +{ + struct lc_element *al_ext; + struct bm_extent *bm_ext; + int wake; + + spin_lock_irq(&mdev->al_lock); + bm_ext = find_active_resync_extent(mdev, enr); + if (bm_ext) { + wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags); + spin_unlock_irq(&mdev->al_lock); + if (wake) + wake_up(&mdev->al_wait); + return NULL; } - al_ext = lc_get(mdev->act_log, enr); + if (nonblock) + al_ext = lc_try_get(mdev->act_log, enr); + else + al_ext = lc_get(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); return al_ext; } -void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) +bool drbd_al_begin_io_fastpath(struct drbd_conf *mdev, struct drbd_interval *i) { /* for bios crossing activity log extent boundaries, * we may need to activate two extents in one go */ unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); - unsigned enr; - bool locked = false; + D_ASSERT((unsigned)(last - first) <= 1); + D_ASSERT(atomic_read(&mdev->local_cnt) > 0); + + /* FIXME figure out a fast path for bios crossing AL extent boundaries */ + if (first != last) + return false; + + return _al_get(mdev, first, true); +} + +bool drbd_al_begin_io_prepare(struct drbd_conf *mdev, struct drbd_interval *i) +{ + /* for bios crossing activity log extent boundaries, + * we may need to activate two extents in one go */ + unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); + unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); + unsigned enr; + bool need_transaction = false; D_ASSERT(first <= last); D_ASSERT(atomic_read(&mdev->local_cnt) > 0); - for (enr = first; enr <= last; enr++) - wait_event(mdev->al_wait, _al_get(mdev, enr) != NULL); + for (enr = first; enr <= last; enr++) { + struct lc_element *al_ext; + wait_event(mdev->al_wait, + (al_ext = _al_get(mdev, enr, false)) != NULL); + if (al_ext->lc_number != enr) + need_transaction = true; + } + return need_transaction; +} + +static int al_write_transaction(struct drbd_conf *mdev, bool delegate); + +/* When called through generic_make_request(), we must delegate + * activity log I/O to the worker thread: a further request + * submitted via generic_make_request() within the same task + * would be queued on current->bio_list, and would only start + * after this function returns (see generic_make_request()). + * + * However, if we *are* the worker, we must not delegate to ourselves. + */ + +/* + * @delegate: delegate activity log I/O to the worker thread + */ +void drbd_al_begin_io_commit(struct drbd_conf *mdev, bool delegate) +{ + bool locked = false; + + BUG_ON(delegate && current == mdev->tconn->worker.task); /* Serialize multiple transactions. * This uses test_and_set_bit, memory barrier is implicit. @@ -264,13 +324,6 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) (locked = lc_try_lock_for_transaction(mdev->act_log))); if (locked) { - /* drbd_al_write_transaction(mdev,al_ext,enr); - * recurses into generic_make_request(), which - * disallows recursion, bios being serialized on the - * current->bio_tail list now. - * we have to delegate updates to the activity log - * to the worker thread. */ - /* Double check: it may have been committed by someone else, * while we have been waiting for the lock. */ if (mdev->act_log->pending_changes) { @@ -280,11 +333,8 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates; rcu_read_unlock(); - if (write_al_updates) { - al_write_transaction(mdev); - mdev->al_writ_cnt++; - } - + if (write_al_updates) + al_write_transaction(mdev, delegate); spin_lock_irq(&mdev->al_lock); /* FIXME if (err) @@ -298,6 +348,66 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) } } +/* + * @delegate: delegate activity log I/O to the worker thread + */ +void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool delegate) +{ + BUG_ON(delegate && current == mdev->tconn->worker.task); + + if (drbd_al_begin_io_prepare(mdev, i)) + drbd_al_begin_io_commit(mdev, delegate); +} + +int drbd_al_begin_io_nonblock(struct drbd_conf *mdev, struct drbd_interval *i) +{ + struct lru_cache *al = mdev->act_log; + /* for bios crossing activity log extent boundaries, + * we may need to activate two extents in one go */ + unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); + unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); + unsigned nr_al_extents; + unsigned available_update_slots; + unsigned enr; + + D_ASSERT(first <= last); + + nr_al_extents = 1 + last - first; /* worst case: all touched extends are cold. */ + available_update_slots = min(al->nr_elements - al->used, + al->max_pending_changes - al->pending_changes); + + /* We want all necessary updates for a given request within the same transaction + * We could first check how many updates are *actually* needed, + * and use that instead of the worst-case nr_al_extents */ + if (available_update_slots < nr_al_extents) + return -EWOULDBLOCK; + + /* Is resync active in this area? */ + for (enr = first; enr <= last; enr++) { + struct lc_element *tmp; + tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT); + if (unlikely(tmp != NULL)) { + struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); + if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { + if (!test_and_set_bit(BME_PRIORITY, &bm_ext->flags)) + return -EBUSY; + return -EWOULDBLOCK; + } + } + } + + /* Checkout the refcounts. + * Given that we checked for available elements and update slots above, + * this has to be successful. */ + for (enr = first; enr <= last; enr++) { + struct lc_element *al_ext; + al_ext = lc_get_cumulative(mdev->act_log, enr); + if (!al_ext) + dev_info(DEV, "LOGIC BUG for enr=%u\n", enr); + } + return 0; +} + void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i) { /* for bios crossing activity log extent boundaries, @@ -350,6 +460,24 @@ static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) (BM_EXT_SHIFT - BM_BLOCK_SHIFT)); } +static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev) +{ + const unsigned int stripes = mdev->ldev->md.al_stripes; + const unsigned int stripe_size_4kB = mdev->ldev->md.al_stripe_size_4k; + + /* transaction number, modulo on-disk ring buffer wrap around */ + unsigned int t = mdev->al_tr_number % (mdev->ldev->md.al_size_4k); + + /* ... to aligned 4k on disk block */ + t = ((t % stripes) * stripe_size_4kB) + t/stripes; + + /* ... to 512 byte sector in activity log */ + t *= 8; + + /* ... plus offset to the on disk position */ + return mdev->ldev->md.md_offset + mdev->ldev->md.al_offset + t; +} + static int _al_write_transaction(struct drbd_conf *mdev) { @@ -432,23 +560,27 @@ _al_write_transaction(struct drbd_conf *mdev) if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) mdev->al_tr_cycle = 0; - sector = mdev->ldev->md.md_offset - + mdev->ldev->md.al_offset - + mdev->al_tr_pos * (MD_BLOCK_SIZE>>9); + sector = al_tr_number_to_on_disk_sector(mdev); crc = crc32c(0, buffer, 4096); buffer->crc32c = cpu_to_be32(crc); if (drbd_bm_write_hinted(mdev)) err = -EIO; - /* drbd_chk_io_error done already */ - else if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { - err = -EIO; - drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); - } else { - /* advance ringbuffer position and transaction counter */ - mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); - mdev->al_tr_number++; + else { + bool write_al_updates; + rcu_read_lock(); + write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates; + rcu_read_unlock(); + if (write_al_updates) { + if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { + err = -EIO; + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); + } else { + mdev->al_tr_number++; + mdev->al_writ_cnt++; + } + } } drbd_md_put_buffer(mdev); @@ -474,20 +606,18 @@ static int w_al_write_transaction(struct drbd_work *w, int unused) /* Calls from worker context (see w_restart_disk_io()) need to write the transaction directly. Others came through generic_make_request(), those need to delegate it to the worker. */ -static int al_write_transaction(struct drbd_conf *mdev) +static int al_write_transaction(struct drbd_conf *mdev, bool delegate) { - struct update_al_work al_work; - - if (current == mdev->tconn->worker.task) + if (delegate) { + struct update_al_work al_work; + init_completion(&al_work.event); + al_work.w.cb = w_al_write_transaction; + al_work.w.mdev = mdev; + drbd_queue_work_front(&mdev->tconn->sender_work, &al_work.w); + wait_for_completion(&al_work.event); + return al_work.err; + } else return _al_write_transaction(mdev); - - init_completion(&al_work.event); - al_work.w.cb = w_al_write_transaction; - al_work.w.mdev = mdev; - drbd_queue_work_front(&mdev->tconn->sender_work, &al_work.w); - wait_for_completion(&al_work.event); - - return al_work.err; } static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 8dc29502dc08..64fbb8385cdc 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -612,6 +612,17 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) } } +/* For the layout, see comment above drbd_md_set_sector_offsets(). */ +static u64 drbd_md_on_disk_bits(struct drbd_backing_dev *ldev) +{ + u64 bitmap_sectors; + if (ldev->md.al_offset == 8) + bitmap_sectors = ldev->md.md_size_sect - ldev->md.bm_offset; + else + bitmap_sectors = ldev->md.al_offset - ldev->md.bm_offset; + return bitmap_sectors << (9 + 3); +} + /* * make sure the bitmap has enough room for the attached storage, * if necessary, resize. @@ -668,7 +679,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) words = ALIGN(bits, 64) >> LN2_BPL; if (get_ldev(mdev)) { - u64 bits_on_disk = ((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12; + u64 bits_on_disk = drbd_md_on_disk_bits(mdev->ldev); put_ldev(mdev); if (bits > bits_on_disk) { dev_info(DEV, "bits = %lu\n", bits); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6b51afa1aae1..f943aacfdad8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -753,13 +753,16 @@ struct drbd_md { u32 flags; u32 md_size_sect; - s32 al_offset; /* signed relative sector offset to al area */ + s32 al_offset; /* signed relative sector offset to activity log */ s32 bm_offset; /* signed relative sector offset to bitmap */ - /* u32 al_nr_extents; important for restoring the AL - * is stored into ldev->dc.al_extents, which in turn - * gets applied to act_log->nr_elements - */ + /* cached value of bdev->disk_conf->meta_dev_idx (see below) */ + s32 meta_dev_idx; + + /* see al_tr_number_to_on_disk_sector() */ + u32 al_stripes; + u32 al_stripe_size_4k; + u32 al_size_4k; /* cached product of the above */ }; struct drbd_backing_dev { @@ -891,6 +894,14 @@ struct drbd_tconn { /* is a resource from the config file */ } send; }; +struct submit_worker { + struct workqueue_struct *wq; + struct work_struct worker; + + spinlock_t lock; + struct list_head writes; +}; + struct drbd_conf { struct drbd_tconn *tconn; int vnr; /* volume number within the connection */ @@ -1009,7 +1020,6 @@ struct drbd_conf { struct lru_cache *act_log; /* activity log */ unsigned int al_tr_number; int al_tr_cycle; - int al_tr_pos; /* position of the next transaction in the journal */ wait_queue_head_t seq_wait; atomic_t packet_seq; unsigned int peer_seq; @@ -1032,6 +1042,10 @@ struct drbd_conf { atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */ unsigned int peer_max_bio_size; unsigned int local_max_bio_size; + + /* any requests that would block in drbd_make_request() + * are deferred to this single-threaded work queue */ + struct submit_worker submit; }; static inline struct drbd_conf *minor_to_mdev(unsigned int minor) @@ -1148,25 +1162,44 @@ extern int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, char *why, enum bm_flag flags); extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); -extern void drbd_go_diskless(struct drbd_conf *mdev); extern void drbd_ldev_destroy(struct drbd_conf *mdev); /* Meta data layout - We reserve a 128MB Block (4k aligned) - * either at the end of the backing device - * or on a separate meta data device. */ + * + * We currently have two possible layouts. + * Offsets in (512 byte) sectors. + * external: + * |----------- md_size_sect ------------------| + * [ 4k superblock ][ activity log ][ Bitmap ] + * | al_offset == 8 | + * | bm_offset = al_offset + X | + * ==> bitmap sectors = md_size_sect - bm_offset + * + * Variants: + * old, indexed fixed size meta data: + * + * internal: + * |----------- md_size_sect ------------------| + * [data.....][ Bitmap ][ activity log ][ 4k superblock ][padding*] + * | al_offset < 0 | + * | bm_offset = al_offset - Y | + * ==> bitmap sectors = Y = al_offset - bm_offset + * + * [padding*] are zero or up to 7 unused 512 Byte sectors to the + * end of the device, so that the [4k superblock] will be 4k aligned. + * + * The activity log consists of 4k transaction blocks, + * which are written in a ring-buffer, or striped ring-buffer like fashion, + * which are writtensize used to be fixed 32kB, + * but is about to become configurable. + */ -/* The following numbers are sectors */ -/* Allows up to about 3.8TB, so if you want more, +/* Our old fixed size meta data layout + * allows up to about 3.8TB, so if you want more, * you need to use the "flexible" meta data format. */ -#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ -#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ -#define MD_AL_SECTORS 64 /* = 32 kB on disk activity log ring buffer */ -#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_SECTORS) - -/* we do all meta data IO in 4k blocks */ -#define MD_BLOCK_SHIFT 12 -#define MD_BLOCK_SIZE (1<<MD_BLOCK_SHIFT) +#define MD_128MB_SECT (128LLU << 11) /* 128 MB, unit sectors */ +#define MD_4kB_SECT 8 +#define MD_32kB_SECT 64 /* One activity log extent represents 4M of storage */ #define AL_EXTENT_SHIFT 22 @@ -1256,7 +1289,6 @@ struct bm_extent { /* in one sector of the bitmap, we have this many activity_log extents. */ #define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) -#define BM_WORDS_PER_AL_EXT (1 << (AL_EXTENT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) #define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT) #define BM_BLOCKS_PER_BM_EXT_MASK ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1) @@ -1276,16 +1308,18 @@ struct bm_extent { */ #define DRBD_MAX_SECTORS_32 (0xffffffffLU) -#define DRBD_MAX_SECTORS_BM \ - ((MD_RESERVED_SECT - MD_BM_OFFSET) * (1LL<<(BM_EXT_SHIFT-9))) -#if DRBD_MAX_SECTORS_BM < DRBD_MAX_SECTORS_32 -#define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM -#define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_BM -#elif !defined(CONFIG_LBDAF) && BITS_PER_LONG == 32 +/* we have a certain meta data variant that has a fixed on-disk size of 128 + * MiB, of which 4k are our "superblock", and 32k are the fixed size activity + * log, leaving this many sectors for the bitmap. + */ + +#define DRBD_MAX_SECTORS_FIXED_BM \ + ((MD_128MB_SECT - MD_32kB_SECT - MD_4kB_SECT) * (1LL<<(BM_EXT_SHIFT-9))) +#if !defined(CONFIG_LBDAF) && BITS_PER_LONG == 32 #define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_32 #define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_32 #else -#define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM +#define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_FIXED_BM /* 16 TB in units of sectors */ #if BITS_PER_LONG == 32 /* adjust by one page worth of bitmap, @@ -1418,6 +1452,7 @@ extern void conn_free_crypto(struct drbd_tconn *tconn); extern int proc_details; /* drbd_req */ +extern void do_submit(struct work_struct *ws); extern void __drbd_make_request(struct drbd_conf *, struct bio *, unsigned long); extern void drbd_make_request(struct request_queue *q, struct bio *bio); extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); @@ -1576,7 +1611,10 @@ extern const char *drbd_conn_str(enum drbd_conns s); extern const char |