/*
* Copyright (C) 2015, SUSE
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
*/
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/dlm.h>
#include <linux/sched.h>
#include <linux/raid/md_p.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"
#define LVB_SIZE 64
#define NEW_DEV_TIMEOUT 5000
struct dlm_lock_resource {
dlm_lockspace_t *ls;
struct dlm_lksb lksb;
char *name; /* lock name. */
uint32_t flags; /* flags to pass to dlm_lock() */
wait_queue_head_t sync_locking; /* wait queue for synchronized locking */
bool sync_locking_done;
void (*bast)(void *arg, int mode); /* blocking AST function pointer*/
struct mddev *mddev; /* pointing back to mddev. */
int mode;
};
struct suspend_info {
int slot;
sector_t lo;
sector_t hi;
struct list_head list;
};
struct resync_info {
__le64 lo;
__le64 hi;
};
/* md_cluster_info flags */
#define MD_CLUSTER_WAITING_FOR_NEWDISK 1
#define MD_CLUSTER_SUSPEND_READ_BALANCING 2
#define MD_CLUSTER_BEGIN_JOIN_CLUSTER 3
/* Lock the send communication. This is done through
* bit manipulation as opposed to a mutex in order to
* accomodate lock and hold. See next comment.
*/
#define MD_CLUSTER_SEND_LOCK 4
/* If cluster operations (such as adding a disk) must lock the
* communication channel, so as to perform extra operations
* (update metadata) and no other operation is allowed on the
* MD. Token needs to be locked and held until the operation
* completes witha md_update_sb(), which would eventually release
* the lock.
*/
#define MD_CLUSTER_SEND_LOCKED_ALREADY 5
/* We should receive message after node joined cluster and
* set up all the related infos such as bitmap and personality */
#define MD_CLUSTER_ALREADY_IN_CLUSTER 6
#define MD_CLUSTER_PENDING_RECV_EVENT 7
#define MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD 8
struct md_cluster_info {
struct mddev *mddev; /* the md device which md_cluster_info belongs to */
/* dlm lock space and resources for clustered raid. */
dlm_lockspace_t *lockspace;
int slot_number;
struct completion completion;
struct mutex recv_mutex;
struct dlm_lock_resource *bitmap_lockres;
struct dlm_lock_resource **other_bitmap_lockres;
struct dlm_lock_resource *resync_lockres;
struct list_head suspend_list;
spinlock_t suspend_lock;
struct md_thread *recovery_thread;
unsigned long recovery_map;
/* communication loc resources */
struct dlm_lock_resource *ack_lockres;
struct dlm_lock_resource *message_lockres;
struct dlm_lock_resource *token_lockres;
struct dlm_lock_resource *no_new_dev_lockres;
struct md_thread *recv_thread;
struct completion newdisk_completion;
wait_queue_head_t wait;
unsigned long state;
/* record the region in RESYNCING message */
sector_t sync_low;
sector_t sync_hi;
};
enum msg_type {
METADATA_UPDATED = 0,
RESYNCING,
NEWDISK,
REMOVE,
RE_ADD,
BITMAP_NEEDS_SYNC,
CHANGE_CAPACITY,
};
struct cluster_msg {
__le32 type;
__le32 slot;
/* TODO: Unionize this for smaller footprint */
__le64 low;
__le64 high;
char uuid[16];
__le32 raid_slot;
};
static void sync_ast(void *arg)
{
struct dlm_lock_resource *res;
res = arg;
res->sync_locking_done = true;
wake_up(&res->sync_locking);
}
static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
{
int ret = 0;
ret = dlm_lock(res->ls, mode, &res->lksb,
res->flags, res->name, strlen(res->name),
0, sync_ast, res, res->bast);
if (ret)
return ret;
wait_event(res->sync_locking, res->sync_locking_done);
res->sync_locking_done = false;
if (res->lksb.sb_status == 0)
res->mode = mode;
return res->lksb.sb_status;
}
static int dlm_unlock_sync(struct dlm_lock_resource *res)
{
return dlm_lock_sync(res, DLM_LOCK_NL);
}
/*
* An variation of dlm_lock_sync, which make lock request could
* be interrupted
*/
static int dlm_lock_sync_interruptible(struct dlm_lock_resource *res, int mode,
struct mddev *mddev)
{
int ret = 0;
ret = dlm_lock(res->ls, mode, &res->lksb,
res->flags, res->name, strlen(res->name),
0, sync_ast, res, res->bast);
if (ret)
return ret;
wait_event(res->sync_locking, res->sync_locking_done
|| kthread_should_stop()
|| test_bit(MD_CLOSING, &mddev->flags));
if (!res->sync_locking_done) {
/*
* the convert queue contains the lock request when request is
* interrupted, and sync_ast could still be run, so need to
* cancel the request and reset completion
*/
ret = dlm_unlock(res->ls, res->lksb.sb_lkid, DLM_LKF_CANCEL,
&res->lksb, res);
res->sync_locking_done = false;
if (unlikely(ret != 0))
pr_info("failed to cancel previous lock request "
"%s return %d\n", res->name, ret);
return -EPERM;
} else
res->sync_locking_done = false;
if (res->lksb.sb_status == 0)
res->mode = mode;
return res->lksb.sb_status;
}
static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
char *name, void (*bastfn)(void *arg, int mode), int with_lvb)
{
struct dlm_lock_resource *res = NULL;
int ret, namelen;
struct md_cluster_info *cinfo = mddev->cluster_info;
res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL);
if (!res)
return NULL;
init_waitqueue_head(&res->sync_locking);
res->sync_locking_done = false;
res->ls = cinfo->lockspace;
res->mddev = mddev;
res->mode = DLM_LOCK_IV;
namelen = strlen(name);
res->name = kzalloc(namelen + 1, GFP_KERNEL);
if (!res->name) {
pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
goto out_err;
}
strlcpy(res->name, name, namelen + 1);
if (with_lvb) {
res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
if (!res->lksb.sb_lvbptr) {
pr_err("md-cluster: Unable to allocate LVB for resource %s\n", name);
goto out_err;
}
res->flags = DLM_LKF_VALBLK;
}
if (bastfn)
res->bast = bastfn;
res->flags |= DLM_LKF_EXPEDITE;
ret = dlm_lock_sync(res, DLM_LOCK_NL);
if (ret) {
pr_err("md-cluster: Unable to lock NL on new lock resource %s\n", name);
goto out_err;
}
res->flags &= ~DLM_LKF_EXPEDITE;
res->flags |= DLM_LKF_CONVERT;
return res;
out_err:
kfree(res->lksb.sb_lvbptr);
kfree(res->name);
kfree(res);
return NULL;
}
static void lockres_free(struct dlm_lock_resource *res)
{
int ret = 0;
if (!res)
return;
/*
* use FORCEUNLOCK flag, so we can unlock even the lock is on the
* waiting or convert queue
*/
ret = dlm_unlock(res->ls, res->lksb.sb_lkid, DLM_LKF_FORCEUNLOCK,
&res->lksb, res);
if (unlikely(ret != 0))
pr_err("failed to unlock %s return %d\n", res->name, ret);
else
wait_event(res->sync_locking, res->sync_locking_done);
kfree(res->name);
kfree(res->lksb.sb_lvbptr);
kfree(res);
}
static void add_resync_info(struct dlm_lock_resource *lockres,
sector_t lo, sector_t hi)
{
struct resync_info *ri;
ri = (struct resync_info *)lockres->lksb.sb_lvbptr;
ri->lo = cpu_to_le64(lo);
ri->hi = cpu_to_le64(hi);
}
static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres)
{
struct resync_info ri;
struct suspend_info *s = NULL;
sector_t hi = 0;
dlm_lock_sync(lockres, DLM_LOCK_CR);
memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
hi = le64_to_cpu(ri.hi);
if (hi > 0) {
s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
if (!s)
goto out;
s->hi = hi;
s->lo = le64_to_cpu(ri.lo);
}
dlm_unlock_sync(lockres);
out:
return s;
}
static void recover_bitmaps(struct md_thread *thread)
{
struct mddev *mddev = thread->mddev;
struct md_cluster_info *cinfo = mddev->cluster_info;
struct dlm_lock_resource *bm_lockres;
char str[64];
int slot, ret;
struct suspend_info *s, *tmp;
sector_t lo, hi;
while (
|