Commit 86a16798 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'md/4.8-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD fixes from Shaohua Li:
 "This includes several bug fixes:

   - Alexey Obitotskiy fixed a hang for faulty raid5 array with external
     management

   - Song Liu fixed two raid5 journal related bugs

   - Tomasz Majchrzak fixed a bad block recording issue and an
     accounting issue for raid10

   - ZhengYuan Liu fixed an accounting issue for raid5

   - I fixed a potential race condition and memory leak with DIF/DIX
     enabled

   - other trival fixes"

* tag 'md/4.8-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  raid5: avoid unnecessary bio data set
  raid5: fix memory leak of bio integrity data
  raid10: record correct address of bad block
  md-cluster: fix error return code in join()
  r5cache: set MD_JOURNAL_CLEAN correctly
  md: don't print the same repeated messages about delayed sync operation
  md: remove obsolete ret in md_start_sync
  md: do not count journal as spare in GET_ARRAY_INFO
  md: Prevent IO hold during accessing to faulty raid5 array
  MD: hold mddev lock to change bitmap location
  raid5: fix incorrectly counter of conf->empty_inactive_list_nr
  raid10: increment write counter after bio is split
parents 0cf21c66 45c91d80
......@@ -2183,19 +2183,29 @@ location_show(struct mddev *mddev, char *page)
static ssize_t
location_store(struct mddev *mddev, const char *buf, size_t len)
{
int rv;
rv = mddev_lock(mddev);
if (rv)
return rv;
if (mddev->pers) {
if (!mddev->pers->quiesce)
return -EBUSY;
if (mddev->recovery || mddev->sync_thread)
return -EBUSY;
if (!mddev->pers->quiesce) {
rv = -EBUSY;
goto out;
}
if (mddev->recovery || mddev->sync_thread) {
rv = -EBUSY;
goto out;
}
}
if (mddev->bitmap || mddev->bitmap_info.file ||
mddev->bitmap_info.offset) {
/* bitmap already configured. Only option is to clear it */
if (strncmp(buf, "none", 4) != 0)
return -EBUSY;
if (strncmp(buf, "none", 4) != 0) {
rv = -EBUSY;
goto out;
}
if (mddev->pers) {
mddev->pers->quiesce(mddev, 1);
bitmap_destroy(mddev);
......@@ -2214,21 +2224,25 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
/* nothing to be done */;
else if (strncmp(buf, "file:", 5) == 0) {
/* Not supported yet */
return -EINVAL;
rv = -EINVAL;
goto out;
} else {
int rv;
if (buf[0] == '+')
rv = kstrtoll(buf+1, 10, &offset);
else
rv = kstrtoll(buf, 10, &offset);
if (rv)
return rv;
if (offset == 0)
return -EINVAL;
goto out;
if (offset == 0) {
rv = -EINVAL;
goto out;
}
if (mddev->bitmap_info.external == 0 &&
mddev->major_version == 0 &&
offset != mddev->bitmap_info.default_offset)
return -EINVAL;
offset != mddev->bitmap_info.default_offset) {
rv = -EINVAL;
goto out;
}
mddev->bitmap_info.offset = offset;
if (mddev->pers) {
struct bitmap *bitmap;
......@@ -2245,7 +2259,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
mddev->pers->quiesce(mddev, 0);
if (rv) {
bitmap_destroy(mddev);
return rv;
goto out;
}
}
}
......@@ -2257,6 +2271,11 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread);
}
rv = 0;
out:
mddev_unlock(mddev);
if (rv)
return rv;
return len;
}
......
......@@ -834,8 +834,10 @@ static int join(struct mddev *mddev, int nodes)
goto err;
}
cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
if (!cinfo->ack_lockres)
if (!cinfo->ack_lockres) {
ret = -ENOMEM;
goto err;
}
/* get sync CR lock on ACK. */
if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
......@@ -849,8 +851,10 @@ static int join(struct mddev *mddev, int nodes)
pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number);
snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1);
cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1);
if (!cinfo->bitmap_lockres)
if (!cinfo->bitmap_lockres) {
ret = -ENOMEM;
goto err;
}
if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) {
pr_err("Failed to get bitmap lock\n");
ret = -EINVAL;
......@@ -858,8 +862,10 @@ static int join(struct mddev *mddev, int nodes)
}
cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0);
if (!cinfo->resync_lockres)
if (!cinfo->resync_lockres) {
ret = -ENOMEM;
goto err;
}
return 0;
err:
......
......@@ -1604,11 +1604,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
mddev->new_chunk_sectors = mddev->chunk_sectors;
}
if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) {
if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
set_bit(MD_HAS_JOURNAL, &mddev->flags);
if (mddev->recovery_cp == MaxSector)
set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
}
} else if (mddev->pers == NULL) {
/* Insist of good event counter while assembling, except for
* spares (which don't need an event count) */
......@@ -5851,6 +5848,9 @@ static int get_array_info(struct mddev *mddev, void __user *arg)
working++;
if (test_bit(In_sync, &rdev->flags))
insync++;
else if (test_bit(Journal, &rdev->flags))
/* TODO: add journal count to md_u.h */
;
else
spare++;
}
......@@ -7862,6 +7862,7 @@ void md_do_sync(struct md_thread *thread)
*/
do {
int mddev2_minor = -1;
mddev->curr_resync = 2;
try_again:
......@@ -7891,10 +7892,14 @@ void md_do_sync(struct md_thread *thread)
prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
mddev2->curr_resync >= mddev->curr_resync) {
printk(KERN_INFO "md: delaying %s of %s"
" until %s has finished (they"
" share one or more physical units)\n",
desc, mdname(mddev), mdname(mddev2));
if (mddev2_minor != mddev2->md_minor) {
mddev2_minor = mddev2->md_minor;
printk(KERN_INFO "md: delaying %s of %s"
" until %s has finished (they"
" share one or more physical units)\n",
desc, mdname(mddev),
mdname(mddev2));
}
mddev_put(mddev2);
if (signal_pending(current))
flush_signals(current);
......@@ -8275,16 +8280,13 @@ no_add:
static void md_start_sync(struct work_struct *ws)
{
struct mddev *mddev = container_of(ws, struct mddev, del_work);
int ret = 0;
mddev->sync_thread = md_register_thread(md_do_sync,
mddev,
"resync");
if (!mddev->sync_thread) {
if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
printk(KERN_ERR "%s: could not start resync"
" thread...\n",
mdname(mddev));
printk(KERN_ERR "%s: could not start resync thread...\n",
mdname(mddev));
/* leave the spares where they are, it shouldn't hurt */
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
......
......@@ -1064,6 +1064,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
int max_sectors;
int sectors;
md_write_start(mddev, bio);
/*
* Register the new request and wait if the reconstruction
* thread has put up a bar for new requests.
......@@ -1445,8 +1447,6 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
return;
}
md_write_start(mddev, bio);
do {
/*
......@@ -2465,20 +2465,21 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
while (sect_to_write) {
struct bio *wbio;
sector_t wsector;
if (sectors > sect_to_write)
sectors = sect_to_write;
/* Write at 'sector' for 'sectors' */
wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
choose_data_offset(r10_bio, rdev) +
(sector - r10_bio->sector));
wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
wbio->bi_iter.bi_sector = wsector +
choose_data_offset(r10_bio, rdev);
wbio->bi_bdev = rdev->bdev;
bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
if (submit_bio_wait(wbio) < 0)
/* Failure! */
ok = rdev_set_badblocks(rdev, sector,
ok = rdev_set_badblocks(rdev, wsector,
sectors, 0)
&& ok;
......
......@@ -659,6 +659,7 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
{
struct stripe_head *sh;
int hash = stripe_hash_locks_hash(sector);
int inc_empty_inactive_list_flag;
pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
......@@ -703,7 +704,12 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
atomic_inc(&conf->active_stripes);
BUG_ON(list_empty(&sh->lru) &&
!test_bit(STRIPE_EXPANDING, &sh->state));
inc_empty_inactive_list_flag = 0;
if (!list_empty(conf->inactive_list + hash))
inc_empty_inactive_list_flag = 1;
list_del_init(&sh->lru);
if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
atomic_inc(&conf->empty_inactive_list_nr);
if (sh->group) {
sh->group->stripes_cnt--;
sh->group = NULL;
......@@ -762,6 +768,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
sector_t head_sector, tmp_sec;
int hash;
int dd_idx;
int inc_empty_inactive_list_flag;
/* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
tmp_sec = sh->sector;
......@@ -779,7 +786,12 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
atomic_inc(&conf->active_stripes);
BUG_ON(list_empty(&head->lru) &&
!test_bit(STRIPE_EXPANDING, &head->state));
inc_empty_inactive_list_flag = 0;
if (!list_empty(conf->inactive_list + hash))
inc_empty_inactive_list_flag = 1;
list_del_init(&head->lru);
if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
atomic_inc(&conf->empty_inactive_list_nr);
if (head->group) {
head->group->stripes_cnt--;
head->group = NULL;
......@@ -993,7 +1005,6 @@ again:
set_bit(STRIPE_IO_STARTED, &sh->state);
bio_reset(bi);
bi->bi_bdev = rdev->bdev;
bio_set_op_attrs(bi, op, op_flags);
bi->bi_end_io = op_is_write(op)
......@@ -1045,7 +1056,6 @@ again:
set_bit(STRIPE_IO_STARTED, &sh->state);
bio_reset(rbi);
rbi->bi_bdev = rrdev->bdev;
bio_set_op_attrs(rbi, op, op_flags);
BUG_ON(!op_is_write(op));
......@@ -1978,9 +1988,11 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
put_cpu();
}
static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
int disks)
{
struct stripe_head *sh;
int i;
sh = kmem_cache_zalloc(sc, gfp);
if (sh) {
......@@ -1989,6 +2001,17 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
INIT_LIST_HEAD(&sh->batch_list);
INIT_LIST_HEAD(&sh->lru);
atomic_set(&sh->count, 1);
for (i = 0; i < disks; i++) {
struct r5dev *dev = &sh->dev[i];
bio_init(&dev->req);
dev->req.bi_io_vec = &dev->vec;
dev->req.bi_max_vecs = 1;
bio_init(&dev->rreq);
dev->rreq.bi_io_vec = &dev->rvec;
dev->rreq.bi_max_vecs = 1;
}
}
return sh;
}
......@@ -1996,7 +2019,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
{
struct stripe_head *sh;
sh = alloc_stripe(conf->slab_cache, gfp);
sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
if (!sh)
return 0;
......@@ -2167,7 +2190,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
mutex_lock(&conf->cache_size_mutex);
for (i = conf->max_nr_stripes; i; i--) {
nsh = alloc_stripe(sc, GFP_KERNEL);
nsh = alloc_stripe(sc, GFP_KERNEL, newsize);
if (!nsh)
break;
......@@ -2299,6 +2322,7 @@ static void raid5_end_read_request(struct bio * bi)
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
bi->bi_error);
if (i == disks) {
bio_reset(bi);
BUG();
return;
}
......@@ -2402,6 +2426,7 @@ static void raid5_end_read_request(struct bio * bi)
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
raid5_release_stripe(sh);
bio_reset(bi);
}
static void raid5_end_write_request(struct bio *bi)
......@@ -2436,6 +2461,7 @@ static void raid5_end_write_request(struct bio *bi)
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
bi->bi_error);
if (i == disks) {
bio_reset(bi);
BUG();
return;
}
......@@ -2479,22 +2505,13 @@ static void raid5_end_write_request(struct bio *bi)
if (sh->batch_head && sh != sh->batch_head)
raid5_release_stripe(sh->batch_head);
bio_reset(bi);
}
static void raid5_build_block(struct stripe_head *sh, int i, int previous)
{
struct r5dev *dev = &sh->dev[i];
bio_init(&dev->req);
dev->req.bi_io_vec = &dev->vec;
dev->req.bi_max_vecs = 1;
dev->req.bi_private = sh;
bio_init(&dev->rreq);
dev->rreq.bi_io_vec = &dev->rvec;
dev->rreq.bi_max_vecs = 1;
dev->rreq.bi_private = sh;
dev->flags = 0;
dev->sector = raid5_compute_blocknr(sh, i, previous);
}
......@@ -4628,7 +4645,9 @@ finish:
}
if (!bio_list_empty(&s.return_bi)) {
if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) {
if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) &&
(s.failed <= conf->max_degraded ||
conf->mddev->external == 0)) {
spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->return_bi, &s.return_bi);
spin_unlock_irq(&conf->device_lock);
......@@ -6826,11 +6845,14 @@ static int raid5_run(struct mddev *mddev)
if (IS_ERR(conf))
return PTR_ERR(conf);
if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) {
printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n",
mdname(mddev));
mddev->ro = 1;
set_disk_ro(mddev->gendisk, 1);
if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
if (!journal_dev) {
pr_err("md/raid:%s: journal disk is missing, force array readonly\n",
mdname(mddev));
mddev->ro = 1;
set_disk_ro(mddev->gendisk, 1);
} else if (mddev->recovery_cp == MaxSector)
set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
}
conf->min_offset_diff = min_offset_diff;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment