Commit b0af205a authored by Linus Torvalds
Browse files

Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm

* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm:
  dm: detect lost queue
  dm: publish dm_vcalloc
  dm: publish dm_table_unplug_all
  dm: publish dm_get_mapinfo
  dm: export struct dm_dev
  dm crypt: avoid unnecessary wait when splitting bio
  dm crypt: tidy ctx pending
  dm crypt: fix async inc_pending
  dm crypt: move dec_pending on error into write_io_submit
  dm crypt: remove inc_pending from write_io_submit
  dm crypt: tidy write loop pending
  dm crypt: tidy crypt alloc
  dm crypt: tidy inc pending
  dm exception store: use chunk_t for_areas
  dm exception store: introduce area_location function
  dm raid1: kcopyd should stop on error if errors handled
  dm mpath: remove is_active from struct dm_path
  dm mpath: use more error codes

Fixed up trivial conflict in drivers/md/dm-mpath.c manually.
parents 73f6aa4d 0c2322e4
......@@ -333,7 +333,6 @@ static void crypt_convert_init(struct crypt_config *cc,
ctx->idx_out = bio_out ? bio_out->bi_idx : 0;
ctx->sector = sector + cc->iv_offset;
init_completion(&ctx->restart);
atomic_set(&ctx->pending, 1);
}
static int crypt_convert_block(struct crypt_config *cc,
......@@ -408,6 +407,8 @@ static int crypt_convert(struct crypt_config *cc,
{
int r;
atomic_set(&ctx->pending, 1);
while(ctx->idx_in < ctx->bio_in->bi_vcnt &&
ctx->idx_out < ctx->bio_out->bi_vcnt) {
......@@ -456,9 +457,11 @@ static void dm_crypt_bio_destructor(struct bio *bio)
/*
* Generate a new unfragmented bio with the given size
* This should never violate the device limitations
* May return a smaller bio when running out of pages
* May return a smaller bio when running out of pages, indicated by
* *out_of_pages set to 1.
*/
static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
unsigned *out_of_pages)
{
struct crypt_config *cc = io->target->private;
struct bio *clone;
......@@ -472,11 +475,14 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
return NULL;
clone_init(io, clone);
*out_of_pages = 0;
for (i = 0; i < nr_iovecs; i++) {
page = mempool_alloc(cc->page_pool, gfp_mask);
if (!page)
if (!page) {
*out_of_pages = 1;
break;
}
/*
* if additional pages cannot be allocated without waiting,
......@@ -517,6 +523,27 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
}
}
/*
 * Allocate a dm_crypt_io from the target's io_pool and fill in its
 * starting state.
 *
 * @ti:     target the io belongs to; its private data supplies the pool
 * @bio:    original bio, stored as the io's base_bio
 * @sector: sector stored in the io
 *
 * The returned io has error cleared and a pending count of zero.
 *
 * NOTE(review): the mempool_alloc() result is used without a NULL check -
 * presumably a GFP_NOIO mempool allocation waits instead of failing;
 * confirm against the mempool documentation.
 */
static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti,
					  struct bio *bio, sector_t sector)
{
	struct crypt_config *conf = ti->private;
	struct dm_crypt_io *new_io = mempool_alloc(conf->io_pool, GFP_NOIO);

	/* Nothing is in flight yet and no error has been recorded. */
	atomic_set(&new_io->pending, 0);
	new_io->error = 0;

	new_io->sector = sector;
	new_io->base_bio = bio;
	new_io->target = ti;

	return new_io;
}
/*
 * Atomically bump the io's pending counter by one.
 */
static void crypt_inc_pending(struct dm_crypt_io *io)
{
	atomic_inc(&io->pending);
}
/*
* One of the bios was finished. Check for completion of
* the whole request and correctly clean up the buffer.
......@@ -591,7 +618,7 @@ static void kcryptd_io_read(struct dm_crypt_io *io)
struct bio *base_bio = io->base_bio;
struct bio *clone;
atomic_inc(&io->pending);
crypt_inc_pending(io);
/*
* The block layer might modify the bvec array, so always
......@@ -653,6 +680,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io,
crypt_free_buffer_pages(cc, clone);
bio_put(clone);
io->error = -EIO;
crypt_dec_pending(io);
return;
}
......@@ -664,28 +692,34 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io,
if (async)
kcryptd_queue_io(io);
else {
atomic_inc(&io->pending);
else
generic_make_request(clone);
}
}
static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io)
static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->target->private;
struct bio *clone;
int crypt_finished;
unsigned out_of_pages = 0;
unsigned remaining = io->base_bio->bi_size;
int r;
/*
* Prevent io from disappearing until this function completes.
*/
crypt_inc_pending(io);
crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, io->sector);
/*
* The allocated buffers can be smaller than the whole bio,
* so repeat the whole process until all the data can be handled.
*/
while (remaining) {
clone = crypt_alloc_buffer(io, remaining);
clone = crypt_alloc_buffer(io, remaining, &out_of_pages);
if (unlikely(!clone)) {
io->error = -ENOMEM;
return;
break;
}
io->ctx.bio_out = clone;
......@@ -693,37 +727,32 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io)
remaining -= clone->bi_size;
crypt_inc_pending(io);
r = crypt_convert(cc, &io->ctx);
crypt_finished = atomic_dec_and_test(&io->ctx.pending);
if (atomic_dec_and_test(&io->ctx.pending)) {
/* processed, no running async crypto */
/* Encryption was already finished, submit io now */
if (crypt_finished) {
kcryptd_crypt_write_io_submit(io, r, 0);
if (unlikely(r < 0))
return;
} else
atomic_inc(&io->pending);
/* out of memory -> run queues */
if (unlikely(remaining)) {
/* wait for async crypto then reinitialize pending */
wait_event(cc->writeq, !atomic_read(&io->ctx.pending));
atomic_set(&io->ctx.pending, 1);
congestion_wait(WRITE, HZ/100);
/*
* If there was an error, do not try next fragments.
* For async, error is processed in async handler.
*/
if (unlikely(r < 0))
break;
}
}
}
static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->target->private;
/*
* Prevent io from disappearing until this function completes.
*/
atomic_inc(&io->pending);
/*
* Out of memory -> run queues
* But don't wait if split was due to the io size restriction
*/
if (unlikely(out_of_pages))
congestion_wait(WRITE, HZ/100);
crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, io->sector);
kcryptd_crypt_write_convert_loop(io);
if (unlikely(remaining))
wait_event(cc->writeq, !atomic_read(&io->ctx.pending));
}
crypt_dec_pending(io);
}
......@@ -741,7 +770,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
struct crypt_config *cc = io->target->private;
int r = 0;
atomic_inc(&io->pending);
crypt_inc_pending(io);
crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
io->sector);
......@@ -1108,15 +1137,9 @@ static void crypt_dtr(struct dm_target *ti)
static int crypt_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
{
struct crypt_config *cc = ti->private;
struct dm_crypt_io *io;
io = mempool_alloc(cc->io_pool, GFP_NOIO);
io->target = ti;
io->base_bio = bio;
io->sector = bio->bi_sector - ti->begin;
io->error = 0;
atomic_set(&io->pending, 0);
io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin);
if (bio_data_dir(io->base_bio) == READ)
kcryptd_queue_io(io);
......
......@@ -108,12 +108,12 @@ struct pstore {
* Used to keep track of which metadata area the data in
* 'chunk' refers to.
*/
uint32_t current_area;
chunk_t current_area;
/*
* The next free chunk for an exception.
*/
uint32_t next_free;
chunk_t next_free;
/*
* The index of next free exception in the current
......@@ -175,7 +175,7 @@ static void do_metadata(struct work_struct *work)
/*
* Read or write a chunk aligned and sized block of data from a device.
*/
static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata)
static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
{
struct dm_io_region where = {
.bdev = ps->snap->cow->bdev,
......@@ -208,17 +208,24 @@ static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata)
return req.result;
}
/*
 * Map a metadata area index onto the index of the chunk holding it.
 *
 * Consecutive areas are (exceptions_per_area + 1) chunks apart, and the
 * result is offset by one chunk.
 */
static chunk_t area_location(struct pstore *ps, chunk_t area)
{
	chunk_t stride = ps->exceptions_per_area + 1;

	return 1 + (stride * area);
}
/*
* Read or write a metadata area. Remembering to skip the first
* chunk which holds the header.
*/
static int area_io(struct pstore *ps, uint32_t area, int rw)
static int area_io(struct pstore *ps, chunk_t area, int rw)
{
int r;
uint32_t chunk;
chunk_t chunk;
/* convert a metadata area index to a chunk index */
chunk = 1 + ((ps->exceptions_per_area + 1) * area);
chunk = area_location(ps, area);
r = chunk_io(ps, chunk, rw, 0);
if (r)
......@@ -228,7 +235,7 @@ static int area_io(struct pstore *ps, uint32_t area, int rw)
return 0;
}
static int zero_area(struct pstore *ps, uint32_t area)
static int zero_area(struct pstore *ps, chunk_t area)
{
memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
return area_io(ps, area, WRITE);
......@@ -404,7 +411,7 @@ static int insert_exceptions(struct pstore *ps, int *full)
static int read_exceptions(struct pstore *ps)
{
uint32_t area;
chunk_t area;
int r, full = 1;
/*
......@@ -517,6 +524,7 @@ static int persistent_prepare(struct exception_store *store,
{
struct pstore *ps = get_info(store);
uint32_t stride;
chunk_t next_free;
sector_t size = get_dev_size(store->snap->cow->bdev);
/* Is there enough room ? */
......@@ -530,7 +538,8 @@ static int persistent_prepare(struct exception_store *store,
* into account the location of the metadata chunks.
*/
stride = (ps->exceptions_per_area + 1);
if ((++ps->next_free % stride) == 1)
next_free = ++ps->next_free;
if (sector_div(next_free, stride) == 1)
ps->next_free++;
atomic_inc(&ps->pending_count);
......
......@@ -1131,7 +1131,7 @@ static void retrieve_deps(struct dm_table *table,
unsigned int count = 0;
struct list_head *tmp;
size_t len, needed;
struct dm_dev *dd;
struct dm_dev_internal *dd;
struct dm_target_deps *deps;
deps = get_result_buffer(param, param_size, &len);
......@@ -1157,7 +1157,7 @@ static void retrieve_deps(struct dm_table *table,
deps->count = count;
count = 0;
list_for_each_entry (dd, dm_table_get_devices(table), list)
deps->dev[count++] = huge_encode_dev(dd->bdev->bd_dev);
deps->dev[count++] = huge_encode_dev(dd->dm_dev.bdev->bd_dev);
param->data_size = param->data_start + needed;
}
......
......@@ -30,6 +30,7 @@ struct pgpath {
struct list_head list;
struct priority_group *pg; /* Owning PG */
unsigned is_active; /* Path status */
unsigned fail_count; /* Cumulative failure count */
struct dm_path path;
......@@ -125,7 +126,7 @@ static struct pgpath *alloc_pgpath(void)
struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
if (pgpath) {
pgpath->path.is_active = 1;
pgpath->is_active = 1;
INIT_WORK(&pgpath->deactivate_path, deactivate_path);
}
......@@ -575,12 +576,12 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
/* we need at least a path arg */
if (as->argc < 1) {
ti->error = "no device given";
return NULL;
return ERR_PTR(-EINVAL);
}
p = alloc_pgpath();
if (!p)
return NULL;
return ERR_PTR(-ENOMEM);
r = dm_get_device(ti, shift(as), ti->begin, ti->len,
dm_table_get_mode(ti->table), &p->path.dev);
......@@ -608,7 +609,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
bad:
free_pgpath(p);
return NULL;
return ERR_PTR(r);
}
static struct priority_group *parse_priority_group(struct arg_set *as,
......@@ -626,14 +627,14 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
if (as->argc < 2) {
as->argc = 0;
ti->error = "not enough priority group aruments";
return NULL;
ti->error = "not enough priority group arguments";
return ERR_PTR(-EINVAL);
}
pg = alloc_priority_group();
if (!pg) {
ti->error = "couldn't allocate priority group";
return NULL;
return ERR_PTR(-ENOMEM);
}
pg->m = m;
......@@ -666,8 +667,10 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
path_args.argv = as->argv;
pgpath = parse_path(&path_args, &pg->ps, ti);
if (!pgpath)
if (IS_ERR(pgpath)) {
r = PTR_ERR(pgpath);
goto bad;
}
pgpath->pg = pg;
list_add_tail(&pgpath->list, &pg->pgpaths);
......@@ -678,7 +681,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
bad:
free_priority_group(pg, ti);
return NULL;
return ERR_PTR(r);
}
static int parse_hw_handler(struct arg_set *as, struct multipath *m)
......@@ -797,8 +800,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
struct priority_group *pg;
pg = parse_priority_group(&as, m);
if (!pg) {
r = -EINVAL;
if (IS_ERR(pg)) {
r = PTR_ERR(pg);
goto bad;
}
......@@ -864,13 +867,13 @@ static int fail_path(struct pgpath *pgpath)
spin_lock_irqsave(&m->lock, flags);
if (!pgpath->path.is_active)
if (!pgpath->is_active)
goto out;
DMWARN("Failing path %s.", pgpath->path.dev->name);
pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
pgpath->path.is_active = 0;
pgpath->is_active = 0;
pgpath->fail_count++;
m->nr_valid_paths--;
......@@ -901,7 +904,7 @@ static int reinstate_path(struct pgpath *pgpath)
spin_lock_irqsave(&m->lock, flags);
if (pgpath->path.is_active)
if (pgpath->is_active)
goto out;
if (!pgpath->pg->ps.type->reinstate_path) {
......@@ -915,7 +918,7 @@ static int reinstate_path(struct pgpath *pgpath)
if (r)
goto out;
pgpath->path.is_active = 1;
pgpath->is_active = 1;
m->current_pgpath = NULL;
if (!m->nr_valid_paths++ && m->queue_size)
......@@ -1303,7 +1306,7 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
list_for_each_entry(p, &pg->pgpaths, list) {
DMEMIT("%s %s %u ", p->path.dev->name,
p->path.is_active ? "A" : "F",
p->is_active ? "A" : "F",
p->fail_count);
if (pg->ps.type->status)
sz += pg->ps.type->status(&pg->ps,
......
......@@ -13,8 +13,6 @@ struct dm_dev;
struct dm_path {
struct dm_dev *dev; /* Read-only */
unsigned is_active; /* Read-only */
void *pscontext; /* For path-selector use */
};
......
......@@ -842,7 +842,9 @@ static int recover(struct mirror_set *ms, struct region *reg)
}
/* hand to kcopyd */
set_bit(DM_KCOPYD_IGNORE_ERROR, &flags);
if (!errors_handled(ms))
set_bit(DM_KCOPYD_IGNORE_ERROR, &flags);
r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
flags, recovery_complete, reg);
......
......@@ -250,7 +250,8 @@ static void free_devices(struct list_head *devices)
struct list_head *tmp, *next;
list_for_each_safe(tmp, next, devices) {
struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
struct dm_dev_internal *dd =
list_entry(tmp, struct dm_dev_internal, list);
kfree(dd);
}
}
......@@ -327,12 +328,12 @@ static int lookup_device(const char *path, dev_t *dev)
/*
* See if we've already got a device in the list.
*/
static struct dm_dev *find_device(struct list_head *l, dev_t dev)
static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
{
struct dm_dev *dd;
struct dm_dev_internal *dd;
list_for_each_entry (dd, l, list)
if (dd->bdev->bd_dev == dev)
if (dd->dm_dev.bdev->bd_dev == dev)
return dd;
return NULL;
......@@ -341,45 +342,47 @@ static struct dm_dev *find_device(struct list_head *l, dev_t dev)
/*
* Open a device so we can use it as a map destination.
*/
static int open_dev(struct dm_dev *d, dev_t dev, struct mapped_device *md)
static int open_dev(struct dm_dev_internal *d, dev_t dev,
struct mapped_device *md)
{
static char *_claim_ptr = "I belong to device-mapper";
struct block_device *bdev;
int r;
BUG_ON(d->bdev);
BUG_ON(d->dm_dev.bdev);
bdev = open_by_devnum(dev, d->mode);
bdev = open_by_devnum(dev, d->dm_dev.mode);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md));
if (r)
blkdev_put(bdev);
else
d->bdev = bdev;
d->dm_dev.bdev = bdev;
return r;
}
/*
* Close a device that we've been using.
*/
static void close_dev(struct dm_dev *d, struct mapped_device *md)
static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
{
if (!d->bdev)
if (!d->dm_dev.bdev)
return;
bd_release_from_disk(d->bdev, dm_disk(md));
blkdev_put(d->bdev);
d->bdev = NULL;
bd_release_from_disk(d->dm_dev.bdev, dm_disk(md));
blkdev_put(d->dm_dev.bdev);
d->dm_dev.bdev = NULL;
}
/*
* If possible, this checks an area of a destination device is valid.
*/
static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len)
static int check_device_area(struct dm_dev_internal *dd, sector_t start,
sector_t len)
{
sector_t dev_size = dd->bdev->bd_inode->i_size >> SECTOR_SHIFT;
sector_t dev_size = dd->dm_dev.bdev->bd_inode->i_size >> SECTOR_SHIFT;
if (!dev_size)
return 1;
......@@ -392,16 +395,17 @@ static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len)
* careful to leave things as they were if we fail to reopen the
* device.
*/
static int upgrade_mode(struct dm_dev *dd, int new_mode, struct mapped_device *md)
static int upgrade_mode(struct dm_dev_internal *dd, int new_mode,
struct mapped_device *md)
{
int r;
struct dm_dev dd_copy;
dev_t dev = dd->bdev->bd_dev;
struct dm_dev_internal dd_copy;
dev_t dev = dd->dm_dev.bdev->bd_dev;
dd_copy = *dd;
dd->mode |= new_mode;
dd->bdev = NULL;
dd->dm_dev.mode |= new_mode;
dd->dm_dev.bdev = NULL;
r = open_dev(dd, dev, md);
if (!r)
close_dev(&dd_copy, md);
......@@ -421,7 +425,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
{
int r;
dev_t uninitialized_var(dev);
struct dm_dev *dd;
struct dm_dev_internal *dd;
unsigned int major, minor;
BUG_ON(!t);
......@@ -443,20 +447,20 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
if (!dd)
return -ENOMEM;
dd->mode = mode;
dd->bdev = NULL;
dd->dm_dev.mode = mode;
dd->dm_dev.bdev = NULL;
if ((r = open_dev(dd, dev, t->md))) {
kfree(dd);
return r;
}
format_dev_t(dd->name, dev);
format_dev_t(dd->dm_dev.name, dev);
atomic_set(&dd->count, 0);
list_add(&dd->list, &t->devices);
} else if (dd->mode != (mode | dd->mode)) {
} else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) {
r = upgrade_mode(dd, mode, t->md);
if (r)
return r;
......@@ -465,11 +469,11 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
if (!check_device_area(dd, start, len)) {
DMWARN("device %s too small for target", path);
dm_put_device(ti, dd);
dm_put_device(ti, &dd->dm_dev);
return -EINVAL;
}
*result = dd;
*result = &dd->dm_dev;
return 0;