Commit b49249d1 authored by Linus Torvalds

Merge tag 'dm-3.8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm

Pull dm update from Alasdair G Kergon:
 "Miscellaneous device-mapper fixes, cleanups and performance
  improvements.

  Of particular note:
   - Disable broken WRITE SAME support in all targets except linear and
     striped.  Use it when kcopyd is zeroing blocks.
   - Remove several mempools from targets by moving the data into the
     bio's new front_pad area (which dm calls 'per_bio_data'; a sketch
     of the pattern follows below).
   - Fix a race in thin provisioning if discards are misused.
   - Prevent userspace from interfering with the ioctl parameters and
     use kmalloc for the data buffer if it's small instead of vmalloc.
   - Throttle some annoying error messages when I/O fails."
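
A minimal sketch of the per_bio_data pattern referenced above, as used by the
targets converted in this series (the target name and struct are illustrative;
dm_per_bio_data() and ti->per_bio_data_size are the real hooks the series
introduces):

	#include <linux/device-mapper.h>

	/* Hypothetical per-bio state for an example target. */
	struct example_per_bio_data {
		bool submitted_while_down;
	};

	static int example_ctr(struct dm_target *ti, unsigned int argc, char **argv)
	{
		/* dm core reserves this many bytes in each bio's front_pad */
		ti->per_bio_data_size = sizeof(struct example_per_bio_data);
		return 0;
	}

	static int example_map(struct dm_target *ti, struct bio *bio)
	{
		/* No mempool needed: the state travels inside the bio itself */
		struct example_per_bio_data *pb =
			dm_per_bio_data(bio, sizeof(struct example_per_bio_data));

		pb->submitted_while_down = false;
		return DM_MAPIO_REMAPPED;
	}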

* tag 'dm-3.8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm: (36 commits)
  dm stripe: add WRITE SAME support
  dm: remove map_info
  dm snapshot: do not use map_context
  dm thin: dont use map_context
  dm raid1: dont use map_context
  dm flakey: dont use map_context
  dm raid1: rename read_record to bio_record
  dm: move target request nr to dm_target_io
  dm snapshot: use per_bio_data
  dm verity: use per_bio_data
  dm raid1: use per_bio_data
  dm: introduce per_bio_data
  dm kcopyd: add WRITE SAME support to dm_kcopyd_zero
  dm linear: add WRITE SAME support
  dm: add WRITE SAME support
  dm: prepare to support WRITE SAME
  dm ioctl: use kmalloc if possible
  dm ioctl: remove PF_MEMALLOC
  dm persistent data: improve space map block alloc failure message
  dm thin: use DMERR_LIMIT for errors
  ...
parents 10532b56 45e621d4
@@ -207,31 +207,6 @@ void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios)
 }
 EXPORT_SYMBOL_GPL(dm_cell_release);
 
-/*
- * There are a couple of places where we put a bio into a cell briefly
- * before taking it out again. In these situations we know that no other
- * bio may be in the cell. This function releases the cell, and also does
- * a sanity check.
- */
-static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
-{
-	BUG_ON(cell->holder != bio);
-	BUG_ON(!bio_list_empty(&cell->bios));
-
-	__cell_release(cell, NULL);
-}
-
-void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
-{
-	unsigned long flags;
-	struct dm_bio_prison *prison = cell->prison;
-
-	spin_lock_irqsave(&prison->lock, flags);
-	__cell_release_singleton(cell, bio);
-	spin_unlock_irqrestore(&prison->lock, flags);
-}
-EXPORT_SYMBOL_GPL(dm_cell_release_singleton);
-
 /*
  * Sometimes we don't want the holder, just the additional bios.
  */
......
@@ -44,7 +44,6 @@ int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key,
 		  struct bio *inmate, struct dm_bio_prison_cell **ref);
 
 void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios);
-void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio); // FIXME: bio arg not needed
 void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates);
 void dm_cell_error(struct dm_bio_prison_cell *cell);
......
@@ -1689,8 +1689,7 @@ bad:
 	return ret;
 }
 
-static int crypt_map(struct dm_target *ti, struct bio *bio,
-		     union map_info *map_context)
+static int crypt_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_crypt_io *io;
 	struct crypt_config *cc = ti->private;
@@ -1846,7 +1845,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 static struct target_type crypt_target = {
 	.name = "crypt",
-	.version = {1, 11, 0},
+	.version = {1, 12, 0},
 	.module = THIS_MODULE,
 	.ctr = crypt_ctr,
 	.dtr = crypt_dtr,
......
@@ -274,8 +274,7 @@ static void delay_resume(struct dm_target *ti)
 	atomic_set(&dc->may_delay, 1);
 }
 
-static int delay_map(struct dm_target *ti, struct bio *bio,
-		     union map_info *map_context)
+static int delay_map(struct dm_target *ti, struct bio *bio)
 {
 	struct delay_c *dc = ti->private;
 
@@ -338,7 +337,7 @@ out:
 static struct target_type delay_target = {
 	.name = "delay",
-	.version = {1, 1, 0},
+	.version = {1, 2, 0},
 	.module = THIS_MODULE,
 	.ctr = delay_ctr,
 	.dtr = delay_dtr,
......
@@ -39,6 +39,10 @@ enum feature_flag_bits {
 	DROP_WRITES
 };
 
+struct per_bio_data {
+	bool bio_submitted;
+};
+
 static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
 			  struct dm_target *ti)
 {
@@ -214,6 +218,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	ti->num_flush_requests = 1;
 	ti->num_discard_requests = 1;
+	ti->per_bio_data_size = sizeof(struct per_bio_data);
 	ti->private = fc;
 	return 0;
 
@@ -265,11 +270,12 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
 	}
 }
 
-static int flakey_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int flakey_map(struct dm_target *ti, struct bio *bio)
 {
 	struct flakey_c *fc = ti->private;
 	unsigned elapsed;
+	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
+	pb->bio_submitted = false;
 
 	/* Are we alive ? */
 	elapsed = (jiffies - fc->start_time) / HZ;
@@ -277,7 +283,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio,
 		/*
 		 * Flag this bio as submitted while down.
 		 */
-		map_context->ll = 1;
+		pb->bio_submitted = true;
 
 		/*
 		 * Map reads as normal.
@@ -314,17 +320,16 @@ map_bio:
 	return DM_MAPIO_REMAPPED;
 }
 
-static int flakey_end_io(struct dm_target *ti, struct bio *bio,
-			 int error, union map_info *map_context)
+static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	struct flakey_c *fc = ti->private;
-	unsigned bio_submitted_while_down = map_context->ll;
+	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
 
 	/*
 	 * Corrupt successful READs while in down state.
 	 * If flags were specified, only corrupt those that match.
 	 */
-	if (fc->corrupt_bio_byte && !error && bio_submitted_while_down &&
+	if (fc->corrupt_bio_byte && !error && pb->bio_submitted &&
 	    (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) &&
 	    all_corrupt_bio_flags_match(bio, fc))
 		corrupt_bio_data(bio, fc);
@@ -406,7 +411,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_
 static struct target_type flakey_target = {
 	.name = "flakey",
-	.version = {1, 2, 0},
+	.version = {1, 3, 0},
 	.module = THIS_MODULE,
 	.ctr = flakey_ctr,
 	.dtr = flakey_dtr,
......
@@ -287,7 +287,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 	unsigned num_bvecs;
 	sector_t remaining = where->count;
 	struct request_queue *q = bdev_get_queue(where->bdev);
-	sector_t discard_sectors;
+	unsigned short logical_block_size = queue_logical_block_size(q);
+	sector_t num_sectors;
 
 	/*
 	 * where->count may be zero if rw holds a flush and we need to
@@ -297,7 +298,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 		/*
 		 * Allocate a suitably sized-bio.
 		 */
-		if (rw & REQ_DISCARD)
+		if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME))
 			num_bvecs = 1;
 		else
 			num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev),
@@ -310,9 +311,21 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 		store_io_and_region_in_bio(bio, io, region);
 
 		if (rw & REQ_DISCARD) {
-			discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
-			bio->bi_size = discard_sectors << SECTOR_SHIFT;
-			remaining -= discard_sectors;
+			num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
+			bio->bi_size = num_sectors << SECTOR_SHIFT;
+			remaining -= num_sectors;
+		} else if (rw & REQ_WRITE_SAME) {
+			/*
+			 * WRITE SAME only uses a single page.
+			 */
+			dp->get_page(dp, &page, &len, &offset);
+			bio_add_page(bio, page, logical_block_size, offset);
+			num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining);
+			bio->bi_size = num_sectors << SECTOR_SHIFT;
+
+			offset = 0;
+			remaining -= num_sectors;
+			dp->next_page(dp);
 		} else while (remaining) {
 			/*
 			 * Try and add as many pages as possible.
......
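
The dm-io hunk above captures the WRITE SAME contract: the bio carries a single
logical block of payload, while bi_size covers the whole range the device should
fill with copies of that block. A hedged sketch of just the sizing step, pulled
out of do_region() (the helper name is hypothetical; the bio fields and queue
limits are the 3.8-era block API):

	/* Size a WRITE SAME bio: one logical block of payload, while bi_size
	 * spans up to max_write_same_sectors of the target device. */
	static sector_t setup_write_same(struct bio *bio, struct request_queue *q,
					 struct page *page, unsigned offset,
					 sector_t remaining)
	{
		sector_t num_sectors;

		bio_add_page(bio, page, queue_logical_block_size(q), offset);
		num_sectors = min_t(sector_t,
				    q->limits.max_write_same_sectors, remaining);
		bio->bi_size = num_sectors << SECTOR_SHIFT;
		return num_sectors;
	}
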
@@ -1543,7 +1543,21 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user)
 	return r;
 }
 
-static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
+#define DM_PARAMS_VMALLOC	0x0001	/* Params alloced with vmalloc not kmalloc */
+#define DM_WIPE_BUFFER		0x0010	/* Wipe input buffer before returning from ioctl */
+
+static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags)
+{
+	if (param_flags & DM_WIPE_BUFFER)
+		memset(param, 0, param_size);
+
+	if (param_flags & DM_PARAMS_VMALLOC)
+		vfree(param);
+	else
+		kfree(param);
+}
+
+static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags)
 {
 	struct dm_ioctl tmp, *dmi;
 	int secure_data;
@@ -1556,7 +1570,21 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
 
 	secure_data = tmp.flags & DM_SECURE_DATA_FLAG;
 
-	dmi = vmalloc(tmp.data_size);
+	*param_flags = secure_data ? DM_WIPE_BUFFER : 0;
+
+	/*
+	 * Try to avoid low memory issues when a device is suspended.
+	 * Use kmalloc() rather than vmalloc() when we can.
+	 */
+	dmi = NULL;
+	if (tmp.data_size <= KMALLOC_MAX_SIZE)
+		dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+
+	if (!dmi) {
+		dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL);
+		*param_flags |= DM_PARAMS_VMALLOC;
+	}
+
 	if (!dmi) {
 		if (secure_data && clear_user(user, tmp.data_size))
 			return -EFAULT;
@@ -1566,6 +1594,14 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
 	if (copy_from_user(dmi, user, tmp.data_size))
 		goto bad;
 
+	/*
+	 * Abort if something changed the ioctl data while it was being copied.
+	 */
+	if (dmi->data_size != tmp.data_size) {
+		DMERR("rejecting ioctl: data size modified while processing parameters");
+		goto bad;
+	}
+
 	/* Wipe the user buffer so we do not return it to userspace */
 	if (secure_data && clear_user(user, tmp.data_size))
 		goto bad;
@@ -1574,9 +1610,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
 	return 0;
 
 bad:
-	if (secure_data)
-		memset(dmi, 0, tmp.data_size);
-	vfree(dmi);
+	free_params(dmi, tmp.data_size, *param_flags);
+
 	return -EFAULT;
 }
 
@@ -1613,7 +1648,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param)
 static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
 {
 	int r = 0;
-	int wipe_buffer;
+	int param_flags;
 	unsigned int cmd;
 	struct dm_ioctl *uninitialized_var(param);
 	ioctl_fn fn = NULL;
@@ -1648,25 +1683,15 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
 		return -ENOTTY;
 	}
 
-	/*
-	 * Trying to avoid low memory issues when a device is
-	 * suspended.
-	 */
-	current->flags |= PF_MEMALLOC;
-
 	/*
 	 * Copy the parameters into kernel space.
 	 */
-	r = copy_params(user, &param);
-
-	current->flags &= ~PF_MEMALLOC;
-
+	r = copy_params(user, &param, &param_flags);
 	if (r)
 		return r;
 
 	input_param_size = param->data_size;
-	wipe_buffer = param->flags & DM_SECURE_DATA_FLAG;
 
 	r = validate_params(cmd, param);
 	if (r)
 		goto out;
@@ -1681,10 +1706,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
 		r = -EFAULT;
 
 out:
-	if (wipe_buffer)
-		memset(param, 0, input_param_size);
-
-	vfree(param);
+	free_params(param, input_param_size, param_flags);
+
 	return r;
 }
......
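
The allocation strategy in copy_params() above is reusable on its own: try
kmalloc() with flags that fail fast, fall back to __vmalloc(), and record which
allocator succeeded so the free path can pick vfree() or kfree() correctly. A
minimal sketch under those assumptions (the helper names and flag are
hypothetical; the gfp flags match the ones in the hunk):

	#include <linux/slab.h>
	#include <linux/vmalloc.h>

	#define EXAMPLE_VMALLOC	0x1	/* hypothetical: buffer came from __vmalloc */

	static void *example_alloc(size_t size, int *flags)
	{
		void *p = NULL;

		/* Prefer kmalloc for small buffers; give up quickly on pressure. */
		if (size <= KMALLOC_MAX_SIZE)
			p = kmalloc(size, GFP_NOIO | __GFP_NORETRY |
				    __GFP_NOMEMALLOC | __GFP_NOWARN);
		if (!p) {
			p = __vmalloc(size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH,
				      PAGE_KERNEL);
			*flags |= EXAMPLE_VMALLOC;
		}
		return p;
	}

	static void example_free(void *p, int flags)
	{
		/* vfree() and kfree() are not interchangeable: consult the flag. */
		if (flags & EXAMPLE_VMALLOC)
			vfree(p);
		else
			kfree(p);
	}
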
@@ -349,7 +349,7 @@ static void complete_io(unsigned long error, void *context)
 	struct dm_kcopyd_client *kc = job->kc;
 
 	if (error) {
-		if (job->rw == WRITE)
+		if (job->rw & WRITE)
			job->write_err |= error;
 		else
 			job->read_err = 1;
@@ -361,7 +361,7 @@ static void complete_io(unsigned long error, void *context)
 		}
 	}
 
-	if (job->rw == WRITE)
+	if (job->rw & WRITE)
 		push(&kc->complete_jobs, job);
 
 	else {
@@ -432,7 +432,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
 
 		if (r < 0) {
 			/* error this rogue job */
-			if (job->rw == WRITE)
+			if (job->rw & WRITE)
				job->write_err = (unsigned long) -1L;
 			else
 				job->read_err = 1;
@@ -585,6 +585,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 		   unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
 {
 	struct kcopyd_job *job;
+	int i;
 
 	/*
 	 * Allocate an array of jobs consisting of one master job
@@ -611,7 +612,16 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 		memset(&job->source, 0, sizeof job->source);
 		job->source.count = job->dests[0].count;
 		job->pages = &zero_page_list;
-		job->rw = WRITE;
+
+		/*
+		 * Use WRITE SAME to optimize zeroing if all dests support it.
+		 */
+		job->rw = WRITE | REQ_WRITE_SAME;
+		for (i = 0; i < job->num_dests; i++)
+			if (!bdev_write_same(job->dests[i].bdev)) {
+				job->rw = WRITE;
+				break;
+			}
 	}
 
 	job->fn = fn;
......
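
Two things are going on in the dm-kcopyd hunks above. First, job->rw is now a
flag word rather than a bare direction, which is why the job->rw == WRITE tests
become job->rw & WRITE. Second, the zeroing path only opts into WRITE SAME when
every destination supports it. A sketch of that eligibility test (the helper
name is hypothetical; bdev_write_same() returns the device's
max_write_same_sectors, so zero means unsupported):

	static int example_choose_zero_rw(struct dm_io_region *dests,
					  unsigned num_dests)
	{
		unsigned i;
		int rw = WRITE | REQ_WRITE_SAME;

		for (i = 0; i < num_dests; i++)
			if (!bdev_write_same(dests[i].bdev)) {
				/* One non-supporting device disables the optimization. */
				rw = WRITE;
				break;
			}
		return rw;
	}
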
@@ -55,6 +55,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	ti->num_flush_requests = 1;
 	ti->num_discard_requests = 1;
+	ti->num_write_same_requests = 1;
 	ti->private = lc;
 	return 0;
 
@@ -87,8 +88,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
 		bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
 }
 
-static int linear_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int linear_map(struct dm_target *ti, struct bio *bio)
 {
 	linear_map_bio(ti, bio);
 
@@ -155,7 +155,7 @@ static int linear_iterate_devices(struct dm_target *ti,
 static struct target_type linear_target = {
 	.name = "linear",
-	.version = {1, 1, 0},
+	.version = {1, 2, 0},
 	.module = THIS_MODULE,
 	.ctr = linear_ctr,
 	.dtr = linear_dtr,
......
@@ -295,9 +295,11 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
 		 * Choose a reasonable default. All figures in sectors.
 		 */
 		if (min_region_size > (1 << 13)) {
+			/* If not a power of 2, make it the next power of 2 */
+			if (min_region_size & (min_region_size - 1))
+				region_size = 1 << fls(region_size);
 			DMINFO("Choosing default region size of %lu sectors",
 			       region_size);
-			region_size = min_region_size;
 		} else {
 			DMINFO("Choosing default region size of 4MiB");
 			region_size = 1 << 13; /* sectors */
@@ -1216,7 +1218,7 @@ static void raid_dtr(struct dm_target *ti)
 	context_free(rs);
 }
 
-static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_context)
+static int raid_map(struct dm_target *ti, struct bio *bio)
 {
 	struct raid_set *rs = ti->private;
 	struct mddev *mddev = &rs->md;
@@ -1430,7 +1432,7 @@ static void raid_resume(struct dm_target *ti)
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 3, 1},
+	.version = {1, 4, 0},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
......
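
The region-size change in the dm-raid hunk leans on the usual power-of-two bit
trick: a power of two has exactly one bit set, so x & (x - 1) is zero precisely
for powers of two, and 1 << fls(x) rounds anything else up to the next power of
two. A sketch of the idiom in isolation (the helper name is hypothetical; fls()
is the kernel's find-last-set):

	#include <linux/bitops.h>

	static unsigned int example_next_pow2(unsigned int v)
	{
		/* Already a power of two: v & (v - 1) clears the lowest set bit. */
		if (!(v & (v - 1)))
			return v;
		/* fls(v) is the index of the highest set bit, counted from 1. */
		return 1U << fls(v);
	}
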
@@ -61,7 +61,6 @@ struct mirror_set {
 	struct dm_region_hash *rh;
 	struct dm_kcopyd_client *kcopyd_client;
 	struct dm_io_client *io_client;
-	mempool_t *read_record_pool;
 
 	/* recovery */
 	region_t nr_regions;
@@ -139,14 +138,13 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
 		queue_bio(ms, bio, WRITE);
 }
 
-#define MIN_READ_RECORDS 20
-struct dm_raid1_read_record {
+struct dm_raid1_bio_record {
 	struct mirror *m;
+	/* if details->bi_bdev == NULL, details were not saved */
 	struct dm_bio_details details;
+	region_t write_region;
 };
 
-static struct kmem_cache *_dm_raid1_read_record_cache;
-
 /*
  * Every mirror should look like this one.
 */
@@ -876,19 +874,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
 	atomic_set(&ms->suspend, 0);
 	atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
 
-	ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
-						_dm_raid1_read_record_cache);
-
-	if (!ms->read_record_pool) {
-		ti->error = "Error creating mirror read_record_pool";
-		kfree(ms);
-		return NULL;
-	}
-
 	ms->io_client = dm_io_client_create();
 	if (IS_ERR(ms->io_client)) {
 		ti->error = "Error creating dm_io client";
-		mempool_destroy(ms->read_record_pool);
 		kfree(ms);
 		return NULL;
 	}
@@ -900,7 +888,6 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
 	if (IS_ERR(ms->rh)) {
 		ti->error = "Error creating dirty region hash";
 		dm_io_client_destroy(ms->io_client);
-		mempool_destroy(ms->read_record_pool);
 		kfree(ms);
 		return NULL;
 	}
@@ -916,7 +903,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
 
 	dm_io_client_destroy(ms->io_client);
 	dm_region_hash_destroy(ms->rh);
-	mempool_destroy(ms->read_record_pool);
 	kfree(ms);
 }
@@ -1088,6 +1074,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	ti->num_flush_requests = 1;
 	ti->num_discard_requests = 1;
+	ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record);
 	ti->discard_zeroes_data_unsupported = true;
 
 	ms->kmirrord_wq = alloc_workqueue("kmirrord",
@@ -1155,18 +1142,20 @@ static void mirror_dtr(struct dm_target *ti)
 /*
  * Mirror mapping function
 */
-static int mirror_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int mirror_map(struct dm_target *ti, struct bio *bio)
 {
 	int r, rw = bio_rw(bio);
 	struct mirror *m;
 	struct mirror_set *ms = ti->private;
-	struct dm_raid1_read_record *read_record = NULL;
 	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
+	struct dm_raid1_bio_record *bio_record =
+	    dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
+
+	bio_record->details.bi_bdev = NULL;
 
 	if (rw == WRITE) {
 		/* Save region for mirror_end_io() handler */
-		map_context->ll = dm_rh_bio_to_region(ms->rh, bio);
+		bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
 		queue_bio(ms, bio, rw);
 		return DM_MAPIO_SUBMITTED;
 	}
@@ -1194,33 +1183,29 @@ static int mirror_map(struct dm_target *ti, struct bio *bio,
 	if (unlikely(!m))
 		return -EIO;
 
-	read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO);
-	if (likely(read_record)) {
-		dm_bio_record(&read_record->details, bio);
-		map_context->ptr = read_record;
-		read_record->m = m;
-	}
+	dm_bio_record(&bio_record->details, bio);
+	bio_record->m = m;
 
 	map_bio(m, bio);
 
 	return DM_MAPIO_REMAPPED;
 }
 
-static int mirror_end_io(struct dm_target *ti, struct bio *bio,
-			 int error, union map_info *map_context)
+static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	int rw = bio_rw(bio);
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
 	struct mirror *m = NULL;
 	struct dm_bio_details *bd = NULL;
-	struct dm_raid1_read_record *read_record = map_context->ptr;
+	struct dm_raid1_bio_record *bio_record =
+	    dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
 
 	/*
 	 * We need to dec pending if this was a write.
 	 */
 	if (rw == WRITE) {
 		if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)))
-			dm_rh_dec(ms->rh, map_context->ll);
+			dm_rh_dec(ms->rh, bio_record->write_region);
 		return error;
 	}
@@ -1231,7 +1216,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
 		goto out;
 
 	if (unlikely(error)) {
-		if (!read_record) {
+		if (!bio_record->details.bi_bdev) {
 			/*
 			 * There wasn't enough memory to record necessary
 			 * information for a retry or there was no other
@@ -1241,7 +1226,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
 			return -EIO;
 		}
 
-		m = read_record->m;
+		m = bio_record->m;
 
 		DMERR("Mirror read failed from %s. Trying alternative device.",
 		      m->dev->name);
@@ -1253,22 +1238,18 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
 		 * mirror.
 		 */
 		if (default_ok(m) || mirror_available(ms, bio)) {
-			bd = &read_record->details;
+			bd = &bio_record->details;
 
 			dm_bio_restore(bd, bio);
-			mempool_free(read_record, ms->read_record_pool);
-			map_context->ptr = NULL;
+			bio_record->details.bi_bdev = NULL;
 			queue_bio(ms, bio, rw);
 
-			return 1;
+			return DM_ENDIO_INCOMPLETE;
 		}
 		DMERR("All replicated volumes dead, failing I/O");
 	}
 
 out:
-	if (read_record) {
-		mempool_free(read_record, ms->read_record_pool);
-		map_context->ptr = NULL;
-	}
+	bio_record->details.bi_bdev = NULL;
 
 	return error;
 }
@@ -1422,7 +1403,7 @@ static int mirror_iterate_devices(struct dm_target *ti,
 static struct target_type mirror_target = {
 	.name = "mirror",
-	.version = {1, 12, 1},
+	.version = {1, 13, 1},
 	.module = THIS_MODULE,
 	.ctr = mirror_ctr,
 	.dtr = mirror_dtr,
@@ -1439,13 +1420,6 @@ static int __init dm_mirror_init(void)
 {
 	int r;
 
-	_dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
-	if (!_dm_raid1_read_record_cache) {
-		DMERR("Can't allocate dm_raid1_read_record cache");
-		r = -ENOMEM;
-		goto bad_cache;
-	}
-
 	r = dm_register_target(&mirror_target);
 	if (r < 0) {
 		DMERR("Failed to register mirror target");
@@ -1455,15 +1429,12 @@ static int __init dm_mirror_init(void)
 
 	return 0;