Commit 355bbd8c authored by Linus Torvalds
Browse files

Merge branch 'for-2.6.32' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.32' of git://git.kernel.dk/linux-2.6-block: (29 commits)
  block: use blkdev_issue_discard in blk_ioctl_discard
  Make DISCARD_BARRIER and DISCARD_NOBARRIER writes instead of reads
  block: don't assume device has a request list backing in nr_requests store
  block: Optimal I/O limit wrapper
  cfq: choose a new next_req when a request is dispatched
  Seperate read and write statistics of in_flight requests
  aoe: end barrier bios with EOPNOTSUPP
  block: trace bio queueing trial only when it occurs
  block: enable rq CPU completion affinity by default
  cfq: fix the log message after dispatched a request
  block: use printk_once
  cciss: memory leak in cciss_init_one()
  splice: update mtime and atime on files
  block: make blk_iopoll_prep_sched() follow normal 0/1 return convention
  cfq-iosched: get rid of must_alloc flag
  block: use interrupts disabled version of raise_softirq_irqoff()
  block: fix comment in blk-iopoll.c
  block: adjust default budget for blk-iopoll
  block: fix long lines in block/blk-iopoll.c
  block: add blk-iopoll, a NAPI like approach for block devices
  ...
parents 39695224 746cd1e7
......@@ -5,7 +5,7 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
ioctl.o genhd.o scsi_ioctl.o
blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
......
......@@ -348,6 +348,9 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
}
if (bio->bi_private)
complete(bio->bi_private);
bio_put(bio);
}
......@@ -357,21 +360,20 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
* @sector: start sector
* @nr_sects: number of sectors to discard
* @gfp_mask: memory allocation flags (for bio_alloc)
* @flags: DISCARD_FL_* flags to control behaviour
*
* Description:
* Issue a discard request for the sectors in question. Does not wait.
* Issue a discard request for the sectors in question.
*/
int blkdev_issue_discard(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, int flags)
{
struct request_queue *q;
struct bio *bio;
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev);
int type = flags & DISCARD_FL_BARRIER ?
DISCARD_BARRIER : DISCARD_NOBARRIER;
int ret = 0;
if (bdev->bd_disk == NULL)
return -ENXIO;
q = bdev_get_queue(bdev);
if (!q)
return -ENXIO;
......@@ -379,12 +381,14 @@ int blkdev_issue_discard(struct block_device *bdev,
return -EOPNOTSUPP;
while (nr_sects && !ret) {
bio = bio_alloc(gfp_mask, 0);
struct bio *bio = bio_alloc(gfp_mask, 0);
if (!bio)
return -ENOMEM;
bio->bi_end_io = blkdev_discard_end_io;
bio->bi_bdev = bdev;
if (flags & DISCARD_FL_WAIT)
bio->bi_private = &wait;
bio->bi_sector = sector;
......@@ -396,10 +400,13 @@ int blkdev_issue_discard(struct block_device *bdev,
bio->bi_size = nr_sects << 9;
nr_sects = 0;
}
bio_get(bio);
submit_bio(DISCARD_BARRIER, bio);
submit_bio(type, bio);
if (flags & DISCARD_FL_WAIT)
wait_for_completion(&wait);
/* Check if it failed immediately */
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
else if (!bio_flagged(bio, BIO_UPTODATE))
......
......@@ -69,7 +69,7 @@ static void drive_stat_acct(struct request *rq, int new_io)
part_stat_inc(cpu, part, merges[rw]);
else {
part_round_stats(cpu, part);
part_inc_in_flight(part);
part_inc_in_flight(part, rw);
}
part_stat_unlock();
......@@ -1031,7 +1031,7 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
if (part->in_flight) {
__part_stat_add(cpu, part, time_in_queue,
part->in_flight * (now - part->stamp));
part_in_flight(part) * (now - part->stamp));
__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
}
part->stamp = now;
......@@ -1112,31 +1112,27 @@ void init_request_from_bio(struct request *req, struct bio *bio)
req->cmd_type = REQ_TYPE_FS;
/*
* inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
* Inherit FAILFAST from bio (for read-ahead, and explicit
* FAILFAST). FAILFAST flags are identical for req and bio.
*/
if (bio_rw_ahead(bio))
req->cmd_flags |= (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
REQ_FAILFAST_DRIVER);
if (bio_failfast_dev(bio))
req->cmd_flags |= REQ_FAILFAST_DEV;
if (bio_failfast_transport(bio))
req->cmd_flags |= REQ_FAILFAST_TRANSPORT;
if (bio_failfast_driver(bio))
req->cmd_flags |= REQ_FAILFAST_DRIVER;
if (unlikely(bio_discard(bio))) {
if (bio_rw_flagged(bio, BIO_RW_AHEAD))
req->cmd_flags |= REQ_FAILFAST_MASK;
else
req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK;
if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
req->cmd_flags |= REQ_DISCARD;
if (bio_barrier(bio))
if (bio_rw_flagged(bio, BIO_RW_BARRIER))
req->cmd_flags |= REQ_SOFTBARRIER;
req->q->prepare_discard_fn(req->q, req);
} else if (unlikely(bio_barrier(bio)))
} else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)))
req->cmd_flags |= REQ_HARDBARRIER;
if (bio_sync(bio))
if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
req->cmd_flags |= REQ_RW_SYNC;
if (bio_rw_meta(bio))
if (bio_rw_flagged(bio, BIO_RW_META))
req->cmd_flags |= REQ_RW_META;
if (bio_noidle(bio))
if (bio_rw_flagged(bio, BIO_RW_NOIDLE))
req->cmd_flags |= REQ_NOIDLE;
req->errors = 0;
......@@ -1151,7 +1147,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
*/
static inline bool queue_should_plug(struct request_queue *q)
{
return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
return !(blk_queue_nonrot(q) && blk_queue_queuing(q));
}
static int __make_request(struct request_queue *q, struct bio *bio)
......@@ -1160,11 +1156,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
int el_ret;
unsigned int bytes = bio->bi_size;
const unsigned short prio = bio_prio(bio);
const int sync = bio_sync(bio);
const int unplug = bio_unplug(bio);
const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
int rw_flags;
if (bio_barrier(bio) && bio_has_data(bio) &&
if (bio_rw_flagged(bio, BIO_RW_BARRIER) && bio_has_data(bio) &&
(q->next_ordered == QUEUE_ORDERED_NONE)) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
......@@ -1178,7 +1175,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
spin_lock_irq(q->queue_lock);
if (unlikely(bio_barrier(bio)) || elv_queue_empty(q))
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q))
goto get_rq;
el_ret = elv_merge(q, &req, bio);
......@@ -1191,6 +1188,9 @@ static int __make_request(struct request_queue *q, struct bio *bio)
trace_block_bio_backmerge(q, bio);
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
blk_rq_set_mixed_merge(req);
req->biotail->bi_next = bio;
req->biotail = bio;
req->__data_len += bytes;
......@@ -1210,6 +1210,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
trace_block_bio_frontmerge(q, bio);
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
blk_rq_set_mixed_merge(req);
req->cmd_flags &= ~REQ_FAILFAST_MASK;
req->cmd_flags |= ff;
}
bio->bi_next = req->bio;
req->bio = bio;
......@@ -1457,19 +1463,20 @@ static inline void __generic_make_request(struct bio *bio)
if (old_sector != -1)
trace_block_remap(q, bio, old_dev, old_sector);
trace_block_bio_queue(q, bio);
old_sector = bio->bi_sector;
old_dev = bio->bi_bdev->bd_dev;
if (bio_check_eod(bio, nr_sectors))
goto end_io;
if (bio_discard(bio) && !q->prepare_discard_fn) {
if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
!q->prepare_discard_fn) {
err = -EOPNOTSUPP;
goto end_io;
}
trace_block_bio_queue(q, bio);
ret = q->make_request_fn(q, bio);
} while (ret);
......@@ -1654,6 +1661,50 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
}
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
/**
 * blk_rq_err_bytes - determine number of bytes till the next failure boundary
 * @rq: request to examine
 *
 * Description:
 *     A request may be the result of merging IOs that want different
 *     failure handling.  Work out how many bytes, counted from the start
 *     of @rq, can be failed right away without running into a part of
 *     the request that still has to be retried.
 *
 * Return:
 *     The number of bytes to fail.
 *
 * Context:
 *     queue_lock must be held.
 */
unsigned int blk_rq_err_bytes(const struct request *rq)
{
	const unsigned int failfast = rq->cmd_flags & REQ_FAILFAST_MASK;
	unsigned int nbytes = 0;
	struct bio *cur;

	/* Not a mixed merge: the whole request shares one failure policy. */
	if (!(rq->cmd_flags & REQ_MIXED_MERGE))
		return blk_rq_bytes(rq);

	/*
	 * The only attributes that can currently be mixed are the failfast
	 * bits.  Leading bios which carry every failfast bit the request
	 * has (i.e. are at least as eager to fail as the first one) can
	 * be failed together; stop at the first bio that lacks one.
	 */
	cur = rq->bio;
	while (cur && (cur->bi_rw & failfast) == failfast) {
		nbytes += cur->bi_size;
		cur = cur->bi_next;
	}

	/* Failing zero bytes of a non-empty request would never terminate. */
	BUG_ON(blk_rq_bytes(rq) && !nbytes);

	return nbytes;
}
EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
static void blk_account_io_completion(struct request *req, unsigned int bytes)
{
if (blk_do_io_stat(req)) {
......@@ -1687,7 +1738,7 @@ static void blk_account_io_done(struct request *req)
part_stat_inc(cpu, part, ios[rw]);
part_stat_add(cpu, part, ticks[rw], duration);
part_round_stats(cpu, part);
part_dec_in_flight(part);
part_dec_in_flight(part, rw);
part_stat_unlock();
}
......@@ -1807,8 +1858,15 @@ void blk_dequeue_request(struct request *rq)
* and to it is freed is accounted as io that is in progress at
* the driver side.
*/
if (blk_account_rq(rq))
if (blk_account_rq(rq)) {
q->in_flight[rq_is_sync(rq)]++;
/*
* Mark this device as supporting hardware queuing, if
* we have more IOs in flight than 4.
*/
if (!blk_queue_queuing(q) && queue_in_flight(q) > 4)
set_bit(QUEUE_FLAG_CQ, &q->queue_flags);
}
}
/**
......@@ -2000,6 +2058,12 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
if (blk_fs_request(req) || blk_discard_rq(req))
req->__sector += total_bytes >> 9;
/* mixed attributes always follow the first bio */
if (req->cmd_flags & REQ_MIXED_MERGE) {
req->cmd_flags &= ~REQ_FAILFAST_MASK;
req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
}
/*
* If total number of sectors is less than the first segment
* size, something has gone terribly wrong.
......@@ -2178,6 +2242,25 @@ bool blk_end_request_cur(struct request *rq, int error)
}
EXPORT_SYMBOL(blk_end_request_cur);
/**
 * blk_end_request_err - Finish a request till the next failure boundary.
 * @rq: the request to finish till the next failure boundary for
 * @error: must be negative errno
 *
 * Description:
 *     Fail the leading part of @rq, up to the boundary reported by
 *     blk_rq_err_bytes(), with @error.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 */
bool blk_end_request_err(struct request *rq, int error)
{
	unsigned int nr_bytes;

	/* Callers are expected to pass a real (negative) error code. */
	WARN_ON(error >= 0);

	nr_bytes = blk_rq_err_bytes(rq);

	return blk_end_request(rq, error, nr_bytes);
}
EXPORT_SYMBOL_GPL(blk_end_request_err);
/**
* __blk_end_request - Helper function for drivers to complete the request.
* @rq: the request being processed
......@@ -2237,12 +2320,31 @@ bool __blk_end_request_cur(struct request *rq, int error)
}
EXPORT_SYMBOL(__blk_end_request_cur);
/**
 * __blk_end_request_err - Finish a request till the next failure boundary.
 * @rq: the request to finish till the next failure boundary for
 * @error: must be negative errno
 *
 * Description:
 *     Fail the leading part of @rq, up to the boundary reported by
 *     blk_rq_err_bytes(), with @error.  Must be called with queue lock
 *     held.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 */
bool __blk_end_request_err(struct request *rq, int error)
{
	unsigned int nr_bytes;

	/* Callers are expected to pass a real (negative) error code. */
	WARN_ON(error >= 0);

	nr_bytes = blk_rq_err_bytes(rq);

	return __blk_end_request(rq, error, nr_bytes);
}
EXPORT_SYMBOL_GPL(__blk_end_request_err);
void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
struct bio *bio)
{
/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and
we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
rq->cmd_flags |= (bio->bi_rw & 3);
/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
rq->cmd_flags |= bio->bi_rw & REQ_RW;
if (bio_has_data(bio)) {
rq->nr_phys_segments = bio_phys_segments(q, bio);
......
/*
* Functions related to interrupt-poll handling in the block layer. This
* is similar to NAPI for network devices.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/blk-iopoll.h>
#include <linux/delay.h>
#include "blk.h"
/*
 * Exported flag, initialised to 1.  Nothing in this file reads it;
 * presumably it is a knob for drivers and/or a sysctl to consult
 * (TODO confirm against the users of this symbol).
 */
int blk_iopoll_enabled = 1;
EXPORT_SYMBOL(blk_iopoll_enabled);
/* Total amount of work one run of blk_iopoll_softirq() may consume. */
static unsigned int blk_iopoll_budget __read_mostly = 256;
/* Per-CPU list of blk_iopoll instances that are waiting to be polled. */
static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
/**
 * blk_iopoll_sched - Schedule a run of the iopoll handler
 * @iop:      The parent iopoll structure
 *
 * Description:
 *     Queue @iop at the tail of the current CPU's pending poll list and
 *     mark the blk iopoll softirq as pending.  The driver must already
 *     have gotten a successful return from blk_iopoll_sched_prep() before
 *     calling this.
 **/
void blk_iopoll_sched(struct blk_iopoll *iop)
{
	struct list_head *pending;
	unsigned long irq_flags;

	local_irq_save(irq_flags);

	/* Look up the per-CPU list only once interrupts are off. */
	pending = &__get_cpu_var(blk_cpu_iopoll);
	list_add_tail(&iop->list, pending);
	__raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);

	local_irq_restore(irq_flags);
}
EXPORT_SYMBOL(blk_iopoll_sched);
/**
 * __blk_iopoll_complete - Mark this @iop as un-polled again
 * @iop:      The parent iopoll structure
 *
 * Description:
 *     See blk_iopoll_complete(). This function must be called with interrupts
 *     disabled.
 *
 *     Unlinks @iop from the list it is queued on and then clears
 *     IOPOLL_F_SCHED in @iop->state.
 **/
void __blk_iopoll_complete(struct blk_iopoll *iop)
{
list_del(&iop->list);
/* Order the list removal before the SCHED bit is released. */
smp_mb__before_clear_bit();
clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
}
EXPORT_SYMBOL(__blk_iopoll_complete);
/**
 * blk_iopoll_complete - Mark this @iopoll as un-polled again
 * @iopoll:   The parent iopoll structure
 *
 * Description:
 *     If a driver consumes less than the assigned budget in its run of the
 *     iopoll handler, it'll end the polled mode by calling this function. The
 *     iopoll handler will not be invoked again before blk_iopoll_sched_prep()
 *     is called.
 *
 *     This is __blk_iopoll_complete() wrapped in local_irq_save() /
 *     local_irq_restore(), so it does not require interrupts to be
 *     disabled by the caller.
 **/
void blk_iopoll_complete(struct blk_iopoll *iopoll)
{
unsigned long flags;
local_irq_save(flags);
__blk_iopoll_complete(iopoll);
local_irq_restore(flags);
}
EXPORT_SYMBOL(blk_iopoll_complete);
/*
 * Handler for BLOCK_IOPOLL_SOFTIRQ.
 *
 * Repeatedly takes the instance at the head of this CPU's blk_cpu_iopoll
 * list and, if its IOPOLL_F_SCHED bit is set, calls its ->poll() callback
 * with the instance's weight.  The amount of work reported by ->poll() is
 * subtracted from a budget that starts at blk_iopoll_budget.  The loop
 * ends when the list is empty, or when the budget is used up or jiffies
 * has advanced past its value at entry; in the latter two cases the
 * softirq is raised again so the remaining entries get another run.
 *
 * @h is not used.
 */
static void blk_iopoll_softirq(struct softirq_action *h)
{
struct list_head *list = &__get_cpu_var(blk_cpu_iopoll);
int rearm = 0, budget = blk_iopoll_budget;
unsigned long start_time = jiffies;
local_irq_disable();
while (!list_empty(list)) {
struct blk_iopoll *iop;
int work, weight;
/*
 * If softirq window is exhausted then punt.
 */
if (budget <= 0 || time_after(jiffies, start_time)) {
rearm = 1;
break;
}
/* Run ->poll() with interrupts enabled. */
local_irq_enable();
/*
 * Even though interrupts have been re-enabled, this
 * access is safe because interrupts can only add new
 * entries to the tail of this list, and only ->poll()
 * calls can remove this head entry from the list.
 */
iop = list_entry(list->next, struct blk_iopoll, list);
weight = iop->weight;
work = 0;
if (test_bit(IOPOLL_F_SCHED, &iop->state))
work = iop->poll(iop, weight);
budget -= work;
/* List manipulation below needs interrupts off again. */
local_irq_disable();
/*
 * Drivers must not modify the iopoll state, if they
 * consume their assigned weight (or more, some drivers can't
 * easily just stop processing, they have to complete an
 * entire mask of commands). In such cases this code
 * still "owns" the iopoll instance and therefore can
 * move the instance around on the list at-will.
 *
 * NOTE(review): blk_iopoll_disable_pending() is defined outside
 * this file; presumably it reports a pending blk_iopoll_disable()
 * (IOPOLL_F_DISABLE set) -- confirm.  If so the instance is
 * completed here, otherwise it is rotated to the list tail.
 */
if (work >= weight) {
if (blk_iopoll_disable_pending(iop))
__blk_iopoll_complete(iop);
else
list_move_tail(&iop->list, list);
}
}
/* Entries were left unprocessed: ask for another softirq run. */
if (rearm)
__raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
local_irq_enable();
}
/**
 * blk_iopoll_disable - Disable iopoll on this @iop
 * @iop:      The parent iopoll structure
 *
 * Description:
 *     Disable io polling and wait for any pending callbacks to have
 *     completed.  IOPOLL_F_DISABLE is set for the duration of the wait;
 *     the function then sleeps in 1ms steps until it manages to set
 *     IOPOLL_F_SCHED itself.  IOPOLL_F_SCHED is left set on return.
 **/
void blk_iopoll_disable(struct blk_iopoll *iop)
{
	set_bit(IOPOLL_F_DISABLE, &iop->state);

	for (;;) {
		/* Bit was clear and is now ours: nothing is scheduled. */
		if (!test_and_set_bit(IOPOLL_F_SCHED, &iop->state))
			break;

		msleep(1);
	}

	clear_bit(IOPOLL_F_DISABLE, &iop->state);
}
EXPORT_SYMBOL(blk_iopoll_disable);
/**
 * blk_iopoll_enable - Enable iopoll on this @iop
 * @iop:      The parent iopoll structure
 *
 * Description:
 *     Enable iopoll on this @iop. Note that the handler run will not be
 *     scheduled, it will only mark it as active.
 *
 *     IOPOLL_F_SCHED must be set on entry (the function BUGs otherwise);
 *     it is cleared here.
 **/
void blk_iopoll_enable(struct blk_iopoll *iop)
{
/* Enabling an instance whose SCHED bit is not set is a caller bug. */
BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
/* Barrier before the SCHED bit is released. */
smp_mb__before_clear_bit();
clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
}
EXPORT_SYMBOL(blk_iopoll_enable);
/**
 * blk_iopoll_init - Initialize this @iop
 * @iop:      The parent iopoll structure
 * @weight:   The default weight (or command completion budget)
 * @poll_fn:  The handler to invoke
 *
 * Description:
 *     Zero the blk_iopoll structure, record @weight and @poll_fn and
 *     leave it with IOPOLL_F_SCHED set.  Before being actively used, the
 *     driver must call blk_iopoll_enable().
 **/
void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn)
{
	memset(iop, 0, sizeof(struct blk_iopoll));

	iop->poll = poll_fn;
	iop->weight = weight;
	INIT_LIST_HEAD(&iop->list);

	/* Starts out with SCHED set; blk_iopoll_enable() clears it. */
	set_bit(IOPOLL_F_SCHED, &iop->state);
}
EXPORT_SYMBOL(blk_iopoll_init);
/*
 * CPU hotplug notifier callback.
 *
 * @self:   the registered notifier block (unused)
 * @action: hotplug event; only CPU_DEAD and CPU_DEAD_FROZEN are acted on
 * @hcpu:   number of the affected CPU, passed as a pointer-sized integer
 *
 * Always returns NOTIFY_OK.
 */
static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self,
					  unsigned long action, void *hcpu)
{
	/*
	 * If a CPU goes away, splice its entries to the current CPU
	 * and trigger a run of the softirq
	 */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		int cpu = (unsigned long) hcpu;

		local_irq_disable();
		list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
				 &__get_cpu_var(blk_cpu_iopoll));
		/*
		 * This path is not an interrupt handler (it unconditionally
		 * re-enables interrupts below), so merely setting the pending
		 * bit with __raise_softirq_irqoff() would leave the spliced
		 * entries waiting for some unrelated interrupt to run the
		 * softirq.  raise_softirq_irqoff() additionally wakes
		 * ksoftirqd when called outside interrupt context.
		 */
		raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
		local_irq_enable();
	}

	return NOTIFY_OK;
}
/* Notifier block that routes CPU hotplug events to blk_iopoll_cpu_notify(). */
static struct notifier_block __cpuinitdata blk_iopoll_cpu_notifier = {
.notifier_call = blk_iopoll_cpu_notify,
};
static __init int blk_iopoll_setup(void)
{
int i;
for_each_possible_cpu(i)
INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq);
register_hotcpu_notifier(&blk_iopoll_cpu_notifier);
return 0;
}
subsys_initcall(blk_iopoll_setup);
......@@ -311,6 +311,36 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
return 1;
}
/**
* blk_rq_set_mixed_merge - mark a request as mixed merge
* @rq: request to mark as mixed merge
*
* Description:
* @rq is about to be mixed merged. Make sure the attributes
* which can be mixed are set in each bio and mark @rq as mixed
* merged.
*/
void blk_rq_set_mixed_merge(struct request *rq)
{
unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
struct bio *bio;
if (rq->cmd_flags & REQ_MIXED_MERGE)
return;
/*
* @rq will no longer represent mixable attributes for all the
* contained bios. It will just track those of the first one.
* Distribute the attributes to each bio.
*/
for (bio = rq->bio; bio; bio = bio->bi_next) {