...
 
......@@ -33,12 +33,58 @@
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
#include <linux/blk-bench.h>
#include <linux/bdump.h>
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
#include "blk.h"
#include "blk-mq.h"
#include <linux/blk-lcd.h>
//static bool __always_inline check_fio(void) {
// if(strcmp(current->comm, "fio") == 0) {
// return true;
// }
// return false;
//}
//INIT_BENCHMARK_DATA(plug);
//INIT_BENCHMARK_DATA(plug_ot);
//INIT_BENCHMARK_DATA(plug_ots);
//static void __always_inline bench_start_plos(void) {
// BENCH_BEGIN(plug_ots);
//}
//static void __always_inline bench_end_plos(void) {
// BENCH_END(plug_ots);
//}
//static void __always_inline bench_start_plo(void) {
// BENCH_BEGIN(plug_ot);
//}
//static void __always_inline bench_end_plo(void) {
// BENCH_END(plug_ot);
//}
//static void __always_inline bench_start_pl(void) {
// BENCH_BEGIN(plug);
//}
//static void __always_inline bench_end_pl(void) {
// BENCH_END(plug);
//}
//void bdump_data(void) {
// BENCH_COMPUTE_STAT(plug);
/// BENCH_COMPUTE_STAT(plug_ot);
// BENCH_COMPUTE_STAT(plug_ots);
//}
struct request_queue *queue_nullb = NULL;
EXPORT_SYMBOL(queue_nullb);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
......@@ -148,11 +194,17 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
if (unlikely(rq->cmd_flags & REQ_QUIET))
bio_set_flag(bio, BIO_QUIET);
//queue_nullb ? printk("bio_advance: %d \n",nbytes) : queue_nullb = NULL;
bio_advance(bio, nbytes);
/* don't actually finish bio if it's part of flush sequence */
if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) {
//queue_nullb ? printk("calling bio_endio \n") : queue_nullb = NULL;
//BENCH_BEGIN(io_complete);
bio_endio(bio);
//BENCH_END(io_complete);
}
}
void blk_dump_rq_flags(struct request *rq, char *msg)
......@@ -545,6 +597,12 @@ void blk_cleanup_queue(struct request_queue *q)
{
spinlock_t *lock = q->queue_lock;
//if(queue_nullb) {
// BENCH_COMPUTE_STAT(io_complete);
// bdump_data();
//}
/* mark @q DYING, no new request or merges will be allowed afterwards */
mutex_lock(&q->sysfs_lock);
blk_set_queue_dying(q);
......@@ -691,14 +749,18 @@ static void blk_rq_timed_out_timer(unsigned long data)
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
struct request_queue_container *rq_cnt;
struct request_queue *q;
int err;
q = kmem_cache_alloc_node(blk_requestq_cachep,
rq_cnt = kmem_cache_alloc_node(blk_requestq_cachep,
gfp_mask | __GFP_ZERO, node_id);
if (!q)
if (!rq_cnt)
return NULL;
q = &rq_cnt->rq;
q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
if (q->id < 0)
goto fail_q;
......@@ -773,7 +835,8 @@ fail_split:
fail_id:
ida_simple_remove(&blk_queue_ida, q->id);
fail_q:
kmem_cache_free(blk_requestq_cachep, q);
//kmem_cache_free(blk_requestq_cachep, q);
kmem_cache_free(blk_requestq_cachep, rq_cnt);
return NULL;
}
EXPORT_SYMBOL(blk_alloc_queue_node);
......@@ -1997,6 +2060,8 @@ blk_qc_t generic_make_request(struct bio *bio)
struct bio_list bio_list_on_stack;
blk_qc_t ret = BLK_QC_T_NONE;
//(check_fio() == true) ? printk("[%s:%s] --> in \n",__FILE__,__func__) : -1;
if (!generic_make_request_checks(bio))
goto out;
......@@ -2011,6 +2076,7 @@ blk_qc_t generic_make_request(struct bio *bio)
* should be added at the tail
*/
if (current->bio_list) {
//(check_fio() == true) ? printk("adding bio to the current->bio_list \n") : -1;
bio_list_add(current->bio_list, bio);
goto out;
}
......@@ -2036,20 +2102,25 @@ blk_qc_t generic_make_request(struct bio *bio)
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
if (likely(blk_queue_enter(q, false) == 0)) {
//(check_fio() == true) ? printk("calling make_request_fn \n") : -1;
//(check_fio() == true) ? bench_start_pl() : -1;
ret = q->make_request_fn(q, bio);
//(check_fio() == true) ? bench_end_pl() : -1;
blk_queue_exit(q);
bio = bio_list_pop(current->bio_list);
} else {
struct bio *bio_next = bio_list_pop(current->bio_list);
//(check_fio() == true) ? printk("pop bio list \n") : -1;
bio_io_error(bio);
bio = bio_next;
}
} while (bio);
current->bio_list = NULL; /* deactivate */
//(check_fio() == true) ? printk("[%s:%s] <-- out \n",__FILE__,__func__) : -1;
out:
return ret;
}
......@@ -2066,13 +2137,16 @@ EXPORT_SYMBOL(generic_make_request);
*/
blk_qc_t submit_bio(struct bio *bio)
{
//blk_qc_t ret;
/*
* If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission.
*/
//(check_fio() == true) ? printk("[%s:%s] --> in \n",__FILE__,__func__) : -1;
if (bio_has_data(bio)) {
unsigned int count;
//(check_fio() == true) ? printk("bio has data \n") : -1;
if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
count = bdev_logical_block_size(bio->bi_bdev) >> 9;
else
......@@ -2081,6 +2155,7 @@ blk_qc_t submit_bio(struct bio *bio)
if (op_is_write(bio_op(bio))) {
count_vm_events(PGPGOUT, count);
} else {
//(check_fio() == true) ? printk("task_io_account \n") : -1;
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, count);
}
......@@ -2096,7 +2171,10 @@ blk_qc_t submit_bio(struct bio *bio)
}
}
//(check_fio() == true) ? bench_start_pl() : -1;
return generic_make_request(bio);
//(check_fio() == true) ? bench_end_pl() : -1;
//return ret;
}
EXPORT_SYMBOL(submit_bio);
......@@ -2593,22 +2671,34 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
blk_account_io_completion(req, nr_bytes);
//if(queue_nullb) {
//BENCH_BEGIN(io_complete);
//printk("----> into while \n");
//}
total_bytes = 0;
while (req->bio) {
struct bio *bio = req->bio;
unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
if (bio_bytes == bio->bi_iter.bi_size)
if (bio_bytes == bio->bi_iter.bi_size) {
// queue_nullb ? printk("hitting here \n") : queue_nullb = NULL;
req->bio = bio->bi_next;
}
//queue_nullb ? printk("calling req_bio_endio: %d \n", bio_bytes) : queue_nullb= NULL;
req_bio_endio(req, bio, bio_bytes, error);
total_bytes += bio_bytes;
nr_bytes -= bio_bytes;
//queue_nullb ? printk("nr_bytes: %d, total_bytes: %d \n", nr_bytes, total_bytes) : queue_nullb = NULL;
if (!nr_bytes)
break;
}
//if(queue_nullb) {
//printk("<--- out of while \n");
//BENCH_END(io_complete);
//}
/*
* completely done
......@@ -3224,17 +3314,24 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
LIST_HEAD(list);
unsigned int depth;
//(check_fio() == true) ? printk("[%s:%s] --> in \n",__FILE__,__func__) : -1;
flush_plug_callbacks(plug, from_schedule);
if (!list_empty(&plug->mq_list))
if (!list_empty(&plug->mq_list)) {
//(check_fio() == true) ? printk("calling flush plug list \n") : -1;
//(check_fio() == true) ? bench_start_pl() : -1;
blk_mq_flush_plug_list(plug, from_schedule);
//(check_fio() == true) ? bench_end_pl() : -1;
}
if (list_empty(&plug->list))
return;
list_splice_init(&plug->list, &list);
//(check_fio() == true) ? bench_start_plos() : -1;
list_sort(NULL, &list, plug_rq_cmp);
//(check_fio() == true) ? bench_end_plos() : -1;
q = NULL;
depth = 0;
......@@ -3243,6 +3340,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
* Save and disable interrupts here, to avoid doing it for every
* queue lock we have to take.
*/
//(check_fio() == true) ? bench_start_plo() : -1;
local_irq_save(flags);
while (!list_empty(&list)) {
rq = list_entry_rq(list.next);
......@@ -3285,6 +3383,8 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
queue_unplugged(q, depth, from_schedule);
local_irq_restore(flags);
//(check_fio() == true) ? bench_end_plo() : -1;
//(check_fio() == true) ? printk("[%s:%s] <-- in \n",__FILE__,__func__) : -1;
}
void blk_finish_plug(struct blk_plug *plug)
......@@ -3532,8 +3632,12 @@ int __init blk_dev_init(void)
request_cachep = kmem_cache_create("blkdev_requests",
sizeof(struct request), 0, SLAB_PANIC, NULL);
//blk_requestq_cachep = kmem_cache_create("request_queue",
// sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
/* AB - This is for LCD */
blk_requestq_cachep = kmem_cache_create("request_queue",
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
sizeof(struct request_queue_container), 0, SLAB_PANIC, NULL);
return 0;
}
......@@ -11,6 +11,13 @@
#include "blk.h"
static __always_inline bool check_fio(void)
{
if (strcmp(current->comm, "fio") == 0)
return true;
return false;
}
static struct bio *blk_bio_discard_split(struct request_queue *q,
struct bio *bio,
struct bio_set *bs,
......@@ -194,15 +201,19 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
struct bio *split, *res;
unsigned nsegs;
//(check_fio() == true) ? printk("[%s:%s] in --> \n",__FILE__,__func__) : -1;
switch (bio_op(*bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
//(check_fio() == true) ? printk("REQ_OP_SECURE_ERASE: \n") : -1;
split = blk_bio_discard_split(q, *bio, bs, &nsegs);
break;
case REQ_OP_WRITE_SAME:
//(check_fio() == true) ? printk("REQ_OP_WRITE_SAME: \n") : -1;
split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
break;
default:
//(check_fio() == true) ? printk("default: \n") : -1;
split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
break;
}
......@@ -218,9 +229,11 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
bio_chain(split, *bio);
trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
//(check_fio() == true) ? printk("calling make req again \n") : -1;
generic_make_request(*bio);
*bio = split;
}
//(check_fio() == true) ? printk("[%s:%s] <-- out \n",__FILE__,__func__) : -1;
}
EXPORT_SYMBOL(blk_queue_split);
......
......@@ -19,6 +19,29 @@
#include "blk-mq.h"
#include "blk-mq-tag.h"
#include <linux/blk-bench.h>
#include <linux/bdump.h>
//INIT_BENCHMARK_DATA(gettag);
//static bool __always_inline check_fio(void) {
// if(strcmp(current->comm, "fio") == 0) {
// return true;
// }
// return false;
//}
//static void __always_inline bench_start(void) {
// BENCH_BEGIN(gettag);
//}
//static void __always_inline bench_end(void) {
// BENCH_END(gettag);
//}
//void bdump_data(void) {
// BENCH_COMPUTE_STAT(gettag);
//}
static bool bt_has_free_tags(struct blk_mq_bitmap_tags *bt)
{
int i;
......@@ -264,7 +287,9 @@ static int bt_get(struct blk_mq_alloc_data *data,
DEFINE_WAIT(wait);
int tag;
//(check_fio() == true) ? bench_start() : -1;
tag = __bt_get(hctx, bt, last_tag, tags);
//(check_fio() == true) ? bench_end() : -1;
if (tag != -1)
return tag;
......@@ -278,7 +303,6 @@ static int bt_get(struct blk_mq_alloc_data *data,
tag = __bt_get(hctx, bt, last_tag, tags);
if (tag != -1)
break;
/*
* We're out of tags on this hardware queue, kick any
* pending IO submits before going to sleep waiting for
......@@ -310,6 +334,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
hctx = data->hctx;
bt = &hctx->tags->bitmap_tags;
}
finish_wait(&bs->wait, &wait);
bs = bt_wait_ptr(bt, hctx);
} while (1);
......@@ -322,8 +347,10 @@ static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
int tag;
//(check_fio() == true) ? bench_start() : -1;
tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
&data->ctx->last_tag, data->hctx->tags);
//(check_fio() == true) ? bench_end() : -1;
if (tag >= 0)
return tag + data->hctx->tags->nr_reserved_tags;
......@@ -349,8 +376,9 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
if (data->flags & BLK_MQ_REQ_RESERVED)
if (data->flags & BLK_MQ_REQ_RESERVED) {
return __blk_mq_get_reserved_tag(data);
}
return __blk_mq_get_tag(data);
}
......
......@@ -13,6 +13,7 @@
#include "blk.h"
#include "blk-mq.h"
#include <linux/blk-lcd.h>
struct queue_sysfs_entry {
struct attribute attr;
......@@ -604,7 +605,11 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
{
struct request_queue *q = container_of(rcu_head, struct request_queue,
rcu_head);
kmem_cache_free(blk_requestq_cachep, q);
struct request_queue_container *rq_cnt = container_of(q,
struct request_queue_container,
rq);
kmem_cache_free(blk_requestq_cachep, rq_cnt);
}
/**
......
......@@ -183,6 +183,14 @@ unsigned long blk_rq_timeout(unsigned long timeout)
return timeout;
}
static __always_inline bool check_fio_klcd(void)
{
if (strcmp(current->comm, "fio") == 0 || strcmp(current->comm, "klcd") == 0)
return true;
return false;
}
/**
* blk_add_timer - Start timeout timer for a single request
* @req: request that is about to start running.
......@@ -207,8 +215,10 @@ void blk_add_timer(struct request *req)
* Some LLDs, like scsi, peek at the timeout to prevent a
* command from being retried forever.
*/
if (!req->timeout)
if (!req->timeout) {
//check_fio_klcd() ? printk("[timeout_null] name: %s req: %p, timeout: %d,\n",current->comm, req, req->timeout) : -1;
req->timeout = q->rq_timeout;
}
req->deadline = jiffies + req->timeout;
......@@ -225,6 +235,7 @@ void blk_add_timer(struct request *req)
* second.
*/
expiry = blk_rq_timeout(round_jiffies_up(req->deadline));
//check_fio_klcd() ? printk("[expiry_check] name: %s req: %p, timeout: %d, expiry: %d\n",current->comm, req, req->timeout, expiry) : -1;
if (!timer_pending(&q->timeout) ||
time_before(expiry, q->timeout.expires)) {
......@@ -238,6 +249,7 @@ void blk_add_timer(struct request *req)
* will be X + something.
*/
if (!timer_pending(&q->timeout) || (diff >= HZ / 2))
//check_fio_klcd() ? printk("[calling mod-timer] name: %s req: %p, timeout: %d, expiry: %d\n",current->comm, req, q->timeout, expiry) : -1;
mod_timer(&q->timeout, expiry);
}
......
......@@ -23,6 +23,7 @@
#include <linux/badblocks.h>
#include "blk.h"
#include <linux/blk-lcd.h>
static DEFINE_MUTEX(block_class_lock);
struct kobject *block_depr;
......@@ -1144,7 +1145,10 @@ static void disk_release(struct device *dev)
hd_free_part(&disk->part0);
if (disk->queue)
blk_put_queue(disk->queue);
kfree(disk);
//kfree(disk);
kfree((container_of(disk,
struct gendisk_container,
gendisk)));
}
struct class block_class = {
.name = "block",
......@@ -1289,18 +1293,25 @@ EXPORT_SYMBOL(alloc_disk);
struct gendisk *alloc_disk_node(int minors, int node_id)
{
struct gendisk_container *disk_container;
struct gendisk *disk;
disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
if (disk) {
//disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
disk_container = kzalloc_node(sizeof(struct gendisk_container), GFP_KERNEL, node_id);
if (disk_container) {
disk = &disk_container->gendisk;
if (!init_part_stats(&disk->part0)) {
kfree(disk);
//kfree(disk);
kfree(disk_container);
return NULL;
}
disk->node_id = node_id;
if (disk_expand_part_tbl(disk, 0)) {
free_part_stats(&disk->part0);
kfree(disk);
//kfree(disk);
kfree(disk_container);
return NULL;
}
disk->part_tbl->part[0] = &disk->part0;
......@@ -1317,7 +1328,8 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
seqcount_init(&disk->part0.nr_sects_seq);
if (hd_ref_init(&disk->part0)) {
hd_free_part(&disk->part0);
kfree(disk);
//kfree(disk);
kfree(disk_container);
return NULL;
}
......
......@@ -46,9 +46,62 @@
#include "internal.h"
#include <linux/blk-bench.h>
#include <linux/bdump.h>
#define AIO_RING_MAGIC 0xa10a10a1
#define AIO_RING_COMPAT_FEATURES 1
#define AIO_RING_INCOMPAT_FEATURES 0
extern struct request_queue *queue_nullb;
//INIT_BENCHMARK_DATA(io_sub);
//INIT_BENCHMARK_DATA(io_gu);
//INIT_BENCHMARK_DATA(io_cu);
//static void __always_inline bench_start(void) {
// BENCH_BEGIN(io_sub);
//}
//static void __always_inline bench_end(void) {
// BENCH_END(io_sub);
//}
//static void __always_inline bench_start_gu(void) {
// BENCH_BEGIN(io_gu);
//}
//static void __always_inline bench_end_gu(void) {
// BENCH_END(io_gu);
//}
//static void __always_inline bench_start_cu(void) {
// BENCH_BEGIN(io_cu);
//}
//static void __always_inline bench_end_cu(void) {
// BENCH_END(io_cu);
//}
//void bdump_data(void) {
// BENCH_COMPUTE_STAT(io_sub);
//BENCH_COMPUTE_STAT(io_gu);
//BENCH_COMPUTE_STAT(io_cu);
//}
static __always_inline bool check_fio(void)
{
if (strcmp(current->comm, "fio") == 0)
return true;
return false;
}
//static inline void bbegin(void) {
// BENCH_BEGIN(aio_complete);
// return;
//}
//static inline void bend(void) {
// BENCH_END(aio_complete);
// return;
//}
struct aio_ring {
unsigned id; /* kernel internal index number */
unsigned nr; /* number of io_events */
......@@ -1074,7 +1127,8 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
struct io_event *ev_page, *event;
unsigned tail, pos, head;
unsigned long flags;
//(queue_nullb != NULL) ? bbegin() : -1;
/*
* Special case handling for sync iocbs:
* - events go directly into the iocb for fast handling
......@@ -1145,8 +1199,11 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
* eventfd. The eventfd_signal() function is safe to be called
* from IRQ context.
*/
if (iocb->ki_eventfd != NULL)
if (iocb->ki_eventfd != NULL) {
//(queue_nullb != NULL) ? printk("eventfd?\n") : -1;
eventfd_signal(iocb->ki_eventfd, 1);
}
/* everything turned out well, dispose of the aiocb. */
kiocb_free(iocb);
......@@ -1159,12 +1216,19 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
*/
smp_mb();
if (waitqueue_active(&ctx->wait))
if (waitqueue_active(&ctx->wait)) {
//(queue_nullb != NULL) ? printk("wakeup wait?\n") : -1;
wake_up(&ctx->wait);
}
percpu_ref_put(&ctx->reqs);
//(queue_nullb != NULL) ? bend() : -1;
}
//void bdump_data(void) {
// BENCH_COMPUTE_STAT(aio_complete);
//}
/* aio_read_events_ring
* Pull an event off of the ioctx's event ring. Returns the number of
* events fetched
......@@ -1280,7 +1344,9 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
if (unlikely(copy_from_user(&ts, timeout, sizeof(ts))))
return -EFAULT;
//(queue_nullb != NULL) ? printk("us ts.sec: %lld \n", ts.tv_sec) : -1;
until = timespec_to_ktime(ts);
//(queue_nullb != NULL) ? printk("reset timer: until.tv64: %lld \n", until.tv64) : -1;
}
/*
......@@ -1297,13 +1363,18 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
* the ringbuffer empty. So in practice we should be ok, but it's
* something to be aware of when touching this code.
*/
if (until.tv64 == 0)
if (until.tv64 == 0) {
//(queue_nullb != NULL) ? printk("regular read_ev \n") : -1;
//(check_fio() == true) ? printk("regular read min: %d nr: %d \n",min_nr, nr) : -1;
aio_read_events(ctx, min_nr, nr, event, &ret);
else
}
else {
//(queue_nullb != NULL) ? printk("waited read_ev \n") : -1;
//(check_fio() == true) ? printk("waited read min: %d nr: %d \n",min_nr, nr) : -1;
wait_event_interruptible_hrtimeout(ctx->wait,
aio_read_events(ctx, min_nr, nr, event, &ret),
until);
}
if (!ret && signal_pending(current))
ret = -EINTR;
......@@ -1446,6 +1517,7 @@ rw_common:
ret = aio_setup_vectored_rw(rw, buf, len,
&iovec, compat, &iter);
else {
//(check_fio() == true) ? printk("calling ISR -> len:%d \n", len) : -1;
ret = import_single_range(rw, buf, len, iovec, &iter);
iovec = NULL;
}
......@@ -1460,7 +1532,10 @@ rw_common:
if (rw == WRITE)
file_start_write(file);
//(check_fio() == true) ? printk("calling read/write op \n") : -1;
//(check_fio() == true) ? bench_start() : -1;
ret = iter_op(req, &iter);
//(check_fio() == true) ? bench_end() : -1;
if (rw == WRITE)
file_end_write(file);
......@@ -1495,6 +1570,7 @@ rw_common:
ret == -ERESTARTNOHAND ||
ret == -ERESTART_RESTARTBLOCK))
ret = -EINTR;
//(check_fio() == true) ? printk("***** Failing IO with EINTR \n") : -1;
aio_complete(req, ret, 0);
}
......@@ -1562,10 +1638,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->ki_user_iocb = user_iocb;
req->ki_user_data = iocb->aio_data;
//(check_fio() == true) ? printk("-> aio_run_iocb \n") : -1;
//(check_fio() == true) ? bench_start() : -1;
ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
(char __user *)(unsigned long)iocb->aio_buf,
iocb->aio_nbytes,
compat);
//(check_fio() == true) ? bench_end() : -1;
if (ret)
goto out_put_req;
......@@ -1594,12 +1674,13 @@ long do_io_submit(aio_context_t ctx_id, long nr,
if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
return -EFAULT;
//(check_fio() == true) ? bench_start() : -1;
ctx = lookup_ioctx(ctx_id);
if (unlikely(!ctx)) {
pr_debug("EINVAL: invalid context id\n");
return -EINVAL;
}
//(check_fio() == true) ? bench_end() : -1;
blk_start_plug(&plug);
/*
......@@ -1610,23 +1691,36 @@ long do_io_submit(aio_context_t ctx_id, long nr,
struct iocb __user *user_iocb;
struct iocb tmp;
//(check_fio() == true) ? bench_start_gu() : -1;
if (unlikely(__get_user(user_iocb, iocbpp + i))) {
ret = -EFAULT;
break;
}
//(check_fio() == true) ? bench_end_gu() : -1;
//(check_fio() == true) ? bench_start_cu() : -1;
if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
ret = -EFAULT;
break;
}
//(check_fio() == true) ? bench_end_cu() : -1;
//(check_fio() == true) ? printk("io_submit_one: --> %d \n", i) : -1;
//(check_fio() == true) ? bench_start_gu() : -1;
ret = io_submit_one(ctx, user_iocb, &tmp, compat);
//(check_fio() == true) ? bench_end_gu() : -1;
if (ret)
break;
}
//(check_fio() == true) ? printk(" -------> finish plug %d \n", i) : -1;
//(check_fio() == true) ? bench_start() : -1;
blk_finish_plug(&plug);
//(check_fio() == true) ? bench_end() : -1;
//(check_fio() == true) ? bench_start_gu() : -1;
percpu_ref_put(&ctx->users);
//(check_fio() == true) ? bench_end_gu() : -1;
return i ? i : ret;
}
......@@ -1645,7 +1739,13 @@ long do_io_submit(aio_context_t ctx_id, long nr,
SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
struct iocb __user * __user *, iocbpp)
{
return do_io_submit(ctx_id, nr, iocbpp, 0);
long ret = 0;
//(check_fio() == true) ? printk("--------------> syscall enter \n") : -1;
//(check_fio() == true) ? bench_start() : -1;
ret = do_io_submit(ctx_id, nr, iocbpp, 0);
//(check_fio() == true) ? printk("<-------------- syscall enter \n") : -1;
//(check_fio() == true) ? bench_end() : -1;
return ret;
}
/* lookup_kiocb
......
#ifndef _BDUMP_H_
#define _BDUMP_H_
void bdump_data(void);
#endif
#ifndef __BLK_LCD_H_
#define __BLK_LCD_H_
typedef struct cptr {
unsigned long cptr;
} cptr_t;
struct request_queue_container {
struct request_queue rq;
cptr_t other_ref;
cptr_t my_ref;
};
struct blk_mq_hw_ctx_container {
struct blk_mq_hw_ctx blk_mq_hw_ctx;
cptr_t other_ref;
cptr_t my_ref;
};
struct gendisk_container {
struct gendisk gendisk;
cptr_t other_ref;
cptr_t my_ref;
};
#endif
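/*
 * Illustrative sketch, not part of this patch: each container struct above
 * embeds the real kernel object, so the glue code can recover the container
 * (and its capability refs) from a pointer to the embedded object with
 * container_of(), as blk_free_queue_rcu() and disk_release() do elsewhere in
 * this change. The helper name below is hypothetical.
 */
static inline struct request_queue_container *
rq_container_of(struct request_queue *q)
{
	return container_of(q, struct request_queue_container, rq);
}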
......@@ -87,7 +87,15 @@ struct blk_mq_queue_data {
bool last;
};
struct blk_mq_queue_data_async {
struct list_head *rq_list;
struct list_head *list;
struct list_head *drv_list;
int *queued;
};
typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
typedef void (queue_rq_fn_async)(struct blk_mq_hw_ctx *, struct blk_mq_queue_data_async *);
typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
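/*
 * Illustrative sketch, not part of this patch: a driver-side handler matching
 * queue_rq_fn_async above. Which of the three list heads carries the pending
 * requests is not visible in this hunk, so treating rq_list as the submit
 * list (and *queued as a running count) is an assumption; the nullb_ name is
 * hypothetical.
 */
static void nullb_queue_rq_async(struct blk_mq_hw_ctx *hctx,
				 struct blk_mq_queue_data_async *bd)
{
	struct request *rq, *next;

	list_for_each_entry_safe(rq, next, bd->rq_list, queuelist) {
		list_del_init(&rq->queuelist);
		blk_mq_start_request(rq);
		/* ... issue rq to the device here ... */
		(*bd->queued)++;
	}
}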
......@@ -105,6 +113,10 @@ typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int);
struct blk_mq_ops {
/*
* Queue request asynchronously
*/
queue_rq_fn_async *queue_rq_async;
/*
* Queue request
*/
......@@ -229,6 +241,7 @@ void blk_mq_end_request(struct request *rq, int error);
void __blk_mq_end_request(struct request *rq, int error);
void blk_mq_requeue_request(struct request *rq);
void __blk_mq_requeue_request(struct request *rq);
void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
void blk_mq_cancel_requeue_work(struct request_queue *q);
void blk_mq_kick_requeue_list(struct request_queue *q);
......
......@@ -2,26 +2,11 @@
#define PRIV_MEMPOOL_H
#define MTU 1470
#define HEADERS 42
#define HEADERS 42
#define SKB_LCD_MEMBERS_SZ 48
#define SKB_SHARED_INFO (sizeof(struct skb_shared_info))
#define DATA_ALIGNED_SZ (SKB_DATA_ALIGN(MTU + HEADERS + SKB_LCD_MEMBERS_SZ))
#define SKB_DATA_SIZE (DATA_ALIGNED_SZ + SKB_DATA_ALIGN(SKB_SHARED_INFO))
#define SKB_CONTAINER_SIZE 128
typedef enum {
/* for skb->data */
SKB_DATA_POOL = 0,
/* for skb->page_frag */
SKB_FRAG_POOL,
/* for skb_container */
SKB_CONTAINER_POOL,
POOL_MAX,
} pool_type_t;
#define DATA_ALIGNED_SZ (SKB_DATA_ALIGN(MTU + HEADERS + SKB_LCD_MEMBERS_SZ))
#define SKB_DATA_SIZE (DATA_ALIGNED_SZ + SKB_DATA_ALIGN(SKB_SHARED_INFO))
struct object {
struct object *next;
......@@ -38,34 +23,28 @@ struct atom {
} __attribute__((aligned(16)));
typedef struct {
struct object __percpu **head;
struct object __percpu **marker;
#ifdef PBUF
char __percpu **buf;
char __percpu **bufend;
#endif
int __percpu *cached;
unsigned int obj_size;
unsigned int total_pages;
unsigned int num_objs_percpu;
unsigned int obj_size;
unsigned int total_objs;
unsigned int num_cpus;
void *pool;
void *gpool;
unsigned int num_objs_percpu;
void *base;
struct atom stack;
unsigned int pool_order;
spinlock_t pool_spin_lock;
bool dump_once;
struct dentry *pstats;
struct object __percpu **head;
struct object __percpu **marker;
int __percpu *cached;
} priv_pool_t;
void *priv_alloc(pool_type_t type);
void priv_free(void *p, pool_type_t type);
//priv_pool_t *priv_pool_init(pool_type_t type, unsigned int num_objs, unsigned int obj_size);
void *priv_alloc(priv_pool_t *pool);
void priv_free(priv_pool_t *pool, void *obj);
priv_pool_t *priv_pool_init(void *pool_base, size_t pool_size,
unsigned int obj_size, const char* name);
priv_pool_t *priv_pool_init(pool_type_t type, void *pool_base,
size_t pool_size,
unsigned int obj_size);
void priv_pool_destroy(priv_pool_t *p);
#endif /* PRIV_MEMPOOL_H */
......@@ -16,14 +16,14 @@
/**
* Creates the arch-dependent part of an LCD (e.g., the ept).
*/
int lcd_arch_create(struct lcd_arch **out);
int lcd_arch_create(struct lcd_arch **out, bool is_child);
/**
* Tears down arch-dep part of LCD.
*
* IMPORTANT: When the ept is torn down, any host memory that is still mapped
* will *not* be freed. Beware.
*/
void lcd_arch_destroy(struct lcd_arch *lcd_arch);
void lcd_arch_destroy(struct lcd_arch *lcd_arch, bool is_child);
/**
* Set the lcd's program counter to the guest virtual address
* a.
......@@ -46,6 +46,9 @@ int lcd_arch_set_sp(struct lcd_arch *lcd_arch, gva_t a);
* guest physical address a.
*/
int lcd_arch_set_gva_root(struct lcd_arch *lcd_arch, gpa_t a);
int lcd_arch_set_gs_base(struct lcd_arch *lcd_arch, gva_t a);
/**
* Allocate memory for the VMCS for an LCD.
*/
......
......@@ -110,6 +110,9 @@ void lcd_arch_ept_invept(u64 eptp);
* VMCS pointer, and the spinlock.
*/
int lcd_arch_ept_init(struct lcd_arch *lcd_arch);
#ifdef CONFIG_LCD_SINGLE_EPT
int lcd_arch_ept_child_init(struct lcd_arch *lcd_arch);
#endif
/**
* Free an LCD's EPT tables.
*
......
......@@ -124,6 +124,32 @@ static inline int lcd_syscall_five_args(int id,
return (int)ret;
}
static inline int lcd_syscall_six_args(int id,
unsigned long arg0,
unsigned long arg1,
unsigned long arg2,
unsigned long arg3,
unsigned long arg4,
unsigned long arg5)
{
long ret;
asm volatile(
"movq %7, %%r13 \n\t"
"movq %6, %%r12 \n\t"
"movq %5, %%r11 \n\t"
"movq %4, %%r10 \n\t"
"movq %3, %%r9 \n\t"
"movq %2, %%r8 \n\t"
"movq %1, %%rax \n\t"
"vmcall \n\t"
"movq %%rax, %0 \n\t"
: "=g" (ret)
: "g" (id), "g" (arg0), "g" (arg1), "g" (arg2), "g" (arg3),
"g" (arg4), "g" (arg5)
: "rax", "r8", "r9", "r10", "r11", "r12", "r13");
return (int)ret;
}
static inline void lcd_syscall_cap_delete(cptr_t cptr)
{
lcd_syscall_one_arg(LCD_SYSCALL_CAP_DELETE, cptr_val(cptr));
......@@ -152,11 +178,11 @@ static inline int lcd_syscall_create(cptr_t lcd_slot)
static inline int lcd_syscall_config_registers(cptr_t lcd, gva_t pc,
gva_t sp, gpa_t gva_root,
gpa_t utcb_page)
gpa_t utcb_page, gva_t gs_base)
{
return lcd_syscall_five_args(LCD_SYSCALL_CONFIG_REGISTERS,
return lcd_syscall_six_args(LCD_SYSCALL_CONFIG_REGISTERS,
cptr_val(lcd), gva_val(pc), gva_val(sp),
gpa_val(gva_root), gpa_val(utcb_page));
gpa_val(gva_root), gpa_val(utcb_page), gva_val(gs_base));
}
static inline int lcd_syscall_memory_grant_and_map(cptr_t lcd,
......
......@@ -252,7 +252,7 @@ int lcd_arch_create(struct lcd_arch **out);
* IMPORTANT: When the ept is torn down, any host memory that is still mapped
* will be freed. This is for convenience. But beware.
*/
void lcd_arch_destroy(struct lcd_arch *lcd_arch);
void lcd_arch_destroy(struct lcd_arch *lcd_arch, bool is_child);
/**
* Does logical consistency checks (e.g., runs through checks
* listed in Intel SDM V3 26.1, 26.2, and 26.3).
......@@ -415,6 +415,10 @@ static inline u64 lcd_arch_get_syscall_arg4(struct lcd_arch *lcd)
{
return lcd->regs.r12;
}
static inline u64 lcd_arch_get_syscall_arg5(struct lcd_arch *lcd)
{
return lcd->regs.r13;
}
static inline void lcd_arch_set_syscall_ret(struct lcd_arch *lcd, u64 val)
{
lcd->regs.rax = val;
......
......@@ -65,6 +65,10 @@ static inline u64 lcd_arch_get_syscall_arg4(struct lcd_arch *lcd)
{
return lcd->regs.r12;
}
static inline u64 lcd_arch_get_syscall_arg5(struct lcd_arch *lcd)
{
return lcd->regs.r13;
}
static inline void lcd_arch_set_syscall_ret(struct lcd_arch *lcd, u64 val)
{
lcd->regs.rax = val;
......
......@@ -165,6 +165,7 @@
#define LCD_BOOTSTRAP_PAGES_SIZE (1 * PAGE_SIZE) /* .......... 4 KBs */
#define LCD_BOOTSTRAP_PAGE_TABLES_SIZE (16 * PAGE_SIZE) /* ... 64 KBs */
#define LCD_STACK_SIZE (2 * PAGE_SIZE) /* .................... 8 KBs */
#define LCD_GLOBAL_SEGMENT_SIZE PAGE_SIZE /* ..................... 4 KBs */
/* Orders (for convenience) */
......@@ -174,22 +175,133 @@
(ilog2(LCD_BOOTSTRAP_PAGE_TABLES_SIZE >> PAGE_SHIFT))
#define LCD_STACK_ORDER \
(ilog2(LCD_STACK_SIZE >> PAGE_SHIFT))
#define LCD_GLOBAL_SEGMENT_ORDER \
(ilog2(LCD_GLOBAL_SEGMENT_SIZE >> PAGE_SHIFT))
/* Offsets. */
#define LCD_MISC_REGION_OFFSET (1UL << 30)
#define LCD_UTCB_OFFSET LCD_MISC_REGION_OFFSET
#define LCD_BOOTSTRAP_PAGES_OFFSET (LCD_UTCB_OFFSET + LCD_UTCB_SIZE)
#ifndef CONFIG_LCD_SINGLE_EPT
#define LCD_BOOTSTRAP_PAGE_TABLES_OFFSET \
(LCD_BOOTSTRAP_PAGES_OFFSET + LCD_BOOTSTRAP_PAGES_SIZE)
#define LCD_STACK_PROT_PAGE_OFFSET \
(LCD_BOOTSTRAP_PAGE_TABLES_OFFSET+ LCD_BOOTSTRAP_PAGE_TABLES_SIZE)
#endif /* CONFIG_LCD_SINGLE_EPT */
/* HOLE */
#define LCD_STACK_REGION_OFFSET \
(LCD_MISC_REGION_OFFSET + LCD_MISC_REGION_SIZE + (1UL << 30))
#define LCD_STACK_OFFSET \
(LCD_STACK_REGION_OFFSET + LCD_STACK_REGION_SIZE - LCD_STACK_SIZE)
#ifdef CONFIG_LCD_SINGLE_EPT
/*
* When multiple LCDs share a single EPT, each LCD still needs its own
* private UTCB, bootstrap pages, global-segment page, and stack. The LCD's
* ID is used to offset into each region, so the UTCB, bootstrap-pages,
* global-segment, and stack regions are partitioned into per-LCD slots.
*/
/*
* +---------------------------+ 0x0000 0001 0000 0000 (4 GB)
* | Stack for LCD 0 |
* +---------------------------+ (4 GB - STACK_SZ (8 KB) - (0 * STACK_SZ)
* | Stack for LCD 1 |
* +---------------------------+ (4 GB - STACK_SZ (8 KB) - (1 * STACK_SZ)
* | Stack for LCD 2 |
* +---------------------------+ (4 GB - STACK_SZ (8 KB) - (2 * STACK_SZ)
* | . |
* | . |
* +---------------------------+ (4 GB - STACK_SZ (8 KB) - ((N-2) * STACK_SZ)
* | Stack for LCD N |
* +---------------------------+ (4 GB - STACK_SZ (8 KB) - ((N-1) * STACK_SZ)
* | Stack Region |
* +---------------------------+ 0x0000 0000 c000 0000 (3 GB)
* | HOLE / Unmapped |
* | (1 GB) |
* +---------------------------+ 0x0000 0000 8000 0000 (2 GB)
* | Bootstrap Pagetable |
* | Pages (256 MB) |
* +---------------------------+ (1 GB + 768 MB)
* | Global segment |
* | Pages (256MB) |
* +---------------------------+ (1 GB + 512 MB)
* | Bootstrap Pages |
* | (256MB) |
* +---------------------------+ (1 GB + 256 MB)
* | UTCB Pages |
* | (256MB) |
* +---------------------------+ 0x0000 0000 4000 0000 (1 GB)
*/
/* Region Sizes */
#define LCD_UTCB_REGION_SIZE (1UL << 28) /* .................. 256 MBs */
#define LCD_BOOTSTRAP_PAGES_REGION_SIZE (1UL << 28) /* ........... 256 MBs */
#define LCD_GLOBAL_SEGMENT_PAGE_REGION_SIZE (1UL << 28) /* ........... 256 MBs */
/* Region offsets */
#define LCD_BOOTSTRAP_PAGES_REGION_OFFSET \
(LCD_UTCB_OFFSET + LCD_UTCB_REGION_SIZE)
#define LCD_GLOBAL_SEGMENT_PAGE_REGION_OFFSET \
(LCD_BOOTSTRAP_PAGES_REGION_OFFSET + LCD_BOOTSTRAP_PAGES_REGION_SIZE)
#define LCD_BOOTSTRAP_PAGE_TABLES_REGION_OFFSET \
(LCD_GLOBAL_SEGMENT_PAGE_REGION_OFFSET+ LCD_GLOBAL_SEGMENT_PAGE_REGION_SIZE)
#define LCD_BOOTSTRAP_PAGE_TABLES_OFFSET LCD_BOOTSTRAP_PAGE_TABLES_REGION_OFFSET
#define LCD_GLOBAL_SEGMENT_PAGE_OFFSET LCD_GLOBAL_SEGMENT_PAGE_REGION_OFFSET
/* Offsets for LCDs within a region */
#define LCD_UTCB_OFFSET_CHILD(id) \
(LCD_MISC_REGION_OFFSET + ((id) * LCD_UTCB_SIZE))
#define LCD_BOOTSTRAP_PAGES_OFFSET_CHILD(id) (LCD_BOOTSTRAP_PAGES_REGION_OFFSET \
+ ((id) * LCD_BOOTSTRAP_PAGES_SIZE))
#define LCD_GLOBAL_SEGMENT_PAGE_OFFSET_CHILD(id) (LCD_GLOBAL_SEGMENT_PAGE_REGION_OFFSET \
+ ((id) * LCD_GLOBAL_SEGMENT_SIZE))
#define LCD_STACK_OFFSET_CHILD(id) \
(LCD_STACK_REGION_OFFSET + LCD_STACK_REGION_SIZE \
- LCD_STACK_SIZE - ((id) * LCD_STACK_SIZE))
/* GVA/GPA addresses */
/* UTCB pages */
#define LCD_UTCB_GP_ADDR_CHILD(id) (__gpa(LCD_PHYS_BASE \
+ LCD_UTCB_OFFSET + ((id) * LCD_UTCB_SIZE)))
#define LCD_UTCB_GV_ADDR_CHILD(id) (__gva(LCD_VIRT_BASE \
+ LCD_UTCB_OFFSET + ((id) * LCD_UTCB_SIZE)))
/* Bootstrap pages */
#define LCD_BOOTSTRAP_PAGES_GP_ADDR_CHILD(id) \
(__gpa(LCD_PHYS_BASE + LCD_BOOTSTRAP_PAGES_OFFSET_CHILD(id)))
#define LCD_BOOTSTRAP_PAGES_GV_ADDR_CHILD(id) \
(__gva(LCD_VIRT_BASE + LCD_BOOTSTRAP_PAGES_OFFSET_CHILD(id)))
/* global segment pages */
#define LCD_GLOBAL_SEGMENT_PAGE_GP_ADDR_CHILD(id) \
(__gpa(LCD_PHYS_BASE + LCD_GLOBAL_SEGMENT_PAGE_OFFSET_CHILD(id)))
#define LCD_GLOBAL_SEGMENT_PAGE_GV_ADDR_CHILD(id) \
(__gva(LCD_VIRT_BASE + LCD_GLOBAL_SEGMENT_PAGE_OFFSET_CHILD(id)))
/* Stack pages */
#define LCD_STACK_GP_ADDR_CHILD(id) (__gpa(LCD_PHYS_BASE \
+ LCD_STACK_OFFSET_CHILD(id)))
#define LCD_STACK_GV_ADDR_CHILD(id) (__gva(LCD_VIRT_BASE \
+ LCD_STACK_OFFSET_CHILD(id)))
#define LCD_ARCH_GS_BASE_CHILD(id) __gpa(LCD_PHYS_BASE \
+ LCD_GLOBAL_SEGMENT_PAGE_OFFSET_CHILD(id))
#define LCD_ARCH_GS_GV_BASE_CHILD(id) __gva(LCD_VIRT_BASE \
+ LCD_GLOBAL_SEGMENT_PAGE_OFFSET_CHILD(id))
#endif /* CONFIG_LCD_SINGLE_EPT */
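/*
 * Illustrative sketch, not part of this patch: how a creator might plug the
 * per-child addresses above into the six-argument register-config call added
 * by this change. The helper and its calling context are hypothetical; only
 * the macros and lcd_syscall_config_registers() come from the patch.
 */
static inline int lcd_config_child_example(cptr_t lcd, int id,
					gva_t pc, gpa_t gva_root)
{
	/* stack pointer starts at the top of this child's stack slot */
	gva_t sp = __gva(gva_val(LCD_STACK_GV_ADDR_CHILD(id)) + LCD_STACK_SIZE);

	return lcd_syscall_config_registers(lcd, pc, sp, gva_root,
					LCD_UTCB_GP_ADDR_CHILD(id),
					LCD_ARCH_GS_GV_BASE_CHILD(id));
}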
/* HOLE */
#define LCD_HEAP_REGION_OFFSET \
......@@ -253,6 +365,7 @@
#define LCD_ARCH_FS_BASE __gpa(0UL)
#define LCD_ARCH_FS_LIMIT 0xFFFFFFFF
#define LCD_ARCH_GS_BASE __gpa(0UL)
#define LCD_ARCH_GS_GV_BASE __gva(0UL)
#define LCD_ARCH_GS_LIMIT 0xFFFFFFFF
#define LCD_ARCH_GDTR_BASE __gpa(0UL)
#define LCD_ARCH_GDTR_LIMIT 0x0 /* no gdt */
......
......@@ -866,7 +866,7 @@ static void vmx_free_vpid(struct lcd_arch *lcd_arch)
spin_unlock(&lcd_vpids.lock);
}
int lcd_arch_create(struct lcd_arch **out)
int lcd_arch_create(struct lcd_arch **out, bool is_child)
{
struct lcd_arch *lcd_arch;
int ret;
......@@ -879,13 +879,19 @@ int lcd_arch_create(struct lcd_arch **out)
ret = -ENOMEM;
goto fail_alloc;
}
/*
* Set up ept
*/
ret = lcd_arch_ept_init(lcd_arch);
if (ret) {
LCD_ERR("setting up etp");
goto fail_ept;
if (is_child) {
printk("%s: initializing EPT for child\n", __func__);
ret = lcd_arch_ept_child_init(lcd_arch);
} else {
ret = lcd_arch_ept_init(lcd_arch);
}
if (ret) {
LCD_ERR("setting up ept");
goto fail_ept;
}
/*
* Alloc vmcs
......@@ -930,7 +936,7 @@ fail_alloc:
return ret;
}
void lcd_arch_destroy(struct lcd_arch *lcd_arch)
void lcd_arch_destroy(struct lcd_arch *lcd_arch, bool is_child)
{
/*
* Premption Disabled
......@@ -964,7 +970,8 @@ void lcd_arch_destroy(struct lcd_arch *lcd_arch)
*/
vmx_free_vpid(lcd_arch);
lcd_arch_free_vmcs(lcd_arch->vmcs);
lcd_arch_ept_free(lcd_arch);
if (!is_child)
lcd_arch_ept_free(lcd_arch);
kmem_cache_free(lcd_arch_cache, lcd_arch);
}
......@@ -1061,3 +1068,12 @@ void lcd_arch_irq_enable(struct lcd_arch *lcd_arch)
PIN_BASED_EXT_INTR_MASK);
vmx_put_cpu(lcd_arch);
}
int lcd_arch_set_gs_base(struct lcd_arch *lcd_arch, gva_t a)
{
lcd_arch->regs.gs = gva_val(a);
vmx_get_cpu(lcd_arch);
vmcs_writel(GUEST_GS_BASE, gva_val(a));
vmx_put_cpu(lcd_arch);
return 0;
}
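/*
 * Illustrative sketch, not part of this patch: the new hook would typically
 * be paired with the per-child global-segment mapping from the address-map
 * header, e.g. in the create/config path. The caller shown here is
 * hypothetical.
 */
static int example_set_child_gs(struct lcd_arch *lcd_arch, int lcd_id)
{
	return lcd_arch_set_gs_base(lcd_arch, LCD_ARCH_GS_GV_BASE_CHILD(lcd_id));
}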