Commit ed1a5fbd authored by Vikram Narayanan

test_mods/nullb: Support hctx > 1

With a single hctx, multi-threaded fio bottlenecks on accessing an atomic
variable. To avoid contention on a single global variable, add support for
running null_blk with as many hctxs as there are CPUs.
parent 82fa2133
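
For orientation, a minimal userspace sketch (assumed names and sizes, not the kernel glue itself) of the scheme the diff below introduces: each hardware context gets its own struct request_map, selected by queue_num, so concurrent submission queues no longer funnel through one shared global rq_map. The struct request stand-in and the constants are illustrative only.

/*
 * Illustrative sketch of per-hctx request maps. Mirrors the diff's
 * struct request_map / maps[] layout; struct request is a stand-in.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct request { int tag; };

struct request_map {
        int queue_num;
        struct request **rq_map;
        bool init_done;
};

static struct request_map **maps;
static unsigned int submit_queues = 4;  /* e.g. one queue per CPU */
static int hw_depth = 64;               /* hardware queue depth */

/* Lazily allocate the per-hctx map, as init_request() does in the diff. */
static int map_init(unsigned int hctx_idx)
{
        if (!maps)
                maps = calloc(submit_queues, sizeof(*maps));
        if (!maps)
                return -1;
        if (!maps[hctx_idx]) {
                maps[hctx_idx] = calloc(1, sizeof(**maps));
                if (!maps[hctx_idx])
                        return -1;
                maps[hctx_idx]->queue_num = hctx_idx;
                maps[hctx_idx]->rq_map = calloc(hw_depth + submit_queues,
                                                sizeof(struct request *));
                if (!maps[hctx_idx]->rq_map)
                        return -1;
                maps[hctx_idx]->init_done = true;
        }
        return 0;
}

int main(void)
{
        struct request rq = { .tag = 7 };
        unsigned int queue_num = 2;

        if (map_init(queue_num))
                return 1;
        /* Store and look up by (queue_num, tag), as the KLCD glue now does. */
        maps[queue_num]->rq_map[rq.tag] = &rq;
        printf("queue %u tag %d -> %p\n", queue_num, rq.tag,
               (void *)maps[queue_num]->rq_map[rq.tag]);
        return 0;
}
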
......@@ -20,12 +20,20 @@ int null_major;
struct gendisk *disk_g;
struct request_queue *rq_g;
struct blk_mq_tag_set *set_g;
unsigned int submit_queues;
extern struct request_queue *queue_nullb;
/* hack for init_request */
int hw_depth;
struct request **rq_map;
struct request_map {
int queue_num;
struct request **rq_map;
bool init_done;
};
struct request_map **maps;
struct async_item {
int pid;
......@@ -228,12 +236,14 @@ int blk_mq_end_request_callee(struct fipc_message *request, struct thc_channel *
//unsigned int request_cookie;
int ret = 0;
int error = 0;
int tag = fipc_get_reg0(request);
int queue_num = fipc_get_reg1(request);
//request_cookie = thc_get_request_cookie(request);
rq = rq_map[fipc_get_reg0(request)];
rq = maps[queue_num]->rq_map[tag];
error = fipc_get_reg1(request);
error = fipc_get_reg2(request);
//printk("[KLCD] END_REQ for rq_map_tag %ld rq->tag %d req: %p\n", fipc_get_reg0(request), rq->tag, rq);
fipc_recv_msg_end(thc_channel_to_fipc(channel), request);
......@@ -319,13 +329,14 @@ int blk_mq_start_request_callee(struct fipc_message *request, struct thc_channel
//unsigned int request_cookie;
int ret = 0;
int tag = fipc_get_reg0(request);
int queue_num = fipc_get_reg1(request);
//MARKER_BEGIN(blk_mq_end_req);
//request_cookie = thc_get_request_cookie(request);
//BENCH_BEGIN(async_reply);
//WARN_ON(tag >= hw_depth);
rq = rq_map[tag];
rq = maps[queue_num]->rq_map[tag];
//printk("[KLCD] START_REQ for rq_map_tag %ld rq->tag %d rq: %p \n", fipc_get_reg0(request), rq->tag, rq);
fipc_recv_msg_end(thc_channel_to_fipc(channel), request);
......@@ -1395,8 +1406,12 @@ fail_ipc:
int _queue_rq_fn(struct blk_mq_hw_ctx *ctx, const struct blk_mq_queue_data *bd, struct trampoline_hidden_args *hidden_args)
{
int ret;
static int once = 0;
struct fipc_message *request;
struct fipc_message *response;
struct thc_channel *chnl;
struct blk_mq_ops_container *ops_container;
struct blk_mq_hw_ctx_container *ctx_container;
......@@ -1407,17 +1422,27 @@ int _queue_rq_fn(struct blk_mq_hw_ctx *ctx, const struct blk_mq_queue_data *bd,
return ret;
}
printk("^^^^^^^^^^ [Klcd-queue-rq] enter ^^^^^^^^^^^ \n");
if (thc_pts_get_state() == true) {
chnl = thc_pts_get_chnl();
} else {
chnl = hidden_args->async_chnl;
}
//printk("^^^^^^^^^^ [Klcd-queue-rq] enter ^^^^^^^^^^^ \n");
/* XXX Beware! hwctx can be unique per hw context of the driver; if multiple
 * exist, we need one cspace insert function per hwctx. This should be handled
 * in the init_hctx routine. */
if (once && !strncmp(current->comm, "fio", strlen("fio"))) {
dump_stack();
once = 0;
}
ctx_container = container_of(ctx, struct blk_mq_hw_ctx_container, blk_mq_hw_ctx);
ops_container = (struct blk_mq_ops_container *)hidden_args->struct_container;
printk("hctx %p | cptr %lu\n",ctx, ctx_container->other_ref.cptr);
//printk("hctx %p | cptr %lu\n",ctx, ctx_container->other_ref.cptr);
ret = async_msg_blocking_send_start(hidden_args->async_chnl, &request);
ret = async_msg_blocking_send_start(chnl, &request);
if (ret) {
LIBLCD_ERR("failed to get a send slot");
goto fail_async;
......@@ -1428,26 +1453,70 @@ int _queue_rq_fn(struct blk_mq_hw_ctx *ctx, const struct blk_mq_queue_data *bd,
fipc_set_reg0(request, ctx->queue_num);
fipc_set_reg1(request, ctx_container->other_ref.cptr);
fipc_set_reg2(request, ops_container->other_ref.cptr);
fipc_set_reg3(request, bd->rq->tag);
/* send message */
thc_set_msg_type(request, msg_type_request);
fipc_send_msg_end(thc_channel_to_fipc(chnl), request);
#ifdef SENDER_DISPATCH_LOOP
/* for blk_mq_start_request */
ret = thc_ipc_recv_resp_noyield(chnl, &response);
if (ret) {
printk(KERN_ERR "error receiving response\n");
goto fail_async;
}
// printk("%s:%d, got msg type %d\n",
// __func__, __LINE__,
// async_msg_get_fn_type(response));
dispatch_async_loop(chnl, response, c_cspace,
hidden_args->sync_ep);
/* for blk_mq_end_request */
ret = thc_ipc_recv_resp_noyield(chnl, &response);
if (ret) {
printk(KERN_ERR "error receiving response\n");
goto fail_async;
}
// printk("%s:%d, got msg type %d\n",
// __func__, __LINE__,
// async_msg_get_fn_type(response));
dispatch_async_loop(chnl, response, c_cspace,
hidden_args->sync_ep);
ret = fipc_msg_blocking_recv_start(chnl, &response);
if (ret) {
printk(KERN_ERR "thc_ipc_call: error receiving response\n");
goto fail_async;
}
#else
ret = thc_ipc_call(chnl, request, &response);
ret = thc_ipc_call(hidden_args->async_chnl, request, &response);
if (ret) {
LIBLCD_ERR("thc_ipc_call");
goto fail_ipc;
}
blk_mq_start_request(bd->rq);
blk_mq_end_request(bd->rq, 0);
#endif
//blk_mq_start_request(bd->rq);
//blk_mq_end_request(bd->rq, 0);
/* function ret - makes no sense now but keeping it this way! */
ret = fipc_get_reg0(response);
fipc_recv_msg_end(thc_channel_to_fipc(hidden_args->async_chnl), response);
printk("^^^^^ [Klcd-queue-rq] done ^^^^^^ \n");
return BLK_MQ_RQ_QUEUE_OK;
fipc_recv_msg_end(thc_channel_to_fipc(chnl), response);
//printk("^^^^^ [Klcd-queue-rq] done ^^^^^^ \n");
return ret;
//return ret;
fail_async:
#ifndef SENDER_DISPATCH_LOOP
fail_ipc:
#endif
printk("[Klcd-queue-rq] done with err \n");
return ret;
}
......@@ -1943,20 +2012,46 @@ void queue_rq_async(struct blk_mq_hw_ctx *ctx, struct blk_mq_queue_data_async *b
int init_request(void *data, struct request *req, unsigned int hctx_idx, unsigned int rq_idx,
unsigned int numa_node)
{
static int init_done = 0;
//int init_done = 0;
if (!maps) {
maps = kzalloc(sizeof(struct request_map*) * submit_queues,
GFP_KERNEL);
if (!maps) {
LIBLCD_ERR("cannot allocate mem for maps");
return -1;
}
printk("%s, initializing maps: %p!\n", __func__, maps);
}
if (!maps[hctx_idx]) {
maps[hctx_idx] = kzalloc(sizeof(struct request_map),
GFP_KERNEL);
if (!maps[hctx_idx]) {
LIBLCD_ERR("cannot allocate mem for maps[hctx_idx]");
return -1;
}
printk("%s, initializing maps: %p!\n", __func__, maps[hctx_idx]);
}
if(!init_done) {
rq_map = kzalloc((sizeof(struct request *) * (hw_depth + 1)),
GFP_KERNEL);
if(!rq_map) {
LIBLCD_ERR("cannot allocate mem for rq_map");
if (!maps[hctx_idx]->init_done) {
maps[hctx_idx]->rq_map = kzalloc((sizeof(struct request *) *
(hw_depth + submit_queues)),
GFP_KERNEL);
if (!maps[hctx_idx]->rq_map) {
LIBLCD_ERR("cannot allocate mem for maps.rq_map");
return -1;
}
init_done = 1;
printk("%s, initializing maps[hctx_idx:%d]->rq_map:%p\n",
__func__, hctx_idx, maps[hctx_idx]->rq_map);
maps[hctx_idx]->init_done = true;
}
rq_map[rq_idx] = req;
printk("init_request: req %p req->tag %d rq_idx %d \n",req, req->tag, rq_idx);
printk("%s, maps[hctx_idx:%d]->rq_map[rq_idx:%d] = %p, req->tag: %d\n", __func__, hctx_idx, rq_idx, req, req->tag);
maps[hctx_idx]->rq_map[rq_idx] = req;
return 0;
}
......@@ -2836,6 +2931,7 @@ int blk_mq_alloc_tag_set_callee(struct fipc_message *request, struct thc_channel
/* Get the rest of the members from LCD */
set_container->blk_mq_tag_set.nr_hw_queues = fipc_get_reg1(request);
submit_queues = fipc_get_reg1(request);
set_container->blk_mq_tag_set.queue_depth = fipc_get_reg2(request);
hw_depth = set_container->blk_mq_tag_set.queue_depth;
set_container->blk_mq_tag_set.reserved_tags = fipc_get_reg3(request);
......
......@@ -363,13 +363,14 @@ static void handle_loop(long id)
ret = async_loop(&drv, &curr_item, &msg);
if (!ret) {
#if 0
if (async_msg_get_fn_type(msg) == BLK_MQ_END_REQUEST) {
printk("KLCD: end_request seen \n");
}
if (async_msg_get_fn_type(msg) == BLK_MQ_START_REQUEST) {
printk("KLCD: start_request seen \n");
}
#endif
//(async_msg_get_fn_type(msg) == BLK_MQ_END_REQUEST) ? count ++ : -1;
//(async_msg_get_fn_type(msg) == BLK_MQ_START_REQUEST) ? bench_start() : -1;
//((id == 0) && (async_msg_get_fn_type(msg) == BLK_MQ_END_REQUEST)) ? marker_begin() : -1;
......@@ -533,13 +534,15 @@ static void loop(cptr_t register_chnl)
ret = async_loop(&drv, &curr_item, &msg);
if (!ret) {
#if 0
if (async_msg_get_fn_type(msg) == BLK_MQ_END_REQUEST) {
printk("KLCD: end_request seen \n");
}
if (async_msg_get_fn_type(msg) == BLK_MQ_START_REQUEST) {
printk("KLCD: start_request seen \n");
}
#endif
//(async_msg_get_fn_type(msg) == BLK_MQ_END_REQUEST) ? count ++ : -1;
//(async_msg_get_fn_type(msg) == BLK_MQ_START_REQUEST) ? bench_start() : -1;
//((id == 0) && (async_msg_get_fn_type(msg) == BLK_MQ_END_REQUEST)) ? marker_begin() : -1;
......
......@@ -108,7 +108,7 @@ struct request_queue_container {
cptr_t other_ref;
cptr_t my_ref;
};
#define NUM_CPUS 32
#if NUM_LCDS == 1
#define NUM_LCD_CPUS (NUM_LCDS + 1)
#define MAX_CHANNELS_PER_LCD (NUM_CPUS - NUM_LCD_CPUS)
......@@ -117,6 +117,7 @@ struct request_queue_container {
#define MAX_CHANNELS_PER_LCD 15
#define NUM_THREADS_ON_NODE0 5
#elif NUM_LCDS == 4
#define NUM_LCD_CPUS (NUM_LCDS + 1)
/* total LCD cores = 5 (lcds=4,klcd=1), free cores = 15 */
#define MAX_CHANNELS_PER_LCD 7
#define NUM_THREADS_ON_NODE0 6
......
......@@ -23,12 +23,14 @@ extern struct thc_channel_group ch_grp[NUM_LCDS];
struct thc_channel *disp_chnl[2];
struct blk_mq_hw_ctx_container *ctx_container_g;
struct blk_mq_hw_ctx_container *ctx_containers[NUM_CPUS - NUM_LCD_CPUS];
struct blk_mq_ops_container *ops_container_g;
struct lcd_request_container {
struct request rq;
void *channel;
unsigned int cookie;
unsigned int queue_num;
};
#ifdef CONFIG_PREALLOC_CHANNELS
......@@ -641,7 +643,8 @@ void blk_mq_end_request(struct request *rq, int error)
async_msg_set_fn_type(request, BLK_MQ_END_REQUEST);
fipc_set_reg0(request, rq->tag);
fipc_set_reg1(request, error);
fipc_set_reg1(request, rq_c->queue_num);
fipc_set_reg2(request, error);
thc_set_msg_type(request, msg_type_request);
#ifdef SENDER_DISPATCH_LOOP
......@@ -752,6 +755,7 @@ void blk_mq_start_request(struct request *rq)
//printk("[LCD_GLUE] START_REQ slot obtained for tag %d \n",rq->tag);
async_msg_set_fn_type(request, BLK_MQ_START_REQUEST);
fipc_set_reg0(request, rq->tag);
fipc_set_reg1(request, rq_c->queue_num);
//printk("[LCD_GLUE] START_REQ ipc_call-> rq->tag: %d \n",rq->tag);
//ret = thc_ipc_call(blk_async_chnls[CURRENT_LCD_ID], request, &response);
......@@ -1417,13 +1421,14 @@ int queue_rq_fn_callee(struct fipc_message *request, struct thc_channel *channel
{
struct fipc_message *response;
unsigned int request_cookie;
struct blk_mq_hw_ctx_container *ctx_container = ctx_container_g;
struct blk_mq_hw_ctx_container *ctx_container;
struct blk_mq_ops_container *ops_container = ops_container_g;
struct blk_mq_queue_data bd;
struct lcd_request_container rq_c;
struct request *rq = &rq_c.rq;
int ret;
int func_ret = 0;
int queue_num;
request_cookie = thc_get_request_cookie(request);
//printk("[LCD] queue_rq glue called \n");
......@@ -1433,8 +1438,9 @@ int queue_rq_fn_callee(struct fipc_message *request, struct thc_channel *channel
// LIBLCD_ERR("lookup");
// goto fail_lookup;
// }
ctx_container->blk_mq_hw_ctx.queue_num = fipc_get_reg0(request);
queue_num = fipc_get_reg0(request);
ctx_container = ctx_containers[queue_num];
ctx_container->blk_mq_hw_ctx.queue_num = queue_num;
// ret = glue_cap_lookup_blk_mq_ops_type(c_cspace, __cptr(fipc_get_reg2(request)),
// &ops_container);
......@@ -1450,6 +1456,7 @@ int queue_rq_fn_callee(struct fipc_message *request, struct thc_channel *channel
#ifdef SENDER_DISPATCH_LOOP
rq_c.channel = channel;
rq_c.cookie = request_cookie;
rq_c.queue_num = queue_num;
#endif
#if 1
func_ret = ops_container->blk_mq_ops.queue_rq(&ctx_container->blk_mq_hw_ctx,
......@@ -1539,7 +1546,11 @@ int init_hctx_fn_callee(struct fipc_message *request, struct thc_channel *channe
goto fail_alloc;
}
ctx_container_g = ctx_container;
//ctx_container_g = ctx_container;
index = fipc_get_reg1(request);
ctx_containers[index] = ctx_container;
ret = glue_cap_insert_blk_mq_hw_ctx_type(c_cspace, ctx_container, &ctx_container->my_ref);
if (ret) {
LIBLCD_ERR("lcd insert");
......@@ -1547,7 +1558,6 @@ int init_hctx_fn_callee(struct fipc_message *request, struct thc_channel *channe
}
ctx_container->other_ref.cptr = fipc_get_reg0(request);
index = fipc_get_reg1(request);
ret = glue_cap_lookup_blk_mq_ops_type(c_cspace,
__cptr(fipc_get_reg2(request)), &ops_container);
......
......@@ -15,6 +15,8 @@
#include <linux/lightnvm.h>
#include <liblcd/spinlock.h>
#include "../glue_helper.h"
#ifdef LCD_ISOLATE
#include <lcd_config/post_hook.h>
#endif
......@@ -45,12 +47,6 @@ struct nullb_queue {
struct nullb_cmd *cmds;
};
struct blk_mq_tag_set_container {
struct blk_mq_tag_set set;
u64 ref1;
u64 ref2;
};
struct nullb {
struct list_head list;
unsigned int index;
......@@ -95,7 +91,7 @@ enum {
#ifdef LCD_ISOLATE
/* TODO: hardcode the value that nr_online_cpus returns;
 * nr_online_cpus is unlikely to be accessible from here. */
static int submit_queues;
static int submit_queues = NUM_CPUS - NUM_LCD_CPUS;
#else
static int submit_queues;
module_param(submit_queues, int, S_IRUGO);
......@@ -514,14 +510,8 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
}
#ifdef LCD_ISOLATE
struct blk_mq_ops_container {
struct blk_mq_ops mq_ops;
u64 ref1;
u64 ref2;
};
static struct blk_mq_ops_container null_mq_ops_container = {
.mq_ops = {
.blk_mq_ops = {
.queue_rq = null_queue_rq,
.map_queue = blk_mq_map_queue,
.init_hctx = null_init_hctx,
......@@ -567,7 +557,7 @@ static void null_del_dev(struct nullb *nullb)
printk("calling blk_cleanup \n");
blk_cleanup_queue(nullb->q);
if (queue_mode == NULL_Q_MQ)
blk_mq_free_tag_set(&nullb->tag_set_container->set);
blk_mq_free_tag_set(&nullb->tag_set_container->blk_mq_tag_set);
if (!use_lightnvm)
put_disk(nullb->disk);
cleanup_queues(nullb);
......@@ -723,21 +713,11 @@ static void null_release(struct gendisk *disk, fmode_t mode)
}
#ifdef LCD_ISOLATE
struct block_device_operations_container {
struct block_device_operations null_fops;
u64 ref1;
u64 ref2;
};
struct module_container {
struct module module;
u64 ref1;
u64 ref2;
};
static struct module_container module_container;
static const struct block_device_operations_container null_ops_container = {
.null_fops = {
.block_device_operations = {
.owner = &module_container.module,
.open = null_open,
.release = null_release,
......@@ -842,8 +822,10 @@ static int null_add_dev(void)
#endif
//spin_lock_init(&nullb->lock);
if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
submit_queues = nr_online_nodes;
printk("submit_queues %d \n",submit_queues);
}
rv = setup_queues(nullb);
if (rv)
......@@ -851,32 +833,32 @@ static int null_add_dev(void)
if (queue_mode == NULL_Q_MQ) {
#ifdef LCD_ISOLATE
nullb->tag_set_container->set.ops = &null_mq_ops_container.mq_ops;
nullb->tag_set_container->set.nr_hw_queues = submit_queues;
nullb->tag_set_container->blk_mq_tag_set.ops = &null_mq_ops_container.blk_mq_ops;
nullb->tag_set_container->blk_mq_tag_set.nr_hw_queues = submit_queues;
printk("submit_queues %d \n",submit_queues);
nullb->tag_set_container->set.queue_depth = hw_queue_depth;
nullb->tag_set_container->set.numa_node = home_node;
nullb->tag_set_container->set.cmd_size = sizeof(struct nullb_cmd);
nullb->tag_set_container->set.flags = BLK_MQ_F_SHOULD_MERGE;
nullb->tag_set_container->blk_mq_tag_set.queue_depth = hw_queue_depth;
nullb->tag_set_container->blk_mq_tag_set.numa_node = home_node;
nullb->tag_set_container->blk_mq_tag_set.cmd_size = sizeof(struct nullb_cmd);
nullb->tag_set_container->blk_mq_tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
/* TODO allocate memory for nullb in the klcd glue, exchange my_ref
and other_ref */
nullb->tag_set_container->set.driver_data = nullb;
nullb->tag_set_container->blk_mq_tag_set.driver_data = nullb;
rv = blk_mq_alloc_tag_set(&nullb->tag_set_container->set);
rv = blk_mq_alloc_tag_set(&nullb->tag_set_container->blk_mq_tag_set);
printk("blk_mq alloc tag set retruns %d \n",rv);
if (rv)
goto out_cleanup_queues;
/*TODO AB - allocate request_queue container in the glue. IDL doesn't
create container for ptr returned! */
nullb->q = blk_mq_init_queue(&nullb->tag_set_container->set);
nullb->q = blk_mq_init_queue(&nullb->tag_set_container->blk_mq_tag_set);
if (IS_ERR(nullb->q)) {
rv = -ENOMEM;
printk("blk_mq_init_queue returns NULL! \n");
goto out_cleanup_tags;
}
#else
nullb->tag_set.ops = &null_mq_ops_container.mq_ops;
nullb->tag_set.ops = &null_mq_ops_container.blk_mq_ops;
nullb->tag_set.nr_hw_queues = submit_queues;
nullb->tag_set.queue_depth = hw_queue_depth;
nullb->tag_set.numa_node = home_node;
......@@ -955,7 +937,7 @@ static int null_add_dev(void)
disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
disk->major = null_major;
disk->first_minor = nullb->index;
disk->fops = &null_ops_container.null_fops;
disk->fops = &null_ops_container.block_device_operations;
/* TODO Nullb's memory will be allocated in the klcd glue, so my_ref for nullb
should be marshalled here */
disk->private_data = nullb;
......@@ -989,7 +971,7 @@ out_cleanup_blk_queue:
out_cleanup_tags:
if (queue_mode == NULL_Q_MQ)
blk_mq_free_tag_set(&nullb->tag_set_container->set);
blk_mq_free_tag_set(&nullb->tag_set_container->blk_mq_tag_set);
out_cleanup_queues:
cleanup_queues(nullb);
......@@ -1038,11 +1020,13 @@ int null_init(void)
nr_online_nodes);
submit_queues = nr_online_nodes;
}
} else if (submit_queues > nr_cpu_ids)
}
#if 0
else if (submit_queues > nr_cpu_ids)
submit_queues = nr_cpu_ids;
else if (!submit_queues)
submit_queues = 1;
#endif
mutex_init(&lock);
null_major = register_blkdev(0, "nullb");
......