Commit a5b4860b authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  IB/ehca: Support small QP queues
  IB/ehca: Make internal_create/destroy_qp() static
  IB/ehca: Move ehca2ib_return_code() out of line
  IB/ehca: Generate async event when SRQ limit reached
  IB/ehca: Support large page MRs
  IB/mlx4: Fix error path in create_qp_common()
  mlx4_core: Change command token on timeout
  IB/mthca: Change command token on timeout
  IB/ipath: Remove ipath_layer dead code
  IB/mlx4: Fix leaks in __mlx4_ib_modify_qp
parents e9ed7e72 e2f81daf
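
The bulk of this merge is the ehca small-QP-queue and large-page-MR work. A send or receive queue is sized as wqe_size * (max_wr + 1) bytes, and when that total fits into 512 or 1024 bytes the queue is carved out of a shared per-PD page rather than full kernel pages (page_size codes 2 and 3, i.e. 128 << page_size bytes, in the hunks below). Large-page MR support exports the MR page sizes the HCA supports as a capability bitmask (the HCA_CAP_MR_PGSIZE_* values added below) plus a new mr_largepage module parameter. As an illustrative sketch of how such a bitmask might be decoded into a page size (the helper name and plain byte constants are assumptions, not code from the patch):

/* Sketch only: pick the largest MR page size advertised by an
 * HCA_CAP_MR_PGSIZE_* style bitmask.  The MR-side code that actually
 * consumes these bits is not shown in full on this page. */
#define HCA_CAP_MR_PGSIZE_4K   1	/* bit values as added below */
#define HCA_CAP_MR_PGSIZE_64K  2
#define HCA_CAP_MR_PGSIZE_1M   4
#define HCA_CAP_MR_PGSIZE_16M  8

static unsigned long largest_mr_pgsize(u32 pgsize_cap)
{
	if (pgsize_cap & HCA_CAP_MR_PGSIZE_16M)
		return 16UL * 1024 * 1024;
	if (pgsize_cap & HCA_CAP_MR_PGSIZE_1M)
		return 1024UL * 1024;
	if (pgsize_cap & HCA_CAP_MR_PGSIZE_64K)
		return 64UL * 1024;
	return 4UL * 1024;		/* fall back to 4K */
}

Per the mr_largepage parameter description added below, the driver only goes beyond PAGE_SIZE when the parameter is set to 1, and then chooses a page size depending on the size of the MR being registered.
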
@@ -43,7 +43,6 @@
#ifndef __EHCA_CLASSES_H__
#define __EHCA_CLASSES_H__
struct ehca_module;
struct ehca_qp;
struct ehca_cq;
@@ -100,6 +99,11 @@ struct ehca_sport {
struct ehca_sma_attr saved_attr;
};
#define HCA_CAP_MR_PGSIZE_4K 1
#define HCA_CAP_MR_PGSIZE_64K 2
#define HCA_CAP_MR_PGSIZE_1M 4
#define HCA_CAP_MR_PGSIZE_16M 8
struct ehca_shca {
struct ib_device ib_device;
struct ibmebus_dev *ibmebus_dev;
@@ -115,6 +119,8 @@ struct ehca_shca {
struct h_galpas galpas;
struct mutex modify_mutex;
u64 hca_cap;
/* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
u32 hca_cap_mr_pgsize;
int max_mtu;
};
@@ -122,6 +128,10 @@ struct ehca_pd {
struct ib_pd ib_pd;
struct ipz_pd fw_pd;
u32 ownpid;
/* small queue mgmt */
struct mutex lock;
struct list_head free[2];
struct list_head full[2];
};
enum ehca_ext_qp_type {
@@ -206,6 +216,7 @@ struct ehca_mr {
enum ehca_mr_flag flags;
u32 num_kpages; /* number of kernel pages */
u32 num_hwpages; /* number of hw pages to form MR */
u64 hwpage_size; /* hw page size used for this MR */
int acl; /* ACL (stored here for usage in reregister) */
u64 *start; /* virtual start address (stored here for */
/* usage in reregister) */
@@ -240,6 +251,7 @@ struct ehca_mr_pginfo {
enum ehca_mr_pgi_type type;
u64 num_kpages;
u64 kpage_cnt;
u64 hwpage_size; /* hw page size used for this MR */
u64 num_hwpages; /* number of hw pages */
u64 hwpage_cnt; /* counter for hw pages */
u64 next_hwpage; /* next hw page in buffer/chunk/listelem */
@@ -298,6 +310,8 @@ int ehca_init_av_cache(void);
void ehca_cleanup_av_cache(void);
int ehca_init_mrmw_cache(void);
void ehca_cleanup_mrmw_cache(void);
int ehca_init_small_qp_cache(void);
void ehca_cleanup_small_qp_cache(void);
extern rwlock_t ehca_qp_idr_lock;
extern rwlock_t ehca_cq_idr_lock;
@@ -315,7 +329,7 @@ struct ipzu_queue_resp {
u32 queue_length; /* queue length allocated in bytes */
u32 pagesize;
u32 toggle_state;
u32 dummy; /* padding for 8 byte alignment */
u32 offset; /* save offset within a page for small_qp */
};
struct ehca_create_cq_resp {
@@ -357,15 +371,29 @@ enum ehca_ll_comp_flags {
LLQP_COMP_MASK = 0x60,
};
struct ehca_alloc_queue_parms {
/* input parameters */
int max_wr;
int max_sge;
int page_size;
int is_small;
/* output parameters */
u16 act_nr_wqes;
u8 act_nr_sges;
u32 queue_size; /* bytes for small queues, pages otherwise */
};
struct ehca_alloc_qp_parms {
/* input parameters */
struct ehca_alloc_queue_parms squeue;
struct ehca_alloc_queue_parms rqueue;
/* input parameters */
enum ehca_service_type servicetype;
int qp_storage;
int sigtype;
enum ehca_ext_qp_type ext_type;
enum ehca_ll_comp_flags ll_comp_flags;
int max_send_wr, max_recv_wr;
int max_send_sge, max_recv_sge;
int ud_av_l_key_ctl;
u32 token;
@@ -375,18 +403,10 @@ struct ehca_alloc_qp_parms {
u32 srq_qpn, srq_token, srq_limit;
/* output parameters */
/* output parameters */
u32 real_qp_num;
struct ipz_qp_handle qp_handle;
struct h_galpas galpas;
u16 act_nr_send_wqes;
u16 act_nr_recv_wqes;
u8 act_nr_recv_sges;
u8 act_nr_send_sges;
u32 nr_rq_pages;
u32 nr_sq_pages;
};
int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
@@ -190,8 +190,8 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
goto create_cq_exit2;
}
ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages,
EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0);
ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
if (!ipz_rc) {
ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
ipz_rc, device);
@@ -285,7 +285,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
return cq;
create_cq_exit4:
ipz_queue_dtor(&my_cq->ipz_queue);
ipz_queue_dtor(NULL, &my_cq->ipz_queue);
create_cq_exit3:
h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
@@ -359,7 +359,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
"ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
return ehca2ib_return_code(h_ret);
}
ipz_queue_dtor(&my_cq->ipz_queue);
ipz_queue_dtor(NULL, &my_cq->ipz_queue);
kmem_cache_free(cq_cache, my_cq);
return 0;
@@ -86,8 +86,8 @@ int ehca_create_eq(struct ehca_shca *shca,
return -EINVAL;
}
ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages,
EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0);
ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages,
EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0);
if (!ret) {
ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
goto create_eq_exit1;
@@ -145,7 +145,7 @@ int ehca_create_eq(struct ehca_shca *shca,
return 0;
create_eq_exit2:
ipz_queue_dtor(&eq->ipz_queue);
ipz_queue_dtor(NULL, &eq->ipz_queue);
create_eq_exit1:
hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
@@ -181,7 +181,7 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
ehca_err(&shca->ib_device, "Can't free EQ resources.");
return -EINVAL;
}
ipz_queue_dtor(&eq->ipz_queue);
ipz_queue_dtor(NULL, &eq->ipz_queue);
return 0;
}
@@ -175,9 +175,8 @@ error_data1:
}
static void qp_event_callback(struct ehca_shca *shca,
u64 eqe,
enum ib_event_type event_type)
static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
enum ib_event_type event_type, int fatal)
{
struct ib_event event;
struct ehca_qp *qp;
@@ -191,16 +190,26 @@ static void qp_event_callback(struct ehca_shca *shca,
if (!qp)
return;
ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
if (fatal)
ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
if (!qp->ib_qp.event_handler)
return;
event.device = &shca->ib_device;
event.device = &shca->ib_device;
event.event = event_type;
event.element.qp = &qp->ib_qp;
if (qp->ext_type == EQPT_SRQ) {
if (!qp->ib_srq.event_handler)
return;
qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
event.event = fatal ? IB_EVENT_SRQ_ERR : event_type;
event.element.srq = &qp->ib_srq;
qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
} else {
if (!qp->ib_qp.event_handler)
return;
event.event = event_type;
event.element.qp = &qp->ib_qp;
qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
}
return;
}
@@ -234,17 +243,17 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
switch (identifier) {
case 0x02: /* path migrated */
qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
break;
case 0x03: /* communication established */
qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
break;
case 0x04: /* send queue drained */
qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
break;
case 0x05: /* QP error */
case 0x06: /* QP error */
qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
break;
case 0x07: /* CQ error */
case 0x08: /* CQ error */
@@ -278,6 +287,11 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
ehca_err(&shca->ib_device, "Interface trace stopped.");
break;
case 0x14: /* first error capture info available */
ehca_info(&shca->ib_device, "First error capture available");
break;
case 0x15: /* SRQ limit reached */
qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
break;
default:
ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
identifier, shca->ib_device.name);
@@ -63,6 +63,7 @@ int ehca_port_act_time = 30;
int ehca_poll_all_eqs = 1;
int ehca_static_rate = -1;
int ehca_scaling_code = 0;
int ehca_mr_largepage = 0;
module_param_named(open_aqp1, ehca_open_aqp1, int, 0);
module_param_named(debug_level, ehca_debug_level, int, 0);
@@ -72,7 +73,8 @@ module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0);
module_param_named(port_act_time, ehca_port_act_time, int, 0);
module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0);
module_param_named(static_rate, ehca_static_rate, int, 0);
module_param_named(scaling_code, ehca_scaling_code, int, 0);
module_param_named(scaling_code, ehca_scaling_code, int, 0);
module_param_named(mr_largepage, ehca_mr_largepage, int, 0);
MODULE_PARM_DESC(open_aqp1,
"AQP1 on startup (0: no (default), 1: yes)");
@@ -95,6 +97,9 @@ MODULE_PARM_DESC(static_rate,
"set permanent static rate (default: disabled)");
MODULE_PARM_DESC(scaling_code,
"set scaling code (0: disabled/default, 1: enabled)");
MODULE_PARM_DESC(mr_largepage,
"use large page for MR (0: use PAGE_SIZE (default), "
"1: use large page depending on MR size");
DEFINE_RWLOCK(ehca_qp_idr_lock);
DEFINE_RWLOCK(ehca_cq_idr_lock);
@@ -125,6 +130,23 @@ void ehca_free_fw_ctrlblock(void *ptr)
}
#endif
int ehca2ib_return_code(u64 ehca_rc)
{
switch (ehca_rc) {
case H_SUCCESS:
return 0;
case H_RESOURCE: /* Resource in use */
case H_BUSY:
return -EBUSY;
case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
case H_CONSTRAINED: /* resource constraint */
case H_NO_MEM:
return -ENOMEM;
default:
return -EINVAL;
}
}
static int ehca_create_slab_caches(void)
{
int ret;
@@ -159,6 +181,12 @@ static int ehca_create_slab_caches(void)
goto create_slab_caches5;
}
ret = ehca_init_small_qp_cache();
if (ret) {
ehca_gen_err("Cannot create small queue SLAB cache.");
goto create_slab_caches6;
}
#ifdef CONFIG_PPC_64K_PAGES
ctblk_cache = kmem_cache_create("ehca_cache_ctblk",
EHCA_PAGESIZE, H_CB_ALIGNMENT,
@@ -166,12 +194,15 @@ static int ehca_create_slab_caches(void)
NULL);
if (!ctblk_cache) {
ehca_gen_err("Cannot create ctblk SLAB cache.");
ehca_cleanup_mrmw_cache();
goto create_slab_caches5;
ehca_cleanup_small_qp_cache();
goto create_slab_caches6;
}
#endif
return 0;
create_slab_caches6:
ehca_cleanup_mrmw_cache();
create_slab_caches5:
ehca_cleanup_av_cache();
@@ -189,6 +220,7 @@ create_slab_caches2:
static void ehca_destroy_slab_caches(void)
{
ehca_cleanup_small_qp_cache();
ehca_cleanup_mrmw_cache();
ehca_cleanup_av_cache();
ehca_cleanup_qp_cache();
@@ -295,6 +327,8 @@ int ehca_sense_attributes(struct ehca_shca *shca)
if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
ehca_gen_dbg(" %s", hca_cap_descr[i].descr);
shca->hca_cap_mr_pgsize = rblock->memory_page_size_supported;
port = (struct hipz_query_port *)rblock;
h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
if (h_ret != H_SUCCESS) {
@@ -590,6 +624,14 @@ static ssize_t ehca_show_adapter_handle(struct device *dev,
}
static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
static ssize_t ehca_show_mr_largepage(struct device *dev,
struct device_attribute *attr,
char *buf)
{
return sprintf(buf, "%d\n", ehca_mr_largepage);
}
static DEVICE_ATTR(mr_largepage, S_IRUGO, ehca_show_mr_largepage, NULL);
static struct attribute *ehca_dev_attrs[] = {
&dev_attr_adapter_handle.attr,
&dev_attr_num_ports.attr,
@@ -606,6 +648,7 @@ static struct attribute *ehca_dev_attrs[] = {
&dev_attr_cur_mw.attr,
&dev_attr_max_pd.attr,
&dev_attr_max_ah.attr,
&dev_attr_mr_largepage.attr,
NULL
};
This diff is collapsed.
@@ -111,7 +111,7 @@ int ehca_mr_is_maxmr(u64 size,
void ehca_mrmw_map_acl(int ib_acl,
u32 *hipz_acl);
void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl);
void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl);
void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
int *ib_acl);
@@ -49,6 +49,7 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device,
struct ib_ucontext *context, struct ib_udata *udata)
{
struct ehca_pd *pd;
int i;
pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL);
if (!pd) {
@@ -58,6 +59,11 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device,
}
pd->ownpid = current->tgid;
for (i = 0; i < 2; i++) {
INIT_LIST_HEAD(&pd->free[i]);
INIT_LIST_HEAD(&pd->full[i]);
}
mutex_init(&pd->lock);
/*
* Kernel PD: when device = -1, 0
@@ -81,6 +87,9 @@ int ehca_dealloc_pd(struct ib_pd *pd)
{
u32 cur_pid = current->tgid;
struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
int i, leftovers = 0;
extern struct kmem_cache *small_qp_cache;
struct ipz_small_queue_page *page, *tmp;
if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
my_pd->ownpid != cur_pid) {
@@ -89,8 +98,20 @@ int ehca_dealloc_pd(struct ib_pd *pd)
return -EINVAL;
}
kmem_cache_free(pd_cache,
container_of(pd, struct ehca_pd, ib_pd));
for (i = 0; i < 2; i++) {
list_splice(&my_pd->full[i], &my_pd->free[i]);
list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) {
leftovers = 1;
free_page(page->page);
kmem_cache_free(small_qp_cache, page);
}
}
if (leftovers)
ehca_warn(pd->device,
"Some small queue pages were not freed");
kmem_cache_free(pd_cache, my_pd);
return 0;
}
@@ -275,34 +275,39 @@ static inline void queue2resp(struct ipzu_queue_resp *resp,
resp->toggle_state = queue->toggle_state;
}
static inline int ll_qp_msg_size(int nr_sge)
{
return 128 << nr_sge;
}
/*
* init_qp_queue initializes/constructs r/squeue and registers queue pages.
*/
static inline int init_qp_queue(struct ehca_shca *shca,
struct ehca_pd *pd,
struct ehca_qp *my_qp,
struct ipz_queue *queue,
int q_type,
u64 expected_hret,
int nr_q_pages,
int wqe_size,
int nr_sges)
struct ehca_alloc_queue_parms *parms,
int wqe_size)
{
int ret, cnt, ipz_rc;
int ret, cnt, ipz_rc, nr_q_pages;
void *vpage;
u64 rpage, h_ret;
struct ib_device *ib_dev = &shca->ib_device;
struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
if (!nr_q_pages)
if (!parms->queue_size)
return 0;
ipz_rc = ipz_queue_ctor(queue, nr_q_pages, EHCA_PAGESIZE,
wqe_size, nr_sges);
if (parms->is_small) {
nr_q_pages = 1;
ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
128 << parms->page_size,
wqe_size, parms->act_nr_sges, 1);
} else {
nr_q_pages = parms->queue_size;
ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
EHCA_PAGESIZE, wqe_size,
parms->act_nr_sges, 0);
}
if (!ipz_rc) {
ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x",
ipz_rc);
@@ -323,7 +328,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
my_qp->ipz_qp_handle,
NULL, 0, q_type,
rpage, 1,
rpage, parms->is_small ? 0 : 1,
my_qp->galpas.kernel);
if (cnt == (nr_q_pages - 1)) { /* last page! */
if (h_ret != expected_hret) {
@@ -354,19 +359,55 @@ static inline int init_qp_queue(struct ehca_shca *shca,
return 0;
init_qp_queue1:
ipz_queue_dtor(queue);
ipz_queue_dtor(pd, queue);
return ret;
}
static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp)
{
if (is_llqp)
return 128 << act_nr_sge;
else
return offsetof(struct ehca_wqe,
u.nud.sg_list[act_nr_sge]);
}
static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
int req_nr_sge, int is_llqp)
{
u32 wqe_size, q_size;
int act_nr_sge = req_nr_sge;
if (!is_llqp)
/* round up #SGEs so WQE size is a power of 2 */
for (act_nr_sge = 4; act_nr_sge <= 252;
act_nr_sge = 4 + 2 * act_nr_sge)
if (act_nr_sge >= req_nr_sge)
break;
wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp);
q_size = wqe_size * (queue->max_wr + 1);
if (q_size <= 512)
queue->page_size = 2;
else if (q_size <= 1024)
queue->page_size = 3;
else
queue->page_size = 0;
queue->is_small = (queue->page_size != 0);
}
/*
* Create an ib_qp struct that is either a QP or an SRQ, depending on
* the value of the is_srq parameter. If init_attr and srq_init_attr share
* fields, the field out of init_attr is used.
*/
struct ehca_qp *internal_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata, int is_srq)
static struct ehca_qp *internal_create_qp(
struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata, int is_srq)
{
struct ehca_qp *my_qp;
struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
@@ -552,10 +593,20 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd,
if (my_qp->recv_cq)
parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
parms.max_send_wr = init_attr->cap.max_send_wr;
parms.max_recv_wr = init_attr->cap.max_recv_wr;
parms.max_send_sge = max_send_sge;
parms.max_recv_sge = max_recv_sge;
parms.squeue.max_wr = init_attr->cap.max_send_wr;
parms.rqueue.max_wr = init_attr->cap.max_recv_wr;
parms.squeue.max_sge = max_send_sge;
parms.rqueue.max_sge = max_recv_sge;
if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)
&& !(context && udata)) { /* no small QP support in userspace ATM */
ehca_determine_small_queue(
&parms.squeue, max_send_sge, is_llqp);
ehca_determine_small_queue(
&parms.rqueue, max_recv_sge, is_llqp);
parms.qp_storage =
(parms.squeue.is_small || parms.rqueue.is_small);
}
h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
if (h_ret != H_SUCCESS) {
@@ -569,50 +620,33 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd,
my_qp->ipz_qp_handle = parms.qp_handle;
my_qp->galpas = parms.galpas;
swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp);
rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp);
switch (qp_type) {
case IB_QPT_RC:
if (!is_llqp) {
swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
(parms.act_nr_send_sges)]);
rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
(parms.act_nr_recv_sges)]);
} else { /* for LLQP we need to use msg size, not wqe size */
swqe_size = ll_qp_msg_size(max_send_sge);
rwqe_size = ll_qp_msg_size(max_recv_sge);
parms.act_nr_send_sges = 1;
parms.act_nr_recv_sges = 1;
if (is_llqp) {
parms.squeue.act_nr_sges = 1;