Commit 3af73d39 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (29 commits)
  RDMA/nes: Fix slab corruption
  IB/mlx4: Set RLKEY bit for kernel QPs
  RDMA/nes: Correct error_module bit mask
  RDMA/nes: Fix routed RDMA connections
  RDMA/nes: Enhanced PFT management scheme
  RDMA/nes: Handle AE bounds violation
  RDMA/nes: Limit critical error interrupts
  RDMA/nes: Stop spurious MAC interrupts
  RDMA/nes: Correct tso_wqe_length
  RDMA/nes: Fill in firmware version for ethtool
  RDMA/nes: Use ethtool timer value
  RDMA/nes: Correct MAX TSO frags value
  RDMA/nes: Enable MC/UC after changing MTU
  RDMA/nes: Free NIC TX buffers when destroying NIC QP
  RDMA/nes: Fix MDC setting
  RDMA/nes: Add wqm_quanta module option
  RDMA/nes: Module parameter permissions
  RDMA/cxgb3: Set active_mtu in ib_port_attr
  RDMA/nes: Add support for 4-port 1G HP blade card
  RDMA/nes: Make mini_cm_connect() static
  ...
parents 13dd7f87 eedd5d0a
......@@ -3748,6 +3748,7 @@ error1:
cm_remove_port_fs(port);
}
device_unregister(cm_dev->device);
kfree(cm_dev);
}
static void cm_remove_one(struct ib_device *ib_device)
......@@ -3776,6 +3777,7 @@ static void cm_remove_one(struct ib_device *ib_device)
cm_remove_port_fs(port);
}
device_unregister(cm_dev->device);
kfree(cm_dev);
}
static int __init ib_cm_init(void)
......
......@@ -1697,9 +1697,8 @@ static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv,
u8 port_num = mad_agent_priv->agent.port_num;
u8 lmc;
send_resp = ((struct ib_mad *)(wr->send_buf.mad))->
mad_hdr.method & IB_MGMT_METHOD_RESP;
rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP;
send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad);
rcv_resp = ib_response_mad(rwc->recv_buf.mad);
if (send_resp == rcv_resp)
/* both requests, or both responses. GIDs different */
......
......@@ -272,7 +272,6 @@ static struct ib_qp *c2_create_qp(struct ib_pd *pd,
pr_debug("%s: Invalid QP type: %d\n", __func__,
init_attr->qp_type);
return ERR_PTR(-EINVAL);
break;
}
if (err) {
......
......@@ -1155,13 +1155,11 @@ static int iwch_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
PDBG("%s ibdev %p\n", __func__, ibdev);
memset(props, 0, sizeof(struct ib_port_attr));
props->max_mtu = IB_MTU_4096;
props->lid = 0;
props->lmc = 0;
props->sm_lid = 0;
props->sm_sl = 0;
props->active_mtu = IB_MTU_2048;
props->state = IB_PORT_ACTIVE;
props->phys_state = 0;
props->port_cap_flags =
IB_PORT_CM_SUP |
IB_PORT_SNMP_TUNNEL_SUP |
......@@ -1170,7 +1168,6 @@ static int iwch_query_port(struct ib_device *ibdev,
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
props->pkey_tbl_len = 1;
props->qkey_viol_cntr = 0;
props->active_width = 2;
props->active_speed = 2;
props->max_msg_sz = -1;
......
......@@ -164,6 +164,13 @@ struct ehca_qmap_entry {
u16 reported;
};
struct ehca_queue_map {
struct ehca_qmap_entry *map;
unsigned int entries;
unsigned int tail;
unsigned int left_to_poll;
};
struct ehca_qp {
union {
struct ib_qp ib_qp;
......@@ -173,8 +180,9 @@ struct ehca_qp {
enum ehca_ext_qp_type ext_type;
enum ib_qp_state state;
struct ipz_queue ipz_squeue;
struct ehca_qmap_entry *sq_map;
struct ehca_queue_map sq_map;
struct ipz_queue ipz_rqueue;
struct ehca_queue_map rq_map;
struct h_galpas galpas;
u32 qkey;
u32 real_qp_num;
......@@ -204,6 +212,8 @@ struct ehca_qp {
atomic_t nr_events; /* events seen */
wait_queue_head_t wait_completion;
int mig_armed;
struct list_head sq_err_node;
struct list_head rq_err_node;
};
#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
......@@ -233,6 +243,8 @@ struct ehca_cq {
/* mmap counter for resources mapped into user space */
u32 mm_count_queue;
u32 mm_count_galpa;
struct list_head sqp_err_list;
struct list_head rqp_err_list;
};
enum ehca_mr_flag {
......
......@@ -276,6 +276,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
for (i = 0; i < QP_HASHTAB_LEN; i++)
INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
INIT_LIST_HEAD(&my_cq->sqp_err_list);
INIT_LIST_HEAD(&my_cq->rqp_err_list);
if (context) {
struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
struct ehca_create_cq_resp resp;
......
......@@ -197,6 +197,8 @@ void ehca_poll_eqs(unsigned long data);
int ehca_calc_ipd(struct ehca_shca *shca, int port,
enum ib_rate path_rate, u32 *ipd);
void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
#ifdef CONFIG_PPC_64K_PAGES
void *ehca_alloc_fw_ctrlblock(gfp_t flags);
void ehca_free_fw_ctrlblock(void *ptr);
......
......@@ -396,6 +396,50 @@ static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
queue->is_small = (queue->page_size != 0);
}
/* needs to be called with cq->spinlock held */
void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
{
struct list_head *list, *node;
/* TODO: support low latency QPs */
if (qp->ext_type == EQPT_LLQP)
return;
if (on_sq) {
list = &qp->send_cq->sqp_err_list;
node = &qp->sq_err_node;
} else {
list = &qp->recv_cq->rqp_err_list;
node = &qp->rq_err_node;
}
if (list_empty(node))
list_add_tail(node, list);
return;
}
static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
{
unsigned long flags;
spin_lock_irqsave(&cq->spinlock, flags);
if (!list_empty(node))
list_del_init(node);
spin_unlock_irqrestore(&cq->spinlock, flags);
}
static void reset_queue_map(struct ehca_queue_map *qmap)
{
int i;
qmap->tail = 0;
for (i = 0; i < qmap->entries; i++)
qmap->map[i].reported = 1;
}
/*
* Create an ib_qp struct that is either a QP or an SRQ, depending on
* the value of the is_srq parameter. If init_attr and srq_init_attr share
......@@ -407,12 +451,11 @@ static struct ehca_qp *internal_create_qp(
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata, int is_srq)
{
struct ehca_qp *my_qp;
struct ehca_qp *my_qp, *my_srq = NULL;
struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
ib_device);
struct ib_ucontext *context = NULL;
u32 nr_qes;
u64 h_ret;
int is_llqp = 0, has_srq = 0;
int qp_type, max_send_sge, max_recv_sge, ret;
......@@ -457,8 +500,7 @@ static struct ehca_qp *internal_create_qp(
/* handle SRQ base QPs */
if (init_attr->srq) {
struct ehca_qp *my_srq =
container_of(init_attr->srq, struct ehca_qp, ib_srq);
my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
has_srq = 1;
parms.ext_type = EQPT_SRQBASE;
......@@ -716,15 +758,19 @@ static struct ehca_qp *internal_create_qp(
"and pages ret=%i", ret);
goto create_qp_exit2;
}
nr_qes = my_qp->ipz_squeue.queue_length /
my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
my_qp->ipz_squeue.qe_size;
my_qp->sq_map = vmalloc(nr_qes *
my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
sizeof(struct ehca_qmap_entry));
if (!my_qp->sq_map) {
if (!my_qp->sq_map.map) {
ehca_err(pd->device, "Couldn't allocate squeue "
"map ret=%i", ret);
goto create_qp_exit3;
}
INIT_LIST_HEAD(&my_qp->sq_err_node);
/* to avoid the generation of bogus flush CQEs */
reset_queue_map(&my_qp->sq_map);
}
if (HAS_RQ(my_qp)) {
......@@ -736,6 +782,25 @@ static struct ehca_qp *internal_create_qp(
"and pages ret=%i", ret);
goto create_qp_exit4;
}
my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
my_qp->ipz_rqueue.qe_size;
my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
sizeof(struct ehca_qmap_entry));
if (!my_qp->rq_map.map) {
ehca_err(pd->device, "Couldn't allocate squeue "
"map ret=%i", ret);
goto create_qp_exit5;
}
INIT_LIST_HEAD(&my_qp->rq_err_node);
/* to avoid the generation of bogus flush CQEs */
reset_queue_map(&my_qp->rq_map);
} else if (init_attr->srq) {
/* this is a base QP, use the queue map of the SRQ */
my_qp->rq_map = my_srq->rq_map;
INIT_LIST_HEAD(&my_qp->rq_err_node);
my_qp->ipz_rqueue = my_srq->ipz_rqueue;
}
if (is_srq) {
......@@ -799,7 +864,7 @@ static struct ehca_qp *internal_create_qp(
if (ret) {
ehca_err(pd->device,
"Couldn't assign qp to send_cq ret=%i", ret);
goto create_qp_exit6;
goto create_qp_exit7;
}
}
......@@ -825,25 +890,29 @@ static struct ehca_qp *internal_create_qp(
if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
ehca_err(pd->device, "Copy to udata failed");
ret = -EINVAL;
goto create_qp_exit7;
goto create_qp_exit8;
}
}
return my_qp;
create_qp_exit7:
create_qp_exit8:
ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
create_qp_exit6:
create_qp_exit7:
kfree(my_qp->mod_qp_parm);
create_qp_exit6:
if (HAS_RQ(my_qp))
vfree(my_qp->rq_map.map);
create_qp_exit5:
if (HAS_RQ(my_qp))
ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
create_qp_exit4:
if (HAS_SQ(my_qp))
vfree(my_qp->sq_map);
vfree(my_qp->sq_map.map);
create_qp_exit3:
if (HAS_SQ(my_qp))
......@@ -1035,6 +1104,101 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
return 0;
}
static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
struct ehca_queue_map *qmap)
{
void *wqe_v;
u64 q_ofs;
u32 wqe_idx;
/* convert real to abs address */
wqe_p = wqe_p & (~(1UL << 63));
wqe_v = abs_to_virt(wqe_p);
if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
ehca_gen_err("Invalid offset for calculating left cqes "
"wqe_p=%#lx wqe_v=%p\n", wqe_p, wqe_v);
return -EFAULT;
}
wqe_idx = q_ofs / ipz_queue->qe_size;
if (wqe_idx < qmap->tail)
qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx;
else
qmap->left_to_poll = wqe_idx - qmap->tail;
return 0;
}
static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
{
u64 h_ret;
void *send_wqe_p, *recv_wqe_p;
int ret;
unsigned long flags;
int qp_num = my_qp->ib_qp.qp_num;
/* this hcall is not supported on base QPs */
if (my_qp->ext_type != EQPT_SRQBASE) {
/* get send and receive wqe pointer */
h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
my_qp->ipz_qp_handle, &my_qp->pf,
&send_wqe_p, &recv_wqe_p, 4);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "disable_and_get_wqe() "
"failed ehca_qp=%p qp_num=%x h_ret=%li",
my_qp, qp_num, h_ret);
return ehca2ib_return_code(h_ret);
}
/*
* acquire lock to ensure that nobody is polling the cq which
* could mean that the qmap->tail pointer is in an
* inconsistent state.
*/
spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
&my_qp->sq_map);
spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
if (ret)
return ret;
spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
&my_qp->rq_map);
spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
if (ret)
return ret;
} else {
spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
my_qp->sq_map.left_to_poll = 0;
spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
my_qp->rq_map.left_to_poll = 0;
spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
}
/* this assures flush cqes being generated only for pending wqes */
if ((my_qp->sq_map.left_to_poll == 0) &&
(my_qp->rq_map.left_to_poll == 0)) {
spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
ehca_add_to_err_list(my_qp, 1);
spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
if (HAS_RQ(my_qp)) {
spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
ehca_add_to_err_list(my_qp, 0);
spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
flags);
}
}
return 0;
}
/*
* internal_modify_qp with circumvention to handle aqp0 properly
* smi_reset2init indicates if this is an internal reset-to-init-call for
......@@ -1539,10 +1703,27 @@ static int internal_modify_qp(struct ib_qp *ibqp,
goto modify_qp_exit2;
}
}
if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) {
ret = check_for_left_cqes(my_qp, shca);
if (ret)
goto modify_qp_exit2;
}
if (statetrans == IB_QPST_ANY2RESET) {
ipz_qeit_reset(&my_qp->ipz_rqueue);
ipz_qeit_reset(&my_qp->ipz_squeue);
if (qp_cur_state == IB_QPS_ERR) {
del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
if (HAS_RQ(my_qp))
del_from_err_list(my_qp->recv_cq,
&my_qp->rq_err_node);
}
reset_queue_map(&my_qp->sq_map);
if (HAS_RQ(my_qp))
reset_queue_map(&my_qp->rq_map);
}
if (attr_mask & IB_QP_QKEY)
......@@ -1958,6 +2139,16 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
idr_remove(&ehca_qp_idr, my_qp->token);
write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
/*
* SRQs will never get into an error list and do not have a recv_cq,
* so we need to skip them here.
*/
if (HAS_RQ(my_qp) && !IS_SRQ(my_qp))
del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
if (HAS_SQ(my_qp))
del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
/* now wait until all pending events have completed */
wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
......@@ -1983,7 +2174,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
if (qp_type == IB_QPT_GSI) {
struct ib_event event;
ehca_info(dev, "device %s: port %x is inactive.",
shca->ib_device.name, port_num);
shca->ib_device.name, port_num);
event.device = &shca->ib_device;
event.event = IB_EVENT_PORT_ERR;
event.element.port_num = port_num;
......@@ -1991,11 +2182,15 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
ib_dispatch_event(&event);
}
if (HAS_RQ(my_qp))
if (HAS_RQ(my_qp)) {
ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
vfree(my_qp->rq_map.map);
}
if (HAS_SQ(my_qp)) {
ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
vfree(my_qp->sq_map);
vfree(my_qp->sq_map.map);
}
kmem_cache_free(qp_cache, my_qp);
atomic_dec(&shca->num_qps);
......
......@@ -53,9 +53,25 @@
/* in RC traffic, insert an empty RDMA READ every this many packets */
#define ACK_CIRC_THRESHOLD 2000000
static u64 replace_wr_id(u64 wr_id, u16 idx)
{
u64 ret;
ret = wr_id & ~QMAP_IDX_MASK;
ret |= idx & QMAP_IDX_MASK;
return ret;
}
static u16 get_app_wr_id(u64 wr_id)
{
return wr_id & QMAP_IDX_MASK;
}
static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
struct ehca_wqe *wqe_p,
struct ib_recv_wr *recv_wr)
struct ib_recv_wr *recv_wr,
u32 rq_map_idx)
{
u8 cnt_ds;
if (unlikely((recv_wr->num_sge < 0) ||
......@@ -69,7 +85,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
/* clear wqe header until sglist */
memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
wqe_p->work_request_id = recv_wr->wr_id;
wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
wqe_p->nr_of_data_seg = recv_wr->num_sge;
for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
......@@ -146,6 +162,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
u64 dma_length;
struct ehca_av *my_av;
u32 remote_qkey = send_wr->wr.ud.remote_qkey;
struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
if (unlikely((send_wr->num_sge < 0) ||
(send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
......@@ -158,11 +175,10 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
/* clear wqe header until sglist */
memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
wqe_p->work_request_id = send_wr->wr_id & ~QMAP_IDX_MASK;
wqe_p->work_request_id |= sq_map_idx & QMAP_IDX_MASK;
wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
qp->sq_map[sq_map_idx].app_wr_id = send_wr->wr_id & QMAP_IDX_MASK;
qp->sq_map[sq_map_idx].reported = 0;
qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
qmap_entry->reported = 0;
switch (send_wr->opcode) {
case IB_WR_SEND:
......@@ -496,7 +512,9 @@ static int internal_post_recv(struct ehca_qp *my_qp,
struct ehca_wqe *wqe_p;
int wqe_cnt = 0;
int ret = 0;
u32 rq_map_idx;
unsigned long flags;
struct ehca_qmap_entry *qmap_entry;
if (unlikely(!HAS_RQ(my_qp))) {
ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
......@@ -524,8 +542,15 @@ static int internal_post_recv(struct ehca_qp *my_qp,
}
goto post_recv_exit0;
}
/*
* Get the index of the WQE in the recv queue. The same index
* is used for writing into the rq_map.
*/
rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
/* write a RECV WQE into the QUEUE */
ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
rq_map_idx);
/*
* if something failed,
* reset the free entry pointer to the start value
......@@ -540,6 +565,11 @@ static int internal_post_recv(struct ehca_qp *my_qp,
}
goto post_recv_exit0;
}
qmap_entry = &my_qp->rq_map.map[rq_map_idx];
qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
qmap_entry->reported = 0;
wqe_cnt++;
} /* eof for cur_recv_wr */
......@@ -596,10 +626,12 @@ static const u8 ib_wc_opcode[255] = {
/* internal function to poll one entry of cq */
static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
{
int ret = 0;
int ret = 0, qmap_tail_idx;
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
struct ehca_cqe *cqe;
struct ehca_qp *my_qp;
struct ehca_qmap_entry *qmap_entry;
struct ehca_queue_map *qmap;
int cqe_count = 0, is_error;
repoll:
......@@ -674,27 +706,52 @@ repoll:
goto repoll;
wc->qp = &my_qp->ib_qp;
if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) {
struct ehca_qmap_entry *qmap_entry;
if (is_error) {
/*
* We got a send completion and need to restore the original
* wr_id.
* set left_to_poll to 0 because in error state, we will not
* get any additional CQEs
*/
qmap_entry = &my_qp->sq_map[cqe->work_request_id &
QMAP_IDX_MASK];
ehca_add_to_err_list(my_qp, 1);
my_qp->sq_map.left_to_poll = 0;
if (qmap_entry->reported) {
ehca_warn(cq->device, "Double cqe on qp_num=%#x",
my_qp->real_qp_num);
/* found a double cqe, discard it and read next one */
goto repoll;
}
wc->wr_id = cqe->work_request_id & ~QMAP_IDX_MASK;
wc->wr_id |= qmap_entry->app_wr_id;
qmap_entry->reported = 1;
} else