Commit 12eef41f authored by Bryan O'Sullivan's avatar Bryan O'Sullivan Committed by Linus Torvalds
Browse files

[PATCH] IB/ipath: rC receive interrupt performance changes



This patch separates QP state used for sending and receiving RC packets so the
processing in the receive interrupt handler can be done mostly without locks
being held.  ACK packets are now sent without requiring synchronization with
the send tasklet.
Signed-off-by: default avatarRalph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: default avatarBryan O'Sullivan <bryan.osullivan@qlogic.com>
Cc: "Michael S. Tsirkin" <mst@mellanox.co.il>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent fba75200
......@@ -121,6 +121,7 @@ int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
struct ib_sge *sge, int acc)
{
struct ipath_mregion *mr;
unsigned n, m;
size_t off;
int ret;
......@@ -152,20 +153,22 @@ int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
}
off += mr->offset;
isge->mr = mr;
isge->m = 0;
isge->n = 0;
while (off >= mr->map[isge->m]->segs[isge->n].length) {
off -= mr->map[isge->m]->segs[isge->n].length;
isge->n++;
if (isge->n >= IPATH_SEGSZ) {
isge->m++;
isge->n = 0;
m = 0;
n = 0;
while (off >= mr->map[m]->segs[n].length) {
off -= mr->map[m]->segs[n].length;
n++;
if (n >= IPATH_SEGSZ) {
m++;
n = 0;
}
}
isge->vaddr = mr->map[isge->m]->segs[isge->n].vaddr + off;
isge->length = mr->map[isge->m]->segs[isge->n].length - off;
isge->mr = mr;
isge->vaddr = mr->map[m]->segs[n].vaddr + off;
isge->length = mr->map[m]->segs[n].length - off;
isge->sge_length = sge->length;
isge->m = m;
isge->n = n;
ret = 1;
......@@ -190,6 +193,7 @@ int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
struct ipath_lkey_table *rkt = &dev->lk_table;
struct ipath_sge *sge = &ss->sge;
struct ipath_mregion *mr;
unsigned n, m;
size_t off;
int ret;
......@@ -207,20 +211,22 @@ int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
}
off += mr->offset;
sge->mr = mr;
sge->m = 0;
sge->n = 0;
while (off >= mr->map[sge->m]->segs[sge->n].length) {
off -= mr->map[sge->m]->segs[sge->n].length;
sge->n++;
if (sge->n >= IPATH_SEGSZ) {
sge->m++;
sge->n = 0;
m = 0;
n = 0;
while (off >= mr->map[m]->segs[n].length) {
off -= mr->map[m]->segs[n].length;
n++;
if (n >= IPATH_SEGSZ) {
m++;
n = 0;
}
}
sge->vaddr = mr->map[sge->m]->segs[sge->n].vaddr + off;
sge->length = mr->map[sge->m]->segs[sge->n].length - off;
sge->mr = mr;
sge->vaddr = mr->map[m]->segs[n].vaddr + off;
sge->length = mr->map[m]->segs[n].length - off;
sge->sge_length = len;
sge->m = m;
sge->n = n;
ss->sg_list = NULL;
ss->num_sge = 1;
......
......@@ -333,10 +333,11 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->remote_qpn = 0;
qp->qkey = 0;
qp->qp_access_flags = 0;
clear_bit(IPATH_S_BUSY, &qp->s_flags);
qp->s_hdrwords = 0;
qp->s_psn = 0;
qp->r_psn = 0;
atomic_set(&qp->msn, 0);
qp->r_msn = 0;
if (qp->ibqp.qp_type == IB_QPT_RC) {
qp->s_state = IB_OPCODE_RC_SEND_LAST;
qp->r_state = IB_OPCODE_RC_SEND_LAST;
......@@ -345,7 +346,8 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->r_state = IB_OPCODE_UC_SEND_LAST;
}
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->s_nak_state = 0;
qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_nak_state = 0;
qp->s_rnr_timeout = 0;
qp->s_head = 0;
qp->s_tail = 0;
......@@ -363,10 +365,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
* @qp: the QP to put into an error state
*
* Flushes both send and receive work queues.
* QP r_rq.lock and s_lock should be held.
* QP s_lock should be held and interrupts disabled.
*/
static void ipath_error_qp(struct ipath_qp *qp)
void ipath_error_qp(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
......@@ -409,12 +411,14 @@ static void ipath_error_qp(struct ipath_qp *qp)
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
wc.opcode = IB_WC_RECV;
spin_lock(&qp->r_rq.lock);
while (qp->r_rq.tail != qp->r_rq.head) {
wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
if (++qp->r_rq.tail >= qp->r_rq.size)
qp->r_rq.tail = 0;
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
}
spin_unlock(&qp->r_rq.lock);
}
/**
......@@ -434,8 +438,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
unsigned long flags;
int ret;
spin_lock_irqsave(&qp->r_rq.lock, flags);
spin_lock(&qp->s_lock);
spin_lock_irqsave(&qp->s_lock, flags);
cur_state = attr_mask & IB_QP_CUR_STATE ?
attr->cur_qp_state : qp->state;
......@@ -506,31 +509,19 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
if (attr_mask & IB_QP_MIN_RNR_TIMER)
qp->s_min_rnr_timer = attr->min_rnr_timer;
qp->r_min_rnr_timer = attr->min_rnr_timer;
if (attr_mask & IB_QP_QKEY)
qp->qkey = attr->qkey;
qp->state = new_state;
spin_unlock(&qp->s_lock);
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
/*
* If QP1 changed to the RTS state, try to move to the link to INIT
* even if it was ACTIVE so the SM will reinitialize the SMA's
* state.
*/
if (qp->ibqp.qp_num == 1 && new_state == IB_QPS_RTS) {
struct ipath_ibdev *dev = to_idev(ibqp->device);
spin_unlock_irqrestore(&qp->s_lock, flags);
ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
}
ret = 0;
goto bail;
inval:
spin_unlock(&qp->s_lock);
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EINVAL;
bail:
......@@ -564,7 +555,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->sq_draining = 0;
attr->max_rd_atomic = 1;
attr->max_dest_rd_atomic = 1;
attr->min_rnr_timer = qp->s_min_rnr_timer;
attr->min_rnr_timer = qp->r_min_rnr_timer;
attr->port_num = 1;
attr->timeout = 0;
attr->retry_cnt = qp->s_retry_cnt;
......@@ -591,16 +582,12 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
* @qp: the queue pair to compute the AETH for
*
* Returns the AETH.
*
* The QP s_lock should be held.
*/
__be32 ipath_compute_aeth(struct ipath_qp *qp)
{
u32 aeth = atomic_read(&qp->msn) & IPS_MSN_MASK;
u32 aeth = qp->r_msn & IPS_MSN_MASK;
if (qp->s_nak_state) {
aeth |= qp->s_nak_state << IPS_AETH_CREDIT_SHIFT;
} else if (qp->ibqp.srq) {
if (qp->ibqp.srq) {
/*
* Shared receive queues don't generate credits.
* Set the credit field to the invalid value.
......
This diff is collapsed.
......@@ -113,20 +113,23 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
*
* Return 0 if no RWQE is available, otherwise return 1.
*
* Called at interrupt level with the QP r_rq.lock held.
* Can be called from interrupt level.
*/
int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
{
unsigned long flags;
struct ipath_rq *rq;
struct ipath_srq *srq;
struct ipath_rwqe *wqe;
int ret;
int ret = 1;
if (!qp->ibqp.srq) {
rq = &qp->r_rq;
spin_lock_irqsave(&rq->lock, flags);
if (unlikely(rq->tail == rq->head)) {
ret = 0;
goto bail;
goto done;
}
wqe = get_rwqe_ptr(rq, rq->tail);
qp->r_wr_id = wqe->wr_id;
......@@ -138,17 +141,16 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
}
if (++rq->tail >= rq->size)
rq->tail = 0;
ret = 1;
goto bail;
goto done;
}
srq = to_isrq(qp->ibqp.srq);
rq = &srq->rq;
spin_lock(&rq->lock);
spin_lock_irqsave(&rq->lock, flags);
if (unlikely(rq->tail == rq->head)) {
spin_unlock(&rq->lock);
ret = 0;
goto bail;
goto done;
}
wqe = get_rwqe_ptr(rq, rq->tail);
qp->r_wr_id = wqe->wr_id;
......@@ -170,18 +172,18 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
n = rq->head - rq->tail;
if (n < srq->limit) {
srq->limit = 0;
spin_unlock(&rq->lock);
spin_unlock_irqrestore(&rq->lock, flags);
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
srq->ibsrq.event_handler(&ev,
srq->ibsrq.srq_context);
} else
spin_unlock(&rq->lock);
} else
spin_unlock(&rq->lock);
ret = 1;
goto bail;
}
}
done:
spin_unlock_irqrestore(&rq->lock, flags);
bail:
return ret;
}
......@@ -248,10 +250,8 @@ again:
wc.imm_data = wqe->wr.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
spin_lock_irqsave(&qp->r_rq.lock, flags);
if (!ipath_get_rwqe(qp, 0)) {
rnr_nak:
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
/* Handle RNR NAK */
if (qp->ibqp.qp_type == IB_QPT_UC)
goto send_comp;
......@@ -263,20 +263,17 @@ again:
sqp->s_rnr_retry--;
dev->n_rnr_naks++;
sqp->s_rnr_timeout =
ib_ipath_rnr_table[sqp->s_min_rnr_timer];
ib_ipath_rnr_table[sqp->r_min_rnr_timer];
ipath_insert_rnr_queue(sqp);
goto done;
}
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
wc.wc_flags = IB_WC_WITH_IMM;
wc.imm_data = wqe->wr.imm_data;
spin_lock_irqsave(&qp->r_rq.lock, flags);
if (!ipath_get_rwqe(qp, 1))
goto rnr_nak;
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
/* FALLTHROUGH */
case IB_WR_RDMA_WRITE:
if (wqe->length == 0)
......
......@@ -241,7 +241,6 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
u32 hdrsize;
u32 psn;
u32 pad;
unsigned long flags;
struct ib_wc wc;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
struct ib_reth *reth;
......@@ -279,8 +278,6 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
wc.imm_data = 0;
wc.wc_flags = 0;
spin_lock_irqsave(&qp->r_rq.lock, flags);
/* Compare the PSN verses the expected PSN. */
if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
/*
......@@ -537,15 +534,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
default:
/* Drop packet for unknown opcodes. */
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
dev->n_pkt_drops++;
goto bail;
goto done;
}
qp->r_psn++;
qp->r_state = opcode;
done:
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
bail:
return;
}
......@@ -307,32 +307,34 @@ struct ipath_qp {
u32 s_next_psn; /* PSN for next request */
u32 s_last_psn; /* last response PSN processed */
u32 s_psn; /* current packet sequence number */
u32 s_ack_psn; /* PSN for RDMA_READ */
u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
u32 s_ack_psn; /* PSN for next ACK or RDMA_READ */
u64 s_ack_atomic; /* data for atomic ACK */
u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
u64 r_wr_id; /* ID for current receive WQE */
u64 r_atomic_data; /* data for last atomic op */
u32 r_atomic_psn; /* PSN of last atomic op */
u32 r_len; /* total length of r_sge */
u32 r_rcv_len; /* receive data len processed */
u32 r_psn; /* expected rcv packet sequence number */
u32 r_msn; /* message sequence number */
u8 state; /* QP state */
u8 s_state; /* opcode of last packet sent */
u8 s_ack_state; /* opcode of packet to ACK */
u8 s_nak_state; /* non-zero if NAK is pending */
u8 r_state; /* opcode of last packet received */
u8 r_ack_state; /* opcode of packet to ACK */
u8 r_nak_state; /* non-zero if NAK is pending */
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
u8 r_reuse_sge; /* for UC receive errors */
u8 r_sge_inx; /* current index into sg_list */
u8 s_max_sge; /* size of s_wq->sg_list */
u8 qp_access_flags;
u8 s_max_sge; /* size of s_wq->sg_list */
u8 s_retry_cnt; /* number of times to retry */
u8 s_rnr_retry_cnt;
u8 s_min_rnr_timer;
u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */
u8 s_pkey_index; /* PKEY index to use */
enum ib_mtu path_mtu;
atomic_t msn; /* message sequence number */
u32 remote_qpn;
u32 qkey; /* QKEY for this QP (for UD or RD) */
u32 s_size; /* send work queue size */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment