Commit bea3348e authored by Stephen Hemminger, committed by David S. Miller

[NET]: Make NAPI polling independent of struct net_device objects.

Several devices have multiple independent RX queues per net
device, and some have a single interrupt doorbell for several
queues.

In either case, it's easier to support layouts like that if the
structure representing the poll is independent from the net
device itself.

The signature of the ->poll() callback goes from:

	int foo_poll(struct net_device *dev, int *budget)

to

	int foo_poll(struct napi_struct *napi, int budget)

The caller is returned the number of RX packets processed (or
the number of "NAPI credits" consumed if you want to get
abstract).  The callee no longer messes around bumping
dev->quota, *budget, etc. because that is all handled in the
caller upon return.

The napi_struct is to be embedded in the device driver private data
structures.

Furthermore, it is the driver's responsibility to disable all NAPI
instances in its ->stop() device close handler.  Since the
napi_struct is privatized into the driver's private data structures,
only the driver knows how to get at all of the napi_struct instances
it may have per-device.

With lots of help and suggestions from Rusty Russell, Roland Dreier,
Michael Chan, Jeff Garzik, and Jamal Hadi Salim.

Bug fixes from Thomas Graf, Roland Dreier, Peter Zijlstra,
Joseph Fannin, Scott Wood, Hans J. Koch, and Michael Chan.

[ Ported to current tree and all drivers converted.  Integrated
  Stephen's follow-on kerneldoc additions, and restored poll_list
  handling to the old style to fix mutual exclusion issues.  -DaveM ]
Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent dde4e47e
......@@ -240,17 +240,23 @@ X!Ilib/string.c
<sect1><title>Driver Support</title>
!Enet/core/dev.c
!Enet/ethernet/eth.c
!Enet/sched/sch_generic.c
!Iinclude/linux/etherdevice.h
!Iinclude/linux/netdevice.h
</sect1>
<sect1><title>PHY Support</title>
!Edrivers/net/phy/phy.c
!Idrivers/net/phy/phy.c
!Edrivers/net/phy/phy_device.c
!Idrivers/net/phy/phy_device.c
!Edrivers/net/phy/mdio_bus.c
!Idrivers/net/phy/mdio_bus.c
</sect1>
<!-- FIXME: Removed for now since no structured comments in source
<sect1><title>Wireless</title>
X!Enet/core/wireless.c
-->
</sect1>
-->
<sect1><title>Synchronous PPP</title>
!Edrivers/net/wan/syncppp.c
</sect1>
......
This diff is collapsed.
......@@ -95,9 +95,13 @@ dev->set_multicast_list:
Synchronization: netif_tx_lock spinlock.
Context: BHs disabled
dev->poll:
Synchronization: __LINK_STATE_RX_SCHED bit in dev->state. See
dev_close code and comments in net/core/dev.c for more info.
struct napi_struct synchronization rules
========================================
napi->poll:
Synchronization: NAPI_STATE_SCHED bit in napi->state. Device
driver's dev->close method will invoke napi_disable() on
all NAPI instances which will do a sleeping poll on the
NAPI_STATE_SCHED napi->state bit, waiting for all pending
NAPI activity to cease.
Context: softirq
will be called with interrupts disabled by netconsole.
......@@ -228,6 +228,8 @@ struct ipoib_dev_priv {
struct net_device *dev;
struct napi_struct napi;
unsigned long flags;
struct mutex mcast_mutex;
......@@ -351,7 +353,7 @@ extern struct workqueue_struct *ipoib_workqueue;
/* functions */
int ipoib_poll(struct net_device *dev, int *budget);
int ipoib_poll(struct napi_struct *napi, int budget);
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
......
......@@ -281,63 +281,58 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
wc->status, wr_id, wc->vendor_err);
}
int ipoib_poll(struct net_device *dev, int *budget)
int ipoib_poll(struct napi_struct *napi, int budget)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int max = min(*budget, dev->quota);
struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
struct net_device *dev = priv->dev;
int done;
int t;
int empty;
int n, i;
done = 0;
empty = 0;
while (max) {
poll_more:
while (done < budget) {
int max = (budget - done);
t = min(IPOIB_NUM_WC, max);
n = ib_poll_cq(priv->cq, t, priv->ibwc);
for (i = 0; i < n; ++i) {
for (i = 0; i < n; i++) {
struct ib_wc *wc = priv->ibwc + i;
if (wc->wr_id & IPOIB_CM_OP_SRQ) {
++done;
--max;
ipoib_cm_handle_rx_wc(dev, wc);
} else if (wc->wr_id & IPOIB_OP_RECV) {
++done;
--max;
ipoib_ib_handle_rx_wc(dev, wc);
} else
ipoib_ib_handle_tx_wc(dev, wc);
}
if (n != t) {
empty = 1;
if (n != t)
break;
}
}
dev->quota -= done;
*budget -= done;
if (empty) {
netif_rx_complete(dev);
if (done < budget) {
netif_rx_complete(dev, napi);
if (unlikely(ib_req_notify_cq(priv->cq,
IB_CQ_NEXT_COMP |
IB_CQ_REPORT_MISSED_EVENTS)) &&
netif_rx_reschedule(dev, 0))
return 1;
return 0;
netif_rx_reschedule(dev, napi))
goto poll_more;
}
return 1;
return done;
}
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
{
netif_rx_schedule(dev_ptr);
struct net_device *dev = dev_ptr;
struct ipoib_dev_priv *priv = netdev_priv(dev);
netif_rx_schedule(dev, &priv->napi);
}
static inline int post_send(struct ipoib_dev_priv *priv,
......@@ -577,7 +572,6 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
int i;
clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
netif_poll_disable(dev);
ipoib_cm_dev_stop(dev);
......@@ -660,7 +654,6 @@ timeout:
msleep(1);
}
netif_poll_enable(dev);
ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP);
return 0;
......
......@@ -98,16 +98,20 @@ int ipoib_open(struct net_device *dev)
ipoib_dbg(priv, "bringing up interface\n");
napi_enable(&priv->napi);
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
if (ipoib_pkey_dev_delay_open(dev))
return 0;
if (ipoib_ib_dev_open(dev))
if (ipoib_ib_dev_open(dev)) {
napi_disable(&priv->napi);
return -EINVAL;
}
if (ipoib_ib_dev_up(dev)) {
ipoib_ib_dev_stop(dev, 1);
napi_disable(&priv->napi);
return -EINVAL;
}
......@@ -140,6 +144,7 @@ static int ipoib_stop(struct net_device *dev)
ipoib_dbg(priv, "stopping interface\n");
clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
napi_disable(&priv->napi);
netif_stop_queue(dev);
......@@ -948,8 +953,8 @@ static void ipoib_setup(struct net_device *dev)
dev->hard_header = ipoib_hard_header;
dev->set_multicast_list = ipoib_set_mcast_list;
dev->neigh_setup = ipoib_neigh_setup_dev;
dev->poll = ipoib_poll;
dev->weight = 100;
netif_napi_add(dev, &priv->napi, ipoib_poll, 100);
dev->watchdog_timeo = HZ;
......
......@@ -334,6 +334,8 @@ struct cp_private {
spinlock_t lock;
u32 msg_enable;
struct napi_struct napi;
struct pci_dev *pdev;
u32 rx_config;
u16 cpcmd;
......@@ -501,12 +503,12 @@ static inline unsigned int cp_rx_csum_ok (u32 status)
return 0;
}
static int cp_rx_poll (struct net_device *dev, int *budget)
static int cp_rx_poll(struct napi_struct *napi, int budget)
{
struct cp_private *cp = netdev_priv(dev);
unsigned rx_tail = cp->rx_tail;
unsigned rx_work = dev->quota;
unsigned rx;
struct cp_private *cp = container_of(napi, struct cp_private, napi);
struct net_device *dev = cp->dev;
unsigned int rx_tail = cp->rx_tail;
int rx;
rx_status_loop:
rx = 0;
......@@ -588,33 +590,28 @@ rx_next:
desc->opts1 = cpu_to_le32(DescOwn | cp->rx_buf_sz);
rx_tail = NEXT_RX(rx_tail);
if (!rx_work--)
if (rx >= budget)
break;
}
cp->rx_tail = rx_tail;
dev->quota -= rx;
*budget -= rx;
/* if we did not reach work limit, then we're done with
* this round of polling
*/
if (rx_work) {
if (rx < budget) {
unsigned long flags;
if (cpr16(IntrStatus) & cp_rx_intr_mask)
goto rx_status_loop;
local_irq_save(flags);
spin_lock_irqsave(&cp->lock, flags);
cpw16_f(IntrMask, cp_intr_mask);
__netif_rx_complete(dev);
local_irq_restore(flags);
return 0; /* done */
__netif_rx_complete(dev, napi);
spin_unlock_irqrestore(&cp->lock, flags);
}
return 1; /* not done */
return rx;
}
static irqreturn_t cp_interrupt (int irq, void *dev_instance)
......@@ -647,9 +644,9 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance)
}
if (status & (RxOK | RxErr | RxEmpty | RxFIFOOvr))
if (netif_rx_schedule_prep(dev)) {
if (netif_rx_schedule_prep(dev, &cp->napi)) {
cpw16_f(IntrMask, cp_norx_intr_mask);
__netif_rx_schedule(dev);
__netif_rx_schedule(dev, &cp->napi);
}
if (status & (TxOK | TxErr | TxEmpty | SWInt))
......@@ -1175,6 +1172,8 @@ static int cp_open (struct net_device *dev)
if (rc)
return rc;
napi_enable(&cp->napi);
cp_init_hw(cp);
rc = request_irq(dev->irq, cp_interrupt, IRQF_SHARED, dev->name, dev);
......@@ -1188,6 +1187,7 @@ static int cp_open (struct net_device *dev)
return 0;
err_out_hw:
napi_disable(&cp->napi);
cp_stop_hw(cp);
cp_free_rings(cp);
return rc;
......@@ -1198,6 +1198,8 @@ static int cp_close (struct net_device *dev)
struct cp_private *cp = netdev_priv(dev);
unsigned long flags;
napi_disable(&cp->napi);
if (netif_msg_ifdown(cp))
printk(KERN_DEBUG "%s: disabling interface\n", dev->name);
......@@ -1933,11 +1935,10 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
dev->hard_start_xmit = cp_start_xmit;
dev->get_stats = cp_get_stats;
dev->do_ioctl = cp_ioctl;
dev->poll = cp_rx_poll;
#ifdef CONFIG_NET_POLL_CONTROLLER
dev->poll_controller = cp_poll_controller;
#endif
dev->weight = 16; /* arbitrary? from NAPI_HOWTO.txt. */
netif_napi_add(dev, &cp->napi, cp_rx_poll, 16);
#ifdef BROKEN
dev->change_mtu = cp_change_mtu;
#endif
......
......@@ -573,6 +573,8 @@ struct rtl8139_private {
int drv_flags;
struct pci_dev *pci_dev;
u32 msg_enable;
struct napi_struct napi;
struct net_device *dev;
struct net_device_stats stats;
unsigned char *rx_ring;
unsigned int cur_rx; /* Index into the Rx buffer of next Rx pkt. */
......@@ -625,10 +627,10 @@ static void rtl8139_tx_timeout (struct net_device *dev);
static void rtl8139_init_ring (struct net_device *dev);
static int rtl8139_start_xmit (struct sk_buff *skb,
struct net_device *dev);
static int rtl8139_poll(struct net_device *dev, int *budget);
#ifdef CONFIG_NET_POLL_CONTROLLER
static void rtl8139_poll_controller(struct net_device *dev);
#endif
static int rtl8139_poll(struct napi_struct *napi, int budget);
static irqreturn_t rtl8139_interrupt (int irq, void *dev_instance);
static int rtl8139_close (struct net_device *dev);
static int netdev_ioctl (struct net_device *dev, struct ifreq *rq, int cmd);
......@@ -963,6 +965,7 @@ static int __devinit rtl8139_init_one (struct pci_dev *pdev,
assert (dev != NULL);
tp = netdev_priv(dev);
tp->dev = dev;
ioaddr = tp->mmio_addr;
assert (ioaddr != NULL);
......@@ -976,8 +979,7 @@ static int __devinit rtl8139_init_one (struct pci_dev *pdev,
/* The Rtl8139-specific entries in the device structure. */
dev->open = rtl8139_open;
dev->hard_start_xmit = rtl8139_start_xmit;
dev->poll = rtl8139_poll;
dev->weight = 64;
netif_napi_add(dev, &tp->napi, rtl8139_poll, 64);
dev->stop = rtl8139_close;
dev->get_stats = rtl8139_get_stats;
dev->set_multicast_list = rtl8139_set_rx_mode;
......@@ -1332,6 +1334,8 @@ static int rtl8139_open (struct net_device *dev)
}
napi_enable(&tp->napi);
tp->mii.full_duplex = tp->mii.force_media;
tp->tx_flag = (TX_FIFO_THRESH << 11) & 0x003f0000;
......@@ -2103,39 +2107,32 @@ static void rtl8139_weird_interrupt (struct net_device *dev,
}
}
static int rtl8139_poll(struct net_device *dev, int *budget)
static int rtl8139_poll(struct napi_struct *napi, int budget)
{
struct rtl8139_private *tp = netdev_priv(dev);
struct rtl8139_private *tp = container_of(napi, struct rtl8139_private, napi);
struct net_device *dev = tp->dev;
void __iomem *ioaddr = tp->mmio_addr;
int orig_budget = min(*budget, dev->quota);
int done = 1;
int work_done;
spin_lock(&tp->rx_lock);
if (likely(RTL_R16(IntrStatus) & RxAckBits)) {
int work_done;
work_done = rtl8139_rx(dev, tp, orig_budget);
if (likely(work_done > 0)) {
*budget -= work_done;
dev->quota -= work_done;
done = (work_done < orig_budget);
}
}
work_done = 0;
if (likely(RTL_R16(IntrStatus) & RxAckBits))
work_done += rtl8139_rx(dev, tp, budget);
if (done) {
if (work_done < budget) {
unsigned long flags;
/*
* Order is important since data can get interrupted
* again when we think we are done.
*/
local_irq_save(flags);
spin_lock_irqsave(&tp->lock, flags);
RTL_W16_F(IntrMask, rtl8139_intr_mask);
__netif_rx_complete(dev);
local_irq_restore(flags);
__netif_rx_complete(dev, napi);
spin_unlock_irqrestore(&tp->lock, flags);
}
spin_unlock(&tp->rx_lock);
return !done;
return work_done;
}
/* The interrupt handler does all of the Rx thread work and cleans up
......@@ -2180,9 +2177,9 @@ static irqreturn_t rtl8139_interrupt (int irq, void *dev_instance)
/* Receive packets are processed by poll routine.
If not running start it now. */
if (status & RxAckBits){
if (netif_rx_schedule_prep(dev)) {
if (netif_rx_schedule_prep(dev, &tp->napi)) {
RTL_W16_F (IntrMask, rtl8139_norx_intr_mask);
__netif_rx_schedule (dev);
__netif_rx_schedule(dev, &tp->napi);
}
}
......@@ -2223,7 +2220,8 @@ static int rtl8139_close (struct net_device *dev)
void __iomem *ioaddr = tp->mmio_addr;
unsigned long flags;
netif_stop_queue (dev);
netif_stop_queue(dev);
napi_disable(&tp->napi);
if (netif_msg_ifdown(tp))
printk(KERN_DEBUG "%s: Shutting down ethercard, status was 0x%4.4x.\n",
......
......@@ -723,9 +723,10 @@ static int amd8111e_tx(struct net_device *dev)
#ifdef CONFIG_AMD8111E_NAPI
/* This function handles the driver receive operation in polling mode */
static int amd8111e_rx_poll(struct net_device *dev, int * budget)
static int amd8111e_rx_poll(struct napi_struct *napi, int budget)
{
struct amd8111e_priv *lp = netdev_priv(dev);
struct amd8111e_priv *lp = container_of(napi, struct amd8111e_priv, napi);
struct net_device *dev = lp->amd8111e_net_dev;
int rx_index = lp->rx_idx & RX_RING_DR_MOD_MASK;
void __iomem *mmio = lp->mmio;
struct sk_buff *skb,*new_skb;
......@@ -737,7 +738,7 @@ static int amd8111e_rx_poll(struct net_device *dev, int * budget)
#if AMD8111E_VLAN_TAG_USED
short vtag;
#endif
int rx_pkt_limit = dev->quota;
int rx_pkt_limit = budget;
unsigned long flags;
do{
......@@ -838,21 +839,14 @@ static int amd8111e_rx_poll(struct net_device *dev, int * budget)
} while(intr0 & RINT0);
/* Receive descriptor is empty now */
dev->quota -= num_rx_pkt;
*budget -= num_rx_pkt;
spin_lock_irqsave(&lp->lock, flags);
netif_rx_complete(dev);
__netif_rx_complete(dev, napi);
writel(VAL0|RINTEN0, mmio + INTEN0);
writel(VAL2 | RDMD0, mmio + CMD0);
spin_unlock_irqrestore(&lp->lock, flags);
return 0;
rx_not_empty:
/* Do not call a netif_rx_complete */
dev->quota -= num_rx_pkt;
*budget -= num_rx_pkt;
return 1;
return num_rx_pkt;
}
#else
......@@ -1287,11 +1281,11 @@ static irqreturn_t amd8111e_interrupt(int irq, void *dev_id)
/* Check if Receive Interrupt has occurred. */
#ifdef CONFIG_AMD8111E_NAPI
if(intr0 & RINT0){
if(netif_rx_schedule_prep(dev)){
if(netif_rx_schedule_prep(dev, &lp->napi)){
/* Disable receive interupts */
writel(RINTEN0, mmio + INTEN0);
/* Schedule a polling routine */
__netif_rx_schedule(dev);
__netif_rx_schedule(dev, &lp->napi);
}
else if (intren0 & RINTEN0) {
printk("************Driver bug! \
......@@ -1345,6 +1339,8 @@ static int amd8111e_close(struct net_device * dev)
struct amd8111e_priv *lp = netdev_priv(dev);
netif_stop_queue(dev);
napi_disable(&lp->napi);
spin_lock_irq(&lp->lock);
amd8111e_disable_interrupt(lp);
......@@ -1375,12 +1371,15 @@ static int amd8111e_open(struct net_device * dev )
dev->name, dev))
return -EAGAIN;
napi_enable(&lp->napi);
spin_lock_irq(&lp->lock);
amd8111e_init_hw_default(lp);
if(amd8111e_restart(dev)){
spin_unlock_irq(&lp->lock);
napi_disable(&lp->napi);
if (dev->irq)
free_irq(dev->irq, dev);
return -ENOMEM;
......@@ -2031,8 +2030,7 @@ static int __devinit amd8111e_probe_one(struct pci_dev *pdev,
dev->tx_timeout = amd8111e_tx_timeout;
dev->watchdog_timeo = AMD8111E_TX_TIMEOUT;
#ifdef CONFIG_AMD8111E_NAPI
dev->poll = amd8111e_rx_poll;
dev->weight = 32;
netif_napi_add(dev, &lp->napi, amd8111e_rx_poll, 32);
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
dev->poll_controller = amd8111e_poll;
......
......@@ -763,6 +763,8 @@ struct amd8111e_priv{
/* Reg memory mapped address */
void __iomem *mmio;
struct napi_struct napi;
spinlock_t lock; /* Guard lock */
unsigned long rx_idx, tx_idx; /* The next free ring entry */
unsigned long tx_complete_idx;
......
......@@ -169,6 +169,9 @@ struct ep93xx_priv
spinlock_t tx_pending_lock;
unsigned int tx_pending;
struct net_device *dev;
struct napi_struct napi;
struct net_device_stats stats;
struct mii_if_info mii;
......@@ -190,15 +193,11 @@ static struct net_device_stats *ep93xx_get_stats(struct net_device *dev)
return &(ep->stats);
}
static int ep93xx_rx(struct net_device *dev, int *budget)
static int ep93xx_rx(struct net_device *dev, int processed, int budget)
{
struct ep93xx_priv *ep = netdev_priv(dev);
int rx_done;
int processed;
rx_done = 0;
processed = 0;
while (*budget > 0) {
while (processed < budget) {
int entry;
struct ep93xx_rstat *rstat;
u32 rstat0;
......@@ -211,10 +210,8 @@ static int ep93xx_rx(struct net_device *dev, int *budget)
rstat0 = rstat->rstat0;
rstat1 = rstat->rstat1;
if (!(rstat0 & RSTAT0_RFP) || !(rstat1 & RSTAT1_RFP)) {
rx_done = 1;
if (!(rstat0 & RSTAT0_RFP) || !(rstat1 & RSTAT1_RFP))
break;
}
rstat->rstat0 = 0;
rstat->rstat1 = 0;
......@@ -275,8 +272,6 @@ static int ep93xx_rx(struct net_device *dev, int *budget)
err:
ep->rx_pointer = (entry + 1) & (RX_QUEUE_ENTRIES - 1);
processed++;
dev->quota--;
(*budget)--;