Commit 53fda7f7 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'xmit_list'



David Miller says:

====================
net: Make dev_hard_start_xmit() work fundamentally on lists

After this patch set, dev_hard_start_xmit() will work fundamentally on
any and all SKB lists.

This opens the path for a clean implementation of pulling multiple
packets out during qdisc_restart(), and then passing that blob in one
shot to dev_hard_start_xmit().

There were two main architectural blockers to this:

1) The GSO handling, we kept the original GSO head SKB around simply
   because dev_hard_start_xmit() had no way to communicate to the
   caller how far into the segmented list it was able to go.  Now it
   can, so the head GSO can be liberated immediately.

   All of the special GSO head SKB destructor et al. handling goes
   away too.

2) Validation of VLAN, CSUM, and segmentation characteristics was being
   performed inside of dev_hard_start_xmit().  If we want to truly batch,
   we have to let the higher levels do this.  In particular, this is
   now dequeue_skb()'s job.

And with those two issues out of the way, it should now be trivial to
build experiments on top of this patch set, all of the framework
should be there now.  You could do something as simple as:

	skb = q->dequeue(q);
	if (skb)
		skb = validate_xmit_skb(skb, qdisc_dev(q));
	if (skb) {
		struct sk_buff *new, *head = skb;
		int limit = 5;

		do {
			new = q->dequeue(q);
			if (new)
				new = validate_xmit_skb(new, qdisc_dev(q));
			if (new) {
				skb->next = new;
				skb = new;
			}
		} while (new && --limit);
		skb = head;
	}

inside of the else branch of dequeue_skb().
Signed-off-by: David S. Miller <davem@davemloft.net>
parents dace1b54 8dcda22a
......@@ -192,8 +192,10 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev)
{
struct dlci_local *dlp = netdev_priv(dev);
if (skb)
netdev_start_xmit(skb, dlp->slave);
if (skb) {
struct netdev_queue *txq = skb_get_tx_queue(dev, skb);
netdev_start_xmit(skb, dlp->slave, txq, false);
}
return NETDEV_TX_OK;
}
......
......@@ -2827,8 +2827,9 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *);
int dev_change_carrier(struct net_device *, bool new_carrier);
int dev_get_phys_port_id(struct net_device *dev,
struct netdev_phys_port_id *ppid);
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq);
struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, int *ret);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb);
......@@ -3431,17 +3432,24 @@ int __init dev_proc_init(void);
#endif
static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
struct sk_buff *skb, struct net_device *dev)
struct sk_buff *skb, struct net_device *dev,
bool more)
{
skb->xmit_more = 0;
skb->xmit_more = more ? 1 : 0;
return ops->ndo_start_xmit(skb, dev);
}
static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, bool more)
{
const struct net_device_ops *ops = dev->netdev_ops;
int rc;
return __netdev_start_xmit(ops, skb, dev);
rc = __netdev_start_xmit(ops, skb, dev, more);
if (rc == NETDEV_TX_OK)
txq_trans_update(txq);
return rc;
}
int netdev_class_create_file_ns(struct class_attribute *class_attr,
......
......@@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb,
}
non_ip:
return __netdev_start_xmit(mpc->old_ops, skb, dev);
return __netdev_start_xmit(mpc->old_ops, skb, dev, false);
}
static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
......
......@@ -2485,52 +2485,6 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
return 0;
}
struct dev_gso_cb {
void (*destructor)(struct sk_buff *skb);
};
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
static void dev_gso_skb_destructor(struct sk_buff *skb)
{
struct dev_gso_cb *cb;
kfree_skb_list(skb->next);
skb->next = NULL;
cb = DEV_GSO_CB(skb);
if (cb->destructor)
cb->destructor(skb);
}
/**
* dev_gso_segment - Perform emulated hardware segmentation on skb.
* @skb: buffer to segment
* @features: device features as applicable to this skb
*
* This function segments the given skb and stores the list of segments
* in skb->next.
*/
static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
{
struct sk_buff *segs;
segs = skb_gso_segment(skb, features);
/* Verifying header integrity only. */
if (!segs)
return 0;
if (IS_ERR(segs))
return PTR_ERR(segs);
skb->next = segs;
DEV_GSO_CB(skb)->destructor = skb->destructor;
skb->destructor = dev_gso_skb_destructor;
return 0;
}
/* If MPLS offload request, verify we are testing hardware MPLS features
* instead of standard features for the netdev.
*/
......@@ -2599,118 +2553,125 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
}
EXPORT_SYMBOL(netif_skb_features);
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
static int xmit_one(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, bool more)
{
unsigned int len;
int rc;
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
len = skb->len;
trace_net_dev_start_xmit(skb, dev);
rc = netdev_start_xmit(skb, dev, txq, more);
trace_net_dev_xmit(skb, rc, dev, len);
return rc;
}
struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
struct netdev_queue *txq, int *ret)
{
struct sk_buff *skb = first;
int rc = NETDEV_TX_OK;
unsigned int skb_len;
if (likely(!skb->next)) {
netdev_features_t features;
while (skb) {
struct sk_buff *next = skb->next;
/*
* If device doesn't need skb->dst, release it right now while
* its hot in this cpu cache
*/
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(skb);
skb->next = NULL;
rc = xmit_one(skb, dev, txq, next != NULL);
if (unlikely(!dev_xmit_complete(rc))) {
skb->next = next;
goto out;
}
features = netif_skb_features(skb);
skb = next;
if (netif_xmit_stopped(txq) && skb) {
rc = NETDEV_TX_BUSY;
break;
}
}
if (vlan_tx_tag_present(skb) &&
!vlan_hw_offload_capable(features, skb->vlan_proto)) {
skb = __vlan_put_tag(skb, skb->vlan_proto,
vlan_tx_tag_get(skb));
if (unlikely(!skb))
goto out;
out:
*ret = rc;
return skb;
}
struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features)
{
if (vlan_tx_tag_present(skb) &&
!vlan_hw_offload_capable(features, skb->vlan_proto)) {
skb = __vlan_put_tag(skb, skb->vlan_proto,
vlan_tx_tag_get(skb));
if (skb)
skb->vlan_tci = 0;
}
}
return skb;
}
/* If encapsulation offload request, verify we are testing
* hardware encapsulation features instead of standard
* features for the netdev
*/
if (skb->encapsulation)
features &= dev->hw_enc_features;
struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
{
netdev_features_t features;
if (netif_needs_gso(skb, features)) {
if (unlikely(dev_gso_segment(skb, features)))
goto out_kfree_skb;
if (skb->next)
goto gso;
} else {
if (skb_needs_linearize(skb, features) &&
__skb_linearize(skb))
goto out_kfree_skb;
if (skb->next)
return skb;
/* If packet is not checksummed and device does not
* support checksumming for this protocol, complete
* checksumming here.
*/
if (skb->ip_summed == CHECKSUM_PARTIAL) {
if (skb->encapsulation)
skb_set_inner_transport_header(skb,
skb_checksum_start_offset(skb));
else
skb_set_transport_header(skb,
skb_checksum_start_offset(skb));
if (!(features & NETIF_F_ALL_CSUM) &&
skb_checksum_help(skb))
goto out_kfree_skb;
}
}
/* If device doesn't need skb->dst, release it right now while
* its hot in this cpu cache
*/
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(skb);
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
features = netif_skb_features(skb);
skb = validate_xmit_vlan(skb, features);
if (unlikely(!skb))
goto out_null;
skb_len = skb->len;
trace_net_dev_start_xmit(skb, dev);
rc = netdev_start_xmit(skb, dev);
trace_net_dev_xmit(skb, rc, dev, skb_len);
if (rc == NETDEV_TX_OK)
txq_trans_update(txq);
return rc;
}
/* If encapsulation offload request, verify we are testing
* hardware encapsulation features instead of standard
* features for the netdev
*/
if (skb->encapsulation)
features &= dev->hw_enc_features;
gso:
do {
struct sk_buff *nskb = skb->next;
if (netif_needs_gso(skb, features)) {
struct sk_buff *segs;
skb->next = nskb->next;
nskb->next = NULL;
segs = skb_gso_segment(skb, features);
kfree_skb(skb);
if (IS_ERR(segs))
segs = NULL;
skb = segs;
} else {
if (skb_needs_linearize(skb, features) &&
__skb_linearize(skb))
goto out_kfree_skb;
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(nskb, dev);
skb_len = nskb->len;
trace_net_dev_start_xmit(nskb, dev);
rc = netdev_start_xmit(nskb, dev);
trace_net_dev_xmit(nskb, rc, dev, skb_len);
if (unlikely(rc != NETDEV_TX_OK)) {
if (rc & ~NETDEV_TX_MASK)
goto out_kfree_gso_skb;
nskb->next = skb->next;
skb->next = nskb;
return rc;
/* If packet is not checksummed and device does not
* support checksumming for this protocol, complete
* checksumming here.
*/
if (skb->ip_summed == CHECKSUM_PARTIAL) {
if (skb->encapsulation)
skb_set_inner_transport_header(skb,
skb_checksum_start_offset(skb));
else
skb_set_transport_header(skb,
skb_checksum_start_offset(skb));
if (!(features & NETIF_F_ALL_CSUM) &&
skb_checksum_help(skb))
goto out_kfree_skb;
}
txq_trans_update(txq);
if (unlikely(netif_xmit_stopped(txq) && skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);
out_kfree_gso_skb:
if (likely(skb->next == NULL)) {
skb->destructor = DEV_GSO_CB(skb)->destructor;
consume_skb(skb);
return rc;
}
return skb;
out_kfree_skb:
kfree_skb(skb);
out:
return rc;
out_null:
return NULL;
}
EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
static void qdisc_pkt_len_init(struct sk_buff *skb)
{
......@@ -2922,7 +2883,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
if (!netif_xmit_stopped(txq)) {
__this_cpu_inc(xmit_recursion);
rc = dev_hard_start_xmit(skb, dev, txq);
skb = dev_hard_start_xmit(skb, dev, txq, &rc);
__this_cpu_dec(xmit_recursion);
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
......
......@@ -91,9 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb->vlan_tci = 0;
}
status = netdev_start_xmit(skb, dev);
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
status = netdev_start_xmit(skb, dev, txq, false);
out:
return status;
......
......@@ -3335,11 +3335,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
goto unlock;
}
atomic_inc(&(pkt_dev->skb->users));
ret = netdev_start_xmit(pkt_dev->skb, odev);
ret = netdev_start_xmit(pkt_dev->skb, odev, txq, false);
switch (ret) {
case NETDEV_TX_OK:
txq_trans_update(txq);
pkt_dev->last_ok = 1;
pkt_dev->sofar++;
pkt_dev->seq_num++;
......
......@@ -258,11 +258,8 @@ static int packet_direct_xmit(struct sk_buff *skb)
local_bh_disable();
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_drv_stopped(txq)) {
ret = netdev_start_xmit(skb, dev);
if (ret == NETDEV_TX_OK)
txq_trans_update(txq);
}
if (!netif_xmit_frozen_or_drv_stopped(txq))
ret = netdev_start_xmit(skb, dev, txq, false);
HARD_TX_UNLOCK(dev, txq);
local_bh_enable();
......
......@@ -70,8 +70,11 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
} else
skb = NULL;
} else {
if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq))
if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) {
skb = q->dequeue(q);
if (skb)
skb = validate_xmit_skb(skb, qdisc_dev(q));
}
}
return skb;
......@@ -126,7 +129,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_stopped(txq))
ret = dev_hard_start_xmit(skb, dev, txq);
skb = dev_hard_start_xmit(skb, dev, txq, &ret);
HARD_TX_UNLOCK(dev, txq);
......
......@@ -316,8 +316,8 @@ restart:
unsigned int length = qdisc_pkt_len(skb);
if (!netif_xmit_frozen_or_stopped(slave_txq) &&
netdev_start_xmit(skb, slave) == NETDEV_TX_OK) {
txq_trans_update(slave_txq);
netdev_start_xmit(skb, slave, slave_txq, false) ==
NETDEV_TX_OK) {
__netif_tx_unlock(slave_txq);
master->slaves = NEXT_SLAVE(q);
netif_wake_queue(dev);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment