From 44713b67db10c774f14280c129b0d5fd13c70cf2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 4 Mar 2011 21:24:47 -0800
Subject: [PATCH] ipv4: Optimize flow initialization in output route lookup.

We burn a lot of useless cycles, cpu store buffer traffic, and
memory operations memset()'ing the on-stack flow used to perform
output route lookups in __ip_route_output_key().

Only the first half of the flow object members even matter for
output route lookups in this context, specifically:

FIB rules matching cares about:

	dst, src, tos, iif, oif, mark

FIB trie lookup cares about:

	dst

FIB semantic match cares about:

	tos, scope, oif

Therefore only initialize these specific members and elide the
memset entirely.

On Niagara2 this kills about 300 cycles from the output route
lookup path.

Likely, we can take things further, since all callers of output
route lookups essentially throw away the on-stack flow they use.
So they don't care if we use it as a scratch-pad to compute the
final flow key.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/ipv4/route.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 432eee645648..6c8740362ef9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2431,14 +2431,7 @@ static struct rtable *ip_route_output_slow(struct net *net,
 					   const struct flowi *oldflp)
 {
 	u32 tos	= RT_FL_TOS(oldflp);
-	struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
-			    .fl4_src = oldflp->fl4_src,
-			    .fl4_tos = tos & IPTOS_RT_MASK,
-			    .fl4_scope = ((tos & RTO_ONLINK) ?
-					  RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
-			    .mark = oldflp->mark,
-			    .iif = net->loopback_dev->ifindex,
-			    .oif = oldflp->oif };
+	struct flowi fl;
 	struct fib_result res;
 	unsigned int flags = 0;
 	struct net_device *dev_out = NULL;
@@ -2449,6 +2442,15 @@ static struct rtable *ip_route_output_slow(struct net *net,
 	res.r		= NULL;
 #endif
 
+	fl.oif = oldflp->oif;
+	fl.iif = net->loopback_dev->ifindex;
+	fl.mark = oldflp->mark;
+	fl.fl4_dst = oldflp->fl4_dst;
+	fl.fl4_src = oldflp->fl4_src;
+	fl.fl4_tos = tos & IPTOS_RT_MASK;
+	fl.fl4_scope = ((tos & RTO_ONLINK) ?
+			RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+
 	rcu_read_lock();
 	if (oldflp->fl4_src) {
 		rth = ERR_PTR(-EINVAL);
-- 
GitLab