From c5f5877c043ca471c3a607fa2c864848b19bc49a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Sun, 25 Mar 2007 20:21:15 -0700
Subject: [PATCH] [TCP] tcp_cubic: faster cube root

The Newton-Raphson method is quadratically convergent so
only a small fixed number of steps are necessary.
Therefore it is faster to unroll the loop. Since div64_64 is no longer
inline it won't cause code explosion.

Also fixes a bug that can occur if x^2 was bigger than 32 bits.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 6f08adbda54e..0e6cdfeb207a 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -96,23 +96,17 @@ static void bictcp_init(struct sock *sk)
  */
 static u32 cubic_root(u64 a)
 {
-	u32 x, x1;
+	u32 x;
 
 	/* Initial estimate is based on:
 	 * cbrt(x) = exp(log(x) / 3)
 	 */
 	x = 1u << (fls64(a)/3);
 
-	/*
-	 * Iteration based on:
-	 *                         2
-	 * x    = ( 2 * x  +  a / x  ) / 3
-	 *  k+1          k         k
-	 */
-	do {
-		x1 = x;
-		x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
-	} while (abs(x1 - x) > 1);
+	/* converges to 32 bits in 3 iterations */
+	x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
+	x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
+	x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
 
 	return x;
 }
-- 
GitLab