diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 6df0732401353f56524d1ee1d60ef2a9358bae8d..318b811006236fe08b7508e3a16f6ccaa326a28d 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1,4 +1,3 @@
-
 /*
  * kvm_ia64.c: Basic KVM suppport On Itanium series processors
  *
@@ -431,7 +430,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 	if (itc_diff < 0)
 		itc_diff = -itc_diff;
 
-	expires = div64_64(itc_diff, cyc_per_usec);
+	expires = div64_u64(itc_diff, cyc_per_usec);
 	kt = ktime_set(0, 1000 * expires);
 	vcpu->arch.ht_active = 1;
 	hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 361e31611276d03018f4b16d275a04b5e478f2cf..4c943eabacc39ecf39d9e4a21ecae64ea3bc8d6a 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -35,7 +35,7 @@
 #include "i8254.h"
 
 #ifndef CONFIG_X86_64
-#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
 #else
 #define mod_64(x, y) ((x) % (y))
 #endif
@@ -60,8 +60,8 @@ static u64 muldiv64(u64 a, u32 b, u32 c)
 	rl = (u64)u.l.low * (u64)b;
 	rh = (u64)u.l.high * (u64)b;
 	rh += (rl >> 32);
-	res.l.high = div64_64(rh, c);
-	res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+	res.l.high = div64_u64(rh, c);
+	res.l.low = div64_u64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
 	return res.ll;
 }
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 57ac4e4c556a5a3ca5bc79a8acbd1e35f3f33d9d..36809d79788bedffcd8332609c7313175aa09e3a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -25,13 +25,13 @@
 #include <linux/hrtimer.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/math64.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/page.h>
 #include <asm/current.h>
 #include <asm/apicdef.h>
 #include <asm/atomic.h>
-#include <asm/div64.h>
 #include "irq.h"
 
 #define PRId64 "d"
@@ -526,8 +526,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
 	} else
 		passed = ktime_sub(now, apic->timer.last_update);
 
-	counter_passed = div64_64(ktime_to_ns(passed),
-				  (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
+	counter_passed = div64_u64(ktime_to_ns(passed),
+				   (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
 
 	if (counter_passed > tmcct) {
 		if (unlikely(!apic_lvtt_period(apic))) {
diff --git a/include/asm-arm/div64.h b/include/asm-arm/div64.h
index 0b5f881c3d85a252481f30dc713b60a91f5d7c9f..5001390be9582fc1c937aa562671c23cd403f957 100644
--- a/include/asm-arm/div64.h
+++ b/include/asm-arm/div64.h
@@ -224,6 +224,4 @@
 
 #endif
 
-extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
-
 #endif
diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
index a4a49370793c49ac48a52d52b20691863d4c6483..8f4e3193342e589b8c619e8c2673c3d5544e49a9 100644
--- a/include/asm-generic/div64.h
+++ b/include/asm-generic/div64.h
@@ -30,11 +30,6 @@
 	__rem;							\
  })
 
-static inline uint64_t div64_64(uint64_t dividend, uint64_t divisor)
-{
-	return dividend / divisor;
-}
-
 #elif BITS_PER_LONG == 32
 
 extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
@@ -54,8 +49,6 @@ extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
 	__rem;						\
  })
 
-extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
-
 #else /* BITS_PER_LONG == ?? */
 
 # error do_div() does not yet support the C64
diff --git a/include/asm-m68k/div64.h b/include/asm-m68k/div64.h
index 33caad1628d4276619daa5b442319a386f4d17a3..8243c931b5c065f4578989361be6f9613b49eb8e 100644
--- a/include/asm-m68k/div64.h
+++ b/include/asm-m68k/div64.h
@@ -25,5 +25,4 @@
 	__rem;							\
 })
 
-extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
 #endif /* _M68K_DIV64_H */
diff --git a/include/asm-mips/div64.h b/include/asm-mips/div64.h
index 716371bd098076c7297e5ef90a2400d42387c620..d1d699105c1106b724340104786f95ce8df39188 100644
--- a/include/asm-mips/div64.h
+++ b/include/asm-mips/div64.h
@@ -82,7 +82,6 @@
 	(n) = __quot; \
 	__mod; })
 
-extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
 #endif /* (_MIPS_SZLONG == 32) */
 
 #if (_MIPS_SZLONG == 64)
@@ -106,11 +105,6 @@ extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
 	(n) = __quot; \
 	__mod; })
 
-static inline uint64_t div64_64(uint64_t dividend, uint64_t divisor)
-{
-	return dividend / divisor;
-}
-
 #endif /* (_MIPS_SZLONG == 64) */
 
 #endif /* _ASM_DIV64_H */
diff --git a/include/asm-mn10300/div64.h b/include/asm-mn10300/div64.h
index bf9c515a998c8e0bc613952229f41ebe967fd4c4..3a8329b3e8694493c6b018eea7820873d04110f8 100644
--- a/include/asm-mn10300/div64.h
+++ b/include/asm-mn10300/div64.h
@@ -97,7 +97,4 @@ signed __muldiv64s(signed val, signed mult, signed div)
 	return result;
 }
 
-extern __attribute__((const))
-uint64_t div64_64(uint64_t dividend, uint64_t divisor);
-
 #endif /* _ASM_DIV64 */
diff --git a/include/asm-um/div64.h b/include/asm-um/div64.h
index 7b73b2cd5b340be8212d45657eb44ad9cda71425..1e17f7409cabc072df18f3f516b0df0bc2a58506 100644
--- a/include/asm-um/div64.h
+++ b/include/asm-um/div64.h
@@ -3,5 +3,4 @@
 
 #include "asm/arch/div64.h"
 
-extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
 #endif
diff --git a/include/asm-x86/div64.h b/include/asm-x86/div64.h
index c7892cfe9ce65d5e92e09bdd011791c04e24274b..32fdbddaae555947d13641e25c27bfc6b987941d 100644
--- a/include/asm-x86/div64.h
+++ b/include/asm-x86/div64.h
@@ -71,8 +71,6 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
 }
 #define div_u64_rem	div_u64_rem
 
-extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
-
 #else
 # include <asm-generic/div64.h>
 #endif /* CONFIG_X86_32 */
diff --git a/include/linux/math64.h b/include/linux/math64.h
index 6d171664100856e9689787d56cd42371646c0b02..c1a5f81501ff9f3f16edbd9c8e10aec48408aeee 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -27,6 +27,14 @@ static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
 	return dividend / divisor;
 }
 
+/**
+ * div64_u64 - unsigned 64bit divide with 64bit divisor, returns quotient
+ */
+static inline u64 div64_u64(u64 dividend, u64 divisor)
+{
+	return dividend / divisor;
+}
+
 #elif BITS_PER_LONG == 32
 
 #ifndef div_u64_rem
@@ -41,6 +49,10 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
 extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder);
 #endif
 
+#ifndef div64_u64
+extern u64 div64_u64(u64 dividend, u64 divisor);
+#endif
+
 #endif /* BITS_PER_LONG */
 
 /**
diff --git a/kernel/sched.c b/kernel/sched.c
index e2f7f5acc80778328a892961d8e26faa7c4c8841..34bcc5bc120e5f2da970e180dfaf65d73b80dc7d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8025,7 +8025,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 
 	se->my_q = cfs_rq;
 	se->load.weight = tg->shares;
-	se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
+	se->load.inv_weight = div64_u64(1ULL<<32, se->load.weight);
 	se->parent = parent;
 }
 #endif
@@ -8692,7 +8692,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares)
 		dequeue_entity(cfs_rq, se, 0);
 
 	se->load.weight = shares;
-	se->load.inv_weight = div64_64((1ULL<<32), shares);
+	se->load.inv_weight = div64_u64((1ULL<<32), shares);
 
 	if (on_rq)
 		enqueue_entity(cfs_rq, se, 0);
@@ -8787,7 +8787,7 @@ static unsigned long to_ratio(u64 period, u64 runtime)
 	if (runtime == RUNTIME_INF)
 		return 1ULL << 16;
 
-	return div64_64(runtime << 16, period);
+	return div64_u64(runtime << 16, period);
 }
 
 #ifdef CONFIG_CGROUP_SCHED
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 8a9498e7c8311e582f35709c7ffc3019fda2f601..6b4a12558e88b94893afeb326c30a21da6269fbf 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -357,8 +357,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 
 		avg_per_cpu = p->se.sum_exec_runtime;
 		if (p->se.nr_migrations) {
-			avg_per_cpu = div64_64(avg_per_cpu,
-					       p->se.nr_migrations);
+			avg_per_cpu = div64_u64(avg_per_cpu,
+						p->se.nr_migrations);
 		} else {
 			avg_per_cpu = -1LL;
 		}
diff --git a/lib/div64.c b/lib/div64.c
index 689bd76833faf4ae7eed39efc50bc52d766f5135..bb5bd0c0f030a9cf44c57ef07b50fac3e7170ce8 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -78,9 +78,10 @@ EXPORT_SYMBOL(div_s64_rem);
 #endif
 
 /* 64bit divisor, dividend and result. dynamic precision */
-uint64_t div64_64(uint64_t dividend, uint64_t divisor)
+#ifndef div64_u64
+u64 div64_u64(u64 dividend, u64 divisor)
 {
-	uint32_t high, d;
+	u32 high, d;
 
 	high = divisor >> 32;
 	if (high) {
@@ -91,10 +92,9 @@ uint64_t div64_64(uint64_t dividend, uint64_t divisor)
 	} else
 		d = divisor;
 
-	do_div(dividend, d);
-
-	return dividend;
+	return div_u64(dividend, d);
 }
-EXPORT_SYMBOL(div64_64);
+EXPORT_SYMBOL(div64_u64);
+#endif
 
 #endif /* BITS_PER_LONG == 32 */
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index eb5b9854c8c7330791ada69b8c9e8695f7a73f3d..4a1221e5e8ee2ec8b7d1a5157aa063f6a3f8011e 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -15,8 +15,8 @@
 
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/math64.h>
 #include <net/tcp.h>
-#include <asm/div64.h>
 
 #define BICTCP_BETA_SCALE    1024	/* Scale factor beta calculation
 					 * max_cwnd = snd_cwnd * beta
@@ -128,7 +128,7 @@ static u32 cubic_root(u64 a)
 	 * x    = ( 2 * x  +  a / x  ) / 3
 	 *  k+1          k         k
 	 */
-	x = (2 * x + (u32)div64_64(a, (u64)x * (u64)(x - 1)));
+	x = (2 * x + (u32)div64_u64(a, (u64)x * (u64)(x - 1)));
 	x = ((x * 341) >> 10);
 	return x;
 }
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index b15e7e2fa14328612cdf4de25150cc62604e8a58..d7e8983cd37f6303921edf25bc201eca7e99c4a2 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -4,12 +4,11 @@
 #include <linux/module.h>
 #include <linux/bitops.h>
 #include <linux/skbuff.h>
+#include <linux/math64.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_connbytes.h>
 #include <net/netfilter/nf_conntrack.h>
 
-#include <asm/div64.h>
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: Number of packets/bytes per connection matching");
@@ -82,7 +81,7 @@ connbytes_mt(const struct sk_buff *skb, const struct net_device *in,
 			break;
 		}
 		if (pkts != 0)
-			what = div64_64(bytes, pkts);
+			what = div64_u64(bytes, pkts);
 		break;
 	}