diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f6f2c505fa7efd9d5aa34d249f2061515dd30415..a367ed5bb3fecdc0f1853c77f40f07c3f8c9c0ec 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -652,7 +652,7 @@ typedef struct pglist_data {
 
 void get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free);
-void build_all_zonelists(void);
+void build_all_zonelists(void *data);
 void wakeup_kswapd(struct zone *zone, int order);
 int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		int classzone_idx, int alloc_flags);
diff --git a/init/main.c b/init/main.c
index 22881b5e95e3b00e010b407310539fbd4e1a8a57..3bdb152f412f32b8edeeb1036a24f934988b5be2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -567,7 +567,7 @@ asmlinkage void __init start_kernel(void)
 	setup_per_cpu_areas();
 	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
 
-	build_all_zonelists();
+	build_all_zonelists(NULL);
 	page_alloc_init();
 
 	printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a3fbcc0a0abc15976dc78423a985a6c522180c18..3e8b3ba27175f4c13ed217aeaf3771a127ad6553 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -358,7 +358,7 @@ int __cpuinit cpu_up(unsigned int cpu)
 	}
 
 	if (pgdat->node_zonelists->_zonerefs->zone == NULL)
-		build_all_zonelists();
+		build_all_zonelists(NULL);
 #endif
 
 	cpu_maps_update_begin();
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 85eb4d342ac5010ccc0997be9a251d20cb82bb50..089cc97aed3c5506e22487e158106502c6dde666 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -389,6 +389,11 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	int nid;
 	int ret;
 	struct memory_notify arg;
+	/*
+	 * mutex to protect zone->pageset when it's still shared
+	 * in online_pages()
+	 */
+	static DEFINE_MUTEX(zone_pageset_mutex);
 
 	arg.start_pfn = pfn;
 	arg.nr_pages = nr_pages;
@@ -415,12 +420,14 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	 * This means the page allocator ignores this zone.
 	 * So, zonelist must be updated after online.
 	 */
+	mutex_lock(&zone_pageset_mutex);
 	if (!populated_zone(zone))
 		need_zonelists_rebuild = 1;
 
 	ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
 		online_pages_range);
 	if (ret) {
+		mutex_unlock(&zone_pageset_mutex);
 		printk(KERN_DEBUG "online_pages %lx at %lx failed\n",
 			nr_pages, pfn);
 		memory_notify(MEM_CANCEL_ONLINE, &arg);
@@ -429,8 +436,12 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 
 	zone->present_pages += onlined_pages;
 	zone->zone_pgdat->node_present_pages += onlined_pages;
+	if (need_zonelists_rebuild)
+		build_all_zonelists(zone);
+	else
+		zone_pcp_update(zone);
 
-	zone_pcp_update(zone);
+	mutex_unlock(&zone_pageset_mutex);
 	setup_per_zone_wmarks();
 	calculate_zone_inactive_ratio(zone);
 	if (onlined_pages) {
@@ -438,10 +449,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
 	}
 
-	if (need_zonelists_rebuild)
-		build_all_zonelists();
-	else
-		vm_total_pages = nr_free_pagecache_pages();
+	vm_total_pages = nr_free_pagecache_pages();
 
 	writeback_set_ratelimit();
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 595d0ac211e25d33b019968455bc030c8b56c9dc..21c52d2d8624a368c388e15e6df338441f6ba5ca 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2572,7 +2572,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 				NUMA_ZONELIST_ORDER_LEN);
 			user_zonelist_order = oldval;
 		} else if (oldval != user_zonelist_order)
-			build_all_zonelists();
+			build_all_zonelists(NULL);
 	}
 out:
 	mutex_unlock(&zl_order_mutex);
@@ -2922,9 +2922,10 @@ static void build_zonelist_cache(pg_data_t *pgdat)
  */
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
 static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
+static void setup_zone_pageset(struct zone *zone);
 
 /* return values int ....just for stop_machine() */
-static int __build_all_zonelists(void *dummy)
+static __init_refok int __build_all_zonelists(void *data)
 {
 	int nid;
 	int cpu;
@@ -2939,6 +2940,14 @@ static int __build_all_zonelists(void *dummy)
 		build_zonelist_cache(pgdat);
 	}
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+	/* Setup real pagesets for the new zone */
+	if (data) {
+		struct zone *zone = data;
+		setup_zone_pageset(zone);
+	}
+#endif
+
 	/*
 	 * Initialize the boot_pagesets that are going to be used
 	 * for bootstrapping processors. The real pagesets for
@@ -2958,7 +2967,7 @@ static int __build_all_zonelists(void *dummy)
 	return 0;
 }
 
-void build_all_zonelists(void)
+void build_all_zonelists(void *data)
 {
 	set_zonelist_order();
 
@@ -2969,7 +2978,7 @@ void build_all_zonelists(void)
 	} else {
 		/* we have to stop all cpus to guarantee there is no user
 		   of zonelist */
-		stop_machine(__build_all_zonelists, NULL, NULL);
+		stop_machine(__build_all_zonelists, data, NULL);
 		/* cpuset refresh routine should be here */
 	}
 	vm_total_pages = nr_free_pagecache_pages();