Commit e6145236 authored by Matthew Wilcox's avatar Matthew Wilcox Committed by Linus Torvalds

radix_tree: add support for multi-order entries

With huge pages, it is convenient to have the radix tree be able to
return an entry that covers multiple indices.  Previous attempts to deal
with the problem have involved inserting N duplicate entries, which is a
waste of memory and leads to problems trying to handle aliased tags, or
probing the tree multiple times to find alternative entries which might
cover the requested index.

This approach inserts one canonical entry into the tree for a given
range of indices, and may also insert other entries in order to ensure
that lookups find the canonical entry.

This solution only tolerates inserting powers of two that are greater
than the fanout of the tree.  If we wish to expand the radix tree's
abilities to support large-ish pages that is less than the fanout at the
penultimate level of the tree, then we would need to add one more step
in lookup to ensure that any sibling nodes in the final level of the
tree are dereferenced and we return the canonical entry that they
reference.
Signed-off-by: default avatarMatthew Wilcox <willy@linux.intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 0070e28d
......@@ -271,8 +271,15 @@ static inline void radix_tree_replace_slot(void **pslot, void *item)
}
int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
struct radix_tree_node **nodep, void ***slotp);
int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
unsigned order, struct radix_tree_node **nodep,
void ***slotp);
int __radix_tree_insert(struct radix_tree_root *, unsigned long index,
unsigned order, void *);
static inline int radix_tree_insert(struct radix_tree_root *root,
unsigned long index, void *entry)
{
return __radix_tree_insert(root, index, 0, entry);
}
void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
struct radix_tree_node **nodep, void ***slotp);
void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
......
......@@ -333,7 +333,8 @@ static inline unsigned long radix_tree_maxindex(unsigned int height)
/*
* Extend a radix tree so it can store key @index.
*/
static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
static int radix_tree_extend(struct radix_tree_root *root,
unsigned long index, unsigned order)
{
struct radix_tree_node *node;
struct radix_tree_node *slot;
......@@ -345,7 +346,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
while (index > radix_tree_maxindex(height))
height++;
if (root->rnode == NULL) {
if ((root->rnode == NULL) && (order == 0)) {
root->height = height;
goto out;
}
......@@ -386,6 +387,7 @@ out:
* __radix_tree_create - create a slot in a radix tree
* @root: radix tree root
* @index: index key
* @order: index occupies 2^order aligned slots
* @nodep: returns node
* @slotp: returns slot
*
......@@ -399,26 +401,29 @@ out:
* Returns -ENOMEM, or 0 for success.
*/
int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
struct radix_tree_node **nodep, void ***slotp)
unsigned order, struct radix_tree_node **nodep,
void ***slotp)
{
struct radix_tree_node *node = NULL, *slot;
unsigned int height, shift, offset;
int error;
BUG_ON((0 < order) && (order < RADIX_TREE_MAP_SHIFT));
/* Make sure the tree is high enough. */
if (index > radix_tree_maxindex(root->height)) {
error = radix_tree_extend(root, index);
error = radix_tree_extend(root, index, order);
if (error)
return error;
}
slot = indirect_to_ptr(root->rnode);
slot = root->rnode;
height = root->height;
shift = height * RADIX_TREE_MAP_SHIFT;
offset = 0; /* uninitialised var warning */
while (shift > 0) {
while (shift > order) {
if (slot == NULL) {
/* Have to add a child node. */
if (!(slot = radix_tree_node_alloc(root)))
......@@ -433,15 +438,31 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
} else
rcu_assign_pointer(root->rnode,
ptr_to_indirect(slot));
}
} else if (!radix_tree_is_indirect_ptr(slot))
break;
/* Go a level down */
height--;
shift -= RADIX_TREE_MAP_SHIFT;
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
node = slot;
node = indirect_to_ptr(slot);
slot = node->slots[offset];
slot = indirect_to_ptr(slot);
height--;
}
/* Insert pointers to the canonical entry */
if ((shift - order) > 0) {
int i, n = 1 << (shift - order);
offset = offset & ~(n - 1);
slot = ptr_to_indirect(&node->slots[offset]);
for (i = 0; i < n; i++) {
if (node->slots[offset + i])
return -EEXIST;
}
for (i = 1; i < n; i++) {
rcu_assign_pointer(node->slots[offset + i], slot);
node->count++;
}
}
if (nodep)
......@@ -452,15 +473,16 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
}
/**
* radix_tree_insert - insert into a radix tree
* __radix_tree_insert - insert into a radix tree
* @root: radix tree root
* @index: index key
* @order: key covers the 2^order indices around index
* @item: item to insert
*
* Insert an item into the radix tree at position @index.
*/
int radix_tree_insert(struct radix_tree_root *root,
unsigned long index, void *item)
int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
unsigned order, void *item)
{
struct radix_tree_node *node;
void **slot;
......@@ -468,7 +490,7 @@ int radix_tree_insert(struct radix_tree_root *root,
BUG_ON(radix_tree_is_indirect_ptr(item));
error = __radix_tree_create(root, index, &node, &slot);
error = __radix_tree_create(root, index, order, &node, &slot);
if (error)
return error;
if (*slot != NULL)
......@@ -486,7 +508,7 @@ int radix_tree_insert(struct radix_tree_root *root,
return 0;
}
EXPORT_SYMBOL(radix_tree_insert);
EXPORT_SYMBOL(__radix_tree_insert);
/**
* __radix_tree_lookup - lookup an item in a radix tree
......@@ -537,6 +559,8 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
node = rcu_dereference_raw(*slot);
if (node == NULL)
return NULL;
if (!radix_tree_is_indirect_ptr(node))
break;
node = indirect_to_ptr(node);
shift -= RADIX_TREE_MAP_SHIFT;
......@@ -624,6 +648,8 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
tag_set(slot, tag, offset);
slot = slot->slots[offset];
BUG_ON(slot == NULL);
if (!radix_tree_is_indirect_ptr(slot))
break;
slot = indirect_to_ptr(slot);
shift -= RADIX_TREE_MAP_SHIFT;
height--;
......@@ -669,6 +695,8 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
while (shift) {
if (slot == NULL)
goto out;
if (!radix_tree_is_indirect_ptr(slot))
break;
slot = indirect_to_ptr(slot);
shift -= RADIX_TREE_MAP_SHIFT;
......@@ -753,6 +781,8 @@ int radix_tree_tag_get(struct radix_tree_root *root,
if (height == 1)
return 1;
node = rcu_dereference_raw(node->slots[offset]);
if (!radix_tree_is_indirect_ptr(node))
return 1;
shift -= RADIX_TREE_MAP_SHIFT;
height--;
}
......@@ -813,6 +843,7 @@ restart:
node = rnode;
while (1) {
struct radix_tree_node *slot;
if ((flags & RADIX_TREE_ITER_TAGGED) ?
!test_bit(offset, node->tags[tag]) :
!node->slots[offset]) {
......@@ -843,10 +874,12 @@ restart:
if (!shift)
break;
node = rcu_dereference_raw(node->slots[offset]);
if (node == NULL)
slot = rcu_dereference_raw(node->slots[offset]);
if (slot == NULL)
goto restart;
node = indirect_to_ptr(node);
if (!radix_tree_is_indirect_ptr(slot))
break;
node = indirect_to_ptr(slot);
shift -= RADIX_TREE_MAP_SHIFT;
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
}
......@@ -944,16 +977,20 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
if (!tag_get(slot, iftag, offset))
goto next;
if (shift) {
/* Go down one level */
shift -= RADIX_TREE_MAP_SHIFT;
node = slot;
slot = slot->slots[offset];
slot = indirect_to_ptr(slot);
continue;
if (radix_tree_is_indirect_ptr(slot)) {
slot = indirect_to_ptr(slot);
shift -= RADIX_TREE_MAP_SHIFT;
continue;
} else {
slot = node;
node = node->parent;
}
}
/* tag the leaf */
tagged++;
tagged += 1 << shift;
tag_set(slot, settag, offset);
/* walk back up the path tagging interior nodes */
......@@ -1201,11 +1238,20 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item,
goto out;
}
shift -= RADIX_TREE_MAP_SHIFT;
slot = rcu_dereference_raw(slot->slots[i]);
if (slot == NULL)
goto out;
if (!radix_tree_is_indirect_ptr(slot)) {
if (slot == item) {
*found_index = index + i;
index = 0;
} else {
index += shift;
}
goto out;
}
slot = indirect_to_ptr(slot);
shift -= RADIX_TREE_MAP_SHIFT;
}
/* Bottom level: check items */
......@@ -1285,7 +1331,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
/*
* The candidate node has more than one child, or its child
* is not at the leftmost slot, we cannot shrink.
* is not at the leftmost slot, or it is a multiorder entry,
* we cannot shrink.
*/
if (to_free->count != 1)
break;
......@@ -1301,6 +1348,9 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
* one (root->rnode) as far as dependent read barriers go.
*/
if (root->height > 1) {
if (!radix_tree_is_indirect_ptr(slot))
break;
slot = indirect_to_ptr(slot);
slot->parent = NULL;
slot = ptr_to_indirect(slot);
......@@ -1399,7 +1449,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
unsigned long index, void *item)
{
struct radix_tree_node *node;
unsigned int offset;
unsigned int offset, i;
void **slot;
void *entry;
int tag;
......@@ -1428,6 +1478,13 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
radix_tree_tag_clear(root, index, tag);
}
/* Delete any sibling slots pointing to this slot */
for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
if (node->slots[offset + i] != ptr_to_indirect(slot))
break;
node->slots[offset + i] = NULL;
node->count--;
}
node->slots[offset] = NULL;
node->count--;
......
......@@ -586,7 +586,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
void **slot;
int error;
error = __radix_tree_create(&mapping->page_tree, page->index,
error = __radix_tree_create(&mapping->page_tree, page->index, 0,
&node, &slot);
if (error)
return error;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment