Rev 1121 | Rev 1136 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1101 | jermar | 1 | /* |
2 | * Copyright (C) 2006 Jakub Jermar |
||
3 | * All rights reserved. |
||
4 | * |
||
5 | * Redistribution and use in source and binary forms, with or without |
||
6 | * modification, are permitted provided that the following conditions |
||
7 | * are met: |
||
8 | * |
||
9 | * - Redistributions of source code must retain the above copyright |
||
10 | * notice, this list of conditions and the following disclaimer. |
||
11 | * - Redistributions in binary form must reproduce the above copyright |
||
12 | * notice, this list of conditions and the following disclaimer in the |
||
13 | * documentation and/or other materials provided with the distribution. |
||
14 | * - The name of the author may not be used to endorse or promote products |
||
15 | * derived from this software without specific prior written permission. |
||
16 | * |
||
17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
||
18 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||
19 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
||
20 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
||
21 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||
22 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
||
23 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
||
24 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||
25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
||
26 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||
27 | */ |
||
28 | |||
29 | /* |
||
30 | * This B-tree has the following properties: |
||
1121 | jermar | 31 | * - it is a ballanced 2-3-4 tree (i.e. BTREE_M = 4) |
1101 | jermar | 32 | * - values (i.e. pointers to values) are stored only in leaves |
33 | * - leaves are linked in a list |
||
34 | * - technically, it is a B+-tree (because of the previous properties) |
||
35 | * |
||
36 | * Some of the functions below take pointer to the right-hand |
||
37 | * side subtree pointer as parameter. Note that this is sufficient |
||
38 | * because: |
||
39 | * - New root node is passed the left-hand side subtree pointer |
||
40 | * directly. |
||
41 | * - node_split() always creates the right sibling and preserves |
||
42 | * the original node (which becomes the left sibling). |
||
43 | * There is always pointer to the left-hand side subtree |
||
44 | * (i.e. left sibling) in the parent node. |
||
1134 | jermar | 45 | * |
46 | * Be carefull when using these trees. They need to allocate |
||
47 | * and deallocate memory for their index nodes and as such |
||
48 | * can sleep. |
||
1101 | jermar | 49 | */ |
50 | |||
51 | #include <adt/btree.h> |
||
52 | #include <adt/list.h> |
||
53 | #include <mm/slab.h> |
||
54 | #include <debug.h> |
||
55 | #include <panic.h> |
||
56 | #include <typedefs.h> |
||
57 | #include <print.h> |
||
58 | |||
59 | static void _btree_insert(btree_t *t, __native key, void *value, btree_node_t *rsubtree, btree_node_t *node); |
||
60 | static void node_initialize(btree_node_t *node); |
||
61 | static void node_insert_key(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree); |
||
1134 | jermar | 62 | void node_remove_key(btree_node_t *node, __native key); |
1101 | jermar | 63 | static btree_node_t *node_split(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree, __native *median); |
64 | |||
65 | #define ROOT_NODE(n) (!(n)->parent) |
||
66 | #define INDEX_NODE(n) ((n)->subtree[0] != NULL) |
||
67 | #define LEAF_NODE(n) ((n)->subtree[0] == NULL) |
||
68 | |||
69 | #define MEDIAN_LOW_INDEX(n) (((n)->keys-1)/2) |
||
70 | #define MEDIAN_HIGH_INDEX(n) ((n)->keys/2) |
||
71 | #define MEDIAN_LOW(n) ((n)->key[MEDIAN_LOW_INDEX((n))]); |
||
72 | #define MEDIAN_HIGH(n) ((n)->key[MEDIAN_HIGH_INDEX((n))]); |
||
73 | |||
74 | /** Create empty B-tree. |
||
75 | * |
||
76 | * @param t B-tree. |
||
77 | */ |
||
78 | void btree_create(btree_t *t) |
||
79 | { |
||
80 | list_initialize(&t->leaf_head); |
||
81 | t->root = (btree_node_t *) malloc(sizeof(btree_node_t), 0); |
||
82 | node_initialize(t->root); |
||
83 | list_append(&t->root->leaf_link, &t->leaf_head); |
||
84 | } |
||
85 | |||
86 | /** Destroy empty B-tree. */ |
||
87 | void btree_destroy(btree_t *t) |
||
88 | { |
||
89 | ASSERT(!t->root->keys); |
||
90 | free(t->root); |
||
91 | } |
||
92 | |||
93 | /** Insert key-value pair into B-tree. |
||
94 | * |
||
95 | * @param t B-tree. |
||
96 | * @param key Key to be inserted. |
||
97 | * @param value Value to be inserted. |
||
98 | * @param leaf_node Leaf node where the insertion should begin. |
||
99 | */ |
||
100 | void btree_insert(btree_t *t, __native key, void *value, btree_node_t *leaf_node) |
||
101 | { |
||
102 | btree_node_t *lnode; |
||
103 | |||
104 | ASSERT(value); |
||
105 | |||
106 | lnode = leaf_node; |
||
107 | if (!lnode) { |
||
108 | if (btree_search(t, key, &lnode)) { |
||
109 | panic("B-tree %P already contains key %d\n", t, key); |
||
110 | } |
||
111 | } |
||
112 | |||
113 | _btree_insert(t, key, value, NULL, lnode); |
||
114 | } |
||
115 | |||
116 | /** Recursively insert into B-tree. |
||
117 | * |
||
118 | * @param t B-tree. |
||
119 | * @param key Key to be inserted. |
||
120 | * @param value Value to be inserted. |
||
121 | * @param rsubtree Right subtree of the inserted key. |
||
122 | * @param node Start inserting into this node. |
||
123 | */ |
||
124 | void _btree_insert(btree_t *t, __native key, void *value, btree_node_t *rsubtree, btree_node_t *node) |
||
125 | { |
||
126 | if (node->keys < BTREE_MAX_KEYS) { |
||
127 | /* |
||
128 | * Node conatins enough space, the key can be stored immediately. |
||
129 | */ |
||
130 | node_insert_key(node, key, value, rsubtree); |
||
131 | } else { |
||
132 | btree_node_t *rnode; |
||
133 | __native median; |
||
134 | |||
135 | /* |
||
136 | * Node is full. |
||
137 | * Split it and insert the smallest key from the node containing |
||
138 | * bigger keys (i.e. the original node) into its parent. |
||
139 | */ |
||
140 | |||
141 | rnode = node_split(node, key, value, rsubtree, &median); |
||
142 | |||
143 | if (LEAF_NODE(node)) { |
||
144 | list_append(&rnode->leaf_link, &node->leaf_link); |
||
145 | } |
||
146 | |||
147 | if (ROOT_NODE(node)) { |
||
148 | /* |
||
149 | * We split the root node. Create new root. |
||
150 | */ |
||
151 | |||
152 | t->root = (btree_node_t *) malloc(sizeof(btree_node_t), 0); |
||
153 | node->parent = t->root; |
||
154 | rnode->parent = t->root; |
||
155 | node_initialize(t->root); |
||
156 | |||
157 | /* |
||
158 | * Left-hand side subtree will be the old root (i.e. node). |
||
159 | * Right-hand side subtree will be rnode. |
||
160 | */ |
||
161 | t->root->subtree[0] = node; |
||
162 | |||
163 | t->root->depth = node->depth + 1; |
||
164 | } |
||
165 | _btree_insert(t, median, NULL, rnode, node->parent); |
||
166 | } |
||
167 | |||
168 | } |
||
169 | |||
170 | /* TODO */ |
||
171 | void btree_remove(btree_t *t, __native key) |
||
172 | { |
||
173 | } |
||
174 | |||
175 | /** Search key in a B-tree. |
||
176 | * |
||
177 | * @param t B-tree. |
||
178 | * @param key Key to be searched. |
||
179 | * @param leaf_node Address where to put pointer to visited leaf node. |
||
180 | * |
||
181 | * @return Pointer to value or NULL if there is no such key. |
||
182 | */ |
||
183 | void *btree_search(btree_t *t, __native key, btree_node_t **leaf_node) |
||
184 | { |
||
185 | btree_node_t *cur, *next; |
||
186 | |||
187 | /* |
||
1134 | jermar | 188 | * Iteratively descend to the leaf that can contain the searched key. |
1101 | jermar | 189 | */ |
190 | for (cur = t->root; cur; cur = next) { |
||
1134 | jermar | 191 | |
1101 | jermar | 192 | /* Last iteration will set this with proper leaf node address. */ |
193 | *leaf_node = cur; |
||
1134 | jermar | 194 | |
195 | /* |
||
196 | * The key can be in the leftmost subtree. |
||
197 | * Test it separately. |
||
198 | */ |
||
199 | if (key < cur->key[0]) { |
||
200 | next = cur->subtree[0]; |
||
201 | continue; |
||
202 | } else { |
||
203 | void *val; |
||
204 | int i; |
||
205 | |||
206 | /* |
||
207 | * Now if the key is smaller than cur->key[i] |
||
208 | * it can only mean that the value is in cur->subtree[i] |
||
209 | * or it is not in the tree at all. |
||
210 | */ |
||
211 | for (i = 1; i < cur->keys; i++) { |
||
212 | if (key < cur->key[i]) { |
||
213 | next = cur->subtree[i]; |
||
214 | val = cur->value[i - 1]; |
||
215 | |||
216 | if (LEAF_NODE(cur)) |
||
217 | return key == cur->key[i - 1] ? val : NULL; |
||
218 | |||
219 | goto descend; |
||
220 | } |
||
1101 | jermar | 221 | } |
1134 | jermar | 222 | |
223 | /* |
||
224 | * Last possibility is that the key is in the rightmost subtree. |
||
225 | */ |
||
226 | next = cur->subtree[i]; |
||
227 | val = cur->value[i - 1]; |
||
228 | if (LEAF_NODE(cur)) |
||
229 | return key == cur->key[i - 1] ? val : NULL; |
||
1101 | jermar | 230 | } |
1134 | jermar | 231 | descend: |
232 | ; |
||
1101 | jermar | 233 | } |
234 | |||
235 | /* |
||
1134 | jermar | 236 | * The key was not found in the *leaf_node and is smaller than any of its keys. |
1101 | jermar | 237 | */ |
238 | return NULL; |
||
239 | } |
||
240 | |||
241 | /** Get pointer to value with the smallest key within the node. |
||
242 | * |
||
243 | * Can be only used on leaf-level nodes. |
||
244 | * |
||
245 | * @param node B-tree node. |
||
246 | * |
||
247 | * @return Pointer to value assiciated with the smallest key. |
||
248 | */ |
||
249 | void *btree_node_min(btree_node_t *node) |
||
250 | { |
||
251 | ASSERT(LEAF_NODE(node)); |
||
252 | ASSERT(node->keys); |
||
253 | return node->value[0]; |
||
254 | } |
||
255 | |||
256 | /** Get pointer to value with the biggest key within the node. |
||
257 | * |
||
258 | * Can be only used on leaf-level nodes. |
||
259 | * |
||
260 | * @param node B-tree node. |
||
261 | * |
||
262 | * @return Pointer to value assiciated with the biggest key. |
||
263 | */ |
||
264 | void *btree_node_max(btree_node_t *node) |
||
265 | { |
||
266 | ASSERT(LEAF_NODE(node)); |
||
267 | ASSERT(node->keys); |
||
268 | return node->value[node->keys - 1]; |
||
269 | } |
||
270 | |||
271 | /** Initialize B-tree node. |
||
272 | * |
||
273 | * @param node B-tree node. |
||
274 | */ |
||
275 | void node_initialize(btree_node_t *node) |
||
276 | { |
||
277 | int i; |
||
278 | |||
279 | node->keys = 0; |
||
280 | |||
281 | /* Clean also space for the extra key. */ |
||
282 | for (i = 0; i < BTREE_MAX_KEYS + 1; i++) { |
||
283 | node->key[i] = 0; |
||
284 | node->value[i] = NULL; |
||
285 | node->subtree[i] = NULL; |
||
286 | } |
||
287 | node->subtree[i] = NULL; |
||
288 | |||
289 | node->parent = NULL; |
||
290 | |||
291 | link_initialize(&node->leaf_link); |
||
292 | |||
293 | link_initialize(&node->bfs_link); |
||
294 | node->depth = 0; |
||
295 | } |
||
296 | |||
1134 | jermar | 297 | /** Insert key-value-right-subtree triplet into B-tree non-full node. |
1101 | jermar | 298 | * |
299 | * It is actually possible to have more keys than BTREE_MAX_KEYS. |
||
300 | * This feature is used during splitting the node when the |
||
301 | * number of keys is BTREE_MAX_KEYS + 1. |
||
302 | * |
||
303 | * @param node B-tree node into wich the new key is to be inserted. |
||
304 | * @param key The key to be inserted. |
||
305 | * @param value Pointer to value to be inserted. |
||
306 | * @param rsubtree Pointer to the right subtree. |
||
307 | */ |
||
308 | void node_insert_key(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree) |
||
309 | { |
||
310 | int i; |
||
311 | |||
312 | for (i = 0; i < node->keys; i++) { |
||
313 | if (key < node->key[i]) { |
||
314 | int j; |
||
315 | |||
316 | for (j = node->keys; j > i; j--) { |
||
317 | node->key[j] = node->key[j - 1]; |
||
318 | node->value[j] = node->value[j - 1]; |
||
319 | node->subtree[j + 1] = node->subtree[j]; |
||
320 | } |
||
321 | break; |
||
322 | } |
||
323 | } |
||
324 | |||
325 | node->key[i] = key; |
||
326 | node->value[i] = value; |
||
327 | node->subtree[i + 1] = rsubtree; |
||
328 | |||
329 | node->keys++; |
||
330 | } |
||
331 | |||
1134 | jermar | 332 | /** Split full B-tree node and insert new key-value-right-subtree triplet. |
1101 | jermar | 333 | * |
334 | * This function will split a node and return pointer to a newly created |
||
1134 | jermar | 335 | * node containing keys greater than or equal to the greater of medians |
336 | * (or median) of the old keys and the newly added key. It will also write |
||
337 | * the median key to a memory address supplied by the caller. |
||
1101 | jermar | 338 | * |
1134 | jermar | 339 | * If the node being split is an index node, the median will not be |
340 | * included in the new node. If the node is a leaf node, |
||
341 | * the median will be copied there. |
||
1101 | jermar | 342 | * |
343 | * @param node B-tree node wich is going to be split. |
||
344 | * @param key The key to be inserted. |
||
345 | * @param value Pointer to the value to be inserted. |
||
346 | * @param rsubtree Pointer to the right subtree of the key being added. |
||
347 | * @param median Address in memory, where the median key will be stored. |
||
348 | * |
||
349 | * @return Newly created right sibling of node. |
||
350 | */ |
||
351 | btree_node_t *node_split(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree, __native *median) |
||
352 | { |
||
353 | btree_node_t *rnode; |
||
354 | int i, j; |
||
355 | |||
356 | ASSERT(median); |
||
357 | ASSERT(node->keys == BTREE_MAX_KEYS); |
||
358 | |||
359 | /* |
||
360 | * Use the extra space to store the extra node. |
||
361 | */ |
||
362 | node_insert_key(node, key, value, rsubtree); |
||
363 | |||
364 | /* |
||
365 | * Compute median of keys. |
||
366 | */ |
||
1134 | jermar | 367 | *median = MEDIAN_HIGH(node); |
1101 | jermar | 368 | |
1134 | jermar | 369 | /* |
370 | * Allocate and initialize new right sibling. |
||
371 | */ |
||
1101 | jermar | 372 | rnode = (btree_node_t *) malloc(sizeof(btree_node_t), 0); |
373 | node_initialize(rnode); |
||
374 | rnode->parent = node->parent; |
||
375 | rnode->depth = node->depth; |
||
376 | |||
377 | /* |
||
378 | * Copy big keys, values and subtree pointers to the new right sibling. |
||
1134 | jermar | 379 | * If this is an index node, do not copy the median. |
1101 | jermar | 380 | */ |
1134 | jermar | 381 | i = (int) INDEX_NODE(node); |
382 | for (i += MEDIAN_HIGH_INDEX(node), j = 0; i < node->keys; i++, j++) { |
||
1101 | jermar | 383 | rnode->key[j] = node->key[i]; |
384 | rnode->value[j] = node->value[i]; |
||
385 | rnode->subtree[j] = node->subtree[i]; |
||
386 | |||
387 | /* |
||
388 | * Fix parent links in subtrees. |
||
389 | */ |
||
390 | if (rnode->subtree[j]) |
||
391 | rnode->subtree[j]->parent = rnode; |
||
392 | |||
393 | } |
||
394 | rnode->subtree[j] = node->subtree[i]; |
||
395 | if (rnode->subtree[j]) |
||
396 | rnode->subtree[j]->parent = rnode; |
||
1134 | jermar | 397 | |
398 | rnode->keys = j; /* Set number of keys of the new node. */ |
||
399 | node->keys /= 2; /* Shrink the old node. */ |
||
1101 | jermar | 400 | |
401 | return rnode; |
||
402 | } |
||
403 | |||
1134 | jermar | 404 | /** Remove key from B-tree node. |
405 | * |
||
406 | * @param node B-tree node. |
||
407 | * @param key Key to be removed. |
||
408 | */ |
||
409 | void node_remove_key(btree_node_t *node, __native key) |
||
410 | { |
||
411 | } |
||
412 | |||
1101 | jermar | 413 | /** Print B-tree. |
414 | * |
||
415 | * @param t Print out B-tree. |
||
416 | */ |
||
417 | void btree_print(btree_t *t) |
||
418 | { |
||
419 | int i, depth = t->root->depth; |
||
420 | link_t head; |
||
421 | |||
422 | list_initialize(&head); |
||
423 | list_append(&t->root->bfs_link, &head); |
||
424 | |||
425 | /* |
||
426 | * Use BFS search to print out the tree. |
||
427 | * Levels are distinguished from one another by node->depth. |
||
428 | */ |
||
429 | while (!list_empty(&head)) { |
||
430 | link_t *hlp; |
||
431 | btree_node_t *node; |
||
432 | |||
433 | hlp = head.next; |
||
434 | ASSERT(hlp != &head); |
||
435 | node = list_get_instance(hlp, btree_node_t, bfs_link); |
||
436 | list_remove(hlp); |
||
437 | |||
438 | ASSERT(node); |
||
439 | |||
440 | if (node->depth != depth) { |
||
441 | printf("\n"); |
||
442 | depth = node->depth; |
||
443 | } |
||
444 | |||
445 | printf("("); |
||
446 | for (i = 0; i < node->keys; i++) { |
||
447 | printf("%d,", node->key[i]); |
||
448 | if (node->depth && node->subtree[i]) { |
||
449 | list_append(&node->subtree[i]->bfs_link, &head); |
||
450 | } |
||
451 | } |
||
452 | if (node->depth && node->subtree[i]) { |
||
453 | list_append(&node->subtree[i]->bfs_link, &head); |
||
454 | } |
||
455 | printf(")"); |
||
456 | } |
||
457 | printf("\n"); |
||
458 | } |