Rev 1121 | Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1101 | jermar | 1 | /* |
2 | * Copyright (C) 2006 Jakub Jermar |
||
3 | * All rights reserved. |
||
4 | * |
||
5 | * Redistribution and use in source and binary forms, with or without |
||
6 | * modification, are permitted provided that the following conditions |
||
7 | * are met: |
||
8 | * |
||
9 | * - Redistributions of source code must retain the above copyright |
||
10 | * notice, this list of conditions and the following disclaimer. |
||
11 | * - Redistributions in binary form must reproduce the above copyright |
||
12 | * notice, this list of conditions and the following disclaimer in the |
||
13 | * documentation and/or other materials provided with the distribution. |
||
14 | * - The name of the author may not be used to endorse or promote products |
||
15 | * derived from this software without specific prior written permission. |
||
16 | * |
||
17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
||
18 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||
19 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
||
20 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
||
21 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||
22 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
||
23 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
||
24 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||
25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
||
26 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||
27 | */ |
||
28 | |||
29 | /* |
||
30 | * This B-tree has the following properties: |
||
31 | * - it is a ballanced 2-3-4 tree (i.e. M = 4) |
||
32 | * - values (i.e. pointers to values) are stored only in leaves |
||
33 | * - leaves are linked in a list |
||
34 | * - technically, it is a B+-tree (because of the previous properties) |
||
35 | * |
||
36 | * Some of the functions below take pointer to the right-hand |
||
37 | * side subtree pointer as parameter. Note that this is sufficient |
||
38 | * because: |
||
39 | * - New root node is passed the left-hand side subtree pointer |
||
40 | * directly. |
||
41 | * - node_split() always creates the right sibling and preserves |
||
42 | * the original node (which becomes the left sibling). |
||
43 | * There is always pointer to the left-hand side subtree |
||
44 | * (i.e. left sibling) in the parent node. |
||
45 | */ |
||
46 | |||
47 | #include <adt/btree.h> |
||
48 | #include <adt/list.h> |
||
49 | #include <mm/slab.h> |
||
50 | #include <debug.h> |
||
51 | #include <panic.h> |
||
52 | #include <typedefs.h> |
||
53 | #include <print.h> |
||
54 | |||
55 | static void _btree_insert(btree_t *t, __native key, void *value, btree_node_t *rsubtree, btree_node_t *node); |
||
56 | static void node_initialize(btree_node_t *node); |
||
57 | static void node_insert_key(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree); |
||
58 | static btree_node_t *node_split(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree, __native *median); |
||
59 | |||
60 | #define ROOT_NODE(n) (!(n)->parent) |
||
61 | #define INDEX_NODE(n) ((n)->subtree[0] != NULL) |
||
62 | #define LEAF_NODE(n) ((n)->subtree[0] == NULL) |
||
63 | |||
64 | #define MEDIAN_LOW_INDEX(n) (((n)->keys-1)/2) |
||
65 | #define MEDIAN_HIGH_INDEX(n) ((n)->keys/2) |
||
66 | #define MEDIAN_LOW(n) ((n)->key[MEDIAN_LOW_INDEX((n))]); |
||
67 | #define MEDIAN_HIGH(n) ((n)->key[MEDIAN_HIGH_INDEX((n))]); |
||
68 | |||
69 | /** Create empty B-tree. |
||
70 | * |
||
71 | * @param t B-tree. |
||
72 | */ |
||
73 | void btree_create(btree_t *t) |
||
74 | { |
||
75 | list_initialize(&t->leaf_head); |
||
76 | t->root = (btree_node_t *) malloc(sizeof(btree_node_t), 0); |
||
77 | node_initialize(t->root); |
||
78 | list_append(&t->root->leaf_link, &t->leaf_head); |
||
79 | } |
||
80 | |||
81 | /** Destroy empty B-tree. */ |
||
82 | void btree_destroy(btree_t *t) |
||
83 | { |
||
84 | ASSERT(!t->root->keys); |
||
85 | free(t->root); |
||
86 | } |
||
87 | |||
88 | /** Insert key-value pair into B-tree. |
||
89 | * |
||
90 | * @param t B-tree. |
||
91 | * @param key Key to be inserted. |
||
92 | * @param value Value to be inserted. |
||
93 | * @param leaf_node Leaf node where the insertion should begin. |
||
94 | */ |
||
95 | void btree_insert(btree_t *t, __native key, void *value, btree_node_t *leaf_node) |
||
96 | { |
||
97 | btree_node_t *lnode; |
||
98 | |||
99 | ASSERT(value); |
||
100 | |||
101 | lnode = leaf_node; |
||
102 | if (!lnode) { |
||
103 | if (btree_search(t, key, &lnode)) { |
||
104 | panic("B-tree %P already contains key %d\n", t, key); |
||
105 | } |
||
106 | } |
||
107 | |||
108 | _btree_insert(t, key, value, NULL, lnode); |
||
109 | } |
||
110 | |||
111 | /** Recursively insert into B-tree. |
||
112 | * |
||
113 | * @param t B-tree. |
||
114 | * @param key Key to be inserted. |
||
115 | * @param value Value to be inserted. |
||
116 | * @param rsubtree Right subtree of the inserted key. |
||
117 | * @param node Start inserting into this node. |
||
118 | */ |
||
119 | void _btree_insert(btree_t *t, __native key, void *value, btree_node_t *rsubtree, btree_node_t *node) |
||
120 | { |
||
121 | if (node->keys < BTREE_MAX_KEYS) { |
||
122 | /* |
||
123 | * Node conatins enough space, the key can be stored immediately. |
||
124 | */ |
||
125 | node_insert_key(node, key, value, rsubtree); |
||
126 | } else { |
||
127 | btree_node_t *rnode; |
||
128 | __native median; |
||
129 | |||
130 | /* |
||
131 | * Node is full. |
||
132 | * Split it and insert the smallest key from the node containing |
||
133 | * bigger keys (i.e. the original node) into its parent. |
||
134 | */ |
||
135 | |||
136 | rnode = node_split(node, key, value, rsubtree, &median); |
||
137 | |||
138 | if (LEAF_NODE(node)) { |
||
139 | list_append(&rnode->leaf_link, &node->leaf_link); |
||
140 | } |
||
141 | |||
142 | if (ROOT_NODE(node)) { |
||
143 | /* |
||
144 | * We split the root node. Create new root. |
||
145 | */ |
||
146 | |||
147 | t->root = (btree_node_t *) malloc(sizeof(btree_node_t), 0); |
||
148 | node->parent = t->root; |
||
149 | rnode->parent = t->root; |
||
150 | node_initialize(t->root); |
||
151 | |||
152 | /* |
||
153 | * Left-hand side subtree will be the old root (i.e. node). |
||
154 | * Right-hand side subtree will be rnode. |
||
155 | */ |
||
156 | t->root->subtree[0] = node; |
||
157 | |||
158 | t->root->depth = node->depth + 1; |
||
159 | } |
||
160 | _btree_insert(t, median, NULL, rnode, node->parent); |
||
161 | } |
||
162 | |||
163 | } |
||
164 | |||
165 | /* TODO */ |
||
166 | void btree_remove(btree_t *t, __native key) |
||
167 | { |
||
168 | } |
||
169 | |||
170 | /** Search key in a B-tree. |
||
171 | * |
||
172 | * @param t B-tree. |
||
173 | * @param key Key to be searched. |
||
174 | * @param leaf_node Address where to put pointer to visited leaf node. |
||
175 | * |
||
176 | * @return Pointer to value or NULL if there is no such key. |
||
177 | */ |
||
178 | void *btree_search(btree_t *t, __native key, btree_node_t **leaf_node) |
||
179 | { |
||
180 | btree_node_t *cur, *next; |
||
181 | void *val = NULL; |
||
182 | |||
183 | /* |
||
184 | * Iteratively descend to the leaf that can contain searched key. |
||
185 | */ |
||
186 | for (cur = t->root; cur; cur = next) { |
||
187 | int i; |
||
188 | |||
189 | /* Last iteration will set this with proper leaf node address. */ |
||
190 | *leaf_node = cur; |
||
191 | for (i = 0; i < cur->keys; i++) { |
||
192 | if (key <= cur->key[i]) { |
||
193 | val = cur->value[i]; |
||
194 | next = cur->subtree[i]; |
||
195 | |||
196 | /* |
||
197 | * Check if there is anywhere to descend. |
||
198 | */ |
||
199 | if (!next) { |
||
200 | /* |
||
201 | * Leaf-level. |
||
202 | */ |
||
203 | return (key == cur->key[i]) ? val : NULL; |
||
204 | } |
||
205 | goto descend; |
||
206 | } |
||
207 | } |
||
208 | next = cur->subtree[i]; |
||
209 | descend: |
||
210 | ; |
||
211 | } |
||
212 | |||
213 | /* |
||
214 | * The key was not found in the *leaf_node and is greater than any of its keys. |
||
215 | */ |
||
216 | return NULL; |
||
217 | } |
||
218 | |||
219 | /** Get pointer to value with the smallest key within the node. |
||
220 | * |
||
221 | * Can be only used on leaf-level nodes. |
||
222 | * |
||
223 | * @param node B-tree node. |
||
224 | * |
||
225 | * @return Pointer to value assiciated with the smallest key. |
||
226 | */ |
||
227 | void *btree_node_min(btree_node_t *node) |
||
228 | { |
||
229 | ASSERT(LEAF_NODE(node)); |
||
230 | ASSERT(node->keys); |
||
231 | return node->value[0]; |
||
232 | } |
||
233 | |||
234 | /** Get pointer to value with the biggest key within the node. |
||
235 | * |
||
236 | * Can be only used on leaf-level nodes. |
||
237 | * |
||
238 | * @param node B-tree node. |
||
239 | * |
||
240 | * @return Pointer to value assiciated with the biggest key. |
||
241 | */ |
||
242 | void *btree_node_max(btree_node_t *node) |
||
243 | { |
||
244 | ASSERT(LEAF_NODE(node)); |
||
245 | ASSERT(node->keys); |
||
246 | return node->value[node->keys - 1]; |
||
247 | } |
||
248 | |||
249 | /** Initialize B-tree node. |
||
250 | * |
||
251 | * @param node B-tree node. |
||
252 | */ |
||
253 | void node_initialize(btree_node_t *node) |
||
254 | { |
||
255 | int i; |
||
256 | |||
257 | node->keys = 0; |
||
258 | |||
259 | /* Clean also space for the extra key. */ |
||
260 | for (i = 0; i < BTREE_MAX_KEYS + 1; i++) { |
||
261 | node->key[i] = 0; |
||
262 | node->value[i] = NULL; |
||
263 | node->subtree[i] = NULL; |
||
264 | } |
||
265 | node->subtree[i] = NULL; |
||
266 | |||
267 | node->parent = NULL; |
||
268 | |||
269 | link_initialize(&node->leaf_link); |
||
270 | |||
271 | link_initialize(&node->bfs_link); |
||
272 | node->depth = 0; |
||
273 | } |
||
274 | |||
275 | /** Insert key-value-left-subtree triplet into B-tree non-full node. |
||
276 | * |
||
277 | * It is actually possible to have more keys than BTREE_MAX_KEYS. |
||
278 | * This feature is used during splitting the node when the |
||
279 | * number of keys is BTREE_MAX_KEYS + 1. |
||
280 | * |
||
281 | * @param node B-tree node into wich the new key is to be inserted. |
||
282 | * @param key The key to be inserted. |
||
283 | * @param value Pointer to value to be inserted. |
||
284 | * @param rsubtree Pointer to the right subtree. |
||
285 | */ |
||
286 | void node_insert_key(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree) |
||
287 | { |
||
288 | int i; |
||
289 | |||
290 | for (i = 0; i < node->keys; i++) { |
||
291 | if (key < node->key[i]) { |
||
292 | int j; |
||
293 | |||
294 | for (j = node->keys; j > i; j--) { |
||
295 | node->key[j] = node->key[j - 1]; |
||
296 | node->value[j] = node->value[j - 1]; |
||
297 | node->subtree[j + 1] = node->subtree[j]; |
||
298 | } |
||
299 | break; |
||
300 | } |
||
301 | } |
||
302 | |||
303 | node->key[i] = key; |
||
304 | node->value[i] = value; |
||
305 | node->subtree[i + 1] = rsubtree; |
||
306 | |||
307 | node->keys++; |
||
308 | } |
||
309 | |||
310 | /** Split full B-tree node and insert new key-value-left-subtree triplet. |
||
311 | * |
||
312 | * This function will split a node and return pointer to a newly created |
||
313 | * node containing keys greater than the lesser of medians (or median) |
||
314 | * of the old keys and the newly added key. It will also write the |
||
315 | * median key to a memory address supplied by the caller. |
||
316 | * |
||
317 | * If the node being split is an index node, the median will be |
||
318 | * removed from the original node. If the node is a leaf node, |
||
319 | * the median will be preserved. |
||
320 | * |
||
321 | * @param node B-tree node wich is going to be split. |
||
322 | * @param key The key to be inserted. |
||
323 | * @param value Pointer to the value to be inserted. |
||
324 | * @param rsubtree Pointer to the right subtree of the key being added. |
||
325 | * @param median Address in memory, where the median key will be stored. |
||
326 | * |
||
327 | * @return Newly created right sibling of node. |
||
328 | */ |
||
329 | btree_node_t *node_split(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree, __native *median) |
||
330 | { |
||
331 | btree_node_t *rnode; |
||
332 | int i, j; |
||
333 | |||
334 | ASSERT(median); |
||
335 | ASSERT(node->keys == BTREE_MAX_KEYS); |
||
336 | |||
337 | /* |
||
338 | * Use the extra space to store the extra node. |
||
339 | */ |
||
340 | node_insert_key(node, key, value, rsubtree); |
||
341 | |||
342 | /* |
||
343 | * Compute median of keys. |
||
344 | */ |
||
345 | *median = MEDIAN_LOW(node); |
||
346 | |||
347 | rnode = (btree_node_t *) malloc(sizeof(btree_node_t), 0); |
||
348 | node_initialize(rnode); |
||
349 | rnode->parent = node->parent; |
||
350 | rnode->depth = node->depth; |
||
351 | |||
352 | /* |
||
353 | * Copy big keys, values and subtree pointers to the new right sibling. |
||
354 | */ |
||
355 | for (i = MEDIAN_LOW_INDEX(node) + 1, j = 0; i < node->keys; i++, j++) { |
||
356 | rnode->key[j] = node->key[i]; |
||
357 | rnode->value[j] = node->value[i]; |
||
358 | rnode->subtree[j] = node->subtree[i]; |
||
359 | |||
360 | /* |
||
361 | * Fix parent links in subtrees. |
||
362 | */ |
||
363 | if (rnode->subtree[j]) |
||
364 | rnode->subtree[j]->parent = rnode; |
||
365 | |||
366 | } |
||
367 | rnode->subtree[j] = node->subtree[i]; |
||
368 | if (rnode->subtree[j]) |
||
369 | rnode->subtree[j]->parent = rnode; |
||
370 | rnode->keys = j; |
||
371 | |||
372 | /* |
||
373 | * Shrink the old node. |
||
374 | * If this is an index node, remove the median. |
||
375 | */ |
||
376 | node->keys = MEDIAN_LOW_INDEX(node) + 1; |
||
377 | if (INDEX_NODE(node)) |
||
378 | node->keys--; |
||
379 | |||
380 | return rnode; |
||
381 | } |
||
382 | |||
383 | /** Print B-tree. |
||
384 | * |
||
385 | * @param t Print out B-tree. |
||
386 | */ |
||
387 | void btree_print(btree_t *t) |
||
388 | { |
||
389 | int i, depth = t->root->depth; |
||
390 | link_t head; |
||
391 | |||
392 | list_initialize(&head); |
||
393 | list_append(&t->root->bfs_link, &head); |
||
394 | |||
395 | /* |
||
396 | * Use BFS search to print out the tree. |
||
397 | * Levels are distinguished from one another by node->depth. |
||
398 | */ |
||
399 | while (!list_empty(&head)) { |
||
400 | link_t *hlp; |
||
401 | btree_node_t *node; |
||
402 | |||
403 | hlp = head.next; |
||
404 | ASSERT(hlp != &head); |
||
405 | node = list_get_instance(hlp, btree_node_t, bfs_link); |
||
406 | list_remove(hlp); |
||
407 | |||
408 | ASSERT(node); |
||
409 | |||
410 | if (node->depth != depth) { |
||
411 | printf("\n"); |
||
412 | depth = node->depth; |
||
413 | } |
||
414 | |||
415 | printf("("); |
||
416 | for (i = 0; i < node->keys; i++) { |
||
417 | printf("%d,", node->key[i]); |
||
418 | if (node->depth && node->subtree[i]) { |
||
419 | list_append(&node->subtree[i]->bfs_link, &head); |
||
420 | } |
||
421 | } |
||
422 | if (node->depth && node->subtree[i]) { |
||
423 | list_append(&node->subtree[i]->bfs_link, &head); |
||
424 | } |
||
425 | printf(")"); |
||
426 | } |
||
427 | printf("\n"); |
||
428 | } |