-
Notifications
You must be signed in to change notification settings - Fork 0
/
bptree.h
304 lines (240 loc) · 8.44 KB
/
bptree.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
#ifndef BPTREE_H
#define BPTREE_H
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <memory>
#include <vector>
// Abstract base shared by internal and leaf nodes of the tree.
//
// NOTE(review): `parent` is an owning shared_ptr, and InternalNode::children
// owns its children through shared_ptr as well, so a node and its parent keep
// each other alive once they are linked. Consider std::weak_ptr (or a raw
// non-owning pointer) for `parent` if nodes are ever expected to be released.
struct Node {
    // Enclosing internal node; null for the root.
    std::shared_ptr<Node> parent;
    // True for LeafNode, false for InternalNode. Defaulted so that a freshly
    // constructed node never carries an indeterminate flag.
    bool is_leaf = false;
    // Pure virtual: Node itself cannot be instantiated.
    virtual ~Node() = 0;
};
// A pure virtual destructor still needs a body because every derived
// destructor calls it. `inline` is required since this definition lives in a
// header: without it, including the header from two translation units yields
// a multiple-definition link error.
inline Node::~Node() {}
// Routing node: `keys` are sorted separators and `children` the subtrees
// between them.
// Invariant: keys.size() == children.size() - 1
// Lookups descend into children[i] where i is the number of separators <= the
// searched key, so children[i + 1] holds the keys >= keys[i].
template <typename K>
struct InternalNode: public Node {
    std::vector<K> keys;
    std::vector< std::shared_ptr<Node> > children;

    // Insert separator `k` together with `child`, the subtree that was split
    // off to the RIGHT of an existing child and whose keys are >= k.
    //
    // The new child must therefore always sit directly after its separator,
    // at children[index + 1]. Placing it at the front when `k` becomes the
    // smallest separator would swap the two halves of the split node and make
    // the left half unreachable for lookups.
    void insert(const K k, std::shared_ptr<Node > child) {
        // A root created by a root split already holds its left child but no
        // separator yet: simply append both.
        if (keys.empty()) {
            keys.push_back(k);
            children.push_back(std::move(child));
            return;
        }
        // upper_bound (rather than lower_bound) mirrors the descent rule used
        // for lookups: if `k` equals an existing separator, the new separator
        // goes after it so that `child` stays to the right of the node it was
        // split from. For distinct separators both bounds coincide.
        const auto itr = std::upper_bound(keys.begin(), keys.end(), k);
        const auto index = itr - keys.begin();
        keys.insert(itr, k);
        children.insert(children.begin() + index + 1, std::move(child));
    }
};
// Leaf node: records are kept in two parallel arrays, `keys[i]` paired with
// `data[i]`, ordered by key. `next` points at the following leaf.
template <typename K, typename D>
struct LeafNode: public Node {
    std::shared_ptr<LeafNode> next;
    std::vector<K> keys;
    std::vector<D> data; // TODO change to pointers later

    // Place (k, d) at the first position whose key is not less than k, keeping
    // both arrays aligned and sorted.
    void insert(const K k, const D d) {
        const auto offset =
            std::lower_bound(keys.cbegin(), keys.cend(), k) - keys.cbegin();
        keys.insert(keys.cbegin() + offset, k);
        data.insert(data.cbegin() + offset, d);
    }
};
template <typename K, typename D>
class BPTree {
std::shared_ptr<Node> root;
const int b;
int _size;
// only called when splitting root
// NOTE: creates InternalNode as parent which contains 1 child (0 keys)
void deorphan(std::shared_ptr<Node> current) {
// If no parent, then create a new parent
if (current->parent == nullptr) {
auto new_parent = std::make_shared<InternalNode<K> >();
new_parent->is_leaf = false;
new_parent->parent = nullptr;
new_parent->children.push_back(current);
current->parent = new_parent;
root = new_parent;
}
}
// splits internal node
void split_internal(std::shared_ptr<InternalNode<K>> current) {
// Ensure parent exists
deorphan(current);
// Create sibling node
auto new_node = std::make_shared<InternalNode<K> >();
new_node->is_leaf = false;
new_node->parent = current->parent;
// Move middle key into parent
const int mid = current->keys.size() / 2;
auto parent = std::dynamic_pointer_cast<InternalNode<K>>(current->parent);
parent->insert(current->keys[mid], new_node);
// Split the other keys and children
for (int i = mid + 1; i < current->keys.size(); ++i) {
new_node->keys.emplace_back(std::move(current->keys[i]));
new_node->children.emplace_back(std::move(current->children[i]));
new_node->children.back()->parent = new_node;
}
new_node->children.emplace_back(std::move(current->children.back() ));
new_node->children.back()->parent = new_node;
// Remove the items we copied
current->keys.resize(mid);
current->children.resize(mid + 1);
// Handle splitting of ancestors if necessary
if (parent->keys.size() == b) {
split_internal(parent);
}
}
// splits leaf node
void split_leaf(std::shared_ptr<LeafNode<K, D>> leaf) {
// Ensure a parent exists
deorphan(leaf);
// Create sibling node
auto new_node = std::make_shared<LeafNode<K, D> >();
new_node->is_leaf = true;
new_node->parent = leaf->parent;
new_node->next = leaf->next;
leaf->next = new_node;
// Copy half of the keys and data
const int mid = leaf->keys.size() / 2;
for (int i = mid; i < leaf->keys.size(); ++i) {
new_node->keys.emplace_back(std::move(leaf->keys[i]));
new_node->data.emplace_back(std::move(leaf->data[i]));
}
// Remove the items we copied
leaf->keys.resize(mid);
leaf->data.resize(mid);
// Add key and child for new_node to parent, and split if necessary
auto parent = std::dynamic_pointer_cast<InternalNode<K>>(leaf->parent);
parent->insert(new_node->keys[0], new_node);
if (parent->keys.size() == b) {
split_internal(parent);
}
}
void print_node(std::shared_ptr<Node> current) const {
if (current->is_leaf) {
auto temp = std::dynamic_pointer_cast<LeafNode<K, D>>(current);
for (auto k: temp->keys) {
std::cout << k << ", ";
}
} else {
auto temp = std::dynamic_pointer_cast<InternalNode<K>>( current);
for (auto k: temp->keys) {
std::cout << k << ", ";
}
}
std::cout << std::endl;
}
void print(std::shared_ptr<Node> current) const {
print_node(current);
if (!current->is_leaf) {
auto temp = std::dynamic_pointer_cast<InternalNode<K>>(current);
for (auto child: temp->children) {
print(child);
}
}
}
std::shared_ptr<Node> find_node(const K& k) const {
if (root == nullptr) {
return nullptr;
}
// Otherwise search for the right node
auto current = root;
while (!current->is_leaf) {
const auto temp = std::dynamic_pointer_cast<InternalNode<K>>(current);
const auto it = std::upper_bound(temp->keys.begin(), temp->keys.end(), k);
const int i = it - temp->keys.begin();
current = temp->children[i];
}
return current;
}
public:
// Default constructor
BPTree(): b {4} {
root = nullptr;
_size = 0;
}
BPTree(const int _b): b {_b} {
root = nullptr;
_size = 0;
}
int size() const {
return _size;
}
std::shared_ptr<D> search(const K& k) const {
// Find the containing leaf node
auto node = find_node(k);
if (node == nullptr) {
return nullptr;
}
// Search the node
auto leaf = std::dynamic_pointer_cast<LeafNode<K, D>>(node);
const auto it = std::lower_bound(leaf->keys.begin(), leaf->keys.end(), k);
const int i = it - leaf->keys.begin();
if (k == leaf->keys[i]) {
auto temp = std::make_shared<D>(leaf->data[i]);
return temp;
}
return nullptr;
}
// TODO generalize comparison operator
std::vector<D> range_query(const K low, const K& hi) {
if (hi < low) {
return {};
}
auto leaf = std::dynamic_pointer_cast<LeafNode<K, D>>(find_node(low));
std::vector<D> result;
while (leaf) {
// Find keys in range, and save data to result
const auto i = std::lower_bound(leaf->keys.begin(), leaf->keys.end(), low);
const auto j = std::upper_bound(leaf->keys.begin(), leaf->keys.end(), hi);
const auto data_i = leaf->data.begin() + (i - leaf->keys.begin());
const auto data_j = leaf->data.begin() + (j - leaf->keys.begin());
result.insert(result.end(), data_i, data_j);
if (j != leaf->keys.end()) {
break;
}
leaf = leaf->next;
}
return result;
}
// Insert a record
void insert(const K& k, const D& d) {
// Create new node if needed
if (root == nullptr) {
root = std::make_shared <LeafNode<K, D>>();
auto temp = std::dynamic_pointer_cast<LeafNode<K, D>>(root);
temp->is_leaf = true;
temp->next = nullptr;
temp->parent = nullptr;
temp->keys.push_back(k);
temp->data.push_back(d);
++_size;
return;
}
// Otherwise search for the right node
auto current = find_node(k);
// Insert into node and split if necessary
auto temp = std::dynamic_pointer_cast<LeafNode<K, D>>(current);
temp->insert(k, d);
++_size;
// Perform splitting procedure if node is full
if (temp->keys.size() == b) {
split_leaf(temp);
}
}
void print() const {
std::cout << "---\n";
print(root);
std::cout << "---\n";
}
};
#endif