0%

CMU15-445 lab02(Part 4)

TASK #2.B - B+TREE DATA STRUCTURE (DELETION)

您的B+树索引需要支持删除。如果删除导致某些页面低于占用阈值,您的B+树索引应该正确地执行合并或重新分发。同样,你的B+树索引只能支持唯一键,你应该遵循TASK #2.A中的相同指导方针。

Remove函数

  • 先判断树是否为空。
  • 调用FindLeafPageByOperation函数找到待删除的节点(已上写锁)。
  • 在leaf中删除key(如果不存在该key,则size不变)。
  • 如果删除失败,解锁并unpin自己以及经过的所有节点。
  • 如果删除成功,调用CoalesceOrRedistribute函数,进行合并或重分配(或不做处理)。
  • 还需判断叶子节点是否需要删除。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::Remove(const KeyType &key, Transaction *transaction) {
if (IsEmpty()) {
return;
}

auto [leaf_page, root_is_latched] = FindLeafPageByOperation(key, Operation::DELETE, transaction);
LeafPage *leaf_node = reinterpret_cast<LeafPage *>(leaf_page->GetData());
int old_size = leaf_node->GetSize();
int new_size = leaf_node->RemoveAndDeleteRecord(key, comparator_); // 在leaf中删除key(如果不存在该key,则size不变)

// 删除失败
if (new_size == old_size) {
if (root_is_latched) {
root_latch_.unlock();
}
UnlockUnpinPages(transaction);

leaf_page->WUnlatch();
buffer_pool_manager_->UnpinPage(leaf_page->GetPageId(), false); // unpin leaf page

return;
}

// 删除成功,然后调用CoalesceOrRedistribute
bool *pointer_root_is_latched = new bool(root_is_latched);

bool leaf_should_delete = CoalesceOrRedistribute(leaf_node, transaction, pointer_root_is_latched);
// NOTE: unlock and unpin are finished in CoalesceOrRedistribute
// NOTE: root node must be unlocked in CoalesceOrRedistribute
assert((*pointer_root_is_latched) == false);

delete pointer_root_is_latched;

if (leaf_should_delete) {
transaction->AddIntoDeletedPageSet(leaf_page->GetPageId());
}

leaf_page->WUnlatch();
buffer_pool_manager_->UnpinPage(leaf_page->GetPageId(), true); // unpin leaf page

// NOTE: ensure deleted pages have been unpined
// 删除并清空deleted page set
for (page_id_t page_id : *transaction->GetDeletedPageSet()) {
buffer_pool_manager_->DeletePage(page_id);
}
transaction->GetDeletedPageSet()->clear();
}

CoalesceOrRedistribute函数

  • 判断该叶子节点是否同时为根节点,如果是,调用AdjustRoot函数。
  • 如果不需要合并或者重分配,直接返回false。
  • 否则找到兄弟节点,优先找到前驱节点,判断是否重分配。
  • 当kv总和能支撑两个Node,那么重新分配即可,不必删除node,重分配不影响上层,此时可unpin并解锁经过的所有节点。
  • 如果需合并,调用Coalesce函数。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
INDEX_TEMPLATE_ARGUMENTS
template <typename N>
bool BPLUSTREE_TYPE::CoalesceOrRedistribute(N *node, Transaction *transaction, bool *root_is_latched) {
if (node->IsRootPage()) {
bool root_should_delete = AdjustRoot(node);

if (*root_is_latched) {
*root_is_latched = false;
root_latch_.unlock();
}

UnlockPages(transaction);
return root_should_delete; // NOTE: size of root page can be less than min size
}

// 不需要合并或者重分配,直接返回false
if (node->GetSize() >= node->GetMinSize()) {
if (*root_is_latched) {
*root_is_latched = false;
root_latch_.unlock();
}

UnlockPages(transaction);
return false;
}

// 需要合并或者重分配
// 先获取node的parent page
Page *parent_page = buffer_pool_manager_->FetchPage(node->GetParentPageId());
InternalPage *parent = reinterpret_cast<InternalPage *>(parent_page->GetData());

// 获得node在parent的孩子指针(value)的index
int index = parent->ValueIndex(node->GetPageId());
// 寻找兄弟结点,尽量找到前一个结点(前驱结点)
page_id_t sibling_page_id = parent->ValueAt(index == 0 ? 1 : index - 1);
Page *sibling_page = buffer_pool_manager_->FetchPage(sibling_page_id);

sibling_page->WLatch(); // 记得要锁住兄弟结点

N *sibling_node = reinterpret_cast<N *>(sibling_page->GetData());

// Redistribute 当kv总和能支撑两个Node,那么重新分配即可,不必删除node
if (node->GetSize() + sibling_node->GetSize() >= node->GetMaxSize()) {
if (*root_is_latched) {
*root_is_latched = false;
root_latch_.unlock();
}

Redistribute(sibling_node, node, index);

buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);

sibling_page->WUnlatch();
buffer_pool_manager_->UnpinPage(sibling_page->GetPageId(), true);

UnlockUnpinPages(transaction);

return false; // node不必被删除
}

// Coalesce 当sibling和node只能凑成一个Node,那么合并两个结点到sibling,删除node
// Coalesce函数继续递归调用CoalesceOrRedistribute
bool parent_should_delete =
Coalesce(&sibling_node, &node, &parent, index, transaction, root_is_latched); // 返回值是parent是否需要被删除

assert((*root_is_latched) == false);

if (parent_should_delete) {
transaction->AddIntoDeletedPageSet(parent->GetPageId());
}

// NOTE: parent unlock is finished in Coalesce
buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);

sibling_page->WUnlatch();
buffer_pool_manager_->UnpinPage(sibling_page->GetPageId(), true);

return true; // node需要被删除
}

Redistribute函数

  • node是之前刚被删除过一个key的结点。
  • index=0,则neighbor是node后继结点。
  • index>0,则neighbor是node前驱结点。
  • 注意更新parent结点的相关kv对。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
INDEX_TEMPLATE_ARGUMENTS
template <typename N>
void BPLUSTREE_TYPE::Redistribute(N *neighbor_node, N *node, int index) {
Page *parent_page = buffer_pool_manager_->FetchPage(node->GetParentPageId());
InternalPage *parent = reinterpret_cast<InternalPage *>(parent_page->GetData());

if (node->IsLeafPage()) {
LeafPage *leaf_node = reinterpret_cast<LeafPage *>(node);
LeafPage *neighbor_leaf_node = reinterpret_cast<LeafPage *>(neighbor_node);
if (index == 0) { // node -> neighbor
// move neighbor's first to node's end
neighbor_leaf_node->MoveFirstToEndOf(leaf_node);
parent->SetKeyAt(1, neighbor_leaf_node->KeyAt(0));
} else { // neighbor -> node
// move neighbor's last to node's front
neighbor_leaf_node->MoveLastToFrontOf(leaf_node);
parent->SetKeyAt(index, leaf_node->KeyAt(0));
}
} else {
InternalPage *internal_node = reinterpret_cast<InternalPage *>(node);
InternalPage *neighbor_internal_node = reinterpret_cast<InternalPage *>(neighbor_node);
if (index == 0) { // case: node(left) and neighbor(right)
// set neighbor's first key to parent's second key(详见MoveFirstToEndOf函数)
// move neighbor's first to node's end
neighbor_internal_node->MoveFirstToEndOf(internal_node, parent->KeyAt(1), buffer_pool_manager_);
// set parent's second key to neighbor's "new" first key
parent->SetKeyAt(1, neighbor_internal_node->KeyAt(0));
} else { // case: neighbor(left) and node(right)
// MoveLastToFrontOf do this:
// set node's first key to parent's index key(详见MoveLastToFrontOf函数)
// move neighbor's last to node's front
neighbor_internal_node->MoveLastToFrontOf(internal_node, parent->KeyAt(index), buffer_pool_manager_);
// set parent's index key to node's "new" first key
parent->SetKeyAt(index, internal_node->KeyAt(0));
}
}

AdjustRoot函数

  • old_root_node是内部结点,且大小为1。表示内部结点其实已经没有key了,要把它的孩子更新成新的根结点。
  • old_root_node是叶结点,且大小为0。直接更新root page id。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
INDEX_TEMPLATE_ARGUMENTS
bool BPLUSTREE_TYPE::AdjustRoot(BPlusTreePage *old_root_node) {
// Case 1: old_root_node是内部结点,且大小为1。表示内部结点其实已经没有key了,所以要把它的孩子更新成新的根结点
// old_root_node (internal node) has only one size
if (!old_root_node->IsLeafPage() && old_root_node->GetSize() == 1) {
InternalPage *internal_node = reinterpret_cast<InternalPage *>(old_root_node);
page_id_t child_page_id = internal_node->RemoveAndReturnOnlyChild();
root_page_id_ = child_page_id;
UpdateRootPageId(0);
// update parent page id of new root node
Page *new_root_page = buffer_pool_manager_->FetchPage(root_page_id_);
InternalPage *new_root_node = reinterpret_cast<InternalPage *>(new_root_page->GetData());
new_root_node->SetParentPageId(INVALID_PAGE_ID);
buffer_pool_manager_->UnpinPage(new_root_page->GetPageId(), true);
return true;
}
// Case 2: old_root_node是叶结点,且大小为0。直接更新root page id
// all elements deleted from the B+ tree
if (old_root_node->IsLeafPage() && old_root_node->GetSize() == 0) {
root_page_id_ = INVALID_PAGE_ID;
UpdateRootPageId(0);
return true;
}

Coalesce函数

  • index表示node在parent中的孩子指针(value)的下标。
  • key_index表示:交换后的 node在parent中的孩子指针(value)的下标。
  • 若index=0,说明node为neighbor前驱,要保证neighbor为node的前驱,则交换变量neighbor和node,且key_index=1。
  • 分叶子节点和中间节点进行合并操作,记得叶子节点要更新链表。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
INDEX_TEMPLATE_ARGUMENTS
template <typename N>
bool BPLUSTREE_TYPE::Coalesce(N **neighbor_node, N **node,
BPlusTreeInternalPage<KeyType, page_id_t, KeyComparator> **parent, int index,
Transaction *transaction, bool *root_is_latched) {
int key_index = index;
if (index == 0) {
std::swap(neighbor_node, node); // 保证neighbor_node为node的前驱
key_index = 1;
}
KeyType middle_key = (*parent)->KeyAt(key_index); // middle_key only used in internal_node->MoveAllTo

// Move items from node to neighbor_node
if ((*node)->IsLeafPage()) {
LeafPage *leaf_node = reinterpret_cast<LeafPage *>(*node);
LeafPage *neighbor_leaf_node = reinterpret_cast<LeafPage *>(*neighbor_node);
leaf_node->MoveAllTo(neighbor_leaf_node);
neighbor_leaf_node->SetNextPageId(leaf_node->GetNextPageId());
} else {
InternalPage *internal_node = reinterpret_cast<InternalPage *>(*node);
InternalPage *neighbor_internal_node = reinterpret_cast<InternalPage *>(*neighbor_node);
// MoveAllTo do this: set node's first key to middle_key and move node to neighbor
internal_node->MoveAllTo(neighbor_internal_node, middle_key, buffer_pool_manager_);
}

// 删除node在parent中的kv信息
(*parent)->Remove(key_index); // 注意,是key_index,不是index

// 因为parent中删除了kv对,所以递归调用CoalesceOrRedistribute函数判断parent结点是否需要被删除
return CoalesceOrRedistribute(*parent, transaction, root_is_latched);

TASK #3 - INDEX ITERATOR

您将构建一个通用索引迭代器来有效地检索所有叶页。基本思想是将它们组织成一个链表,然后按存储在B+Tree叶子页面中的特定方向遍历每个键/值对。索引迭代器应该遵循c++ 17中定义的iterator的功能,包括使用一组操作符迭代一系列元素的能力,以及for-each循环(至少包含自增、解引用、相等和不相等操作符)。注意,为了支持索引的for-each循环函数,BPlusTree应该正确实现begin()end()

必须在指定的文件中实现索引迭代器。你只能修改头文件(src/include/storage/index/index_iterator.h)和它对应的源文件(src/index/storage/index_iterator.cpp)。您不需要修改任何其他文件。您必须在这些文件中的IndexIterator类中实现以下函数。在索引迭代器的实现中,只要有下面这三个方法,就可以添加任何辅助方法。

  • isEnd(): Return whether this iterator is pointing at the last key/value pair.
  • operator++(): Move to the next key/value pair.
  • operator*(): Return the key/value pair this iterator is currently pointing at.
  • operator==(): Return whether two iterators are equal
  • operator!=(): Return whether two iterators are not equal.

isEnd函数

1
2
INDEX_TEMPLATE_ARGUMENTS
bool INDEXITERATOR_TYPE::isEnd() { return leaf_->GetNextPageId() == INVALID_PAGE_ID && index_ == leaf_->GetSize(); }

operator++函数

  • 若index加1后指向当前leaf末尾(但不是整个叶子层的末尾),则进入下一个leaf且index置0。
  • pin住下一个page并加上读锁,unpin并解锁上一个page。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
INDEX_TEMPLATE_ARGUMENTS
INDEXITERATOR_TYPE &INDEXITERATOR_TYPE::operator++() {
// 若index加1后指向当前leaf末尾(但不是整个叶子层的末尾),则进入下一个leaf且index置0
index_++;
if (index_ == leaf_->GetSize() && leaf_->GetNextPageId() != INVALID_PAGE_ID) {
Page *next_page = buffer_pool_manager_->FetchPage(leaf_->GetNextPageId()); // pin next leaf page
next_page->RLatch();

page_->RUnlatch();
buffer_pool_manager_->UnpinPage(page_->GetPageId(), false); // unpin current leaf page

page_ = next_page;
leaf_ = reinterpret_cast<LeafPage *>(page_->GetData()); // update leaf page to next page
index_ = 0; // reset index to zero
}
return *this;
}

operator*函数

  • 取出leaf中的array[index],为pair类型。
1
2
INDEX_TEMPLATE_ARGUMENTS
const MappingType &INDEXITERATOR_TYPE::operator*() { return leaf_->GetItem(index_); }

operator==和operator!=函数

1
2
3
4
5
6
7
INDEX_TEMPLATE_ARGUMENTS
bool INDEXITERATOR_TYPE::operator==(const IndexIterator &itr) const {
return leaf_->GetPageId() == itr.leaf_->GetPageId() && index_ == itr.index_; // leaf page和index均相同
}

INDEX_TEMPLATE_ARGUMENTS
bool INDEXITERATOR_TYPE::operator!=(const IndexIterator &itr) const { return !(*this == itr); } // 此处可用之前重载的==

Begin函数

  • 找到包含输入键的叶子节点。
  • 此处直接用KeyIndex二分搜索(找到第一个>=key的index)。
1
2
3
4
5
6
7
INDEX_TEMPLATE_ARGUMENTS
INDEXITERATOR_TYPE BPLUSTREE_TYPE::Begin(const KeyType &key) {
Page *leaf_page = FindLeafPageByOperation(key, Operation::FIND).first;
LeafPage *leaf_node = reinterpret_cast<LeafPage *>(leaf_page->GetData());
int index = leaf_node->KeyIndex(key, comparator_);
return INDEXITERATOR_TYPE(buffer_pool_manager_, leaf_page, index);
}

end函数

1
2
3
4
5
6
7
INDEX_TEMPLATE_ARGUMENTS
INDEXITERATOR_TYPE BPLUSTREE_TYPE::end() {
Page *leaf_page = FindLeafPageByOperation(KeyType(), Operation::FIND, nullptr, false, true).first;
LeafPage *leaf_node = reinterpret_cast<LeafPage *>(leaf_page->GetData());
// 注意传入的index为leaf_node->GetSize()
return INDEXITERATOR_TYPE(buffer_pool_manager_, leaf_page, leaf_node->GetSize()); // 注意:此时leaf_node没有unpin
}

Test

此时我们以及完成Task 1, Task 2, Task 3,而Task 4已经在完成Task 2的过程中顺便完成了,可以直接进行测试。
  • 本地测试
1
2
3
4
5
6
7
8
9
10
11
12
13
14
$ ./test/b_plus_tree_delete_test 
Running main() from gmock_main.cc
[==========] Running 2 tests from 1 test suite.
[----------] Global test environment set-up.
[----------] 2 tests from BPlusTreeTests
[ RUN ] BPlusTreeTests.DeleteTest1
[ OK ] BPlusTreeTests.DeleteTest1 (0 ms)
[ RUN ] BPlusTreeTests.DeleteTest2
[ OK ] BPlusTreeTests.DeleteTest2 (0 ms)
[----------] 2 tests from BPlusTreeTests (1 ms total)

[----------] Global test environment tear-down
[==========] 2 tests from 1 test suite ran. (1 ms total)
[ PASSED ] 2 tests.
  • Gradescope测试