Skip to content

Commit 02137aa

Browse files
authored
feat(optimizer): plan order by as index scan, support create index (#388)
Signed-off-by: Alex Chi <[email protected]>
1 parent 683aa99 commit 02137aa

24 files changed

+232
-63
lines changed

.clang-tidy

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ Checks: '
4747
-readability-make-member-function-const,
4848
-readability-qualified-auto,
4949
-readability-redundant-access-specifiers,
50+
-bugprone-exception-escape,
5051
'
5152
CheckOptions:
5253
- { key: readability-identifier-naming.ClassCase, value: CamelCase }
@@ -92,3 +93,5 @@ AnalyzeTemporaryDtors: true
9293
# -readability-qualified-auto, -readability-redundant-access-specifiers
9394
# These were previously disabled for not being available in clang-tidy-8. They are now available on our clang-tidy-12,
9495
# and potentially worth investigating/fixing.
96+
# -bugprone-exception-escape
97+
# Weird. No idea how to resolve.

src/binder/bind_insert.cpp

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,16 +22,15 @@ auto Binder::BindInsert(duckdb_libpgquery::PGInsertStmt *pg_stmt) -> std::unique
2222
throw NotImplementedException("insert only supports all columns, don't specify columns");
2323
}
2424

25-
auto table_name = std::string(pg_stmt->relation->relname);
25+
auto table = BindRangeVar(pg_stmt->relation);
2626

27-
auto table_info = catalog_.GetTable(table_name);
28-
if (table_info == nullptr || StringUtil::StartsWith(table_name, "__")) {
29-
throw bustub::Exception(fmt::format("invalid table for insert: {}", table_name));
27+
if (StringUtil::StartsWith(table->table_, "__")) {
28+
throw bustub::Exception(fmt::format("invalid table for insert: {}", table->table_));
3029
}
3130

3231
auto select_statement = BindSelect(reinterpret_cast<duckdb_libpgquery::PGSelectStmt *>(pg_stmt->selectStmt));
3332

34-
return std::make_unique<InsertStatement>(table_name, std::move(select_statement));
33+
return std::make_unique<InsertStatement>(std::move(table), std::move(select_statement));
3534
}
3635

3736
auto Binder::BindDelete(duckdb_libpgquery::PGDeleteStmt *stmt) -> std::unique_ptr<DeleteStatement> {

src/binder/bind_select.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
#include "binder/tokens.h"
2626
#include "catalog/catalog.h"
2727
#include "common/exception.h"
28+
#include "common/macros.h"
2829
#include "common/util/string_util.h"
2930
#include "fmt/core.h"
3031
#include "fmt/format.h"
@@ -265,7 +266,8 @@ auto Binder::BindBaseTableRef(std::string table_name, std::optional<std::string>
265266
if (table_info == nullptr) {
266267
throw bustub::Exception(fmt::format("invalid table {}", table_name));
267268
}
268-
return std::make_unique<BoundBaseTableRef>(std::move(table_name), std::move(alias), table_info->schema_);
269+
return std::make_unique<BoundBaseTableRef>(std::move(table_name), table_info->oid_, std::move(alias),
270+
table_info->schema_);
269271
}
270272

271273
auto Binder::BindRangeVar(duckdb_libpgquery::PGRangeVar *table_ref) -> std::unique_ptr<BoundBaseTableRef> {
@@ -384,7 +386,7 @@ auto Binder::BindConstant(duckdb_libpgquery::PGAConst *node) -> std::unique_ptr<
384386
const auto &val = node->val;
385387
switch (val.type) {
386388
case duckdb_libpgquery::T_PGInteger: {
387-
BUSTUB_ASSERT(val.val.ival <= BUSTUB_INT32_MAX, "value out of range");
389+
BUSTUB_ENSURE(val.val.ival <= BUSTUB_INT32_MAX, "value out of range");
388390
return std::make_unique<BoundConstant>(ValueFactory::GetIntegerValue(static_cast<int32_t>(val.val.ival)));
389391
}
390392
case duckdb_libpgquery::T_PGString: {

src/binder/statement/insert_statement.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,11 +9,11 @@
99

1010
namespace bustub {
1111

12-
InsertStatement::InsertStatement(std::string table, std::unique_ptr<SelectStatement> select)
12+
InsertStatement::InsertStatement(std::unique_ptr<BoundBaseTableRef> table, std::unique_ptr<SelectStatement> select)
1313
: BoundStatement(StatementType::INSERT_STATEMENT), table_(std::move(table)), select_(std::move(select)) {}
1414

1515
auto InsertStatement::ToString() const -> std::string {
16-
return fmt::format("BoundInsert {{\n table={},\n select={}\n}}", table_,
16+
return fmt::format("BoundInsert {{\n table={},\n select={}\n}}", *table_,
1717
StringUtil::IndentAllLines(select_->ToString(), 2));
1818
}
1919

src/common/bustub_instance.cpp

Lines changed: 40 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
#include "binder/bound_statement.h"
55
#include "binder/statement/create_statement.h"
66
#include "binder/statement/explain_statement.h"
7+
#include "binder/statement/index_statement.h"
78
#include "binder/statement/select_statement.h"
89
#include "buffer/buffer_pool_manager_instance.h"
910
#include "catalog/schema.h"
@@ -57,7 +58,7 @@ BustubInstance::BustubInstance(const std::string &db_file_name) {
5758
checkpoint_manager_ = new CheckpointManager(transaction_manager_, log_manager_, buffer_pool_manager_);
5859

5960
// Catalog.
60-
catalog_ = new Catalog(buffer_pool_manager_, nullptr, nullptr);
61+
catalog_ = new Catalog(buffer_pool_manager_, lock_manager_, log_manager_);
6162

6263
// Execution engine.
6364
execution_engine_ = new ExecutionEngine(buffer_pool_manager_, transaction_manager_, catalog_);
@@ -97,29 +98,60 @@ auto BustubInstance::ExecuteSql(const std::string &sql) -> std::vector<std::stri
9798
case StatementType::CREATE_STATEMENT: {
9899
const auto &create_stmt = dynamic_cast<const CreateStatement &>(*statement);
99100
auto txn = transaction_manager_->Begin();
100-
catalog_->CreateTable(txn, create_stmt.table_, Schema(create_stmt.columns_));
101+
auto info = catalog_->CreateTable(txn, create_stmt.table_, Schema(create_stmt.columns_));
101102
transaction_manager_->Commit(txn);
102103
delete txn;
104+
if (info == nullptr) {
105+
throw bustub::Exception("Failed to create table");
106+
}
107+
std::cout << "Table created with id = " << info->oid_ << std::endl;
108+
continue;
109+
}
110+
case StatementType::INDEX_STATEMENT: {
111+
const auto &index_stmt = dynamic_cast<const IndexStatement &>(*statement);
112+
auto txn = transaction_manager_->Begin();
113+
114+
std::vector<uint32_t> col_ids;
115+
for (const auto &col : index_stmt.cols_) {
116+
auto idx = index_stmt.table_->schema_.GetColIdx(col->col_name_.back());
117+
col_ids.push_back(idx);
118+
if (index_stmt.table_->schema_.GetColumn(idx).GetType() != TypeId::INTEGER) {
119+
throw NotImplementedException("only support creating index on integer column");
120+
}
121+
}
122+
if (col_ids.size() != 1) {
123+
throw NotImplementedException("only support creating index with exactly one column");
124+
}
125+
auto key_schema = Schema::CopySchema(&index_stmt.table_->schema_, col_ids);
126+
auto info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
127+
txn, index_stmt.index_name_, index_stmt.table_->table_, index_stmt.table_->schema_, key_schema, col_ids,
128+
INTEGER_SIZE, IntegerHashFunctionType{});
129+
transaction_manager_->Commit(txn);
130+
delete txn;
131+
if (info == nullptr) {
132+
throw bustub::Exception("Failed to create index");
133+
}
134+
std::cout << "Index created with id = " << info->index_oid_ << std::endl;
103135
continue;
104136
}
105137
case StatementType::EXPLAIN_STATEMENT: {
106138
const auto &explain_stmt = dynamic_cast<const ExplainStatement &>(*statement);
107139

108140
// Print binder result.
109-
std::cerr << "=== BINDER ===" << std::endl;
110-
std::cerr << statement->ToString() << std::endl;
141+
std::cout << "=== BINDER ===" << std::endl;
142+
std::cout << statement->ToString() << std::endl;
111143

112144
// Print planner result.
113145
bustub::Planner planner(*catalog_);
114146
planner.PlanQuery(*explain_stmt.statement_);
115-
std::cerr << "=== PLANNER ===" << std::endl;
116-
std::cerr << planner.plan_->ToString() << std::endl;
147+
std::cout << "=== PLANNER ===" << std::endl;
148+
std::cout << planner.plan_->ToString() << std::endl;
117149

118150
// Print optimizer result.
119151
bustub::Optimizer optimizer(*catalog_);
120152
auto optimized_plan = optimizer.Optimize(planner.plan_);
121-
std::cerr << "=== OPTIMIZER ===" << std::endl;
122-
std::cerr << optimized_plan->ToString() << std::endl;
153+
std::cout << "=== OPTIMIZER ===" << std::endl;
154+
std::cout << optimized_plan->ToString() << std::endl;
123155
continue;
124156
}
125157
default:

src/include/binder/statement/insert_statement.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
#include <vector>
1313

1414
#include "binder/bound_statement.h"
15+
#include "binder/table_ref/bound_base_table_ref.h"
1516
#include "catalog/column.h"
1617
#include "type/value.h"
1718

@@ -25,9 +26,10 @@ class SelectStatement;
2526

2627
class InsertStatement : public BoundStatement {
2728
public:
28-
explicit InsertStatement(std::string table, std::unique_ptr<SelectStatement> select);
29+
explicit InsertStatement(std::unique_ptr<BoundBaseTableRef> table, std::unique_ptr<SelectStatement> select);
30+
31+
std::unique_ptr<BoundBaseTableRef> table_;
2932

30-
std::string table_;
3133
std::unique_ptr<SelectStatement> select_;
3234

3335
auto ToString() const -> std::string override;

src/include/binder/table_ref/bound_base_table_ref.h

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
#include <utility>
66
#include "binder/bound_table_ref.h"
77
#include "catalog/schema.h"
8+
#include "concurrency/transaction.h"
89
#include "fmt/core.h"
910

1011
namespace bustub {
@@ -14,17 +15,18 @@ namespace bustub {
1415
*/
1516
class BoundBaseTableRef : public BoundTableRef {
1617
public:
17-
explicit BoundBaseTableRef(std::string table, std::optional<std::string> alias, Schema schema)
18+
explicit BoundBaseTableRef(std::string table, table_oid_t oid, std::optional<std::string> alias, Schema schema)
1819
: BoundTableRef(TableReferenceType::BASE_TABLE),
1920
table_(std::move(table)),
21+
oid_(oid),
2022
alias_(std::move(alias)),
2123
schema_(std::move(schema)) {}
2224

2325
auto ToString() const -> std::string override {
2426
if (alias_ == std::nullopt) {
25-
return fmt::format("BoundBaseTableRef {{ table={} }}", table_);
27+
return fmt::format("BoundBaseTableRef {{ table={}, oid={} }}", table_, oid_);
2628
}
27-
return fmt::format("BoundBaseTableRef {{ table={}, alias={} }}", table_, *alias_);
29+
return fmt::format("BoundBaseTableRef {{ table={}, oid={}, alias={} }}", table_, oid_, *alias_);
2830
}
2931

3032
auto GetBoundTableName() const -> std::string {
@@ -37,6 +39,9 @@ class BoundBaseTableRef : public BoundTableRef {
3739
/** The name of the table. */
3840
std::string table_;
3941

42+
/** The oid of the table. */
43+
table_oid_t oid_;
44+
4045
/** The alias of the table */
4146
std::optional<std::string> alias_;
4247

src/include/catalog/catalog.h

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
#include "buffer/buffer_pool_manager.h"
2222
#include "catalog/schema.h"
2323
#include "container/hash/hash_function.h"
24+
#include "storage/index/b_plus_tree_index.h"
2425
#include "storage/index/extendible_hash_table_index.h"
2526
#include "storage/index/index.h"
2627
#include "storage/table/table_heap.h"
@@ -176,7 +177,7 @@ class Catalog {
176177
* @param table_oid The OID of the table to query
177178
* @return A (non-owning) pointer to the metadata for the table
178179
*/
179-
auto GetTable(table_oid_t table_oid) -> TableInfo * {
180+
auto GetTable(table_oid_t table_oid) const -> TableInfo * {
180181
auto meta = tables_.find(table_oid);
181182
if (meta == tables_.end()) {
182183
return NULL_TABLE_INFO;
@@ -223,8 +224,9 @@ class Catalog {
223224
// TODO(Kyle): We should update the API for CreateIndex
224225
// to allow specification of the index type itself, not
225226
// just the key, value, and comparator types
226-
auto index = std::make_unique<ExtendibleHashTableIndex<KeyType, ValueType, KeyComparator>>(std::move(meta), bpm_,
227-
hash_function);
227+
228+
// TODO(chi): support both hash index and btree index
229+
auto index = std::make_unique<BPlusTreeIndex<KeyType, ValueType, KeyComparator>>(std::move(meta), bpm_);
228230

229231
// Populate the index with all tuples in table heap
230232
auto *table_meta = GetTable(table_name);
@@ -311,7 +313,7 @@ class Catalog {
311313
* @return A vector of IndexInfo* for each index on the given table, empty vector
312314
* in the event that the table exists but no indexes have been created for it
313315
*/
314-
auto GetTableIndexes(const std::string &table_name) -> std::vector<IndexInfo *> {
316+
auto GetTableIndexes(const std::string &table_name) const -> std::vector<IndexInfo *> {
315317
// Ensure the table exists
316318
if (table_names_.find(table_name) == table_names_.end()) {
317319
return std::vector<IndexInfo *>{};

src/include/catalog/schema.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,13 +34,13 @@ class Schema {
3434
*/
3535
explicit Schema(const std::vector<Column> &columns);
3636

37-
static auto CopySchema(const Schema *from, const std::vector<uint32_t> &attrs) -> Schema * {
37+
static auto CopySchema(const Schema *from, const std::vector<uint32_t> &attrs) -> Schema {
3838
std::vector<Column> cols;
3939
cols.reserve(attrs.size());
4040
for (const auto i : attrs) {
4141
cols.emplace_back(from->columns_[i]);
4242
}
43-
return new Schema{cols};
43+
return Schema{cols};
4444
}
4545

4646
/** @return all the columns in the schema */

src/include/execution/plans/delete_plan.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212

1313
#pragma once
1414

15+
#include <string>
1516
#include <utility>
1617

1718
#include "catalog/catalog.h"
@@ -50,6 +51,9 @@ class DeletePlanNode : public AbstractPlanNode {
5051

5152
BUSTUB_PLAN_NODE_CLONE_WITH_CHILDREN(DeletePlanNode);
5253

54+
protected:
55+
/** @return a one-line description of this plan node, formatted as `Delete { table_oid=<table_oid_> }` */
auto PlanNodeToString() const -> std::string override { return fmt::format("Delete {{ table_oid={} }}", table_oid_); }
56+
5357
private:
5458
/** The identifier of the table from which tuples are deleted */
5559
table_oid_t table_oid_;

0 commit comments

Comments
 (0)