Skip to content
This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Modified pg_column_stats initialization #1352

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Style fix
poojanilangekar committed May 18, 2018
commit 92d95fa2bb8ed6a5d9fd5bf86421f03cc13f0d08
23 changes: 12 additions & 11 deletions src/catalog/catalog.cpp
Original file line number Diff line number Diff line change
@@ -160,16 +160,17 @@ void Catalog::BootstrapSystemCatalogs(storage::Database *database,
false, {TableCatalog::ColumnId::DATABASE_OID}, pool_.get(), txn);

system_catalogs->GetIndexCatalog()->InsertIndex(
COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0",
COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE,
IndexConstraintType::UNIQUE, true,
{ColumnStatsCatalog::ColumnId::TABLE_ID,
ColumnStatsCatalog::ColumnId::COLUMN_ID}, pool_.get(), txn);
COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0",
COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE,
IndexConstraintType::UNIQUE, true,
{ColumnStatsCatalog::ColumnId::TABLE_ID,
ColumnStatsCatalog::ColumnId::COLUMN_ID},
pool_.get(), txn);
system_catalogs->GetIndexCatalog()->InsertIndex(
COLUMN_STATS_CATALOG_SKEY1_OID, COLUMN_STATS_CATALOG_NAME "_skey1",
COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE,
IndexConstraintType::UNIQUE, true,
{ColumnStatsCatalog::ColumnId::TABLE_ID}, pool_.get(), txn);
COLUMN_STATS_CATALOG_SKEY1_OID, COLUMN_STATS_CATALOG_NAME "_skey1",
COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE,
IndexConstraintType::UNIQUE, true,
{ColumnStatsCatalog::ColumnId::TABLE_ID}, pool_.get(), txn);

// Insert records (default + pg_catalog namespace) into pg_namespace
system_catalogs->GetSchemaCatalog()->InsertSchema(
@@ -198,8 +199,8 @@ void Catalog::BootstrapSystemCatalogs(storage::Database *database,
LAYOUT_CATALOG_OID, LAYOUT_CATALOG_NAME, CATALOG_SCHEMA_NAME,
database_oid, pool_.get(), txn);
system_catalogs->GetTableCatalog()->InsertTable(
COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME,
CATALOG_SCHEMA_NAME, database_oid, pool_.get(), txn);
COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, CATALOG_SCHEMA_NAME,
database_oid, pool_.get(), txn);
}

void Catalog::Bootstrap() {
71 changes: 33 additions & 38 deletions src/catalog/column_stats_catalog.cpp
Original file line number Diff line number Diff line change
@@ -23,74 +23,71 @@ namespace peloton {
namespace catalog {

ColumnStatsCatalog::ColumnStatsCatalog(
storage::Database *pg_catalog,
UNUSED_ATTRIBUTE type::AbstractPool *pool,
UNUSED_ATTRIBUTE concurrency::TransactionContext *txn)
: AbstractCatalog(COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME,
InitializeSchema().release(), pg_catalog) {
storage::Database *pg_catalog, UNUSED_ATTRIBUTE type::AbstractPool *pool,
UNUSED_ATTRIBUTE concurrency::TransactionContext *txn)
: AbstractCatalog(COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME,
InitializeSchema().release(), pg_catalog) {
// Add indexes for pg_column_stats
AddIndex({ColumnId::TABLE_ID, ColumnId::COLUMN_ID},
COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0",
IndexConstraintType::UNIQUE);
AddIndex({ColumnId::TABLE_ID}, COLUMN_STATS_CATALOG_SKEY1_OID,
COLUMN_STATS_CATALOG_NAME "_skey1", IndexConstraintType::DEFAULT);

}

ColumnStatsCatalog::~ColumnStatsCatalog() {}

std::unique_ptr<catalog::Schema> ColumnStatsCatalog::InitializeSchema() {

const std::string not_null_constraint_name = "notnull";
const auto not_null_constraint = catalog::Constraint(
ConstraintType::NOTNULL, not_null_constraint_name);
const auto not_null_constraint =
catalog::Constraint(ConstraintType::NOTNULL, not_null_constraint_name);

auto table_id_column = catalog::Column(
type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER),
"table_id", true);
type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER),
"table_id", true);
table_id_column.AddConstraint(not_null_constraint);
auto column_id_column = catalog::Column(
type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER),
"column_id", true);
type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER),
"column_id", true);
column_id_column.AddConstraint(not_null_constraint);
auto num_rows_column = catalog::Column(
type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER),
"num_rows", true);
type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER),
"num_rows", true);
num_rows_column.AddConstraint(not_null_constraint);
auto cardinality_column = catalog::Column(
type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL),
"cardinality", true);
type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL),
"cardinality", true);
cardinality_column.AddConstraint(not_null_constraint);
auto frac_null_column = catalog::Column(
type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL),
"frac_null", true);
type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL),
"frac_null", true);
frac_null_column.AddConstraint(not_null_constraint);
auto most_common_vals_column = catalog::Column(
type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR),
"most_common_vals", false);
type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR),
"most_common_vals", false);
auto most_common_freqs_column = catalog::Column(
type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR),
"most_common_freqs", false);
type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR),
"most_common_freqs", false);
auto histogram_bounds_column = catalog::Column(
type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR),
"histogram_bounds", false);
type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR),
"histogram_bounds", false);
auto column_name_column = catalog::Column(
type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR),
"column_name", false);
type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR),
"column_name", false);
auto has_index_column = catalog::Column(
type::TypeId::BOOLEAN, type::Type::GetTypeSize(type::TypeId::BOOLEAN),
"has_index", true);
type::TypeId::BOOLEAN, type::Type::GetTypeSize(type::TypeId::BOOLEAN),
"has_index", true);

std::unique_ptr<catalog::Schema> column_stats_schema(new catalog::Schema(
{table_id_column, column_id_column, num_rows_column, cardinality_column,
frac_null_column, most_common_vals_column, most_common_freqs_column,
histogram_bounds_column, column_name_column, has_index_column}));
{table_id_column, column_id_column, num_rows_column, cardinality_column,
frac_null_column, most_common_vals_column, most_common_freqs_column,
histogram_bounds_column, column_name_column, has_index_column}));
return column_stats_schema;
}

bool ColumnStatsCatalog::InsertColumnStats(
oid_t table_id, oid_t column_id, int num_rows,
double cardinality, double frac_null, std::string most_common_vals,
oid_t table_id, oid_t column_id, int num_rows, double cardinality,
double frac_null, std::string most_common_vals,
std::string most_common_freqs, std::string histogram_bounds,
std::string column_name, bool has_index, type::AbstractPool *pool,
concurrency::TransactionContext *txn) {
@@ -142,8 +139,7 @@ bool ColumnStatsCatalog::InsertColumnStats(
}

bool ColumnStatsCatalog::DeleteColumnStats(
oid_t table_id, oid_t column_id,
concurrency::TransactionContext *txn) {
oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn) {
oid_t index_offset = IndexId::SECONDARY_KEY_0; // Secondary key index

std::vector<type::Value> values;
@@ -154,8 +150,7 @@ bool ColumnStatsCatalog::DeleteColumnStats(
}

std::unique_ptr<std::vector<type::Value>> ColumnStatsCatalog::GetColumnStats(
oid_t table_id, oid_t column_id,
concurrency::TransactionContext *txn) {
oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn) {
std::vector<oid_t> column_ids(
{ColumnId::NUM_ROWS, ColumnId::CARDINALITY, ColumnId::FRAC_NULL,
ColumnId::MOST_COMMON_VALS, ColumnId::MOST_COMMON_FREQS,
15 changes: 6 additions & 9 deletions src/include/catalog/column_stats_catalog.h
Original file line number Diff line number Diff line change
@@ -45,7 +45,6 @@ namespace catalog {

class ColumnStatsCatalog : public AbstractCatalog {
public:

ColumnStatsCatalog(storage::Database *pg_catalog, type::AbstractPool *pool,
concurrency::TransactionContext *txn);

@@ -54,8 +53,8 @@ class ColumnStatsCatalog : public AbstractCatalog {
//===--------------------------------------------------------------------===//
// write Related API
//===--------------------------------------------------------------------===//
bool InsertColumnStats(oid_t table_id, oid_t column_id,
int num_rows, double cardinality, double frac_null,
bool InsertColumnStats(oid_t table_id, oid_t column_id, int num_rows,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add comment documenting function purpose and arguments.

double cardinality, double frac_null,
std::string most_common_vals,
std::string most_common_freqs,
std::string histogram_bounds, std::string column_name,
@@ -68,16 +67,14 @@ class ColumnStatsCatalog : public AbstractCatalog {
// Read-only Related API
//===--------------------------------------------------------------------===//
std::unique_ptr<std::vector<type::Value>> GetColumnStats(
oid_t table_id, oid_t column_id,
concurrency::TransactionContext *txn);
oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment documenting the function, its arguments, and its return value.


size_t GetTableStats(
oid_t table_id, concurrency::TransactionContext *txn,
std::map<oid_t, std::unique_ptr<std::vector<type::Value>>> &
column_stats_map);
oid_t table_id, concurrency::TransactionContext *txn,
std::map<oid_t, std::unique_ptr<std::vector<type::Value>>>
&column_stats_map);
// TODO: add more if needed


/** @brief private function to initialize the schema of pg_index
* @return unique pointer to schema
*/
3 changes: 1 addition & 2 deletions src/include/optimizer/stats/stats_storage.h
Original file line number Diff line number Diff line change
@@ -68,8 +68,7 @@ class StatsStorage {
/* Functions for triggering stats collection */

ResultType AnalyzeStatsForAllTablesWithDatabaseOid(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add function header describing this function, args, etc.

oid_t database_oid,
concurrency::TransactionContext *txn = nullptr);
oid_t database_oid, concurrency::TransactionContext *txn = nullptr);

ResultType AnalyzeStatsForTable(
storage::DataTable *table,
42 changes: 20 additions & 22 deletions src/optimizer/stats/stats_storage.cpp
Original file line number Diff line number Diff line change
@@ -98,21 +98,20 @@ void StatsStorage::InsertOrUpdateColumnStats(
cardinality, frac_null, most_common_vals.c_str(),
most_common_freqs.c_str(), histogram_bounds.c_str());
auto pg_column_stats = catalog::Catalog::GetInstance()
->GetSystemCatalogs(database_id)
->GetColumnStatsCatalog();
->GetSystemCatalogs(database_id)
->GetColumnStatsCatalog();
auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance();

bool single_statement_txn = false;
if (txn == nullptr) {
single_statement_txn = true;
txn = txn_manager.BeginTransaction();
}
pg_column_stats->DeleteColumnStats(table_id, column_id,
txn);
pg_column_stats->InsertColumnStats(
table_id, column_id, num_rows, cardinality, frac_null,
most_common_vals, most_common_freqs, histogram_bounds, column_name,
has_index, pool_.get(), txn);
pg_column_stats->DeleteColumnStats(table_id, column_id, txn);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DeleteColumnStats if they exist, I assume. May be helpful to add comment stating that, if correct.

pg_column_stats->InsertColumnStats(table_id, column_id, num_rows, cardinality,
frac_null, most_common_vals,
most_common_freqs, histogram_bounds,
column_name, has_index, pool_.get(), txn);

if (single_statement_txn) {
txn_manager.CommitTransaction(txn);
@@ -127,13 +126,13 @@ std::shared_ptr<ColumnStats> StatsStorage::GetColumnStatsByID(oid_t database_id,
oid_t table_id,
oid_t column_id) {
auto pg_column_stats = catalog::Catalog::GetInstance()
->GetSystemCatalogs(database_id)
->GetColumnStatsCatalog();
->GetSystemCatalogs(database_id)
->GetColumnStatsCatalog();
auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance();
auto txn = txn_manager.BeginTransaction();
// std::unique_ptr<std::vector<type::Value>> column_stats_vector
auto column_stats_vector = pg_column_stats->GetColumnStats(
table_id, column_id, txn);
auto column_stats_vector =
pg_column_stats->GetColumnStats(table_id, column_id, txn);
txn_manager.CommitTransaction(txn);

return ConvertVectorToColumnStats(database_id, table_id, column_id,
@@ -211,8 +210,8 @@ std::shared_ptr<ColumnStats> StatsStorage::ConvertVectorToColumnStats(
std::shared_ptr<TableStats> StatsStorage::GetTableStats(
oid_t database_id, oid_t table_id, concurrency::TransactionContext *txn) {
auto pg_column_stats = catalog::Catalog::GetInstance()
->GetSystemCatalogs(database_id)
->GetColumnStatsCatalog();
->GetSystemCatalogs(database_id)
->GetColumnStatsCatalog();
std::map<oid_t, std::unique_ptr<std::vector<type::Value>>> column_stats_map;
pg_column_stats->GetTableStats(table_id, txn, column_stats_map);

@@ -236,8 +235,8 @@ std::shared_ptr<TableStats> StatsStorage::GetTableStats(
oid_t database_id, oid_t table_id, std::vector<oid_t> column_ids,
concurrency::TransactionContext *txn) {
auto pg_column_stats = catalog::Catalog::GetInstance()
->GetSystemCatalogs(database_id)
->GetColumnStatsCatalog();
->GetSystemCatalogs(database_id)
->GetColumnStatsCatalog();
std::map<oid_t, std::unique_ptr<std::vector<type::Value>>> column_stats_map;
pg_column_stats->GetTableStats(table_id, txn, column_stats_map);

@@ -258,18 +257,17 @@ std::shared_ptr<TableStats> StatsStorage::GetTableStats(
* datatables to collect their stats and store them in the column_stats_catalog.
*/
ResultType StatsStorage::AnalyzeStatsForAllTablesWithDatabaseOid(
oid_t database_oid,
UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) {
oid_t database_oid, UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) {
if (txn == nullptr) {
LOG_TRACE("Do not have transaction to analyze all tables' stats.");
return ResultType::FAILURE;
}

auto storage_manager = storage::StorageManager::GetInstance();
auto database = storage_manager->GetDatabaseWithOid(database);
auto database = storage_manager->GetDatabaseWithOid(database_oid);
PELOTON_ASSERT(database != nullptr);
auto pg_database = catalog::Catalog::GetInstance()
->GetDatabaseObject(database_oid, txn);
auto pg_database =
catalog::Catalog::GetInstance()->GetDatabaseObject(database_oid, txn);
auto table_objects = pg_database->GetTableObjects();
for (auto &table_object_entry : table_objects) {
auto table_oid = table_object_entry.first;
@@ -280,7 +278,7 @@ ResultType StatsStorage::AnalyzeStatsForAllTablesWithDatabaseOid(
LOG_TRACE("Analyzing table: %s", table_object->GetTableName().c_str());
auto table = database->GetTableWithOid(table_oid);
std::unique_ptr<TableStatsCollector> table_stats_collector(
new TableStatsCollector(table));
new TableStatsCollector(table));
table_stats_collector->CollectColumnStats();
InsertOrUpdateTableStats(table, table_stats_collector.get(), txn);
}
15 changes: 7 additions & 8 deletions test/optimizer/stats_storage_test.cpp
Original file line number Diff line number Diff line change
@@ -93,7 +93,6 @@ void VerifyAndPrintColumnStats(storage::DataTable *data_table,
}

TEST_F(StatsStorageTests, InsertAndGetTableStatsTest) {

const std::string db_name = "test_db";
TestingExecutorUtil::InitializeDatabase(db_name);
auto data_table = InitializeTestTable();
@@ -240,8 +239,8 @@ TEST_F(StatsStorageTests, AnalyzeStatsForAllTablesTest) {
StatsStorage *stats_storage = StatsStorage::GetInstance();

// Must pass in the transaction.
ResultType result = stats_storage
->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid);
ResultType result =
stats_storage->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid);
EXPECT_EQ(result, ResultType::FAILURE);

auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance();
@@ -253,7 +252,6 @@ TEST_F(StatsStorageTests, AnalyzeStatsForAllTablesTest) {
// Check the correctness of the stats.
VerifyAndPrintColumnStats(data_table, 4);
TestingExecutorUtil::DeleteDatabase(db_name);

}

TEST_F(StatsStorageTests, GetTableStatsTest) {
@@ -265,13 +263,14 @@ TEST_F(StatsStorageTests, GetTableStatsTest) {

auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance();
auto txn = txn_manager.BeginTransaction();
ResultType result = stats_storage
->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid, txn);
ResultType result =
stats_storage->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid, txn);
EXPECT_EQ(ResultType::SUCCESS, result);
txn_manager.CommitTransaction(txn);

txn = txn_manager.BeginTransaction();
std::shared_ptr<TableStats> table_stats = stats_storage->GetTableStats(
db_oid, data_table->GetOid(), txn);
std::shared_ptr<TableStats> table_stats =
stats_storage->GetTableStats(db_oid, data_table->GetOid(), txn);
txn_manager.CommitTransaction(txn);
EXPECT_EQ(table_stats->num_rows, tuple_count);
TestingExecutorUtil::DeleteDatabase(db_name);