From 5926cd4264aedfcba58b1825d15ff08a4bff49ad Mon Sep 17 00:00:00 2001 From: poojanilangekar Date: Wed, 9 May 2018 00:27:46 -0400 Subject: [PATCH 1/7] Modified catalog. Need to fix tests --- src/catalog/catalog.cpp | 16 +++ src/catalog/column_stats_catalog.cpp | 108 ++++++++++++-------- src/catalog/system_catalogs.cpp | 6 +- src/include/catalog/catalog_defaults.h | 10 +- src/include/catalog/column_stats_catalog.h | 68 ++++++------ src/include/catalog/system_catalogs.h | 15 ++- src/include/optimizer/stats/stats_storage.h | 4 - src/optimizer/stats/stats_storage.cpp | 48 ++++----- 8 files changed, 164 insertions(+), 111 deletions(-) diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index a36eb71c4bd..236526bebcd 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include #include "catalog/catalog.h" #include "catalog/column_catalog.h" @@ -158,6 +159,18 @@ void Catalog::BootstrapSystemCatalogs(storage::Database *database, CATALOG_SCHEMA_NAME, IndexType::BWTREE, IndexConstraintType::DEFAULT, false, {TableCatalog::ColumnId::DATABASE_OID}, pool_.get(), txn); + system_catalogs->GetIndexCatalog()->InsertIndex( + COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0", + COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE, + IndexConstraintType::UNIQUE, true, + {ColumnStatsCatalog::ColumnId::TABLE_ID, + ColumnStatsCatalog::ColumnId::COLUMN_ID}, pool_.get(), txn); + system_catalogs->GetIndexCatalog()->InsertIndex( + COLUMN_STATS_CATALOG_SKEY1_OID, COLUMN_STATS_CATALOG_NAME "_skey1", + COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE, + IndexConstraintType::UNIQUE, true, + {ColumnStatsCatalog::ColumnId::TABLE_ID}, pool_.get(), txn); + // Insert records(default + pg_catalog namespace) into pg_namespace system_catalogs->GetSchemaCatalog()->InsertSchema( CATALOG_SCHEMA_OID, CATALOG_SCHEMA_NAME, pool_.get(), txn); @@ -184,6 +197,9 @@ void Catalog::BootstrapSystemCatalogs(storage::Database *database, system_catalogs->GetTableCatalog()->InsertTable( LAYOUT_CATALOG_OID, LAYOUT_CATALOG_NAME, CATALOG_SCHEMA_NAME, database_oid, pool_.get(), txn); + system_catalogs->GetTableCatalog()->InsertTable( + COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, + CATALOG_SCHEMA_NAME, database_oid, pool_.get(), txn); } void Catalog::Bootstrap() { diff --git a/src/catalog/column_stats_catalog.cpp b/src/catalog/column_stats_catalog.cpp index bbe94340cdb..d29e9f82413 100644 --- a/src/catalog/column_stats_catalog.cpp +++ b/src/catalog/column_stats_catalog.cpp @@ -13,6 +13,7 @@ #include "catalog/column_stats_catalog.h" #include "catalog/catalog.h" +#include "catalog/schema.h" #include "executor/logical_tile.h" #include "optimizer/stats/column_stats_collector.h" #include "storage/data_table.h" @@ -21,44 +22,74 @@ namespace peloton { namespace catalog { -ColumnStatsCatalog *ColumnStatsCatalog::GetInstance( - concurrency::TransactionContext *txn) { - static ColumnStatsCatalog column_stats_catalog{txn}; - return &column_stats_catalog; -} +ColumnStatsCatalog::ColumnStatsCatalog( + storage::Database *pg_catalog, + UNUSED_ATTRIBUTE type::AbstractPool *pool, + UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) + : AbstractCatalog(COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, + InitializeSchema().release(), pg_catalog) { + // Add indexes for pg_column_stats + AddIndex({ColumnId::TABLE_ID, ColumnId::COLUMN_ID}, + COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0", + IndexConstraintType::UNIQUE); + AddIndex({ColumnId::TABLE_ID}, COLUMN_STATS_CATALOG_SKEY1_OID, + COLUMN_STATS_CATALOG_NAME "_skey1", IndexConstraintType::DEFAULT); -ColumnStatsCatalog::ColumnStatsCatalog(concurrency::TransactionContext *txn) - : AbstractCatalog("CREATE TABLE " CATALOG_DATABASE_NAME - "." CATALOG_SCHEMA_NAME "." COLUMN_STATS_CATALOG_NAME - " (" - "database_id INT NOT NULL, " - "table_id INT NOT NULL, " - "column_id INT NOT NULL, " - "num_rows INT NOT NULL, " - "cardinality DECIMAL NOT NULL, " - "frac_null DECIMAL NOT NULL, " - "most_common_vals VARCHAR, " - "most_common_freqs VARCHAR, " - "histogram_bounds VARCHAR, " - "column_name VARCHAR, " - "has_index BOOLEAN);", - txn) { - // unique key: (database_id, table_id, column_id) - Catalog::GetInstance()->CreateIndex( - CATALOG_DATABASE_NAME, CATALOG_SCHEMA_NAME, COLUMN_STATS_CATALOG_NAME, - {0, 1, 2}, COLUMN_STATS_CATALOG_NAME "_skey0", true, IndexType::BWTREE, - txn); - // non-unique key: (database_id, table_id) - Catalog::GetInstance()->CreateIndex( - CATALOG_DATABASE_NAME, CATALOG_SCHEMA_NAME, COLUMN_STATS_CATALOG_NAME, - {0, 1}, COLUMN_STATS_CATALOG_NAME "_skey1", false, IndexType::BWTREE, - txn); } ColumnStatsCatalog::~ColumnStatsCatalog() {} +std::unique_ptr ColumnStatsCatalog::InitializeSchema() { + + const std::string not_null_constraint_name = "notnull"; + const auto not_null_constraint = catalog::Constraint( + ConstraintType::NOTNULL, not_null_constraint_name); + + auto table_id_column = catalog::Column( + type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), + "table_id", true); + table_id_column.AddConstraint(not_null_constraint); + auto column_id_column = catalog::Column( + type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), + "column_id", true); + column_id_column.AddConstraint(not_null_constraint); + auto num_rows_column = catalog::Column( + type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), + "num_rows", true); + num_rows_column.AddConstraint(not_null_constraint); + auto cardinality_column = catalog::Column( + type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL), + "cardinality", true); + cardinality_column.AddConstraint(not_null_constraint); + auto frac_null_column = catalog::Column( + type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL), + "frac_null", true); + frac_null_column.AddConstraint(not_null_constraint); + auto most_common_vals_column = catalog::Column( + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "most_common_vals", false); + auto most_common_freqs_column = catalog::Column( + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "most_common_freqs", false); + auto histogram_bounds_column = catalog::Column( + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "histogram_bounds", false); + auto column_name_column = catalog::Column( + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "column_name", false); + auto has_index_column = catalog::Column( + type::TypeId::BOOLEAN, type::Type::GetTypeSize(type::TypeId::BOOLEAN), + "has_index", true); + + std::unique_ptr column_stats_schema(new catalog::Schema( + {table_id_column, column_id_column, num_rows_column, cardinality_column, + frac_null_column, most_common_vals_column, most_common_freqs_column, + histogram_bounds_column, column_name_column, has_index_column})); + return column_stats_schema; +} + bool ColumnStatsCatalog::InsertColumnStats( - oid_t database_id, oid_t table_id, oid_t column_id, int num_rows, + oid_t table_id, oid_t column_id, int num_rows, double cardinality, double frac_null, std::string most_common_vals, std::string most_common_freqs, std::string histogram_bounds, std::string column_name, bool has_index, type::AbstractPool *pool, @@ -66,7 +97,6 @@ bool ColumnStatsCatalog::InsertColumnStats( std::unique_ptr tuple( new storage::Tuple(catalog_table_->GetSchema(), true)); - auto val_db_id = type::ValueFactory::GetIntegerValue(database_id); auto val_table_id = type::ValueFactory::GetIntegerValue(table_id); auto val_column_id = type::ValueFactory::GetIntegerValue(column_id); auto val_num_row = type::ValueFactory::GetIntegerValue(num_rows); @@ -96,7 +126,6 @@ bool ColumnStatsCatalog::InsertColumnStats( type::ValueFactory::GetVarcharValue(column_name); type::Value val_has_index = type::ValueFactory::GetBooleanValue(has_index); - tuple->SetValue(ColumnId::DATABASE_ID, val_db_id, nullptr); tuple->SetValue(ColumnId::TABLE_ID, val_table_id, nullptr); tuple->SetValue(ColumnId::COLUMN_ID, val_column_id, nullptr); tuple->SetValue(ColumnId::NUM_ROWS, val_num_row, nullptr); @@ -113,12 +142,11 @@ bool ColumnStatsCatalog::InsertColumnStats( } bool ColumnStatsCatalog::DeleteColumnStats( - oid_t database_id, oid_t table_id, oid_t column_id, - concurrency::TransactionContext *txn) { + oid_t table_id, oid_t column_id, + concurrency::TransactionContext *txn) { oid_t index_offset = IndexId::SECONDARY_KEY_0; // Secondary key index std::vector values; - values.push_back(type::ValueFactory::GetIntegerValue(database_id).Copy()); values.push_back(type::ValueFactory::GetIntegerValue(table_id).Copy()); values.push_back(type::ValueFactory::GetIntegerValue(column_id).Copy()); @@ -126,7 +154,7 @@ bool ColumnStatsCatalog::DeleteColumnStats( } std::unique_ptr> ColumnStatsCatalog::GetColumnStats( - oid_t database_id, oid_t table_id, oid_t column_id, + oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn) { std::vector column_ids( {ColumnId::NUM_ROWS, ColumnId::CARDINALITY, ColumnId::FRAC_NULL, @@ -135,7 +163,6 @@ std::unique_ptr> ColumnStatsCatalog::GetColumnStats( oid_t index_offset = IndexId::SECONDARY_KEY_0; // Secondary key index std::vector values; - values.push_back(type::ValueFactory::GetIntegerValue(database_id).Copy()); values.push_back(type::ValueFactory::GetIntegerValue(table_id).Copy()); values.push_back(type::ValueFactory::GetIntegerValue(column_id).Copy()); @@ -175,7 +202,7 @@ std::unique_ptr> ColumnStatsCatalog::GetColumnStats( // Return value: number of column stats size_t ColumnStatsCatalog::GetTableStats( - oid_t database_id, oid_t table_id, concurrency::TransactionContext *txn, + oid_t table_id, concurrency::TransactionContext *txn, std::map>> &column_stats_map) { std::vector column_ids( @@ -186,7 +213,6 @@ size_t ColumnStatsCatalog::GetTableStats( oid_t index_offset = IndexId::SECONDARY_KEY_1; // Secondary key index std::vector values; - values.push_back(type::ValueFactory::GetIntegerValue(database_id).Copy()); values.push_back(type::ValueFactory::GetIntegerValue(table_id).Copy()); auto result_tiles = diff --git a/src/catalog/system_catalogs.cpp b/src/catalog/system_catalogs.cpp index 3900c165f74..e5f3fee02df 100644 --- a/src/catalog/system_catalogs.cpp +++ b/src/catalog/system_catalogs.cpp @@ -12,6 +12,7 @@ #include "catalog/system_catalogs.h" #include "catalog/column_catalog.h" +#include "catalog/column_stats_catalog.h" #include "catalog/index_catalog.h" #include "catalog/layout_catalog.h" #include "catalog/table_catalog.h" @@ -36,6 +37,7 @@ SystemCatalogs::SystemCatalogs(storage::Database *database, pg_query_metrics_(nullptr) { oid_t database_oid = database->GetOid(); pg_attribute_ = new ColumnCatalog(database, pool, txn); + pg_column_stats_ = new ColumnStatsCatalog(database, pool, txn); pg_namespace_ = new SchemaCatalog(database, pool, txn); pg_table_ = new TableCatalog(database, pool, txn); pg_index_ = new IndexCatalog(database, pool, txn); @@ -48,7 +50,8 @@ SystemCatalogs::SystemCatalogs(storage::Database *database, {database_oid, TABLE_CATALOG_OID}, {database_oid, SCHEMA_CATALOG_OID}, {database_oid, INDEX_CATALOG_OID}, - {database_oid, LAYOUT_CATALOG_OID}}; + {database_oid, LAYOUT_CATALOG_OID}, + {database_oid, COLUMN_STATS_CATALOG_OID}}; for (int i = 0; i < (int)shared_tables.size(); i++) { oid_t column_id = 0; @@ -72,6 +75,7 @@ SystemCatalogs::~SystemCatalogs() { delete pg_table_; delete pg_attribute_; delete pg_namespace_; + delete pg_column_stats_; if (pg_trigger_) delete pg_trigger_; // if (pg_proc) delete pg_proc; if (pg_table_metrics_) delete pg_table_metrics_; diff --git a/src/include/catalog/catalog_defaults.h b/src/include/catalog/catalog_defaults.h index 117a7e8ab6d..bb2abfe2400 100644 --- a/src/include/catalog/catalog_defaults.h +++ b/src/include/catalog/catalog_defaults.h @@ -26,17 +26,18 @@ namespace catalog { #define CATALOG_DATABASE_NAME "peloton" // Catalog tables -// 5 basic catalog tables +// basic catalog tables #define DATABASE_CATALOG_NAME "pg_database" #define SCHEMA_CATALOG_NAME "pg_namespace" #define TABLE_CATALOG_NAME "pg_table" #define INDEX_CATALOG_NAME "pg_index" #define COLUMN_CATALOG_NAME "pg_attribute" #define LAYOUT_CATALOG_NAME "pg_layout" +#define COLUMN_STATS_CATALOG_NAME "pg_column_stats" // Local oids from START_OID = 0 to START_OID + OID_OFFSET are reserved #define OID_OFFSET 100 -#define CATALOG_TABLES_COUNT 9 +#define CATALOG_TABLES_COUNT 10 // Oid mask for each type #define DATABASE_OID_MASK (static_cast(catalog::CatalogType::DATABASE)) @@ -64,6 +65,7 @@ namespace catalog { #define INDEX_CATALOG_OID (3 | TABLE_OID_MASK) #define COLUMN_CATALOG_OID (4 | TABLE_OID_MASK) #define LAYOUT_CATALOG_OID (5 | TABLE_OID_MASK) +#define COLUMN_STATS_CATALOG_OID (6 | TABLE_OID_MASK) // Reserved pg_column index oid #define COLUMN_CATALOG_PKEY_OID (0 | INDEX_OID_MASK) @@ -92,6 +94,10 @@ namespace catalog { #define LAYOUT_CATALOG_PKEY_OID (13 | INDEX_OID_MASK) #define LAYOUT_CATALOG_SKEY0_OID (14 | INDEX_OID_MASK) +// Reserved pg_column_stats index oid +#define COLUMN_STATS_CATALOG_SKEY0_OID (15 | INDEX_OID_MASK) +#define COLUMN_STATS_CATALOG_SKEY1_OID (16 | INDEX_OID_MASK) + // Use upper 8 bits indicating catalog type #define CATALOG_TYPE_OFFSET 24 diff --git a/src/include/catalog/column_stats_catalog.h b/src/include/catalog/column_stats_catalog.h index d409a9da338..affd1917ef9 100644 --- a/src/include/catalog/column_stats_catalog.h +++ b/src/include/catalog/column_stats_catalog.h @@ -14,18 +14,18 @@ // pg_column_stats // // Schema: (column offset: column_name) -// 0: database_id (pkey) -// 1: table_id (pkey) -// 2: column_id (pkey) -// 3: num_rows -// 4: cardinality -// 5: frac_null -// 6: most_common_vals -// 7: most_common_freqs -// 8: histogram_bounds +// 0: table_id (pkey) +// 1: column_id (pkey) +// 2: num_rows +// 3: cardinality +// 4: frac_null +// 5: most_common_vals +// 6: most_common_freqs +// 7: histogram_bounds // // Indexes: (index offset: indexed columns) -// 0: name & database_oid (unique & primary key) +// 0: table_id & column_id (unique) +// 1: table_id (non-unique) // //===----------------------------------------------------------------------===// @@ -35,8 +35,6 @@ #include "catalog/abstract_catalog.h" -#define COLUMN_STATS_CATALOG_NAME "pg_column_stats" - namespace peloton { namespace optimizer { @@ -47,50 +45,55 @@ namespace catalog { class ColumnStatsCatalog : public AbstractCatalog { public: - ~ColumnStatsCatalog(); - // Global Singleton - static ColumnStatsCatalog *GetInstance( - concurrency::TransactionContext *txn = nullptr); + ColumnStatsCatalog(storage::Database *pg_catalog, type::AbstractPool *pool, + concurrency::TransactionContext *txn); + + ~ColumnStatsCatalog(); //===--------------------------------------------------------------------===// // write Related API //===--------------------------------------------------------------------===// - bool InsertColumnStats(oid_t database_id, oid_t table_id, oid_t column_id, + bool InsertColumnStats(oid_t table_id, oid_t column_id, int num_rows, double cardinality, double frac_null, std::string most_common_vals, std::string most_common_freqs, std::string histogram_bounds, std::string column_name, bool has_index, type::AbstractPool *pool, concurrency::TransactionContext *txn); - bool DeleteColumnStats(oid_t database_id, oid_t table_id, oid_t column_id, + bool DeleteColumnStats(oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn); //===--------------------------------------------------------------------===// // Read-only Related API //===--------------------------------------------------------------------===// std::unique_ptr> GetColumnStats( - oid_t database_id, oid_t table_id, oid_t column_id, + oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn); size_t GetTableStats( - oid_t database_id, oid_t table_id, concurrency::TransactionContext *txn, - std::map>> & + oid_t table_id, concurrency::TransactionContext *txn, + std::map>> & column_stats_map); // TODO: add more if needed + + /** @brief private function for initialize schema of pg_index + * @return unqiue pointer to schema + */ + std::unique_ptr InitializeSchema(); + enum ColumnId { - DATABASE_ID = 0, - TABLE_ID = 1, - COLUMN_ID = 2, - NUM_ROWS = 3, - CARDINALITY = 4, - FRAC_NULL = 5, - MOST_COMMON_VALS = 6, - MOST_COMMON_FREQS = 7, - HISTOGRAM_BOUNDS = 8, - COLUMN_NAME = 9, - HAS_INDEX = 10, + TABLE_ID = 0, + COLUMN_ID = 1, + NUM_ROWS = 2, + CARDINALITY = 3, + FRAC_NULL = 4, + MOST_COMMON_VALS = 5, + MOST_COMMON_FREQS = 6, + HISTOGRAM_BOUNDS = 7, + COLUMN_NAME = 8, + HAS_INDEX = 9, // Add new columns here in creation order }; @@ -106,7 +109,6 @@ class ColumnStatsCatalog : public AbstractCatalog { }; private: - ColumnStatsCatalog(concurrency::TransactionContext *txn); enum IndexId { SECONDARY_KEY_0 = 0, diff --git a/src/include/catalog/system_catalogs.h b/src/include/catalog/system_catalogs.h index 9792d180f9d..d45c98dbef9 100644 --- a/src/include/catalog/system_catalogs.h +++ b/src/include/catalog/system_catalogs.h @@ -29,12 +29,13 @@ class Database; } // namespace storage namespace catalog { +class ColumnCatalog; +class ColumnStatsCatalog; class DatabaseCatalog; -class SchemaCatalog; -class TableCatalog; class IndexCatalog; -class ColumnCatalog; class LayoutCatalog; +class SchemaCatalog; +class TableCatalog; class SystemCatalogs { public: @@ -59,6 +60,13 @@ class SystemCatalogs { return pg_attribute_; } + ColumnStatsCatalog *GetColumnStatsCatalog() { + if (!pg_column_stats_) { + throw CatalogException("ColumnStats catalog has not been initialized"); + } + return pg_column_stats_; + } + SchemaCatalog *GetSchemaCatalog() { if (!pg_namespace_) { throw CatalogException("schema catalog has not been initialized"); @@ -117,6 +125,7 @@ class SystemCatalogs { private: ColumnCatalog *pg_attribute_; + ColumnStatsCatalog *pg_column_stats_; SchemaCatalog *pg_namespace_; TableCatalog *pg_table_; IndexCatalog *pg_index_; diff --git a/src/include/optimizer/stats/stats_storage.h b/src/include/optimizer/stats/stats_storage.h index 85472c6a12d..2d1ee5fbd59 100644 --- a/src/include/optimizer/stats/stats_storage.h +++ b/src/include/optimizer/stats/stats_storage.h @@ -41,10 +41,6 @@ class StatsStorage { StatsStorage(); - /* Functions for managing stats table and schema */ - - void CreateStatsTableInCatalog(); - /* Functions for adding, updating and quering stats */ void InsertOrUpdateTableStats(storage::DataTable *table, diff --git a/src/optimizer/stats/stats_storage.cpp b/src/optimizer/stats/stats_storage.cpp index d1b2fed6b12..e34b16331c5 100644 --- a/src/optimizer/stats/stats_storage.cpp +++ b/src/optimizer/stats/stats_storage.cpp @@ -14,6 +14,7 @@ #include "catalog/catalog.h" #include "catalog/column_stats_catalog.h" +#include "catalog/system_catalogs.h" #include "concurrency/transaction_manager_factory.h" #include "optimizer/stats/column_stats.h" #include "optimizer/stats/table_stats.h" @@ -31,24 +32,11 @@ StatsStorage *StatsStorage::GetInstance() { /** * StatsStorage - Constructor of StatsStorage. - * In the construcotr, `pg_column_stats` table and `samples_db` database are - * created. + * In the construcotr, the EphemeralPool is created. */ StatsStorage::StatsStorage() { pool_.reset(new type::EphemeralPool()); - CreateStatsTableInCatalog(); } - -/** - * CreateStatsCatalog - Create 'pg_column_stats' table in the catalog database. - */ -void StatsStorage::CreateStatsTableInCatalog() { - auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); - auto txn = txn_manager.BeginTransaction(); - catalog::ColumnStatsCatalog::GetInstance(txn); - txn_manager.CommitTransaction(txn); -} - /** * InsertOrUpdateTableStats - Add or update all column stats of a table. * This function iterates all column stats in the table stats and insert column @@ -109,7 +97,9 @@ void StatsStorage::InsertOrUpdateColumnStats( LOG_TRACE("InsertOrUpdateColumnStats, %d, %lf, %lf, %s, %s, %s", num_rows, cardinality, frac_null, most_common_vals.c_str(), most_common_freqs.c_str(), histogram_bounds.c_str()); - auto column_stats_catalog = catalog::ColumnStatsCatalog::GetInstance(nullptr); + auto pg_column_stats = catalog::Catalog::GetInstance() + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); bool single_statement_txn = false; @@ -117,10 +107,10 @@ void StatsStorage::InsertOrUpdateColumnStats( single_statement_txn = true; txn = txn_manager.BeginTransaction(); } - column_stats_catalog->DeleteColumnStats(database_id, table_id, column_id, + pg_column_stats->DeleteColumnStats(table_id, column_id, txn); - column_stats_catalog->InsertColumnStats( - database_id, table_id, column_id, num_rows, cardinality, frac_null, + pg_column_stats->InsertColumnStats( + table_id, column_id, num_rows, cardinality, frac_null, most_common_vals, most_common_freqs, histogram_bounds, column_name, has_index, pool_.get(), txn); @@ -136,12 +126,14 @@ void StatsStorage::InsertOrUpdateColumnStats( std::shared_ptr StatsStorage::GetColumnStatsByID(oid_t database_id, oid_t table_id, oid_t column_id) { - auto column_stats_catalog = catalog::ColumnStatsCatalog::GetInstance(nullptr); + auto pg_column_stats = catalog::Catalog::GetInstance() + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); auto txn = txn_manager.BeginTransaction(); // std::unique_ptr> column_stats_vector - auto column_stats_vector = column_stats_catalog->GetColumnStats( - database_id, table_id, column_id, txn); + auto column_stats_vector = pg_column_stats->GetColumnStats( + table_id, column_id, txn); txn_manager.CommitTransaction(txn); return ConvertVectorToColumnStats(database_id, table_id, column_id, @@ -218,10 +210,11 @@ std::shared_ptr StatsStorage::ConvertVectorToColumnStats( */ std::shared_ptr StatsStorage::GetTableStats( oid_t database_id, oid_t table_id, concurrency::TransactionContext *txn) { - auto column_stats_catalog = catalog::ColumnStatsCatalog::GetInstance(nullptr); + auto pg_column_stats = catalog::Catalog::GetInstance() + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); std::map>> column_stats_map; - column_stats_catalog->GetTableStats(database_id, table_id, txn, - column_stats_map); + pg_column_stats->GetTableStats(table_id, txn, column_stats_map); std::vector> column_stats_ptrs; for (auto it = column_stats_map.begin(); it != column_stats_map.end(); ++it) { @@ -242,10 +235,11 @@ std::shared_ptr StatsStorage::GetTableStats( std::shared_ptr StatsStorage::GetTableStats( oid_t database_id, oid_t table_id, std::vector column_ids, concurrency::TransactionContext *txn) { - auto column_stats_catalog = catalog::ColumnStatsCatalog::GetInstance(nullptr); + auto pg_column_stats = catalog::Catalog::GetInstance() + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); std::map>> column_stats_map; - column_stats_catalog->GetTableStats(database_id, table_id, txn, - column_stats_map); + pg_column_stats->GetTableStats(table_id, txn, column_stats_map); std::vector> column_stats_ptrs; for (oid_t col_id : column_ids) { From 5f431de25284296b04c14258414bfcb8d83a6838 Mon Sep 17 00:00:00 2001 From: poojanilangekar Date: Wed, 9 May 2018 01:25:08 -0400 Subject: [PATCH 2/7] Fixed all tests --- src/optimizer/stats/stats_storage.cpp | 52 +++++------ test/executor/drop_test.cpp | 1 - test/executor/testing_executor_util.cpp | 7 +- test/include/executor/testing_executor_util.h | 12 ++- test/optimizer/selectivity_test.cpp | 7 ++ test/optimizer/stats_storage_test.cpp | 90 +++++++++++-------- test/sql/analyze_sql_test.cpp | 2 +- 7 files changed, 106 insertions(+), 65 deletions(-) diff --git a/src/optimizer/stats/stats_storage.cpp b/src/optimizer/stats/stats_storage.cpp index e34b16331c5..c1903034a53 100644 --- a/src/optimizer/stats/stats_storage.cpp +++ b/src/optimizer/stats/stats_storage.cpp @@ -257,32 +257,34 @@ std::shared_ptr StatsStorage::GetTableStats( * AnalyzeStatsForAllTables - This function iterates all databases and * datatables to collect their stats and store them in the column_stats_catalog. */ +// Deprecating this function because the notion is no longer true. +// Needs to be handled differently. Used only in tests. ResultType StatsStorage::AnalyzeStatsForAllTables( - concurrency::TransactionContext *txn) { - if (txn == nullptr) { - LOG_TRACE("Do not have transaction to analyze all tables' stats."); - return ResultType::FAILURE; - } - - auto storage_manager = storage::StorageManager::GetInstance(); - - oid_t database_count = storage_manager->GetDatabaseCount(); - LOG_TRACE("Database count: %u", database_count); - for (oid_t db_offset = 0; db_offset < database_count; db_offset++) { - auto database = storage_manager->GetDatabaseWithOffset(db_offset); - if (database->GetOid() == CATALOG_DATABASE_OID) { - continue; - } - oid_t table_count = database->GetTableCount(); - for (oid_t table_offset = 0; table_offset < table_count; table_offset++) { - auto table = database->GetTable(table_offset); - LOG_TRACE("Analyzing table: %s", table->GetName().c_str()); - std::unique_ptr table_stats_collector( - new TableStatsCollector(table)); - table_stats_collector->CollectColumnStats(); - InsertOrUpdateTableStats(table, table_stats_collector.get(), txn); - } - } + UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) { +// if (txn == nullptr) { +// LOG_TRACE("Do not have transaction to analyze all tables' stats."); +// return ResultType::FAILURE; +// } +// +// auto storage_manager = storage::StorageManager::GetInstance(); +// +// oid_t database_count = storage_manager->GetDatabaseCount(); +// LOG_TRACE("Database count: %u", database_count); +// for (oid_t db_offset = 0; db_offset < database_count; db_offset++) { +// auto database = storage_manager->GetDatabaseWithOffset(db_offset); +// if (database->GetOid() == CATALOG_DATABASE_OID) { +// continue; +// } +// oid_t table_count = database->GetTableCount(); +// for (oid_t table_offset = 0; table_offset < table_count; table_offset++) { +// auto table = database->GetTable(table_offset); +// LOG_TRACE("Analyzing table: %s", table->GetName().c_str()); +// std::unique_ptr table_stats_collector( +// new TableStatsCollector(table)); +// table_stats_collector->CollectColumnStats(); +// InsertOrUpdateTableStats(table, table_stats_collector.get(), txn); +// } +// } return ResultType::SUCCESS; } diff --git a/test/executor/drop_test.cpp b/test/executor/drop_test.cpp index d90f06cbcb1..027c4f2e750 100644 --- a/test/executor/drop_test.cpp +++ b/test/executor/drop_test.cpp @@ -123,7 +123,6 @@ TEST_F(DropTests, DroppingTable) { ->GetTableObjects() .size(), expeected_table_count); - // free the database just created catalog->DropDatabaseWithName(TEST_DB_NAME, txn); txn_manager.CommitTransaction(txn); diff --git a/test/executor/testing_executor_util.cpp b/test/executor/testing_executor_util.cpp index cd923c11ed8..5da53374e22 100644 --- a/test/executor/testing_executor_util.cpp +++ b/test/executor/testing_executor_util.cpp @@ -46,6 +46,9 @@ using ::testing::Return; namespace peloton { namespace test { +// Initialize the default database_oid +oid_t TestingExecutorUtil::database_oid = INVALID_OID; + storage::Database *TestingExecutorUtil::InitializeDatabase( const std::string &db_name) { auto catalog = catalog::Catalog::GetInstance(); @@ -55,6 +58,7 @@ storage::Database *TestingExecutorUtil::InitializeDatabase( EXPECT_EQ(ResultType::SUCCESS, result); auto database = catalog->GetDatabaseWithName(db_name, txn); txn_manager.CommitTransaction(txn); + database_oid = database->GetOid(); return (database); } @@ -65,6 +69,7 @@ void TestingExecutorUtil::DeleteDatabase(const std::string &db_name) { auto result = catalog->DropDatabaseWithName(db_name, txn); txn_manager.CommitTransaction(txn); EXPECT_EQ(ResultType::SUCCESS, result); + database_oid = INVALID_OID; } /** @brief Helper function for defining schema */ @@ -348,7 +353,7 @@ storage::DataTable *TestingExecutorUtil::CreateTable( bool own_schema = true; bool adapt_table = false; storage::DataTable *table = storage::TableFactory::GetDataTable( - INVALID_OID, table_oid, table_schema, table_name, + database_oid, table_oid, table_schema, table_name, tuples_per_tilegroup_count, own_schema, adapt_table); if (indexes == true) { diff --git a/test/include/executor/testing_executor_util.h b/test/include/executor/testing_executor_util.h index 5d9dc14b5dd..12f086c95f1 100644 --- a/test/include/executor/testing_executor_util.h +++ b/test/include/executor/testing_executor_util.h @@ -60,9 +60,16 @@ class TestingExecutorUtil { * @brief Intializes the catalog with a new database with the give name. */ static storage::Database *InitializeDatabase(const std::string &db_name); - + /** + * @brief Drops the catalog of the database with the give name. + */ static void DeleteDatabase(const std::string &db_name); + /** + * @brief Returns the current database_oid. + */ + static oid_t GetDatabaseOid() { return database_oid; } + /** * @brief Creates a basic tile group with allocated but not populated * tuples. @@ -122,6 +129,9 @@ class TestingExecutorUtil { /** Print the tuples from a vector of logical tiles */ static std::string GetTileVectorInfo( std::vector> &tile_vec); + + private: + static oid_t database_oid; }; } // namespace test diff --git a/test/optimizer/selectivity_test.cpp b/test/optimizer/selectivity_test.cpp index cbb8df08d2c..d845cd40c37 100644 --- a/test/optimizer/selectivity_test.cpp +++ b/test/optimizer/selectivity_test.cpp @@ -119,6 +119,10 @@ TEST_F(SelectivityTests, RangeSelectivityTest) { TEST_F(SelectivityTests, LikeSelectivityTest) { const int tuple_count = 1000; const int tuple_per_tilegroup = 100; + const std::string db_name = "test_db"; + + // Initialize the DB inorder to initialize pg_column_stats + TestingExecutorUtil::InitializeDatabase(db_name); // Create a table auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); @@ -160,6 +164,9 @@ TEST_F(SelectivityTests, LikeSelectivityTest) { EXPECT_EQ(like_than_sel_1, 1); EXPECT_EQ(like_than_sel_2, 0); + + // Drop the database created + TestingExecutorUtil::DeleteDatabase(db_name); } TEST_F(SelectivityTests, EqualSelectivityTest) { diff --git a/test/optimizer/stats_storage_test.cpp b/test/optimizer/stats_storage_test.cpp index edf42131355..7ee516b5da3 100644 --- a/test/optimizer/stats_storage_test.cpp +++ b/test/optimizer/stats_storage_test.cpp @@ -93,6 +93,9 @@ void VerifyAndPrintColumnStats(storage::DataTable *data_table, } TEST_F(StatsStorageTests, InsertAndGetTableStatsTest) { + + const std::string db_name = "test_db"; + TestingExecutorUtil::InitializeDatabase(db_name); auto data_table = InitializeTestTable(); // Collect stats. @@ -108,6 +111,7 @@ TEST_F(StatsStorageTests, InsertAndGetTableStatsTest) { table_stats_collector.get()); VerifyAndPrintColumnStats(data_table.get(), 4); + TestingExecutorUtil::DeleteDatabase(db_name); } TEST_F(StatsStorageTests, InsertAndGetColumnStatsTest) { @@ -115,7 +119,10 @@ TEST_F(StatsStorageTests, InsertAndGetColumnStatsTest) { (void)catalog; StatsStorage *stats_storage = StatsStorage::GetInstance(); - oid_t database_id = 1; + const std::string db_name = "test_db"; + TestingExecutorUtil::InitializeDatabase(db_name); + + oid_t database_id = TestingExecutorUtil::GetDatabaseOid(); oid_t table_id = 2; oid_t column_id = 3; int num_rows = 10; @@ -146,6 +153,7 @@ TEST_F(StatsStorageTests, InsertAndGetColumnStatsTest) { auto column_stats_ptr2 = stats_storage->GetColumnStatsByID(database_id, table_id, column_id + 1); EXPECT_EQ(column_stats_ptr2, nullptr); + TestingExecutorUtil::DeleteDatabase(db_name); } TEST_F(StatsStorageTests, UpdateColumnStatsTest) { @@ -153,7 +161,10 @@ TEST_F(StatsStorageTests, UpdateColumnStatsTest) { (void)catalog; StatsStorage *stats_storage = StatsStorage::GetInstance(); - oid_t database_id = 1; + const std::string db_name = "test_db"; + TestingExecutorUtil::InitializeDatabase(db_name); + + oid_t database_id = TestingExecutorUtil::GetDatabaseOid(); oid_t table_id = 2; oid_t column_id = 3; @@ -193,9 +204,13 @@ TEST_F(StatsStorageTests, UpdateColumnStatsTest) { EXPECT_EQ(column_stats_ptr->frac_null, frac_null_1); EXPECT_EQ(column_stats_ptr->column_name, column_name_1); + TestingExecutorUtil::DeleteDatabase(db_name); } TEST_F(StatsStorageTests, AnalyzeStatsForTableTest) { + const std::string db_name = "test_db"; + TestingExecutorUtil::InitializeDatabase(db_name); + auto data_table = InitializeTestTable(); // Analyze table. @@ -213,44 +228,47 @@ TEST_F(StatsStorageTests, AnalyzeStatsForTableTest) { // Check the correctness of the stats. VerifyAndPrintColumnStats(data_table.get(), 4); + TestingExecutorUtil::DeleteDatabase(db_name); } // TODO: Add more tables. -TEST_F(StatsStorageTests, AnalyzeStatsForAllTablesTest) { - auto data_table = CreateTestDBAndTable(); - - StatsStorage *stats_storage = StatsStorage::GetInstance(); - - // Must pass in the transaction. - ResultType result = stats_storage->AnalyzeStatsForAllTables(); - EXPECT_EQ(result, ResultType::FAILURE); - - auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); - auto txn = txn_manager.BeginTransaction(); - result = stats_storage->AnalyzeStatsForAllTables(txn); - EXPECT_EQ(result, ResultType::SUCCESS); - txn_manager.CommitTransaction(txn); - - // Check the correctness of the stats. - VerifyAndPrintColumnStats(data_table, 4); -} - -TEST_F(StatsStorageTests, GetTableStatsTest) { - auto data_table = InitializeTestTable(); - - StatsStorage *stats_storage = StatsStorage::GetInstance(); - - auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); - auto txn = txn_manager.BeginTransaction(); - stats_storage->AnalyzeStatsForAllTables(txn); - txn_manager.CommitTransaction(txn); +//TEST_F(StatsStorageTests, AnalyzeStatsForAllTablesTest) { +// const std::string db_name = "test_db"; +// auto data_table = CreateTestDBAndTable(); +// +// StatsStorage *stats_storage = StatsStorage::GetInstance(); +// +// // Must pass in the transaction. +// ResultType result = stats_storage->AnalyzeStatsForAllTables(); +// EXPECT_EQ(result, ResultType::FAILURE); +// +// auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); +// auto txn = txn_manager.BeginTransaction(); +// result = stats_storage->AnalyzeStatsForAllTables(txn); +// EXPECT_EQ(result, ResultType::SUCCESS); +// txn_manager.CommitTransaction(txn); +// +// // Check the correctness of the stats. +// VerifyAndPrintColumnStats(data_table, 4); +// TestingExecutorUtil::DeleteDatabase(db_name); +// +//} - txn = txn_manager.BeginTransaction(); - std::shared_ptr table_stats = stats_storage->GetTableStats( - data_table->GetDatabaseOid(), data_table->GetOid(), txn); - txn_manager.CommitTransaction(txn); - EXPECT_EQ(table_stats->num_rows, tuple_count); -} +//TEST_F(StatsStorageTests, GetTableStatsTest) { +// auto data_table = InitializeTestTable(); +// StatsStorage *stats_storage = StatsStorage::GetInstance(); +// +// auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); +// auto txn = txn_manager.BeginTransaction(); +// stats_storage->AnalyzeStatsForAllTables(txn); +// txn_manager.CommitTransaction(txn); +// +// txn = txn_manager.BeginTransaction(); +// std::shared_ptr table_stats = stats_storage->GetTableStats( +// data_table->GetDatabaseOid(), data_table->GetOid(), txn); +// txn_manager.CommitTransaction(txn); +// EXPECT_EQ(table_stats->num_rows, tuple_count); +//} } // namespace test } // namespace peloton \ No newline at end of file diff --git a/test/sql/analyze_sql_test.cpp b/test/sql/analyze_sql_test.cpp index 16191ec000d..a564602a8be 100644 --- a/test/sql/analyze_sql_test.cpp +++ b/test/sql/analyze_sql_test.cpp @@ -74,7 +74,7 @@ TEST_F(AnalyzeSQLTests, AnalyzeSingleTableTest) { txn = txn_manager.BeginTransaction(); auto catalog = catalog::Catalog::GetInstance(); storage::DataTable *db_column_stats_collector_table = - catalog->GetTableWithName(CATALOG_DATABASE_NAME, CATALOG_SCHEMA_NAME, + catalog->GetTableWithName(DEFAULT_DB_NAME, CATALOG_SCHEMA_NAME, COLUMN_STATS_CATALOG_NAME, txn); EXPECT_NE(db_column_stats_collector_table, nullptr); EXPECT_EQ(db_column_stats_collector_table->GetTupleCount(), 4); From c5a0bc94e2c1e42391d8b4b58f410bfd06068a6f Mon Sep 17 00:00:00 2001 From: poojanilangekar Date: Wed, 9 May 2018 09:46:12 -0400 Subject: [PATCH 3/7] Fix include statement --- src/catalog/catalog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index 236526bebcd..f808d16e8d3 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -10,10 +10,10 @@ // //===----------------------------------------------------------------------===// -#include #include "catalog/catalog.h" #include "catalog/column_catalog.h" +#include "catalog/column_stats_catalog.h" #include "catalog/database_catalog.h" #include "catalog/database_metrics_catalog.h" #include "catalog/index_catalog.h" From 6ffb0b32a68150e741848636d9389697ed61a86f Mon Sep 17 00:00:00 2001 From: poojanilangekar Date: Fri, 18 May 2018 15:35:49 -0400 Subject: [PATCH 4/7] Fixed AnalyzeAllTables --- src/optimizer/stats/stats_storage.cpp | 56 +++++++++++--------- test/optimizer/stats_storage_test.cpp | 74 ++++++++++++++------------- 2 files changed, 70 insertions(+), 60 deletions(-) diff --git a/src/optimizer/stats/stats_storage.cpp b/src/optimizer/stats/stats_storage.cpp index c1903034a53..0bdbaeafaca 100644 --- a/src/optimizer/stats/stats_storage.cpp +++ b/src/optimizer/stats/stats_storage.cpp @@ -261,30 +261,38 @@ std::shared_ptr StatsStorage::GetTableStats( // Needs to be handled differently. Used only in tests. ResultType StatsStorage::AnalyzeStatsForAllTables( UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) { -// if (txn == nullptr) { -// LOG_TRACE("Do not have transaction to analyze all tables' stats."); -// return ResultType::FAILURE; -// } -// -// auto storage_manager = storage::StorageManager::GetInstance(); -// -// oid_t database_count = storage_manager->GetDatabaseCount(); -// LOG_TRACE("Database count: %u", database_count); -// for (oid_t db_offset = 0; db_offset < database_count; db_offset++) { -// auto database = storage_manager->GetDatabaseWithOffset(db_offset); -// if (database->GetOid() == CATALOG_DATABASE_OID) { -// continue; -// } -// oid_t table_count = database->GetTableCount(); -// for (oid_t table_offset = 0; table_offset < table_count; table_offset++) { -// auto table = database->GetTable(table_offset); -// LOG_TRACE("Analyzing table: %s", table->GetName().c_str()); -// std::unique_ptr table_stats_collector( -// new TableStatsCollector(table)); -// table_stats_collector->CollectColumnStats(); -// InsertOrUpdateTableStats(table, table_stats_collector.get(), txn); -// } -// } + if (txn == nullptr) { + LOG_TRACE("Do not have transaction to analyze all tables' stats."); + return ResultType::FAILURE; + } + + auto storage_manager = storage::StorageManager::GetInstance(); + + oid_t database_count = storage_manager->GetDatabaseCount(); + LOG_TRACE("Database count: %u", database_count); + for (oid_t db_offset = 0; db_offset < database_count; db_offset++) { + auto database = storage_manager->GetDatabaseWithOffset(db_offset); + auto database_oid = database->GetOid(); + if (database_oid == CATALOG_DATABASE_OID) { + continue; + } + auto pg_database = catalog::Catalog::GetInstance() + ->GetDatabaseObject(database_oid, txn); + auto table_objects = pg_database->GetTableObjects(); + for (auto &table_object_entry : table_objects) { + auto table_oid = table_object_entry.first; + auto table_object = table_object_entry.second; + if (table_object->GetSchemaName() == CATALOG_SCHEMA_NAME) { + continue; + } + LOG_TRACE("Analyzing table: %s", table_object->GetTableName().c_str()); + auto table = database->GetTableWithOid(table_oid); + std::unique_ptr table_stats_collector( + new TableStatsCollector(table)); + table_stats_collector->CollectColumnStats(); + InsertOrUpdateTableStats(table, table_stats_collector.get(), txn); + } + } return ResultType::SUCCESS; } diff --git a/test/optimizer/stats_storage_test.cpp b/test/optimizer/stats_storage_test.cpp index 7ee516b5da3..6ef8388a7ba 100644 --- a/test/optimizer/stats_storage_test.cpp +++ b/test/optimizer/stats_storage_test.cpp @@ -232,43 +232,45 @@ TEST_F(StatsStorageTests, AnalyzeStatsForTableTest) { } // TODO: Add more tables. -//TEST_F(StatsStorageTests, AnalyzeStatsForAllTablesTest) { -// const std::string db_name = "test_db"; -// auto data_table = CreateTestDBAndTable(); -// -// StatsStorage *stats_storage = StatsStorage::GetInstance(); -// -// // Must pass in the transaction. -// ResultType result = stats_storage->AnalyzeStatsForAllTables(); -// EXPECT_EQ(result, ResultType::FAILURE); -// -// auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); -// auto txn = txn_manager.BeginTransaction(); -// result = stats_storage->AnalyzeStatsForAllTables(txn); -// EXPECT_EQ(result, ResultType::SUCCESS); -// txn_manager.CommitTransaction(txn); -// -// // Check the correctness of the stats. -// VerifyAndPrintColumnStats(data_table, 4); -// TestingExecutorUtil::DeleteDatabase(db_name); -// -//} +TEST_F(StatsStorageTests, AnalyzeStatsForAllTablesTest) { + const std::string db_name = "test_db"; + auto data_table = CreateTestDBAndTable(); -//TEST_F(StatsStorageTests, GetTableStatsTest) { -// auto data_table = InitializeTestTable(); -// StatsStorage *stats_storage = StatsStorage::GetInstance(); -// -// auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); -// auto txn = txn_manager.BeginTransaction(); -// stats_storage->AnalyzeStatsForAllTables(txn); -// txn_manager.CommitTransaction(txn); -// -// txn = txn_manager.BeginTransaction(); -// std::shared_ptr table_stats = stats_storage->GetTableStats( -// data_table->GetDatabaseOid(), data_table->GetOid(), txn); -// txn_manager.CommitTransaction(txn); -// EXPECT_EQ(table_stats->num_rows, tuple_count); -//} + StatsStorage *stats_storage = StatsStorage::GetInstance(); + + // Must pass in the transaction. + ResultType result = stats_storage->AnalyzeStatsForAllTables(); + EXPECT_EQ(result, ResultType::FAILURE); + + auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); + auto txn = txn_manager.BeginTransaction(); + result = stats_storage->AnalyzeStatsForAllTables(txn); + EXPECT_EQ(result, ResultType::SUCCESS); + txn_manager.CommitTransaction(txn); + + // Check the correctness of the stats. + VerifyAndPrintColumnStats(data_table, 4); + TestingExecutorUtil::DeleteDatabase(db_name); + +} + +TEST_F(StatsStorageTests, GetTableStatsTest) { + const std::string db_name = "test_db"; + auto data_table = CreateTestDBAndTable(); + StatsStorage *stats_storage = StatsStorage::GetInstance(); + + auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); + auto txn = txn_manager.BeginTransaction(); + stats_storage->AnalyzeStatsForAllTables(txn); + txn_manager.CommitTransaction(txn); + + txn = txn_manager.BeginTransaction(); + std::shared_ptr table_stats = stats_storage->GetTableStats( + data_table->GetDatabaseOid(), data_table->GetOid(), txn); + txn_manager.CommitTransaction(txn); + EXPECT_EQ(table_stats->num_rows, tuple_count); + TestingExecutorUtil::DeleteDatabase(db_name); +} } // namespace test } // namespace peloton \ No newline at end of file From 71aeb02e38969ae519d775c2d3f9ebcb9f2b2367 Mon Sep 17 00:00:00 2001 From: poojanilangekar Date: Fri, 18 May 2018 17:43:17 -0400 Subject: [PATCH 5/7] Changed AnalyzeAllTables to AnalyzeAllTablesWithDatabaseOid --- src/include/optimizer/stats/stats_storage.h | 3 +- src/optimizer/stats/stats_storage.cpp | 43 ++++++++------------- test/optimizer/stats_storage_test.cpp | 13 +++++-- 3 files changed, 28 insertions(+), 31 deletions(-) diff --git a/src/include/optimizer/stats/stats_storage.h b/src/include/optimizer/stats/stats_storage.h index 2d1ee5fbd59..52d3baaf136 100644 --- a/src/include/optimizer/stats/stats_storage.h +++ b/src/include/optimizer/stats/stats_storage.h @@ -67,7 +67,8 @@ class StatsStorage { /* Functions for triggerring stats collection */ - ResultType AnalyzeStatsForAllTables( + ResultType AnalyzeStatsForAllTablesWithDatabaseOid( + oid_t database_oid, concurrency::TransactionContext *txn = nullptr); ResultType AnalyzeStatsForTable( diff --git a/src/optimizer/stats/stats_storage.cpp b/src/optimizer/stats/stats_storage.cpp index 0bdbaeafaca..1190efec243 100644 --- a/src/optimizer/stats/stats_storage.cpp +++ b/src/optimizer/stats/stats_storage.cpp @@ -257,9 +257,8 @@ std::shared_ptr StatsStorage::GetTableStats( * AnalyzeStatsForAllTables - This function iterates all databases and * datatables to collect their stats and store them in the column_stats_catalog. */ -// Deprecating this function because the notion is no longer true. -// Needs to be handled differently. Used only in tests. -ResultType StatsStorage::AnalyzeStatsForAllTables( +ResultType StatsStorage::AnalyzeStatsForAllTablesWithDatabaseOid( + oid_t database_oid, UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) { if (txn == nullptr) { LOG_TRACE("Do not have transaction to analyze all tables' stats."); @@ -267,31 +266,23 @@ ResultType StatsStorage::AnalyzeStatsForAllTables( } auto storage_manager = storage::StorageManager::GetInstance(); - - oid_t database_count = storage_manager->GetDatabaseCount(); - LOG_TRACE("Database count: %u", database_count); - for (oid_t db_offset = 0; db_offset < database_count; db_offset++) { - auto database = storage_manager->GetDatabaseWithOffset(db_offset); - auto database_oid = database->GetOid(); - if (database_oid == CATALOG_DATABASE_OID) { + auto database = storage_manager->GetDatabaseWithOid(database); + PELOTON_ASSERT(database != nullptr); + auto pg_database = catalog::Catalog::GetInstance() + ->GetDatabaseObject(database_oid, txn); + auto table_objects = pg_database->GetTableObjects(); + for (auto &table_object_entry : table_objects) { + auto table_oid = table_object_entry.first; + auto table_object = table_object_entry.second; + if (table_object->GetSchemaName() == CATALOG_SCHEMA_NAME) { continue; } - auto pg_database = catalog::Catalog::GetInstance() - ->GetDatabaseObject(database_oid, txn); - auto table_objects = pg_database->GetTableObjects(); - for (auto &table_object_entry : table_objects) { - auto table_oid = table_object_entry.first; - auto table_object = table_object_entry.second; - if (table_object->GetSchemaName() == CATALOG_SCHEMA_NAME) { - continue; - } - LOG_TRACE("Analyzing table: %s", table_object->GetTableName().c_str()); - auto table = database->GetTableWithOid(table_oid); - std::unique_ptr table_stats_collector( - new TableStatsCollector(table)); - table_stats_collector->CollectColumnStats(); - InsertOrUpdateTableStats(table, table_stats_collector.get(), txn); - } + LOG_TRACE("Analyzing table: %s", table_object->GetTableName().c_str()); + auto table = database->GetTableWithOid(table_oid); + std::unique_ptr table_stats_collector( + new TableStatsCollector(table)); + table_stats_collector->CollectColumnStats(); + InsertOrUpdateTableStats(table, table_stats_collector.get(), txn); } return ResultType::SUCCESS; } diff --git a/test/optimizer/stats_storage_test.cpp b/test/optimizer/stats_storage_test.cpp index 6ef8388a7ba..226abeabe72 100644 --- a/test/optimizer/stats_storage_test.cpp +++ b/test/optimizer/stats_storage_test.cpp @@ -235,16 +235,18 @@ TEST_F(StatsStorageTests, AnalyzeStatsForTableTest) { TEST_F(StatsStorageTests, AnalyzeStatsForAllTablesTest) { const std::string db_name = "test_db"; auto data_table = CreateTestDBAndTable(); + auto db_oid = data_table->GetDatabaseOid(); StatsStorage *stats_storage = StatsStorage::GetInstance(); // Must pass in the transaction. - ResultType result = stats_storage->AnalyzeStatsForAllTables(); + ResultType result = stats_storage + ->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid); EXPECT_EQ(result, ResultType::FAILURE); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); auto txn = txn_manager.BeginTransaction(); - result = stats_storage->AnalyzeStatsForAllTables(txn); + result = stats_storage->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid, txn); EXPECT_EQ(result, ResultType::SUCCESS); txn_manager.CommitTransaction(txn); @@ -257,16 +259,19 @@ TEST_F(StatsStorageTests, AnalyzeStatsForAllTablesTest) { TEST_F(StatsStorageTests, GetTableStatsTest) { const std::string db_name = "test_db"; auto data_table = CreateTestDBAndTable(); + auto db_oid = data_table->GetDatabaseOid(); + StatsStorage *stats_storage = StatsStorage::GetInstance(); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); auto txn = txn_manager.BeginTransaction(); - stats_storage->AnalyzeStatsForAllTables(txn); + ResultType result = stats_storage + ->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid, txn); txn_manager.CommitTransaction(txn); txn = txn_manager.BeginTransaction(); std::shared_ptr table_stats = stats_storage->GetTableStats( - data_table->GetDatabaseOid(), data_table->GetOid(), txn); + db_oid, data_table->GetOid(), txn); txn_manager.CommitTransaction(txn); EXPECT_EQ(table_stats->num_rows, tuple_count); TestingExecutorUtil::DeleteDatabase(db_name); From 92d95fa2bb8ed6a5d9fd5bf86421f03cc13f0d08 Mon Sep 17 00:00:00 2001 From: poojanilangekar Date: Fri, 18 May 2018 17:55:07 -0400 Subject: [PATCH 6/7] Style fix --- src/catalog/catalog.cpp | 23 +++---- src/catalog/column_stats_catalog.cpp | 71 ++++++++++----------- src/include/catalog/column_stats_catalog.h | 15 ++--- src/include/optimizer/stats/stats_storage.h | 3 +- src/optimizer/stats/stats_storage.cpp | 42 ++++++------ test/optimizer/stats_storage_test.cpp | 15 ++--- 6 files changed, 79 insertions(+), 90 deletions(-) diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index f808d16e8d3..5fd2b3c3174 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -160,16 +160,17 @@ void Catalog::BootstrapSystemCatalogs(storage::Database *database, false, {TableCatalog::ColumnId::DATABASE_OID}, pool_.get(), txn); system_catalogs->GetIndexCatalog()->InsertIndex( - COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0", - COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE, - IndexConstraintType::UNIQUE, true, - {ColumnStatsCatalog::ColumnId::TABLE_ID, - ColumnStatsCatalog::ColumnId::COLUMN_ID}, pool_.get(), txn); + COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0", + COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE, + IndexConstraintType::UNIQUE, true, + {ColumnStatsCatalog::ColumnId::TABLE_ID, + ColumnStatsCatalog::ColumnId::COLUMN_ID}, + pool_.get(), txn); system_catalogs->GetIndexCatalog()->InsertIndex( - COLUMN_STATS_CATALOG_SKEY1_OID, COLUMN_STATS_CATALOG_NAME "_skey1", - COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE, - IndexConstraintType::UNIQUE, true, - {ColumnStatsCatalog::ColumnId::TABLE_ID}, pool_.get(), txn); + COLUMN_STATS_CATALOG_SKEY1_OID, COLUMN_STATS_CATALOG_NAME "_skey1", + COLUMN_STATS_CATALOG_OID, CATALOG_SCHEMA_NAME, IndexType::BWTREE, + IndexConstraintType::UNIQUE, true, + {ColumnStatsCatalog::ColumnId::TABLE_ID}, pool_.get(), txn); // Insert records(default + pg_catalog namespace) into pg_namespace system_catalogs->GetSchemaCatalog()->InsertSchema( @@ -198,8 +199,8 @@ void Catalog::BootstrapSystemCatalogs(storage::Database *database, LAYOUT_CATALOG_OID, LAYOUT_CATALOG_NAME, CATALOG_SCHEMA_NAME, database_oid, pool_.get(), txn); system_catalogs->GetTableCatalog()->InsertTable( - COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, - CATALOG_SCHEMA_NAME, database_oid, pool_.get(), txn); + COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, CATALOG_SCHEMA_NAME, + database_oid, pool_.get(), txn); } void Catalog::Bootstrap() { diff --git a/src/catalog/column_stats_catalog.cpp b/src/catalog/column_stats_catalog.cpp index d29e9f82413..d9d84fb7c01 100644 --- a/src/catalog/column_stats_catalog.cpp +++ b/src/catalog/column_stats_catalog.cpp @@ -23,74 +23,71 @@ namespace peloton { namespace catalog { ColumnStatsCatalog::ColumnStatsCatalog( - storage::Database *pg_catalog, - UNUSED_ATTRIBUTE type::AbstractPool *pool, - UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) - : AbstractCatalog(COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, - InitializeSchema().release(), pg_catalog) { + storage::Database *pg_catalog, UNUSED_ATTRIBUTE type::AbstractPool *pool, + UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) + : AbstractCatalog(COLUMN_STATS_CATALOG_OID, COLUMN_STATS_CATALOG_NAME, + InitializeSchema().release(), pg_catalog) { // Add indexes for pg_column_stats AddIndex({ColumnId::TABLE_ID, ColumnId::COLUMN_ID}, COLUMN_STATS_CATALOG_SKEY0_OID, COLUMN_STATS_CATALOG_NAME "_skey0", IndexConstraintType::UNIQUE); AddIndex({ColumnId::TABLE_ID}, COLUMN_STATS_CATALOG_SKEY1_OID, COLUMN_STATS_CATALOG_NAME "_skey1", IndexConstraintType::DEFAULT); - } ColumnStatsCatalog::~ColumnStatsCatalog() {} std::unique_ptr ColumnStatsCatalog::InitializeSchema() { - const std::string not_null_constraint_name = "notnull"; - const auto not_null_constraint = catalog::Constraint( - ConstraintType::NOTNULL, not_null_constraint_name); + const auto not_null_constraint = + catalog::Constraint(ConstraintType::NOTNULL, not_null_constraint_name); auto table_id_column = catalog::Column( - type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), - "table_id", true); + type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), + "table_id", true); table_id_column.AddConstraint(not_null_constraint); auto column_id_column = catalog::Column( - type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), - "column_id", true); + type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), + "column_id", true); column_id_column.AddConstraint(not_null_constraint); auto num_rows_column = catalog::Column( - type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), - "num_rows", true); + type::TypeId::INTEGER, type::Type::GetTypeSize(type::TypeId::INTEGER), + "num_rows", true); num_rows_column.AddConstraint(not_null_constraint); auto cardinality_column = catalog::Column( - type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL), - "cardinality", true); + type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL), + "cardinality", true); cardinality_column.AddConstraint(not_null_constraint); auto frac_null_column = catalog::Column( - type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL), - "frac_null", true); + type::TypeId::DECIMAL, type::Type::GetTypeSize(type::TypeId::DECIMAL), + "frac_null", true); frac_null_column.AddConstraint(not_null_constraint); auto most_common_vals_column = catalog::Column( - type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), - "most_common_vals", false); + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "most_common_vals", false); auto most_common_freqs_column = catalog::Column( - type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), - "most_common_freqs", false); + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "most_common_freqs", false); auto histogram_bounds_column = catalog::Column( - type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), - "histogram_bounds", false); + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "histogram_bounds", false); auto column_name_column = catalog::Column( - type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), - "column_name", false); + type::TypeId::VARCHAR, type::Type::GetTypeSize(type::TypeId::VARCHAR), + "column_name", false); auto has_index_column = catalog::Column( - type::TypeId::BOOLEAN, type::Type::GetTypeSize(type::TypeId::BOOLEAN), - "has_index", true); + type::TypeId::BOOLEAN, type::Type::GetTypeSize(type::TypeId::BOOLEAN), + "has_index", true); std::unique_ptr column_stats_schema(new catalog::Schema( - {table_id_column, column_id_column, num_rows_column, cardinality_column, - frac_null_column, most_common_vals_column, most_common_freqs_column, - histogram_bounds_column, column_name_column, has_index_column})); + {table_id_column, column_id_column, num_rows_column, cardinality_column, + frac_null_column, most_common_vals_column, most_common_freqs_column, + histogram_bounds_column, column_name_column, has_index_column})); return column_stats_schema; } bool ColumnStatsCatalog::InsertColumnStats( - oid_t table_id, oid_t column_id, int num_rows, - double cardinality, double frac_null, std::string most_common_vals, + oid_t table_id, oid_t column_id, int num_rows, double cardinality, + double frac_null, std::string most_common_vals, std::string most_common_freqs, std::string histogram_bounds, std::string column_name, bool has_index, type::AbstractPool *pool, concurrency::TransactionContext *txn) { @@ -142,8 +139,7 @@ bool ColumnStatsCatalog::InsertColumnStats( } bool ColumnStatsCatalog::DeleteColumnStats( - oid_t table_id, oid_t column_id, - concurrency::TransactionContext *txn) { + oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn) { oid_t index_offset = IndexId::SECONDARY_KEY_0; // Secondary key index std::vector values; @@ -154,8 +150,7 @@ bool ColumnStatsCatalog::DeleteColumnStats( } std::unique_ptr> ColumnStatsCatalog::GetColumnStats( - oid_t table_id, oid_t column_id, - concurrency::TransactionContext *txn) { + oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn) { std::vector column_ids( {ColumnId::NUM_ROWS, ColumnId::CARDINALITY, ColumnId::FRAC_NULL, ColumnId::MOST_COMMON_VALS, ColumnId::MOST_COMMON_FREQS, diff --git a/src/include/catalog/column_stats_catalog.h b/src/include/catalog/column_stats_catalog.h index affd1917ef9..c22e12a38ec 100644 --- a/src/include/catalog/column_stats_catalog.h +++ b/src/include/catalog/column_stats_catalog.h @@ -45,7 +45,6 @@ namespace catalog { class ColumnStatsCatalog : public AbstractCatalog { public: - ColumnStatsCatalog(storage::Database *pg_catalog, type::AbstractPool *pool, concurrency::TransactionContext *txn); @@ -54,8 +53,8 @@ class ColumnStatsCatalog : public AbstractCatalog { //===--------------------------------------------------------------------===// // write Related API //===--------------------------------------------------------------------===// - bool InsertColumnStats(oid_t table_id, oid_t column_id, - int num_rows, double cardinality, double frac_null, + bool InsertColumnStats(oid_t table_id, oid_t column_id, int num_rows, + double cardinality, double frac_null, std::string most_common_vals, std::string most_common_freqs, std::string histogram_bounds, std::string column_name, @@ -68,16 +67,14 @@ class ColumnStatsCatalog : public AbstractCatalog { // Read-only Related API //===--------------------------------------------------------------------===// std::unique_ptr> GetColumnStats( - oid_t table_id, oid_t column_id, - concurrency::TransactionContext *txn); + oid_t table_id, oid_t column_id, concurrency::TransactionContext *txn); size_t GetTableStats( - oid_t table_id, concurrency::TransactionContext *txn, - std::map>> & - column_stats_map); + oid_t table_id, concurrency::TransactionContext *txn, + std::map>> + &column_stats_map); // TODO: add more if needed - /** @brief private function for initialize schema of pg_index * @return unqiue pointer to schema */ diff --git a/src/include/optimizer/stats/stats_storage.h b/src/include/optimizer/stats/stats_storage.h index 52d3baaf136..bf906755fe8 100644 --- a/src/include/optimizer/stats/stats_storage.h +++ b/src/include/optimizer/stats/stats_storage.h @@ -68,8 +68,7 @@ class StatsStorage { /* Functions for triggerring stats collection */ ResultType AnalyzeStatsForAllTablesWithDatabaseOid( - oid_t database_oid, - concurrency::TransactionContext *txn = nullptr); + oid_t database_oid, concurrency::TransactionContext *txn = nullptr); ResultType AnalyzeStatsForTable( storage::DataTable *table, diff --git a/src/optimizer/stats/stats_storage.cpp b/src/optimizer/stats/stats_storage.cpp index 1190efec243..c1b58ce27b9 100644 --- a/src/optimizer/stats/stats_storage.cpp +++ b/src/optimizer/stats/stats_storage.cpp @@ -98,8 +98,8 @@ void StatsStorage::InsertOrUpdateColumnStats( cardinality, frac_null, most_common_vals.c_str(), most_common_freqs.c_str(), histogram_bounds.c_str()); auto pg_column_stats = catalog::Catalog::GetInstance() - ->GetSystemCatalogs(database_id) - ->GetColumnStatsCatalog(); + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); bool single_statement_txn = false; @@ -107,12 +107,11 @@ void StatsStorage::InsertOrUpdateColumnStats( single_statement_txn = true; txn = txn_manager.BeginTransaction(); } - pg_column_stats->DeleteColumnStats(table_id, column_id, - txn); - pg_column_stats->InsertColumnStats( - table_id, column_id, num_rows, cardinality, frac_null, - most_common_vals, most_common_freqs, histogram_bounds, column_name, - has_index, pool_.get(), txn); + pg_column_stats->DeleteColumnStats(table_id, column_id, txn); + pg_column_stats->InsertColumnStats(table_id, column_id, num_rows, cardinality, + frac_null, most_common_vals, + most_common_freqs, histogram_bounds, + column_name, has_index, pool_.get(), txn); if (single_statement_txn) { txn_manager.CommitTransaction(txn); @@ -127,13 +126,13 @@ std::shared_ptr StatsStorage::GetColumnStatsByID(oid_t database_id, oid_t table_id, oid_t column_id) { auto pg_column_stats = catalog::Catalog::GetInstance() - ->GetSystemCatalogs(database_id) - ->GetColumnStatsCatalog(); + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); auto txn = txn_manager.BeginTransaction(); // std::unique_ptr> column_stats_vector - auto column_stats_vector = pg_column_stats->GetColumnStats( - table_id, column_id, txn); + auto column_stats_vector = + pg_column_stats->GetColumnStats(table_id, column_id, txn); txn_manager.CommitTransaction(txn); return ConvertVectorToColumnStats(database_id, table_id, column_id, @@ -211,8 +210,8 @@ std::shared_ptr StatsStorage::ConvertVectorToColumnStats( std::shared_ptr StatsStorage::GetTableStats( oid_t database_id, oid_t table_id, concurrency::TransactionContext *txn) { auto pg_column_stats = catalog::Catalog::GetInstance() - ->GetSystemCatalogs(database_id) - ->GetColumnStatsCatalog(); + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); std::map>> column_stats_map; pg_column_stats->GetTableStats(table_id, txn, column_stats_map); @@ -236,8 +235,8 @@ std::shared_ptr StatsStorage::GetTableStats( oid_t database_id, oid_t table_id, std::vector column_ids, concurrency::TransactionContext *txn) { auto pg_column_stats = catalog::Catalog::GetInstance() - ->GetSystemCatalogs(database_id) - ->GetColumnStatsCatalog(); + ->GetSystemCatalogs(database_id) + ->GetColumnStatsCatalog(); std::map>> column_stats_map; pg_column_stats->GetTableStats(table_id, txn, column_stats_map); @@ -258,18 +257,17 @@ std::shared_ptr StatsStorage::GetTableStats( * datatables to collect their stats and store them in the column_stats_catalog. */ ResultType StatsStorage::AnalyzeStatsForAllTablesWithDatabaseOid( - oid_t database_oid, - UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) { + oid_t database_oid, UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) { if (txn == nullptr) { LOG_TRACE("Do not have transaction to analyze all tables' stats."); return ResultType::FAILURE; } auto storage_manager = storage::StorageManager::GetInstance(); - auto database = storage_manager->GetDatabaseWithOid(database); + auto database = storage_manager->GetDatabaseWithOid(database_oid); PELOTON_ASSERT(database != nullptr); - auto pg_database = catalog::Catalog::GetInstance() - ->GetDatabaseObject(database_oid, txn); + auto pg_database = + catalog::Catalog::GetInstance()->GetDatabaseObject(database_oid, txn); auto table_objects = pg_database->GetTableObjects(); for (auto &table_object_entry : table_objects) { auto table_oid = table_object_entry.first; @@ -280,7 +278,7 @@ ResultType StatsStorage::AnalyzeStatsForAllTablesWithDatabaseOid( LOG_TRACE("Analyzing table: %s", table_object->GetTableName().c_str()); auto table = database->GetTableWithOid(table_oid); std::unique_ptr table_stats_collector( - new TableStatsCollector(table)); + new TableStatsCollector(table)); table_stats_collector->CollectColumnStats(); InsertOrUpdateTableStats(table, table_stats_collector.get(), txn); } diff --git a/test/optimizer/stats_storage_test.cpp b/test/optimizer/stats_storage_test.cpp index 226abeabe72..baca8a58f39 100644 --- a/test/optimizer/stats_storage_test.cpp +++ b/test/optimizer/stats_storage_test.cpp @@ -93,7 +93,6 @@ void VerifyAndPrintColumnStats(storage::DataTable *data_table, } TEST_F(StatsStorageTests, InsertAndGetTableStatsTest) { - const std::string db_name = "test_db"; TestingExecutorUtil::InitializeDatabase(db_name); auto data_table = InitializeTestTable(); @@ -240,8 +239,8 @@ TEST_F(StatsStorageTests, AnalyzeStatsForAllTablesTest) { StatsStorage *stats_storage = StatsStorage::GetInstance(); // Must pass in the transaction. - ResultType result = stats_storage - ->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid); + ResultType result = + stats_storage->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid); EXPECT_EQ(result, ResultType::FAILURE); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); @@ -253,7 +252,6 @@ TEST_F(StatsStorageTests, AnalyzeStatsForAllTablesTest) { // Check the correctness of the stats. VerifyAndPrintColumnStats(data_table, 4); TestingExecutorUtil::DeleteDatabase(db_name); - } TEST_F(StatsStorageTests, GetTableStatsTest) { @@ -265,13 +263,14 @@ TEST_F(StatsStorageTests, GetTableStatsTest) { auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); auto txn = txn_manager.BeginTransaction(); - ResultType result = stats_storage - ->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid, txn); + ResultType result = + stats_storage->AnalyzeStatsForAllTablesWithDatabaseOid(db_oid, txn); + EXPECT_EQ(ResultType::SUCCESS, result); txn_manager.CommitTransaction(txn); txn = txn_manager.BeginTransaction(); - std::shared_ptr table_stats = stats_storage->GetTableStats( - db_oid, data_table->GetOid(), txn); + std::shared_ptr table_stats = + stats_storage->GetTableStats(db_oid, data_table->GetOid(), txn); txn_manager.CommitTransaction(txn); EXPECT_EQ(table_stats->num_rows, tuple_count); TestingExecutorUtil::DeleteDatabase(db_name); From 7dfbc59fe4c79f5ebb32efd746313f8e09ba1f5d Mon Sep 17 00:00:00 2001 From: poojanilangekar Date: Sat, 19 May 2018 01:22:53 -0400 Subject: [PATCH 7/7] Insert test tables into pg_table --- src/optimizer/stats/stats_storage.cpp | 2 +- test/executor/testing_executor_util.cpp | 2 +- test/include/executor/testing_executor_util.h | 2 +- test/optimizer/stats_storage_test.cpp | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/optimizer/stats/stats_storage.cpp b/src/optimizer/stats/stats_storage.cpp index c1b58ce27b9..6cadc47fb78 100644 --- a/src/optimizer/stats/stats_storage.cpp +++ b/src/optimizer/stats/stats_storage.cpp @@ -257,7 +257,7 @@ std::shared_ptr StatsStorage::GetTableStats( * datatables to collect their stats and store them in the column_stats_catalog. */ ResultType StatsStorage::AnalyzeStatsForAllTablesWithDatabaseOid( - oid_t database_oid, UNUSED_ATTRIBUTE concurrency::TransactionContext *txn) { + oid_t database_oid, concurrency::TransactionContext *txn) { if (txn == nullptr) { LOG_TRACE("Do not have transaction to analyze all tables' stats."); return ResultType::FAILURE; diff --git a/test/executor/testing_executor_util.cpp b/test/executor/testing_executor_util.cpp index 5da53374e22..3f8edaf40b2 100644 --- a/test/executor/testing_executor_util.cpp +++ b/test/executor/testing_executor_util.cpp @@ -426,7 +426,7 @@ storage::DataTable *TestingExecutorUtil::CreateTable( } storage::DataTable *TestingExecutorUtil::CreateTableUpdateCatalog( - int tuples_per_tilegroup_count, std::string &db_name) { + int tuples_per_tilegroup_count, const std::string &db_name) { auto table_schema = std::unique_ptr( new catalog::Schema({GetColumnInfo(0), GetColumnInfo(1), GetColumnInfo(2), GetColumnInfo(3)})); diff --git a/test/include/executor/testing_executor_util.h b/test/include/executor/testing_executor_util.h index 12f086c95f1..fa081a1f0ba 100644 --- a/test/include/executor/testing_executor_util.h +++ b/test/include/executor/testing_executor_util.h @@ -88,7 +88,7 @@ class TestingExecutorUtil { * @return A pointer to the DataTable created. */ static storage::DataTable *CreateTableUpdateCatalog( - int tuples_per_tilegroup_count, std::string &db_name); + int tuples_per_tilegroup_count, const std::string &db_name); /** @brief Creates a basic table with allocated and populated tuples */ static storage::DataTable *CreateAndPopulateTable(); diff --git a/test/optimizer/stats_storage_test.cpp b/test/optimizer/stats_storage_test.cpp index baca8a58f39..66526b70ebe 100644 --- a/test/optimizer/stats_storage_test.cpp +++ b/test/optimizer/stats_storage_test.cpp @@ -51,15 +51,15 @@ std::unique_ptr InitializeTestTable() { storage::DataTable *CreateTestDBAndTable() { const std::string test_db_name = "test_db"; - auto database = TestingExecutorUtil::InitializeDatabase(test_db_name); + TestingExecutorUtil::InitializeDatabase(test_db_name); auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); auto txn = txn_manager.BeginTransaction(); storage::DataTable *data_table = - TestingExecutorUtil::CreateTable(tuple_per_tilegroup, false); + TestingExecutorUtil::CreateTableUpdateCatalog(tuple_per_tilegroup, + test_db_name); TestingExecutorUtil::PopulateTable(data_table, tuple_count, false, false, true, txn); - database->AddTable(data_table); txn_manager.CommitTransaction(txn); return data_table; }