From f5285d8f20680a2615dfd5afe0d74a8544ac35a9 Mon Sep 17 00:00:00 2001
From: Yiding
Date: Tue, 21 Jan 2025 20:43:40 +0800
Subject: [PATCH] stats, infoschema: avoid some network cost

---
 pkg/infoschema/infoschema.go                  | 22 ++++++++++++
 pkg/infoschema/infoschema_v2.go               | 34 +++++++++++++++++++
 pkg/infoschema/interface.go                   |  2 ++
 pkg/statistics/handle/handle.go               | 44 ++++++++++++++++++++++++
 pkg/statistics/handle/types/interfaces.go     |  3 ++
 .../handle/usage/session_stats_collect.go     | 13 +++-----
 pkg/statistics/handle/util/table_info.go      | 10 ++++++
 7 files changed, 120 insertions(+), 8 deletions(-)

diff --git a/pkg/infoschema/infoschema.go b/pkg/infoschema/infoschema.go
index 7ab058d98309a..1cbed4aecb231 100644
--- a/pkg/infoschema/infoschema.go
+++ b/pkg/infoschema/infoschema.go
@@ -315,6 +315,19 @@ func (is *infoSchema) TableByID(_ stdctx.Context, id int64) (val table.Table, ok
 	return slice[idx], true
 }
 
+// SchemaNameAndTableNameByID implements InfoSchema.SchemaNameAndTableNameByID: it resolves tableID via TableByID and its database via SchemaByID; ok is false if either lookup misses.
+func (is *infoSchema) SchemaNameAndTableNameByID(tableID int64) (schemaName, tableName ast.CIStr, ok bool) {
+	tbl, ok := is.TableByID(stdctx.Background(), tableID)
+	if !ok {
+		return // table not found: names stay zero-valued, ok is false
+	}
+	db, ok := is.SchemaByID(tbl.Meta().DBID)
+	if !ok {
+		return // owning schema not found
+	}
+	return db.Name, tbl.Meta().Name, true
+}
+
 func (is *infoSchema) SchemaNameByTableID(tableID int64) (schemaName ast.CIStr, ok bool) {
 	tbl, ok := is.TableByID(stdctx.Background(), tableID)
 	if !ok {
@@ -412,6 +425,15 @@ func (is *infoSchema) AllSchemaNames() (schemas []ast.CIStr) {
 	return rs
 }
 
+// SchemaNameAndTableNameByPartitionID implements InfoSchema.SchemaNameAndTableNameByPartitionID: it returns the names of the database and table found by FindTableByPartitionID; ok is false when no table is found.
+func (is *infoSchema) SchemaNameAndTableNameByPartitionID(partitionID int64) (schemaName, tableName ast.CIStr, ok bool) {
+	tbl, db, _ := is.FindTableByPartitionID(partitionID)
+	if tbl == nil {
+		return
+	}
+	return db.Name, tbl.Meta().Name, true
+}
+
 // FindTableByPartitionID finds the partition-table info by the partitionID.
 // FindTableByPartitionID will traverse all the tables to find the partitionID partition in which partition-table.
 func (is *infoSchema) FindTableByPartitionID(partitionID int64) (table.Table, *model.DBInfo, *model.PartitionDefinition) {
diff --git a/pkg/infoschema/infoschema_v2.go b/pkg/infoschema/infoschema_v2.go
index cef3ca762e867..cc3c944764d52 100644
--- a/pkg/infoschema/infoschema_v2.go
+++ b/pkg/infoschema/infoschema_v2.go
@@ -734,6 +734,15 @@ func (is *infoschemaV2) TableByID(ctx context.Context, id int64) (val table.Tabl
 	return ret, true
 }
 
+// SchemaNameAndTableNameByID implements the InfoSchema interface: it returns the dbName and tableName of the item found by searchTableItemByID, without building a table.Table.
+func (is *infoschemaV2) SchemaNameAndTableNameByID(tableID int64) (schemaName, tableName ast.CIStr, ok bool) {
+	itm, ok := is.searchTableItemByID(tableID)
+	if !ok {
+		return
+	}
+	return itm.dbName, itm.tableName, true
+}
+
 func (is *infoschemaV2) SchemaNameByTableID(tableID int64) (schemaName ast.CIStr, ok bool) {
 	if !tableIDIsValid(tableID) {
 		return
@@ -1106,6 +1115,31 @@ func (is *infoschemaV2) SchemaExists(schema ast.CIStr) bool {
 	return ok
 }
 
+// SchemaNameAndTableNameByPartitionID implements InfoSchema.SchemaNameAndTableNameByPartitionID: it walks pid2tid downwards to the first record for partitionID whose schemaVersion does not exceed this schema's version, then resolves that record's tableID; a tombstoned record yields ok=false.
+func (is *infoschemaV2) SchemaNameAndTableNameByPartitionID(partitionID int64) (schemaName, tableName ast.CIStr, ok bool) {
+	var pi partitionItem
+	is.pid2tid.Load().DescendLessOrEqual(partitionItem{partitionID: partitionID, schemaVersion: math.MaxInt64},
+		func(item partitionItem) bool {
+			if item.partitionID != partitionID {
+				return false
+			}
+			if item.schemaVersion > is.infoSchema.schemaMetaVersion {
+				// Skip the record.
+				return true
+			}
+			if item.schemaVersion <= is.infoSchema.schemaMetaVersion {
+				ok = !item.tomb
+				pi = item
+				return false
+			}
+			return true
+		})
+	if !ok {
+		return
+	}
+	return is.SchemaNameAndTableNameByID(pi.tableID)
+}
+
 func (is *infoschemaV2) FindTableByPartitionID(partitionID int64) (table.Table, *model.DBInfo, *model.PartitionDefinition) {
 	var ok bool
 	var pi partitionItem
diff --git a/pkg/infoschema/interface.go b/pkg/infoschema/interface.go
index 8a52259dd53e5..ba320c458b7ff 100644
--- a/pkg/infoschema/interface.go
+++ b/pkg/infoschema/interface.go
@@ -30,7 +30,9 @@ type InfoSchema interface {
 	context.MetaOnlyInfoSchema
 	TableByName(ctx stdctx.Context, schema, table ast.CIStr) (table.Table, error)
 	TableByID(ctx stdctx.Context, id int64) (table.Table, bool)
+	SchemaNameAndTableNameByID(tableID int64) (schemaName, tableName ast.CIStr, ok bool)
 	SchemaNameByTableID(tableID int64) (ast.CIStr, bool)
 	FindTableByPartitionID(partitionID int64) (table.Table, *model.DBInfo, *model.PartitionDefinition)
+	SchemaNameAndTableNameByPartitionID(partitionID int64) (schemaName, tableName ast.CIStr, ok bool)
 	base() *infoSchema
 }
diff --git a/pkg/statistics/handle/handle.go b/pkg/statistics/handle/handle.go
index 9d63a732bce55..b4aec455c5a83 100644
--- a/pkg/statistics/handle/handle.go
+++ b/pkg/statistics/handle/handle.go
@@ -204,6 +204,50 @@ func (h *Handle) getPartitionStats(tblInfo *model.TableInfo, pid int64, returnPs
 	return tbl
 }
 
+// GetPartitionStatsByID retrieves the stats of the physical table pid from the cache.
+// A pseudo stats table is returned when no stats are cached for pid; nil is returned
+// only when the table info of pid cannot be found in is.
+func (h *Handle) GetPartitionStatsByID(is infoschema.InfoSchema, pid int64) *statistics.Table {
+	return h.getPartitionStatsByID(is, pid, true)
+}
+
+// getPartitionStatsByID looks up the cached stats of pid. On a cache miss it builds a
+// pseudo stats table from the table info found in is when returnPseudo is true, and
+// returns nil otherwise.
+func (h *Handle) getPartitionStatsByID(is infoschema.InfoSchema, pid int64, returnPseudo bool) *statistics.Table {
+	if h == nil {
+		// NOTE(review): h is nil here, so this call is only safe if TableInfoByID
+		// never dereferences the receiver; confirm against the Handle definition.
+		tbl, ok := h.TableInfoByID(is, pid)
+		if !ok {
+			return nil
+		}
+		pseudoTbl := statistics.PseudoTable(tbl.Meta(), false, false)
+		pseudoTbl.PhysicalID = pid
+		// Return here rather than falling through to the cache lookup on a nil handle.
+		return pseudoTbl
+	}
+	if statsTbl, ok := h.Get(pid); ok {
+		return statsTbl
+	}
+	if !returnPseudo {
+		return nil
+	}
+	tbl, ok := h.TableInfoByID(is, pid)
+	if !ok {
+		return nil
+	}
+	pseudoTbl := statistics.PseudoTable(tbl.Meta(), false, true)
+	pseudoTbl.PhysicalID = pid
+	if tbl.Meta().GetPartitionInfo() == nil || h.Len() < 64 {
+		h.UpdateStatsCache(types.CacheUpdate{
+			Updated: []*statistics.Table{pseudoTbl},
+		})
+	}
+	// Hand the pseudo table back instead of nil so callers can read it directly.
+	return pseudoTbl
+}
+
 // FlushStats flushes the cached stats update into store.
 func (h *Handle) FlushStats() {
 	if err := h.DumpStatsDeltaToKV(true); err != nil {
diff --git a/pkg/statistics/handle/types/interfaces.go b/pkg/statistics/handle/types/interfaces.go
index a5393ac9ca4f6..f1cde76e6b7dd 100644
--- a/pkg/statistics/handle/types/interfaces.go
+++ b/pkg/statistics/handle/types/interfaces.go
@@ -547,6 +547,9 @@ type StatsHandle interface {
 	// GetPartitionStats retrieves the partition stats from cache.
 	GetPartitionStats(tblInfo *model.TableInfo, pid int64) *statistics.Table
 
+	// GetPartitionStatsByID retrieves the partition stats from cache by partition ID.
+	GetPartitionStatsByID(is infoschema.InfoSchema, pid int64) *statistics.Table
+
 	// GetPartitionStatsForAutoAnalyze retrieves the partition stats from cache, but it will not return pseudo.
 	GetPartitionStatsForAutoAnalyze(tblInfo *model.TableInfo, pid int64) *statistics.Table
 
diff --git a/pkg/statistics/handle/usage/session_stats_collect.go b/pkg/statistics/handle/usage/session_stats_collect.go
index 30f19ad9677e8..608e0555e44bb 100644
--- a/pkg/statistics/handle/usage/session_stats_collect.go
+++ b/pkg/statistics/handle/usage/session_stats_collect.go
@@ -55,15 +55,11 @@ var (
 // 3. If the stats delta haven't been dumped in the past hour, then return true.
 // 4. If the table stats is pseudo or empty or `Modify Count / Table Count` exceeds the threshold.
 func (s *statsUsageImpl) needDumpStatsDelta(is infoschema.InfoSchema, dumpAll bool, id int64, item variable.TableDelta, currentTime time.Time) bool {
-	tbl, ok := s.statsHandle.TableInfoByID(is, id)
+	dbName, _, ok := s.statsHandle.SchemaNameAndTableNameByID(is, id)
 	if !ok {
 		return false
 	}
-	dbInfo, ok := infoschema.SchemaByTable(is, tbl.Meta())
-	if !ok {
-		return false
-	}
-	if util.IsMemOrSysDB(dbInfo.Name.L) {
+	if util.IsMemOrSysDB(dbName.L) {
 		return false
 	}
 	if dumpAll {
@@ -76,7 +72,8 @@ func (s *statsUsageImpl) needDumpStatsDelta(is infoschema.InfoSchema, dumpAll bo
 		// Dump the stats to kv at least once 5 minutes.
 		return true
 	}
-	statsTbl := s.statsHandle.GetPartitionStats(tbl.Meta(), id)
-	if statsTbl.Pseudo || statsTbl.RealtimeCount == 0 || float64(item.Count)/float64(statsTbl.RealtimeCount) > DumpStatsDeltaRatio {
+	// GetPartitionStatsByID has nil return paths; treat missing stats like pseudo stats instead of dereferencing nil.
+	statsTbl := s.statsHandle.GetPartitionStatsByID(is, id)
+	if statsTbl == nil || statsTbl.Pseudo || statsTbl.RealtimeCount == 0 || float64(item.Count)/float64(statsTbl.RealtimeCount) > DumpStatsDeltaRatio {
 		// Dump the stats when there are many modifications.
 		return true
diff --git a/pkg/statistics/handle/util/table_info.go b/pkg/statistics/handle/util/table_info.go
index 746dc257b0089..641e92bc84e09 100644
--- a/pkg/statistics/handle/util/table_info.go
+++ b/pkg/statistics/handle/util/table_info.go
@@ -18,6 +18,7 @@ import (
 	"context"
 
 	"github.com/pingcap/tidb/pkg/infoschema"
+	"github.com/pingcap/tidb/pkg/parser/ast"
 	"github.com/pingcap/tidb/pkg/table"
 )
 
@@ -26,6 +27,9 @@ type TableInfoGetter interface {
 	// TableInfoByID returns the table info specified by the physicalID.
 	// If the physicalID is corresponding to a partition, return its parent table.
 	TableInfoByID(is infoschema.InfoSchema, physicalID int64) (table.Table, bool)
+	// SchemaNameAndTableNameByID returns the schema name and table name specified by the physicalID.
+	// This is a pure memory operation.
+	SchemaNameAndTableNameByID(is infoschema.InfoSchema, physicalID int64) (schemaName, tableName ast.CIStr, ok bool)
 }
 
 // tableInfoGetterImpl is used to get table meta info.
@@ -47,3 +51,14 @@ func (*tableInfoGetterImpl) TableInfoByID(is infoschema.InfoSchema, physicalID i
 	tbl, _, _ = is.FindTableByPartitionID(physicalID)
 	return tbl, tbl != nil
 }
+
+// SchemaNameAndTableNameByID returns the schema name and table name specified by the physicalID.
+// The physicalID is first looked up as a table ID; if that misses, it is looked up
+// as a partition ID, mirroring the fallback done by TableInfoByID.
+func (*tableInfoGetterImpl) SchemaNameAndTableNameByID(is infoschema.InfoSchema, physicalID int64) (schemaName, tableName ast.CIStr, ok bool) {
+	if schemaName, tableName, ok = is.SchemaNameAndTableNameByID(physicalID); ok {
+		return schemaName, tableName, true
+	}
+	// Not a table ID; the caller may have passed a partition's physical ID.
+	return is.SchemaNameAndTableNameByPartitionID(physicalID)
+}