Skip to content

Commit

Permalink
Don't open shadows for read-only modsnap
Browse files Browse the repository at this point in the history
Signed-off-by: Morgan Douglas <[email protected]>
  • Loading branch information
morgando committed Feb 4, 2025
1 parent 24fba8f commit 570c7f1
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 86 deletions.
8 changes: 7 additions & 1 deletion bdb/bdb_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ typedef struct bdb_callback_tag bdb_callback_type;
struct tran_tag;
typedef struct tran_tag tran_type;

struct table_version_cache;
typedef struct table_version_cache table_version_cache;

struct bdb_attr_tag;
typedef struct bdb_attr_tag bdb_attr_type;

Expand Down Expand Up @@ -2056,9 +2059,12 @@ int bdb_llmeta_list_records(bdb_state_type *bdb_state, int *bdberr);
int bdb_have_ipu(bdb_state_type *bdb_state);

bdb_state_type *bdb_get_table_by_name(bdb_state_type *bdb_state, char *table);
int bdb_osql_check_table_version(bdb_state_type *bdb_state, tran_type *tran,
int bdb_osql_check_table_version(bdb_state_type *bdb_state, table_version_cache *cache,
int trak, int *bdberr);

int bdb_init_table_version_cache(table_version_cache **cache);
void bdb_free_table_version_cache(table_version_cache *cache);

int bdb_get_myseqnum(bdb_state_type *bdb_state, seqnum_type *seqnum);

void bdb_replace_handle(bdb_state_type *parent, int ix, bdb_state_type *handle);
Expand Down
11 changes: 7 additions & 4 deletions bdb/bdb_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -445,9 +445,6 @@ struct tran_tag {
/* Tables that this tran touches (for logical redo sc) */
hash_t *dirty_table_hash;

/* cache the versions of dta files to catch schema changes and fastinits */
int table_version_cache_sz;
unsigned long long *table_version_cache;
bdb_state_type *parent_state;

/* Send the master periodic 'acks' after this many physical commits */
Expand Down Expand Up @@ -766,6 +763,12 @@ typedef struct {
int should_reject;
} repinfo_type;

/*
 * Snapshot of per-table version numbers.  It is filled in by
 * bdb_osql_cache_table_versions() and compared against a table's current
 * version_num by bdb_osql_check_table_version().
 */
typedef struct table_version_cache
{
/* Number of slots in entries; set from the parent bdb_state's numchildren. */
int sz;
/*
 * calloc'ed array of sz version numbers, indexed like the parent's
 * children[] array.  NULL until the cache has been populated.  A slot of 0
 * is treated as "not known yet" by the caching code, which then reads the
 * version by table name and stores -1 when that read reports
 * BDBERR_FETCH_DTA.
 */
unsigned long long *entries;
} table_version_cache;

struct hostinfo
{
seqnum_type seqnum;
Expand Down Expand Up @@ -1842,7 +1845,7 @@ int bdb_lite_list_records(bdb_state_type *bdb_state,
int *bdberr),
int *bdberr);

int bdb_osql_cache_table_versions(bdb_state_type *bdb_state, tran_type *tran,
int bdb_osql_cache_table_versions(bdb_state_type *bdb_state, table_version_cache *cache,
int trak, int *bdberr);
int bdb_temp_table_destroy_lru(struct temp_table *tbl,
bdb_state_type *bdb_state, int *last,
Expand Down
80 changes: 50 additions & 30 deletions bdb/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -8562,8 +8562,26 @@ int bdb_purge_unused_files(bdb_state_type *bdb_state, tran_type *tran,
return rc;
}

/*
 * Release a table version cache created by bdb_init_table_version_cache(),
 * together with its entries array.
 *
 * cache may be NULL, in which case this is a no-op (same contract as free()),
 * so callers on cleanup paths do not need to guard the call.
 */
void bdb_free_table_version_cache(table_version_cache *cache)
{
    if (cache == NULL) {
        return;
    }

    /* free(NULL) is a no-op, so entries needs no separate guard. */
    free(cache->entries);
    free(cache);
}

/*
 * Allocate an empty, zero-filled table version cache.
 *
 * On return *cache holds the new object, or NULL if the allocation failed.
 * Returns 0 on success and ENOMEM on allocation failure.  The caller owns the
 * result and releases it with bdb_free_table_version_cache().
 */
int bdb_init_table_version_cache(table_version_cache **cache)
{
    table_version_cache *fresh = calloc(1, sizeof(*fresh));

    *cache = fresh;
    return (fresh != NULL) ? 0 : ENOMEM;
}

/* Refactor to not access Berkeley DB while holding children lock */
int bdb_osql_cache_table_versions(bdb_state_type *bdb_state, tran_type *tran,
int bdb_osql_cache_table_versions(bdb_state_type *bdb_state, table_version_cache *cache,
int trak, int *bdberr)
{
int i = 0;
Expand All @@ -8579,54 +8597,56 @@ int bdb_osql_cache_table_versions(bdb_state_type *bdb_state, tran_type *tran,
if (bdb_state->parent)
bdb_state = bdb_state->parent;

if (tran->table_version_cache) {
tran->table_version_cache_sz = 0;
free(tran->table_version_cache);
tran->table_version_cache = NULL;
if (cache->entries) {
cache->sz = 0;
free(cache->entries);
cache->entries = NULL;
}

bdb_lock_children_lock(bdb_state);
tran->table_version_cache_sz = tablecount = bdb_state->numchildren;
cache->sz = tablecount = bdb_state->numchildren;
tablenames = (char **)calloc(sizeof(char *), tablecount);
tran->table_version_cache =

cache->entries =
(unsigned long long *)calloc(tablecount, sizeof(unsigned long long));
if (!cache->entries) {
bdb_unlock_children_lock(bdb_state);

logmsg(LOGMSG_ERROR, "%s: failed to allocate %zu bytes\n", __func__,
sizeof(unsigned long long) * cache->sz);
*bdberr = BDBERR_MALLOC;
rc = -1;
goto done;
}

for (int i = 0; i < tablecount; i++) {
if (bdb_state->children[i]) {
tablenames[i] = strdup(bdb_state->children[i]->name);
tran->table_version_cache[i] = bdb_state->children[i]->version_num;
cache->entries[i] = bdb_state->children[i]->version_num;
}
}

bdb_unlock_children_lock(bdb_state);

if (!tran->table_version_cache) {
logmsg(LOGMSG_ERROR, "%s: failed to allocated %zu bytes\n", __func__,
sizeof(unsigned long long) * tran->table_version_cache_sz);
*bdberr = BDBERR_MALLOC;
rc = -1;
goto done;
}

for (i = 0; i < tran->table_version_cache_sz; i++) {
for (i = 0; i < cache->sz; i++) {
if (tablenames[i] == NULL)
continue;
if (tran->table_version_cache[i] == 0) {
if (cache->entries[i] == 0) {
/* read it */
rc = bdb_get_file_version_data_by_name(
NULL, tablenames[i], 0, &tran->table_version_cache[i], bdberr);
NULL, tablenames[i], 0, &cache->entries[i], bdberr);
if (rc) {
if (*bdberr == BDBERR_FETCH_DTA) {
rc = 0;
*bdberr = BDBERR_NOERROR;
tran->table_version_cache[i] = -1;
cache->entries[i] = -1;
} else {
logmsg(LOGMSG_ERROR, "%s: failed to read file version number "
"rc=%d bdberr=%d\n",
__func__, rc, *bdberr);
free(tran->table_version_cache);
tran->table_version_cache = NULL;
tran->table_version_cache_sz = 0;
free(cache->entries);
cache->entries = NULL;
cache->sz = 0;
goto done;
}
} else {
Expand All @@ -8650,15 +8670,15 @@ int bdb_osql_cache_table_versions(bdb_state_type *bdb_state, tran_type *tran,
} else if (bdb_state->children[i] &&
bdb_state->children[i]->version_num > 0 &&
bdb_state->children[i]->version_num !=
tran->table_version_cache[i]) {
cache->entries[i]) {
retry = 1;
}
}
if (!retry) {
for (int i = 0; i < tablecount; i++) {
if (bdb_state->children[i])
bdb_state->children[i]->version_num =
tran->table_version_cache[i];
cache->entries[i];
}
}
bdb_unlock_children_lock(bdb_state);
Expand All @@ -8677,7 +8697,7 @@ int bdb_osql_cache_table_versions(bdb_state_type *bdb_state, tran_type *tran,
return rc;
}

int bdb_osql_check_table_version(bdb_state_type *bdb_state, tran_type *tran,
int bdb_osql_check_table_version(bdb_state_type *bdb_state, table_version_cache *cache,
int trak, int *bdberr)
{
int i = 0;
Expand All @@ -8697,11 +8717,11 @@ int bdb_osql_check_table_version(bdb_state_type *bdb_state, tran_type *tran,

bdb_unlock_children_lock(bdb_state);

if ((i >= 0) && (i < tran->table_version_cache_sz) &&
(tran->table_version_cache[i] != 0) &&
(tran->table_version_cache[i] == bdb_state->version_num)) {
if ((i >= 0) && (i < cache->sz) &&
(cache->entries[i] != 0) &&
(cache->entries[i] == bdb_state->version_num)) {
/*printf("OK %s [%d] %llx vs %llx\n", bdb_state->name, i,
* tran->table_version_cache[i], bdb_state->version_num);*/
* cache->entries[i], bdb_state->version_num);*/
return 0;
} else {
logmsg(LOGMSG_ERROR, "FAILED table \"%s\" changed, index=%d\n", bdb_state->name, i);
Expand Down
59 changes: 15 additions & 44 deletions bdb/tran.c
Original file line number Diff line number Diff line change
Expand Up @@ -993,10 +993,6 @@ static int bdb_tran_commit_phys_getlsn_flags(bdb_state_type *bdb_state,
tran, tran->tranclass, tran->logical_tran,
tran->logical_tran->tranclass);

if (tran->table_version_cache)
free(tran->table_version_cache);
tran->table_version_cache = NULL;

free(tran);
return rc;
}
Expand Down Expand Up @@ -1038,9 +1034,6 @@ static int bdb_tran_abort_phys_int(bdb_state_type *bdb_state, tran_type *tran,
if (reset_rowlist)
tran_reset_rowlist(tran->logical_tran);

if (tran->table_version_cache)
free(tran->table_version_cache);
tran->table_version_cache = NULL;
free(tran);
return rc;
}
Expand Down Expand Up @@ -1185,7 +1178,6 @@ tran_type *bdb_tran_begin_shadow_int(bdb_state_type *bdb_state, int tranclass,
int offset, int is_ha_retry)
{
tran_type *tran;
int rc = 0;

#if 0
SINCE SHADOW TRANSACTIONS (READ COMMITTED/SNAPSHOT/SERIALIZABLE)
Expand Down Expand Up @@ -1229,37 +1221,24 @@ tran_type *bdb_tran_begin_shadow_int(bdb_state_type *bdb_state, int tranclass,
tran->asof_ref_lsn.file = 0;
tran->asof_ref_lsn.offset = 1;
tran->asof_hashtbl = NULL;
tran->trak = trak;

if (tran) {
tran->trak = trak;

if (tran->tranclass == TRANCLASS_SNAPISOL ||
tran->tranclass == TRANCLASS_SERIALIZABLE ||
tran->tranclass == TRANCLASS_MODSNAP) {
rc = bdb_osql_cache_table_versions(bdb_state, tran, trak, bdberr);
if (rc) {
logmsg(LOGMSG_ERROR,
"%s failed to cache table versions rc=%d bdberr=%d\n",
__func__, rc, *bdberr);
}

/* register transaction so we start receiving log undos */
if (tran->tranclass != TRANCLASS_MODSNAP) {
tran->osql =
bdb_osql_trn_register(bdb_state, tran, trak, bdberr, epoch,
file, offset, is_ha_retry);
if (!tran->osql) {
if (*bdberr != BDBERR_NOT_DURABLE)
logmsg(LOGMSG_ERROR, "%s %d\n", __func__, *bdberr);

myfree(tran);
return NULL;
}
if (tran->tranclass == TRANCLASS_SNAPISOL ||
tran->tranclass == TRANCLASS_SERIALIZABLE) {
/* register transaction so we start receiving log undos */
tran->osql =
bdb_osql_trn_register(bdb_state, tran, trak, bdberr, epoch,
file, offset, is_ha_retry);
if (!tran->osql) {
if (*bdberr != BDBERR_NOT_DURABLE)
logmsg(LOGMSG_ERROR, "%s %d\n", __func__, *bdberr);

listc_init(&tran->open_cursors,
offsetof(struct bdb_cursor_ifn, lnk));
}
myfree(tran);
return NULL;
}

listc_init(&tran->open_cursors,
offsetof(struct bdb_cursor_ifn, lnk));
}

return tran;
Expand Down Expand Up @@ -2129,10 +2108,6 @@ int bdb_tran_commit_with_seqnum_int(bdb_state_type *bdb_state, tran_type *tran,
logmsg(LOGMSG_USER, "TRK_TRAN: committed %p (type=%d)\n", tran,
tran->tranclass);

if (tran->table_version_cache)
free(tran->table_version_cache);
tran->table_version_cache = NULL;

pool_free(tran->rc_pool);
myfree(tran->rc_list);
myfree(tran->rc_locks);
Expand Down Expand Up @@ -2432,10 +2407,6 @@ int bdb_tran_abort_int_int(bdb_state_type *bdb_state, tran_type *tran,
logmsg(LOGMSG_USER, "TRK_TRAN: aborted %p (type=%d)\n", tran,
tran->tranclass);

if (tran->table_version_cache)
free(tran->table_version_cache);
tran->table_version_cache = NULL;

if (tran->pglogs_queue_hash) {
hash_for(tran->pglogs_queue_hash, free_pglogs_queue_cursors, NULL);
hash_free(tran->pglogs_queue_hash);
Expand Down
2 changes: 2 additions & 0 deletions db/sql.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,8 @@ typedef struct {
int maxchunksize; /* multi-transaction bulk mode */
int crtchunksize; /* how many rows are processed already */
int nchunks; /* number of chunks. 0 for a non-chunked transaction. */

table_version_cache *table_version_cache;
} dbtran_type;
typedef dbtran_type trans_t;

Expand Down
50 changes: 43 additions & 7 deletions db/sqlglue.c
Original file line number Diff line number Diff line change
Expand Up @@ -4859,6 +4859,35 @@ int start_new_transaction(struct sqlclntstate *clnt)
return 0;
}

/*
 * (Re)build the table version cache attached to this client's transaction
 * state (clnt->dbtran.table_version_cache).
 *
 * Any cache left over from an earlier call is freed first, then a new one is
 * allocated and populated via bdb_osql_cache_table_versions().
 *
 * Returns 0 on success, or the non-zero rc of whichever bdb call failed.
 */
int cache_table_versions(struct sqlclntstate *clnt)
{
    /* Drop a stale cache before building a new one. */
    if (clnt->dbtran.table_version_cache) {
        bdb_free_table_version_cache(clnt->dbtran.table_version_cache);
        clnt->dbtran.table_version_cache = NULL;
    }

    int rc = bdb_init_table_version_cache(&clnt->dbtran.table_version_cache);
    if (rc) {
        logmsg(LOGMSG_ERROR,
               "%s failed to initialize table version cache rc=%d\n",
               __func__, rc);
        return rc;
    }

    /*
     * Start from a defined value: bdberr is printed below on failure, and
     * the callee is not guaranteed to have written it on every error path.
     */
    int bdberr = 0;
    rc = bdb_osql_cache_table_versions(thedb->bdb_env,
                                       clnt->dbtran.table_version_cache,
                                       clnt->bdb_osql_trak, &bdberr);
    if (rc) {
        logmsg(LOGMSG_ERROR,
               "%s failed to cache table versions rc=%d bdberr=%d\n",
               __func__, rc, bdberr);
        return rc;
    }

    return 0;
}

/*
** Attempt to start a new transaction. A write-transaction
** is started if the second argument is nonzero, otherwise a read-
Expand Down Expand Up @@ -4958,12 +4987,19 @@ int sqlite3BtreeBeginTrans(Vdbe *vdbe, Btree *pBt, int wrflag, int *pSchemaVersi
goto done;
}

// TODO: Don't open shadows for read-only modsnap txns.
// In order to make this optimization work, we still need to latch
// the initial versions of tables and use these latched versions to
// fail on schema changes. This is currently handled in the shadow code
// but can be refactored out.
if ((clnt->dbtran.mode <= TRANLEVEL_RECOM) && wrflag == 0) { // read-only
if (clnt->dbtran.mode == TRANLEVEL_SNAPISOL ||
clnt->dbtran.mode == TRANLEVEL_SERIAL ||
clnt->dbtran.mode == TRANLEVEL_MODSNAP) {
rc = cache_table_versions(clnt);
if (rc) {
logmsg(LOGMSG_ERROR, "%s: Failed to cache table versions\n", __func__);
rc = SQLITE_ERROR;
goto done;
}
}

if ((clnt->dbtran.mode <= TRANLEVEL_RECOM || clnt->dbtran.mode == TRANLEVEL_MODSNAP)
&& wrflag == 0) { // read-only
if (clnt->has_recording == 0 || // not selectv
clnt->ctrl_sqlengine == SQLENG_NORMAL_PROCESS) { // singular selectv
rc = SQLITE_OK;
Expand Down Expand Up @@ -7945,7 +7981,7 @@ static int sqlite3LockStmtTables_int(sqlite3_stmt *pStmt, int after_recovery)
*/
int bdberr = 0;
rc = bdb_osql_check_table_version(
db->handle, clnt->dbtran.shadow_tran, 0, &bdberr);
db->handle, clnt->dbtran.table_version_cache, 0, &bdberr);
if (rc != 0) {
/* fprintf(stderr, "bdb_osql_check_table_version failed rc=%d
bdberr=%d\n",
Expand Down
Loading

0 comments on commit 570c7f1

Please sign in to comment.