From 0548da13ae9c65d624ad3f1de6dc505bf75ea64d Mon Sep 17 00:00:00 2001 From: Hiroki Takeuchi Date: Wed, 7 Apr 2021 02:41:21 +0000 Subject: [PATCH] Update liblmdb to 0.9.29 --- README.md | 2 +- lmdb/lmdb.h | 20 +-- lmdb/mdb.c | 392 +++++++++++++++++++++++++++++++--------------------- lmdb/midl.c | 3 +- lmdb/midl.h | 3 +- 5 files changed, 255 insertions(+), 165 deletions(-) diff --git a/README.md b/README.md index 551f462e..4275e1ae 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -#lmdb-go [![releases/v1.8.0](https://img.shields.io/badge/release-v1.8.0-375eab.svg)](releases) [![C/v0.9.19](https://img.shields.io/badge/C-v0.9.19-555555.svg)](https://github.com/LMDB/lmdb/blob/mdb.RE/0.9/libraries/liblmdb/CHANGES) [![Build Status](https://travis-ci.org/bmatsuo/lmdb-go.svg?branch=master)](https://travis-ci.org/bmatsuo/lmdb-go) +#lmdb-go [![releases/v1.8.0](https://img.shields.io/badge/release-v1.8.0-375eab.svg)](releases) [![C/v0.9.29](https://img.shields.io/badge/C-v0.9.29-555555.svg)](https://github.com/LMDB/lmdb/blob/mdb.RE/0.9/libraries/liblmdb/CHANGES) [![Build Status](https://travis-ci.org/bmatsuo/lmdb-go.svg?branch=master)](https://travis-ci.org/bmatsuo/lmdb-go) Go bindings to the OpenLDAP Lightning Memory-Mapped Database (LMDB). diff --git a/lmdb/lmdb.h b/lmdb/lmdb.h index 0aeff0d3..69aa2751 100644 --- a/lmdb/lmdb.h +++ b/lmdb/lmdb.h @@ -135,7 +135,7 @@ * * @author Howard Chu, Symas Corporation. * - * @copyright Copyright 2011-2016 Howard Chu, Symas Corp. All rights reserved. + * @copyright Copyright 2011-2021 Howard Chu, Symas Corp. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted only as authorized by the OpenLDAP @@ -200,7 +200,7 @@ typedef int mdb_filehandle_t; /** Library minor version */ #define MDB_VERSION_MINOR 9 /** Library patch version */ -#define MDB_VERSION_PATCH 19 +#define MDB_VERSION_PATCH 29 /** Combine args a,b,c into a single integer for easy version comparisons */ #define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c)) @@ -210,7 +210,7 @@ typedef int mdb_filehandle_t; MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH) /** The release date of this library version */ -#define MDB_VERSION_DATE "December 28, 2016" +#define MDB_VERSION_DATE "March 16, 2021" /** A stringifier for the version info */ #define MDB_VERSTR(a,b,c,d) "LMDB " #a "." #b "." #c ": (" d ")" @@ -370,7 +370,7 @@ typedef enum MDB_cursor_op { MDB_GET_BOTH, /**< Position at key/data pair. Only for #MDB_DUPSORT */ MDB_GET_BOTH_RANGE, /**< position at key, nearest data. Only for #MDB_DUPSORT */ MDB_GET_CURRENT, /**< Return key/data at current cursor position */ - MDB_GET_MULTIPLE, /**< Return key and up to a page of duplicate data items + MDB_GET_MULTIPLE, /**< Return up to a page of duplicate data items from current cursor position. Move cursor to prepare for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */ MDB_LAST, /**< Position at last key/data item */ @@ -379,7 +379,7 @@ typedef enum MDB_cursor_op { MDB_NEXT, /**< Position at next data item */ MDB_NEXT_DUP, /**< Position at next data item of current key. Only for #MDB_DUPSORT */ - MDB_NEXT_MULTIPLE, /**< Return key and up to a page of duplicate data items + MDB_NEXT_MULTIPLE, /**< Return up to a page of duplicate data items from next cursor position. Move cursor to prepare for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */ MDB_NEXT_NODUP, /**< Position at first data item of next key */ @@ -390,7 +390,7 @@ typedef enum MDB_cursor_op { MDB_SET, /**< Position at specified key */ MDB_SET_KEY, /**< Position at specified key, return key + data */ MDB_SET_RANGE, /**< Position at first key greater than or equal to specified key. */ - MDB_PREV_MULTIPLE /**< Position at previous page and return key and up to + MDB_PREV_MULTIPLE /**< Position at previous page and return up to a page of duplicate data items. Only for #MDB_DUPFIXED */ } MDB_cursor_op; @@ -576,7 +576,7 @@ int mdb_env_create(MDB_env **env); *
  • #MDB_NOTLS * Don't use Thread-Local Storage. Tie reader locktable slots to * #MDB_txn objects instead of to threads. I.e. #mdb_txn_reset() keeps - * the slot reseved for the #MDB_txn object. A thread may use parallel + * the slot reserved for the #MDB_txn object. A thread may use parallel * read-only transactions. A read-only transaction may span threads if * the user synchronizes its use. Applications that multiplex many * user threads over individual OS threads need this option. Such an @@ -928,7 +928,7 @@ void *mdb_env_get_userctx(MDB_env *env); typedef void MDB_assert_func(MDB_env *env, const char *msg); /** Set or reset the assert() callback of the environment. - * Disabled if liblmdb is buillt with NDEBUG. + * Disabled if liblmdb is built with NDEBUG. * @note This hack should become obsolete as lmdb's error handling matures. * @param[in] env An environment handle returned by #mdb_env_create(). * @param[in] func An #MDB_assert_func function, or 0. @@ -1510,6 +1510,10 @@ int mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data, /** @brief Delete current key/data pair * * This function deletes the key/data pair to which the cursor refers. + * This does not invalidate the cursor, so operations such as MDB_NEXT + * can still be used on it. + * Both MDB_NEXT and MDB_GET_CURRENT will return the same record after + * this operation. * @param[in] cursor A cursor handle returned by #mdb_cursor_open() * @param[in] flags Options for this operation. This parameter * must be set to 0 or one of the values described here. diff --git a/lmdb/mdb.c b/lmdb/mdb.c index 23c1f009..8cecdb2e 100644 --- a/lmdb/mdb.c +++ b/lmdb/mdb.c @@ -5,7 +5,7 @@ * BerkeleyDB API, but much simplified. */ /* - * Copyright 2011-2016 Howard Chu, Symas Corp. + * Copyright 2011-2021 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -113,6 +113,10 @@ typedef SSIZE_T ssize_t; /* Most platforms have posix_memalign, older may only have memalign */ #define HAVE_MEMALIGN 1 #include +/* On Solaris, we need the POSIX sigwait function */ +#if defined (__sun) +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif #endif #if !(defined(BYTE_ORDER) || defined(__BYTE_ORDER)) @@ -120,7 +124,10 @@ typedef SSIZE_T ssize_t; #include /* defines BYTE_ORDER on HPUX and Solaris */ #endif -#if defined(__APPLE__) || defined (BSD) || defined(__FreeBSD_kernel__) +#if defined(__FreeBSD__) && defined(__FreeBSD_version) && __FreeBSD_version >= 1100110 +# define MDB_USE_POSIX_MUTEX 1 +# define MDB_USE_ROBUST 1 +#elif defined(__APPLE__) || defined (BSD) || defined(__FreeBSD_kernel__) # define MDB_USE_POSIX_SEM 1 # define MDB_FDATASYNC fsync #elif defined(ANDROID) @@ -486,7 +493,7 @@ static txnid_t mdb_debug_start; * The string is printed literally, with no format processing. */ #define DPUTS(arg) DPRINTF(("%s", arg)) - /** Debuging output value of a cursor DBI: Negative in a sub-cursor. */ + /** Debugging output value of a cursor DBI: Negative in a sub-cursor. */ #define DDBI(mc) \ (((mc)->mc_flags & C_SUB) ? -(int)(mc)->mc_dbi : (int)(mc)->mc_dbi) /** @} */ @@ -1222,17 +1229,19 @@ typedef struct MDB_xcursor { unsigned char mx_dbflag; } MDB_xcursor; - /** Check if there is an inited xcursor, so #XCURSOR_REFRESH() is proper */ + /** Check if there is an inited xcursor */ #define XCURSOR_INITED(mc) \ ((mc)->mc_xcursor && ((mc)->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) - /** Update sub-page pointer, if any, in \b mc->mc_xcursor. Needed + /** Update the xcursor's sub-page pointer, if any, in \b mc. Needed * when the node which contains the sub-page may have moved. Called - * with \b mp = mc->mc_pg[mc->mc_top], \b ki = mc->mc_ki[mc->mc_top]. + * with leaf page \b mp = mc->mc_pg[\b top]. */ -#define XCURSOR_REFRESH(mc, mp, ki) do { \ +#define XCURSOR_REFRESH(mc, top, mp) do { \ MDB_page *xr_pg = (mp); \ - MDB_node *xr_node = NODEPTR(xr_pg, ki); \ + MDB_node *xr_node; \ + if (!XCURSOR_INITED(mc) || (mc)->mc_ki[top] >= NUMKEYS(xr_pg)) break; \ + xr_node = NODEPTR(xr_pg, (mc)->mc_ki[top]); \ if ((xr_node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) \ (mc)->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(xr_node); \ } while (0) @@ -1369,7 +1378,7 @@ static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); static MDB_meta *mdb_env_pick_meta(const MDB_env *env); static int mdb_env_write_meta(MDB_txn *txn); -#ifdef MDB_USE_POSIX_MUTEX /* Drop unused excl arg */ +#if defined(MDB_USE_POSIX_MUTEX) && !defined(MDB_ROBUST_SUPPORTED) /* Drop unused excl arg */ # define mdb_env_close0(env, excl) mdb_env_close1(env) #endif static void mdb_env_close0(MDB_env *env, int excl); @@ -2485,8 +2494,8 @@ mdb_page_touch(MDB_cursor *mc) if (m2 == mc) continue; if (m2->mc_pg[mc->mc_top] == mp) { m2->mc_pg[mc->mc_top] = np; - if (XCURSOR_INITED(m2) && IS_LEAF(np)) - XCURSOR_REFRESH(m2, np, m2->mc_ki[mc->mc_top]); + if (IS_LEAF(np)) + XCURSOR_REFRESH(m2, mc->mc_top, np); } } } @@ -3021,9 +3030,9 @@ mdb_txn_end(MDB_txn *txn, unsigned mode) txn->mt_parent->mt_flags &= ~MDB_TXN_HAS_CHILD; env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate; mdb_midl_free(txn->mt_free_pgs); - mdb_midl_free(txn->mt_spill_pgs); free(txn->mt_u.dirty_list); } + mdb_midl_free(txn->mt_spill_pgs); mdb_midl_free(pghead); } @@ -3088,10 +3097,41 @@ mdb_freelist_save(MDB_txn *txn) * we may be unable to return them to me_pghead. */ MDB_page *mp = txn->mt_loose_pgs; + MDB_ID2 *dl = txn->mt_u.dirty_list; + unsigned x; if ((rc = mdb_midl_need(&txn->mt_free_pgs, txn->mt_loose_count)) != 0) return rc; - for (; mp; mp = NEXT_LOOSE_PAGE(mp)) + for (; mp; mp = NEXT_LOOSE_PAGE(mp)) { mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno); + /* must also remove from dirty list */ + if (txn->mt_flags & MDB_TXN_WRITEMAP) { + for (x=1; x<=dl[0].mid; x++) + if (dl[x].mid == mp->mp_pgno) + break; + mdb_tassert(txn, x <= dl[0].mid); + } else { + x = mdb_mid2l_search(dl, mp->mp_pgno); + mdb_tassert(txn, dl[x].mid == mp->mp_pgno); + mdb_dpage_free(env, mp); + } + dl[x].mptr = NULL; + } + { + /* squash freed slots out of the dirty list */ + unsigned y; + for (y=1; dl[y].mptr && y <= dl[0].mid; y++); + if (y <= dl[0].mid) { + for(x=y, y++;;) { + while (!dl[y].mptr && y <= dl[0].mid) y++; + if (y > dl[0].mid) break; + dl[x++] = dl[y++]; + } + dl[0].mid = x-1; + } else { + /* all slots freed */ + dl[0].mid = 0; + } + } txn->mt_loose_pgs = NULL; txn->mt_loose_count = 0; } @@ -3954,9 +3994,9 @@ mdb_env_map(MDB_env *env, void *addr) * and won't map more than the file size. * Just set the maxsize right now. */ - if (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo + if (!(flags & MDB_WRITEMAP) && (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo || !SetEndOfFile(env->me_fd) - || SetFilePointer(env->me_fd, 0, NULL, 0) != 0) + || SetFilePointer(env->me_fd, 0, NULL, 0) != 0)) return ErrCode(); } @@ -3973,13 +4013,18 @@ mdb_env_map(MDB_env *env, void *addr) if (rc) return rc; #else + int mmap_flags = MAP_SHARED; int prot = PROT_READ; +#ifdef MAP_NOSYNC /* Used on FreeBSD */ + if (flags & MDB_NOSYNC) + mmap_flags |= MAP_NOSYNC; +#endif if (flags & MDB_WRITEMAP) { prot |= PROT_WRITE; if (ftruncate(env->me_fd, env->me_mapsize) < 0) return ErrCode(); } - env->me_map = mmap(addr, env->me_mapsize, prot, MAP_SHARED, + env->me_map = mmap(addr, env->me_mapsize, prot, mmap_flags, env->me_fd, 0); if (env->me_map == MAP_FAILED) { env->me_map = NULL; @@ -5057,7 +5102,7 @@ mdb_env_close0(MDB_env *env, int excl) if (env->me_fd != INVALID_HANDLE_VALUE) (void) close(env->me_fd); if (env->me_txns) { - MDB_PID_T pid = env->me_pid; + MDB_PID_T pid = getpid(); /* Clearing readers is done in this function because * me_txkey with its destructor must be disabled first. * @@ -5091,6 +5136,17 @@ mdb_env_close0(MDB_env *env, int excl) sem_unlink(env->me_txns->mti_wmname); } } +#elif defined(MDB_ROBUST_SUPPORTED) + /* If we have the filelock: If we are the + * only remaining user, clean up robust + * mutexes. + */ + if (excl == 0) + mdb_env_excl_lock(env, &excl); + if (excl > 0) { + pthread_mutex_destroy(env->me_txns->mti_rmutex); + pthread_mutex_destroy(env->me_txns->mti_wmutex); + } #endif munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); } @@ -5459,8 +5515,17 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) if (flags & (MDB_PS_FIRST|MDB_PS_LAST)) { i = 0; - if (flags & MDB_PS_LAST) + if (flags & MDB_PS_LAST) { i = NUMKEYS(mp) - 1; + /* if already init'd, see if we're already in right place */ + if (mc->mc_flags & C_INITIALIZED) { + if (mc->mc_ki[mc->mc_top] == i) { + mc->mc_top = mc->mc_snum++; + mp = mc->mc_pg[mc->mc_top]; + goto ready; + } + } + } } else { int exact; node = mdb_node_search(mc, key, &exact); @@ -5486,6 +5551,7 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) if ((rc = mdb_cursor_push(mc, mp))) return rc; +ready: if (flags & MDB_PS_MODIFY) { if ((rc = mdb_page_touch(mc)) != 0) return rc; @@ -5811,15 +5877,20 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) MDB_node *leaf; int rc; - if ((mc->mc_flags & C_EOF) || - ((mc->mc_flags & C_DEL) && op == MDB_NEXT_DUP)) { + if ((mc->mc_flags & C_DEL && op == MDB_NEXT_DUP)) return MDB_NOTFOUND; - } + if (!(mc->mc_flags & C_INITIALIZED)) return mdb_cursor_first(mc, key, data); mp = mc->mc_pg[mc->mc_top]; + if (mc->mc_flags & C_EOF) { + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mp)-1) + return MDB_NOTFOUND; + mc->mc_flags ^= C_EOF; + } + if (mc->mc_db->md_flags & MDB_DUPSORT) { leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { @@ -5871,16 +5942,12 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { mdb_xcursor_init1(mc, leaf); - } - if (data) { + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc != MDB_SUCCESS) + return rc; + } else if (data) { if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) return rc; - - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); - if (rc != MDB_SUCCESS) - return rc; - } } MDB_GET_KEY(leaf, key); @@ -5904,7 +5971,8 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) mp = mc->mc_pg[mc->mc_top]; - if (mc->mc_db->md_flags & MDB_DUPSORT) { + if ((mc->mc_db->md_flags & MDB_DUPSORT) && + mc->mc_ki[mc->mc_top] < NUMKEYS(mp)) { leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { if (op == MDB_PREV || op == MDB_PREV_DUP) { @@ -5943,27 +6011,25 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u", mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top])); + if (!IS_LEAF(mp)) + return MDB_CORRUPTED; + if (IS_LEAF2(mp)) { key->mv_size = mc->mc_db->md_pad; key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); return MDB_SUCCESS; } - mdb_cassert(mc, IS_LEAF(mp)); leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { mdb_xcursor_init1(mc, leaf); - } - if (data) { + rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc != MDB_SUCCESS) + return rc; + } else if (data) { if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) return rc; - - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); - if (rc != MDB_SUCCESS) - return rc; - } } MDB_GET_KEY(leaf, key); @@ -6119,24 +6185,22 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { mdb_xcursor_init1(mc, leaf); - } - if (data) { - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - if (op == MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) { - rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + if (op == MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) { + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + } else { + int ex2, *ex2p; + if (op == MDB_GET_BOTH) { + ex2p = &ex2; + ex2 = 0; } else { - int ex2, *ex2p; - if (op == MDB_GET_BOTH) { - ex2p = &ex2; - ex2 = 0; - } else { - ex2p = NULL; - } - rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p); - if (rc != MDB_SUCCESS) - return rc; + ex2p = NULL; } - } else if (op == MDB_GET_BOTH || op == MDB_GET_BOTH_RANGE) { + rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p); + if (rc != MDB_SUCCESS) + return rc; + } + } else if (data) { + if (op == MDB_GET_BOTH || op == MDB_GET_BOTH_RANGE) { MDB_val olddata; MDB_cmp_func *dcmp; if ((rc = mdb_node_read(mc, leaf, &olddata)) != MDB_SUCCESS) @@ -6194,22 +6258,23 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data) mc->mc_ki[mc->mc_top] = 0; if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { - key->mv_size = mc->mc_db->md_pad; - key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], 0, key->mv_size); + if ( key ) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], 0, key->mv_size); + } return MDB_SUCCESS; } - if (data) { - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - mdb_xcursor_init1(mc, leaf); - rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); - if (rc) - return rc; - } else { - if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) - return rc; - } + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc) + return rc; + } else if (data) { + if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) + return rc; } + MDB_GET_KEY(leaf, key); return MDB_SUCCESS; } @@ -6224,36 +6289,33 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) if (mc->mc_xcursor) mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); - if (!(mc->mc_flags & C_EOF)) { - - if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { - rc = mdb_page_search(mc, NULL, MDB_PS_LAST); - if (rc != MDB_SUCCESS) - return rc; - } - mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); - + if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { + rc = mdb_page_search(mc, NULL, MDB_PS_LAST); + if (rc != MDB_SUCCESS) + return rc; } + mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1; mc->mc_flags |= C_INITIALIZED|C_EOF; leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { - key->mv_size = mc->mc_db->md_pad; - key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], key->mv_size); + if (key) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], key->mv_size); + } return MDB_SUCCESS; } - if (data) { - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - mdb_xcursor_init1(mc, leaf); - rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); - if (rc) - return rc; - } else { - if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) - return rc; - } + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc) + return rc; + } else if (data) { + if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) + return rc; } MDB_GET_KEY(leaf, key); @@ -6410,6 +6472,11 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, rc = MDB_INCOMPATIBLE; break; } + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top])) { + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]); + rc = MDB_NOTFOUND; + break; + } { MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { @@ -6530,7 +6597,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, dkey.mv_size = 0; - if (flags == MDB_CURRENT) { + if (flags & MDB_CURRENT) { if (!(mc->mc_flags & C_INITIALIZED)) return EINVAL; rc = MDB_SUCCESS; @@ -6726,7 +6793,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, offset *= 4; /* space for 4 more */ break; } - /* FALLTHRU: Big enough MDB_DUPFIXED sub-page */ + /* FALLTHRU */ /* Big enough MDB_DUPFIXED sub-page */ case MDB_CURRENT: fp->mp_flags |= P_DIRTY; COPY_PGNO(fp->mp_pgno, mp->mp_pgno); @@ -6776,8 +6843,9 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, } else { memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE, olddata.mv_size - fp->mp_upper - PAGEBASE); + memcpy((char *)(&mp->mp_ptrs), (char *)(&fp->mp_ptrs), NUMKEYS(fp) * sizeof(mp->mp_ptrs[0])); for (i=0; imp_ptrs[i] = fp->mp_ptrs[i] + offset; + mp->mp_ptrs[i] += offset; } } @@ -6906,8 +6974,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, if (m3->mc_ki[i] >= mc->mc_ki[i] && insert_key) { m3->mc_ki[i]++; } - if (XCURSOR_INITED(m3)) - XCURSOR_REFRESH(m3, mp, m3->mc_ki[i]); + XCURSOR_REFRESH(m3, i, mp); } } } @@ -6925,7 +6992,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, xdata.mv_size = 0; xdata.mv_data = ""; leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - if (flags & MDB_CURRENT) { + if ((flags & (MDB_CURRENT|MDB_APPENDDUP)) == MDB_CURRENT) { xflags = MDB_CURRENT|MDB_NOSPILL; } else { mdb_xcursor_init1(mc, leaf); @@ -6949,7 +7016,6 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_xcursor *mx = mc->mc_xcursor; unsigned i = mc->mc_top; MDB_page *mp = mc->mc_pg[i]; - int nkeys = NUMKEYS(mp); for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; @@ -6957,8 +7023,8 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, if (m2->mc_pg[i] == mp) { if (m2->mc_ki[i] == mc->mc_ki[i]) { mdb_xcursor_init2(m2, mx, new_dupdata); - } else if (!insert_key && m2->mc_ki[i] < nkeys) { - XCURSOR_REFRESH(m2, mp, m2->mc_ki[i]); + } else if (!insert_key) { + XCURSOR_REFRESH(m2, i, mp); } } } @@ -7030,6 +7096,8 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) return rc; mp = mc->mc_pg[mc->mc_top]; + if (!IS_LEAF(mp)) + return MDB_CORRUPTED; if (IS_LEAF2(mp)) goto del_key; leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); @@ -7063,12 +7131,7 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (!(m2->mc_flags & C_INITIALIZED)) continue; if (m2->mc_pg[mc->mc_top] == mp) { - MDB_node *n2 = leaf; - if (m2->mc_ki[mc->mc_top] != mc->mc_ki[mc->mc_top]) { - n2 = NODEPTR(mp, m2->mc_ki[mc->mc_top]); - if (n2->mn_flags & F_SUBDATA) continue; - } - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2); + XCURSOR_REFRESH(m2, mc->mc_top, mp); } } } @@ -7644,9 +7707,15 @@ mdb_cursor_count(MDB_cursor *mc, size_t *countp) if (!(mc->mc_flags & C_INITIALIZED)) return EINVAL; - if (!mc->mc_snum || (mc->mc_flags & C_EOF)) + if (!mc->mc_snum) return MDB_NOTFOUND; + if (mc->mc_flags & C_EOF) { + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top])) + return MDB_NOTFOUND; + mc->mc_flags ^= C_EOF; + } + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { *countp = 1; @@ -7908,8 +7977,8 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; m3->mc_ki[csrc->mc_top-1]++; } - if (XCURSOR_INITED(m3) && IS_LEAF(mps)) - XCURSOR_REFRESH(m3, m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]); + if (IS_LEAF(mps)) + XCURSOR_REFRESH(m3, csrc->mc_top, m3->mc_pg[csrc->mc_top]); } } else /* Adding on the right, bump others down */ @@ -7930,8 +7999,8 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) } else { m3->mc_ki[csrc->mc_top]--; } - if (XCURSOR_INITED(m3) && IS_LEAF(mps)) - XCURSOR_REFRESH(m3, m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]); + if (IS_LEAF(mps)) + XCURSOR_REFRESH(m3, csrc->mc_top, m3->mc_pg[csrc->mc_top]); } } } @@ -8132,8 +8201,8 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) m3->mc_ki[top-1] > csrc->mc_ki[top-1]) { m3->mc_ki[top-1]--; } - if (XCURSOR_INITED(m3) && IS_LEAF(psrc)) - XCURSOR_REFRESH(m3, m3->mc_pg[top], m3->mc_ki[top]); + if (IS_LEAF(psrc)) + XCURSOR_REFRESH(m3, top, m3->mc_pg[top]); } } { @@ -8395,61 +8464,73 @@ mdb_cursor_del0(MDB_cursor *mc) } else if (m3->mc_ki[mc->mc_top] > ki) { m3->mc_ki[mc->mc_top]--; } - if (XCURSOR_INITED(m3)) - XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); + XCURSOR_REFRESH(m3, mc->mc_top, mp); } } } rc = mdb_rebalance(mc); + if (rc) + goto fail; - if (rc == MDB_SUCCESS) { - /* DB is totally empty now, just bail out. - * Other cursors adjustments were already done - * by mdb_rebalance and aren't needed here. - */ - if (!mc->mc_snum) - return rc; + /* DB is totally empty now, just bail out. + * Other cursors adjustments were already done + * by mdb_rebalance and aren't needed here. + */ + if (!mc->mc_snum) { + mc->mc_flags |= C_EOF; + return rc; + } - mp = mc->mc_pg[mc->mc_top]; - nkeys = NUMKEYS(mp); + mp = mc->mc_pg[mc->mc_top]; + nkeys = NUMKEYS(mp); - /* Adjust other cursors pointing to mp */ - for (m2 = mc->mc_txn->mt_cursors[dbi]; !rc && m2; m2=m2->mc_next) { - m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; - if (! (m2->mc_flags & m3->mc_flags & C_INITIALIZED)) - continue; - if (m3->mc_snum < mc->mc_snum) - continue; - if (m3->mc_pg[mc->mc_top] == mp) { - /* if m3 points past last node in page, find next sibling */ - if (m3->mc_ki[mc->mc_top] >= mc->mc_ki[mc->mc_top]) { - if (m3->mc_ki[mc->mc_top] >= nkeys) { - rc = mdb_cursor_sibling(m3, 1); - if (rc == MDB_NOTFOUND) { - m3->mc_flags |= C_EOF; - rc = MDB_SUCCESS; - continue; - } + /* Adjust other cursors pointing to mp */ + for (m2 = mc->mc_txn->mt_cursors[dbi]; !rc && m2; m2=m2->mc_next) { + m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; + if (!(m2->mc_flags & m3->mc_flags & C_INITIALIZED)) + continue; + if (m3->mc_snum < mc->mc_snum) + continue; + if (m3->mc_pg[mc->mc_top] == mp) { + if (m3->mc_ki[mc->mc_top] >= mc->mc_ki[mc->mc_top]) { + /* if m3 points past last node in page, find next sibling */ + if (m3->mc_ki[mc->mc_top] >= nkeys) { + rc = mdb_cursor_sibling(m3, 1); + if (rc == MDB_NOTFOUND) { + m3->mc_flags |= C_EOF; + rc = MDB_SUCCESS; + continue; } - if (mc->mc_db->md_flags & MDB_DUPSORT) { - MDB_node *node = NODEPTR(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]); - /* If this node is a fake page, it needs to be reinited - * because its data has moved. But just reset mc_pg[0] - * if the xcursor is already live. - */ - if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) { - if (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) + if (rc) + goto fail; + } + if (m3->mc_xcursor && !(m3->mc_flags & C_EOF)) { + MDB_node *node = NODEPTR(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]); + /* If this node has dupdata, it may need to be reinited + * because its data has moved. + * If the xcursor was not initd it must be reinited. + * Else if node points to a subDB, nothing is needed. + * Else (xcursor was initd, not a subDB) needs mc_pg[0] reset. + */ + if (node->mn_flags & F_DUPDATA) { + if (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { + if (!(node->mn_flags & F_SUBDATA)) m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); - else - mdb_xcursor_init1(m3, node); + } else { + mdb_xcursor_init1(m3, node); + rc = mdb_cursor_first(&m3->mc_xcursor->mx_cursor, NULL, NULL); + if (rc) + goto fail; } } + m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL; } } } - mc->mc_flags |= C_DEL; } + mc->mc_flags |= C_DEL; +fail: if (rc) mc->mc_txn->mt_flags |= MDB_TXN_ERROR; return rc; @@ -8691,7 +8772,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno * the split so the new page is emptier than the old page. * This yields better packing during sequential inserts. */ - if (nkeys < 20 || nsize > pmax/16 || newindx >= nkeys) { + if (nkeys < 32 || nsize > pmax/16 || newindx >= nkeys) { /* Find split point */ psize = 0; if (newindx <= split_indx || newindx >= nkeys) { @@ -8927,8 +9008,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno m3->mc_ki[ptop] >= mc->mc_ki[ptop]) { m3->mc_ki[ptop]++; } - if (XCURSOR_INITED(m3) && IS_LEAF(mp)) - XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); + if (IS_LEAF(mp)) + XCURSOR_REFRESH(m3, mc->mc_top, m3->mc_pg[mc->mc_top]); } } DPRINTF(("mp left: %d, rp left: %d", SIZELEFT(mp), SIZELEFT(rp))); @@ -9733,8 +9814,11 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]); if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) != F_SUBDATA) return MDB_INCOMPATIBLE; - } else if (! (rc == MDB_NOTFOUND && (flags & MDB_CREATE))) { - return rc; + } else { + if (rc != MDB_NOTFOUND || !(flags & MDB_CREATE)) + return rc; + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) + return EACCES; } /* Done here so we cannot fail after creating a new DB */ diff --git a/lmdb/midl.c b/lmdb/midl.c index 5c6d841a..b0ea5383 100644 --- a/lmdb/midl.c +++ b/lmdb/midl.c @@ -3,7 +3,8 @@ /* $OpenLDAP$ */ /* This work is part of OpenLDAP Software . * - * Copyright 2000-2015 The OpenLDAP Foundation. + * Copyright 2000-2021 The OpenLDAP Foundation. + * Portions Copyright 2001-2021 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lmdb/midl.h b/lmdb/midl.h index 2331e783..dd6ae77c 100644 --- a/lmdb/midl.h +++ b/lmdb/midl.h @@ -11,7 +11,8 @@ /* $OpenLDAP$ */ /* This work is part of OpenLDAP Software . * - * Copyright 2000-2015 The OpenLDAP Foundation. + * Copyright 2000-2021 The OpenLDAP Foundation. + * Portions Copyright 2001-2021 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without