mdb_internal(3)
internal
Description
internal
NAME
internal
SYNOPSIS
Modules
ID List
Management
Compatibility Macros
A bunch of macros to minimize the amount of
platform-specific ifdefs needed throughout the rest of the
code. When the features this library needs are similar
enough to POSIX to be hidden in a one-or-two line
replacement, this macro approach is used.
Debug Macros
Reader Lock Table
Readers don’t acquire any locks for their data access.
Instead, they simply record their transaction ID in the
reader table. The reader mutex is needed just to find an
empty slot in the reader table. The slot’s address is
saved in thread-specific data so that subsequent read
transactions started by the same thread need no further
locking to proceed.
Page Flags
Node Flags
Transaction DB Flags
Transaction Flags
Cursor Flags
Data Structures
struct
MDB_page
struct MDB_node
struct MDB_db
struct MDB_meta
union MDB_metabuf
struct MDB_dbx
struct MDB_txn
Opaque structure for a transaction handle.
struct MDB_cursor
Opaque structure for navigating through a database.
struct MDB_xcursor
struct MDB_pgstate
struct MDB_env
Opaque structure for a database environment.
struct MDB_ntxn
struct MDB_name
struct mdb_copy
Macros
#define
LOCK_MUTEX(rc, env, mutex) ((rc) =
LOCK_MUTEX0(mutex))
#define mdb_mutex_failed(env, mutex, rc) (rc)
#define MDB_DSYNC O_SYNC
#define MDB_FDATASYNC fdatasync
#define MDB_MSYNC(addr, len, flags)
msync(addr,len,flags)
#define MS_SYNC 1
#define MS_ASYNC 0
#define MAX_PAGESIZE (PAGEBASE ? 0x10000 :
0x8000)
The maximum size of a database page.
#define MDB_MINKEYS 2
#define MDB_MAGIC 0xBEEFC0DE
#define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 :
1)
#define MDB_LOCK_VERSION 1
#define MDB_MAXKEYSIZE ((MDB_DEVEL) ? 0 : 511)
The max size of a key we can write, or 0 for computed max.
#define ENV_MAXKEY(env) (MDB_MAXKEYSIZE)
#define MAXDATASIZE 0xffffffffUL
The maximum size of a data item.
#define DKBUF
#define DKEY(x) 0
#define P_INVALID (~(pgno_t)0)
#define F_ISSET(w, f) (((w) & (f)) == (f))
#define EVEN(n) (((n) + 1U) & -2) /*
sign-extending -2 to match n+1U */
#define DEFAULT_MAPSIZE 1048576
#define PAGEHDRSZ ((unsigned)
offsetof(MDB_page, mp_ptrs))
#define METADATA(p) ((void *)((char *)(p) +
PAGEHDRSZ))
#define PAGEBASE ((MDB_DEVEL) ?
PAGEHDRSZ : 0)
#define NUMKEYS(p) (((p)->mp_lower -
(PAGEHDRSZ-PAGEBASE)) >> 1)
#define SIZELEFT(p) (indx_t)((p)->mp_upper
- (p)->mp_lower)
#define PAGEFILL(env, p)
#define FILL_THRESHOLD 250
#define IS_LEAF(p) F_ISSET((p)->mp_flags,
P_LEAF)
#define IS_LEAF2(p) F_ISSET((p)->mp_flags,
P_LEAF2)
#define IS_BRANCH(p) F_ISSET((p)->mp_flags,
P_BRANCH)
#define IS_OVERFLOW(p)
F_ISSET((p)->mp_flags, P_OVERFLOW)
#define IS_SUBP(p) F_ISSET((p)->mp_flags,
P_SUBP)
#define OVPAGES(size, psize) ((PAGEHDRSZ-1 +
(size)) / (psize) + 1)
#define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p)
+ 2))
#define NODESIZE offsetof(MDB_node, mn_data)
#define PGNO_TOPWORD ((pgno_t)-1 >
0xffffffffu ? 32 : 0)
#define INDXSIZE(k) (NODESIZE + ((k) == NULL ?
0 : (k)->mv_size))
#define LEAFSIZE(k, d) (NODESIZE +
(k)->mv_size + (d)->mv_size)
#define NODEPTR(p, i) ((MDB_node *)((char
*)(p) + (p)->mp_ptrs[i] + PAGEBASE))
#define NODEKEY(node) (void *)((node)->mn_data)
#define NODEDATA(node) (void *)((char
*)(node)->mn_data + (node)->mn_ksize)
#define NODEPGNO(node)
#define SETPGNO(node, pgno)
#define NODEDSZ(node) ((node)->mn_lo |
((unsigned)(node)->mn_hi << 16))
#define SETDSZ(node, size)
#define NODEKSZ(node) ((node)->mn_ksize)
#define COPY_PGNO(dst, src)
#define LEAF2KEY(p, i, ks) ((char *)(p) +
PAGEHDRSZ + ((i)*(ks)))
#define MDB_GET_KEY(node, keyptr)
#define MDB_GET_KEY2(node, key) { key.mv_size =
NODEKSZ(node); key.mv_data = NODEKEY(node); }
#define MDB_VALID 0x8000
#define PERSISTENT_FLAGS (0xffff &
~(MDB_VALID))
#define VALID_FLAGS
#define FREE_DBI 0
#define MAIN_DBI 1
#define CORE_DBS 2
#define NUM_METAS 2
#define CURSOR_STACK 32
#define XCURSOR_INITED(mc) ((mc)->mc_xcursor
&& ((mc)->mc_xcursor->mx_cursor.mc_flags &
C_INITIALIZED))
#define XCURSOR_REFRESH(mc, top, mp)
#define MDB_COMMIT_PAGES 64
#define MAX_WRITE (0x40000000U >>
(sizeof(ssize_t) == 4))
#define TXN_DBI_EXIST(txn, dbi, validity) ((txn)
&& (dbi)<(txn)->mt_numdbs &&
((txn)->mt_dbflags[dbi] & (validity)))
#define TXN_DBI_CHANGED(txn, dbi)
((txn)->mt_dbiseqs[dbi] !=
(txn)->mt_env->me_dbiseqs[dbi])
#define MDB_END_NAMES
#define MDB_END_OPMASK 0x0F
#define MDB_END_UPDATE 0x10
#define MDB_END_FREE 0x20
#define MDB_END_SLOT MDB_NOTLS
#define MDB_PS_MODIFY 1
#define MDB_PS_ROOTONLY 2
#define MDB_PS_FIRST 4
#define MDB_PS_LAST 8
#define MDB_SPLIT_REPLACE MDB_APPENDDUP
#define mdb_env_close0(env, excl) mdb_env_close1(env)
#define mdb_cmp_clong mdb_cmp_cint
#define mdb_cassert(mc, expr)
mdb_assert0((mc)->mc_txn->mt_env, expr, #expr)
#define mdb_tassert(txn, expr)
mdb_assert0((txn)->mt_env, expr, #expr)
#define mdb_eassert(env, expr) mdb_assert0(env, expr,
#expr)
#define mdb_assert0(env, expr, expr_txt)
#define MDB_NAME(str) str
#define mdb_name_cpy strcpy
#define MDB_SUFFLEN 9
#define mdb_fname_destroy(fname) do { if
((fname).mn_alloced) free((fname).mn_val); } while (0)
#define MDB_CLOEXEC 0
#define CHANGEABLE
(MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT)
#define CHANGELESS
#define MDB_NOSPILL 0x8000
#define WITH_CURSOR_TRACKING(mn, act)
#define MDB_WBUF (1024*1024)
#define MDB_EOF 0x10
Typedefs
typedef
MDB_ID pgno_t
typedef MDB_ID txnid_t
typedef uint16_t indx_t
typedef char mdb_nchar_t
Enumerations
enum {
MDB_END_COMMITTED, MDB_END_EMPTY_COMMIT,
MDB_END_ABORT, MDB_END_RESET,
MDB_END_RESET_TMP, MDB_END_FAIL_BEGIN,
MDB_END_FAIL_BEGINCHILD }
enum Pidlock_op { Pidset = F_SETLK,
Pidcheck = F_GETLK }
enum mdb_fopen_type { MDB_O_RDONLY = O_RDONLY,
MDB_O_RDWR = O_RDWR |O_CREAT, MDB_O_META =
O_WRONLY|MDB_DSYNC |MDB_CLOEXEC, MDB_O_COPY =
O_WRONLY|O_CREAT|O_EXCL|MDB_CLOEXEC, MDB_O_MASK =
MDB_O_RDWR|MDB_CLOEXEC | MDB_O_RDONLY|MDB_O_META|MDB_O_COPY,
MDB_O_LOCKS = MDB_O_RDWR|MDB_CLOEXEC |
((MDB_O_MASK+1) & ~MDB_O_MASK) }
Functions
static int
mdb_page_alloc (MDB_cursor *mc, int num,
MDB_page **mp)
static int mdb_page_new (MDB_cursor *mc,
uint32_t flags, int num, MDB_page **mp)
static int mdb_page_touch (MDB_cursor *mc)
static void mdb_txn_end (MDB_txn *txn,
unsigned mode)
static int mdb_page_get (MDB_cursor *mc,
pgno_t pgno, MDB_page **mp, int *lvl)
static int mdb_page_search_root (MDB_cursor
*mc, MDB_val *key, int modify)
static int mdb_page_search (MDB_cursor *mc,
MDB_val *key, int flags)
static int mdb_page_merge (MDB_cursor *csrc,
MDB_cursor *cdst)
static int mdb_page_split (MDB_cursor *mc,
MDB_val *newkey, MDB_val *newdata,
pgno_t newpgno, unsigned int nflags)
static int mdb_env_read_header (MDB_env *env,
MDB_meta *meta)
static MDB_meta * mdb_env_pick_meta (const
MDB_env *env)
static int mdb_env_write_meta (MDB_txn *txn)
static void mdb_env_close0 (MDB_env *env, int
excl)
static MDB_node * mdb_node_search
(MDB_cursor *mc, MDB_val *key, int *exactp)
static int mdb_node_add (MDB_cursor *mc,
indx_t indx, MDB_val *key, MDB_val
*data, pgno_t pgno, unsigned int flags)
static void mdb_node_del (MDB_cursor *mc, int
ksize)
static void mdb_node_shrink (MDB_page *mp,
indx_t indx)
static int mdb_node_move (MDB_cursor *csrc,
MDB_cursor *cdst, int fromleft)
static int mdb_node_read (MDB_cursor *mc,
MDB_node *leaf, MDB_val *data)
static size_t mdb_leaf_size (MDB_env *env,
MDB_val *key, MDB_val *data)
static size_t mdb_branch_size (MDB_env *env,
MDB_val *key)
static int mdb_rebalance (MDB_cursor *mc)
static int mdb_update_key (MDB_cursor *mc,
MDB_val *key)
static void mdb_cursor_pop (MDB_cursor *mc)
static int mdb_cursor_push (MDB_cursor *mc,
MDB_page *mp)
static int mdb_cursor_del0 (MDB_cursor *mc)
static int mdb_del0 (MDB_txn *txn,
MDB_dbi dbi, MDB_val *key, MDB_val
*data, unsigned flags)
static int mdb_cursor_sibling (MDB_cursor *mc,
int move_right)
static int mdb_cursor_next (MDB_cursor *mc,
MDB_val *key, MDB_val *data,
MDB_cursor_op op)
static int mdb_cursor_prev (MDB_cursor *mc,
MDB_val *key, MDB_val *data,
MDB_cursor_op op)
static int mdb_cursor_set (MDB_cursor *mc,
MDB_val *key, MDB_val *data,
MDB_cursor_op op, int *exactp)
static int mdb_cursor_first (MDB_cursor *mc,
MDB_val *key, MDB_val *data)
static int mdb_cursor_last (MDB_cursor *mc,
MDB_val *key, MDB_val *data)
static void mdb_cursor_init (MDB_cursor *mc,
MDB_txn *txn, MDB_dbi dbi, MDB_xcursor
*mx)
static void mdb_xcursor_init0 (MDB_cursor *mc)
static void mdb_xcursor_init1 (MDB_cursor *mc,
MDB_node *node)
static void mdb_xcursor_init2 (MDB_cursor *mc,
MDB_xcursor *src_mx, int force)
static int mdb_drop0 (MDB_cursor *mc, int
subs)
static void mdb_default_cmp (MDB_txn *txn,
MDB_dbi dbi)
static int mdb_reader_check0 (MDB_env *env,
int rlocked, int *dead)
char *ESECT mdb_version (int *major, int *minor, int
*patch)
Return the LMDB library version information.
char * mdb_strerror (int err)
Return a string describing a given error code.
static void ESECT mdb_assert_fail (MDB_env
*env, const char *expr_txt, const char *func, const char
*file, int line)
int mdb_cmp (MDB_txn *txn, MDB_dbi dbi,
const MDB_val *a, const MDB_val *b)
Compare two data items according to a particular database.
int mdb_dcmp (MDB_txn *txn, MDB_dbi
dbi, const MDB_val *a, const MDB_val *b)
Compare two data items according to a particular database.
static MDB_page * mdb_page_malloc
(MDB_txn *txn, unsigned num)
static void mdb_page_free (MDB_env *env,
MDB_page *mp)
static void mdb_dpage_free (MDB_env *env,
MDB_page *dp)
static void mdb_dlist_free (MDB_txn *txn)
static int mdb_page_loose (MDB_cursor *mc,
MDB_page *mp)
static int mdb_pages_xkeep (MDB_cursor *mc,
unsigned pflags, int all)
static int mdb_page_flush (MDB_txn *txn, int
keep)
static int mdb_page_spill (MDB_cursor *m0,
MDB_val *key, MDB_val *data)
static txnid_t mdb_find_oldest (MDB_txn *txn)
static void mdb_page_dirty (MDB_txn *txn,
MDB_page *mp)
static void mdb_page_copy (MDB_page *dst,
MDB_page *src, unsigned int psize)
static int mdb_page_unspill (MDB_txn *txn,
MDB_page *mp, MDB_page **ret)
int mdb_env_sync (MDB_env *env, int force)
Flush the data buffers to disk.
static int mdb_cursor_shadow (MDB_txn *src,
MDB_txn *dst)
static void mdb_cursors_close (MDB_txn *txn,
unsigned merge)
static int mdb_reader_pid (MDB_env *env, enum
Pidlock_op op, MDB_PID_T pid)
static int mdb_txn_renew0 (MDB_txn *txn)
int mdb_txn_renew (MDB_txn *txn)
Renew a read-only transaction.
int mdb_txn_begin (MDB_env *env,
MDB_txn *parent, unsigned int flags, MDB_txn
**ret)
Create a transaction for use with the environment.
MDB_env * mdb_txn_env (MDB_txn *txn)
Returns the transaction’s MDB_env.
size_t mdb_txn_id (MDB_txn *txn)
Return the transaction’s ID.
static void mdb_dbis_update (MDB_txn *txn, int
keep)
void mdb_txn_reset (MDB_txn *txn)
Reset a read-only transaction.
void mdb_txn_abort (MDB_txn *txn)
Abandon all the operations of the transaction instead of
saving them.
static int mdb_freelist_save (MDB_txn *txn)
int mdb_txn_commit (MDB_txn *txn)
Commit all the operations of a transaction into the
database.
static void ESECT mdb_env_init_meta0 (MDB_env
*env, MDB_meta *meta)
static int ESECT mdb_env_init_meta (MDB_env
*env, MDB_meta *meta)
int ESECT mdb_env_create (MDB_env **env)
Create an LMDB environment handle.
static int ESECT mdb_env_map (MDB_env *env,
void *addr)
int ESECT mdb_env_set_mapsize (MDB_env *env,
size_t size)
Set the size of the memory map to use for this environment.
int ESECT mdb_env_set_maxdbs (MDB_env *env,
MDB_dbi dbs)
Set the maximum number of named databases for the
environment.
int ESECT mdb_env_set_maxreaders (MDB_env
*env, unsigned int readers)
Set the maximum number of threads/reader slots for the
environment.
int ESECT mdb_env_get_maxreaders (MDB_env
*env, unsigned int *readers)
Get the maximum number of threads/reader slots for the
environment.
static int ESECT mdb_fsize (HANDLE fd, size_t
*size)
static int ESECT mdb_fname_init (const char *path,
unsigned envflags, MDB_name *fname)
static int ESECT mdb_fopen (const MDB_env
*env, MDB_name *fname, enum mdb_fopen_type
which, mdb_mode_t mode, HANDLE *res)
static int ESECT mdb_env_open2 (MDB_env *env)
static void mdb_env_reader_dest (void *ptr)
static int ESECT mdb_env_share_locks (MDB_env
*env, int *excl)
static int ESECT mdb_env_excl_lock (MDB_env
*env, int *excl)
static int ESECT mdb_env_setup_locks (MDB_env
*env, MDB_name *fname, int mode, int *excl)
int ESECT mdb_env_open (MDB_env *env, const
char *path, unsigned int flags, mdb_mode_t mode)
Open an environment handle.
void ESECT mdb_env_close (MDB_env *env)
Close the environment and release the memory map.
static int mdb_cmp_long (const MDB_val *a,
const MDB_val *b)
static int mdb_cmp_int (const MDB_val *a,
const MDB_val *b)
static int mdb_cmp_cint (const MDB_val *a,
const MDB_val *b)
static int mdb_cmp_memn (const MDB_val *a,
const MDB_val *b)
static int mdb_cmp_memnr (const MDB_val *a,
const MDB_val *b)
static int mdb_page_search_lowest (MDB_cursor
*mc)
static int mdb_ovpage_free (MDB_cursor *mc,
MDB_page *mp)
int mdb_get (MDB_txn *txn, MDB_dbi dbi,
MDB_val *key, MDB_val *data)
Get items from a database.
int mdb_cursor_get (MDB_cursor *mc,
MDB_val *key, MDB_val *data,
MDB_cursor_op op)
Retrieve by cursor.
static int mdb_cursor_touch (MDB_cursor *mc)
int mdb_cursor_put (MDB_cursor *mc,
MDB_val *key, MDB_val *data, unsigned int
flags)
Store by cursor.
int mdb_cursor_del (MDB_cursor *mc, unsigned
int flags)
Delete current key/data pair.
int mdb_cursor_open (MDB_txn *txn,
MDB_dbi dbi, MDB_cursor **ret)
Create a cursor handle.
int mdb_cursor_renew (MDB_txn *txn,
MDB_cursor *mc)
Renew a cursor handle.
int mdb_cursor_count (MDB_cursor *mc, size_t
*countp)
Return count of duplicates for current key.
void mdb_cursor_close (MDB_cursor *mc)
Close a cursor handle.
MDB_txn * mdb_cursor_txn (MDB_cursor *mc)
Return the cursor’s transaction handle.
MDB_dbi mdb_cursor_dbi (MDB_cursor *mc)
Return the cursor’s database handle.
static void mdb_cursor_copy (const MDB_cursor
*csrc, MDB_cursor *cdst)
int mdb_del (MDB_txn *txn, MDB_dbi dbi,
MDB_val *key, MDB_val *data)
Delete items from a database.
int mdb_put (MDB_txn *txn, MDB_dbi dbi,
MDB_val *key, MDB_val *data, unsigned int
flags)
Store items into a database.
static THREAD_RET ESECT CALL_CONV
mdb_env_copythr (void *arg)
static int ESECT mdb_env_cthr_toggle (mdb_copy
*my, int adjust)
static int ESECT mdb_env_cwalk (mdb_copy *my,
pgno_t *pg, int flags)
static int ESECT mdb_env_copyfd1 (MDB_env
*env, HANDLE fd)
static int ESECT mdb_env_copyfd0 (MDB_env
*env, HANDLE fd)
int ESECT mdb_env_copyfd2 (MDB_env *env,
HANDLE fd, unsigned int flags)
int ESECT mdb_env_copyfd (MDB_env *env,
HANDLE fd)
int ESECT mdb_env_copy2 (MDB_env *env, const
char *path, unsigned int flags)
Copy an LMDB environment to the specified path, with
options.
int ESECT mdb_env_copy (MDB_env *env, const
char *path)
Copy an LMDB environment to the specified path.
int ESECT mdb_env_set_flags (MDB_env *env,
unsigned int flag, int onoff)
Set environment flags.
int ESECT mdb_env_get_flags (MDB_env *env,
unsigned int *arg)
Get environment flags.
int ESECT mdb_env_set_userctx (MDB_env *env,
void *ctx)
Set application information associated with the
MDB_env.
void *ESECT mdb_env_get_userctx (MDB_env *env)
Get the application information associated with the
MDB_env.
int ESECT mdb_env_set_assert (MDB_env *env,
MDB_assert_func *func)
int ESECT mdb_env_get_path (MDB_env *env,
const char **arg)
Return the path that was used in mdb_env_open().
int ESECT mdb_env_get_fd (MDB_env *env,
mdb_filehandle_t *arg)
Return the filedescriptor for the given environment.
static int ESECT mdb_stat0 (MDB_env *env,
MDB_db *db, MDB_stat *arg)
int ESECT mdb_env_stat (MDB_env *env,
MDB_stat *arg)
Return statistics about the LMDB environment.
int ESECT mdb_env_info (MDB_env *env,
MDB_envinfo *arg)
Return information about the LMDB environment.
int mdb_dbi_open (MDB_txn *txn, const char
*name, unsigned int flags, MDB_dbi *dbi)
Open a database in the environment.
int ESECT mdb_stat (MDB_txn *txn,
MDB_dbi dbi, MDB_stat *arg)
Retrieve statistics for a database.
void mdb_dbi_close (MDB_env *env,
MDB_dbi dbi)
Close a database handle. Normally unnecessary. Use with
care.
int mdb_dbi_flags (MDB_txn *txn,
MDB_dbi dbi, unsigned int *flags)
Retrieve the DB flags for a database handle.
int mdb_drop (MDB_txn *txn, MDB_dbi
dbi, int del)
Empty or delete+close a database.
int mdb_set_compare (MDB_txn *txn,
MDB_dbi dbi, MDB_cmp_func *cmp)
Set a custom key comparison function for a database.
int mdb_set_dupsort (MDB_txn *txn,
MDB_dbi dbi, MDB_cmp_func *cmp)
Set a custom data comparison function for a
MDB_DUPSORT database.
int mdb_set_relfunc (MDB_txn *txn,
MDB_dbi dbi, MDB_rel_func *rel)
Set a relocation function for a MDB_FIXEDMAP
database.
int mdb_set_relctx (MDB_txn *txn,
MDB_dbi dbi, void *ctx)
Set a context pointer for a MDB_FIXEDMAP
database’s relocation function.
int ESECT mdb_env_get_maxkeysize (MDB_env
*env)
Get the maximum size of keys and MDB_DUPSORT data we
can write.
int ESECT mdb_reader_list (MDB_env *env,
MDB_msg_func *func, void *ctx)
Dump the entries in the reader lock table.
static int ESECT mdb_pid_insert (MDB_PID_T *ids,
MDB_PID_T pid)
int ESECT mdb_reader_check (MDB_env *env, int
*dead)
Check for stale entries in the reader lock table.
Variables
static char
*const mdb_errstr []
static const mdb_nchar_t *const mdb_suffixes
[2][2]
Detailed Description
Data Structure Documentation
struct MDB_page
Common header for all page types. The page type depends on mp_flags.
P_BRANCH and P_LEAF pages have unsorted ’MDB_node’s at the end, with sorted mp_ptrs[] entries referring to them. Exception: P_LEAF2 pages omit mp_ptrs and pack sorted MDB_DUPFIXED values after the page header.
P_OVERFLOW records occupy one or more contiguous pages where only the first has a page header. They hold the real data of F_BIGDATA nodes.
P_SUBP sub-pages are small leaf ’pages’ with duplicate data. A node with flag F_DUPDATA but not F_SUBDATA contains a sub-page. (Duplicate data can also go in sub-databases, which use normal pages.)
P_META pages contain MDB_meta, the start point of an LMDB snapshot.
Each non-metapage up to MDB_meta.mm_last_pg is reachable exactly once in the snapshot: Either used by a database or listed in a freeDB record.
Data Fields
union {
pgno_t p_pgno
struct MDB_page * p_next
} mp_p
uint16_t mp_pad
uint16_t mp_flags
union {
struct {
indx_t pb_lower
indx_t pb_upper
} pb
uint32_t pb_pages
} mp_pb
indx_t mp_ptrs [1]
Field Documentation
pgno_t MDB_page::p_pgno
page number
struct MDB_page* MDB_page::p_next
for in-memory list of freed pages
uint16_t MDB_page::mp_pad
key size if this is a LEAF2 page
uint16_t MDB_page::mp_flags
Page Flags
indx_t MDB_page::pb_lower
lower bound of free space
indx_t MDB_page::pb_upper
upper bound of free space
uint32_t MDB_page::pb_pages
number of overflow pages
indx_t MDB_page::mp_ptrs[1]
dynamic size
struct MDB_node
Header for a single key/data pair within a page. Used in pages of type P_BRANCH and P_LEAF without P_LEAF2. We guarantee 2-byte alignment for ’MDB_node’s.
mn_lo and mn_hi are used for data size on leaf nodes, and for child pgno on branch nodes. On 64 bit platforms, mn_flags is also used for pgno. (Branch nodes have no flags). Lo and hi are in host byte order in case some accesses can be optimized to 32-bit word access.
Leaf node flags describe node contents. F_BIGDATA says the node’s data part is the page number of an overflow page with actual data. F_DUPDATA and F_SUBDATA can be combined giving duplicate data in a sub-page/sub-database, and named databases (just F_SUBDATA).
Data Fields
unsigned short
mn_flags
unsigned short mn_ksize
char mn_data [1]
unsigned short
mn_lo
unsigned short mn_hi
Field Documentation
unsigned short MDB_node::mn_lo
part of data size or pgno
unsigned short MDB_node::mn_hi
part of data size or pgno
unsigned short MDB_node::mn_flags
Node Flags
unsigned short MDB_node::mn_ksize
key size
char MDB_node::mn_data[1]
key and data are appended here
struct MDB_db
Information about a single database in the environment.
Data Fields
uint32_t
md_pad
uint16_t md_flags
uint16_t md_depth
pgno_t md_branch_pages
pgno_t md_leaf_pages
pgno_t md_overflow_pages
size_t md_entries
pgno_t md_root
Field Documentation
uint32_t MDB_db::md_pad
also ksize for LEAF2 pages
uint16_t MDB_db::md_flags
Database Flags
uint16_t MDB_db::md_depth
depth of this tree
pgno_t MDB_db::md_branch_pages
number of internal pages
pgno_t MDB_db::md_leaf_pages
number of leaf pages
pgno_t MDB_db::md_overflow_pages
number of overflow pages
size_t MDB_db::md_entries
number of data items
pgno_t MDB_db::md_root
the root page of this tree
struct MDB_meta
Meta page content. A meta page is the start point for accessing a database snapshot. Pages 0-1 are meta pages. Transaction N writes meta page #(N % 2).
Data Fields
uint32_t
mm_magic
uint32_t mm_version
void * mm_address
size_t mm_mapsize
MDB_db mm_dbs [CORE_DBS]
pgno_t mm_last_pg
volatile txnid_t mm_txnid
Field Documentation
uint32_t MDB_meta::mm_magic
Stamp identifying this as an LMDB file. It must be set to MDB_MAGIC.
uint32_t MDB_meta::mm_version
Version number of this file. Must be set to MDB_DATA_VERSION.
void* MDB_meta::mm_address
address for fixed mapping
size_t MDB_meta::mm_mapsize
size of mmap region
MDB_db MDB_meta::mm_dbs[CORE_DBS]
first is free space, 2nd is main db
pgno_t MDB_meta::mm_last_pg
Last used page in the datafile. Actually the file may be shorter if the freeDB lists the final pages.
volatile txnid_t MDB_meta::mm_txnid
txnid that committed this page
union MDB_metabuf
Buffer for a stack-allocated meta page. The members define size and alignment, and silence type aliasing warnings. They are not used directly; that could mean incorrectly using several union members in parallel.
Data Fields
MDB_page
mb_page
struct {
char mm_pad [PAGEHDRSZ]
MDB_meta mm_meta
} mb_metabuf
struct MDB_dbx
Auxiliary DB info. The information here is mostly static/read-only. There is only a single copy of this record in the environment.
Data Fields
MDB_val
md_name
MDB_cmp_func * md_cmp
MDB_cmp_func * md_dcmp
MDB_rel_func * md_rel
void * md_relctx
Field Documentation
MDB_val MDB_dbx::md_name
name of the database
MDB_cmp_func* MDB_dbx::md_cmp
function for comparing keys
MDB_cmp_func* MDB_dbx::md_dcmp
function for comparing data items
MDB_rel_func* MDB_dbx::md_rel
user relocate function
void* MDB_dbx::md_relctx
user-provided context for md_rel
struct MDB_txn
Opaque structure for a transaction handle.
A database transaction. Every database operation requires a transaction handle. Transactions may be read-only or read-write.
Data Fields
MDB_txn
* mt_parent
MDB_txn * mt_child
pgno_t mt_next_pgno
txnid_t mt_txnid
MDB_env * mt_env
MDB_IDL mt_free_pgs
MDB_page * mt_loose_pgs
int mt_loose_count
MDB_IDL mt_spill_pgs
union {
MDB_ID2L dirty_list
MDB_reader * reader
} mt_u
MDB_dbx * mt_dbxs
MDB_db * mt_dbs
unsigned int * mt_dbiseqs
MDB_cursor ** mt_cursors
unsigned char * mt_dbflags
MDB_dbi mt_numdbs
unsigned int mt_flags
unsigned int mt_dirty_room
Field Documentation
MDB_txn* MDB_txn::mt_parent
parent of a nested txn
MDB_txn* MDB_txn::mt_child
Nested txn under this txn, set together with flag MDB_TXN_HAS_CHILD
pgno_t MDB_txn::mt_next_pgno
next unallocated page
txnid_t MDB_txn::mt_txnid
The ID of this transaction. IDs are integers incrementing from 1. Only committed write transactions increment the ID. If a transaction aborts, the ID may be re-used by the next writer.
MDB_env* MDB_txn::mt_env
the DB environment
MDB_IDL MDB_txn::mt_free_pgs
The list of pages that became unused during this transaction.
MDB_page* MDB_txn::mt_loose_pgs
The list of loose pages that became unused and may be reused in this transaction, linked through NEXT_LOOSE_PAGE(page).
int MDB_txn::mt_loose_count
Number of loose pages (mt_loose_pgs)
MDB_IDL MDB_txn::mt_spill_pgs
The sorted list of dirty pages we temporarily wrote to disk because the dirty list was full. Page numbers in here are shifted left by 1; deleted slots have the LSB set.
MDB_ID2L MDB_txn::dirty_list
For write txns: Modified pages. Sorted when not MDB_WRITEMAP.
MDB_reader* MDB_txn::reader
For read txns: This thread/txn’s reader table slot, or NULL.
MDB_dbx* MDB_txn::mt_dbxs
Array of records for each DB known in the environment.
MDB_db* MDB_txn::mt_dbs
Array of MDB_db records for each known DB
unsigned int* MDB_txn::mt_dbiseqs
Array of sequence numbers for each DB handle
MDB_cursor** MDB_txn::mt_cursors
In write txns, array of cursors for each DB
unsigned char* MDB_txn::mt_dbflags
Array of flags for each DB
MDB_dbi MDB_txn::mt_numdbs
Number of DB records in use, or 0 when the txn is finished. This number only ever increments until the txn finishes; we don’t decrement it when individual DB handles are closed.
unsigned int MDB_txn::mt_flags
Transaction Flags
unsigned int MDB_txn::mt_dirty_room
dirty_list room: Array size - #dirty pages visible to this txn. Includes ancestor txns’ dirty pages not hidden by other txns’ dirty/spilled pages. Thus commit(nested txn) has room to merge dirty_list into mt_parent after freeing hidden mt_parent pages.
struct MDB_cursor
Opaque structure for navigating through a database.
Cursors are used for all DB operations. A cursor holds a path of (page pointer, key index) from the DB root to a position in the DB, plus other state. MDB_DUPSORT cursors include an xcursor to the current data item. Write txns track their cursors and keep them up to date when data moves. Exception: An xcursor’s pointer to a P_SUBP page can be stale. (A node with F_DUPDATA but no F_SUBDATA contains a subpage).
Data Fields
MDB_cursor
* mc_next
MDB_cursor * mc_backup
struct MDB_xcursor * mc_xcursor
MDB_txn * mc_txn
MDB_dbi mc_dbi
MDB_db * mc_db
MDB_dbx * mc_dbx
unsigned char * mc_dbflag
unsigned short mc_snum
unsigned short mc_top
unsigned int mc_flags
MDB_page * mc_pg [CURSOR_STACK]
indx_t mc_ki [CURSOR_STACK]
Field Documentation
MDB_cursor* MDB_cursor::mc_next
Next cursor on this DB in this txn
MDB_cursor* MDB_cursor::mc_backup
Backup of the original cursor if this cursor is a shadow
struct MDB_xcursor* MDB_cursor::mc_xcursor
Context used for databases with MDB_DUPSORT, otherwise NULL
MDB_txn* MDB_cursor::mc_txn
The transaction that owns this cursor
MDB_dbi MDB_cursor::mc_dbi
The database handle this cursor operates on
MDB_db* MDB_cursor::mc_db
The database record for this cursor
MDB_dbx* MDB_cursor::mc_dbx
The database auxiliary record for this cursor
unsigned char* MDB_cursor::mc_dbflag
The Transaction DB Flags for this database
unsigned short MDB_cursor::mc_snum
number of pushed pages
unsigned short MDB_cursor::mc_top
index of top page, normally mc_snum-1
unsigned int MDB_cursor::mc_flags
Cursor Flags
MDB_page* MDB_cursor::mc_pg[CURSOR_STACK]
stack of pushed pages
indx_t MDB_cursor::mc_ki[CURSOR_STACK]
stack of page indices
struct MDB_xcursor
Context for sorted-dup records. We could have gone to a fully recursive design, with arbitrarily deep nesting of sub-databases. But for now we only handle these levels - main DB, optional sub-DB, sorted-duplicate DB.
Data Fields
MDB_cursor
mx_cursor
MDB_db mx_db
MDB_dbx mx_dbx
unsigned char mx_dbflag
Field Documentation
MDB_cursor MDB_xcursor::mx_cursor
A sub-cursor for traversing the Dup DB
MDB_db MDB_xcursor::mx_db
The database record for this Dup DB
MDB_dbx MDB_xcursor::mx_dbx
The auxiliary DB record for this Dup DB
unsigned char MDB_xcursor::mx_dbflag
The Transaction DB Flags for this Dup DB
struct MDB_pgstate
State of FreeDB old pages, stored in the MDB_env
Data Fields
pgno_t *
mf_pghead
txnid_t mf_pglast
Field Documentation
pgno_t* MDB_pgstate::mf_pghead
Reclaimed freeDB pages, or NULL before use
txnid_t MDB_pgstate::mf_pglast
ID of last used record, or 0 if !mf_pghead
struct MDB_env
Opaque structure for a database environment.
The database environment.
A DB environment supports multiple databases, all residing in the same shared-memory map.
Data Fields
HANDLE me_fd
HANDLE me_lfd
HANDLE me_mfd
uint32_t me_flags
unsigned int me_psize
unsigned int me_os_psize
unsigned int me_maxreaders
volatile int me_close_readers
MDB_dbi me_numdbs
MDB_dbi me_maxdbs
MDB_PID_T me_pid
char * me_path
char * me_map
MDB_txninfo * me_txns
MDB_meta * me_metas [NUM_METAS]
void * me_pbuf
MDB_txn * me_txn
MDB_txn * me_txn0
size_t me_mapsize
off_t me_size
pgno_t me_maxpg
MDB_dbx * me_dbxs
uint16_t * me_dbflags
unsigned int * me_dbiseqs
pthread_key_t me_txkey
txnid_t me_pgoldest
MDB_pgstate me_pgstate
MDB_page * me_dpages
MDB_IDL me_free_pgs
MDB_ID2L me_dirty_list
int me_maxfree_1pg
unsigned int me_nodemax
int me_live_reader
void * me_userctx
MDB_assert_func * me_assert_func
Field Documentation
HANDLE MDB_env::me_fd
The main data file
HANDLE MDB_env::me_lfd
The lock file
HANDLE MDB_env::me_mfd
For writing and syncing the meta pages
uint32_t MDB_env::me_flags
Environment Flags
unsigned int MDB_env::me_psize
DB page size, inited from me_os_psize
unsigned int MDB_env::me_os_psize
OS page size, from GET_PAGESIZE
unsigned int MDB_env::me_maxreaders
size of the reader table
volatile int MDB_env::me_close_readers
Max MDB_txninfo.mti_numreaders of interest to mdb_env_close()
MDB_dbi MDB_env::me_numdbs
number of DBs opened
MDB_dbi MDB_env::me_maxdbs
size of the DB table
MDB_PID_T MDB_env::me_pid
process ID of this env
char* MDB_env::me_path
path to the DB files
char* MDB_env::me_map
the memory map of the data file
MDB_txninfo* MDB_env::me_txns
the memory map of the lock file or NULL
MDB_meta* MDB_env::me_metas[NUM_METAS]
pointers to the two meta pages
void* MDB_env::me_pbuf
scratch area for DUPSORT put()
MDB_txn* MDB_env::me_txn
current write transaction
MDB_txn* MDB_env::me_txn0
prealloc’d write transaction
size_t MDB_env::me_mapsize
size of the data memory map
off_t MDB_env::me_size
current file size
pgno_t MDB_env::me_maxpg
me_mapsize / me_psize
MDB_dbx* MDB_env::me_dbxs
array of static DB info
uint16_t* MDB_env::me_dbflags
array of flags from MDB_db.md_flags
unsigned int* MDB_env::me_dbiseqs
array of dbi sequence numbers
pthread_key_t MDB_env::me_txkey
thread-key for readers
txnid_t MDB_env::me_pgoldest
ID of oldest reader last time we looked
MDB_pgstate MDB_env::me_pgstate
state of old pages from freeDB
MDB_page* MDB_env::me_dpages
list of malloc’d blocks for re-use
MDB_IDL MDB_env::me_free_pgs
IDL of pages that became unused in a write txn
MDB_ID2L MDB_env::me_dirty_list
ID2L of pages written during a write txn. Length MDB_IDL_UM_SIZE.
int MDB_env::me_maxfree_1pg
Max number of freelist items that can fit in a single overflow page
unsigned int MDB_env::me_nodemax
Max size of a node on a page
int MDB_env::me_live_reader
have liveness lock in reader table
void* MDB_env::me_userctx
User-settable context
MDB_assert_func* MDB_env::me_assert_func
Callback for assertion failures
struct MDB_ntxn
Nested transaction
Data Fields
MDB_txn
mnt_txn
MDB_pgstate mnt_pgstate
Field Documentation
MDB_txn MDB_ntxn::mnt_txn
the transaction
MDB_pgstate MDB_ntxn::mnt_pgstate
parent transaction’s saved freestate
struct MDB_name
Filename - string of mdb_nchar_t[]
Data Fields
int
mn_len
int mn_alloced
mdb_nchar_t * mn_val
Field Documentation
int MDB_name::mn_len
Length
int MDB_name::mn_alloced
True if mn_val was malloced
mdb_nchar_t* MDB_name::mn_val
Contents
struct mdb_copy
State needed for a double-buffering compacting copy.
Data Fields
MDB_env
* mc_env
MDB_txn * mc_txn
pthread_mutex_t mc_mutex
pthread_cond_t mc_cond
char * mc_wbuf [2]
char * mc_over [2]
int mc_wlen [2]
int mc_olen [2]
pgno_t mc_next_pgno
HANDLE mc_fd
int mc_toggle
int mc_new
volatile int mc_error
Field Documentation
pthread_cond_t mdb_copy::mc_cond
Condition variable for mc_new
int mdb_copy::mc_toggle
Buffer number in provider
int mdb_copy::mc_new
(0-2 buffers to write) | (MDB_EOF at end)
volatile int mdb_copy::mc_error
Error code. Never cleared if set. Both threads can set nonzero to fail the copy. Not mutex-protected; LMDB expects atomic int.
Macro Definition Documentation
#define MDB_DSYNC O_SYNC
A flag for opening a file and requesting synchronous data writes. This is only used when writing a meta page. It’s not strictly needed; we could just do a normal write and then immediately perform a flush. But if this flag is available it saves us an extra system call.
Note
If O_DSYNC is undefined but exists in /usr/include, preferably set some compiler flag to get the definition.
#define MDB_FDATASYNC fdatasync
Function for flushing the data of a file. Define this to fsync if fdatasync() is not supported.
#define MAX_PAGESIZE (PAGEBASE ? 0x10000 : 0x8000)
The maximum size of a database page. It is 32k or 64k, since value-PAGEBASE must fit in MDB_page.mp_upper.
LMDB will use database pages < OS pages if needed. That causes more I/O in write transactions: The OS must know (read) the whole page before writing a partial page.
Note that we don’t currently support Huge pages. On Linux, regular data files cannot use Huge pages, and in general Huge pages aren’t actually pageable. We rely on the OS demand-pager to read our data and page it out when memory pressure from other processes is high. So until OSs have actual paging support for Huge pages, they’re not viable.
#define MDB_MINKEYS 2
The minimum number of keys required in a database page. Setting this to a larger value will place a smaller bound on the maximum size of a data item. Data items larger than this size will be pushed into overflow pages instead of being stored directly in the B-tree node. This value used to default to 4. With a page size of 4096 bytes that meant that any item larger than 1024 bytes would go into an overflow page. That also meant that on average 2-3KB of each overflow page was wasted space. The value cannot be lower than 2 because then there would no longer be a tree structure. With this value, items larger than 2KB will go into overflow pages, and on average only 1KB will be wasted.
#define MDB_MAGIC 0xBEEFC0DE
A stamp that identifies a file as an LMDB file. There’s nothing special about this value other than that it is easily recognizable, and it will reflect any byte order mismatches.
#define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 : 1)
The version number for a database’s datafile format.
#define MDB_LOCK_VERSION 1
The version number for a database’s lockfile format.
#define MDB_MAXKEYSIZE ((MDB_DEVEL) ? 0 : 511)
The max size of a key we can write, or 0 for computed max. This macro should normally be left alone or set to 0. Note that a database with big keys or dupsort data cannot be reliably modified by a liblmdb which uses a smaller max. The default is 511 for backwards compat, or 0 when MDB_DEVEL.
Other values are allowed, for backwards compat. However: A value bigger than the computed max can break if you do not know what you are doing, and liblmdb <= 0.9.10 can break when modifying a DB with keys/dupsort data bigger than its max.
Data items in an MDB_DUPSORT database are also limited to this size, since they’re actually keys of a sub-DB. Keys and MDB_DUPSORT data items must fit on a node in a regular page.
#define ENV_MAXKEY(env) (MDB_MAXKEYSIZE)
The maximum size of a key we can write to the environment.
#define MAXDATASIZE 0xffffffffUL
The maximum size of a data item. We only store a 32 bit value for node sizes.
#define P_INVALID (~(pgno_t)0)
An invalid page number. Mainly used to denote an empty tree.
#define F_ISSET(w, f) (((w) & (f)) == (f))
Test if the flags f are set in a flag word w.
#define EVEN(n) (((n) + 1U) & -2) /* sign-extending -2 to match n+1U */
Round n up to an even number.
#define DEFAULT_MAPSIZE 1048576
Default size of memory map. This is certainly too small for any actual applications. Apps should always set the size explicitly using mdb_env_set_mapsize().
#define PAGEHDRSZ ((unsigned) offsetof(MDB_page, mp_ptrs))
Size of the page header, excluding dynamic data at the end
#define METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ))
Address of first usable data byte in a page, after the header
#define PAGEBASE ((MDB_DEVEL) ? PAGEHDRSZ : 0)
ITS#7713, change PAGEBASE to handle 65536 byte pages
#define NUMKEYS(p) (((p)->mp_lower - (PAGEHDRSZ-PAGEBASE)) >> 1)
Number of nodes on a page
#define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower)
The amount of space remaining in the page
#define PAGEFILL(env, p)
Value:
(1000L *
((env)->me_psize - PAGEHDRSZ - SIZELEFT(p)) /
((env)->me_psize - PAGEHDRSZ))
The percentage of space used in the page, in tenths of a
percent.
#define FILL_THRESHOLD 250
The minimum page fill factor, in tenths of a percent. Pages emptier than this are candidates for merging.
#define IS_LEAF(p) F_ISSET((p)->mp_flags, P_LEAF)
Test if a page is a leaf page
#define IS_LEAF2(p) F_ISSET((p)->mp_flags, P_LEAF2)
Test if a page is a LEAF2 page
#define IS_BRANCH(p) F_ISSET((p)->mp_flags, P_BRANCH)
Test if a page is a branch page
#define IS_OVERFLOW(p) F_ISSET((p)->mp_flags, P_OVERFLOW)
Test if a page is an overflow page
#define IS_SUBP(p) F_ISSET((p)->mp_flags, P_SUBP)
Test if a page is a sub page
#define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1)
The number of overflow pages needed to store the given size.
#define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p) + 2))
Link in MDB_txn.mt_loose_pgs list. Kept outside the page header, which is needed when reusing the page.
#define NODESIZE offsetof(MDB_node, mn_data)
Size of the node header, excluding dynamic data at the end
#define PGNO_TOPWORD ((pgno_t)-1 > 0xffffffffu ? 32 : 0)
Bit position of top word in page number, for shifting mn_flags
#define INDXSIZE(k) (NODESIZE + ((k) == NULL ? 0 : (k)->mv_size))
Size of a node in a branch page with a given key. This is just the node header plus the key, there is no data.
#define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size)
Size of a node in a leaf page with a given key and data. This is node header plus key plus data size.
#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i] +PAGEBASE))
Address of node i in page p
#define NODEKEY(node) (void *)((node)->mn_data)
Address of the key for the node
#define NODEDATA(node) (void *)((char *)(node)->mn_data + (node)->mn_ksize)
Address of the data for a node
#define NODEPGNO(node)
Value:
((node)->mn_lo
| ((pgno_t) (node)->mn_hi << 16) | (PGNO_TOPWORD ?
((pgno_t) (node)->mn_flags << PGNO_TOPWORD) : 0))
Get the page number pointed to by a branch node
#define SETPGNO(node, pgno)
Value:
do {
(node)->mn_lo = (pgno) & 0xffff; (node)->mn_hi =
(pgno) >> 16; if (PGNO_TOPWORD) (node)->mn_flags =
(pgno) >> PGNO_TOPWORD; } while(0)
Set the page number in a branch node
#define NODEDSZ(node) ((node)->mn_lo | ((unsigned)(node)->mn_hi << 16))
Get the size of the data in a leaf node
#define SETDSZ(node, size)
Value:
do {
(node)->mn_lo = (size) & 0xffff; (node)->mn_hi =
(size) >> 16;} while(0)
Set the size of the data for a leaf node
#define NODEKSZ(node) ((node)->mn_ksize)
The size of a key in a node
#define COPY_PGNO(dst, src)
Value:
do { unsigned
short *s, *d; s = (unsigned short *)&(src); d =
(unsigned short *)&(dst); *d++ = *s++; *d = *s; } while
(0)
Copy a page number from src to dst
#define LEAF2KEY(p, i, ks) ((char *)(p) + PAGEHDRSZ + ((i)*(ks)))
The address of a key in a LEAF2 page. LEAF2 pages are used for MDB_DUPFIXED sorted-duplicate sub-DBs. There are no node headers, keys are stored contiguously.
#define MDB_GET_KEY(node, keyptr)
Value:
{ if ((keyptr)
!= NULL) { (keyptr)->mv_size = NODEKSZ(node);
(keyptr)->mv_data = NODEKEY(node); } }
Set the node’s key into keyptr, if
requested.
#define MDB_GET_KEY2(node, key) { key.mv_size = NODEKSZ(node); key.mv_data= NODEKEY(node); }
Set the node’s key into key.
#define MDB_VALID 0x8000
DB handle is valid, for me_dbflags
#define VALID_FLAGS
Value:
(MDB_REVERSEKEY|MDB_DUPSORT|MDB_INTEGERKEY|MDB_DUPFIXED|
MDB_INTEGERDUP|MDB_REVERSEDUP|MDB_CREATE)
mdb_dbi_open() flags
#define FREE_DBI 0
Handle for the DB used to track free pages.
#define MAIN_DBI 1
Handle for the default DB.
#define CORE_DBS 2
Number of DBs in metapage (free and main) - also hardcoded elsewhere
#define NUM_METAS 2
Number of meta pages - also hardcoded elsewhere
#define CURSOR_STACK 32
Enough space for 2^32 nodes with minimum of 2 keys per node. I.e., plenty. At 4 keys per node, enough for 2^64 nodes, so there’s probably no need to raise this on a 64 bit machine.
#define XCURSOR_INITED(mc) ((mc)->mc_xcursor && ((mc)->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED))
Check if there is an inited xcursor
#define XCURSOR_REFRESH(mc, top, mp)
Value:
do { MDB_page
*xr_pg = (mp); MDB_node *xr_node; if (!XCURSOR_INITED(mc) ||
(mc)->mc_ki[top] >= NUMKEYS(xr_pg)) break; xr_node =
NODEPTR(xr_pg, (mc)->mc_ki[top]); if
((xr_node->mn_flags & (F_DUPDATA|F_SUBDATA)) ==
F_DUPDATA) (mc)->mc_xcursor->mx_cursor.mc_pg[0] =
NODEDATA(xr_node); } while (0)
Update the xcursor’s sub-page pointer, if any, in
mc. Needed when the node which contains the sub-page
may have moved. Called with leaf page mp =
mc->mc_pg[top].
#define MDB_COMMIT_PAGES 64
max number of pages to commit in one writev() call
#define MAX_WRITE (0x40000000U >> (sizeof(ssize_t) == 4))
max bytes to write in one call
#define TXN_DBI_EXIST(txn, dbi, validity) ((txn) && (dbi)<(txn)->mt_numdbs && ((txn)->mt_dbflags[dbi] & (validity)))
Check txn and dbi arguments to a function
#define TXN_DBI_CHANGED(txn, dbi) ((txn)->mt_dbiseqs[dbi] != (txn)->mt_env->me_dbiseqs[dbi])
Check for misused dbi handles
#define MDB_END_NAMES
Value:
{"committed", "empty-commit", "abort", "reset", "reset-tmp", "fail-begin", "fail-beginchild"}
#define MDB_END_OPMASK 0x0F
mask for mdb_txn_end() operation number
#define MDB_END_UPDATE 0x10
update env state (DBIs)
#define MDB_END_FREE 0x20
free txn unless it is MDB_env.me_txn0
#define MDB_END_SLOT MDB_NOTLS
release any reader slot if MDB_NOTLS
#define MDB_SPLIT_REPLACE MDB_APPENDDUP
newkey is not new
#define mdb_cmp_clong mdb_cmp_cint
Compare two items pointing at size_t’s of unknown alignment.
#define mdb_cassert(mc, expr) mdb_assert0((mc)->mc_txn->mt_env, expr, #expr)
assert(3) variant in cursor context
#define mdb_tassert(txn, expr) mdb_assert0((txn)->mt_env, expr, #expr)
assert(3) variant in transaction context
#define mdb_eassert(env, expr) mdb_assert0(env, expr, #expr)
assert(3) variant in environment context
#define mdb_assert0(env, expr, expr_txt)
Value:
((expr) ? (void)0 : mdb_assert_fail(env, expr_txt, mdb_func_, __FILE__, __LINE__))
#define MDB_NAME(str) str
mdb_nchar_t[] string literal
#define mdb_name_cpy strcpy
Copy name (mdb_nchar_t string)
#define MDB_SUFFLEN 9
Max string length in mdb_suffixes[]
#define mdb_fname_destroy(fname) do { if ((fname).mn_alloced) free((fname).mn_val); } while (0)
Destroy fname from mdb_fname_init()
#define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT)
Only a subset of the Environment Flags can be changed at runtime. Changing other flags requires closing the environment and re-opening it with the new flags.
#define CHANGELESS
Value:
(MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY| MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD)
#define MDB_NOSPILL 0x8000
Do not spill pages to disk if txn is getting full, may fail instead
#define WITH_CURSOR_TRACKING(mn, act)
Value:
do { MDB_cursor
dummy, *tracked, **tp =
&(mn).mc_txn->mt_cursors[mn.mc_dbi]; if
((mn).mc_flags & C_SUB) { dummy.mc_flags =
C_INITIALIZED; dummy.mc_xcursor = (MDB_xcursor *)&(mn);
tracked = &dummy; } else { tracked = &(mn); }
tracked->mc_next = *tp; *tp = tracked; { act; } *tp =
tracked->mc_next; } while (0)
Perform act while tracking temporary cursor
mn
#define MDB_EOF 0x10
mdb_env_copyfd1() is done reading
Typedef Documentation
typedef MDB_ID pgno_t
A page number in the database. Note that 64 bit page numbers are overkill, since pages themselves already represent 12-13 bits of addressable memory, and the OS will always limit applications to a maximum of 63 bits of address space.
Note
In the MDB_node structure, we only store 48 bits of this value, which thus limits us to only 60 bits of addressable data.
typedef MDB_ID txnid_t
A transaction ID. See struct MDB_txn.mt_txnid for details.
typedef uint16_t indx_t
Used for offsets within a single page. Since memory pages are typically 4 or 8KB in size, 12-13 bits, this is plenty.
typedef char mdb_nchar_t
Character type for file names: char on Unix, wchar_t on Windows
Enumeration Type Documentation
enum mdb_fopen_type
File type, access mode etc. for mdb_fopen()
Enumerator
MDB_O_RDONLY
for RDONLY me_fd
MDB_O_RDWR
for me_fd
MDB_O_META
for me_mfd
MDB_O_COPY
for mdb_env_copy()
MDB_O_MASK
Bitmask for open() flags in enum mdb_fopen_type. The other bits distinguish otherwise-equal MDB_O_* constants from each other.
MDB_O_LOCKS
for me_lfd
Function Documentation
static int mdb_page_alloc (MDB_cursor * mc, int num, MDB_page ** mp) [static]
Allocate page numbers and memory for writing. Maintain me_pglast, me_pghead and mt_next_pgno. Set MDB_TXN_ERROR on failure.
If there are free pages available from older transactions, they are re-used first. Otherwise allocate a new page at mt_next_pgno. Do not modify the freeDB, just merge freeDB records into me_pghead[] and move me_pglast to say which records were consumed. Only this function can create me_pghead and move me_pglast/mt_next_pgno.
Parameters
mc A cursor
handle identifying the transaction and database for which we
are allocating.
num the number of pages to allocate.
mp Address of the allocated page(s). Requests for
multiple pages will always be satisfied by a single
contiguous chunk of memory.
Returns
0 on success, non-zero on failure.
static int mdb_page_new (MDB_cursor * mc, uint32_t flags, int num, MDB_page** mp) [static]
Allocate and initialize new pages for a database. Set MDB_TXN_ERROR on failure.
Parameters
mc a cursor on the
database being added to.
flags flags defining what type of page is being
allocated.
num the number of pages to allocate. This is usually 1,
unless allocating overflow pages for a large record.
mp Address of a page, or NULL on failure.
Returns
0 on success, non-zero on failure.
static int mdb_page_touch (MDB_cursor * mc) [static]
Touch a page: make it dirty and re-insert into tree with updated pgno. Set MDB_TXN_ERROR on failure.
Parameters
mc cursor pointing to the page to be touched
Returns
0 on success, non-zero on failure.
static void mdb_txn_end (MDB_txn * txn, unsigned mode) [static]
End a transaction, except successful commit of a nested transaction. May be called twice for readonly txns: First reset it, then abort.
Parameters
txn the transaction
handle to end
mode why and how to end the transaction
static int mdb_page_get (MDB_cursor * mc, pgno_t pgno, MDB_page ** ret, int *lvl) [static]
Find the address of the page corresponding to a given page number. Set MDB_TXN_ERROR on failure.
Parameters
mc the cursor accessing
the page.
pgno the page number for the page to retrieve.
ret address of a pointer where the page’s address
will be stored.
lvl dirty_list inheritance level of found page.
1=current txn, 0=mapped page.
Returns
0 on success, non-zero on failure.
static int mdb_page_search_root (MDB_cursor * mc, MDB_val * key, int flags)[static]
Finish mdb_page_search() / mdb_page_search_lowest(). The cursor is at the root page, set up the rest of it.
static int mdb_page_search (MDB_cursor * mc, MDB_val * key, int flags)[static]
Search for the page a given key should be in. Push it and its parent pages on the cursor stack.
Parameters
mc the cursor for this
operation.
key the key to search for, or NULL for first/last page.
flags If MDB_PS_MODIFY is set, visited pages in the DB
are touched (updated with new page numbers). If MDB_PS_FIRST
or MDB_PS_LAST is set, find first or last leaf. This is used
by mdb_cursor_first() and mdb_cursor_last().
If MDB_PS_ROOTONLY set, just fetch root node, no further
lookups.
Returns
0 on success, non-zero on failure.
static int mdb_page_merge (MDB_cursor * csrc, MDB_cursor * cdst) [static]
Merge one page into another. The nodes from the page pointed to by csrc will be copied to the page pointed to by cdst and then the csrc page will be freed.
Parameters
csrc Cursor pointing to
the source page.
cdst Cursor pointing to the destination page.
Returns
0 on success, non-zero on failure.
static int mdb_page_split (MDB_cursor * mc, MDB_val * newkey, MDB_val *newdata, pgno_t newpgno, unsigned int nflags) [static]
Split a page and insert a new node. Set MDB_TXN_ERROR on failure.
Parameters
mc Cursor pointing to
the page and desired insertion index. The cursor will be
updated to point to the actual page and index where the node
got inserted after the split.
newkey The key for the newly inserted node.
newdata The data for the newly inserted node.
newpgno The page number, if the new node is a branch
node.
nflags The NODE_ADD_FLAGS for the new node.
Returns
0 on success, non-zero on failure.
static int ESECT mdb_env_read_header (MDB_env * env, MDB_meta * meta)[static]
Read the environment parameters of a DB environment before mapping it into memory.
Parameters
env the environment
handle
meta address of where to store the meta information
Returns
0 on success, non-zero on failure.
static MDB_meta * mdb_env_pick_meta (const MDB_env * env) [static]
Check both meta pages to see which one is newer.
Parameters
env the environment handle
Returns
newest MDB_meta.
static int mdb_env_write_meta (MDB_txn * txn) [static]
Update the environment info to commit a transaction.
Parameters
txn the transaction that’s being committed
Returns
0 on success, non-zero on failure.
static void ESECT mdb_env_close0 (MDB_env * env, int excl) [static]
Destroy resources from mdb_env_open(), clear our readers & DBIs
static MDB_node * mdb_node_search (MDB_cursor * mc, MDB_val * key, int *exactp) [static]
Search for key within a page, using binary search. Returns the smallest entry greater than or equal to the key. If exactp is non-null, stores whether the found entry was an exact match in *exactp (1 or 0). Updates the cursor index with the index of the found entry. If no entry greater than or equal to the key is found, returns NULL.
static int mdb_node_add (MDB_cursor * mc, indx_t indx, MDB_val * key, MDB_val* data, pgno_t pgno, unsigned int flags) [static]
Add a node to the page pointed to by the cursor. Set MDB_TXN_ERROR on failure.
Parameters
mc The cursor for this
operation.
indx The index on the page where the new node should be
added.
key The key for the new node.
data The data for the new node, if any.
pgno The page number, if adding a branch node.
flags Flags for the node.
Returns
0 on success, non-zero on failure. Possible errors are:
• ENOMEM - failed to allocate overflow pages for the node.
• MDB_PAGE_FULL - there is insufficient room in the page. This error should never happen since all callers already calculate the page’s free space before calling this function.
static void mdb_node_del (MDB_cursor * mc, int ksize) [static]
Delete the specified node from a
page.
Parameters
mc Cursor pointing to
the node to delete.
ksize The size of a node. Only used if the page is part
of a MDB_DUPFIXED database.
static void mdb_node_shrink (MDB_page * mp, indx_t indx) [static]
Compact the main page after
deleting a node on a subpage.
Parameters
mp The main page to
operate on.
indx The index of the subpage on the main page.
static int mdb_node_move (MDB_cursor * csrc, MDB_cursor * cdst, int fromleft)[static]
Move a node from csrc to cdst.
static int mdb_node_read (MDB_cursor * mc, MDB_node * leaf, MDB_val * data)[static]
Return the data associated with
a given node.
Parameters
mc The cursor for this
operation.
leaf The node being read.
data Updated to point to the node’s data.
Returns
0 on success, non-zero on failure.
static size_t mdb_leaf_size (MDB_env * env, MDB_val * key, MDB_val * data)[static]
Calculate the size of a leaf
node. The size depends on the environment’s page size;
if a data item is too large it will be put onto an overflow
page and the node size will only include the key and not the
data. Sizes are always rounded up to an even number of
bytes, to guarantee 2-byte alignment of the MDB_node
headers.
Parameters
env The environment
handle.
key The key for the node.
data The data for the node.
Returns
The number of bytes needed to store the node.
static size_t mdb_branch_size (MDB_env * env, MDB_val * key) [static]
Calculate the size of a branch
node. The size should depend on the environment’s page
size but since we currently don’t support spilling
large keys onto overflow pages, it’s simply the size
of the MDB_node header plus the size of the key.
Sizes are always rounded up to an even number of bytes, to
guarantee 2-byte alignment of the MDB_node headers.
Parameters
env The environment
handle.
key The key for the node.
Returns
The number of bytes needed to store the node.
static int mdb_rebalance (MDB_cursor * mc) [static]
Rebalance the tree after a
delete operation.
Parameters
mc Cursor pointing to the page where rebalancing should begin.
Returns
0 on success, non-zero on failure.
static int mdb_update_key (MDB_cursor * mc, MDB_val * key) [static]
Replace the key for a branch
node with a new key. Set MDB_TXN_ERROR on failure.
Parameters
mc Cursor pointing to
the node to operate on.
key The new key to use.
Returns
0 on success, non-zero on failure.
static void mdb_cursor_pop (MDB_cursor * mc) [static]
Pop a page off the top of the cursor’s stack.
static int mdb_cursor_push (MDB_cursor * mc, MDB_page * mp) [static]
Push a page onto the top of the cursor’s stack. Set MDB_TXN_ERROR on failure.
static int mdb_cursor_del0 (MDB_cursor * mc) [static]
Complete a delete operation started by mdb_cursor_del().
static int mdb_cursor_sibling (MDB_cursor * mc, int move_right) [static]
Find a sibling for a page.
Replaces the page at the top of the cursor’s stack
with the specified sibling, if one exists.
Parameters
mc The cursor for this
operation.
move_right Non-zero if the right sibling is requested,
otherwise the left sibling.
Returns
0 on success, non-zero on failure.
static int mdb_cursor_next (MDB_cursor * mc, MDB_val * key, MDB_val * data,MDB_cursor_op op) [static]
Move the cursor to the next data item.
static int mdb_cursor_prev (MDB_cursor * mc, MDB_val * key, MDB_val * data,MDB_cursor_op op) [static]
Move the cursor to the previous data item.
static int mdb_cursor_set (MDB_cursor * mc, MDB_val * key, MDB_val * data,MDB_cursor_op op, int * exactp) [static]
Set the cursor on a specific data item.
static int mdb_cursor_first (MDB_cursor * mc, MDB_val * key, MDB_val * data)[static]
Move the cursor to the first item in the database.
static int mdb_cursor_last (MDB_cursor * mc, MDB_val * key, MDB_val * data)[static]
Move the cursor to the last item in the database.
static void mdb_cursor_init (MDB_cursor * mc, MDB_txn * txn, MDB_dbi dbi,MDB_xcursor * mx) [static]
Initialize a cursor for a given transaction and database.
static void mdb_xcursor_init0 (MDB_cursor * mc) [static]
Initial setup of a sorted-dups
cursor. Sorted duplicates are implemented as a sub-database
for the given key. The duplicate data items are actually
keys of the sub-database. Operations on the duplicate data
items are performed using a sub-cursor initialized when the
sub-database is first accessed. This function does the
preliminary setup of the sub-cursor, filling in the fields
that depend only on the parent DB.
Parameters
mc The main cursor whose sorted-dups cursor is to be initialized.
static void mdb_xcursor_init1 (MDB_cursor * mc, MDB_node * node) [static]
Final setup of a sorted-dups
cursor. Sets up the fields that depend on the data from the
main cursor.
Parameters
mc The main cursor whose
sorted-dups cursor is to be initialized.
node The data containing the MDB_db record for
the sorted-dup database.
static void mdb_xcursor_init2 (MDB_cursor * mc, MDB_xcursor * src_mx, int new_dupdata) [static]
Fixup a sorted-dups cursor due
to underlying update. Sets up some fields that depend on the
data from the main cursor. Almost the same as init1, but
skips initialization steps if the xcursor had already been
used.
Parameters
mc The main cursor whose
sorted-dups cursor is to be fixed up.
src_mx The xcursor of an up-to-date cursor.
new_dupdata True if converting from a
non-F_DUPDATA item.
static int mdb_drop0 (MDB_cursor * mc, int subs) [static]
Add all the DB’s pages to
the free list.
Parameters
mc Cursor on the DB to
free.
subs non-Zero to check for sub-DBs in this DB.
Returns
0 on success, non-zero on failure.
static void mdb_default_cmp (MDB_txn * txn, MDB_dbi dbi) [static]
Set the default comparison
functions for a database. Called immediately after a
database is opened to set the defaults. The user can then
override them with mdb_set_compare() or
mdb_set_dupsort().
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by
mdb_dbi_open()
static int ESECT mdb_reader_check0 (MDB_env * env, int rlocked, int * dead)[static]
As mdb_reader_check(). rlocked is set if caller locked me_rmutex.
char* ESECT mdb_version (int * major, int * minor, int * patch)
Return the LMDB library version information.
char* mdb_strerror (int err)
Return a string describing a
given error code. This function is a superset of the ANSI C
X3.159-1989 (ANSI C) strerror(3) function. If the error code
is greater than or equal to 0, then the string returned by
the system function strerror(3) is returned. If the error
code is less than 0, an error string corresponding to the
LMDB library error is returned. See Return Codes for
a list of LMDB-specific error codes.
Parameters
err The error code
Return values
error message The description of the error
int mdb_cmp (MDB_txn * txn, MDB_dbi dbi, const MDB_val * a, const MDB_val *b)
Compare two data items according
to a particular database. This returns a comparison as if
the two data items were keys in the specified database.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by mdb_dbi_open()
a The first item to compare
b The second item to compare
Returns
< 0 if a < b, 0 if a == b, > 0 if a > b
int mdb_dcmp (MDB_txn * txn, MDB_dbi dbi, const MDB_val * a, const MDB_val *b)
Compare two data items according
to a particular database. This returns a comparison as if
the two items were data items of the specified database. The
database must have the MDB_DUPSORT flag.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by mdb_dbi_open()
a The first item to compare
b The second item to compare
Returns
< 0 if a < b, 0 if a == b, > 0 if a > b
static MDB_page* mdb_page_malloc (MDB_txn * txn, unsigned num) [static]
Allocate memory for a page. Re-use old malloc’d pages first for singletons, otherwise just malloc. Set MDB_TXN_ERROR on failure.
static void mdb_page_free (MDB_env * env, MDB_page * mp) [static]
Free a single page. Saves single pages to a list, for future reuse. (This is not used for multi-page overflow pages.)
static void mdb_dpage_free (MDB_env * env, MDB_page * dp) [static]
Free a dirty page
static void mdb_dlist_free (MDB_txn * txn) [static]
Return all dirty pages to dpage list
static int mdb_page_loose (MDB_cursor * mc, MDB_page * mp) [static]
Loosen or free a single page.
Saves single pages to a list for future reuse in this same
txn. It has been pulled from the freeDB and already resides
on the dirty list, but has been deleted. Use these pages
first before pulling again from the freeDB.
If the page wasn’t dirtied in this txn, just add it to
this txn’s free list.
static int mdb_pages_xkeep (MDB_cursor * mc, unsigned pflags, int all)[static]
Set or clear P_KEEP in dirty,
non-overflow, non-sub pages watched by txn.
Parameters
mc A cursor handle for
the current operation.
pflags Flags of the pages to update: P_DIRTY to set
P_KEEP, P_DIRTY|P_KEEP to clear it.
all No shortcuts. Needed except after a full
mdb_page_flush().
Returns
0 on success, non-zero on failure.
static int mdb_page_flush (MDB_txn * txn, int keep) [static]
Flush (some) dirty pages to the
map, after clearing their dirty flag.
Parameters
txn the transaction
that’s being committed
keep number of initial pages in dirty_list to keep
dirty.
Returns
0 on success, non-zero on failure.
static int mdb_page_spill (MDB_cursor * m0, MDB_val * key, MDB_val * data)[static]
Spill pages from the dirty list
back to disk. This is intended to prevent running into
MDB_TXN_FULL situations, but note that they may still
occur in a few cases: 1) our estimate of the txn size could
be too small. Currently this seems unlikely, except with a
large number of MDB_MULTIPLE items. 2) child txns may
run out of space if their parents dirtied a lot of pages and
never spilled them. TODO: we probably should do a preemptive
spill during mdb_txn_begin() of a child txn, if the
parent’s dirty_room is below a given threshold.
Otherwise, if not using nested txns, it is expected that
apps will not run into MDB_TXN_FULL any more. The
pages are flushed to disk the same way as for a txn commit,
e.g. their P_DIRTY flag is cleared. If the txn never
references them again, they can be left alone. If the txn
only reads them, they can be used without any fuss. If the
txn writes them again, they can be dirtied immediately
without going thru all of the work of
mdb_page_touch(). Such references are handled by
mdb_page_unspill().
Also note, we never spill DB root pages, nor pages of active
cursors, because we’ll need these back again soon
anyway. And in nested txns, we can’t spill a page in a
child txn if it was already spilled in a parent txn. That
would alter the parent txns’ data even though the
child hasn’t committed yet, and we’d have no way
to undo it if the child aborted.
Parameters
m0 A cursor
handle identifying the transaction and database for which we
are checking space.
key For a put operation, the key being stored.
data For a put operation, the data being stored.
Returns
0 on success, non-zero on failure.
static txnid_t mdb_find_oldest (MDB_txn * txn) [static]
Find oldest txnid still referenced. Expects txn->mt_txnid > 0.
static void mdb_page_dirty (MDB_txn * txn, MDB_page * mp) [static]
Add a page to the txn’s dirty list
static void mdb_page_copy (MDB_page * dst, MDB_page * src, unsigned int psize) [static]
Copy the used portions of a
non-overflow page.
Parameters
dst page to copy into
src page to copy from
psize size of a page
static int mdb_page_unspill (MDB_txn * txn, MDB_page * mp, MDB_page ** ret)[static]
Pull a page off the txn’s
spill list, if present. If a page being referenced was
spilled to disk in this txn, bring it back and make it
dirty/writable again.
Parameters
txn the transaction
handle.
mp the page being referenced. It must not be dirty.
ret the writable page, if any. ret is unchanged if mp
wasn’t spilled.
int mdb_env_sync (MDB_env * env, int force)
Flush the data buffers to disk.
Data is always written to disk when mdb_txn_commit()
is called, but the operating system may keep it buffered.
LMDB always flushes the OS buffers upon commit as well,
unless the environment was opened with MDB_NOSYNC or
in part MDB_NOMETASYNC. This call is not valid if the
environment was opened with MDB_RDONLY.
Parameters
env An environment
handle returned by mdb_env_create()
force If non-zero, force a synchronous flush. Otherwise
if the environment has the MDB_NOSYNC flag set the
flushes will be omitted, and with MDB_MAPASYNC they
will be asynchronous.
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EACCES - the environment is read-only.
• EINVAL - an invalid parameter was specified.
• EIO - an error occurred during synchronization.
static int mdb_cursor_shadow (MDB_txn * src, MDB_txn * dst) [static]
Back up parent txn’s cursors, then grab the originals for tracking
static void mdb_cursors_close (MDB_txn * txn, unsigned merge) [static]
Close this write txn’s
cursors, give parent txn’s cursors back to parent.
Parameters
txn the transaction
handle.
merge true to keep changes to parent cursors, false to
revert.
Returns
Nothing (the function is declared void).
static int mdb_reader_pid (MDB_env * env, enum Pidlock_op op, MDB_PID_T pid) [static]
Set or check a pid lock. Set
returns 0 on success. Check returns 0 if the process is
certainly dead, nonzero if it may be alive (the lock exists
or an error happened so we do not know).
On Windows Pidset is a no-op, we merely check for the
existence of the process with the given pid. On POSIX we use
a single byte lock on the lockfile, set at an offset equal
to the pid.
static int mdb_txn_renew0 (MDB_txn * txn) [static]
Common code for
mdb_txn_begin() and mdb_txn_renew().
Parameters
txn the transaction handle to initialize
Returns
0 on success, non-zero on failure.
int mdb_txn_renew (MDB_txn * txn)
Renew a read-only transaction.
This acquires a new reader lock for a transaction handle
that had been released by mdb_txn_reset(). It must be
called before a reset transaction may be used again.
Parameters
txn A transaction handle returned by mdb_txn_begin()
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• MDB_PANIC - a fatal error occurred earlier and the environment must be shut down.
• EINVAL - an invalid parameter was specified.
int mdb_txn_begin (MDB_env * env, MDB_txn * parent, unsigned int flags, MDB_txn ** txn)
Create a transaction for use
with the environment. The transaction handle may be
discarded using mdb_txn_abort() or
mdb_txn_commit().
Note
A transaction and its cursors
must only be used by a single thread, and a thread may only
have a single transaction at a time. If MDB_NOTLS is
in use, this does not apply to read-only transactions.
Cursors may not span transactions.
Parameters
env An environment
handle returned by mdb_env_create()
parent If this parameter is non-NULL, the new
transaction will be a nested transaction, with the
transaction indicated by parent as its parent.
Transactions may be nested to any level. A parent
transaction and its cursors may not issue any other
operations than mdb_txn_commit and mdb_txn_abort while it
has active child transactions.
flags Special options for this transaction. This
parameter must be set to 0 or by bitwise OR’ing
together one or more of the values described here.
|
• |
MDB_RDONLY This transaction will not perform any write operations. |
txn Address where the new MDB_txn handle will be stored
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• MDB_PANIC - a fatal error occurred earlier and the environment must be shut down.
• MDB_MAP_RESIZED - another process wrote data beyond this MDB_env’s mapsize and this environment’s map must be resized as well. See mdb_env_set_mapsize().
• MDB_READERS_FULL - a read-only transaction was requested and the reader lock table is full. See mdb_env_set_maxreaders().
• ENOMEM - out of memory.
MDB_env* mdb_txn_env (MDB_txn * txn)
Returns the transaction’s
MDB_env.
Parameters
txn A transaction handle returned by mdb_txn_begin()
size_t mdb_txn_id (MDB_txn * txn)
Return the transaction’s
ID. This returns the identifier associated with this
transaction. For a read-only transaction, this corresponds
to the snapshot being read; concurrent readers will
frequently have the same transaction ID.
Parameters
txn A transaction handle returned by mdb_txn_begin()
Returns
A transaction ID, valid if input is an active transaction.
static void mdb_dbis_update (MDB_txn * txn, int keep) [static]
Export or close DBI handles opened in this txn.
void mdb_txn_reset (MDB_txn * txn)
Reset a read-only transaction.
Abort the transaction like mdb_txn_abort(), but keep
the transaction handle. mdb_txn_renew() may reuse the
handle. This saves allocation overhead if the process will
start a new read-only transaction soon, and also locking
overhead if MDB_NOTLS is in use. The reader table
lock is released, but the table slot stays tied to its
thread or MDB_txn. Use mdb_txn_abort() to
discard a reset handle, and to free its lock table slot if
MDB_NOTLS is in use. Cursors opened within the transaction
must not be used again after this call, except with
mdb_cursor_renew(). Reader locks generally
don’t interfere with writers, but they keep old
versions of database pages allocated. Thus they prevent the
old pages from being reused when writers commit new data,
and so under heavy load the database size may grow much more
rapidly than otherwise.
Parameters
txn A transaction handle returned by mdb_txn_begin()
void mdb_txn_abort (MDB_txn * txn)
Abandon all the operations of
the transaction instead of saving them. The transaction
handle is freed. It and its cursors must not be used again
after this call, except with mdb_cursor_renew().
Note
Earlier documentation incorrectly said all cursors would be freed. Only write-transactions free cursors.
Parameters
txn A transaction handle returned by mdb_txn_begin()
static int mdb_freelist_save (MDB_txn * txn) [static]
Save the freelist as of this transaction to the freeDB. This changes the freelist. Keep trying until it stabilizes.
int mdb_txn_commit (MDB_txn * txn)
Commit all the operations of a
transaction into the database. The transaction handle is
freed. It and its cursors must not be used again after this
call, except with mdb_cursor_renew().
Note
Earlier documentation incorrectly said all cursors would be freed. Only write-transactions free cursors.
Parameters
txn A transaction handle returned by mdb_txn_begin()
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified.
• ENOSPC - no more disk space.
• EIO - a low-level I/O error occurred while writing.
• ENOMEM - out of memory.
static void ESECT mdb_env_init_meta0 (MDB_env * env, MDB_meta * meta) [static]
Fill in most of the zeroed MDB_meta for an empty database environment
static int ESECT mdb_env_init_meta (MDB_env * env, MDB_meta * meta) [static]
Write the environment parameters
of a freshly created DB environment.
Parameters
env the environment
handle
meta the MDB_meta to write
Returns
0 on success, non-zero on failure.
int ESECT mdb_env_create (MDB_env ** env)
Create an LMDB environment
handle. This function allocates memory for a MDB_env
structure. To release the allocated memory and discard the
handle, call mdb_env_close(). Before the handle may
be used, it must be opened using mdb_env_open().
Various other options may also need to be set before opening
the handle, e.g. mdb_env_set_mapsize(),
mdb_env_set_maxreaders(),
mdb_env_set_maxdbs(), depending on usage
requirements.
Parameters
env The address where the new handle will be stored
Returns
A non-zero error value on failure and 0 on success.
int ESECT mdb_env_set_mapsize (MDB_env * env, size_t size)
Set the size of the memory map
to use for this environment. The size should be a multiple
of the OS page size. The default is 10485760 bytes. The size
of the memory map is also the maximum size of the database.
The value should be chosen as large as possible, to
accommodate future growth of the database. This function
should be called after mdb_env_create() and before
mdb_env_open(). It may be called at later times if no
transactions are active in this process. Note that the
library does not check for this condition, the caller must
ensure it explicitly.
The new size takes effect immediately for the current
process but will not be persisted to any others until a
write transaction has been committed by the current process.
Also, only mapsize increases are persisted into the
environment.
If the mapsize is increased by another process, and data has
grown beyond the range of the current mapsize,
mdb_txn_begin() will return MDB_MAP_RESIZED.
This function may be called with a size of zero to adopt the
new size.
Any attempt to set a size smaller than the space already
consumed by the environment will be silently changed to the
current size of the used space.
Parameters
env An environment
handle returned by mdb_env_create()
size The size in bytes
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified, or the environment has an active write transaction.
int ESECT mdb_env_set_maxdbs (MDB_env * env, MDB_dbi dbs)
Set the maximum number of named
databases for the environment. This function is only needed
if multiple databases will be used in the environment.
Simpler applications that use the environment as a single
unnamed database can ignore this option. This function may
only be called after mdb_env_create() and before
mdb_env_open().
Currently a moderate number of slots are cheap but a huge
number gets expensive: 7-120 words per transaction, and
every mdb_dbi_open() does a linear search of the
opened slots.
Parameters
env An environment
handle returned by mdb_env_create()
dbs The maximum number of databases
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified, or the environment is already open.
int ESECT mdb_env_set_maxreaders (MDB_env * env, unsigned int readers)
Set the maximum number of
threads/reader slots for the environment. This defines the
number of slots in the lock table that is used to track
readers in the environment. The default is 126. Starting
a read-only transaction normally ties a lock table slot to
the current thread until the environment closes or the
thread exits. If MDB_NOTLS is in use, mdb_txn_begin()
instead ties the slot to the MDB_txn object until it
or the MDB_env object is destroyed. This function may
only be called after mdb_env_create() and before
mdb_env_open().
Parameters
env An environment
handle returned by mdb_env_create()
readers The maximum number of reader lock table
slots
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified, or the environment is already open.
int ESECT mdb_env_get_maxreaders (MDB_env * env, unsigned int * readers)
Get the maximum number of
threads/reader slots for the environment.
Parameters
env An environment
handle returned by mdb_env_create()
readers Address of an integer to store the number of
readers
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified.
static int ESECT mdb_fname_init (const char * path, unsigned envflags, MDB_name * fname) [static]
Set up filename + scratch area
for filename suffix, for opening files. It should be freed
with mdb_fname_destroy(). On Windows, paths are
converted from char *UTF-8 to wchar_t *UTF-16.
Parameters
path Pathname for
mdb_env_open().
envflags Whether a subdir and/or lockfile will be used.
fname Resulting filename, with room for a suffix if
necessary.
static int ESECT mdb_fopen (const MDB_env * env, MDB_name * fname, enum mdb_fopen_type which, mdb_mode_t mode, HANDLE * res) [static]
Open an LMDB file.
Parameters
env The LMDB
environment.
fname Path from mdb_fname_init(). A suffix
is appended if necessary to create the filename, without
changing mn_len.
which Determines file type, access mode, etc.
mode The Unix permissions for the file, if we create it.
res Resulting file handle.
Returns
0 on success, non-zero on failure.
static int ESECT mdb_env_open2 (MDB_env * env) [static]
Further setup required for opening an LMDB environment
static void mdb_env_reader_dest (void * ptr) [static]
Release a reader thread’s
slot in the reader lock table. This function is called
automatically when a thread exits.
Parameters
ptr This points to the slot in the reader lock table.
static int ESECT mdb_env_share_locks (MDB_env * env, int * excl) [static]
Downgrade the exclusive lock on the region back to shared
static int ESECT mdb_env_excl_lock (MDB_env * env, int * excl) [static]
Try to get exclusive lock, otherwise shared. Maintain *excl = -1: no/unknown lock, 0: shared, 1: exclusive.
static int ESECT mdb_env_setup_locks (MDB_env * env, MDB_name * fname, int mode, int * excl) [static]
Open and/or initialize the lock
region for the environment.
Parameters
env The LMDB
environment.
fname Filename + scratch area, from
mdb_fname_init().
mode The Unix permissions for the file, if we create it.
excl In -1, out lock type: -1 none, 0 shared, 1
exclusive
Returns
0 on success, non-zero on failure.
int ESECT mdb_env_open (MDB_env * env, const char * path, unsigned int flags, mdb_mode_t mode)
Open an environment handle. If
this function fails, mdb_env_close() must be called
to discard the MDB_env handle.
Parameters
env An environment
handle returned by mdb_env_create()
path The directory in which the database files reside.
This directory must already exist and be writable.
flags Special options for this environment. This
parameter must be set to 0 or by bitwise OR’ing
together one or more of the values described here. Flags set
by mdb_env_set_flags() are also used.
|
• |
MDB_FIXEDMAP use a fixed address for the mmap region. This flag must be specified when creating the environment, and is stored persistently in the environment. If successful, the memory map will always reside at the same virtual address and pointers used to reference data items in the database will be constant across multiple invocations. This option may not always work, depending on how the operating system has allocated memory to shared libraries and other uses. The feature is highly experimental. | ||
|
• |
MDB_NOSUBDIR By default, LMDB creates its environment in a directory whose pathname is given in path, and creates its data and lock files under that directory. With this option, path is used as-is for the database main data file. The database lock file is the path with ’-lock’ appended. | ||
|
• |
MDB_RDONLY Open the environment in read-only mode. No write operations will be allowed. LMDB will still modify the lock file - except on read-only filesystems, where LMDB does not use locks. | ||
|
• |
MDB_WRITEMAP Use a writeable memory map unless MDB_RDONLY is set. This uses fewer mallocs but loses protection from application bugs like wild pointer writes and other bad updates into the database. This may be slightly faster for DBs that fit entirely in RAM, but is slower for DBs larger than RAM. Incompatible with nested transactions. Do not mix processes with and without MDB_WRITEMAP on the same environment. This can defeat durability (mdb_env_sync etc). | ||
|
• |
MDB_NOMETASYNC Flush system buffers to disk only once per transaction, omit the metadata flush. Defer that until the system flushes files to disk, or next non-MDB_RDONLY commit or mdb_env_sync(). This optimization maintains database integrity, but a system crash may undo the last committed transaction. I.e. it preserves the ACI (atomicity, consistency, isolation) but not D (durability) database property. This flag may be changed at any time using mdb_env_set_flags(). | ||
|
• |
MDB_NOSYNC Don’t flush system buffers to disk when committing a transaction. This optimization means a system crash can corrupt the database or lose the last transactions if buffers are not yet flushed to disk. The risk is governed by how often the system flushes dirty buffers to disk and how often mdb_env_sync() is called. However, if the filesystem preserves write order and the MDB_WRITEMAP flag is not used, transactions exhibit ACI (atomicity, consistency, isolation) properties and only lose D (durability). I.e. database integrity is maintained, but a system crash may undo the final transactions. Note that (MDB_NOSYNC | MDB_WRITEMAP) leaves the system with no hint for when to write transactions to disk, unless mdb_env_sync() is called. (MDB_MAPASYNC | MDB_WRITEMAP) may be preferable. This flag may be changed at any time using mdb_env_set_flags(). | ||
|
• |
MDB_MAPASYNC When using MDB_WRITEMAP, use asynchronous flushes to disk. As with MDB_NOSYNC, a system crash can then corrupt the database or lose the last transactions. Calling mdb_env_sync() ensures on-disk database integrity until next commit. This flag may be changed at any time using mdb_env_set_flags(). | ||
|
• |
MDB_NOTLS Don’t use Thread-Local Storage. Tie reader locktable slots to MDB_txn objects instead of to threads. I.e. mdb_txn_reset() keeps the slot reserved for the MDB_txn object. A thread may use parallel read-only transactions. A read-only transaction may span threads if the user synchronizes its use. Applications that multiplex many user threads over individual OS threads need this option. Such an application must also serialize the write transactions in an OS thread, since LMDB’s write locking is unaware of the user threads. | ||
|
• |
MDB_NOLOCK Don’t do any locking. If concurrent access is anticipated, the caller must manage all concurrency itself. For proper operation the caller must enforce single-writer semantics, and must ensure that no readers are using old transactions while a writer is active. The simplest approach is to use an exclusive lock so that no readers may be active at all when a writer begins. | ||
|
• |
MDB_NORDAHEAD Turn off readahead. Most operating systems perform readahead on read requests by default. This option turns it off if the OS supports it. Turning it off may help random read performance when the DB is larger than RAM and system RAM is full. The option is not implemented on Windows. | ||
|
• |
MDB_NOMEMINIT Don’t initialize malloc’d memory before writing to unused spaces in the data file. By default, memory for pages written to the data file is obtained using malloc. While these pages may be reused in subsequent transactions, freshly malloc’d pages will be initialized to zeroes before use. This avoids persisting leftover data from other code (that used the heap and subsequently freed the memory) into the data file. Note that many other system libraries may allocate and free memory from the heap for arbitrary uses. E.g., stdio may use the heap for file I/O buffers. This initialization step has a modest performance cost so some applications may want to disable it using this flag. This option can be a problem for applications which handle sensitive data like passwords, and it makes memory checkers like Valgrind noisy. This flag is not needed with MDB_WRITEMAP, which writes directly to the mmap instead of using malloc for pages. The initialization is also skipped if MDB_RESERVE is used; the caller is expected to overwrite all of the memory that was reserved in that case. This flag may be changed at any time using mdb_env_set_flags(). |
mode The UNIX permissions to set on created files and semaphores. This parameter is ignored on Windows.
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• MDB_VERSION_MISMATCH - the version of the LMDB library doesn’t match the version that created the database environment.
• MDB_INVALID - the environment file headers are corrupted.
• ENOENT - the directory specified by the path parameter doesn’t exist.
• EACCES - the user didn’t have permission to access the environment files.
• EAGAIN - the environment was locked by another process.
void ESECT mdb_env_close (MDB_env * env)
Close the environment and
release the memory map. Only a single thread may call this
function. All transactions, databases, and cursors must
already be closed before calling this function. Attempts to
use any such handles after calling this function will cause
a SIGSEGV. The environment handle will be freed and must not
be used again after this call.
Parameters
env An environment handle returned by mdb_env_create()
static int mdb_cmp_long (const MDB_val * a, const MDB_val * b) [static]
Compare two items pointing at aligned size_t’s
static int mdb_cmp_int (const MDB_val * a, const MDB_val * b) [static]
Compare two items pointing at
aligned unsigned int’s.
This is also set as
MDB_INTEGERDUP|MDB_DUPFIXED’s
MDB_dbx.md_dcmp, but mdb_cmp_clong() is called
instead if the data type is size_t.
static int mdb_cmp_cint (const MDB_val * a, const MDB_val * b) [static]
Compare two items pointing at unsigned ints of unknown alignment. Nodes and keys are guaranteed to be 2-byte aligned.
static int mdb_cmp_memn (const MDB_val * a, const MDB_val * b) [static]
Compare two items lexically
static int mdb_cmp_memnr (const MDB_val * a, const MDB_val * b) [static]
Compare two items in reverse byte order
static int mdb_page_search_lowest (MDB_cursor * mc) [static]
Search for the lowest key under the current branch page. This just bypasses a NUMKEYS check in the current page before calling mdb_page_search_root(), because the callers are all in situations where the current page is known to be underfilled.
int mdb_get (MDB_txn * txn, MDB_dbi dbi, MDB_val * key, MDB_val * data)
Get items from a database. This
function retrieves key/data pairs from the database. The
address and length of the data associated with the specified
key are returned in the structure to which
data refers. If the database supports duplicate keys
(MDB_DUPSORT) then the first data item for the key
will be returned. Retrieval of other items requires the use
of mdb_cursor_get().
Note
The memory pointed to by the
returned values is owned by the database. The caller need
not dispose of the memory, and may not modify it in any way.
For values returned in a read-only transaction any
modification attempts will cause a SIGSEGV.
Values returned from the database are valid only until a
subsequent update operation, or the end of the
transaction.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by mdb_dbi_open()
key The key to search for in the database
data The data corresponding to the key
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• MDB_NOTFOUND - the key was not in the database.
• EINVAL - an invalid parameter was specified.
int mdb_cursor_get (MDB_cursor * cursor, MDB_val * key, MDB_val * data, MDB_cursor_op op)
Retrieve by cursor. This
function retrieves key/data pairs from the database. The
address and length of the key are returned in the object to
which key refers (except for the case of the
MDB_SET option, in which the key object is
unchanged), and the address and length of the data are
returned in the object to which data refers. See
mdb_get() for restrictions on using the output
values.
Parameters
cursor A cursor handle
returned by mdb_cursor_open()
key The key for a retrieved item
data The data of a retrieved item
op A cursor operation MDB_cursor_op
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• MDB_NOTFOUND - no matching key found.
• EINVAL - an invalid parameter was specified.
static int mdb_cursor_touch (MDB_cursor * mc) [static]
Touch all the pages in the
cursor stack. Set mc_top. Makes sure all the pages are
writable, before attempting a write operation.
Parameters
mc The cursor to operate on.
int mdb_cursor_put (MDB_cursor * cursor, MDB_val * key, MDB_val * data, unsigned int flags)
Store by cursor. This function
stores key/data pairs into the database. The cursor is
positioned at the new item, or on failure usually near it.
Note
Earlier documentation incorrectly said errors would leave the state of the cursor unchanged.
Parameters
cursor A cursor handle
returned by mdb_cursor_open()
key The key operated on.
data The data operated on.
flags Options for this operation. This parameter must be
set to 0 or one of the values described here.
|
• |
MDB_CURRENT - replace the item at the current cursor position. The key parameter must still be provided, and must match it. If using sorted duplicates (MDB_DUPSORT) the data item must still sort into the same place. This is intended to be used when the new data is the same size as the old. Otherwise it will simply perform a delete of the old record followed by an insert. | ||
|
• |
MDB_NODUPDATA - enter the new key/data pair only if it does not already appear in the database. This flag may only be specified if the database was opened with MDB_DUPSORT. The function will return MDB_KEYEXIST if the key/data pair already appears in the database. | ||
|
• |
MDB_NOOVERWRITE - enter the new key/data pair only if the key does not already appear in the database. The function will return MDB_KEYEXIST if the key already appears in the database, even if the database supports duplicates (MDB_DUPSORT). | ||
|
• |
MDB_RESERVE - reserve space for data of the given size, but don’t copy the given data. Instead, return a pointer to the reserved space, which the caller can fill in later - before the next update operation or the transaction ends. This saves an extra memcpy if the data is being generated later. This flag must not be specified if the database was opened with MDB_DUPSORT. | ||
|
• |
MDB_APPEND - append the given key/data pair to the end of the database. No key comparisons are performed. This option allows fast bulk loading when keys are already known to be in the correct order. Loading unsorted keys with this flag will cause a MDB_KEYEXIST error. | ||
|
• |
MDB_APPENDDUP - as above, but for sorted dup data. | ||
|
• |
MDB_MULTIPLE - store multiple contiguous data elements in a single request. This flag may only be specified if the database was opened with MDB_DUPFIXED. The data argument must be an array of two MDB_vals. The mv_size of the first MDB_val must be the size of a single data element. The mv_data of the first MDB_val must point to the beginning of the array of contiguous data elements. The mv_size of the second MDB_val must be the count of the number of data elements to store. On return this field will be set to the count of the number of elements actually written. The mv_data of the second MDB_val is unused. |
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• MDB_MAP_FULL - the database is full, see mdb_env_set_mapsize().
• MDB_TXN_FULL - the transaction has too many dirty pages.
• EACCES - an attempt was made to write in a read-only transaction.
• EINVAL - an invalid parameter was specified.
int mdb_cursor_del (MDB_cursor * cursor, unsigned int flags)
Delete current key/data pair.
This function deletes the key/data pair to which the cursor
refers. This does not invalidate the cursor, so operations
such as MDB_NEXT can still be used on it. Both MDB_NEXT and
MDB_GET_CURRENT will return the same record after this
operation.
Parameters
cursor A cursor handle
returned by mdb_cursor_open()
flags Options for this operation. This parameter must be
set to 0 or one of the values described here.
|
• |
MDB_NODUPDATA - delete all of the data items for the current key. This flag may only be specified if the database was opened with MDB_DUPSORT. |
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EACCES - an attempt was made to write in a read-only transaction.
• EINVAL - an invalid parameter was specified.
int mdb_cursor_open (MDB_txn * txn, MDB_dbi dbi, MDB_cursor ** cursor)
Create a cursor handle. A cursor
is associated with a specific transaction and database. A
cursor cannot be used when its database handle is closed.
Nor when its transaction has ended, except with
mdb_cursor_renew(). It can be discarded with
mdb_cursor_close(). A cursor in a write-transaction
can be closed before its transaction ends, and will
otherwise be closed when its transaction ends. A cursor in a
read-only transaction must be closed explicitly, before or
after its transaction ends. It can be reused with
mdb_cursor_renew() before finally closing it.
Note
Earlier documentation said that cursors in every transaction were closed when the transaction committed or aborted.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by mdb_dbi_open()
cursor Address where the new MDB_cursor handle
will be stored
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified.
int mdb_cursor_renew (MDB_txn * txn, MDB_cursor * cursor)
Renew a cursor handle. A cursor
is associated with a specific transaction and database.
Cursors that are only used in read-only transactions may be
re-used, to avoid unnecessary malloc/free overhead. The
cursor may be associated with a new read-only transaction,
and referencing the same database handle as it was created
with. This may be done whether the previous transaction is
live or dead.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
cursor A cursor handle returned by
mdb_cursor_open()
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified.
int mdb_cursor_count (MDB_cursor * cursor, size_t * countp)
Return count of duplicates for
current key. This call is only valid on databases that
support sorted duplicate data items MDB_DUPSORT.
Parameters
cursor A cursor handle
returned by mdb_cursor_open()
countp Address where the count will be stored
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - cursor is not initialized, or an invalid parameter was specified.
void mdb_cursor_close (MDB_cursor * cursor)
Close a cursor handle. The
cursor handle will be freed and must not be used again after
this call. Its transaction must still be live if it is a
write-transaction.
Parameters
cursor A cursor handle returned by mdb_cursor_open()
MDB_txn* mdb_cursor_txn (MDB_cursor * cursor)
Return the cursor’s
transaction handle.
Parameters
cursor A cursor handle returned by mdb_cursor_open()
MDB_dbi mdb_cursor_dbi (MDB_cursor * cursor)
Return the cursor’s
database handle.
Parameters
cursor A cursor handle returned by mdb_cursor_open()
static void mdb_cursor_copy (const MDB_cursor * csrc, MDB_cursor * cdst) [static]
Copy the contents of a cursor.
Parameters
csrc The cursor to copy
from.
cdst The cursor to copy to.
int mdb_del (MDB_txn * txn, MDB_dbi dbi, MDB_val * key, MDB_val * data)
Delete items from a database.
This function removes key/data pairs from the database. If
the database does not support sorted duplicate data items
(MDB_DUPSORT) the data parameter is ignored. If the
database supports sorted duplicates and the data parameter
is NULL, all of the duplicate data items for the key will be
deleted. Otherwise, if the data parameter is non-NULL only
the matching data item will be deleted. This function will
return MDB_NOTFOUND if the specified key/data pair is
not in the database.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by mdb_dbi_open()
key The key to delete from the database
data The data to delete
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EACCES - an attempt was made to write in a read-only transaction.
• EINVAL - an invalid parameter was specified.
int mdb_put (MDB_txn * txn, MDB_dbi dbi, MDB_val * key, MDB_val * data, unsigned int flags)
Store items into a database.
This function stores key/data pairs in the database. The
default behavior is to enter the new key/data pair,
replacing any previously existing key if duplicates are
disallowed, or adding a duplicate data item if duplicates
are allowed (MDB_DUPSORT).
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by mdb_dbi_open()
key The key to store in the database
data The data to store
flags Special options for this operation. This parameter
must be set to 0 or by bitwise OR’ing together one or
more of the values described here.
• MDB_NODUPDATA - enter the new key/data pair only if it does not already appear in the database. This flag may only be specified if the database was opened with MDB_DUPSORT. The function will return MDB_KEYEXIST if the key/data pair already appears in the database.
• MDB_NOOVERWRITE - enter the new key/data pair only if the key does not already appear in the database. The function will return MDB_KEYEXIST if the key already appears in the database, even if the database supports duplicates (MDB_DUPSORT). The data parameter will be set to point to the existing item.
• MDB_RESERVE - reserve space for data of the given size, but don’t copy the given data. Instead, return a pointer to the reserved space, which the caller can fill in later - before the next update operation or the transaction ends. This saves an extra memcpy if the data is being generated later. LMDB does nothing else with this memory, the caller is expected to modify all of the space requested. This flag must not be specified if the database was opened with MDB_DUPSORT.
• MDB_APPEND - append the given key/data pair to the end of the database. This option allows fast bulk loading when keys are already known to be in the correct order. Loading unsorted keys with this flag will cause a MDB_KEYEXIST error.
• MDB_APPENDDUP - as above, but for sorted dup data.
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• MDB_MAP_FULL - the database is full, see mdb_env_set_mapsize().
• MDB_TXN_FULL - the transaction has too many dirty pages.
• EACCES - an attempt was made to write in a read-only transaction.
• EINVAL - an invalid parameter was specified.
static THREAD_RET ESECT CALL_CONV mdb_env_copythr (void * arg) [static]
Dedicated writer thread for compacting copy.
static int ESECT mdb_env_cthr_toggle (mdb_copy * my, int adjust) [static]
Give buffer and/or
MDB_EOF to writer thread, await unused buffer.
Parameters
my control structure.
adjust (1 to hand off 1 buffer) | (MDB_EOF when
ending).
static int ESECT mdb_env_cwalk (mdb_copy * my, pgno_t * pg, int flags) [static]
Depth-first tree traversal for
compacting copy.
Parameters
my control structure.
pg database root.
flags includes F_DUPDATA if it is a
sorted-duplicate sub-DB.
static int ESECT mdb_env_copyfd1 (MDB_env * env, HANDLE fd) [static]
Copy environment with compaction.
static int ESECT mdb_env_copyfd0 (MDB_env * env, HANDLE fd) [static]
Copy environment as-is.
int ESECT mdb_env_copy2 (MDB_env * env, const char * path, unsigned int flags)
Copy an LMDB environment to the
specified path, with options. This function may be used to
make a backup of an existing environment. No lockfile is
created, since it gets recreated at need.
Note
This call can trigger significant file size growth if run in parallel with write transactions, because it employs a read-only transaction. See long-lived transactions under Caveats.
Parameters
env An environment
handle returned by mdb_env_create(). It must have
already been opened successfully.
path The directory in which the copy will reside. This
directory must already exist and be writable but must
otherwise be empty.
flags Special options for this operation. This parameter
must be set to 0 or by bitwise OR’ing together one or
more of the values described here.
• MDB_CP_COMPACT - Perform compaction while copying: omit free pages and sequentially renumber all pages in output. This option consumes more CPU and runs more slowly than the default. Currently it fails if the environment has suffered a page leak.
Returns
A non-zero error value on failure and 0 on success.
int ESECT mdb_env_copy (MDB_env * env, const char * path)
Copy an LMDB environment to the
specified path. This function may be used to make a backup
of an existing environment. No lockfile is created, since it
gets recreated at need.
Note
This call can trigger significant file size growth if run in parallel with write transactions, because it employs a read-only transaction. See long-lived transactions under Caveats.
Parameters
env An environment
handle returned by mdb_env_create(). It must have
already been opened successfully.
path The directory in which the copy will reside. This
directory must already exist and be writable but must
otherwise be empty.
Returns
A non-zero error value on failure and 0 on success.
int ESECT mdb_env_set_flags (MDB_env * env, unsigned int flags, int onoff)
Set environment flags. This may
be used to set some flags in addition to those from
mdb_env_open(), or to unset these flags. If several
threads change the flags at the same time, the result is
undefined.
Parameters
env An environment
handle returned by mdb_env_create()
flags The flags to change, bitwise OR’ed together
onoff A non-zero value sets the flags, zero clears
them.
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified.
int ESECT mdb_env_get_flags (MDB_env * env, unsigned int * flags)
Get environment flags.
Parameters
env An environment
handle returned by mdb_env_create()
flags The address of an integer to store the flags
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified.
int ESECT mdb_env_set_userctx (MDB_env * env, void * ctx)
Set application information
associated with the MDB_env.
Parameters
env An environment
handle returned by mdb_env_create()
ctx An arbitrary pointer for whatever the application
needs.
Returns
A non-zero error value on failure and 0 on success.
void* ESECT mdb_env_get_userctx (MDB_env * env)
Get the application information
associated with the MDB_env.
Parameters
env An environment handle returned by mdb_env_create()
Returns
The pointer set by mdb_env_set_userctx().
int ESECT mdb_env_set_assert (MDB_env * env, MDB_assert_func * func)
Set or reset the assert()
callback of the environment. Disabled if liblmdb is built
with NDEBUG.
Note
This hack should become obsolete as lmdb’s error handling matures.
Parameters
env An environment
handle returned by mdb_env_create().
func An MDB_assert_func function, or 0.
Returns
A non-zero error value on failure and 0 on success.
int ESECT mdb_env_get_path (MDB_env * env, const char ** path)
Return the path that was used in
mdb_env_open().
Parameters
env An environment
handle returned by mdb_env_create()
path Address of a string pointer to contain the path.
This is the actual string in the environment, not a copy. It
should not be altered in any way.
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified.
int ESECT mdb_env_get_fd (MDB_env * env, mdb_filehandle_t * fd)
Return the file descriptor for
the given environment. This function may be called after
fork(), so the descriptor can be closed before exec*().
Other LMDB file descriptors have FD_CLOEXEC. (Until LMDB
0.9.18, only the lockfile had that.)
Parameters
env An environment
handle returned by mdb_env_create()
fd Address of a mdb_filehandle_t to contain the
descriptor.
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified.
static int ESECT mdb_stat0 (MDB_env * env, MDB_db * db, MDB_stat * arg) [static]
Common code for
mdb_stat() and mdb_env_stat().
Parameters
env the environment to
operate in.
db the MDB_db record containing the stats to
return.
arg the address of an MDB_stat structure to
receive the stats.
Returns
0, this function always succeeds.
int ESECT mdb_env_stat (MDB_env * env, MDB_stat * stat)
Return statistics about the LMDB
environment.
Parameters
env An environment
handle returned by mdb_env_create()
stat The address of an MDB_stat structure where
the statistics will be copied
int ESECT mdb_env_info (MDB_env * env, MDB_envinfo * stat)
Return information about the
LMDB environment.
Parameters
env An environment
handle returned by mdb_env_create()
stat The address of an MDB_envinfo structure
where the information will be copied
int mdb_dbi_open (MDB_txn * txn, const char * name, unsigned int flags, MDB_dbi * dbi)
Open a database in the
environment. A database handle denotes the name and
parameters of a database, independently of whether such a
database exists. The database handle may be discarded by
calling mdb_dbi_close(). The old database handle is
returned if the database was already open. The handle may
only be closed once.
The database handle will be private to the current
transaction until the transaction is successfully committed.
If the transaction is aborted the handle will be closed
automatically. After a successful commit the handle will
reside in the shared environment, and may be used by other
transactions.
This function must not be called from multiple concurrent
transactions in the same process. A transaction that uses
this function must finish (either commit or abort) before
any other transaction in the process may use this function.
To use named databases (with name != NULL),
mdb_env_set_maxdbs() must be called before opening
the environment. Database names are keys in the unnamed
database, and may be read but not written.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
name The name of the database to open. If only a single
database is needed in the environment, this value may be
NULL.
flags Special options for this database. This parameter
must be set to 0 or by bitwise OR’ing together one or
more of the values described here.
• MDB_REVERSEKEY Keys are strings to be compared in reverse order, from the end of the strings to the beginning. By default, keys are treated as strings and compared from beginning to end.
• MDB_DUPSORT Duplicate keys may be used in the database. (Or, from another perspective, keys may have multiple data items, stored in sorted order.) By default keys must be unique and may have only a single data item.
• MDB_INTEGERKEY Keys are binary integers in native byte order, either unsigned int or size_t, and will be sorted as such. The keys must all be of the same size.
• MDB_DUPFIXED This flag may only be used in combination with MDB_DUPSORT. This option tells the library that the data items for this database are all the same size, which allows further optimizations in storage and retrieval. When all data items are the same size, the MDB_GET_MULTIPLE, MDB_NEXT_MULTIPLE and MDB_PREV_MULTIPLE cursor operations may be used to retrieve multiple items at once.
• MDB_INTEGERDUP This option specifies that duplicate data items are binary integers, similar to MDB_INTEGERKEY keys.
• MDB_REVERSEDUP This option specifies that duplicate data items should be compared as strings in reverse order.
• MDB_CREATE Create the named database if it doesn’t exist. This option is not allowed in a read-only transaction or a read-only environment.
dbi Address where the new MDB_dbi handle will be stored
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• MDB_NOTFOUND - the specified database doesn’t exist in the environment and MDB_CREATE was not specified.
• MDB_DBS_FULL - too many databases have been opened. See mdb_env_set_maxdbs().
int ESECT mdb_stat (MDB_txn * txn, MDB_dbi dbi, MDB_stat * stat)
Retrieve statistics for a
database.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by mdb_dbi_open()
stat The address of an MDB_stat structure where
the statistics will be copied
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified.
void mdb_dbi_close (MDB_env * env, MDB_dbi dbi)
Close a database handle.
Normally unnecessary. Use with care: This call is not mutex
protected. Handles should only be closed by a single thread,
and only if no other threads are going to reference the
database handle or one of its cursors any further. Do not
close a handle if an existing transaction has modified its
database. Doing so can cause misbehavior from database
corruption to errors like MDB_BAD_VALSIZE (since the DB name
is gone).
Closing a database handle is not necessary, but lets
mdb_dbi_open() reuse the handle value. Usually
it’s better to set a bigger
mdb_env_set_maxdbs(), unless that value would be
large.
Parameters
env An environment
handle returned by mdb_env_create()
dbi A database handle returned by
mdb_dbi_open()
int mdb_dbi_flags (MDB_txn * txn, MDB_dbi dbi, unsigned int * flags)
Retrieve the DB flags for a
database handle.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by mdb_dbi_open()
flags Address where the flags will be returned.
Returns
A non-zero error value on failure and 0 on success.
int mdb_drop (MDB_txn * txn, MDB_dbi dbi, int del)
Empty or delete+close a
database. See mdb_dbi_close() for restrictions about
closing the DB handle.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by mdb_dbi_open()
del 0 to empty the DB, 1 to delete it from the
environment and close the DB handle.
Returns
A non-zero error value on failure and 0 on success.
int mdb_set_compare (MDB_txn * txn, MDB_dbi dbi, MDB_cmp_func * cmp)
Set a custom key comparison
function for a database. The comparison function is called
whenever it is necessary to compare a key specified by the
application with a key currently stored in the database. If
no comparison function is specified, and no special key
flags were specified with mdb_dbi_open(), the keys
are compared lexically, with shorter keys collating before
longer keys.
Warning
This function must be called before any data access functions are used, otherwise data corruption may occur. The same comparison function must be used by every program accessing the database, every time the database is used.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by mdb_dbi_open()
cmp A MDB_cmp_func function
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified.
int mdb_set_dupsort (MDB_txn * txn, MDB_dbi dbi, MDB_cmp_func * cmp)
Set a custom data comparison
function for a MDB_DUPSORT database. This comparison
function is called whenever it is necessary to compare a
data item specified by the application with a data item
currently stored in the database. This function only takes
effect if the database was opened with the
MDB_DUPSORT flag. If no comparison function is
specified, and no special key flags were specified with
mdb_dbi_open(), the data items are compared
lexically, with shorter items collating before longer items.
Warning
This function must be called before any data access functions are used, otherwise data corruption may occur. The same comparison function must be used by every program accessing the database, every time the database is used.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by mdb_dbi_open()
cmp A MDB_cmp_func function
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified.
int mdb_set_relfunc (MDB_txn * txn, MDB_dbi dbi, MDB_rel_func * rel)
Set a relocation function for a
MDB_FIXEDMAP database.
Todo
The relocation function is called whenever it is necessary to move the data of an item to a different position in the database (e.g. through tree balancing operations, shifts as a result of adds or deletes, etc.). It is intended to allow address/position-dependent data items to be stored in a database in an environment opened with the MDB_FIXEDMAP option. Currently the relocation feature is unimplemented and setting this function has no effect.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by mdb_dbi_open()
rel A MDB_rel_func function
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified.
int mdb_set_relctx (MDB_txn * txn, MDB_dbi dbi, void * ctx)
Set a context pointer for a
MDB_FIXEDMAP database’s relocation function.
See mdb_set_relfunc and MDB_rel_func for more
details.
Parameters
txn A transaction handle
returned by mdb_txn_begin()
dbi A database handle returned by mdb_dbi_open()
ctx An arbitrary pointer for whatever the application
needs. It will be passed to the callback function set by
mdb_set_relfunc as its relctx parameter
whenever the callback is invoked.
Returns
A non-zero error value on failure and 0 on success. Some possible errors are:
• EINVAL - an invalid parameter was specified.
int ESECT mdb_env_get_maxkeysize (MDB_env * env)
Get the maximum size of keys and
MDB_DUPSORT data we can write. Depends on the
compile-time constant MDB_MAXKEYSIZE. Default 511.
See MDB_val.
Parameters
env An environment handle returned by mdb_env_create()
Returns
The maximum size of a key we can write
int ESECT mdb_reader_list (MDB_env * env, MDB_msg_func * func, void * ctx)
Dump the entries in the reader
lock table.
Parameters
env An environment
handle returned by mdb_env_create()
func A MDB_msg_func function
ctx Anything the message function needs
Returns
< 0 on failure, >= 0 on success.
static int ESECT mdb_pid_insert (MDB_PID_T * ids, MDB_PID_T pid) [static]
Insert pid into list if not already present. Return -1 if already present.
int ESECT mdb_reader_check (MDB_env * env, int * dead)
Check for stale entries in the
reader lock table.
Parameters
env An environment
handle returned by mdb_env_create()
dead Number of stale slots that were cleared
Returns
0 on success, non-zero on failure.
Variable Documentation
char* const mdb_errstr[] [static]
Initial value:
= {
"MDB_KEYEXIST: Key/data pair already exists",
"MDB_NOTFOUND: No matching key/data pair found",
"MDB_PAGE_NOTFOUND: Requested page not found",
"MDB_CORRUPTED: Located page was wrong type",
"MDB_PANIC: Update of meta page failed or environment had fatal error",
"MDB_VERSION_MISMATCH: Database environment version mismatch",
"MDB_INVALID: File is not an LMDB file",
"MDB_MAP_FULL: Environment mapsize limit reached",
"MDB_DBS_FULL: Environment maxdbs limit reached",
"MDB_READERS_FULL: Environment maxreaders limit reached",
"MDB_TLS_FULL: Thread-local storage keys full - too many environments open",
"MDB_TXN_FULL: Transaction has too many dirty pages - transaction too big",
"MDB_CURSOR_FULL: Internal error - cursor stack limit reached",
"MDB_PAGE_FULL: Internal error - page has no more space",
"MDB_MAP_RESIZED: Database contents grew beyond environment mapsize",
"MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed",
"MDB_BAD_RSLOT: Invalid reuse of reader locktable slot",
"MDB_BAD_TXN: Transaction must abort, has a child, or is invalid",
"MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size",
"MDB_BAD_DBI: The specified DBI handle was closed/changed unexpectedly",
}
Table of descriptions for LMDB Return Codes
const mdb_nchar_t* const mdb_suffixes[2][2] [static]
Initial value:
= {
{ MDB_NAME("/data.mdb"), MDB_NAME("") },
{ MDB_NAME("/lock.mdb"), MDB_NAME("-lock") }
}
Filename suffixes [datafile,lockfile][without,with
MDB_NOSUBDIR]
Author
Generated automatically by Doxygen for LMDB from the source code.