/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997
 *	Sleepycat Software.  All rights reserved.
 *
 *	@(#)db_int.h.src	10.28 (Sleepycat) 8/20/97
 */

#ifndef _DB_INTERNAL_H_
#define	_DB_INTERNAL_H_

#include <stddef.h>			/* offsetof, used by SSZ/SSZA. */

#include "db.h"				/* Standard DB include file. */
#include "queue.h"
#include "os_ext.h"

/*******************************************************
 * General purpose constants and macros.
 *******************************************************/
#define	UINT32_T_MAX	0xffffffff	/* Maximum 32-bit unsigned value. */
#define	UINT16_T_MAX	    0xffff	/* Maximum 16-bit unsigned value. */

#define	DB_MIN_PGSIZE	0x000200	/* Minimum page size (512). */
#define	DB_MAX_PGSIZE	0x010000	/* Maximum page size (65536). */

#define	DB_MINCACHE	10		/* Minimum number of cached pages. */

/*
 * Aligning items to particular sizes or in pages or memory.  ALIGNP is a
 * separate macro, as we've had to cast the pointer to different integral
 * types on different architectures.
 *
 * We cast pointers into unsigned longs when manipulating them because C89
 * guarantees that u_long is the largest available integral type and,
 * further, that arithmetic on it never generates overflows.  However,
 * neither C89 nor C9X requires that any integer type be large enough to hold
 * a pointer, although C9X created the intptr_t type, which is guaranteed to
 * hold a pointer but may or may not exist.  At some point in the future, we
 * should test for intptr_t and use it where available.
 *
 * ALIGN(value, bound)
 *	Round value up to the next multiple of bound.  The mask arithmetic
 *	is only correct when bound is a power of two.  bound is evaluated
 *	twice.
 * ALIGNP(value, bound)
 *	The same for a pointer value; the result is an ALIGNTYPE, not a
 *	pointer.  Both arguments are converted to ALIGNTYPE before the
 *	arithmetic: the cast has to cover the whole pointer expression (not
 *	just its first operand), and a bound that is unsigned and narrower
 *	than ALIGNTYPE would otherwise produce a mask that clears the upper
 *	bits of the address.
 */
#undef	ALIGNTYPE
#define	ALIGNTYPE		u_long
#undef	ALIGNP
#define	ALIGNP(value, bound)						\
	ALIGN((ALIGNTYPE)(value), (ALIGNTYPE)(bound))
#undef	ALIGN
#define	ALIGN(value, bound)	(((value) + (bound) - 1) & ~((bound) - 1))

/*
 * There are several on-page structures that are declared to have a number of
 * fields followed by a variable length array of items.  The structure size
 * without including the variable length array or the address of the first of
 * those elements can be found using SSZ.
 *
 * This macro can also be used to find the offset of a structure element in a
 * structure.  This is used in various places to copy structure elements from
 * unaligned memory references, e.g., pointers into a packed page.
 *
 * There are two versions for historical reasons: SSZ used to take the
 * address of the field through a null pointer, and compilers object if you
 * take the address of an array, so SSZA addressed the array's first element
 * instead.  Both are now built on the standard offsetof() from <stddef.h>,
 * which avoids dereferencing a null pointer and converting a pointer to an
 * int.  The offset of an array member is the offset of its first element, so
 * the two macros yield the same value for the same field.
 *
 * The result is converted to int, which is what callers have always received.
 */
#undef	SSZ
#define SSZ(name, field)	((int)offsetof(name, field))

#undef	SSZA
#define SSZA(name, field)	((int)offsetof(name, field))

/*
 * Free and free-string macros that overwrite memory during debugging.
 *
 * FREE(p, len)
 *	Release the allocation p.  When DEBUG is defined, the first len bytes
 *	are overwritten with 0xff before the free; otherwise len is not
 *	evaluated at all.
 * FREES(p)
 *	Release the nul-terminated string p.  When DEBUG is defined, the
 *	strlen(p) bytes of the string are overwritten first.
 *
 * Each macro expands to a single do { } while (0) statement, so that
 * "FREE(p, len);" is safe as the unbraced body of an if that is followed by
 * an else; the caller supplies the terminating semicolon.  In the DEBUG
 * build p is evaluated more than once, so it must not have side effects.
 */
#ifdef DEBUG
#undef	FREE
#define	FREE(p, len) do {						\
	memset((p), 0xff, (len));					\
	free(p);							\
} while (0)
#undef	FREES
#define	FREES(p) do {							\
	FREE((p), strlen(p));						\
} while (0)
#else
#undef	FREE
#define	FREE(p, len) do {						\
	free(p);							\
} while (0)
#undef	FREES
#define	FREES(p) do {							\
	free(p);							\
} while (0)
#endif

/*
 * Entry used when printing flag values: pairs one flag value with the name
 * to display for it.
 */
struct __fn {
	u_int32_t mask;			/* Flag value. */
	const char *name;		/* Flag name. */
};
typedef struct __fn FN;

/*
 * Set, clear and test flags.
 *
 * F_SET, F_CLR and F_ISSET operate on the "flags" member of the structure
 * that p points to; LF_SET, LF_CLR and LF_ISSET operate on a variable named
 * "flags" in the caller's scope.
 *
 * Every expansion is fully parenthesized, so each macro can be used as an
 * operand of a larger expression (for example, as an arm of ?:) without
 * being re-associated by operator precedence.
 */
#define	F_SET(p, f)	((p)->flags |= (f))
#define	F_CLR(p, f)	((p)->flags &= ~(f))
#define	F_ISSET(p, f)	((p)->flags & (f))
#define	LF_SET(f)	(flags |= (f))
#define	LF_CLR(f)	(flags &= ~(f))
#define	LF_ISSET(f)	(flags & (f))

/*
 * Display separator string: a fixed run of alternating '=' and '-'
 * characters.  Any previous definition is discarded first.
 */
#undef	DB_LINE
#define	DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="

/*******************************************************
 * Files.
 *******************************************************/
/*
 * Supply MAXPATHLEN when nothing has defined it yet: use PATH_MAX if that
 * is defined, otherwise fall back to 1024.
 */
#ifndef MAXPATHLEN		/* Maximum path length. */
#ifdef PATH_MAX
#define	MAXPATHLEN	PATH_MAX
#else
#define	MAXPATHLEN	1024
#endif
#endif

#define	PATH_DOT	"."	/* Current working directory. */
#define	PATH_SEPARATOR	"/"	/* Path separator (a string, not a char). */

/*
 * Fallback definitions of the read/write permission bits (octal), used only
 * when S_IRUSR has not already been defined.
 */
#ifndef S_IRUSR			/* UNIX specific file permissions. */
#define	S_IRUSR	0000400		/* R for owner */
#define	S_IWUSR	0000200		/* W for owner */
#define	S_IRGRP	0000040		/* R for group */
#define	S_IWGRP	0000020		/* W for group */
#define	S_IROTH	0000004		/* R for other */
#define	S_IWOTH	0000002		/* W for other */
#endif

#ifndef S_ISDIR			/* UNIX specific: directory test. */
/*
 * Fallback used only when S_ISDIR has not already been defined: non-zero
 * when the bits of mode m selected by the mask 0170000 equal 0040000.
 * The argument is parenthesized so that an expression such as "a | b" is
 * masked as a whole rather than having only its last operand masked.
 */
#define	S_ISDIR(m)	(((m) & 0170000) == 0040000)
#endif

/*******************************************************
 * Mutex support.
 *******************************************************/
typedef unsigned char tsl_t;		/* Test-and-set mutex resource. */



/*
 * !!!
 * Various systems require different alignments for mutexes (the worst we've
 * seen so far is 16-bytes on some HP architectures).  The mutex (tsl_t) must
 * be first in the db_mutex_t structure, which must itself be first in the
 * region.  This ensures the alignment is as returned by mmap(2), which should
 * be sufficient.  All other mutex users must ensure proper alignment locally.
 *
 * MUTEX_ALIGNMENT is the alignment needed for the tsl_t defined above
 * (presumably in bytes, as in the HP example -- confirm against users).
 */
#define	MUTEX_ALIGNMENT	1

/*
 * The offset of a mutex in memory: the distance in bytes from address a to
 * address b, as an off_t.
 *
 * Both arguments are parenthesized before being cast to a byte pointer, so
 * that an expression such as "base + n" is converted as a whole; otherwise
 * the cast would bind to "base" alone and n would be counted in bytes rather
 * than in elements.
 */
#define	MUTEX_LOCK_OFFSET(a, b)						\
	((off_t)((u_int8_t *)(b) - (u_int8_t *)(a)))

/*
 * Mutex structure.  With HAVE_SPINLOCKS it begins with the test-and-set
 * resource, otherwise with a backing file offset.  The holder's pid is
 * recorded whenever spinlocks are not in use, and in DEBUG builds when they
 * are.  The wait counters exist only with MUTEX_STATISTICS.
 */
typedef struct _db_mutex_t {
#ifdef HAVE_SPINLOCKS
	tsl_t	tsl_resource;		/* Resource test and set. */
#else
	off_t	off;			/* Backing file offset. */
#endif
#if !defined(HAVE_SPINLOCKS) || defined(DEBUG)
	u_long	pid;			/* Lock holder: 0 or process pid. */
#endif
#ifdef MUTEX_STATISTICS
	u_long	mutex_set_wait;		/* Blocking mutex: required waiting. */
	u_long	mutex_set_nowait;	/* Blocking mutex: without waiting. */
#endif
} db_mutex_t;

#include "mutex_ext.h"

/*******************************************************
 * Access methods.
 *******************************************************/
/*
 * Lock/unlock a DB thread.
 *
 * Both macros evaluate to 0 without calling the mutex code when DB_AM_THREAD
 * is not set in the handle's flags.  Otherwise they evaluate to the result
 * of __db_mutex_lock/__db_mutex_unlock on the handle's mutex; the lock call
 * is also handed the environment's db_yield, or NULL when the handle has no
 * environment.
 */
#define	DB_THREAD_LOCK(dbp)						\
	(!F_ISSET(dbp, DB_AM_THREAD) ? 0 :				\
	    __db_mutex_lock((db_mutex_t *)(dbp)->mutex,  -1,		\
	        (dbp)->dbenv != NULL ? (dbp)->dbenv->db_yield : NULL))
#define	DB_THREAD_UNLOCK(dbp)						\
	(!F_ISSET(dbp, DB_AM_THREAD) ? 0 :				\
	    __db_mutex_unlock((db_mutex_t *)(dbp)->mutex,  -1))

/*
 * Btree/recno local statistics structure: ten 32-bit counters.
 */
typedef struct __db_bt_lstat {
	u_int32_t bt_freed;		/* Pages freed for reuse. */
	u_int32_t bt_pfxsaved;		/* Bytes saved by prefix compression. */
	u_int32_t bt_split;		/* Total number of splits. */
	u_int32_t bt_rootsplit;		/* Root page splits. */
	u_int32_t bt_fastsplit;		/* Fast splits. */
	u_int32_t bt_added;		/* Items added. */
	u_int32_t bt_deleted;		/* Items deleted. */
	u_int32_t bt_get;		/* Items retrieved. */
	u_int32_t bt_cache_hit;		/* Hits in fast-insert code. */
	u_int32_t bt_cache_miss;	/* Misses in fast-insert code. */
} DB_BTREE_LSTAT;

/*******************************************************
 * Environment.
 *******************************************************/
/* Kind of file being named; the type passed to __db_appname(). */
typedef enum {
	DB_APP_NONE = 0,		/* No type (region). */
	DB_APP_DATA = 1,		/* Data file. */
	DB_APP_LOG = 2,			/* Log file. */
	DB_APP_TMP = 3			/* Temporary file. */
} APPNAME;

/*******************************************************
 * Regions.
 *******************************************************/
/*
 * Every shared memory region starts with this common structure, which lets
 * the general region code handle races between a region being deleted and
 * other processes waiting on the region mutex.
 *
 * !!!
 * Note, the mutex must be the first entry in the region; see comment above.
 */
#define	DB_R_DELETED	0x01		/* Region was deleted. */

struct _rlayout {
	db_mutex_t lock;		/* Region mutex. */
	u_int32_t  refcnt;		/* Region reference count. */
	size_t	   size;		/* Region length. */
	int	   majver;		/* Major version number. */
	int	   minver;		/* Minor version number. */
	int	   patch;		/* Patch version number. */
	u_int32_t  flags;		/* DB_R_DELETED. */
};
typedef struct _rlayout RLAYOUT;

/*******************************************************
 * Mpool.
 *******************************************************/
/*
 * File types for DB access methods.  Negative numbers are reserved to DB.
 *
 * The negative values are parenthesized so that the sign stays attached to
 * the constant wherever the macro is expanded inside a larger expression.
 */
#define	DB_FTYPE_BTREE		(-1)	/* Btree. */
#define	DB_FTYPE_HASH		(-2)	/* Hash. */

/*
 * Structure used as the DB pgin/pgout pgcookie: the page size and whether
 * swapping is required.
 */
struct __dbpginfo {
	size_t	db_pagesize;		/* Underlying page size. */
	int	needswap;		/* If swapping required. */
};
typedef struct __dbpginfo DB_PGINFO;

/*******************************************************
 * Log.
 *******************************************************/
/*
 * Initialize an LSN to 'zero': both the file and offset members are set
 * to 0.  LSN is the structure itself (an lvalue), not a pointer to it.
 *
 * The macro expands to a single do { } while (0) statement, so that
 * "ZERO_LSN(lsn);" is safe as the unbraced body of an if that is followed
 * by an else; the caller supplies the terminating semicolon.
 */
#define	ZERO_LSN(LSN) do {						\
	(LSN).file = 0;							\
	(LSN).offset = 0;						\
} while (0)

/*
 * Return 1 if LSN is a 'zero' lsn, otherwise return 0.  Only the file
 * member is examined.
 */
#define	IS_ZERO_LSN(LSN)	((LSN).file == 0)

/*
 * Test if we need to log a change: evaluates to 1 when the handle has
 * DB_AM_LOGGING set and DB_AM_RECOVER clear, and to 0 otherwise.
 */
#define	DB_LOGGING(dbp)							\
	(F_ISSET(dbp, DB_AM_LOGGING) ? !F_ISSET(dbp, DB_AM_RECOVER) : 0)

#ifdef DEBUG
/*
 * Debugging macro to log operations.
 *	If DEBUG_WOP is defined, log operations that modify the database.
 *	If DEBUG_ROP is defined, log operations that read the database.
 *
 * D dbp
 * T txn
 * O operation (string)
 * K key
 * A data
 * F flags
 *
 * When DB_LOGGING(D) is true, LOG_OP wraps the operation string (including
 * its terminating nul) in a zeroed DBT and passes it to __db_debug_log; the
 * return value of that call is discarded.  O is evaluated twice.
 *
 * LOG_OP expands to a single do { } while (0) statement, so that
 * "DEBUG_LWRITE(...);" is safe as the unbraced body of an if that is
 * followed by an else; the caller supplies the terminating semicolon.
 *
 * Without DEBUG, or without the matching DEBUG_ROP/DEBUG_WOP, DEBUG_LREAD
 * and DEBUG_LWRITE expand to nothing.
 */
#define	LOG_OP(D, T, O, K, A, F) do {					\
	DB_LSN _lsn;							\
	DBT _op;							\
	if (DB_LOGGING((D))) {						\
		memset(&_op, 0, sizeof(_op));				\
		_op.data = O;						\
		_op.size = strlen(O) + 1;				\
		(void)__db_debug_log((D)->dbenv->lg_info,		\
		    T, &_lsn, 0, &_op, (D)->log_fileid, K, A, F);	\
	}								\
} while (0)
#ifdef DEBUG_ROP
#define	DEBUG_LREAD(D, T, O, K, A, F)	LOG_OP(D, T, O, K, A, F)
#else
#define	DEBUG_LREAD(D, T, O, K, A, F)
#endif
#ifdef DEBUG_WOP
#define	DEBUG_LWRITE(D, T, O, K, A, F)	LOG_OP(D, T, O, K, A, F)
#else
#define	DEBUG_LWRITE(D, T, O, K, A, F)
#endif
#else
#define	DEBUG_LREAD(D, T, O, K, A, F)
#define	DEBUG_LWRITE(D, T, O, K, A, F)
#endif /* DEBUG */

/*******************************************************
 * Transactions and recovery.
 *******************************************************/
/*
 * The locker id space is divided between the transaction manager and the lock
 * manager.  Lock ids start at 0 and go to MAX_LOCKER_ID.  Transaction ids
 * start at MAX_LOCKER_ID + 1 and go up to MAX_TXNID.
 */
#define	MAX_LOCKER_ID	0x0fffffff
#define	MAX_TXNID	0xffffffff

/*
 * Out of band value for a lock.  The locks are returned to callers as offsets
 * into the lock regions.  Since the RLAYOUT structure begins all regions, an
 * offset of 0 is guaranteed not to be a valid lock.
 */
#define	LOCK_INVALID	0

/*
 * The structure allocated for every transaction.
 *
 * NOTE(review): DB_TXN, DB_TXNMGR and DB_LSN are not declared in this
 * header; presumably they come from db.h, with DB_TXN naming this
 * structure -- confirm.
 */
struct __db_txn {
	DB_TXNMGR	*mgrp;		/* Pointer to transaction manager. */
	DB_TXN		*parent;	/* Pointer to transaction's parent. */
	DB_LSN		last_lsn;	/* Lsn of last log write. */
	u_int32_t	txnid;		/* Unique transaction id. */
	size_t		off;		/* Detail structure within region. */
	TAILQ_ENTRY(__db_txn) links;	/* Tail-queue linkage. */
};
#endif /* !_DB_INTERNAL_H_ */