dbm/ndbm notes and some code.

Ozan Yigit oz at yunexus.UUCP
Fri May 12 05:33:53 AEST 1989


During my development of sdbm, a ndbm/dbm substitute (soon to beta: do not
send mail), I have found one bug that has gone unnoticed on probably most
of ndbm/dbm implementations.  One can write a simple workaround to this,
but a bug is a bug nevertheless.  Remember that line from the man page: 

| ...
| A linear pass through all keys in a database may be made, in an
| [apparently] random order, by use of db_firstkey and dbm_nextkey.
| ... This code will traverse the data base:
| 
| for (key=dbm_firstkey(db); key.dptr!=NULL;key=dbm_nextkey(db))
| ...

This is all very nice, if you just want to extract some or all keys.
But what if you want to "selectively delete" some keys ?? Something
like this perhaps:

for (key = dbm_firstkey(db); key.dptr != 0; key = dbm_nextkey(db)) {
	prdatum(key);
	printf(": delete? ");
	if (getyesno(stdin) == 'y')	/* illusory */
		dbm_delete(db, key);
}

This is where the "database traversal" turns into a pumpkin.  Because of
internal caching of the key position for dbm_nextkey (dbm_keyptr ??),
which is appearently NOT adjusted for deletions, this traversal will never
display the key right after the one just deleted. Workaround is to save
all keys to be deleted, then perform all deletions once the sequential
traversal is complete.

On another topic: If you ever wanted to process the ".pag" file of a
dbm/ndbm database yourself, in some unorthodox fashion, perhaps for
recovery, analysis or fast dumping of the keys/records in the pages, here
are the routines that illustrate the basic structure and the operations on
those pages. These routines are derived by careful "od" of the page file,
after test insertions, deletions etc., and NOT derived from the actual
source code of dbm/ndbm. (I hear it is easier to read od output anyway)
These routines are PD. [sdbm uses an ever so slightly different format:
there is an additional short after the n (entry count) field, but sdbm will
come with the appropriate utilities for analysis, recovery etc. anyway.]

---- pair.c -------------------------------------------------
#include <whatever suitable>

#define NULL (char *) 0

/*
 * routines dealing with a dbm/ndbm data page
 * author: oz at nexus.yorku.ca
 *
 * page format:
 *	+------------------------------+
 * ino	| n | keyoff | datoff | keyoff |
 * 	+------------+--------+--------+
 *	| datoff | - - - ---->	       |
 *	+--------+---------------------+
 *	|	 F R E E A R E A       |
 *	+--------------+---------------+
 *	|  <---- - - - | data          |
 *	+--------+-----+----+----------+
 *	|  key   | data     | key      |
 *	+--------+----------+----------+
 *
 * calculating the offsets for free area:  if the number
 * of entries (ino[0]) is zero, the offset to the END of
 * the free area is the block size. Otherwise, it is the
 * nth (ino[ino[0]]) entry's offset.
 */

putpair(pag, key, val)
char *pag;
datum key;
datum val;
{
	register n;
	register off;
	register free;
	register need;
	register short *ino = (short *) pag;

	off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ;
/*
 * calculate free space and needed space
 */
	free = off - (n + 1) * sizeof(short);
	need = key.dsize + val.dsize + 2 * sizeof(short);
#ifdef DEBUG
	printf("offset %d: free %d need %d\n", off, free, need);
#endif
	if (need > free)
		return (0);
/*
 * enter the key first
 */
	off -= key.dsize;
	(void) memcpy(pag + off, key.dptr, key.dsize);
	ino[n + 1] = off;
/*
 * now the data
 */
	off -= val.dsize;
	(void) memcpy(pag + off, val.dptr, val.dsize);
	ino[n + 2] = off;
/*
 * adjust item count
 */
	ino[0] += 2;
	return (1);
}

datum null = { NULL, 0 };

datum
getpair(pag, key)
char *pag;
datum key;
{
	register i;
	register n;
	datum val;
	register short *ino = (short *) pag;

	if (!(n = ino[0]))
		return (null);

	if (!(i = seepair(pag, n, key.dptr, key.dsize)))
		return (null);

	val.dptr = pag + ino[i + 1];
	val.dsize = ino[i] - ino[i + 1];
	return (val);
}

delpair(pag, key)
char *pag;
datum key;
{
	register i;
	register n;
	register m;
	register zoo;
	register short *ino = (short *) pag;
	register char *src;
	register char *dst;

	if (!(n = ino[0]))
		return (0);

	if (!(i = seepair(pag, n, key.dptr, key.dsize)))
		return (0);
/*
 * found the key. if it is the last entry
 * [i.e. i == n - 1] we just adjust the entry count.
 * hard case: [i.e. 0 < i < n] move all data down onto the 
 * deleted pair, shift offsets onto deleted offsets, 
 * and adjust them.
 */
	if (i < n - 1) {
		dst = pag + (i == 1 ? PBLKSIZ : ino[i - 1]);
		src = pag + ino[i + 1];
		zoo = dst - src;
#ifdef DEBUG
		printf("free-up %d ", zoo);
#endif
	/*
	 * shift data/keys down
	 */
		m = ino[i + 1] - ino[n];
/*
 * should use memcpy here, but I do not trust all
 * implementations. Perhaps one day...
 */
#ifdef DUFF
#define MOVB 	*--dst = *--src;

		if (m > 0) {
			register int loop = (m + 8 - 1) >> 3;

			switch (m & (8 - 1)) {
			case 0:
				do {
				MOVB;	case 7:	MOVB;
			case 6:	MOVB;	case 5:	MOVB;
			case 4:	MOVB;	case 3:	MOVB;
			case 2:	MOVB;	case 1:	MOVB;
				} while (--loop);
			}
		}
#else
		while (m--)
			*--dst = *--src;
#endif
	/*
	 * adjust offset index
	 */
		while (i < n - 1) {
			ino[i] = ino[i + 2] + zoo;
			i++;
		}
	}
	ino[0] -= 2;
	return (1);
}

/*
 * search for the key in the page.
 * return offset index in the range 0 < i < n.
 * return 0 if not found.
 */
seepair(pag, n, key, siz)
char *pag;
register n;
register char *key;
register int siz;
{
	register i;
	register off;
	register short *ino = (short *) pag;

	off = PBLKSIZ;
	for (i = 1; i < n; i += 2) {
		if (siz == off - ino[i] &&
		    strncmp(key, pag + ino[i], siz) == 0)
			return (i);
		off = ino[i + 1];
	}
	return (0);
}
-- 
use the source, luke !!     	        Usenet:    oz at nexus.yorku.ca
uh... we forgot to tell you...          ......!uunet!utai!yunexus!oz
it is unintelligible, but hey, you      Bitnet: oz@[yulibra|yuyetti]
got it, for free (!).                   Phonet: +1 416 736-5257x3976



More information about the Comp.lang.c mailing list