/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1998, 1999
 *	Sleepycat Software.  All rights reserved.
 */

#include "db_config.h"

#ifndef lint
static const char revid[] = "$Id: db_am.c,v 1.1.1.6.2.2 2000/02/08 00:43:46 noriko Exp $";
#endif /* not lint */

#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>

#include <errno.h>
#include <string.h>
#endif

#include "db_int.h"
#include "db_page.h"
#include "db_shash.h"
#include "btree.h"
#include "hash.h"
#include "qam.h"
#include "lock.h"
#include "mp.h"
#include "txn.h"
#include "db_am.h"
#include "db_ext.h"

/*
 * __db_cursor --
 *	Allocate and return a cursor.
 *
 *	Validates the flags, obtains a cursor through __db_icursor and, when
 *	the environment uses CDB locking, acquires the cursor's lock here.
 *
 *	dbp:	database handle the cursor is opened on.
 *	txn:	transaction handle, passed through to __db_icursor.
 *	dbcp:	receives the new cursor.
 *	flags:	cursor flags, checked by __db_cursorchk.
 *
 *	Returns 0 on success, otherwise the error from the flag check,
 *	from __db_icursor or from lock_get.  If the lock cannot be acquired
 *	the cursor is closed before returning; *dbcp is not reset in that
 *	case and must not be used by the caller.
 *
 * PUBLIC: int __db_cursor __P((DB *, DB_TXN *, DBC **, u_int32_t));
 */
int
__db_cursor(dbp, txn, dbcp, flags)
	DB *dbp;
	DB_TXN *txn;
	DBC **dbcp;
	u_int32_t flags;
{
	DB_ENV *dbenv;
	DBC *dbc;
	db_lockmode_t mode;
	u_int32_t op;
	int ret;

	dbenv = dbp->dbenv;

	PANIC_CHECK(dbenv);
	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->cursor");

	/* Check for invalid flags. */
	if ((ret = __db_cursorchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
		return (ret);

	if ((ret =
	    __db_icursor(dbp, txn, dbp->type, PGNO_INVALID, 0, dbcp)) != 0)
		return (ret);
	dbc = *dbcp;

	/*
	 * If this is CDB, do all the locking in the interface, which is
	 * right here.
	 */
	if (CDB_LOCKING(dbenv)) {
		/*
		 * The operation is extracted with DB_OPFLAGS_MASK and is
		 * compared by equality: DB_WRITECURSOR and DB_WRITELOCK are
		 * treated as distinct operation values, not independent bits,
		 * so a bitwise test of one could also match the other.
		 */
		op = LF_ISSET(DB_OPFLAGS_MASK);
		if (op == DB_WRITELOCK)
			mode = DB_LOCK_WRITE;
		else if (op == DB_WRITECURSOR)
			mode = DB_LOCK_IWRITE;
		else
			mode = DB_LOCK_READ;

		if ((ret = lock_get(dbenv, dbc->locker, 0,
		    &dbc->lock_dbt, mode, &dbc->mylock)) != 0) {
			/* Best-effort cleanup; report the lock failure. */
			(void)__db_c_close(dbc);
			return (ret);
		}

		/* Record on the cursor which kind of write access it holds. */
		if (op == DB_WRITECURSOR)
			F_SET(dbc, DBC_WRITECURSOR);
		else if (op == DB_WRITELOCK)
			F_SET(dbc, DBC_WRITER);
	}

	return (0);
}

/*
 * __db_icursor --
 *	Internal version of __db_cursor.
 *
 *	Reuses a compatible cursor from the handle's free list when one is
 *	available, otherwise allocates a new one and sets up its locking
 *	information.  The cursor is then refreshed, initialized for the
 *	requested access method and appended to the handle's active queue.
 *
 *	dbp:	database handle owning the cursor queues.
 *	txn:	transaction handle or NULL; selects the cursor's locker id.
 *	dbtype:	access method the cursor is initialized for.
 *	root:	value stored in the cursor's root field.
 *	is_opd:	if non-zero the cursor is flagged DBC_OPD.
 *	dbcp:	receives the cursor on success; left untouched on failure.
 *
 *	Returns 0 on success.  On failure returns the error from allocation,
 *	lock_id or the access-method init routine, or EINVAL for an
 *	unrecognized dbtype.
 *
 * PUBLIC: int __db_icursor
 * PUBLIC:     __P((DB *, DB_TXN *, DBTYPE, db_pgno_t, int, DBC **));
 */
int
__db_icursor(dbp, txn, dbtype, root, is_opd, dbcp)
	DB *dbp;
	DB_TXN *txn;
	DBTYPE dbtype;
	db_pgno_t root;
	int is_opd;
	DBC **dbcp;
{
	DBC *dbc, *adbc;
	DB_ENV *dbenv;
	int allocated, ret;

	dbenv = dbp->dbenv;
	allocated = 0;

	/*
	 * Take one from the free list if it's available.  BTREE and
	 * RECNO cursors are interchangeable, however HASH cursors are
	 * different.  Therefore if there is a cursor on the list, BTREE
	 * and RECNO will match it immediately.  HASH will only match it
	 * if you're requesting the correct type.
	 */
	MUTEX_THREAD_LOCK(dbp->mutexp);
	for (dbc = TAILQ_FIRST(&dbp->free_queue);
	    dbc != NULL;
	    dbc = TAILQ_NEXT(dbc, links)) {
		if (dbtype == dbc->dbtype ||
		    ((dbtype == DB_RECNO || dbtype == DB_BTREE) &&
		     (dbc->dbtype == DB_RECNO || dbc->dbtype == DB_BTREE))) {
			TAILQ_REMOVE(&dbp->free_queue, dbc, links);
			break;
		}
	}
	MUTEX_THREAD_UNLOCK(dbp->mutexp);

	/* Nothing suitable on the free list: build a new cursor. */
	if (dbc == NULL) {
		if ((ret = __os_calloc(1, sizeof(DBC), &dbc)) != 0)
			return (ret);
		allocated = 1;

		dbc->dbp = dbp;

		/* Set up locking information. */
		if (LOCKING_ON(dbenv)) {
			/*
			 * If we are not threaded, then there is no need to
			 * create new locker ids.  We know that no one else
			 * is running concurrently using this DB, so we can
			 * take a peek at any cursors on the active queue.
			 */
			if (!F_ISSET(dbenv, DB_ENV_THREAD) &&
			    (adbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
				dbc->lid = adbc->lid;
			else
				if ((ret = lock_id(dbenv, &dbc->lid)) != 0)
					goto err;

			/*
			 * CDB locks the file id alone; otherwise the lock
			 * object is the whole page-lock structure.
			 */
			memcpy(dbc->lock.fileid, dbp->fileid, DB_FILE_ID_LEN);
			if (CDB_LOCKING(dbenv)) {
				dbc->lock_dbt.size = DB_FILE_ID_LEN;
				dbc->lock_dbt.data = dbc->lock.fileid;
			} else {
				dbc->lock.type = DB_PAGE_LOCK;
				dbc->lock_dbt.size = sizeof(dbc->lock);
				dbc->lock_dbt.data = &dbc->lock;
			}
		}
	}

	/* Refresh the DBC structure. */
	DB_ASSERT(dbc->opd == NULL);
	dbc->opd = NULL;

	/* Lock on behalf of the transaction if there is one. */
	if ((dbc->txn = txn) == NULL)
		dbc->locker = dbc->lid;
	else
		dbc->locker = txn->txnid;

	dbc->dbtype = dbtype;
	dbc->root = root;
	dbc->pgno = PGNO_INVALID;
	dbc->indx = 0;
	DB_ASSERT(dbc->page == NULL);
	dbc->page = NULL;

	dbc->flags = is_opd ? DBC_OPD : 0;

	/*
	 * Refresh the DBC internal structure.  The btree/recno init routine
	 * is run on every use; the hash and queue init routines are only
	 * run for freshly allocated cursors.
	 */
	switch (dbtype) {
	case DB_BTREE:
	case DB_RECNO:
		if ((ret = __bam_c_init(dbc, dbtype)) != 0)
			goto err;
		break;
	case DB_HASH:
		if (allocated && (ret = __ham_c_init(dbc)) != 0)
			goto err;
		break;
	case DB_QUEUE:
		if (allocated && (ret = __qam_c_init(dbc)) != 0)
			goto err;
		break;
	default:
		ret = EINVAL;
		goto err;
	}

	MUTEX_THREAD_LOCK(dbp->mutexp);
	TAILQ_INSERT_TAIL(&dbp->active_queue, dbc, links);
	F_SET(dbc, DBC_ACTIVE);
	MUTEX_THREAD_UNLOCK(dbp->mutexp);

	*dbcp = dbc;
	return (0);

err:	if (allocated)
		__os_free(dbc, sizeof(*dbc));
	else {
		/*
		 * The cursor was recycled from the free list and is now on
		 * neither queue.  Put it back so that it is not orphaned.
		 */
		MUTEX_THREAD_LOCK(dbp->mutexp);
		TAILQ_INSERT_TAIL(&dbp->free_queue, dbc, links);
		MUTEX_THREAD_UNLOCK(dbp->mutexp);
	}
	return (ret);
}

#ifdef DEBUG
/*
 * __db_cprint --
 *	Display the current cursor list.
 *
 *	Writes one entry per cursor on the handle's active queue to stderr,
 *	followed by the addresses of the cursors on the free queue.  Both
 *	queues are walked while holding the handle's thread mutex.
 *
 *	Returns 0 on success, or 1 if an active cursor has an unrecognized
 *	access-method type.
 *
 * PUBLIC: int __db_cprint __P((DB *));
 */
int
__db_cprint(dbp)
	DB *dbp;
{
	static const FN fn[] = {
		{ DBC_ACTIVE, 		"active" },
		{ DBC_IDUP,		"internal duplication" },
		{ DBC_OPD, 		"off-page-dup" },
		{ DBC_RECOVER, 		"recover" },
		{ DBC_RMW, 		"read-modify-write" },
		{ DBC_WRITECURSOR,	"write cursor" },
		{ DBC_WRITER, 		"short-term write cursor" },
		{ 0,			NULL }
	};
	DBC *dbc;
	const char *s;

	MUTEX_THREAD_LOCK(dbp->mutexp);
	for (dbc = TAILQ_FIRST(&dbp->active_queue);
	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
		switch (dbc->dbtype) {
		case DB_BTREE:
			s = "btree";
			break;
		case DB_HASH:
			s = "hash";
			break;
		case DB_RECNO:
			s = "recno";
			break;
		case DB_QUEUE:
			s = "queue";
			break;
		default:
			DB_ASSERT(0);
			/* Don't leave the handle mutex held on the way out. */
			MUTEX_THREAD_UNLOCK(dbp->mutexp);
			return (1);
		}

		/*
		 * Pointers are printed through u_long so they are not
		 * truncated where pointers are wider than u_int; this
		 * assumes u_long is at least pointer-sized on the target.
		 */
		fprintf(stderr, "%s/%#0lx: opd: %#0lx\n",
		    s, (u_long)dbc, (u_long)dbc->opd);
		fprintf(stderr, "\ttxn: %#0lx lid: %lu locker: %lu\n",
		    (u_long)dbc->txn, (u_long)dbc->lid, (u_long)dbc->locker);
		fprintf(stderr, "\troot: %lu page/index: %lu/%lu",
		    (u_long)dbc->root, (u_long)dbc->pgno, (u_long)dbc->indx);
		__db_prflags(dbc->flags, fn, stderr);
		fprintf(stderr, "\n");

		/* Btree handles get the access-method specific dump too. */
		if (dbp->type == DB_BTREE)
			__bam_cprint(dbc);
	}
	for (dbc = TAILQ_FIRST(&dbp->free_queue);
	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links))
		fprintf(stderr, "free: %#0lx ", (u_long)dbc);
	fprintf(stderr, "\n");
	MUTEX_THREAD_UNLOCK(dbp->mutexp);

	return (0);
}
#endif /* DEBUG */

/*
 * __db_fd --
 *	Return a file descriptor for flock'ing.
 *
 *	On success stores the descriptor in *fdp and returns 0.  If the
 *	underlying file handle is not marked valid, stores -1 in *fdp and
 *	returns ENOENT.  An error from the memory pool lookup is returned
 *	unchanged, with *fdp untouched.
 *
 * PUBLIC: int __db_fd __P((DB *, int *));
 */
int
__db_fd(dbp, fdp)
	DB *dbp;
	int *fdp;
{
	DB_FH *fhp;
	int ret;

	PANIC_CHECK(dbp->dbenv);
	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->fd");

	/*
	 * XXX
	 * Truly spectacular layering violation: reach into the memory
	 * pool file to get at its file handle.
	 */
	ret = __mp_xxx_fh(dbp->mpf, &fhp);
	if (ret != 0)
		return (ret);

	/* No usable descriptor behind the handle. */
	if (!F_ISSET(fhp, DB_FH_VALID)) {
		*fdp = -1;
		return (ENOENT);
	}

	*fdp = fhp->fd;
	return (0);
}

/*
 * __db_get --
 *	Return a key/data pair.
 *
 *	Implemented with a short-lived cursor: open it, do one c_get, then
 *	close it.  When flags is 0 or exactly DB_RMW, DB_SET is added to the
 *	flags passed to c_get; any other flags value is passed unchanged.
 *
 *	Returns the c_get result; if that was 0 and closing the cursor
 *	failed, returns the close error instead.
 *
 * PUBLIC: int __db_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
 */
int
__db_get(dbp, txn, key, data, flags)
	DB *dbp;
	DB_TXN *txn;
	DBT *key, *data;
	u_int32_t flags;
{
	DBC *dbc;
	u_int32_t get_flags;
	int close_ret, ret;

	PANIC_CHECK(dbp->dbenv);
	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get");

	ret = __db_getchk(dbp, key, data, flags);
	if (ret != 0)
		return (ret);

	ret = dbp->cursor(dbp, txn, &dbc, 0);
	if (ret != 0)
		return (ret);

	DEBUG_LREAD(dbc, txn, "__db_get", key, NULL, flags);

	/* A plain (or RMW-only) get is a DB_SET lookup on the cursor. */
	get_flags = flags;
	if (flags == 0 || flags == DB_RMW)
		get_flags |= DB_SET;
	ret = dbc->c_get(dbc, key, data, get_flags);

	/* Always close; a close failure only matters if the get worked. */
	close_ret = __db_c_close(dbc);
	if (ret == 0 && close_ret != 0)
		ret = close_ret;

	return (ret);
}

/*
 * __db_put --
 *	Store a key/data pair.
 *
 *	Implemented with a short-lived cursor opened with DB_WRITELOCK.  For
 *	DB_NOOVERWRITE the key is probed first and DB_KEYEXIST is returned
 *	if it is already present.  The pair is stored with DB_KEYLAST.
 *
 *	Returns the first error encountered; a cursor close failure is
 *	reported only if everything before it succeeded.
 *
 * PUBLIC: int __db_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
 */
int
__db_put(dbp, txn, key, data, flags)
	DB *dbp;
	DB_TXN *txn;
	DBT *key, *data;
	u_int32_t flags;
{
	DBC *dbc;
	DBT probe;
	u_int32_t probe_flags;
	int close_ret, ret;

	PANIC_CHECK(dbp->dbenv);
	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->put");

	ret = __db_putchk(dbp, key, data,
	    flags, F_ISSET(dbp, DB_AM_RDONLY),
	    F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK));
	if (ret != 0)
		return (ret);

	ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK);
	if (ret != 0)
		return (ret);

	DEBUG_LWRITE(dbc, txn, "__db_put", key, data, flags);

	if (flags == DB_NOOVERWRITE) {
		/*
		 * Set DB_DBT_USERMEM, this might be a threaded application and
		 * the flags checking will catch us.  We don't want the actual
		 * data, so request a partial of length 0.
		 */
		memset(&probe, 0, sizeof(probe));
		F_SET(&probe, DB_DBT_USERMEM | DB_DBT_PARTIAL);

		/*
		 * If we're doing page-level locking, set the read-modify-write
		 * flag, we're going to overwrite immediately.
		 */
		probe_flags = DB_SET | (STD_LOCKING(dbc) ? DB_RMW : 0);

		/* Found means a clash; not found means we may go ahead. */
		ret = dbc->c_get(dbc, key, &probe, probe_flags);
		if (ret == DB_NOTFOUND)
			ret = 0;
		else if (ret == 0)
			ret = DB_KEYEXIST;
	}

	if (ret == 0)
		ret = dbc->c_put(dbc, key, data, DB_KEYLAST);

	close_ret = __db_c_close(dbc);
	if (ret == 0 && close_ret != 0)
		ret = close_ret;

	return (ret);
}

/*
 * __db_sync --
 *	Flush the database cache.
 *
 *	Read-only handles return 0 at once.  For Recno handles the backing
 *	source text file is written first.  Unless the database is
 *	in-memory, the memory pool file is then flushed.
 *
 *	Returns 0 on success, otherwise the flag-check error or the first
 *	error from the Recno writeback or the memory pool flush.  A Recno
 *	writeback failure is reported for in-memory databases too.
 *
 * PUBLIC: int __db_sync __P((DB *, u_int32_t));
 */
int
__db_sync(dbp, flags)
	DB *dbp;
	u_int32_t flags;
{
	int ret, t_ret;

	PANIC_CHECK(dbp->dbenv);
	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->sync");

	if ((ret = __db_syncchk(dbp, flags)) != 0)
		return (ret);

	/* Read-only trees never need to be sync'd. */
	if (F_ISSET(dbp, DB_AM_RDONLY))
		return (0);

	/* If it's a Recno tree, write the backing source text file. */
	if (dbp->type == DB_RECNO)
		ret = __ram_writeback(dbp);

	/*
	 * If the tree was never backed by a database file, we're done.
	 * Return ret rather than 0 so that a failed Recno writeback is
	 * not silently discarded.
	 */
	if (F_ISSET(dbp, DB_AM_INMEM))
		return (ret);

	/* Flush any dirty pages from the cache to the backing file. */
	if ((t_ret = memp_fsync(dbp->mpf)) != 0 && ret == 0)
		ret = t_ret;
	return (ret);
}
