/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * Copyright (C) 2000 Helix Code, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public License
 * as published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the Gnome Library; see the file COPYING.LIB.  If not,
 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/* file.c: index file read/write ops */

#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "ibex_internal.h"

static unsigned long read_number (FILE *f);
static void write_number (FILE *f, unsigned long n);
static char *get_compressed_word (FILE *f, char **lastword);

static gint free_file (gpointer key, gpointer value, gpointer data);
static void free_word (gpointer key, gpointer value, gpointer data);

/* The file format is:
 *
 * version string (currently "ibex1")
 * file count
 * list of compressed filenames, separated by \0
 * word count
 * list of compressed words, each followed by \0, a count, and that
 *   many references.
 *
 * All numbers are stored 7-bit big-endian, with the high bit telling
 * whether or not the number continues to the next byte.
 *
 * compressed text consists of a byte telling how many characters the
 * line has in common with the line before it, followed by the rest of
 * the string. Obviously this only really works if the lists are sorted.
 */

/**
 * ibex_open: open (or possibly create) an ibex index
 * @file: the name of the file
 * @flags: open flags, see open(2).
 * @mode: If O_CREAT is passed in flags, then the file mode
 * to create the new file with.  It will be anded with the current
 * umask.
 *
 * Open and/or create the named ibex file and return a handle to it.
 *
 * Return value: an ibex handle, or NULL if an error occurred.
 **/
ibex *
ibex_open (char *file, int flags, int mode)
{
	ibex *ib;
	FILE *f;
	char vbuf[sizeof (IBEX_VERSION) - 1];
	char *word, *lastword;
	unsigned long nfiles, nwords, nrefs, ref;
	ibex_file **ibfs = NULL;
	int i;
	GPtrArray *refs;
	int fd;
	char *modestr;

	fd = open(file, flags, mode);
	if (fd == -1) {
		return NULL;
	}

	/* yuck, this is because we use FILE * interface
	   internally */
	switch (flags & O_ACCMODE) {
	case O_RDONLY:
		modestr = "r";
		break;
	case O_RDWR:
		if (flags & O_APPEND)
			modestr = "a+";
		else
			modestr = "w+";
		break;
	case O_WRONLY:
		if (flags & O_APPEND)
			modestr = "a";
		else
			modestr = "w";
		break;
	default:
		if (flags & O_APPEND)
			modestr = "a+";
		else
			modestr = "r+";
		break;
	}

	f = fdopen(fd, modestr);
	if (f == NULL) {
		if (errno == 0)
			errno = ENOMEM;
		return NULL;
	}

	ib = g_malloc (sizeof (ibex));
	ib->dirty = FALSE;
	ib->path = g_strdup (file);
	ib->files = g_tree_new (strcmp);
	ib->words = g_hash_table_new (g_str_hash, g_str_equal);
	ib->oldfiles = g_ptr_array_new ();

	if (!f)
		return ib;

	/* Check version.  If its empty, then we have just created it */
	if (fread (vbuf, 1, sizeof (vbuf), f) != sizeof (vbuf)) {
		if (feof (f)) {
			return ib;
		}
	}
	if (strncmp (vbuf, IBEX_VERSION, sizeof (vbuf) != 0)) {
		errno = EINVAL;
		goto errout;
	}

	/* Read list of files. */
	nfiles = read_number (f);
	ibfs = g_malloc (nfiles * sizeof (ibex_file *));
	lastword = NULL;
	for (i = 0; i < nfiles; i++) {
		ibfs[i] = g_malloc (sizeof (ibex_file));
		ibfs[i]->name = get_compressed_word (f, &lastword);
		if (!ibfs[i]->name) {
			goto errout;
		}
		ibfs[i]->index = 0;
		g_tree_insert (ib->files, ibfs[i]->name, ibfs[i]);
	}

	/* Read list of words. */
	nwords = read_number (f);
	lastword = NULL;
	for (i = 0; i < nwords; i++) {
		word = get_compressed_word (f, &lastword);
		if (!word) {
			goto errout;
		}

		nrefs = read_number (f);
		refs = g_ptr_array_new ();
		g_ptr_array_set_size (refs, nrefs);
		while (nrefs--) {
			ref = read_number (f);
			if (ref >= nfiles) {
				goto errout;
			}
			refs->pdata[nrefs] = ibfs[ref];
		}

		g_hash_table_insert (ib->words, word, refs);
	}

	g_free (ibfs);
	fclose (f);
	return ib;

errout:

	fclose (f);
	g_tree_traverse (ib->files, free_file, G_IN_ORDER, NULL);
	g_tree_destroy (ib->files);
	g_hash_table_foreach (ib->words, free_word, NULL);
	g_hash_table_destroy (ib->words);
	g_ptr_array_free (ib->oldfiles, TRUE);
	if (ibfs)
		g_free (ibfs);
	g_free (ib->path);
	g_free (ib);

	return NULL;
}

struct ibex_write_data {
	unsigned long index;
	FILE *f;
	char *lastname;
};

/* This is an internal function to find the longest common initial
 * prefix between the last-written word and the current word.
 */
static int
get_prefix (struct ibex_write_data *iwd, char *name)
{
	int i = 0;
	if (iwd->lastname) {
		while (!strncmp (iwd->lastname, name, i + 1))
			i++;
	}
	iwd->lastname = name;
	return i;
}

static gint
write_file (gpointer key, gpointer value, gpointer data)
{
	char *file = key;
	ibex_file *ibf = value;
	struct ibex_write_data *iwd = data;
	int prefix;

	ibf->index = iwd->index++;
	prefix = get_prefix (iwd, file);
	fprintf (iwd->f, "%c%s", prefix, file + prefix);
	fputc (0, iwd->f);
	return FALSE;
}

/* scans for words which still exist in the index (after
   index removals), and adds them to the ordered tree for
   writing out in order */
static void
store_word (gpointer key, gpointer value, gpointer data)
{
	GTree *wtree = data;
	GPtrArray *refs = value;
	int i;
	ibex_file *ibf;

	for (i = 0; i < refs->len; i++) {
		ibf = g_ptr_array_index (refs, i);
		if (ibf->index == -1) {
			g_ptr_array_remove_index_fast (refs, i);
			i--;
		}
	}

	if (refs->len > 0) {
		g_tree_insert (wtree, key, value);
	}
}

/* writes a word out, in order */
static gint
write_word (gpointer key, gpointer value, gpointer data)
{
	char *word = key;
	GPtrArray *refs = value;
	struct ibex_write_data *iwd = data;
	int i, prefix;
	ibex_file *ibf;

	prefix = get_prefix (iwd, word);
	fprintf (iwd->f, "%c%s", prefix, word + prefix);
	fputc (0, iwd->f);

	write_number (iwd->f, refs->len);

	for (i = 0; i < refs->len; i++) {
		ibf = g_ptr_array_index (refs, i);
		write_number (iwd->f, ibf->index);
	}
	return FALSE;
}

/**
 * ibex_write: Write an ibex out to disk.
 * @ib: the ibex
 *
 * This writes an ibex to disk.
 *
 * Return value: 0 for success, -1 for failure (in which case errno
 * is set).
 **/
int
ibex_write (ibex *ib)
{
	struct ibex_write_data iwd;
	GTree *wtree;
	char *tmpfile;

	tmpfile = g_strdup_printf ("%s~", ib->path);
	iwd.f = fopen (tmpfile, "w");
	if (!iwd.f) {
		if (errno == 0)
			errno = ENOMEM;
		g_free (tmpfile);
		return -1;
	}

	fputs (IBEX_VERSION, iwd.f);
	if (ferror (iwd.f))
		goto lose;

	iwd.index = 0;
	iwd.lastname = NULL;
	write_number (iwd.f, g_tree_nnodes (ib->files));
	if (ferror (iwd.f))
		goto lose;
	g_tree_traverse (ib->files, write_file, G_IN_ORDER, &iwd);
	if (ferror (iwd.f))
		goto lose;

	iwd.lastname = NULL;
	wtree = g_tree_new (strcmp);
	g_hash_table_foreach (ib->words, store_word, wtree);
	write_number (iwd.f, g_tree_nnodes(wtree));
	if (ferror (iwd.f))
		goto lose;
	g_tree_traverse (wtree, write_word, G_IN_ORDER, &iwd);
	g_tree_destroy (wtree);
	if (ferror (iwd.f))
		goto lose;

	if (fclose (iwd.f) == 0 && rename (tmpfile, ib->path) == 0) {
		g_free (tmpfile);
		ib->dirty = FALSE;
		return 0;
	}

lose:
	unlink (tmpfile);
	g_free (tmpfile);
	return -1;
}

/**
 * ibex_close: Write out the ibex file (if it has changed) and free
 * the data associated with it.
 * @ib: the ibex
 *
 * If this ibex file has been modified since it was opened, this will
 * call ibex_write() to write it out to disk. It will then free all data
 * associated with the ibex. After calling ibex_close(), @ib will no
 * longer be a valid ibex.
 *
 * Return value: 0 on success, -1 on an ibex_write() failure (in which
 * case @ib will not be destroyed).
 **/
int
ibex_close (ibex *ib)
{
	ibex_file *ibf;

	if (ib->dirty && ibex_write (ib) == -1)
		return -1;

	g_tree_traverse (ib->files, free_file, G_IN_ORDER, NULL);
	g_tree_destroy (ib->files);
	g_hash_table_foreach (ib->words, free_word, NULL);
	g_hash_table_destroy (ib->words);

	while (ib->oldfiles->len) {
		ibf = g_ptr_array_remove_index (ib->oldfiles, 0);
		g_free (ibf->name);
		g_free (ibf);
	}
	g_ptr_array_free (ib->oldfiles, TRUE);
	g_free (ib->path);
	g_free (ib);

	return 0;
}

static gint
free_file (gpointer key, gpointer value, gpointer data)
{
	ibex_file *ibf = value;

	g_free (ibf->name);
	g_free (ibf);
	return FALSE;
}

static void
free_word (gpointer key, gpointer value, gpointer data)
{
	g_free (key);
	g_ptr_array_free (value, TRUE);
}

static char *
get_compressed_word (FILE *f, char **lastword)
{
	char *buf, *p;
	int c, size;

	c = getc (f);
	if (c == EOF)
		return NULL;

	size = c + 10;
	buf = g_malloc (size);
	if (*lastword)
		strncpy (buf, *lastword, c);
	p = buf + c;
	do {
		c = getc (f);
		if (c == EOF)
			return NULL;
		if (p == buf + size) {
			buf = g_realloc (buf, size + 10);
			p = buf + size;
			size += 10;
		}
		*p++ = c;
	} while (c != 0);

	*lastword = buf;
	return buf;
}

static void
write_number (FILE *f, unsigned long number)
{
	int i, flag = 0;
	char buf[4];

	i = 4;
	do {
		buf[--i] = (number & 0x7F) | flag;
		number = number >> 7;
		flag = 0x80;
	} while (number != 0);

	fwrite (buf + i, 1, 4 - i, f);
}

static unsigned long
read_number (FILE *f)
{
	int byte;
	unsigned long num;

	num = 0;
	do {
		byte = getc (f);
		num = num << 7 | (byte & 0x7F);
	} while (byte & 0x80);
  
	return num;
}