/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ /* * Copyright (C) 2000 Helix Code, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public License * as published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with the Gnome Library; see the file COPYING.LIB. If not, * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ /* file.c: index file read/write ops */ #include <ctype.h> #include <errno.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include "ibex_internal.h" static unsigned long read_number (FILE *f); static void write_number (FILE *f, unsigned long n); static char *get_compressed_word (FILE *f, char **lastword); static gint free_file (gpointer key, gpointer value, gpointer data); static void free_word (gpointer key, gpointer value, gpointer data); /* The file format is: * * version string (currently "ibex1") * file count * list of compressed filenames, separated by \0 * word count * list of compressed words, each followed by \0, a count, and that * many references. * * All numbers are stored 7-bit big-endian, with the high bit telling * whether or not the number continues to the next byte. * * compressed text consists of a byte telling how many characters the * line has in common with the line before it, followed by the rest of * the string. Obviously this only really works if the lists are sorted. */ /** * ibex_open: open (or possibly create) an ibex index * @file: the name of the file * @flags: open flags, see open(2). * @mode: If O_CREAT is passed in flags, then the file mode * to create the new file with. It will be anded with the current * umask. * * Open and/or create the named ibex file and return a handle to it. * * Return value: an ibex handle, or NULL if an error occurred. **/ ibex * ibex_open (char *file, int flags, int mode) { ibex *ib; FILE *f; char vbuf[sizeof (IBEX_VERSION) - 1]; char *word, *lastword; unsigned long nfiles, nwords, nrefs, ref; ibex_file **ibfs = NULL; int i; GPtrArray *refs; int fd; char *modestr; fd = open(file, flags, mode); if (fd == -1) { return NULL; } /* yuck, this is because we use FILE * interface internally */ switch (flags & O_ACCMODE) { case O_RDONLY: modestr = "r"; break; case O_RDWR: if (flags & O_APPEND) modestr = "a+"; else modestr = "w+"; break; case O_WRONLY: if (flags & O_APPEND) modestr = "a"; else modestr = "w"; break; default: if (flags & O_APPEND) modestr = "a+"; else modestr = "r+"; break; } f = fdopen(fd, modestr); if (f == NULL) { if (errno == 0) errno = ENOMEM; return NULL; } ib = g_malloc (sizeof (ibex)); ib->dirty = FALSE; ib->path = g_strdup (file); ib->files = g_tree_new (strcmp); ib->words = g_hash_table_new (g_str_hash, g_str_equal); ib->oldfiles = g_ptr_array_new (); if (!f) return ib; /* Check version. If its empty, then we have just created it */ if (fread (vbuf, 1, sizeof (vbuf), f) != sizeof (vbuf)) { if (feof (f)) { return ib; } } if (strncmp (vbuf, IBEX_VERSION, sizeof (vbuf) != 0)) { errno = EINVAL; goto errout; } /* Read list of files. */ nfiles = read_number (f); ibfs = g_malloc (nfiles * sizeof (ibex_file *)); lastword = NULL; for (i = 0; i < nfiles; i++) { ibfs[i] = g_malloc (sizeof (ibex_file)); ibfs[i]->name = get_compressed_word (f, &lastword); if (!ibfs[i]->name) { goto errout; } ibfs[i]->index = 0; g_tree_insert (ib->files, ibfs[i]->name, ibfs[i]); } /* Read list of words. */ nwords = read_number (f); lastword = NULL; for (i = 0; i < nwords; i++) { word = get_compressed_word (f, &lastword); if (!word) { goto errout; } nrefs = read_number (f); refs = g_ptr_array_new (); g_ptr_array_set_size (refs, nrefs); while (nrefs--) { ref = read_number (f); if (ref >= nfiles) { goto errout; } refs->pdata[nrefs] = ibfs[ref]; } g_hash_table_insert (ib->words, word, refs); } g_free (ibfs); fclose (f); return ib; errout: fclose (f); g_tree_traverse (ib->files, free_file, G_IN_ORDER, NULL); g_tree_destroy (ib->files); g_hash_table_foreach (ib->words, free_word, NULL); g_hash_table_destroy (ib->words); g_ptr_array_free (ib->oldfiles, TRUE); if (ibfs) g_free (ibfs); g_free (ib->path); g_free (ib); return NULL; } struct ibex_write_data { unsigned long index; FILE *f; char *lastname; }; /* This is an internal function to find the longest common initial * prefix between the last-written word and the current word. */ static int get_prefix (struct ibex_write_data *iwd, char *name) { int i = 0; if (iwd->lastname) { while (!strncmp (iwd->lastname, name, i + 1)) i++; } iwd->lastname = name; return i; } static gint write_file (gpointer key, gpointer value, gpointer data) { char *file = key; ibex_file *ibf = value; struct ibex_write_data *iwd = data; int prefix; ibf->index = iwd->index++; prefix = get_prefix (iwd, file); fprintf (iwd->f, "%c%s", prefix, file + prefix); fputc (0, iwd->f); return FALSE; } /* scans for words which still exist in the index (after index removals), and adds them to the ordered tree for writing out in order */ static void store_word (gpointer key, gpointer value, gpointer data) { GTree *wtree = data; GPtrArray *refs = value; int i; ibex_file *ibf; for (i = 0; i < refs->len; i++) { ibf = g_ptr_array_index (refs, i); if (ibf->index == -1) { g_ptr_array_remove_index_fast (refs, i); i--; } } if (refs->len > 0) { g_tree_insert (wtree, key, value); } } /* writes a word out, in order */ static gint write_word (gpointer key, gpointer value, gpointer data) { char *word = key; GPtrArray *refs = value; struct ibex_write_data *iwd = data; int i, prefix; ibex_file *ibf; prefix = get_prefix (iwd, word); fprintf (iwd->f, "%c%s", prefix, word + prefix); fputc (0, iwd->f); write_number (iwd->f, refs->len); for (i = 0; i < refs->len; i++) { ibf = g_ptr_array_index (refs, i); write_number (iwd->f, ibf->index); } return FALSE; } /** * ibex_write: Write an ibex out to disk. * @ib: the ibex * * This writes an ibex to disk. * * Return value: 0 for success, -1 for failure (in which case errno * is set). **/ int ibex_write (ibex *ib) { struct ibex_write_data iwd; GTree *wtree; char *tmpfile; tmpfile = g_strdup_printf ("%s~", ib->path); iwd.f = fopen (tmpfile, "w"); if (!iwd.f) { if (errno == 0) errno = ENOMEM; g_free (tmpfile); return -1; } fputs (IBEX_VERSION, iwd.f); if (ferror (iwd.f)) goto lose; iwd.index = 0; iwd.lastname = NULL; write_number (iwd.f, g_tree_nnodes (ib->files)); if (ferror (iwd.f)) goto lose; g_tree_traverse (ib->files, write_file, G_IN_ORDER, &iwd); if (ferror (iwd.f)) goto lose; iwd.lastname = NULL; wtree = g_tree_new (strcmp); g_hash_table_foreach (ib->words, store_word, wtree); write_number (iwd.f, g_tree_nnodes(wtree)); if (ferror (iwd.f)) goto lose; g_tree_traverse (wtree, write_word, G_IN_ORDER, &iwd); g_tree_destroy (wtree); if (ferror (iwd.f)) goto lose; if (fclose (iwd.f) == 0 && rename (tmpfile, ib->path) == 0) { g_free (tmpfile); ib->dirty = FALSE; return 0; } lose: unlink (tmpfile); g_free (tmpfile); return -1; } /** * ibex_close: Write out the ibex file (if it has changed) and free * the data associated with it. * @ib: the ibex * * If this ibex file has been modified since it was opened, this will * call ibex_write() to write it out to disk. It will then free all data * associated with the ibex. After calling ibex_close(), @ib will no * longer be a valid ibex. * * Return value: 0 on success, -1 on an ibex_write() failure (in which * case @ib will not be destroyed). **/ int ibex_close (ibex *ib) { ibex_file *ibf; if (ib->dirty && ibex_write (ib) == -1) return -1; g_tree_traverse (ib->files, free_file, G_IN_ORDER, NULL); g_tree_destroy (ib->files); g_hash_table_foreach (ib->words, free_word, NULL); g_hash_table_destroy (ib->words); while (ib->oldfiles->len) { ibf = g_ptr_array_remove_index (ib->oldfiles, 0); g_free (ibf->name); g_free (ibf); } g_ptr_array_free (ib->oldfiles, TRUE); g_free (ib->path); g_free (ib); return 0; } static gint free_file (gpointer key, gpointer value, gpointer data) { ibex_file *ibf = value; g_free (ibf->name); g_free (ibf); return FALSE; } static void free_word (gpointer key, gpointer value, gpointer data) { g_free (key); g_ptr_array_free (value, TRUE); } static char * get_compressed_word (FILE *f, char **lastword) { char *buf, *p; int c, size; c = getc (f); if (c == EOF) return NULL; size = c + 10; buf = g_malloc (size); if (*lastword) strncpy (buf, *lastword, c); p = buf + c; do { c = getc (f); if (c == EOF) return NULL; if (p == buf + size) { buf = g_realloc (buf, size + 10); p = buf + size; size += 10; } *p++ = c; } while (c != 0); *lastword = buf; return buf; } static void write_number (FILE *f, unsigned long number) { int i, flag = 0; char buf[4]; i = 4; do { buf[--i] = (number & 0x7F) | flag; number = number >> 7; flag = 0x80; } while (number != 0); fwrite (buf + i, 1, 4 - i, f); } static unsigned long read_number (FILE *f) { int byte; unsigned long num; num = 0; do { byte = getc (f); num = num << 7 | (byte & 0x7F); } while (byte & 0x80); return num; }