diff options
Diffstat (limited to 'libibex/file.c')
-rw-r--r-- | libibex/file.c | 383 |
1 files changed, 383 insertions, 0 deletions
diff --git a/libibex/file.c b/libibex/file.c new file mode 100644 index 0000000000..e9eaab7f51 --- /dev/null +++ b/libibex/file.c @@ -0,0 +1,383 @@ +/* + Copyright 2000 Helix Code Inc. +*/ + +/* file.c: index file read/write ops */ + +#include <ctype.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "ibex_internal.h" + +static unsigned long read_number(FILE *f); +static void write_number(FILE *f, unsigned long n); +static char *get_compressed_word(FILE *f, char **lastword); + +static gint free_file(gpointer key, gpointer value, gpointer data); +static void free_word(gpointer key, gpointer value, gpointer data); + +/* The file format is: + * + * version string (currently "ibex1") + * file count + * list of compressed filenames, separated by \0 + * word count + * list of compressed words, each followed by \0, a count, and that + * many references. + * + * All numbers are stored 7-bit big-endian, with the high bit telling + * whether or not the number continues to the next byte. + * + * compressed text consists of a byte telling how many characters the + * line has in common with the line before it, followed by the rest of + * the string. Obviously this only really works if the lists are sorted. + */ +ibex * +ibex_open(char *file, gboolean create) +{ + ibex *ib; + FILE *f; + char vbuf[sizeof(IBEX_VERSION) - 1]; + char *word, *lastword; + unsigned long nfiles, nwords, nrefs, ref; + ibex_file **ibfs = NULL; + int i; + GPtrArray *refs; + + f = fopen(file, "r"); + if (!f && (errno != ENOENT || !create)) + { + if (errno == 0) + errno = ENOMEM; + return NULL; + } + + ib = g_malloc(sizeof(ibex)); + ib->dirty = FALSE; + ib->path = g_strdup(file); + ib->files = g_tree_new(strcmp); + ib->words = g_hash_table_new(g_str_hash, g_str_equal); + ib->oldfiles = g_ptr_array_new(); + + if (!f) + return ib; + + /* Check version. */ + if (fread(vbuf, 1, sizeof(vbuf), f) != sizeof(vbuf)) + { + if (feof(f)) + errno = EINVAL; + goto errout; + } + if (strncmp(vbuf, IBEX_VERSION, sizeof(vbuf) != 0)) + { + errno = EINVAL; + goto errout; + } + + /* Read list of files. */ + nfiles = read_number(f); + ibfs = g_malloc(nfiles * sizeof(ibex_file *)); + lastword = NULL; + for (i = 0; i < nfiles; i++) + { + ibfs[i] = g_malloc(sizeof(ibex_file)); + ibfs[i]->name = get_compressed_word(f, &lastword); + if (!ibfs[i]->name) + goto errout; + ibfs[i]->index = 0; + g_tree_insert(ib->files, ibfs[i]->name, ibfs[i]); + } + + /* Read list of words. */ + nwords = read_number(f); + lastword = NULL; + for (i = 0; i < nwords; i++) + { + word = get_compressed_word(f, &lastword); + if (!word) + goto errout; + + nrefs = read_number(f); + refs = g_ptr_array_new(); + g_ptr_array_set_size(refs, nrefs); + while (nrefs--) + { + ref = read_number(f); + if (ref >= nfiles) + goto errout; + refs->pdata[nrefs] = ibfs[ref]; + } + + g_hash_table_insert(ib->words, word, refs); + } + + g_free(ibfs); + fclose(f); + return ib; + +errout: + fclose(f); + g_tree_traverse(ib->files, free_file, G_IN_ORDER, NULL); + g_tree_destroy(ib->files); + g_hash_table_foreach(ib->words, free_word, NULL); + g_hash_table_destroy(ib->words); + g_ptr_array_free(ib->oldfiles, TRUE); + if (ibfs) + g_free(ibfs); + g_free(ib->path); + g_free(ib); + + return NULL; +} + +struct ibex_write_data { + unsigned long index; + FILE *f; + char *lastname; +}; + +static int +get_prefix(struct ibex_write_data *iwd, char *name) +{ + int i = 0; + if (iwd->lastname) + { + while (!strncmp(iwd->lastname, name, i + 1)) + i++; + } + iwd->lastname = name; + return i; +} + +static gint +write_file(gpointer key, gpointer value, gpointer data) +{ + char *file = key; + ibex_file *ibf = value; + struct ibex_write_data *iwd = data; + int prefix; + + ibf->index = iwd->index++; + prefix = get_prefix(iwd, file); + fprintf(iwd->f, "%c%s", prefix, file + prefix); + fputc(0, iwd->f); + return FALSE; +} + +static void +store_word(gpointer key, gpointer value, gpointer data) +{ + GTree *wtree = data; + + g_tree_insert(wtree, key, value); +} + +static gint +write_word(gpointer key, gpointer value, gpointer data) +{ + char *word = key; + GPtrArray *refs = value; + struct ibex_write_data *iwd = data; + ibex_file *ibf; + int i, ind, prefix; + + for (i = ind = 0; i < refs->len; i++) + { + ibf = g_ptr_array_index(refs, i); + if (ibf->index == -1) + { + g_ptr_array_remove_index_fast(refs, i); + i--; + } + else + ind++; + } + + if (ind != 0) + { + prefix = get_prefix(iwd, word); + fprintf(iwd->f, "%c%s", prefix, word + prefix); + fputc(0, iwd->f); + + write_number(iwd->f, ind); + + for (i = 0; i < refs->len; i++) + { + ibf = g_ptr_array_index(refs, i); + write_number(iwd->f, ibf->index); + } + } + return FALSE; +} + +int +ibex_write(ibex *ib) +{ + struct ibex_write_data iwd; + GTree *wtree; + char *tmpfile; + + tmpfile = g_strdup_printf("%s~", ib->path); + iwd.f = fopen(tmpfile, "w"); + if (!iwd.f) + { + if (errno == 0) + errno = ENOMEM; + g_free(tmpfile); + return -1; + } + + fputs(IBEX_VERSION, iwd.f); + if (ferror(iwd.f)) + goto lose; + + iwd.index = 0; + iwd.lastname = NULL; + write_number(iwd.f, g_tree_nnodes(ib->files)); + if (ferror(iwd.f)) + goto lose; + g_tree_traverse(ib->files, write_file, G_IN_ORDER, &iwd); + if (ferror(iwd.f)) + goto lose; + + iwd.lastname = NULL; + write_number(iwd.f, g_hash_table_size(ib->words)); + if (ferror(iwd.f)) + goto lose; + wtree = g_tree_new(strcmp); + g_hash_table_foreach(ib->words, store_word, wtree); + g_tree_traverse(wtree, write_word, G_IN_ORDER, &iwd); + g_tree_destroy(wtree); + if (ferror(iwd.f)) + goto lose; + + if (fclose(iwd.f) == 0 && rename(tmpfile, ib->path) == 0) + { + g_free(tmpfile); + ib->dirty = FALSE; + return 0; + } + +lose: + unlink(tmpfile); + g_free(tmpfile); + return -1; +} + +int +ibex_close(ibex *ib) +{ + ibex_file *ibf; + + if (ib->dirty && ibex_write(ib) == -1) + return -1; + + g_tree_traverse(ib->files, free_file, G_IN_ORDER, NULL); + g_tree_destroy(ib->files); + g_hash_table_foreach(ib->words, free_word, NULL); + g_hash_table_destroy(ib->words); + + while (ib->oldfiles->len) + { + ibf = g_ptr_array_remove_index(ib->oldfiles, 0); + g_free(ibf->name); + g_free(ibf); + } + g_ptr_array_free(ib->oldfiles, TRUE); + g_free(ib->path); + g_free(ib); + + return 0; +} + +static gint +free_file(gpointer key, gpointer value, gpointer data) +{ + ibex_file *ibf = value; + + g_free(ibf->name); + g_free(ibf); + return FALSE; +} + +static void +free_word(gpointer key, gpointer value, gpointer data) +{ + g_free(key); + g_ptr_array_free(value, TRUE); +} + +static char * +get_compressed_word(FILE *f, char **lastword) +{ + char *buf, *p; + int c, size; + + c = getc(f); + if (c == EOF) + return NULL; + + size = c + 10; + buf = g_malloc(size); + if (*lastword) + strncpy(buf, *lastword, c); + p = buf + c; + do + { + c = getc(f); + if (c == EOF) + return NULL; + if (p == buf + size) + { + buf = g_realloc(buf, size + 10); + p = buf + size; + size += 10; + } + *p++ = c; + } + while (c != 0); + + *lastword = buf; + return buf; +} + +static void +write_number(FILE *f, unsigned long number) +{ + int i, flag = 0; + char buf[4]; + + i = 4; + do + { + buf[--i] = (number & 0x7F) | flag; + number = number >> 7; + flag = 0x80; + } + while (number != 0); + + fwrite(buf + i, 1, 4 - i, f); +} + +static unsigned long +read_number(FILE *f) +{ + int byte; + unsigned long num; + + num = 0; + do + { + byte = getc(f); + num = num << 7 | (byte & 0x7F); + } + while (byte & 0x80); + + return num; +} + |