diff options
author | Not Zed <NotZed@Ximian.com> | 2002-03-25 20:11:44 +0800 |
---|---|---|
committer | Michael Zucci <zucchi@src.gnome.org> | 2002-03-25 20:11:44 +0800 |
commit | c6fc4e27a953c5213cff8400ec8e40f6f051e914 (patch) | |
tree | 7237e42f705cd584d36f3a2a4795a1bb16741dd3 /camel/camel-text-index.c | |
parent | ede63cde5882af8696c22ed546506ad877b73011 (diff) | |
download | gsoc2013-evolution-c6fc4e27a953c5213cff8400ec8e40f6f051e914.tar.gz gsoc2013-evolution-c6fc4e27a953c5213cff8400ec8e40f6f051e914.tar.zst gsoc2013-evolution-c6fc4e27a953c5213cff8400ec8e40f6f051e914.zip |
When we add a new name, up all of the cache limits, because we're probably
2002-03-25 Not Zed <NotZed@Ximian.com>
* camel-text-index.c (text_index_add_name): When we add a new
name, up all of the cache limits, because we're probably going to
be adding more.
(text_index_sync): Drop the cache limits back down again, we dont
need them when looking words up.
** MERGE camel_index branch.
* camel-text-index.[ch]: Added files i forgot to add (eep nearly
lost all this work!)
* camel-block-file.c (sync_nolock): Fix an infinite loop in syncing.
svn path=/trunk/; revision=16242
Diffstat (limited to 'camel/camel-text-index.c')
-rw-r--r-- | camel/camel-text-index.c | 1687 |
1 files changed, 1687 insertions, 0 deletions
diff --git a/camel/camel-text-index.c b/camel/camel-text-index.c new file mode 100644 index 0000000000..7ad069d90c --- /dev/null +++ b/camel/camel-text-index.c @@ -0,0 +1,1687 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; fill-column: 160 -*- */ +/* + * Copyright (C) 2001 Ximian Inc. + * + * Authors: Michael Zucchi <notzed@ximian.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <ctype.h> +#include <sys/stat.h> +#include <sys/uio.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "e-util/e-msgport.h" +#include "e-util/e-memory.h" + +#include "camel/camel-object.h" + +#include "camel-text-index.h" +#include "camel-block-file.h" +#include "camel-partition-table.h" + +#include <gal/unicode/gunicode.h> + +#include <stdio.h> + +#define w(x) +#define io(x) +#define d(x) /*(printf("%s(%d):%s: ", __FILE__, __LINE__, __PRETTY_FUNCTION__),(x))*/ + +/* cursor debug */ +#define c(x) + +#ifdef ENABLE_THREADS +#define CAMEL_TEXT_INDEX_LOCK(kf, lock) (e_mutex_lock(((CamelTextIndex *)kf)->priv->lock)) +#define CAMEL_TEXT_INDEX_UNLOCK(kf, lock) (e_mutex_unlock(((CamelTextIndex *)kf)->priv->lock)) +#else +#define CAMEL_TEXT_INDEX_LOCK(kf, lock) +#define CAMEL_TEXT_INDEX_UNLOCK(kf, lock) +#endif + +static int text_index_compress_nosync(CamelIndex *idx); + +void camel_text_index_dump(CamelTextIndex *idx); +void camel_text_index_info(CamelTextIndex *idx); +void camel_text_index_validate(CamelTextIndex *idx); + +/* ********************************************************************** */ + +/* "private" data, shared between index/cursor/name classes */ +typedef struct _CamelTextIndexNamePrivate CamelTextIndexNamePrivate; + +struct _CamelTextIndexNamePrivate { + GString *buffer; + camel_key_t nameid; + EMemPool *pool; +}; + +CamelTextIndexName *camel_text_index_name_new(CamelTextIndex *idx, const char *name, camel_key_t nameid); + +/* ****************************** */ + +typedef struct _CamelTextIndexCursorPrivate CamelTextIndexCursorPrivate; + +struct _CamelTextIndexCursorPrivate { + camel_block_t first; + camel_block_t next; + + int record_index; + + size_t record_count; + camel_key_t *records; + + char *current; +}; + +CamelTextIndexCursor *camel_text_index_cursor_new(CamelTextIndex *idx, camel_block_t data); + +/* ****************************** */ +typedef struct _CamelTextIndexKeyCursorPrivate CamelTextIndexKeyCursorPrivate; + +struct _CamelTextIndexKeyCursorPrivate { + CamelKeyTable *table; + + camel_key_t keyid; + unsigned int flags; + camel_block_t data; + char *current; +}; + +CamelTextIndexKeyCursor *camel_text_index_key_cursor_new(CamelTextIndex *idx, CamelKeyTable *table); + +/* ********************************************************************** */ + +#define CAMEL_TEXT_INDEX_VERSION "TEXT.000" +#define CAMEL_TEXT_INDEX_KEY_VERSION "KEYS.000" + +struct _CamelTextIndexPrivate { + CamelBlockFile *blocks; + CamelKeyFile *links; + + CamelKeyTable *word_index; + CamelPartitionTable *word_hash; + + CamelKeyTable *name_index; + CamelPartitionTable *name_hash; + + /* Cache of words to write */ + int word_cache_limit; + int word_cache_count; + EDList word_cache; + GHashTable *words; +#ifdef ENABLE_THREADS + EMutex *lock; +#endif +}; + +/* Root block of text index */ +struct _CamelTextIndexRoot { + struct _CamelBlockRoot root; + + /* FIXME: the index root could contain a pointer to the hash root */ + camel_block_t word_index_root; /* a keyindex containing the keyid -> word mapping */ + camel_block_t word_hash_root; /* a partitionindex containing word -> keyid mapping */ + + camel_block_t name_index_root; /* same, for names */ + camel_block_t name_hash_root; + + guint32 words; /* total words */ + guint32 names; /* total names */ + guint32 deleted; /* deleted names */ + guint32 keys; /* total key 'chunks' written, used with deleted to determine fragmentation */ +}; + +struct _CamelTextIndexWord { + struct _CamelTextIndexWord *next; + struct _CamelTextIndexWord *prev; + + camel_block_t data; /* where the data starts */ + camel_key_t wordid; + char *word; + unsigned int used; + camel_key_t names[32]; +}; + +#define CTI_PRIVATE(o) (((CamelTextIndex *)(o))->priv) + +#define CI_CLASS(o) ((CamelTextIndexClass *)(((CamelObject *)o)->classfuncs)) + +/* ********************************************************************** */ +/* CamelTextIndex */ +/* ********************************************************************** */ + +static CamelObjectClass *camel_text_index_parent; + +/* call locked */ +static void +text_index_add_name_to_word(CamelIndex *idx, const char *word, camel_key_t nameid) +{ + struct _CamelTextIndexWord *w, *wp, *ww; + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + camel_key_t wordid; + camel_block_t data; + struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *)p->blocks->root; + + w = g_hash_table_lookup(p->words, word); + if (w == NULL) { + wordid = camel_partition_table_lookup(p->word_hash, word); + if (wordid == 0) { + data = 0; + wordid = camel_key_table_add(p->word_index, word, 0, 0); + if (wordid == 0) + return; + camel_partition_table_add(p->word_hash, word, wordid); + rb->words++; + } else { + data = camel_key_table_lookup(p->word_index, wordid, NULL, 0); + if (data == 0) + return; + } + + w = g_malloc0(sizeof(*w)); + w->word = g_strdup(word); + w->wordid = wordid; + w->used = 1; + w->data = data; + + w->names[0] = nameid; + g_hash_table_insert(p->words, w->word, w); + e_dlist_addhead(&p->word_cache, (EDListNode *)w); + p->word_cache_count++; + ww = (struct _CamelTextIndexWord *)p->word_cache.tailpred; + wp = ww->prev; + while (wp && p->word_cache_count > p->word_cache_limit) { + io(printf("writing key file entry '%s' [%x]\n", ww->word, ww->data)); + if (camel_key_file_write(p->links, &ww->data, ww->used, ww->names) != -1) { + io(printf(" new data [%x]\n", ww->data)); + rb->keys++; + camel_key_table_set_data(p->word_index, ww->wordid, ww->data); + e_dlist_remove((EDListNode *)ww); + g_hash_table_remove(p->words, ww->word); + g_free(ww->word); + g_free(ww); + p->word_cache_count--; + } + ww = wp; + wp = wp->prev; + } + } else { + e_dlist_remove((EDListNode *)w); + e_dlist_addhead(&p->word_cache, (EDListNode *)w); + w->names[w->used] = nameid; + w->used++; + if (w->used == sizeof(w->names)/sizeof(w->names[0])) { + io(printf("writing key file entry '%s' [%x]\n", w->word, w->data)); + if (camel_key_file_write(p->links, &w->data, w->used, w->names) != -1) { + rb->keys++; + camel_key_table_set_data(p->word_index, w->wordid, w->data); + } + /* FIXME: what to on error? lost data? */ + w->used = 0; + } + } +} + +static int +text_index_sync(CamelIndex *idx) +{ + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + struct _CamelTextIndexWord *ww; + struct _CamelTextIndexRoot *rb; + int ret = 0, wfrag, nfrag, work = FALSE; + + if (p->blocks == NULL) + return 0; + + rb = (struct _CamelTextIndexRoot *)p->blocks->root; + + /* sync/flush word cache */ + + CAMEL_TEXT_INDEX_LOCK(idx, lock); + + /* we sync, bump down the cahce limits since we dont need them for reading */ + p->blocks->block_cache_limit = 128; + /* this doesn't really need to be dropped, its only used in updates anyway */ + p->word_cache_limit = 1024; + + work = !e_dlist_empty(&p->word_cache); + + while ( (ww = (struct _CamelTextIndexWord *)e_dlist_remhead(&p->word_cache)) ) { + if (ww->used > 0) { + io(printf("writing key file entry '%s' [%x]\n", ww->word, ww->data)); + if (camel_key_file_write(p->links, &ww->data, ww->used, ww->names) != -1) { + io(printf(" new data [%x]\n", ww->data)); + rb->keys++; + camel_key_table_set_data(p->word_index, ww->wordid, ww->data); + } else { + ret = -1; + } + ww->used = 0; + } + g_hash_table_remove(p->words, ww->word); + g_free(ww->word); + g_free(ww); + } + + /* only do the frag/compress check if we did some new writes on this index */ + if (ret == 0 && work) { + wfrag = rb->words ? (((rb->keys - rb->words) * 100)/ rb->words) : 0; + nfrag = rb->names ? ((rb->deleted * 100) / rb->names) : 0; + printf("wfrag = %d, nfrag = %d\n", wfrag, nfrag); + printf(" words = %d, keys = %d\n", rb->words, rb->keys); + if (wfrag > 30 || nfrag > 20) + ret = text_index_compress_nosync(idx); + } + + CAMEL_TEXT_INDEX_UNLOCK(idx, lock); + + return camel_block_file_sync(p->blocks); +} + +static void tmp_name(const char *in, char *o) +{ + char *s; + + s = strrchr(in, '/'); + if (s) { + memcpy(o, in, s-in+1); + memcpy(o+(s-in+1), ".#", 2); + strcpy(o+(s-in+3), s+1); + } else { + sprintf(o, ".#%s", in); + } +} + +static int +text_index_compress(CamelIndex *idx) +{ + int ret; + + CAMEL_TEXT_INDEX_LOCK(idx, lock); + + ret = camel_index_sync(idx); + if (ret != -1) + ret = text_index_compress_nosync(idx); + + CAMEL_TEXT_INDEX_UNLOCK(idx, lock); + + return ret; +} + +/* Attempt to recover index space by compressing the indices */ +static int +text_index_compress_nosync(CamelIndex *idx) +{ + CamelTextIndex *newidx; + struct _CamelTextIndexPrivate *newp, *oldp; + camel_key_t oldkeyid, newkeyid; + GHashTable *remap; + unsigned int deleted; + camel_block_t data, newdata; + int i, ret = -1; + char *name = NULL; + unsigned int flags; + char *newpath, *savepath, *oldpath; + size_t count, newcount; + camel_key_t *records, newrecords[256]; + struct _CamelTextIndexRoot *rb; + + newpath = alloca(strlen(idx->path)+5); + tmp_name(idx->path, newpath); + savepath = alloca(strlen(idx->path)+2); + sprintf(savepath, "%s~", idx->path); + oldpath = alloca(strlen(idx->path)+1); + strcpy(oldpath, idx->path); + + newidx = camel_text_index_new(newpath, O_RDWR|O_CREAT); + if (newidx == NULL) + return -1; + + newp = CTI_PRIVATE(newidx); + oldp = CTI_PRIVATE(idx); + + rb = (struct _CamelTextIndexRoot *)newp->blocks->root; + + rb->words = 0; + rb->names = 0; + rb->deleted = 0; + rb->keys = 0; + + /* Process: + For each name we still have: + Add it to the new index & setup remap table + + For each word: + Copy word's data to a new file + Add new word to index(*) (can we just copy blocks?) */ + + /* Copy undeleted names to new index file, creating new indices */ + io(printf("Copying undeleted names to new file\n")); + remap = g_hash_table_new(NULL, NULL); + oldkeyid = 0; + deleted = 0; + while ( (oldkeyid = camel_key_table_next(oldp->name_index, oldkeyid, &name, &flags, &data)) ) { + if ((flags&1) == 0) { + io(printf("copying name '%s'\n", name)); + newkeyid = camel_key_table_add(newp->name_index, name, data, flags); + if (newkeyid == 0) + goto fail; + rb->names++; + camel_partition_table_add(newp->name_hash, name, newkeyid); + g_hash_table_insert(remap, (void *)oldkeyid, (void *)newkeyid); + } else + io(printf("deleted name '%s'\n", name)); + g_free(name); + name = NULL; + deleted |= flags; + } while (oldkeyid); + + /* Copy word data across, remapping/deleting and create new index for it */ + /* We re-block the data into 256 entry lots while we're at it, since we only + have to do 1 at a time and its cheap */ + oldkeyid = 0; + while ( (oldkeyid = camel_key_table_next(oldp->word_index, oldkeyid, &name, &flags, &data)) ) { + io(printf("copying word '%s'\n", name)); + newdata = 0; + newcount = 0; + if (data) { + rb->words++; + rb->keys++; + } + while (data) { + if (camel_key_file_read(oldp->links, &data, &count, &records) == -1) { + io(printf("could not read from old keys at %d for word '%s'\n", (int)data, name)); + goto fail; + } + for (i=0;i<count;i++) { + newkeyid = (camel_key_t)g_hash_table_lookup(remap, (void *)records[i]); + if (newkeyid) { + newrecords[newcount++] = newkeyid; + if (newcount == sizeof(newrecords)/sizeof(newrecords[0])) { + if (camel_key_file_write(newp->links, &newdata, newcount, newrecords) == -1) + goto fail; + newcount = 0; + } + } + } + } + + if (newcount > 0) { + if (camel_key_file_write(newp->links, &newdata, newcount, newrecords) == -1) + goto fail; + } + + newkeyid = camel_key_table_add(newp->word_index, name, newdata, flags); + if (newkeyid == 0) + goto fail; + camel_partition_table_add(newp->word_hash, name, newkeyid); + g_free(name); + name = NULL; + } while (oldkeyid); + + if (camel_index_sync((CamelIndex *)newidx) == -1) + goto fail; + + /* Rename underlying files to match */ + ret = camel_index_rename(idx, savepath); + if (ret == -1) + goto fail; + + /* If this fails, we'll pick up something during restart? */ + ret = camel_index_rename((CamelIndex *)newidx, oldpath); + +#define myswap(a, b) { void *tmp = a; a = b; b = tmp; } + /* Poke the private data across to the new object */ + /* And change the fd's over, etc? */ + myswap(newp->blocks, oldp->blocks); + myswap(newp->links, oldp->links); + myswap(newp->word_index, oldp->word_index); + myswap(newp->word_hash, oldp->word_hash); + myswap(newp->name_index, oldp->name_index); + myswap(newp->name_hash, oldp->name_hash); +#undef myswap + + ret = 0; +fail: + camel_object_unref((CamelObject *)newidx); + g_free(name); + g_hash_table_destroy(remap); + + /* clean up temp files always */ + camel_text_index_remove(newpath); + unlink(savepath); + newpath = alloca(strlen(savepath)+6); + sprintf(newpath, "%s.data", savepath); + unlink(newpath); + + return ret; +} + +static int +text_index_rename(CamelIndex *idx, const char *path) +{ + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + char *newlink; + int err, ret; + + /* TODO: Needs some lock? */ + + ret = camel_block_file_rename(p->blocks, path); + if (ret == -1) + return -1; + + newlink = alloca(strlen(path)+8); + sprintf(newlink, "%s.data", path); + ret = camel_key_file_rename(p->links, newlink); + if (ret == -1) { + err = errno; + camel_block_file_rename(p->blocks, path); + errno = err; + return -1; + } + + g_free(idx->path); + idx->path = g_strdup(path); + + return 0; +} + +static int +text_index_has_name(CamelIndex *idx, const char *name) +{ + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + + return camel_partition_table_lookup(p->name_hash, name) != 0; +} + +static CamelIndexName * +text_index_add_name(CamelIndex *idx, const char *name) +{ + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + camel_key_t keyid; + CamelIndexName *idn; + struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *)p->blocks->root; + + CAMEL_TEXT_INDEX_LOCK(idx, lock); + + /* if we're adding words, up the cache limits a lot */ + if (p->blocks) { + p->blocks->block_cache_limit = 1024; + p->word_cache_limit = 8192; + } + + /* If we have it already replace it */ + keyid = camel_partition_table_lookup(p->name_hash, name); + if (keyid != 0) { + rb->deleted++; + camel_key_table_set_flags(p->name_index, keyid, 1, 1); + camel_partition_table_remove(p->name_hash, name); + } + + keyid = camel_key_table_add(p->name_index, name, 0, 0); + if (keyid != 0) { + camel_partition_table_add(p->name_hash, name, keyid); + rb->names++; + } + + /* TODO: if keyid == 0, we had a failure, we shoudl somehow flag that, but for + now just return a valid object but discard its results, see text_index_write_name */ + + CAMEL_TEXT_INDEX_UNLOCK(idx, lock); + + idn = (CamelIndexName *)camel_text_index_name_new((CamelTextIndex *)idx, name, keyid); + + return idn; +} + +/* call locked */ +static void +hash_write_word(char *word, void *data, CamelIndexName *idn) +{ + CamelTextIndexName *tin = (CamelTextIndexName *)idn; + + text_index_add_name_to_word(idn->index, word, tin->priv->nameid); +} + +static int +text_index_write_name(CamelIndex *idx, CamelIndexName *idn) +{ + /* force 'flush' of any outstanding data */ + camel_index_name_add_buffer(idn, NULL, 0); + + /* see text_index_add_name for when this can be 0 */ + if (((CamelTextIndexName *)idn)->priv->nameid != 0) { + CAMEL_TEXT_INDEX_LOCK(idx, lock); + + g_hash_table_foreach(idn->words, (GHFunc)hash_write_word, idn); + + CAMEL_TEXT_INDEX_UNLOCK(idx, lock); + } + + return 0; +} + +static CamelIndexCursor * +text_index_find_name(CamelIndex *idx, const char *name) +{ + /* what was this for, umm */ + return NULL; +} + +static void +text_index_delete_name(CamelIndex *idx, const char *name) +{ + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + camel_key_t keyid; + struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *)p->blocks->root; + + /* probably doesn't really need locking, but oh well */ + CAMEL_TEXT_INDEX_LOCK(idx, lock); + + /* We just mark the key deleted, and remove it from the hash table */ + keyid = camel_partition_table_lookup(p->name_hash, name); + if (keyid != 0) { + rb->deleted++; + camel_key_table_set_flags(p->name_index, keyid, 1, 1); + camel_partition_table_remove(p->name_hash, name); + } + + CAMEL_TEXT_INDEX_UNLOCK(idx, lock); +} + +static CamelIndexCursor * +text_index_find(CamelIndex *idx, const char *word) +{ + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + camel_key_t keyid; + camel_block_t data = 0; + unsigned int flags; + CamelIndexCursor *idc; + + CAMEL_TEXT_INDEX_LOCK(idx, lock); + + keyid = camel_partition_table_lookup(p->word_hash, word); + if (keyid != 0) { + data = camel_key_table_lookup(p->word_index, keyid, NULL, &flags); + if (flags & 1) + data = 0; + } + + CAMEL_TEXT_INDEX_UNLOCK(idx, lock); + + idc = (CamelIndexCursor *)camel_text_index_cursor_new((CamelTextIndex *)idx, data); + + return idc; +} + +static CamelIndexCursor * +text_index_words(CamelIndex *idx) +{ + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + + return (CamelIndexCursor *)camel_text_index_key_cursor_new((CamelTextIndex *)idx, p->word_index); +} + +static CamelIndexCursor * +text_index_names(CamelIndex *idx) +{ + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + + return (CamelIndexCursor *)camel_text_index_key_cursor_new((CamelTextIndex *)idx, p->name_index); +} + +static void +camel_text_index_class_init(CamelTextIndexClass *klass) +{ + CamelIndexClass *iklass = (CamelIndexClass *)klass; + + camel_text_index_parent = CAMEL_OBJECT_CLASS(camel_type_get_global_classfuncs(camel_object_get_type())); + + iklass->sync = text_index_sync; + iklass->compress = text_index_compress; + + iklass->rename = text_index_rename; + + iklass->has_name = text_index_has_name; + iklass->add_name = text_index_add_name; + iklass->write_name = text_index_write_name; + iklass->find_name = text_index_find_name; + iklass->delete_name = text_index_delete_name; + iklass->find = text_index_find; + + iklass->words = text_index_words; + iklass->names = text_index_names; +} + +static void +camel_text_index_init(CamelTextIndex *idx) +{ + struct _CamelTextIndexPrivate *p; + + p = CTI_PRIVATE(idx) = g_malloc0(sizeof(*p)); + + e_dlist_init(&p->word_cache); + p->words = g_hash_table_new(g_str_hash, g_str_equal); + p->word_cache_count = 0; + /* this cache size and the block cache size have been tuned for about the best + with moderate memory usage. Doubling the memory usage barely affects performance. */ + p->word_cache_limit = 4096; /* 1024 = 128K */ + +#ifdef ENABLE_THREADS + p->lock = e_mutex_new(E_MUTEX_REC); +#endif +} + +static void +camel_text_index_finalise(CamelTextIndex *idx) +{ + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + + camel_index_sync((CamelIndex *)idx); + + g_assert(e_dlist_empty(&p->word_cache)); + g_assert(g_hash_table_size(p->words) == 0); + + g_hash_table_destroy(p->words); + + if (p->word_index) + camel_object_unref((CamelObject *)p->word_index); + if (p->word_hash) + camel_object_unref((CamelObject *)p->word_hash); + if (p->name_index) + camel_object_unref((CamelObject *)p->name_index); + if (p->name_hash) + camel_object_unref((CamelObject *)p->name_hash); + + if (p->blocks) + camel_object_unref((CamelObject *)p->blocks); + if (p->links) + camel_object_unref((CamelObject *)p->links); + +#ifdef ENABLE_THREADS + e_mutex_destroy(p->lock); +#endif + + g_free(p); +} + +CamelType +camel_text_index_get_type(void) +{ + static CamelType type = CAMEL_INVALID_TYPE; + + if (type == CAMEL_INVALID_TYPE) { + type = camel_type_register(camel_object_get_type(), "CamelTextIndex", + sizeof (CamelTextIndex), + sizeof (CamelTextIndexClass), + (CamelObjectClassInitFunc) camel_text_index_class_init, + NULL, + (CamelObjectInitFunc) camel_text_index_init, + (CamelObjectFinalizeFunc) camel_text_index_finalise); + } + + return type; +} + +static char * +text_index_normalise(CamelIndex *idx, const char *in, void *data) +{ + char *word; + + /* Sigh, this is really epensive */ + word = g_utf8_normalize(in, strlen(in), G_NORMALIZE_ALL); + g_utf8_strdown(word); + + return word; +} + +/* Need flags? */ +CamelTextIndex * +camel_text_index_new(const char *path, int flags) +{ + CamelTextIndex *idx = (CamelTextIndex *)camel_object_new(camel_text_index_get_type()); + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + struct _CamelTextIndexRoot *rb; + char *link; + CamelBlock *bl; + + camel_index_construct((CamelIndex *)idx, path, flags); + camel_index_set_normalise((CamelIndex *)idx, text_index_normalise, NULL); + + p->blocks = camel_block_file_new(idx->parent.path, flags, CAMEL_TEXT_INDEX_VERSION, CAMEL_BLOCK_SIZE); + link = alloca(strlen(idx->parent.path)+7); + sprintf(link, "%s.data", idx->parent.path); + p->links = camel_key_file_new(link, flags, CAMEL_TEXT_INDEX_KEY_VERSION); + + if (p->blocks == NULL || p->links == NULL) { + camel_object_unref((CamelObject *)idx); + return NULL; + } + + rb = (struct _CamelTextIndexRoot *)p->blocks->root; + + if (rb->word_index_root == 0) { + bl = camel_block_file_new_block(p->blocks); + rb->word_index_root = bl->id; + camel_block_file_unref_block(p->blocks, bl); + } + + if (rb->word_hash_root == 0) { + bl = camel_block_file_new_block(p->blocks); + rb->word_hash_root = bl->id; + camel_block_file_unref_block(p->blocks, bl); + } + + if (rb->name_index_root == 0) { + bl = camel_block_file_new_block(p->blocks); + rb->name_index_root = bl->id; + camel_block_file_unref_block(p->blocks, bl); + } + + if (rb->name_hash_root == 0) { + bl = camel_block_file_new_block(p->blocks); + rb->name_hash_root = bl->id; + camel_block_file_unref_block(p->blocks, bl); + } + + p->word_index = camel_key_table_new(p->blocks, rb->word_index_root); + p->word_hash = camel_partition_table_new(p->blocks, rb->word_hash_root); + p->name_index = camel_key_table_new(p->blocks, rb->name_index_root); + p->name_hash = camel_partition_table_new(p->blocks, rb->name_hash_root); + + if (p->word_index == NULL || p->word_hash == NULL + || p->name_index == NULL || p->name_hash == NULL) { + camel_object_unref((CamelObject *)idx); + idx = NULL; + } + + return idx; +} + +/* returns 0 if the index exists, is valid, and synced, -1 otherwise */ +int +camel_text_index_check(const char *path) +{ + char *block, *key; + CamelBlockFile *blocks; + CamelKeyFile *keys; + + block = alloca(strlen(path)+7); + sprintf(block, "%s.index", path); + blocks = camel_block_file_new(block, O_RDONLY, CAMEL_TEXT_INDEX_VERSION, CAMEL_BLOCK_SIZE); + if (blocks == NULL) { + io(printf("Check failed: No block file: %s\n", strerror(errno))); + return -1; + } + key = alloca(strlen(path)+12); + sprintf(key, "%s.index.data", path); + keys = camel_key_file_new(key, O_RDONLY, CAMEL_TEXT_INDEX_KEY_VERSION); + if (keys == NULL) { + io(printf("Check failed: No key file: %s\n", strerror(errno))); + camel_object_unref((CamelObject *)blocks); + return -1; + } + + camel_object_unref((CamelObject *)keys); + camel_object_unref((CamelObject *)blocks); + + return 0; +} + +int +camel_text_index_rename(const char *old, const char *new) +{ + char *oldname, *newname; + int err; + + /* TODO: camel_text_index_rename should find out if we have an active index and use that instead */ + + oldname = alloca(strlen(old)+12); + newname = alloca(strlen(new)+12); + sprintf(oldname, "%s.index", old); + sprintf(newname, "%s.index", new); + + if (rename(oldname, newname) == -1) + return -1; + + sprintf(oldname, "%s.index.data", old); + sprintf(newname, "%s.index.data", new); + + if (rename(oldname, newname) == -1) { + err = errno; + sprintf(oldname, "%s.index", old); + sprintf(newname, "%s.index", new); + rename(newname, oldname); + errno = err; + return -1; + } + + return 0; +} + +int +camel_text_index_remove(const char *old) +{ + char *block, *key; + int ret = 0; + + /* TODO: needs to poke any active indices to remain unlinked */ + + block = alloca(strlen(old)+12); + key = alloca(strlen(old)+12); + sprintf(block, "%s.index", old); + sprintf(key, "%s.index.data", old); + + if (unlink(block) == -1 && errno != ENOENT) + ret = -1; + if (unlink(key) == -1 && errno != ENOENT) + ret = -1; + + return ret; +} + +/* Debug */ +void +camel_text_index_info(CamelTextIndex *idx) +{ + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *)p->blocks->root; + int frag; + + printf("Path: '%s'\n", idx->parent.path); + printf("Version: %d\n", idx->parent.version); + printf("Flags: %08x\n", idx->parent.flags); + printf("Total words: %d\n", rb->words); + printf("Total names: %d\n", rb->names); + printf("Total deleted: %d\n", rb->deleted); + printf("Total key blocks: %d\n", rb->keys); + + if (rb->words > 0) { + frag = ((rb->keys - rb->words) * 100)/ rb->words; + printf("Word fragmentation: %d%%\n", frag); + } + + if (rb->names > 0) { + frag = (rb->deleted * 100)/ rb->names; + printf("Name fragmentation: %d%%\n", frag); + } +} + +/* Debug */ +void +camel_text_index_dump(CamelTextIndex *idx) +{ + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + camel_key_t keyid; + char *word; + const char *name; + unsigned int flags; + camel_block_t data; + + /* Iterate over all names in the file first */ + + printf("UID's in index\n"); + + keyid = 0; + while ( (keyid = camel_key_table_next(p->name_index, keyid, &word, &flags, &data)) ) { + if ((flags & 1) == 0) + printf(" %s\n", word); + else + printf(" %s (deleted)\n", word); + g_free(word); + } + + printf("Word's in index\n"); + + keyid = 0; + while ( (keyid = camel_key_table_next(p->word_index, keyid, &word, &flags, &data)) ) { + CamelIndexCursor *idc; + + printf("Word: '%s':\n", word); + + idc = camel_index_find((CamelIndex *)idx, word); + while ( (name = camel_index_cursor_next(idc)) ) { + printf(" %s", name); + } + printf("\n"); + camel_object_unref((CamelObject *)idc); + g_free(word); + } +} + +/* more debug stuff */ +void +camel_text_index_validate(CamelTextIndex *idx) +{ + struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx); + camel_key_t keyid; + char *word; + const char *name; + unsigned int flags; + camel_block_t data; + char *oldword; + camel_key_t *records; + size_t count; + + GHashTable *names, *deleted, *words, *keys, *name_word, *word_word; + + names = g_hash_table_new(NULL, NULL); + deleted = g_hash_table_new(NULL, NULL); + + name_word = g_hash_table_new(g_str_hash, g_str_equal); + + words = g_hash_table_new(NULL, NULL); + keys = g_hash_table_new(NULL, NULL); + + word_word = g_hash_table_new(g_str_hash, g_str_equal); + + /* Iterate over all names in the file first */ + + printf("Checking UID consistency\n"); + + keyid = 0; + while ( (keyid = camel_key_table_next(p->name_index, keyid, &word, &flags, &data)) ) { + if ((oldword = g_hash_table_lookup(names, (void *)keyid)) != NULL + || (oldword = g_hash_table_lookup(deleted, (void *)keyid)) != NULL) { + printf("Warning, name '%s' duplicates key (%x) with name '%s'\n", word, keyid, oldword); + g_free(word); + } else { + g_hash_table_insert(name_word, word, (void *)1); + if ((flags & 1) == 0) { + g_hash_table_insert(names, (void *)keyid, word); + } else { + g_hash_table_insert(deleted, (void *)keyid, word); + } + } + } + + printf("Checking WORD member consistency\n"); + + keyid = 0; + while ( (keyid = camel_key_table_next(p->word_index, keyid, &word, &flags, &data)) ) { + CamelIndexCursor *idc; + GHashTable *used; + + /* first, check for duplicates of keyid, and data */ + if ((oldword = g_hash_table_lookup(words, (void *)keyid)) != NULL) { + printf("Warning, word '%s' duplicates key (%x) with name '%s'\n", word, keyid, oldword); + g_free(word); + word = oldword; + } else { + g_hash_table_insert(words, (void *)keyid, word); + } + + if (data == 0) { + /* This may not be an issue if things have been removed over time, + though it is a problem if its a fresh index */ + printf("Word '%s' has no data associated with it\n", word); + } else { + if ((oldword = g_hash_table_lookup(keys, (void *)data)) != NULL) { + printf("Warning, word '%s' duplicates data (%x) with name '%s'\n", word, data, oldword); + } else { + g_hash_table_insert(keys, (void *)data, word); + } + } + + if ((oldword = g_hash_table_lookup(word_word, word)) != NULL) { + printf("Warning, word '%s' occurs more than once\n", word); + } else { + g_hash_table_insert(word_word, word, word); + } + + used = g_hash_table_new(g_str_hash, g_str_equal); + + idc = camel_index_find((CamelIndex *)idx, word); + while ( (name = camel_index_cursor_next(idc)) ) { + if (g_hash_table_lookup(name_word, name) == NULL) { + printf("word '%s' references non-existant name '%s'\n", word, name); + } + if (g_hash_table_lookup(used, name) != NULL) { + printf("word '%s' uses word '%s' more than once\n", word, name); + } else { + g_hash_table_insert(used, g_strdup(name), (void *)1); + } + } + camel_object_unref((CamelObject *)idc); + + g_hash_table_foreach(used, (GHFunc)g_free, NULL); + g_hash_table_destroy(used); + + printf("word '%s'\n", word); + + while (data) { + printf(" data %x ", data); + if (camel_key_file_read(p->links, &data, &count, &records) == -1) { + printf("Warning, read failed for word '%s', at data '%d'\n", word, data); + data = 0; + } else { + printf("(%d)\n", count); + g_free(records); + } + } + } + + g_hash_table_destroy(names); + g_hash_table_destroy(deleted); + g_hash_table_destroy(words); + g_hash_table_destroy(keys); + + g_hash_table_foreach(name_word, (GHFunc)g_free, NULL); + g_hash_table_destroy(name_word); + + g_hash_table_foreach(word_word, (GHFunc)g_free, NULL); + g_hash_table_destroy(word_word); +} + +/* ********************************************************************** */ +/* CamelTextIndexName */ +/* ********************************************************************** */ + +static CamelIndexNameClass *camel_text_index_name_parent; + +#define CIN_CLASS(o) ((CamelTextIndexNameClass *)(((CamelObject *)o)->classfuncs)) +#define CIN_PRIVATE(o) (((CamelTextIndexName *)(o))->priv) + +static void +text_index_name_add_word(CamelIndexName *idn, const char *word) +{ + struct _CamelTextIndexNamePrivate *p = ((CamelTextIndexName *)idn)->priv; + + if (g_hash_table_lookup(idn->words, word) == NULL) { + char *w = e_mempool_strdup(p->pool, word); + + g_hash_table_insert(idn->words, w, w); + } +} + +/* Why? + Because it doesn't hang/loop forever on bad data + Used to clean up utf8 before it gets further */ + +static __inline__ guint32 +camel_utf8_next(const unsigned char **ptr, const unsigned char *ptrend) +{ + register unsigned char *p = (unsigned char *)*ptr; + register unsigned int c; + register guint32 v; + int l; + + if (p == ptrend) + return 0; + + while ( (c = *p++) ) { + if (c < 0x80) { + *ptr = p; + return c; + } else if ((c&0xe0) == 0xc0) { + v = c & 0x1f; + l = 1; + } else if ((c&0xf0) == 0xe0) { + v = c & 0x0f; + l = 2; + } else if ((c&0xf8) == 0xf0) { + v = c & 0x07; + l = 3; + } else if ((c&0xfc) == 0xf8) { + v = c & 0x03; + l = 4; + } else if ((c&0xfe) == 0xfc) { + v = c & 0x01; + l = 5; + } else + /* Invalid, ignore and look for next start char if room */ + if (p == ptrend) { + return 0; + } else { + continue; + } + + /* bad data or truncated buffer */ + if (p + l > ptrend) + return 0; + + while (l && ((c = *p) & 0xc0) == 0x80) { + p++; + l--; + v = (v << 6) | (c & 0x3f); + } + + /* valid char */ + if (l == 0) { + *ptr = p; + return v; + } + + /* else look for a start char again */ + } + + return 0; +} + +static size_t +text_index_name_add_buffer(CamelIndexName *idn, const char *buffer, size_t len) +{ + CamelTextIndexNamePrivate *p = CIN_PRIVATE(idn); + const unsigned char *ptr, *ptrend; + guint32 c; + unsigned char utf8[8]; + size_t utf8len; + + if (buffer == NULL) { + if (p->buffer->len) { + camel_index_name_add_word(idn, p->buffer->str); + g_string_truncate(p->buffer, 0); + } + return 0; + } + + ptr = buffer; + ptrend = buffer+len; + while ((c = camel_utf8_next(&ptr, ptrend))) { + if (g_unichar_isalnum(c)) { + c = g_unichar_tolower(c); + utf8len = g_unichar_to_utf8(c, utf8); + utf8[utf8len] = 0; + g_string_append(p->buffer, utf8); + } else { + if (p->buffer->len) { + text_index_name_add_word(idn, p->buffer->str); + /*camel_index_name_add_word(idn, p->buffer->str);*/ + g_string_truncate(p->buffer, 0); + } + } + } + + return 0; +} + +static void +camel_text_index_name_class_init(CamelTextIndexNameClass *klass) +{ + CamelIndexNameClass *nklass = (CamelIndexNameClass *)klass; + + camel_text_index_name_parent = CAMEL_INDEX_NAME_CLASS(camel_type_get_global_classfuncs(camel_index_name_get_type())); + + nklass->add_word = text_index_name_add_word; + nklass->add_buffer = text_index_name_add_buffer; +} + +static void +camel_text_index_name_init(CamelTextIndexName *idn) +{ + CamelTextIndexNamePrivate *p; + + idn->parent.words = g_hash_table_new(g_str_hash, g_str_equal); + + p = idn->priv = g_malloc0(sizeof(*idn->priv)); + p->buffer = g_string_new(""); + p->pool = e_mempool_new(256, 128, E_MEMPOOL_ALIGN_BYTE); +} + +static void +camel_text_index_name_finalise(CamelTextIndexName *idn) +{ + CamelTextIndexNamePrivate *p = CIN_PRIVATE(idn); + + g_hash_table_destroy(idn->parent.words); + + g_string_free(p->buffer, TRUE); + e_mempool_destroy(p->pool); + + g_free(p); +} + +CamelType +camel_text_index_name_get_type(void) +{ + static CamelType type = CAMEL_INVALID_TYPE; + + if (type == CAMEL_INVALID_TYPE) { + type = camel_type_register(camel_object_get_type(), "CamelTextIndexName", + sizeof (CamelTextIndexName), + sizeof (CamelTextIndexNameClass), + (CamelObjectClassInitFunc) camel_text_index_name_class_init, + NULL, + (CamelObjectInitFunc) camel_text_index_name_init, + (CamelObjectFinalizeFunc) camel_text_index_name_finalise); + } + + return type; +} + +CamelTextIndexName * +camel_text_index_name_new(CamelTextIndex *idx, const char *name, camel_key_t nameid) +{ + CamelTextIndexName *idn = (CamelTextIndexName *)camel_object_new(camel_text_index_name_get_type()); + CamelIndexName *cin = &idn->parent; + CamelTextIndexNamePrivate *p = CIN_PRIVATE(idn); + + cin->index = (CamelIndex *)idx; + camel_object_ref((CamelObject *)idx); + cin->name = g_strdup(name); + p->nameid = nameid; + + return idn; +} + +/* ********************************************************************** */ +/* CamelTextIndexCursor */ +/* ********************************************************************** */ + +static CamelIndexCursorClass *camel_text_index_cursor_parent; + +#define CIC_CLASS(o) ((CamelTextIndexCursorClass *)(((CamelObject *)o)->classfuncs)) +#define CIC_PRIVATE(o) (((CamelTextIndexCursor *)(o))->priv) + +static const char * +text_index_cursor_next(CamelIndexCursor *idc) +{ + struct _CamelTextIndexCursorPrivate *p = CIC_PRIVATE(idc); + struct _CamelTextIndexPrivate *tip = CTI_PRIVATE(idc->index); + unsigned int flags; + + c(printf("Going to next cursor for word with data '%08x' next %08x\n", p->first, p->next)); + + do { + while (p->record_index >= p->record_count) { + g_free(p->records); + p->records = NULL; + p->record_index = 0; + p->record_count = 0; + if (p->next == 0) + return NULL; + if (camel_key_file_read(tip->links, &p->next, &p->record_count, &p->records) == -1) + return NULL; + } + + g_free(p->current); + camel_key_table_lookup(tip->name_index, p->records[p->record_index], &p->current, &flags); + if (flags & 1) { + g_free(p->current); + p->current = NULL; + } + p->record_index++; + } while (p->current == NULL); + + return p->current; +} + +static void +text_index_cursor_reset(CamelIndexCursor *idc) +{ + struct _CamelTextIndexCursorPrivate *p = CIC_PRIVATE(idc); + + g_free(p->records); + g_free(p->current); + p->record_count = 0; + p->record_index = 0; + p->next = p->first; +} + +static void +camel_text_index_cursor_class_init(CamelTextIndexCursorClass *klass) +{ + CamelIndexCursorClass *cklass = (CamelIndexCursorClass *)klass; + + camel_text_index_cursor_parent = CAMEL_INDEX_CURSOR_CLASS(camel_type_get_global_classfuncs(camel_index_cursor_get_type())); + + cklass->next = text_index_cursor_next; + cklass->reset = text_index_cursor_reset; +} + +static void +camel_text_index_cursor_init(CamelTextIndexCursor *idc) +{ + CIC_PRIVATE(idc) = g_malloc0(sizeof(struct _CamelTextIndexCursorPrivate)); +} + +static void +camel_text_index_cursor_finalise(CamelTextIndexCursor *idc) +{ + struct _CamelTextIndexCursorPrivate *p = CIC_PRIVATE(idc); + + g_free(p->records); + g_free(p->current); + g_free(p); +} + +CamelType +camel_text_index_cursor_get_type(void) +{ + static CamelType type = CAMEL_INVALID_TYPE; + + if (type == CAMEL_INVALID_TYPE) { + type = camel_type_register(camel_object_get_type(), "CamelTextIndexCursor", + sizeof (CamelTextIndexCursor), + sizeof (CamelTextIndexCursorClass), + (CamelObjectClassInitFunc) camel_text_index_cursor_class_init, + NULL, + (CamelObjectInitFunc) camel_text_index_cursor_init, + (CamelObjectFinalizeFunc) camel_text_index_cursor_finalise); + } + + return type; +} + +CamelTextIndexCursor * +camel_text_index_cursor_new(CamelTextIndex *idx, camel_block_t data) +{ + CamelTextIndexCursor *idc = (CamelTextIndexCursor *)camel_object_new(camel_text_index_cursor_get_type()); + CamelIndexCursor *cic = &idc->parent; + struct _CamelTextIndexCursorPrivate *p = CIC_PRIVATE(idc); + + cic->index = (CamelIndex *)idx; + camel_object_ref((CamelObject *)idx); + p->first = data; + p->next = data; + p->record_count = 0; + p->record_index = 0; + + return idc; +} + +/* ********************************************************************** */ +/* CamelTextIndexKeyCursor */ +/* ********************************************************************** */ + +static CamelIndexCursorClass *camel_text_index_key_cursor_parent; + +#define CIKC_CLASS(o) ((CamelTextIndexKeyCursorClass *)(((CamelObject *)o)->classfuncs)) +#define CIKC_PRIVATE(o) (((CamelTextIndexKeyCursor *)(o))->priv) + +static const char * +text_index_key_cursor_next(CamelIndexCursor *idc) +{ + struct _CamelTextIndexKeyCursorPrivate *p = CIKC_PRIVATE(idc); + + c(printf("Going to next cursor for keyid %08x\n", p->keyid)); + + g_free(p->current); + p->current = NULL; + + while ( (p->keyid = camel_key_table_next(p->table, p->keyid, &p->current, &p->flags, &p->data)) ) { + if ((p->flags & 1) == 0) { + return p->current; + } else { + g_free(p->current); + p->current = NULL; + } + } + + return NULL; +} + +static void +text_index_key_cursor_reset(CamelIndexCursor *idc) +{ + struct _CamelTextIndexKeyCursorPrivate *p = CIKC_PRIVATE(idc); + + p->keyid = 0; + p->flags = 0; + p->data = 0; + g_free(p->current); + p->current = NULL; +} + +static void +camel_text_index_key_cursor_class_init(CamelTextIndexKeyCursorClass *klass) +{ + CamelIndexCursorClass *cklass = (CamelIndexCursorClass *)klass; + + camel_text_index_key_cursor_parent = CAMEL_INDEX_CURSOR_CLASS(camel_type_get_global_classfuncs(camel_index_cursor_get_type())); + + cklass->next = text_index_key_cursor_next; + cklass->reset = text_index_key_cursor_reset; +} + +static void +camel_text_index_key_cursor_init(CamelTextIndexKeyCursor *idc) +{ + struct _CamelTextIndexKeyCursorPrivate *p; + + p = idc->priv = g_malloc0(sizeof(struct _CamelTextIndexKeyCursorPrivate)); + p->keyid = 0; + p->flags = 0; + p->data = 0; + p->current = NULL; +} + +static void +camel_text_index_key_cursor_finalise(CamelTextIndexKeyCursor *idc) +{ + struct _CamelTextIndexKeyCursorPrivate *p = CIKC_PRIVATE(idc); + + g_free(p->current); + if (p->table) + camel_object_unref((CamelObject *)p->table); + g_free(p); +} + +CamelType +camel_text_index_key_cursor_get_type(void) +{ + static CamelType type = CAMEL_INVALID_TYPE; + + if (type == CAMEL_INVALID_TYPE) { + type = camel_type_register(camel_object_get_type(), "CamelTextIndexKeyCursor", + sizeof (CamelTextIndexKeyCursor), + sizeof (CamelTextIndexKeyCursorClass), + (CamelObjectClassInitFunc) camel_text_index_key_cursor_class_init, + NULL, + (CamelObjectInitFunc) camel_text_index_key_cursor_init, + (CamelObjectFinalizeFunc) camel_text_index_key_cursor_finalise); + } + + return type; +} + +CamelTextIndexKeyCursor * +camel_text_index_key_cursor_new(CamelTextIndex *idx, CamelKeyTable *table) +{ + CamelTextIndexKeyCursor *idc = (CamelTextIndexKeyCursor *)camel_object_new(camel_text_index_key_cursor_get_type()); + CamelIndexCursor *cic = &idc->parent; + struct _CamelTextIndexKeyCursorPrivate *p = CIKC_PRIVATE(idc); + + cic->index = (CamelIndex *)idx; + camel_object_ref((CamelObject *)idx); + p->table = table; + camel_object_ref((CamelObject *)table); + + return idc; +} + +/* ********************************************************************** */ + +#define m(x) + +#if 0 + +struct _CamelIndexRoot { + struct _CamelBlockRoot root; + + camel_block_t word_root; /* a keyindex containing the keyid -> word mapping */ + camel_block_t word_hash_root; /* a partitionindex containing word -> keyid mapping */ + + camel_block_t name_root; /* same, for names */ + camel_block_t name_hash_root; +}; + +char wordbuffer[] = "This is a buffer of multiple words. Some of the words are duplicates" +" while other words are the same, some are in difFerenT Different different case cAsE casE," +" with,with:with;with-with'with\"'\"various punctuation as well. So much for those Words. and 10" +" numbers in a row too 1,2,3,4,5,6,7,8,9,10! Yay!."; + +int main(int argc, char **argv) +{ +#if 0 + CamelBlockFile *bs; + CamelKeyTable *ki; + CamelPartitionTable *cpi; + CamelBlock *keyroot, *partroot; + struct _CamelIndexRoot *root; + FILE *fp; + char line[256], *key; + camel_key_t keyid; + int index = 0, flags, data; +#endif + CamelIndex *idx; + CamelIndexName *idn; + CamelIndexCursor *idc; + const char *word; + int i; + + printf("Camel text index tester!\n"); + + g_thread_init(NULL); + camel_init(NULL, 0); + + idx = (CamelIndex *)camel_text_index_new("textindex", O_CREAT|O_RDWR|O_TRUNC); + + +#if 1 + camel_index_compress(idx); + + return 0; +#endif + + for (i=0;i<100;i++) { + char name[16]; + + sprintf(name, "%d", i); + printf("Adding words to name '%s'\n", name); + idn = camel_index_add_name(idx, name); + camel_index_name_add_buffer(idn, wordbuffer, sizeof(wordbuffer)-1); + camel_index_write_name(idx, idn); + camel_object_unref((CamelObject *)idn); + } + + printf("Looking up which names contain word 'word'\n"); + idc = camel_index_find(idx, "words"); + while ( (word = camel_index_cursor_next(idc)) != NULL ) { + printf(" name is '%s'\n", word); + } + camel_object_unref((CamelObject *)idc); + printf("done.\n"); + + printf("Looking up which names contain word 'truncate'\n"); + idc = camel_index_find(idx, "truncate"); + while ( (word = camel_index_cursor_next(idc)) != NULL ) { + printf(" name is '%s'\n", word); + } + camel_object_unref((CamelObject *)idc); + printf("done.\n"); + + camel_index_sync(idx); + camel_object_unref((CamelObject *)idx); + +#if 0 + bs = camel_block_file_new("blocks", "TESTINDX", CAMEL_BLOCK_SIZE); + + root = (struct _CamelIndexRoot *)bs->root; + if (root->word_root == 0) { + keyroot = camel_block_file_new_block(bs); + root->word_root = keyroot->id; + camel_block_file_touch_block(bs, bs->root_block); + } + if (root->word_hash_root == 0) { + partroot = camel_block_file_new_block(bs); + root->word_hash_root = partroot->id; + camel_block_file_touch_block(bs, bs->root_block); + } + + ki = camel_key_table_new(bs, root->word_root); + cpi = camel_partition_table_new(bs, root->word_hash_root); + + fp = fopen("/usr/dict/words", "r"); + if (fp == NULL) { + perror("fopen"); + return 1; + } + + while (fgets(line, sizeof(line), fp) != NULL) { + line[strlen(line)-1] = 0; + + /* see if its already there */ + keyid = camel_partition_table_lookup(cpi, line); + if (keyid == 0) { + m(printf("Adding word '%s' %d\n", line, index)); + + keyid = camel_key_table_add(ki, line, index, 0); + m(printf(" key = %08x\n", keyid)); + + camel_partition_table_add(cpi, line, keyid); + + m(printf("Lookup word '%s'\n", line)); + keyid = camel_partition_table_lookup(cpi, line); + m(printf(" key = %08x\n", keyid)); + } + + m(printf("Lookup key %08x\n", keyid)); + + camel_key_table_set_flags(ki, keyid, index, 1); + + data = camel_key_table_lookup(ki, keyid, &key, &flags); + m(printf(" word = '%s' %d %04x\n", key, data, flags)); + + g_assert(data == index && strcmp(key, line) == 0); + + g_free(key); + + index++; + } + + printf("Scanning again\n"); + fseek(fp, SEEK_SET, 0); + index = 0; + while (fgets(line, sizeof(line), fp) != NULL) { + line[strlen(line)-1] = 0; + m(printf("Lookup word '%s' %d\n", line, index)); + keyid = camel_partition_table_lookup(cpi, line); + m(printf(" key = %08d\n", keyid)); + + m(printf("Lookup key %08x\n", keyid)); + data = camel_key_table_lookup(ki, keyid, &key, &flags); + m(printf(" word = '%s' %d\n", key, data)); + + g_assert(data == index && strcmp(key, line) == 0); + + g_free(key); + + index++; + } + + printf("Freeing partition index\n"); + camel_partition_table_free(cpi); + + printf("Syncing block file\n"); + camel_block_file_sync(bs); +#endif + return 0; +} + +#endif |