aboutsummaryrefslogtreecommitdiffstats
path: root/camel/camel-text-index.c
diff options
context:
space:
mode:
Diffstat (limited to 'camel/camel-text-index.c')
-rw-r--r--camel/camel-text-index.c1948
1 files changed, 0 insertions, 1948 deletions
diff --git a/camel/camel-text-index.c b/camel/camel-text-index.c
deleted file mode 100644
index 7ffa9ea6da..0000000000
--- a/camel/camel-text-index.c
+++ /dev/null
@@ -1,1948 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; fill-column: 160 -*- */
-/*
- * Copyright (C) 2001-2003 Ximian Inc.
- *
- * Authors: Michael Zucchi <notzed@ximian.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/uio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <ctype.h>
-
-#include "libedataserver/e-msgport.h"
-#include "libedataserver/e-memory.h"
-
-#include "camel/camel-object.h"
-
-#include "camel-text-index.h"
-#include "camel-block-file.h"
-#include "camel-partition-table.h"
-
-#include <glib/gunicode.h>
-
-
-#define w(x)
-#define io(x)
-#define d(x) /*(printf("%s(%d):%s: ", __FILE__, __LINE__, __PRETTY_FUNCTION__),(x))*/
-
-/* cursor debug */
-#define c(x)
-
-#define CAMEL_TEXT_INDEX_MAX_WORDLEN (36)
-
-#define CAMEL_TEXT_INDEX_LOCK(kf, lock) (e_mutex_lock(((CamelTextIndex *)kf)->priv->lock))
-#define CAMEL_TEXT_INDEX_UNLOCK(kf, lock) (e_mutex_unlock(((CamelTextIndex *)kf)->priv->lock))
-
-static int text_index_compress_nosync(CamelIndex *idx);
-
-/* ********************************************************************** */
-
-/* "private" data, shared between index/cursor/name classes */
-typedef struct _CamelTextIndexNamePrivate CamelTextIndexNamePrivate;
-
-struct _CamelTextIndexNamePrivate {
- GString *buffer;
- camel_key_t nameid;
- EMemPool *pool;
-};
-
-CamelTextIndexName *camel_text_index_name_new(CamelTextIndex *idx, const char *name, camel_key_t nameid);
-
-/* ****************************** */
-
-typedef struct _CamelTextIndexCursorPrivate CamelTextIndexCursorPrivate;
-
-struct _CamelTextIndexCursorPrivate {
- camel_block_t first;
- camel_block_t next;
-
- int record_index;
-
- size_t record_count;
- camel_key_t *records;
-
- char *current;
-};
-
-CamelTextIndexCursor *camel_text_index_cursor_new(CamelTextIndex *idx, camel_block_t data);
-
-/* ****************************** */
-typedef struct _CamelTextIndexKeyCursorPrivate CamelTextIndexKeyCursorPrivate;
-
-struct _CamelTextIndexKeyCursorPrivate {
- CamelKeyTable *table;
-
- camel_key_t keyid;
- unsigned int flags;
- camel_block_t data;
- char *current;
-};
-
-CamelTextIndexKeyCursor *camel_text_index_key_cursor_new(CamelTextIndex *idx, CamelKeyTable *table);
-
-/* ********************************************************************** */
-
-#define CAMEL_TEXT_INDEX_VERSION "TEXT.000"
-#define CAMEL_TEXT_INDEX_KEY_VERSION "KEYS.000"
-
-struct _CamelTextIndexPrivate {
- CamelBlockFile *blocks;
- CamelKeyFile *links;
-
- CamelKeyTable *word_index;
- CamelPartitionTable *word_hash;
-
- CamelKeyTable *name_index;
- CamelPartitionTable *name_hash;
-
- /* Cache of words to write */
- int word_cache_limit;
- int word_cache_count;
- EDList word_cache;
- GHashTable *words;
- EMutex *lock;
-};
-
-/* Root block of text index */
-struct _CamelTextIndexRoot {
- struct _CamelBlockRoot root;
-
- /* FIXME: the index root could contain a pointer to the hash root */
- camel_block_t word_index_root; /* a keyindex containing the keyid -> word mapping */
- camel_block_t word_hash_root; /* a partitionindex containing word -> keyid mapping */
-
- camel_block_t name_index_root; /* same, for names */
- camel_block_t name_hash_root;
-
- guint32 words; /* total words */
- guint32 names; /* total names */
- guint32 deleted; /* deleted names */
- guint32 keys; /* total key 'chunks' written, used with deleted to determine fragmentation */
-};
-
-struct _CamelTextIndexWord {
- struct _CamelTextIndexWord *next;
- struct _CamelTextIndexWord *prev;
-
- camel_block_t data; /* where the data starts */
- camel_key_t wordid;
- char *word;
- unsigned int used;
- camel_key_t names[32];
-};
-
-#define CTI_PRIVATE(o) (((CamelTextIndex *)(o))->priv)
-
-#define CI_CLASS(o) ((CamelTextIndexClass *)(((CamelObject *)o)->classfuncs))
-
-/* ********************************************************************** */
-/* CamelTextIndex */
-/* ********************************************************************** */
-
-static CamelObjectClass *camel_text_index_parent;
-
-/* call locked */
-static void
-text_index_add_name_to_word(CamelIndex *idx, const char *word, camel_key_t nameid)
-{
- struct _CamelTextIndexWord *w, *wp, *ww;
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
- camel_key_t wordid;
- camel_block_t data;
- struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *)p->blocks->root;
-
- w = g_hash_table_lookup(p->words, word);
- if (w == NULL) {
- wordid = camel_partition_table_lookup(p->word_hash, word);
- if (wordid == 0) {
- data = 0;
- wordid = camel_key_table_add(p->word_index, word, 0, 0);
- if (wordid == 0){
- g_warning ("Could not create key entry for word '%s': %s\n",
- word, strerror (errno));
- return;
- }
- if (camel_partition_table_add(p->word_hash, word, wordid) == -1) {
- g_warning ("Could not create hash entry for word '%s': %s\n",
- word, strerror (errno));
- return;
- }
- rb->words++;
- camel_block_file_touch_block(p->blocks, p->blocks->root_block);
- } else {
- data = camel_key_table_lookup(p->word_index, wordid, NULL, 0);
- if (data == 0) {
- g_warning ("Could not find key entry for word '%s': %s\n",
- word, strerror (errno));
- return;
- }
- }
-
- w = g_malloc0(sizeof(*w));
- w->word = g_strdup(word);
- w->wordid = wordid;
- w->used = 1;
- w->data = data;
-
- w->names[0] = nameid;
- g_hash_table_insert(p->words, w->word, w);
- e_dlist_addhead(&p->word_cache, (EDListNode *)w);
- p->word_cache_count++;
- ww = (struct _CamelTextIndexWord *)p->word_cache.tailpred;
- wp = ww->prev;
- while (wp && p->word_cache_count > p->word_cache_limit) {
- io(printf("writing key file entry '%s' [%x]\n", ww->word, ww->data));
- if (camel_key_file_write(p->links, &ww->data, ww->used, ww->names) != -1) {
- io(printf(" new data [%x]\n", ww->data));
- rb->keys++;
- camel_block_file_touch_block(p->blocks, p->blocks->root_block);
- /* if this call fails - we still point to the old data - not fatal */
- camel_key_table_set_data(p->word_index, ww->wordid, ww->data);
- e_dlist_remove((EDListNode *)ww);
- g_hash_table_remove(p->words, ww->word);
- g_free(ww->word);
- g_free(ww);
- p->word_cache_count--;
- }
- ww = wp;
- wp = wp->prev;
- }
- } else {
- e_dlist_remove((EDListNode *)w);
- e_dlist_addhead(&p->word_cache, (EDListNode *)w);
- w->names[w->used] = nameid;
- w->used++;
- if (w->used == sizeof(w->names)/sizeof(w->names[0])) {
- io(printf("writing key file entry '%s' [%x]\n", w->word, w->data));
- if (camel_key_file_write(p->links, &w->data, w->used, w->names) != -1) {
- rb->keys++;
- camel_block_file_touch_block(p->blocks, p->blocks->root_block);
- /* if this call fails - we still point to the old data - not fatal */
- camel_key_table_set_data(p->word_index, w->wordid, w->data);
- }
- /* FIXME: what to on error? lost data? */
- w->used = 0;
- }
- }
-}
-
-static int
-text_index_sync(CamelIndex *idx)
-{
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
- struct _CamelTextIndexWord *ww;
- struct _CamelTextIndexRoot *rb;
- int ret = 0, wfrag, nfrag, work = FALSE;
-
- d(printf("sync: blocks = %p\n", p->blocks));
-
- if (p->blocks == NULL)
- return 0;
-
- rb = (struct _CamelTextIndexRoot *)p->blocks->root;
-
- /* sync/flush word cache */
-
- CAMEL_TEXT_INDEX_LOCK(idx, lock);
-
- /* we sync, bump down the cache limits since we dont need them for reading */
- p->blocks->block_cache_limit = 128;
- /* this doesn't really need to be dropped, its only used in updates anyway */
- p->word_cache_limit = 1024;
-
- work = !e_dlist_empty(&p->word_cache);
-
- while ( (ww = (struct _CamelTextIndexWord *)e_dlist_remhead(&p->word_cache)) ) {
- if (ww->used > 0) {
- io(printf("writing key file entry '%s' [%x]\n", ww->word, ww->data));
- if (camel_key_file_write(p->links, &ww->data, ww->used, ww->names) != -1) {
- io(printf(" new data [%x]\n", ww->data));
- rb->keys++;
- camel_block_file_touch_block(p->blocks, p->blocks->root_block);
- camel_key_table_set_data(p->word_index, ww->wordid, ww->data);
- } else {
- ret = -1;
- }
- ww->used = 0;
- }
- g_hash_table_remove(p->words, ww->word);
- g_free(ww->word);
- g_free(ww);
- }
-
- if (camel_key_table_sync(p->word_index) == -1
- || camel_key_table_sync(p->name_index) == -1
- || camel_partition_table_sync(p->word_hash) == -1
- || camel_partition_table_sync(p->name_hash) == -1)
- ret = -1;
-
- /* only do the frag/compress check if we did some new writes on this index */
- wfrag = rb->words ? (((rb->keys - rb->words) * 100)/ rb->words) : 0;
- nfrag = rb->names ? ((rb->deleted * 100) / rb->names) : 0;
- d(printf("wfrag = %d, nfrag = %d, work = %s, ret = %d\n", wfrag, nfrag, work?"true":"false", ret));
- d(printf(" words = %d, keys = %d\n", rb->words, rb->keys));
-
- if (ret == 0) {
- if (wfrag > 30 || nfrag > 20)
- ret = text_index_compress_nosync(idx);
- }
-
- ret = camel_block_file_sync(p->blocks);
-
- CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
-
- return ret;
-}
-
-static void tmp_name(const char *in, char *o)
-{
- char *s;
-
- s = strrchr(in, '/');
- if (s) {
- memcpy(o, in, s-in+1);
- memcpy(o+(s-in+1), ".#", 2);
- strcpy(o+(s-in+3), s+1);
- } else {
- sprintf(o, ".#%s", in);
- }
-}
-
-static int
-text_index_compress(CamelIndex *idx)
-{
- int ret;
-
- CAMEL_TEXT_INDEX_LOCK(idx, lock);
-
- ret = camel_index_sync(idx);
- if (ret != -1)
- ret = text_index_compress_nosync(idx);
-
- CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
-
- return ret;
-}
-
-/* Attempt to recover index space by compressing the indices */
-static int
-text_index_compress_nosync(CamelIndex *idx)
-{
- CamelTextIndex *newidx;
- struct _CamelTextIndexPrivate *newp, *oldp;
- camel_key_t oldkeyid, newkeyid;
- GHashTable *remap;
- unsigned int deleted;
- camel_block_t data, newdata;
- int i, ret = -1;
- char *name = NULL;
- unsigned int flags;
- char *newpath, *savepath, *oldpath;
- size_t count, newcount;
- camel_key_t *records, newrecords[256];
- struct _CamelTextIndexRoot *rb;
-
- i = strlen(idx->path)+16;
- oldpath = alloca(i);
- newpath = alloca(i);
- savepath = alloca(i);
-
- strcpy(oldpath, idx->path);
- oldpath[strlen(oldpath)-strlen(".index")] = 0;
-
- tmp_name(oldpath, newpath);
- sprintf(savepath, "%s~", oldpath);
-
- d(printf("Old index: %s\n", idx->path));
- d(printf("Old path: %s\n", oldpath));
- d(printf("New: %s\n", newpath));
- d(printf("Save: %s\n", savepath));
-
- newidx = camel_text_index_new(newpath, O_RDWR|O_CREAT);
- if (newidx == NULL)
- return -1;
-
- newp = CTI_PRIVATE(newidx);
- oldp = CTI_PRIVATE(idx);
-
- CAMEL_TEXT_INDEX_LOCK(idx, lock);
-
- rb = (struct _CamelTextIndexRoot *)newp->blocks->root;
-
- rb->words = 0;
- rb->names = 0;
- rb->deleted = 0;
- rb->keys = 0;
-
- /* Process:
- For each name we still have:
- Add it to the new index & setup remap table
-
- For each word:
- Copy word's data to a new file
- Add new word to index(*) (can we just copy blocks?) */
-
- /* Copy undeleted names to new index file, creating new indices */
- io(printf("Copying undeleted names to new file\n"));
- remap = g_hash_table_new(NULL, NULL);
- oldkeyid = 0;
- deleted = 0;
- while ( (oldkeyid = camel_key_table_next(oldp->name_index, oldkeyid, &name, &flags, &data)) ) {
- if ((flags&1) == 0) {
- io(printf("copying name '%s'\n", name));
- newkeyid = camel_key_table_add(newp->name_index, name, data, flags);
- if (newkeyid == 0)
- goto fail;
- rb->names++;
- camel_partition_table_add(newp->name_hash, name, newkeyid);
- g_hash_table_insert(remap, GINT_TO_POINTER(oldkeyid), GINT_TO_POINTER(newkeyid));
- } else
- io(printf("deleted name '%s'\n", name));
- g_free(name);
- name = NULL;
- deleted |= flags;
- }
-
- /* Copy word data across, remapping/deleting and create new index for it */
- /* We re-block the data into 256 entry lots while we're at it, since we only
- have to do 1 at a time and its cheap */
- oldkeyid = 0;
- while ( (oldkeyid = camel_key_table_next(oldp->word_index, oldkeyid, &name, &flags, &data)) ) {
- io(printf("copying word '%s'\n", name));
- newdata = 0;
- newcount = 0;
- if (data) {
- rb->words++;
- rb->keys++;
- }
- while (data) {
- if (camel_key_file_read(oldp->links, &data, &count, &records) == -1) {
- io(printf("could not read from old keys at %d for word '%s'\n", (int)data, name));
- goto fail;
- }
- for (i=0;i<count;i++) {
- newkeyid = (camel_key_t)GPOINTER_TO_INT(g_hash_table_lookup(remap, GINT_TO_POINTER(records[i])));
- if (newkeyid) {
- newrecords[newcount++] = newkeyid;
- if (newcount == sizeof(newrecords)/sizeof(newrecords[0])) {
- if (camel_key_file_write(newp->links, &newdata, newcount, newrecords) == -1) {
- g_free(records);
- goto fail;
- }
- newcount = 0;
- }
- }
- }
- g_free(records);
- }
-
- if (newcount > 0) {
- if (camel_key_file_write(newp->links, &newdata, newcount, newrecords) == -1)
- goto fail;
- }
-
- if (newdata != 0) {
- newkeyid = camel_key_table_add(newp->word_index, name, newdata, flags);
- if (newkeyid == 0)
- goto fail;
- camel_partition_table_add(newp->word_hash, name, newkeyid);
- }
- g_free(name);
- name = NULL;
- }
-
- camel_block_file_touch_block(newp->blocks, newp->blocks->root_block);
-
- if (camel_index_sync((CamelIndex *)newidx) == -1)
- goto fail;
-
- /* Rename underlying files to match */
- ret = camel_index_rename(idx, savepath);
- if (ret == -1)
- goto fail;
-
- /* If this fails, we'll pick up something during restart? */
- ret = camel_index_rename((CamelIndex *)newidx, oldpath);
-
-#define myswap(a, b) { void *tmp = a; a = b; b = tmp; }
- /* Poke the private data across to the new object */
- /* And change the fd's over, etc? */
- /* Yes: This is a hack */
- myswap(newp->blocks, oldp->blocks);
- myswap(newp->links, oldp->links);
- myswap(newp->word_index, oldp->word_index);
- myswap(newp->word_hash, oldp->word_hash);
- myswap(newp->name_index, oldp->name_index);
- myswap(newp->name_hash, oldp->name_hash);
- myswap(((CamelIndex *)newidx)->path, ((CamelIndex *)idx)->path);
-#undef myswap
-
- ret = 0;
-fail:
- CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
-
- camel_index_delete((CamelIndex *)newidx);
-
- camel_object_unref((CamelObject *)newidx);
- g_free(name);
- g_hash_table_destroy(remap);
-
- /* clean up temp files always */
- sprintf(savepath, "%s~.index", oldpath);
- unlink(savepath);
- sprintf(newpath, "%s.data", savepath);
- unlink(newpath);
-
- return ret;
-}
-
-static int
-text_index_delete(CamelIndex *idx)
-{
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
- int ret = 0;
-
- if (camel_block_file_delete(p->blocks) == -1)
- ret = -1;
- if (camel_key_file_delete(p->links) == -1)
- ret = -1;
-
- return ret;
-}
-
-static int
-text_index_rename(CamelIndex *idx, const char *path)
-{
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
- char *newlink, *newblock;
- int err, ret;
-
- CAMEL_TEXT_INDEX_LOCK(idx, lock);
-
- newblock = alloca(strlen(path)+8);
- sprintf(newblock, "%s.index", path);
- ret = camel_block_file_rename(p->blocks, newblock);
- if (ret == -1) {
- CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
- return -1;
- }
-
- newlink = alloca(strlen(path)+16);
- sprintf(newlink, "%s.index.data", path);
- ret = camel_key_file_rename(p->links, newlink);
- if (ret == -1) {
- err = errno;
- camel_block_file_rename(p->blocks, idx->path);
- CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
- errno = err;
- return -1;
- }
-
- g_free(idx->path);
- idx->path = g_strdup(newblock);
-
- CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
-
- return 0;
-}
-
-static int
-text_index_has_name(CamelIndex *idx, const char *name)
-{
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
-
- return camel_partition_table_lookup(p->name_hash, name) != 0;
-}
-
-static CamelIndexName *
-text_index_add_name(CamelIndex *idx, const char *name)
-{
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
- camel_key_t keyid;
- CamelIndexName *idn;
- struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *)p->blocks->root;
-
- CAMEL_TEXT_INDEX_LOCK(idx, lock);
-
- /* if we're adding words, up the cache limits a lot */
- if (p->blocks) {
- p->blocks->block_cache_limit = 1024;
- p->word_cache_limit = 8192;
- }
-
- /* If we have it already replace it */
- keyid = camel_partition_table_lookup(p->name_hash, name);
- if (keyid != 0) {
- /* TODO: We could just update the partition table's
- key pointer rather than having to delete it */
- rb->deleted++;
- camel_key_table_set_flags(p->name_index, keyid, 1, 1);
- camel_partition_table_remove(p->name_hash, name);
- }
-
- keyid = camel_key_table_add(p->name_index, name, 0, 0);
- if (keyid != 0) {
- camel_partition_table_add(p->name_hash, name, keyid);
- rb->names++;
- }
-
- camel_block_file_touch_block(p->blocks, p->blocks->root_block);
-
- /* TODO: if keyid == 0, we had a failure, we should somehow flag that, but for
- now just return a valid object but discard its results, see text_index_write_name */
-
- CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
-
- idn = (CamelIndexName *)camel_text_index_name_new((CamelTextIndex *)idx, name, keyid);
-
- return idn;
-}
-
-/* call locked */
-static void
-hash_write_word(char *word, void *data, CamelIndexName *idn)
-{
- CamelTextIndexName *tin = (CamelTextIndexName *)idn;
-
- text_index_add_name_to_word(idn->index, word, tin->priv->nameid);
-}
-
-static int
-text_index_write_name(CamelIndex *idx, CamelIndexName *idn)
-{
- /* force 'flush' of any outstanding data */
- camel_index_name_add_buffer(idn, NULL, 0);
-
- /* see text_index_add_name for when this can be 0 */
- if (((CamelTextIndexName *)idn)->priv->nameid != 0) {
- CAMEL_TEXT_INDEX_LOCK(idx, lock);
-
- g_hash_table_foreach(idn->words, (GHFunc)hash_write_word, idn);
-
- CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
- }
-
- return 0;
-}
-
-static CamelIndexCursor *
-text_index_find_name(CamelIndex *idx, const char *name)
-{
- /* what was this for, umm */
- return NULL;
-}
-
-static void
-text_index_delete_name(CamelIndex *idx, const char *name)
-{
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
- camel_key_t keyid;
- struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *)p->blocks->root;
-
- d(printf("Delete name: %s\n", name));
-
- /* probably doesn't really need locking, but oh well */
- CAMEL_TEXT_INDEX_LOCK(idx, lock);
-
- /* We just mark the key deleted, and remove it from the hash table */
- keyid = camel_partition_table_lookup(p->name_hash, name);
- if (keyid != 0) {
- rb->deleted++;
- camel_block_file_touch_block(p->blocks, p->blocks->root_block);
- camel_key_table_set_flags(p->name_index, keyid, 1, 1);
- camel_partition_table_remove(p->name_hash, name);
- }
-
- CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
-}
-
-static CamelIndexCursor *
-text_index_find(CamelIndex *idx, const char *word)
-{
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
- camel_key_t keyid;
- camel_block_t data = 0;
- unsigned int flags;
- CamelIndexCursor *idc;
-
- CAMEL_TEXT_INDEX_LOCK(idx, lock);
-
- keyid = camel_partition_table_lookup(p->word_hash, word);
- if (keyid != 0) {
- data = camel_key_table_lookup(p->word_index, keyid, NULL, &flags);
- if (flags & 1)
- data = 0;
- }
-
- CAMEL_TEXT_INDEX_UNLOCK(idx, lock);
-
- idc = (CamelIndexCursor *)camel_text_index_cursor_new((CamelTextIndex *)idx, data);
-
- return idc;
-}
-
-static CamelIndexCursor *
-text_index_words(CamelIndex *idx)
-{
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
-
- return (CamelIndexCursor *)camel_text_index_key_cursor_new((CamelTextIndex *)idx, p->word_index);
-}
-
-static CamelIndexCursor *
-text_index_names(CamelIndex *idx)
-{
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
-
- return (CamelIndexCursor *)camel_text_index_key_cursor_new((CamelTextIndex *)idx, p->name_index);
-}
-
-static void
-camel_text_index_class_init(CamelTextIndexClass *klass)
-{
- CamelIndexClass *iklass = (CamelIndexClass *)klass;
-
- camel_text_index_parent = CAMEL_OBJECT_CLASS(camel_type_get_global_classfuncs(camel_object_get_type()));
-
- iklass->sync = text_index_sync;
- iklass->compress = text_index_compress;
- iklass->delete = text_index_delete;
-
- iklass->rename = text_index_rename;
-
- iklass->has_name = text_index_has_name;
- iklass->add_name = text_index_add_name;
- iklass->write_name = text_index_write_name;
- iklass->find_name = text_index_find_name;
- iklass->delete_name = text_index_delete_name;
- iklass->find = text_index_find;
-
- iklass->words = text_index_words;
- iklass->names = text_index_names;
-}
-
-static void
-camel_text_index_init(CamelTextIndex *idx)
-{
- struct _CamelTextIndexPrivate *p;
-
- p = CTI_PRIVATE(idx) = g_malloc0(sizeof(*p));
-
- e_dlist_init(&p->word_cache);
- p->words = g_hash_table_new(g_str_hash, g_str_equal);
- p->word_cache_count = 0;
- /* this cache size and the block cache size have been tuned for about the best
- with moderate memory usage. Doubling the memory usage barely affects performance. */
- p->word_cache_limit = 4096; /* 1024 = 128K */
-
- p->lock = e_mutex_new(E_MUTEX_REC);
-}
-
-static void
-camel_text_index_finalise(CamelTextIndex *idx)
-{
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
-
- camel_index_sync((CamelIndex *)idx);
-
- g_assert(e_dlist_empty(&p->word_cache));
- g_assert(g_hash_table_size(p->words) == 0);
-
- g_hash_table_destroy(p->words);
-
- if (p->word_index)
- camel_object_unref((CamelObject *)p->word_index);
- if (p->word_hash)
- camel_object_unref((CamelObject *)p->word_hash);
- if (p->name_index)
- camel_object_unref((CamelObject *)p->name_index);
- if (p->name_hash)
- camel_object_unref((CamelObject *)p->name_hash);
-
- if (p->blocks)
- camel_object_unref((CamelObject *)p->blocks);
- if (p->links)
- camel_object_unref((CamelObject *)p->links);
-
- e_mutex_destroy(p->lock);
-
- g_free(p);
-}
-
-CamelType
-camel_text_index_get_type(void)
-{
- static CamelType type = CAMEL_INVALID_TYPE;
-
- if (type == CAMEL_INVALID_TYPE) {
- type = camel_type_register(camel_index_get_type(), "CamelTextIndex",
- sizeof (CamelTextIndex),
- sizeof (CamelTextIndexClass),
- (CamelObjectClassInitFunc) camel_text_index_class_init,
- NULL,
- (CamelObjectInitFunc) camel_text_index_init,
- (CamelObjectFinalizeFunc) camel_text_index_finalise);
- }
-
- return type;
-}
-
-static char *
-text_index_normalise(CamelIndex *idx, const char *in, void *data)
-{
- char *word;
-
- /* Sigh, this is really expensive */
- /*g_utf8_normalize(in, strlen(in), G_NORMALIZE_ALL);*/
- word = g_utf8_strdown(in, -1);
-
- return word;
-}
-
-CamelTextIndex *
-camel_text_index_new(const char *path, int flags)
-{
- CamelTextIndex *idx = (CamelTextIndex *)camel_object_new(camel_text_index_get_type());
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
- struct _CamelTextIndexRoot *rb;
- char *link;
- CamelBlock *bl;
-
- camel_index_construct((CamelIndex *)idx, path, flags);
- camel_index_set_normalise((CamelIndex *)idx, text_index_normalise, NULL);
-
- p->blocks = camel_block_file_new(idx->parent.path, flags, CAMEL_TEXT_INDEX_VERSION, CAMEL_BLOCK_SIZE);
- link = alloca(strlen(idx->parent.path)+7);
- sprintf(link, "%s.data", idx->parent.path);
- p->links = camel_key_file_new(link, flags, CAMEL_TEXT_INDEX_KEY_VERSION);
-
- if (p->blocks == NULL || p->links == NULL) {
- camel_object_unref((CamelObject *)idx);
- return NULL;
- }
-
- rb = (struct _CamelTextIndexRoot *)p->blocks->root;
-
- if (rb->word_index_root == 0) {
- bl = camel_block_file_new_block(p->blocks);
- rb->word_index_root = bl->id;
- camel_block_file_unref_block(p->blocks, bl);
- camel_block_file_touch_block(p->blocks, p->blocks->root_block);
- }
-
- if (rb->word_hash_root == 0) {
- bl = camel_block_file_new_block(p->blocks);
- rb->word_hash_root = bl->id;
- camel_block_file_unref_block(p->blocks, bl);
- camel_block_file_touch_block(p->blocks, p->blocks->root_block);
- }
-
- if (rb->name_index_root == 0) {
- bl = camel_block_file_new_block(p->blocks);
- rb->name_index_root = bl->id;
- camel_block_file_unref_block(p->blocks, bl);
- camel_block_file_touch_block(p->blocks, p->blocks->root_block);
- }
-
- if (rb->name_hash_root == 0) {
- bl = camel_block_file_new_block(p->blocks);
- rb->name_hash_root = bl->id;
- camel_block_file_unref_block(p->blocks, bl);
- camel_block_file_touch_block(p->blocks, p->blocks->root_block);
- }
-
- p->word_index = camel_key_table_new(p->blocks, rb->word_index_root);
- p->word_hash = camel_partition_table_new(p->blocks, rb->word_hash_root);
- p->name_index = camel_key_table_new(p->blocks, rb->name_index_root);
- p->name_hash = camel_partition_table_new(p->blocks, rb->name_hash_root);
-
- if (p->word_index == NULL || p->word_hash == NULL
- || p->name_index == NULL || p->name_hash == NULL) {
- camel_object_unref((CamelObject *)idx);
- idx = NULL;
- }
-
- return idx;
-}
-
-/* returns 0 if the index exists, is valid, and synced, -1 otherwise */
-int
-camel_text_index_check(const char *path)
-{
- char *block, *key;
- CamelBlockFile *blocks;
- CamelKeyFile *keys;
-
- block = alloca(strlen(path)+7);
- sprintf(block, "%s.index", path);
- blocks = camel_block_file_new(block, O_RDONLY, CAMEL_TEXT_INDEX_VERSION, CAMEL_BLOCK_SIZE);
- if (blocks == NULL) {
- io(printf("Check failed: No block file: %s\n", strerror (errno)));
- return -1;
- }
- key = alloca(strlen(path)+12);
- sprintf(key, "%s.index.data", path);
- keys = camel_key_file_new(key, O_RDONLY, CAMEL_TEXT_INDEX_KEY_VERSION);
- if (keys == NULL) {
- io(printf("Check failed: No key file: %s\n", strerror (errno)));
- camel_object_unref((CamelObject *)blocks);
- return -1;
- }
-
- camel_object_unref((CamelObject *)keys);
- camel_object_unref((CamelObject *)blocks);
-
- return 0;
-}
-
-int
-camel_text_index_rename(const char *old, const char *new)
-{
- char *oldname, *newname;
- int err;
-
- /* TODO: camel_text_index_rename should find out if we have an active index and use that instead */
-
- oldname = alloca(strlen(old)+12);
- newname = alloca(strlen(new)+12);
- sprintf(oldname, "%s.index", old);
- sprintf(newname, "%s.index", new);
-
- if (rename(oldname, newname) == -1 && errno != ENOENT)
- return -1;
-
- sprintf(oldname, "%s.index.data", old);
- sprintf(newname, "%s.index.data", new);
-
- if (rename(oldname, newname) == -1 && errno != ENOENT) {
- err = errno;
- sprintf(oldname, "%s.index", old);
- sprintf(newname, "%s.index", new);
- rename(newname, oldname);
- errno = err;
- return -1;
- }
-
- return 0;
-}
-
-int
-camel_text_index_remove(const char *old)
-{
- char *block, *key;
- int ret = 0;
-
- /* TODO: needs to poke any active indices to remain unlinked */
-
- block = alloca(strlen(old)+12);
- key = alloca(strlen(old)+12);
- sprintf(block, "%s.index", old);
- sprintf(key, "%s.index.data", old);
-
- if (unlink(block) == -1 && errno != ENOENT)
- ret = -1;
- if (unlink(key) == -1 && errno != ENOENT)
- ret = -1;
-
- return ret;
-}
-
-/* Debug */
-void
-camel_text_index_info(CamelTextIndex *idx)
-{
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
- struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *)p->blocks->root;
- int frag;
-
- printf("Path: '%s'\n", idx->parent.path);
- printf("Version: %d\n", idx->parent.version);
- printf("Flags: %08x\n", idx->parent.flags);
- printf("Total words: %d\n", rb->words);
- printf("Total names: %d\n", rb->names);
- printf("Total deleted: %d\n", rb->deleted);
- printf("Total key blocks: %d\n", rb->keys);
-
- if (rb->words > 0) {
- frag = ((rb->keys - rb->words) * 100)/ rb->words;
- printf("Word fragmentation: %d%%\n", frag);
- }
-
- if (rb->names > 0) {
- frag = (rb->deleted * 100)/ rb->names;
- printf("Name fragmentation: %d%%\n", frag);
- }
-}
-
-/* #define DUMP_RAW */
-
-#ifdef DUMP_RAW
-enum { KEY_ROOT = 1, KEY_DATA = 2, PARTITION_MAP = 4, PARTITION_DATA = 8 };
-
-static void
-add_type(GHashTable *map, camel_block_t id, int type)
-{
- camel_block_t old;
-
- old = g_hash_table_lookup(map, id);
- if (old == type)
- return;
-
- if (old != 0 && old != type)
- g_warning("block %x redefined as type %d, already type %d\n", id, type, old);
- g_hash_table_insert(map, id, GINT_TO_POINTER (type|old));
-}
-
-static void
-add_partition(GHashTable *map, CamelBlockFile *blocks, camel_block_t id)
-{
- CamelBlock *bl;
- CamelPartitionMapBlock *pm;
- int i;
-
- while (id) {
- add_type(map, id, PARTITION_MAP);
- bl = camel_block_file_get_block(blocks, id);
- if (bl == NULL) {
- g_warning("couldn't get parition: %x\n", id);
- return;
- }
-
- pm = (CamelPartitionMapBlock *)&bl->data;
- if (pm->used > sizeof(pm->partition)/sizeof(pm->partition[0])) {
- g_warning("Partition block %x invalid\n", id);
- camel_block_file_unref_block(blocks, bl);
- return;
- }
-
- for (i=0;i<pm->used;i++)
- add_type(map, pm->partition[i].blockid, PARTITION_DATA);
-
- id = pm->next;
- camel_block_file_unref_block(blocks, bl);
- }
-}
-
-static void
-add_keys(GHashTable *map, CamelBlockFile *blocks, camel_block_t id)
-{
- CamelBlock *rbl, *bl;
- CamelKeyRootBlock *root;
- CamelKeyBlock *kb;
-
- add_type(map, id, KEY_ROOT);
- rbl = camel_block_file_get_block(blocks, id);
- if (rbl == NULL) {
- g_warning("couldn't get key root: %x\n", id);
- return;
- }
- root = (CamelKeyRootBlock *)&rbl->data;
- id = root->first;
-
- while (id) {
- add_type(map, id, KEY_DATA);
- bl = camel_block_file_get_block(blocks, id);
- if (bl == NULL) {
- g_warning("couldn't get key: %x\n", id);
- break;
- }
-
- kb = (CamelKeyBlock *)&bl->data;
- id = kb->next;
- camel_block_file_unref_block(blocks, bl);
- }
-
- camel_block_file_unref_block(blocks, rbl);
-}
-
-static void
-dump_raw(GHashTable *map, char *path)
-{
- char buf[1024];
- char line[256];
- char *p, c, *e, *a, *o;
- int v, n, len, i, type;
- char hex[16] = "0123456789ABCDEF";
- int fd;
- camel_block_t id, total;
-
- fd = open(path, O_RDONLY);
- if (fd == -1)
- return;
-
- total = 0;
- while ((len = read(fd, buf, 1024)) == 1024) {
- id = total;
-
-
-
- type = g_hash_table_lookup(map, id);
- switch(type) {
- case 0:
- printf(" - unknown -\n");
- break;
- default:
- printf(" - invalid -\n");
- break;
- case KEY_ROOT: {
- CamelKeyRootBlock *r = (CamelKeyRootBlock *)buf;
- printf("Key root:\n");
- printf("First: %08x Last: %08x Free: %08x\n", r->first, r->last, r->free);
- } break;
- case KEY_DATA: {
- CamelKeyBlock *k = (CamelKeyBlock *)buf;
- printf("Key data:\n");
- printf("Next: %08x Used: %u\n", k->next, k->used);
- for (i=0;i<k->used;i++) {
- if (i == 0)
- len = sizeof(k->u.keydata);
- else
- len = k->u.keys[i-1].offset;
- len -= k->u.keys[i].offset;
- printf("[%03d]: %08x %5d %06x %3d '%.*s'\n", i,
- k->u.keys[i].data, k->u.keys[i].offset, k->u.keys[i].flags,
- len, len, k->u.keydata+k->u.keys[i].offset);
- }
- } break;
- case PARTITION_MAP: {
- CamelPartitionMapBlock *m = (CamelPartitionMapBlock *)buf;
- printf("Partition map\n");
- printf("Next: %08x Used: %u\n", m->next, m->used);
- for (i=0;i<m->used;i++) {
- printf("[%03d]: %08x -> %08x\n", i, m->partition[i].hashid, m->partition[i].blockid);
- }
- } break;
- case PARTITION_DATA: {
- CamelPartitionKeyBlock *k = (CamelPartitionKeyBlock *)buf;
- printf("Partition data\n");
- printf("Used: %u\n", k->used);
- } break;
- }
-
-
- printf("--raw--\n");
-
- len = 1024;
- p = buf;
- do {
- sprintf(line, "%08x: ", total);
- total += 16;
- o = line+10;
- a = o+16*2+2;
- i = 0;
- while (len && i<16) {
- c = *p++;
- *a++ = isprint(c)?c:'.';
- *o++ = hex[(c>>4)&0x0f];
- *o++ = hex[c&0x0f];
- i++;
- if (i==8)
- *o++ = ' ';
- len--;
- }
- *a = 0;
- printf("%s\n", line);
- } while (len);
- printf("\n");
- }
-}
-#endif
-
-/* Debug */
-void
-camel_text_index_dump(CamelTextIndex *idx)
-{
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
-#ifndef DUMP_RAW
- camel_key_t keyid;
- char *word;
- const char *name;
- unsigned int flags;
- camel_block_t data;
-
- /* Iterate over all names in the file first */
-
- printf("UID's in index\n");
-
- keyid = 0;
- while ( (keyid = camel_key_table_next(p->name_index, keyid, &word, &flags, &data)) ) {
- if ((flags & 1) == 0)
- printf(" %s\n", word);
- else
- printf(" %s (deleted)\n", word);
- g_free(word);
- }
-
- printf("Word's in index\n");
-
- keyid = 0;
- while ( (keyid = camel_key_table_next(p->word_index, keyid, &word, &flags, &data)) ) {
- CamelIndexCursor *idc;
-
- printf("Word: '%s':\n", word);
-
- idc = camel_index_find((CamelIndex *)idx, word);
- while ( (name = camel_index_cursor_next(idc)) ) {
- printf(" %s", name);
- }
- printf("\n");
- camel_object_unref((CamelObject *)idc);
- g_free(word);
- }
-#else
- /* a more low-level dump routine */
- GHashTable *block_type = g_hash_table_new(NULL, NULL);
- camel_block_t id;
- struct stat st;
- int type;
-
- add_keys(block_type, p->blocks, p->word_index->rootid);
- add_keys(block_type, p->blocks, p->name_index->rootid);
-
- add_partition(block_type, p->blocks, p->word_hash->rootid);
- add_partition(block_type, p->blocks, p->name_hash->rootid);
-
- dump_raw(block_type, p->blocks->path);
- g_hash_table_destroy(block_type);
-#endif
-}
-
-/* more debug stuff */
-void
-camel_text_index_validate(CamelTextIndex *idx)
-{
- struct _CamelTextIndexPrivate *p = CTI_PRIVATE(idx);
- camel_key_t keyid;
- char *word;
- const char *name;
- unsigned int flags;
- camel_block_t data;
- char *oldword;
- camel_key_t *records;
- size_t count;
-
- GHashTable *names, *deleted, *words, *keys, *name_word, *word_word;
-
- names = g_hash_table_new(NULL, NULL);
- deleted = g_hash_table_new(NULL, NULL);
-
- name_word = g_hash_table_new(g_str_hash, g_str_equal);
-
- words = g_hash_table_new(NULL, NULL);
- keys = g_hash_table_new(NULL, NULL);
-
- word_word = g_hash_table_new(g_str_hash, g_str_equal);
-
- /* Iterate over all names in the file first */
-
- printf("Checking UID consistency\n");
-
- keyid = 0;
- while ( (keyid = camel_key_table_next(p->name_index, keyid, &word, &flags, &data)) ) {
- if ((oldword = g_hash_table_lookup(names, GINT_TO_POINTER(keyid))) != NULL
- || (oldword = g_hash_table_lookup(deleted, GINT_TO_POINTER(keyid))) != NULL) {
- printf("Warning, name '%s' duplicates key (%x) with name '%s'\n", word, keyid, oldword);
- g_free(word);
- } else {
- g_hash_table_insert(name_word, word, GINT_TO_POINTER (1));
- if ((flags & 1) == 0) {
- g_hash_table_insert(names, GINT_TO_POINTER(keyid), word);
- } else {
- g_hash_table_insert(deleted, GINT_TO_POINTER(keyid), word);
- }
- }
- }
-
- printf("Checking WORD member consistency\n");
-
- keyid = 0;
- while ( (keyid = camel_key_table_next(p->word_index, keyid, &word, &flags, &data)) ) {
- CamelIndexCursor *idc;
- GHashTable *used;
-
- /* first, check for duplicates of keyid, and data */
- if ((oldword = g_hash_table_lookup(words, GINT_TO_POINTER(keyid))) != NULL) {
- printf("Warning, word '%s' duplicates key (%x) with name '%s'\n", word, keyid, oldword);
- g_free(word);
- word = oldword;
- } else {
- g_hash_table_insert(words, GINT_TO_POINTER(keyid), word);
- }
-
- if (data == 0) {
- /* This may not be an issue if things have been removed over time,
- though it is a problem if its a fresh index */
- printf("Word '%s' has no data associated with it\n", word);
- } else {
- if ((oldword = g_hash_table_lookup(keys, GUINT_TO_POINTER(data))) != NULL) {
- printf("Warning, word '%s' duplicates data (%x) with name '%s'\n", word, data, oldword);
- } else {
- g_hash_table_insert(keys, GUINT_TO_POINTER(data), word);
- }
- }
-
- if ((oldword = g_hash_table_lookup(word_word, word)) != NULL) {
- printf("Warning, word '%s' occurs more than once\n", word);
- } else {
- g_hash_table_insert(word_word, word, word);
- }
-
- used = g_hash_table_new(g_str_hash, g_str_equal);
-
- idc = camel_index_find((CamelIndex *)idx, word);
- while ( (name = camel_index_cursor_next(idc)) ) {
- if (g_hash_table_lookup(name_word, name) == NULL) {
- printf("word '%s' references non-existant name '%s'\n", word, name);
- }
- if (g_hash_table_lookup(used, name) != NULL) {
- printf("word '%s' uses word '%s' more than once\n", word, name);
- } else {
- g_hash_table_insert(used, g_strdup(name), (void *)1);
- }
- }
- camel_object_unref((CamelObject *)idc);
-
- g_hash_table_foreach(used, (GHFunc)g_free, NULL);
- g_hash_table_destroy(used);
-
- printf("word '%s'\n", word);
-
- while (data) {
- printf(" data %x ", data);
- if (camel_key_file_read(p->links, &data, &count, &records) == -1) {
- printf("Warning, read failed for word '%s', at data '%d'\n", word, data);
- data = 0;
- } else {
- printf("(%d)\n", count);
- g_free(records);
- }
- }
- }
-
- g_hash_table_destroy(names);
- g_hash_table_destroy(deleted);
- g_hash_table_destroy(words);
- g_hash_table_destroy(keys);
-
- g_hash_table_foreach(name_word, (GHFunc)g_free, NULL);
- g_hash_table_destroy(name_word);
-
- g_hash_table_foreach(word_word, (GHFunc)g_free, NULL);
- g_hash_table_destroy(word_word);
-}
-
-/* ********************************************************************** */
-/* CamelTextIndexName */
-/* ********************************************************************** */
-
-static CamelIndexNameClass *camel_text_index_name_parent;
-
-#define CIN_CLASS(o) ((CamelTextIndexNameClass *)(((CamelObject *)o)->classfuncs))
-#define CIN_PRIVATE(o) (((CamelTextIndexName *)(o))->priv)
-
-static void
-text_index_name_add_word(CamelIndexName *idn, const char *word)
-{
- struct _CamelTextIndexNamePrivate *p = ((CamelTextIndexName *)idn)->priv;
-
- if (g_hash_table_lookup(idn->words, word) == NULL) {
- char *w = e_mempool_strdup(p->pool, word);
-
- g_hash_table_insert(idn->words, w, w);
- }
-}
-
-/* Why?
- Because it doesn't hang/loop forever on bad data
- Used to clean up utf8 before it gets further */
-
-static inline guint32
-camel_utf8_next(const unsigned char **ptr, const unsigned char *ptrend)
-{
- register unsigned char *p = (unsigned char *)*ptr;
- register unsigned int c;
- register guint32 v;
- int l;
-
- if (p == ptrend)
- return 0;
-
- while ( (c = *p++) ) {
- if (c < 0x80) {
- *ptr = p;
- return c;
- } else if ((c&0xe0) == 0xc0) {
- v = c & 0x1f;
- l = 1;
- } else if ((c&0xf0) == 0xe0) {
- v = c & 0x0f;
- l = 2;
- } else if ((c&0xf8) == 0xf0) {
- v = c & 0x07;
- l = 3;
- } else if ((c&0xfc) == 0xf8) {
- v = c & 0x03;
- l = 4;
- } else if ((c&0xfe) == 0xfc) {
- v = c & 0x01;
- l = 5;
- } else
- /* Invalid, ignore and look for next start char if room */
- if (p == ptrend) {
- return 0;
- } else {
- continue;
- }
-
- /* bad data or truncated buffer */
- if (p + l > ptrend)
- return 0;
-
- while (l && ((c = *p) & 0xc0) == 0x80) {
- p++;
- l--;
- v = (v << 6) | (c & 0x3f);
- }
-
- /* valid char */
- if (l == 0) {
- *ptr = p;
- return v;
- }
-
- /* else look for a start char again */
- }
-
- return 0;
-}
-
-static size_t
-text_index_name_add_buffer(CamelIndexName *idn, const char *buffer, size_t len)
-{
- CamelTextIndexNamePrivate *p = CIN_PRIVATE(idn);
- const unsigned char *ptr, *ptrend;
- guint32 c;
- unsigned char utf8[8];
- size_t utf8len;
-
- if (buffer == NULL) {
- if (p->buffer->len) {
- camel_index_name_add_word(idn, p->buffer->str);
- g_string_truncate(p->buffer, 0);
- }
- return 0;
- }
-
- ptr = buffer;
- ptrend = buffer+len;
- while ((c = camel_utf8_next(&ptr, ptrend))) {
- if (g_unichar_isalnum(c)) {
- c = g_unichar_tolower(c);
- utf8len = g_unichar_to_utf8(c, utf8);
- utf8[utf8len] = 0;
- g_string_append(p->buffer, utf8);
- } else {
- if (p->buffer->len > 0 && p->buffer->len <= CAMEL_TEXT_INDEX_MAX_WORDLEN) {
- text_index_name_add_word(idn, p->buffer->str);
- /*camel_index_name_add_word(idn, p->buffer->str);*/
- }
-
- g_string_truncate (p->buffer, 0);
- }
- }
-
- return 0;
-}
-
-static void
-camel_text_index_name_class_init(CamelTextIndexNameClass *klass)
-{
- CamelIndexNameClass *nklass = (CamelIndexNameClass *)klass;
-
- camel_text_index_name_parent = CAMEL_INDEX_NAME_CLASS(camel_type_get_global_classfuncs(camel_index_name_get_type()));
-
- nklass->add_word = text_index_name_add_word;
- nklass->add_buffer = text_index_name_add_buffer;
-}
-
-static void
-camel_text_index_name_init(CamelTextIndexName *idn)
-{
- CamelTextIndexNamePrivate *p;
-
- idn->parent.words = g_hash_table_new(g_str_hash, g_str_equal);
-
- p = idn->priv = g_malloc0(sizeof(*idn->priv));
- p->buffer = g_string_new("");
- p->pool = e_mempool_new(256, 128, E_MEMPOOL_ALIGN_BYTE);
-}
-
-static void
-camel_text_index_name_finalise(CamelTextIndexName *idn)
-{
- CamelTextIndexNamePrivate *p = CIN_PRIVATE(idn);
-
- g_hash_table_destroy(idn->parent.words);
-
- g_string_free(p->buffer, TRUE);
- e_mempool_destroy(p->pool);
-
- g_free(p);
-}
-
-CamelType
-camel_text_index_name_get_type(void)
-{
- static CamelType type = CAMEL_INVALID_TYPE;
-
- if (type == CAMEL_INVALID_TYPE) {
- type = camel_type_register(camel_index_name_get_type(), "CamelTextIndexName",
- sizeof (CamelTextIndexName),
- sizeof (CamelTextIndexNameClass),
- (CamelObjectClassInitFunc) camel_text_index_name_class_init,
- NULL,
- (CamelObjectInitFunc) camel_text_index_name_init,
- (CamelObjectFinalizeFunc) camel_text_index_name_finalise);
- }
-
- return type;
-}
-
-CamelTextIndexName *
-camel_text_index_name_new(CamelTextIndex *idx, const char *name, camel_key_t nameid)
-{
- CamelTextIndexName *idn = (CamelTextIndexName *)camel_object_new(camel_text_index_name_get_type());
- CamelIndexName *cin = &idn->parent;
- CamelTextIndexNamePrivate *p = CIN_PRIVATE(idn);
-
- cin->index = (CamelIndex *)idx;
- camel_object_ref((CamelObject *)idx);
- cin->name = e_mempool_strdup(p->pool, name);
- p->nameid = nameid;
-
- return idn;
-}
-
-/* ********************************************************************** */
-/* CamelTextIndexCursor */
-/* ********************************************************************** */
-
-static CamelIndexCursorClass *camel_text_index_cursor_parent;
-
-#define CIC_CLASS(o) ((CamelTextIndexCursorClass *)(((CamelObject *)o)->classfuncs))
-#define CIC_PRIVATE(o) (((CamelTextIndexCursor *)(o))->priv)
-
-static const char *
-text_index_cursor_next(CamelIndexCursor *idc)
-{
- struct _CamelTextIndexCursorPrivate *p = CIC_PRIVATE(idc);
- struct _CamelTextIndexPrivate *tip = CTI_PRIVATE(idc->index);
- unsigned int flags;
-
- c(printf("Going to next cursor for word with data '%08x' next %08x\n", p->first, p->next));
-
- do {
- while (p->record_index >= p->record_count) {
- g_free(p->records);
- p->records = NULL;
- p->record_index = 0;
- p->record_count = 0;
- if (p->next == 0)
- return NULL;
- if (camel_key_file_read(tip->links, &p->next, &p->record_count, &p->records) == -1)
- return NULL;
- }
-
- g_free(p->current);
- camel_key_table_lookup(tip->name_index, p->records[p->record_index], &p->current, &flags);
- if (flags & 1) {
- g_free(p->current);
- p->current = NULL;
- }
- p->record_index++;
- } while (p->current == NULL);
-
- return p->current;
-}
-
-static void
-text_index_cursor_reset(CamelIndexCursor *idc)
-{
- struct _CamelTextIndexCursorPrivate *p = CIC_PRIVATE(idc);
-
- g_free(p->records);
- p->records = NULL;
- g_free(p->current);
- p->current = NULL;
- p->record_count = 0;
- p->record_index = 0;
- p->next = p->first;
-}
-
-static void
-camel_text_index_cursor_class_init(CamelTextIndexCursorClass *klass)
-{
- CamelIndexCursorClass *cklass = (CamelIndexCursorClass *)klass;
-
- camel_text_index_cursor_parent = CAMEL_INDEX_CURSOR_CLASS(camel_type_get_global_classfuncs(camel_index_cursor_get_type()));
-
- cklass->next = text_index_cursor_next;
- cklass->reset = text_index_cursor_reset;
-}
-
-static void
-camel_text_index_cursor_init(CamelTextIndexCursor *idc)
-{
- CIC_PRIVATE(idc) = g_malloc0(sizeof(struct _CamelTextIndexCursorPrivate));
-}
-
-static void
-camel_text_index_cursor_finalise(CamelTextIndexCursor *idc)
-{
- struct _CamelTextIndexCursorPrivate *p = CIC_PRIVATE(idc);
-
- g_free(p->records);
- g_free(p->current);
- g_free(p);
-}
-
-CamelType
-camel_text_index_cursor_get_type(void)
-{
- static CamelType type = CAMEL_INVALID_TYPE;
-
- if (type == CAMEL_INVALID_TYPE) {
- type = camel_type_register(camel_index_cursor_get_type(), "CamelTextIndexCursor",
- sizeof (CamelTextIndexCursor),
- sizeof (CamelTextIndexCursorClass),
- (CamelObjectClassInitFunc) camel_text_index_cursor_class_init,
- NULL,
- (CamelObjectInitFunc) camel_text_index_cursor_init,
- (CamelObjectFinalizeFunc) camel_text_index_cursor_finalise);
- }
-
- return type;
-}
-
-CamelTextIndexCursor *
-camel_text_index_cursor_new(CamelTextIndex *idx, camel_block_t data)
-{
- CamelTextIndexCursor *idc = (CamelTextIndexCursor *)camel_object_new(camel_text_index_cursor_get_type());
- CamelIndexCursor *cic = &idc->parent;
- struct _CamelTextIndexCursorPrivate *p = CIC_PRIVATE(idc);
-
- cic->index = (CamelIndex *)idx;
- camel_object_ref((CamelObject *)idx);
- p->first = data;
- p->next = data;
- p->record_count = 0;
- p->record_index = 0;
-
- return idc;
-}
-
-/* ********************************************************************** */
-/* CamelTextIndexKeyCursor */
-/* ********************************************************************** */
-
-static CamelIndexCursorClass *camel_text_index_key_cursor_parent;
-
-#define CIKC_CLASS(o) ((CamelTextIndexKeyCursorClass *)(((CamelObject *)o)->classfuncs))
-#define CIKC_PRIVATE(o) (((CamelTextIndexKeyCursor *)(o))->priv)
-
-static const char *
-text_index_key_cursor_next(CamelIndexCursor *idc)
-{
- struct _CamelTextIndexKeyCursorPrivate *p = CIKC_PRIVATE(idc);
-
- c(printf("Going to next cursor for keyid %08x\n", p->keyid));
-
- g_free(p->current);
- p->current = NULL;
-
- while ( (p->keyid = camel_key_table_next(p->table, p->keyid, &p->current, &p->flags, &p->data)) ) {
- if ((p->flags & 1) == 0) {
- return p->current;
- } else {
- g_free(p->current);
- p->current = NULL;
- }
- }
-
- return NULL;
-}
-
-static void
-text_index_key_cursor_reset(CamelIndexCursor *idc)
-{
- struct _CamelTextIndexKeyCursorPrivate *p = CIKC_PRIVATE(idc);
-
- p->keyid = 0;
- p->flags = 0;
- p->data = 0;
- g_free(p->current);
- p->current = NULL;
-}
-
-static void
-camel_text_index_key_cursor_class_init(CamelTextIndexKeyCursorClass *klass)
-{
- CamelIndexCursorClass *cklass = (CamelIndexCursorClass *)klass;
-
- camel_text_index_key_cursor_parent = CAMEL_INDEX_CURSOR_CLASS(camel_type_get_global_classfuncs(camel_index_cursor_get_type()));
-
- cklass->next = text_index_key_cursor_next;
- cklass->reset = text_index_key_cursor_reset;
-}
-
-static void
-camel_text_index_key_cursor_init(CamelTextIndexKeyCursor *idc)
-{
- struct _CamelTextIndexKeyCursorPrivate *p;
-
- p = idc->priv = g_malloc0(sizeof(struct _CamelTextIndexKeyCursorPrivate));
- p->keyid = 0;
- p->flags = 0;
- p->data = 0;
- p->current = NULL;
-}
-
-static void
-camel_text_index_key_cursor_finalise(CamelTextIndexKeyCursor *idc)
-{
- struct _CamelTextIndexKeyCursorPrivate *p = CIKC_PRIVATE(idc);
-
- g_free(p->current);
- if (p->table)
- camel_object_unref((CamelObject *)p->table);
- g_free(p);
-}
-
-CamelType
-camel_text_index_key_cursor_get_type(void)
-{
- static CamelType type = CAMEL_INVALID_TYPE;
-
- if (type == CAMEL_INVALID_TYPE) {
- type = camel_type_register(camel_index_cursor_get_type(), "CamelTextIndexKeyCursor",
- sizeof (CamelTextIndexKeyCursor),
- sizeof (CamelTextIndexKeyCursorClass),
- (CamelObjectClassInitFunc) camel_text_index_key_cursor_class_init,
- NULL,
- (CamelObjectInitFunc) camel_text_index_key_cursor_init,
- (CamelObjectFinalizeFunc) camel_text_index_key_cursor_finalise);
- }
-
- return type;
-}
-
-CamelTextIndexKeyCursor *
-camel_text_index_key_cursor_new(CamelTextIndex *idx, CamelKeyTable *table)
-{
- CamelTextIndexKeyCursor *idc = (CamelTextIndexKeyCursor *)camel_object_new(camel_text_index_key_cursor_get_type());
- CamelIndexCursor *cic = &idc->parent;
- struct _CamelTextIndexKeyCursorPrivate *p = CIKC_PRIVATE(idc);
-
- cic->index = (CamelIndex *)idx;
- camel_object_ref((CamelObject *)idx);
- p->table = table;
- camel_object_ref((CamelObject *)table);
-
- return idc;
-}
-
-/* ********************************************************************** */
-
-#define m(x)
-
-#if 0
-
-struct _CamelIndexRoot {
- struct _CamelBlockRoot root;
-
- camel_block_t word_root; /* a keyindex containing the keyid -> word mapping */
- camel_block_t word_hash_root; /* a partitionindex containing word -> keyid mapping */
-
- camel_block_t name_root; /* same, for names */
- camel_block_t name_hash_root;
-};
-
-char wordbuffer[] = "This is a buffer of multiple words. Some of the words are duplicates"
-" while other words are the same, some are in difFerenT Different different case cAsE casE,"
-" with,with:with;with-with'with\"'\"various punctuation as well. So much for those Words. and 10"
-" numbers in a row too 1,2,3,4,5,6,7,8,9,10! Yay!.";
-
-int main(int argc, char **argv)
-{
-#if 0
- CamelBlockFile *bs;
- CamelKeyTable *ki;
- CamelPartitionTable *cpi;
- CamelBlock *keyroot, *partroot;
- struct _CamelIndexRoot *root;
- FILE *fp;
- char line[256], *key;
- camel_key_t keyid;
- int index = 0, flags, data;
-#endif
- CamelIndex *idx;
- CamelIndexName *idn;
- CamelIndexCursor *idc;
- const char *word;
- int i;
-
- printf("Camel text index tester!\n");
-
- g_thread_init(NULL);
- camel_init(NULL, 0);
-
- idx = (CamelIndex *)camel_text_index_new("textindex", O_CREAT|O_RDWR|O_TRUNC);
-
-
-#if 1
- camel_index_compress(idx);
-
- return 0;
-#endif
-
- for (i=0;i<100;i++) {
- char name[16];
-
- sprintf(name, "%d", i);
- printf("Adding words to name '%s'\n", name);
- idn = camel_index_add_name(idx, name);
- camel_index_name_add_buffer(idn, wordbuffer, sizeof(wordbuffer)-1);
- camel_index_write_name(idx, idn);
- camel_object_unref((CamelObject *)idn);
- }
-
- printf("Looking up which names contain word 'word'\n");
- idc = camel_index_find(idx, "words");
- while ( (word = camel_index_cursor_next(idc)) != NULL ) {
- printf(" name is '%s'\n", word);
- }
- camel_object_unref((CamelObject *)idc);
- printf("done.\n");
-
- printf("Looking up which names contain word 'truncate'\n");
- idc = camel_index_find(idx, "truncate");
- while ( (word = camel_index_cursor_next(idc)) != NULL ) {
- printf(" name is '%s'\n", word);
- }
- camel_object_unref((CamelObject *)idc);
- printf("done.\n");
-
- camel_index_sync(idx);
- camel_object_unref((CamelObject *)idx);
-
-#if 0
- bs = camel_block_file_new("blocks", "TESTINDX", CAMEL_BLOCK_SIZE);
-
- root = (struct _CamelIndexRoot *)bs->root;
- if (root->word_root == 0) {
- keyroot = camel_block_file_new_block(bs);
- root->word_root = keyroot->id;
- camel_block_file_touch_block(bs, bs->root_block);
- }
- if (root->word_hash_root == 0) {
- partroot = camel_block_file_new_block(bs);
- root->word_hash_root = partroot->id;
- camel_block_file_touch_block(bs, bs->root_block);
- }
-
- ki = camel_key_table_new(bs, root->word_root);
- cpi = camel_partition_table_new(bs, root->word_hash_root);
-
- fp = fopen("/usr/dict/words", "r");
- if (fp == NULL) {
- perror("fopen");
- return 1;
- }
-
- while (fgets(line, sizeof(line), fp) != NULL) {
- line[strlen(line)-1] = 0;
-
- /* see if its already there */
- keyid = camel_partition_table_lookup(cpi, line);
- if (keyid == 0) {
- m(printf("Adding word '%s' %d\n", line, index));
-
- keyid = camel_key_table_add(ki, line, index, 0);
- m(printf(" key = %08x\n", keyid));
-
- camel_partition_table_add(cpi, line, keyid);
-
- m(printf("Lookup word '%s'\n", line));
- keyid = camel_partition_table_lookup(cpi, line);
- m(printf(" key = %08x\n", keyid));
- }
-
- m(printf("Lookup key %08x\n", keyid));
-
- camel_key_table_set_flags(ki, keyid, index, 1);
-
- data = camel_key_table_lookup(ki, keyid, &key, &flags);
- m(printf(" word = '%s' %d %04x\n", key, data, flags));
-
- g_assert(data == index && strcmp(key, line) == 0);
-
- g_free(key);
-
- index++;
- }
-
- printf("Scanning again\n");
- fseek(fp, SEEK_SET, 0);
- index = 0;
- while (fgets(line, sizeof(line), fp) != NULL) {
- line[strlen(line)-1] = 0;
- m(printf("Lookup word '%s' %d\n", line, index));
- keyid = camel_partition_table_lookup(cpi, line);
- m(printf(" key = %08d\n", keyid));
-
- m(printf("Lookup key %08x\n", keyid));
- data = camel_key_table_lookup(ki, keyid, &key, &flags);
- m(printf(" word = '%s' %d\n", key, data));
-
- g_assert(data == index && strcmp(key, line) == 0);
-
- g_free(key);
-
- index++;
- }
-
- printf("Freeing partition index\n");
- camel_partition_table_free(cpi);
-
- printf("Syncing block file\n");
- camel_block_file_sync(bs);
-#endif
- return 0;
-}
-
-#endif