Bugfixes, performance improvemnts. Should scale up much better than

before, and be more bugfree than ever! 2000-10-25 Not Zed <NotZed@HelixCode.com> * ibex_internal.h (IBEX_VERSION): Bumped to another version. The file format hasn't changed, but earlier bugs may create invalid files. * block.c (ibex_block_read): Use the root data directly. (ibex_block_cache_open): As well. (ibex_block_get): And here too. (ibex_block_cache_sync): Sync the root block directly here. * block.h: Pad root block out to 1024 bytes. Added root block to struct _memcache. * disktail.c (tail_get): Dirty the root block. (tail_get): Fix for changes to root access. (disk_remove): And here too. * wordindexmem.c (sync_cache_entry): Handle the case of not having any files in the list, which can happen now. (word_index_pre): Make sure we set the wordid on the new cache entry. * ibex_block.c (ibex_save): Sigh. Pass the right argument to index_post. * block.c (ibex_block_cache_open): Create a word_index_mem for indexing the words, rather than a word_index. * ibex_block.c (ibex_index_buffer): If we haven't called index_pre yet, do it before indexing anything. (ibex_save): If wehave called index_pre previously, call index_post. (ibex_close): And same for here. * index.h: Added a cursor class, and cursor retrieval function for iterating through an index's keys. * wordindexmem.c (ibex_create_word_index_mem): New word class, similar to wordindex, but meant to be faster for updates. (word_index_pre): Implement. We load all keys into memory. (word_index_post): Implement. We sync and free all keys. (find): Remove lru code, its no longer a cache, but a lookup table. (add_index_cache): Remove lru code here too. (find_name): And here. (word_flush): Flush the hashtable direct. (word_close): Call flush to flush, rather than doing it ourselves. (add_index_cache): If we are in an index state, we can assume a cache miss == a new word. (word_index_post): Maintain whether or not we are in an index state, and the depth of the state. (word_index_pre): Likewise. Dont reread the index if we have already. (cache_sanity): Fixed for struct changes. * wordindex.h (IBEXWordClass): Added functions to prepare/cleanup for lots of indexing. i.e. can be used to optimise indexing speed at the cost of extra memory usage during the indexing process. * hash.c (hash_cursor_create): Create a new cursor for iterating through a hashtable. (hash_cursor_close): 'close' the cursor. It is upto the application to close any cursors it creates. (hash_cursor_next): Goto the next key id. (hash_cursor_next_key): Goto the next key, reutrn the key. (hash_get_cursor): Return a cursor object. * wordindex.c (word_index_post): (word_index_pre): Added (empty) callbacks for pre/post functions. svn path=/trunk/; revision=6165
author: Not Zed <NotZed@HelixCode.com> 2000-10-25 21:59:44 +0800
committer: Michael Zucci <zucchi@src.gnome.org> 2000-10-25 21:59:44 +0800
commit: 9aae808cd0b4cf8bd35f6c0205e30c79f62632ef (patch)
tree: b0fb452db42daadf61124e840f0db9004a62df1f /libibex/hash.c
parent: a586a32c93238991650cf7275b81316444739ee5 (diff)
download: gsoc2013-evolution-9aae808cd0b4cf8bd35f6c0205e30c79f62632ef.tar.gz
gsoc2013-evolution-9aae808cd0b4cf8bd35f6c0205e30c79f62632ef.tar.zst
gsoc2013-evolution-9aae808cd0b4cf8bd35f6c0205e30c79f62632ef.zip
1 files changed, 95 insertions, 0 deletions
diff --git a/libibex/hash.c b/libibex/hash.c
index 9395e1e00b..0581d22fcb 100644
--- a/libibex/hash.c
+++ b/libibex/hash.c
@@ -42,6 +42,14 @@
 
 typedef guint32 hashid_t;
 
+struct _HASHCursor {
+	struct _IBEXCursor cursor;
+
+	hashid_t block;
+	unsigned int index;
+	unsigned int size;
+};
+
 static struct _IBEXIndex *hash_create(struct _memcache *bc, int size);
 static struct _IBEXIndex *hash_open(struct _memcache *bc, blockid_t root);
 static int hash_sync(struct _IBEXIndex *index);
@@ -53,6 +61,12 @@ static hashid_t hash_insert(struct _IBEXIndex *index, const char *key, int keyle
 static char *hash_get_key(struct _IBEXIndex *index, hashid_t hashbucket, int *len);
 static void hash_set_data_block(struct _IBEXIndex *index, hashid_t keyid, blockid_t blockid, blockid_t tail);
 static blockid_t hash_get_data_block(struct _IBEXIndex *index, hashid_t keyid, blockid_t *tail);
+static struct _IBEXCursor *hash_get_cursor(struct _IBEXIndex *index);
+
+static struct _IBEXCursor *hash_cursor_create(struct _IBEXIndex *);
+static void hash_cursor_close(struct _IBEXCursor *);
+static guint32 hash_cursor_next(struct _IBEXCursor *);
+static char *hash_cursor_next_key(struct _IBEXCursor *, int *keylenptr);
 
 struct _IBEXIndexClass ibex_hash_class = {
 	hash_create, hash_open,
@@ -63,6 +77,13 @@ struct _IBEXIndexClass ibex_hash_class = {
 	hash_get_key,
 	hash_set_data_block,
 	hash_get_data_block,
+	hash_get_cursor,
+};
+
+struct _IBEXCursorClass ibex_hash_cursor_class = {
+	hash_cursor_close,
+	hash_cursor_next,
+	hash_cursor_next_key
 };
 
 /* the reason we have the tail here is that otherwise we need to
@@ -197,6 +218,12 @@ static int hash_close(struct _IBEXIndex *index)
 	return 0;
 }
 
+/* get an iterator class */
+static struct _IBEXCursor *hash_get_cursor(struct _IBEXIndex *index)
+{
+	return hash_cursor_create(index);
+}
+
 /* convert a hashbucket id into a name */
 static char *
 hash_get_key(struct _IBEXIndex *index, hashid_t hashbucket, int *len)
@@ -620,6 +647,74 @@ hash_insert(struct _IBEXIndex *index, const char *key, int keylen)
 	return HASH_KEY(keybucket, 0);
 }
 
+/* hash cursor functions */
+static struct _IBEXCursor *
+hash_cursor_create(struct _IBEXIndex *idx)
+{
+	struct _HASHCursor *idc;
+	struct _hashroot *hashroot;
+
+	idc = g_malloc(sizeof(*idc));
+	idc->cursor.klass = &ibex_hash_cursor_class;
+	idc->cursor.index = idx;
+	idc->block = 0;
+	idc->index = 0;
+
+	hashroot = (struct _hashroot *)ibex_block_read(idx->blocks, idx->root);
+	idc->size = hashroot->size;
+
+	return &idc->cursor;
+}
+
+static void
+hash_cursor_close(struct _IBEXCursor *idc)
+{
+	g_free(idc);
+}
+
+static guint32
+hash_cursor_next(struct _IBEXCursor *idc)
+{
+	struct _HASHCursor *hc = (struct _HASHCursor *)idc;
+	struct _hashroot *hashroot;
+	struct _hashblock *bucket;
+	struct _hashtableblock *table;
+
+	/* get the next bucket chain */
+	if (hc->block != 0) {
+		int ind;
+
+		bucket = (struct _hashblock *)ibex_block_read(idc->index->blocks, HASH_BLOCK(hc->block));
+		ind = HASH_INDEX(hc->block);
+
+		g_assert(ind < bucket->used);
+
+		hc->block = bucket->hb_keys[ind].next;
+	}
+
+	if (hc->block == 0) {
+		hashroot = (struct _hashroot *)ibex_block_read(idc->index->blocks, idc->index->root);
+		while (hc->block == 0 && hc->index < hc->size) {
+			table = (struct _hashtableblock *)
+				ibex_block_read(idc->index->blocks,
+						hashroot->table[hc->index / (BLOCK_SIZE/sizeof(blockid_t))]);
+			hc->block = table->buckets[hc->index % (BLOCK_SIZE/sizeof(blockid_t))];
+
+			hc->index++;
+		}
+	}
+
+	return hc->block;
+}
+
+static char *
+hash_cursor_next_key(struct _IBEXCursor *idc, int *keylenptr)
+{
+	/* TODO: this could be made slightly mroe efficient going to the structs direct.
+	   but i'm lazy today */
+	return idc->index->klass->get_key(idc->index, idc->klass->next(idc), keylenptr);
+}
+
 /* debug */
 void ibex_hash_dump(struct _IBEXIndex *index);
 static void ibex_hash_dump_rec(struct _IBEXIndex *index, int *words, int *wordslen);
author	Not Zed <NotZed@HelixCode.com>	2000-10-25 21:59:44 +0800
committer	Michael Zucci <zucchi@src.gnome.org>	2000-10-25 21:59:44 +0800
commit	9aae808cd0b4cf8bd35f6c0205e30c79f62632ef (patch)
tree	b0fb452db42daadf61124e840f0db9004a62df1f /libibex/hash.c
parent	a586a32c93238991650cf7275b81316444739ee5 (diff)
download	gsoc2013-evolution-9aae808cd0b4cf8bd35f6c0205e30c79f62632ef.tar.gz gsoc2013-evolution-9aae808cd0b4cf8bd35f6c0205e30c79f62632ef.tar.zst gsoc2013-evolution-9aae808cd0b4cf8bd35f6c0205e30c79f62632ef.zip