aboutsummaryrefslogtreecommitdiffstats
path: root/libibex/hash.c
diff options
context:
space:
mode:
authorNot Zed <NotZed@HelixCode.com>2000-10-25 21:59:44 +0800
committerMichael Zucci <zucchi@src.gnome.org>2000-10-25 21:59:44 +0800
commit9aae808cd0b4cf8bd35f6c0205e30c79f62632ef (patch)
treeb0fb452db42daadf61124e840f0db9004a62df1f /libibex/hash.c
parenta586a32c93238991650cf7275b81316444739ee5 (diff)
downloadgsoc2013-evolution-9aae808cd0b4cf8bd35f6c0205e30c79f62632ef.tar.gz
gsoc2013-evolution-9aae808cd0b4cf8bd35f6c0205e30c79f62632ef.tar.zst
gsoc2013-evolution-9aae808cd0b4cf8bd35f6c0205e30c79f62632ef.zip
Bugfixes, performance improvemnts. Should scale up much better than
before, and be more bugfree than ever! 2000-10-25 Not Zed <NotZed@HelixCode.com> * ibex_internal.h (IBEX_VERSION): Bumped to another version. The file format hasn't changed, but earlier bugs may create invalid files. * block.c (ibex_block_read): Use the root data directly. (ibex_block_cache_open): As well. (ibex_block_get): And here too. (ibex_block_cache_sync): Sync the root block directly here. * block.h: Pad root block out to 1024 bytes. Added root block to struct _memcache. * disktail.c (tail_get): Dirty the root block. (tail_get): Fix for changes to root access. (disk_remove): And here too. * wordindexmem.c (sync_cache_entry): Handle the case of not having any files in the list, which can happen now. (word_index_pre): Make sure we set the wordid on the new cache entry. * ibex_block.c (ibex_save): Sigh. Pass the right argument to index_post. * block.c (ibex_block_cache_open): Create a word_index_mem for indexing the words, rather than a word_index. * ibex_block.c (ibex_index_buffer): If we haven't called index_pre yet, do it before indexing anything. (ibex_save): If wehave called index_pre previously, call index_post. (ibex_close): And same for here. * index.h: Added a cursor class, and cursor retrieval function for iterating through an index's keys. * wordindexmem.c (ibex_create_word_index_mem): New word class, similar to wordindex, but meant to be faster for updates. (word_index_pre): Implement. We load all keys into memory. (word_index_post): Implement. We sync and free all keys. (find): Remove lru code, its no longer a cache, but a lookup table. (add_index_cache): Remove lru code here too. (find_name): And here. (word_flush): Flush the hashtable direct. (word_close): Call flush to flush, rather than doing it ourselves. (add_index_cache): If we are in an index state, we can assume a cache miss == a new word. (word_index_post): Maintain whether or not we are in an index state, and the depth of the state. (word_index_pre): Likewise. Dont reread the index if we have already. (cache_sanity): Fixed for struct changes. * wordindex.h (IBEXWordClass): Added functions to prepare/cleanup for lots of indexing. i.e. can be used to optimise indexing speed at the cost of extra memory usage during the indexing process. * hash.c (hash_cursor_create): Create a new cursor for iterating through a hashtable. (hash_cursor_close): 'close' the cursor. It is upto the application to close any cursors it creates. (hash_cursor_next): Goto the next key id. (hash_cursor_next_key): Goto the next key, reutrn the key. (hash_get_cursor): Return a cursor object. * wordindex.c (word_index_post): (word_index_pre): Added (empty) callbacks for pre/post functions. svn path=/trunk/; revision=6165
Diffstat (limited to 'libibex/hash.c')
-rw-r--r--libibex/hash.c95
1 files changed, 95 insertions, 0 deletions
diff --git a/libibex/hash.c b/libibex/hash.c
index 9395e1e00b..0581d22fcb 100644
--- a/libibex/hash.c
+++ b/libibex/hash.c
@@ -42,6 +42,14 @@
typedef guint32 hashid_t;
+struct _HASHCursor {
+ struct _IBEXCursor cursor;
+
+ hashid_t block;
+ unsigned int index;
+ unsigned int size;
+};
+
static struct _IBEXIndex *hash_create(struct _memcache *bc, int size);
static struct _IBEXIndex *hash_open(struct _memcache *bc, blockid_t root);
static int hash_sync(struct _IBEXIndex *index);
@@ -53,6 +61,12 @@ static hashid_t hash_insert(struct _IBEXIndex *index, const char *key, int keyle
static char *hash_get_key(struct _IBEXIndex *index, hashid_t hashbucket, int *len);
static void hash_set_data_block(struct _IBEXIndex *index, hashid_t keyid, blockid_t blockid, blockid_t tail);
static blockid_t hash_get_data_block(struct _IBEXIndex *index, hashid_t keyid, blockid_t *tail);
+static struct _IBEXCursor *hash_get_cursor(struct _IBEXIndex *index);
+
+static struct _IBEXCursor *hash_cursor_create(struct _IBEXIndex *);
+static void hash_cursor_close(struct _IBEXCursor *);
+static guint32 hash_cursor_next(struct _IBEXCursor *);
+static char *hash_cursor_next_key(struct _IBEXCursor *, int *keylenptr);
struct _IBEXIndexClass ibex_hash_class = {
hash_create, hash_open,
@@ -63,6 +77,13 @@ struct _IBEXIndexClass ibex_hash_class = {
hash_get_key,
hash_set_data_block,
hash_get_data_block,
+ hash_get_cursor,
+};
+
+struct _IBEXCursorClass ibex_hash_cursor_class = {
+ hash_cursor_close,
+ hash_cursor_next,
+ hash_cursor_next_key
};
/* the reason we have the tail here is that otherwise we need to
@@ -197,6 +218,12 @@ static int hash_close(struct _IBEXIndex *index)
return 0;
}
+/* get an iterator class */
+static struct _IBEXCursor *hash_get_cursor(struct _IBEXIndex *index)
+{
+ return hash_cursor_create(index);
+}
+
/* convert a hashbucket id into a name */
static char *
hash_get_key(struct _IBEXIndex *index, hashid_t hashbucket, int *len)
@@ -620,6 +647,74 @@ hash_insert(struct _IBEXIndex *index, const char *key, int keylen)
return HASH_KEY(keybucket, 0);
}
+/* hash cursor functions */
+static struct _IBEXCursor *
+hash_cursor_create(struct _IBEXIndex *idx)
+{
+ struct _HASHCursor *idc;
+ struct _hashroot *hashroot;
+
+ idc = g_malloc(sizeof(*idc));
+ idc->cursor.klass = &ibex_hash_cursor_class;
+ idc->cursor.index = idx;
+ idc->block = 0;
+ idc->index = 0;
+
+ hashroot = (struct _hashroot *)ibex_block_read(idx->blocks, idx->root);
+ idc->size = hashroot->size;
+
+ return &idc->cursor;
+}
+
+static void
+hash_cursor_close(struct _IBEXCursor *idc)
+{
+ g_free(idc);
+}
+
+static guint32
+hash_cursor_next(struct _IBEXCursor *idc)
+{
+ struct _HASHCursor *hc = (struct _HASHCursor *)idc;
+ struct _hashroot *hashroot;
+ struct _hashblock *bucket;
+ struct _hashtableblock *table;
+
+ /* get the next bucket chain */
+ if (hc->block != 0) {
+ int ind;
+
+ bucket = (struct _hashblock *)ibex_block_read(idc->index->blocks, HASH_BLOCK(hc->block));
+ ind = HASH_INDEX(hc->block);
+
+ g_assert(ind < bucket->used);
+
+ hc->block = bucket->hb_keys[ind].next;
+ }
+
+ if (hc->block == 0) {
+ hashroot = (struct _hashroot *)ibex_block_read(idc->index->blocks, idc->index->root);
+ while (hc->block == 0 && hc->index < hc->size) {
+ table = (struct _hashtableblock *)
+ ibex_block_read(idc->index->blocks,
+ hashroot->table[hc->index / (BLOCK_SIZE/sizeof(blockid_t))]);
+ hc->block = table->buckets[hc->index % (BLOCK_SIZE/sizeof(blockid_t))];
+
+ hc->index++;
+ }
+ }
+
+ return hc->block;
+}
+
+static char *
+hash_cursor_next_key(struct _IBEXCursor *idc, int *keylenptr)
+{
+ /* TODO: this could be made slightly mroe efficient going to the structs direct.
+ but i'm lazy today */
+ return idc->index->klass->get_key(idc->index, idc->klass->next(idc), keylenptr);
+}
+
/* debug */
void ibex_hash_dump(struct _IBEXIndex *index);
static void ibex_hash_dump_rec(struct _IBEXIndex *index, int *words, int *wordslen);