aboutsummaryrefslogtreecommitdiffstats
path: root/libibex
diff options
context:
space:
mode:
authorNot Zed <NotZed@HelixCode.com>2000-10-12 21:40:55 +0800
committerMichael Zucci <zucchi@src.gnome.org>2000-10-12 21:40:55 +0800
commit1deca02b41aae9bf2632ea1420cc900fcf41531d (patch)
treee26816ec82c3990801aca2a35686d5df2ba00957 /libibex
parent6c337e9ea0623b48688c018973d35823dde21f60 (diff)
downloadgsoc2013-evolution-1deca02b41aae9bf2632ea1420cc900fcf41531d.tar.gz
gsoc2013-evolution-1deca02b41aae9bf2632ea1420cc900fcf41531d.tar.zst
gsoc2013-evolution-1deca02b41aae9bf2632ea1420cc900fcf41531d.zip
Added some stat stuff.
2000-10-12 Not Zed <NotZed@HelixCode.com> * index.h: Added some stat stuff. * wordindex.c (struct _wordcache): Changed files[] to be a pointer to an allocated block/or an individual item. (find): Fix for changes to struct. (find_name): " (sync_cache_entry): " (add): " (add_list): " (add_index_cache): Free the cache file array if it was created. (word_flush): And here. (word_close): And here too. (ibex_create_word_index): Double the size of the hashtables. (word_flush): Make sure we reset the wordcount to 0 if we remove the list items. DOH. (add_index_cache): Use a slightly more sohpisticated aging algorithm to remove expired nodes. 2000-10-10 Not Zed <NotZed@HelixCode.com> * hash.c (hash_find): (hash_remove): (hash_insert): Truncate key if it is too big to fit in a single block to MAX_KEYLEN bytes. svn path=/trunk/; revision=5882
Diffstat (limited to 'libibex')
-rw-r--r--libibex/ChangeLog20
-rw-r--r--libibex/block.c9
-rw-r--r--libibex/hash.c10
-rw-r--r--libibex/index.h6
-rw-r--r--libibex/wordindex.c168
5 files changed, 187 insertions, 26 deletions
diff --git a/libibex/ChangeLog b/libibex/ChangeLog
index 50aed83023..5021bc12b4 100644
--- a/libibex/ChangeLog
+++ b/libibex/ChangeLog
@@ -1,3 +1,23 @@
+2000-10-12 Not Zed <NotZed@HelixCode.com>
+
+ * index.h: Added some stat stuff.
+
+ * wordindex.c (struct _wordcache): Changed files[] to be a pointer
+ to an allocated block/or an individual item.
+ (find): Fix for changes to struct.
+ (find_name): "
+ (sync_cache_entry): "
+ (add): "
+ (add_list): "
+ (add_index_cache): Free the cache file array if it was created.
+ (word_flush): And here.
+ (word_close): And here too.
+ (ibex_create_word_index): Double the size of the hashtables.
+ (word_flush): Make sure we reset the wordcount to 0 if we remove
+ the list items. DOH.
+ (add_index_cache): Use a slightly more sohpisticated aging
+ algorithm to remove expired nodes.
+
2000-10-10 Not Zed <NotZed@HelixCode.com>
* hash.c (hash_find):
diff --git a/libibex/block.c b/libibex/block.c
index d971077dba..6f6cd60657 100644
--- a/libibex/block.c
+++ b/libibex/block.c
@@ -38,6 +38,7 @@
#define d(x)
/*#define DEBUG*/
+int block_log;
#ifdef IBEX_STATS
static void
@@ -202,6 +203,9 @@ ibex_block_dirty(struct _block *block)
static void
sync_block(struct _memcache *block_cache, struct _memblock *memblock)
{
+ if (block_log)
+ printf("writing block %d\n", memblock->block);
+
lseek(block_cache->fd, memblock->block, SEEK_SET);
if (write(block_cache->fd, &memblock->data, sizeof(memblock->data)) != -1) {
memblock->flags &= ~BLOCK_DIRTY;
@@ -324,6 +328,9 @@ ibex_block_read(struct _memcache *block_cache, blockid_t blockid)
add_miss(block_cache, blockid);
add_read(block_cache, blockid);
#endif
+ if (block_log)
+ printf("miss block %d\n", blockid);
+
d(printf("loading blockid from disk %d\n", blockid));
memblock = g_malloc(sizeof(*memblock));
memblock->block = blockid;
@@ -345,6 +352,8 @@ ibex_block_read(struct _memcache *block_cache, blockid_t blockid)
struct _memblock *rootblock = g_hash_table_lookup(block_cache->index, (void *)0);
struct _root *root = (struct _root *)&rootblock->data;
+ printf("Unsyncing root block\n");
+
g_assert(rootblock != NULL);
root->flags &= ~IBEX_ROOT_SYNCF;
sync_block(block_cache, rootblock);
diff --git a/libibex/hash.c b/libibex/hash.c
index a36bef33b0..25a6e92ef9 100644
--- a/libibex/hash.c
+++ b/libibex/hash.c
@@ -190,6 +190,9 @@ static int hash_sync(struct _IBEXIndex *index)
static int hash_close(struct _IBEXIndex *index)
{
+#ifdef INDEX_STAT
+ printf("Performed %d lookups, average %f depth\n", index->lookups, (double)index->lookup_total/index->lookups);
+#endif
g_free(index);
return 0;
}
@@ -259,12 +262,19 @@ hash_find(struct _IBEXIndex *index, const char *key, int keylen)
/* and its bucket */
hashbucket = table->buckets[hashentry];
+#ifdef INDEX_STAT
+ index->lookups++;
+#endif
/* go down the bucket chain, reading each entry till we are done ... */
while (hashbucket != 0) {
struct _hashblock *bucket;
char *start, *end;
int ind;
+#ifdef INDEX_STAT
+ index->lookup_total++;
+#endif
+
d(printf(" checking bucket %d\n", hashbucket));
/* get the real bucket id from the hashbucket id */
diff --git a/libibex/index.h b/libibex/index.h
index 35e3df23c2..c3c83c1bcf 100644
--- a/libibex/index.h
+++ b/libibex/index.h
@@ -25,10 +25,16 @@
/* an indexing 'class' maps a key to 1 piece of info */
+#define INDEX_STAT
+
struct _IBEXIndex {
struct _IBEXIndexClass *klass;
struct _memcache *blocks;
blockid_t root; /* root block of ondisk index data */
+#ifdef INDEX_STAT
+ int lookups; /* how many lookups */
+ int lookup_total; /* how many blocks loaded for all lookups (hash chain depth) */
+#endif
};
struct _IBEXIndexClass {
diff --git a/libibex/wordindex.c b/libibex/wordindex.c
index d988ee1482..da25389b27 100644
--- a/libibex/wordindex.c
+++ b/libibex/wordindex.c
@@ -45,7 +45,7 @@ of words, and could then be discarded (:flush()).
#define d(x)
/*#define WORDCACHE_SIZE (256)*/
-#define WORDCACHE_SIZE (10240)
+#define WORDCACHE_SIZE (4096)
extern struct _IBEXStoreClass ibex_diskarray_class;
extern struct _IBEXIndexClass ibex_hash_class;
@@ -56,15 +56,20 @@ extern struct _IBEXIndexClass ibex_hash_class;
*/
+#define CACHE_FILE_COUNT (62)
+
struct _wordcache {
struct _wordcache *next;
struct _wordcache *prev;
nameid_t wordid; /* disk wordid */
blockid_t wordblock; /* head of disk list */
blockid_t wordtail; /* and the tail data */
- int filecount; /* how many we have in memory */
- /*nameid_t files[32];*/ /* memory cache of files */
- nameid_t files[62]; /* memory cache of files */
+ short filecount; /* how many valid items in files[] */
+ short filealloc; /* how much allocated space in files[] */
+ union {
+ nameid_t *files; /* memory cache of files */
+ nameid_t file0; /* if filecount == 1 && filealloc == 0, store directly */
+ } file;
char word[1]; /* actual word follows */
};
@@ -106,7 +111,7 @@ ibex_create_word_index(struct _memcache *bc, blockid_t *wordroot, blockid_t *nam
printf("opening wordindex root = %d\n", *wordroot);
idx->wordindex = ibex_hash_class.open(bc, *wordroot);
} else {
- idx->wordindex = ibex_hash_class.create(bc, 1024);
+ idx->wordindex = ibex_hash_class.create(bc, 2048);
*wordroot = idx->wordindex->root;
printf("creating wordindex root = %d\n", *wordroot);
}
@@ -114,7 +119,7 @@ ibex_create_word_index(struct _memcache *bc, blockid_t *wordroot, blockid_t *nam
printf("opening nameindex root = %d\n", *nameroot);
idx->nameindex = ibex_hash_class.open(bc, *nameroot);
} else {
- idx->nameindex = ibex_hash_class.create(bc, 1024);
+ idx->nameindex = ibex_hash_class.create(bc, 2048);
*nameroot = idx->nameindex->root;
printf("creating nameindex root = %d\n", *nameroot);
}
@@ -126,7 +131,7 @@ static void
cache_sanity(struct _wordcache *head)
{
while (head->next) {
- g_assert(head->filecount <= sizeof(head->files)/sizeof(head->files[0]));
+ g_assert(head->filecount <= head->filealloc);
g_assert(strlen(head->word) != 0);
head = head->next;
}
@@ -209,7 +214,10 @@ static GPtrArray *find(struct _IBEXWord *idx, const char *word)
names = idx->wordstore->klass->get(idx->wordstore, wordblock, wordtail);
/* .. including any memory-only data */
if (cache) {
- g_array_append_vals(names, cache->files, cache->filecount);
+ if (cache->filealloc == 0 && cache->filecount == 1)
+ g_array_append_val(names, cache->file.file0);
+ else
+ g_array_append_vals(names, cache->file.files, cache->filecount);
}
/* walk it ... converting id's back to strings */
@@ -242,9 +250,14 @@ static gboolean find_name(struct _IBEXWord *idx, const char *name, const char *w
/* freshen cache entry if we touch it */
ibex_list_remove((struct _listnode *)cache);
ibex_list_addtail(&idx->wordnodes, (struct _listnode *)cache);
- for (i=0;i<cache->filecount;i++) {
- if (cache->files[i] == nameid)
+ if (cache->filecount == 1 && cache->filealloc == 0) {
+ if (cache->file.file0 == nameid)
return TRUE;
+ } else {
+ for (i=0;i<cache->filecount;i++) {
+ if (cache->file.files[i] == nameid)
+ return TRUE;
+ }
}
/* not there? well we can use the wordid anyway */
wordid = cache->wordid;
@@ -265,8 +278,11 @@ sync_cache_entry(struct _IBEXWord *idx, struct _wordcache *cache)
GArray array; /* just use this as a header */
blockid_t oldblock, oldtail;
- d(printf("syncing cache entry '%s'\n", cache->word));
- array.data = (char *)cache->files;
+ d(printf("syncing cache entry '%s' used %d\n", cache->word, cache->filecount));
+ if (cache->filecount == 1 && cache->filealloc == 0)
+ array.data = (char *)&cache->file.file0;
+ else
+ array.data = (char *)cache->file.files;
array.len = cache->filecount;
oldblock = cache->wordblock;
oldtail = cache->wordtail;
@@ -305,13 +321,35 @@ add_index_cache(struct _IBEXWord *idx, const char *word)
if (cache == 0) {
/* see if we have to flush off the last entry */
if (idx->wordcount >= WORDCACHE_SIZE) {
+ struct _wordcache *mincache;
+ int min, count=0;
/* remove last entry, and flush it */
cache = (struct _wordcache *)idx->wordnodes.tailpred;
+ mincache = cache;
+ min = mincache->filecount;
+
d(printf("flushing word from cache %s\n", cache->word));
- ibex_list_remove((struct _listnode *)cache);
- g_hash_table_remove(idx->wordcache, cache->word);
- sync_cache_entry(idx, cache);
- g_free(cache);
+ /* instead of just using the last entry, we try and find an entry with
+ with only 1 item (failing that, the smallest in the range we look at) */
+ /* this could probably benefit greatly from a more sophisticated aging algorithm */
+ while (cache->next && count < 100) {
+ if (cache->filecount == 1) {
+ mincache = cache;
+ break;
+ }
+ if (cache->filecount > 0 && cache->filecount < min) {
+ mincache = cache;
+ min = cache->filecount;
+ }
+ cache = cache->next;
+ count++;
+ }
+ ibex_list_remove((struct _listnode *)mincache);
+ g_hash_table_remove(idx->wordcache, mincache->word);
+ sync_cache_entry(idx, mincache);
+ if (mincache->filealloc)
+ g_free(mincache->file.files);
+ g_free(mincache);
idx->wordcount--;
}
cache = g_malloc0(sizeof(*cache)+strlen(word));
@@ -347,14 +385,34 @@ static void add(struct _IBEXWord *idx, const char *name, const char *word)
add_index_key(idx->nameindex, name, &nameid, &nameblock, &nametail);
/* check for repeats of the last name - dont add anything */
- if (cache->files[cache->filecount] == nameid)
- return;
+ if (cache->filecount == 1 && cache->filealloc == 0) {
+ if (cache->file.file0 == nameid)
+ return;
+ } else {
+ if (cache->file.files[cache->filecount] == nameid)
+ return;
+ }
+
+ /* see if we are setting the first, drop it in the union */
+ if (cache->filecount == 0 && cache->filealloc == 0) {
+ cache->file.file0 = nameid;
+ } else if (cache->filecount == 1 && cache->filealloc == 0) {
+ nameid_t saveid;
+ /* we need to allocate space for words */
+ saveid = cache->file.file0;
+ cache->file.files = g_malloc(sizeof(cache->file.files[0]) * CACHE_FILE_COUNT);
+ /* this could possibly grow as needed, but i wont for now */
+ cache->filealloc = CACHE_FILE_COUNT;
+ cache->file.files[0] = saveid;
+ cache->file.files[1] = nameid;
+ } else {
+ cache->file.files[cache->filecount] = nameid;
+ }
- /* yay, now insert the nameindex into the word list, and the word index into the name list */
- cache->files[cache->filecount] = nameid;
cache->filecount++;
+
/* if we are full, force a flush now */
- if (cache->filecount >= sizeof(cache->files)/sizeof(cache->files[0])) {
+ if (cache->filealloc && cache->filecount >= cache->filealloc) {
sync_cache_entry(idx, cache);
}
@@ -391,14 +449,33 @@ static void add_list(struct _IBEXWord *idx, const char *name, GPtrArray *words)
/*d(cache_sanity((struct _wordcache *)idx->wordnodes.head));*/
- /* check for duplicates; doesn't catch duplicates over an overflow
- boundary. Watch me care. */
+ /* check for duplicates; doesn't catch duplicates over an overflow boundary. Watch me care. */
if (cache->filecount == 0
- || cache->files[cache->filecount-1] != nameid) {
- cache->files[cache->filecount] = nameid;
+ /* the 1 item case */
+ || (cache->filecount == 1 && cache->filealloc == 0 && cache->file.file0 != nameid)
+ /* the normal case */
+ || (cache->filealloc > 0 && cache->file.files[cache->filecount-1] != nameid)) {
+
+ /* see if we are setting the first, drop it in the union */
+ if (cache->filecount == 0 && cache->filealloc == 0) {
+ cache->file.file0 = nameid;
+ } else if (cache->filecount == 1 && cache->filealloc == 0) {
+ nameid_t saveid;
+ /* we need to allocate space for words */
+ saveid = cache->file.file0;
+ cache->file.files = g_malloc(sizeof(cache->file.files[0]) * CACHE_FILE_COUNT);
+ /* this could possibly grow as needed, but i wont for now */
+ cache->filealloc = CACHE_FILE_COUNT;
+ cache->file.files[0] = saveid;
+ cache->file.files[1] = nameid;
+ } else {
+ cache->file.files[cache->filecount] = nameid;
+ }
+
cache->filecount++;
+
/* if we are full, force a flush now */
- if (cache->filecount >= sizeof(cache->files)/sizeof(cache->files[0])) {
+ if (cache->filealloc && cache->filecount >= cache->filealloc) {
sync_cache_entry(idx, cache);
}
@@ -452,30 +529,69 @@ static int
word_flush(struct _IBEXWord *idx)
{
struct _wordcache *cache = (struct _wordcache *)idx->wordnodes.head, *next;
+ extern int block_log;
+ int count= 0;
+ int used=0, wasted=0;
+
+ block_log = 0;
next = cache->next;
while (next) {
+ count++;
+ used += sizeof(struct _wordcache) + (cache->filealloc * sizeof(nameid_t));
+ if (cache->filealloc)
+ wasted += (cache->filealloc-cache->filecount)*sizeof(nameid_t);
+ else
+ wasted += (1-cache->filecount) * sizeof(nameid_t);
+
+ /*printf("syncing word %s\n", cache->word);*/
sync_cache_entry(idx, cache);
g_hash_table_remove(idx->wordcache, cache->word);
+ if (cache->filealloc)
+ g_free(cache->file.files);
g_free(cache);
cache = next;
next = cache->next;
}
+
+ printf("sync cache entries = %d\n used memory = %d\n wasted memory = %d\n", count, used, wasted);
+
+ block_log = 0;
ibex_list_new(&idx->wordnodes);
+ idx->wordcount = 0;
return 0;
}
static int word_close(struct _IBEXWord *idx)
{
struct _wordcache *cache = (struct _wordcache *)idx->wordnodes.head, *next;
+ extern int block_log;
+ int count= 0;
+ int used=0, wasted=0;
+
+ block_log = 0;
next = cache->next;
while (next) {
+ count++;
+ used += sizeof(struct _wordcache) + (cache->filealloc * sizeof(nameid_t));
+ if (cache->filealloc)
+ wasted += (cache->filealloc-cache->filecount)*sizeof(nameid_t);
+ else
+ wasted += (1-cache->filecount) * sizeof(nameid_t);
+
+ /*printf("closing word %s\n", cache->word);*/
sync_cache_entry(idx, cache);
+ if (cache->filealloc)
+ g_free(cache->file.files);
g_free(cache);
cache = next;
next = cache->next;
}
+ block_log = 0;
+
+ printf("cache entries = %d\n used memory = %d\n wasted memory = %d\n", count, used, wasted);
+
idx->namestore->klass->close(idx->namestore);
idx->nameindex->klass->close(idx->nameindex);
/*same as namestore: