From 10181b57a9fb648f5fd424ca611820a3cf42c42b Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Sat, 9 Sep 2017 18:03:07 +0200 Subject: core, eth/downloader: commit block data using batches (#15115) * ethdb: add Putter interface and Has method * ethdb: improve docs and add IdealBatchSize * ethdb: remove memory batch lock Batches are not safe for concurrent use. * core: use ethdb.Putter for Write* functions This covers the easy cases. * core/state: simplify StateSync * trie: optimize local node check * ethdb: add ValueSize to Batch * core: optimize HasHeader check This avoids one random database read to get the block number. For many uses of HasHeader, the expectation is that it's actually there. Using Has avoids a load + decode of the value. * core: write fast sync block data in batches Collect writes into batches up to the ideal size instead of issuing many small, concurrent writes. * eth/downloader: commit larger state batches Collect nodes into a batch up to the ideal size instead of committing whenever a node is received. * core: optimize HasBlock check This avoids a random database read to get the number. * core: use numberCache in HasHeader numberCache has higher capacity, increasing the odds of finding the header without a database lookup. * core: write imported block data using a batch Restore batch writes of state and add blocks, tx entries, receipts to the same batch. The change also simplifies the miner. This commit also removes posting of logs when a forked block is imported. 
* core: fix DB write error handling * ethdb: use RLock for Has * core: fix HasBlock comment --- ethdb/database.go | 24 ++++++++++++++++++++---- ethdb/interface.go | 18 ++++++++++++++++-- ethdb/memory_database.go | 21 ++++++++++++++------- 3 files changed, 50 insertions(+), 13 deletions(-) (limited to 'ethdb') diff --git a/ethdb/database.go b/ethdb/database.go index 7d5fb0b9e..93755dd7e 100644 --- a/ethdb/database.go +++ b/ethdb/database.go @@ -109,6 +109,10 @@ func (db *LDBDatabase) Put(key []byte, value []byte) error { return db.db.Put(key, value, nil) } +func (db *LDBDatabase) Has(key []byte) (bool, error) { + return db.db.Has(key, nil) +} + // Get returns the given key if it's present. func (db *LDBDatabase) Get(key []byte) ([]byte, error) { // Measure the database get latency, if requested @@ -271,19 +275,19 @@ func (db *LDBDatabase) meter(refresh time.Duration) { } } -// TODO: remove this stuff and expose leveldb directly - func (db *LDBDatabase) NewBatch() Batch { return &ldbBatch{db: db.db, b: new(leveldb.Batch)} } type ldbBatch struct { - db *leveldb.DB - b *leveldb.Batch + db *leveldb.DB + b *leveldb.Batch + size int } func (b *ldbBatch) Put(key, value []byte) error { b.b.Put(key, value) + b.size += len(value) return nil } @@ -291,6 +295,10 @@ func (b *ldbBatch) Write() error { return b.db.Write(b.b, nil) } +func (b *ldbBatch) ValueSize() int { + return b.size +} + type table struct { db Database prefix string @@ -309,6 +317,10 @@ func (dt *table) Put(key []byte, value []byte) error { return dt.db.Put(append([]byte(dt.prefix), key...), value) } +func (dt *table) Has(key []byte) (bool, error) { + return dt.db.Has(append([]byte(dt.prefix), key...)) +} + func (dt *table) Get(key []byte) ([]byte, error) { return dt.db.Get(append([]byte(dt.prefix), key...)) } @@ -342,3 +354,7 @@ func (tb *tableBatch) Put(key, value []byte) error { func (tb *tableBatch) Write() error { return tb.batch.Write() } + +func (tb *tableBatch) ValueSize() int { + return 
tb.batch.ValueSize() +} diff --git a/ethdb/interface.go b/ethdb/interface.go index f4b787a52..99a5b770d 100644 --- a/ethdb/interface.go +++ b/ethdb/interface.go @@ -16,15 +16,29 @@ package ethdb -type Database interface { +// Code using batches should try to add this much data to the batch. +// The value was determined empirically. +const IdealBatchSize = 100 * 1024 + +// Putter wraps the database write operation supported by both batches and regular databases. +type Putter interface { Put(key []byte, value []byte) error +} + +// Database wraps all database operations. All methods are safe for concurrent use. +type Database interface { + Putter Get(key []byte) ([]byte, error) + Has(key []byte) (bool, error) Delete(key []byte) error Close() NewBatch() Batch } +// Batch is a write-only database that commits changes to its host database +// when Write is called. Batch cannot be used concurrently. type Batch interface { - Put(key, value []byte) error + Putter + ValueSize() int // amount of data in the batch Write() error } diff --git a/ethdb/memory_database.go b/ethdb/memory_database.go index 11b093724..699bd0c9f 100644 --- a/ethdb/memory_database.go +++ b/ethdb/memory_database.go @@ -45,6 +45,14 @@ func (db *MemDatabase) Put(key []byte, value []byte) error { return nil } +func (db *MemDatabase) Has(key []byte) (bool, error) { + db.lock.RLock() + defer db.lock.RUnlock() + + _, ok := db.db[string(key)] + return ok, nil +} + func (db *MemDatabase) Get(key []byte) ([]byte, error) { db.lock.RLock() defer db.lock.RUnlock() @@ -93,21 +101,16 @@ type kv struct{ k, v []byte } type memBatch struct { db *MemDatabase writes []kv - lock sync.RWMutex + size int } func (b *memBatch) Put(key, value []byte) error { - b.lock.Lock() - defer b.lock.Unlock() - b.writes = append(b.writes, kv{common.CopyBytes(key), common.CopyBytes(value)}) + b.size += len(value) return nil } func (b *memBatch) Write() error { - b.lock.RLock() - defer b.lock.RUnlock() - b.db.lock.Lock() defer 
b.db.lock.Unlock() @@ -116,3 +119,7 @@ func (b *memBatch) Write() error { } return nil } + +func (b *memBatch) ValueSize() int { + return b.size +} -- cgit