diff options
author | Anton Evangelatov <anton.evangelatov@gmail.com> | 2018-02-23 17:56:08 +0800 |
---|---|---|
committer | Péter Szilágyi <peterke@gmail.com> | 2018-02-23 17:56:08 +0800 |
commit | ae9f97221a96a86e4343a5c3cc4b1db44627a2f3 (patch) | |
tree | 0154be72d0f2e1f032d129b9433d1bf3939cd8f0 /metrics | |
parent | 7f74bdf8dded0e1ac3c01e043c2ed89d78f308cf (diff) | |
download | dexon-ae9f97221a96a86e4343a5c3cc4b1db44627a2f3.tar.gz dexon-ae9f97221a96a86e4343a5c3cc4b1db44627a2f3.tar.zst dexon-ae9f97221a96a86e4343a5c3cc4b1db44627a2f3.zip |
metrics: pull library and introduce ResettingTimer and InfluxDB reporter (#15910)
* go-metrics: fork library and introduce ResettingTimer and InfluxDB reporter.
* vendor: change nonsense/go-metrics to ethersphere/go-metrics
* go-metrics: add tests. move ResettingTimer logic from reporter to type.
* all, metrics: pull in metrics package in go-ethereum
* metrics/test: make sure metrics are enabled for tests
* metrics: apply gosimple rules
* metrics/exp, internal/debug: init expvar endpoint when starting pprof server
* internal/debug: tiny comment formatting fix
Diffstat (limited to 'metrics')
53 files changed, 6491 insertions, 57 deletions
diff --git a/metrics/FORK.md b/metrics/FORK.md new file mode 100644 index 000000000..b19985bf5 --- /dev/null +++ b/metrics/FORK.md @@ -0,0 +1 @@ +This repo has been forked from https://github.com/rcrowley/go-metrics at commit e181e09 diff --git a/metrics/LICENSE b/metrics/LICENSE new file mode 100644 index 000000000..363fa9ee7 --- /dev/null +++ b/metrics/LICENSE @@ -0,0 +1,29 @@ +Copyright 2012 Richard Crowley. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +THIS SOFTWARE IS PROVIDED BY RICHARD CROWLEY ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL RICHARD CROWLEY OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation +are those of the authors and should not be interpreted as representing +official policies, either expressed or implied, of Richard Crowley. diff --git a/metrics/README.md b/metrics/README.md new file mode 100644 index 000000000..bc2a45a83 --- /dev/null +++ b/metrics/README.md @@ -0,0 +1,166 @@ +go-metrics +========== + +![travis build status](https://travis-ci.org/rcrowley/go-metrics.svg?branch=master) + +Go port of Coda Hale's Metrics library: <https://github.com/dropwizard/metrics>. + +Documentation: <http://godoc.org/github.com/rcrowley/go-metrics>. + +Usage +----- + +Create and update metrics: + +```go +c := metrics.NewCounter() +metrics.Register("foo", c) +c.Inc(47) + +g := metrics.NewGauge() +metrics.Register("bar", g) +g.Update(47) + +r := NewRegistry() +g := metrics.NewRegisteredFunctionalGauge("cache-evictions", r, func() int64 { return cache.getEvictionsCount() }) + +s := metrics.NewExpDecaySample(1028, 0.015) // or metrics.NewUniformSample(1028) +h := metrics.NewHistogram(s) +metrics.Register("baz", h) +h.Update(47) + +m := metrics.NewMeter() +metrics.Register("quux", m) +m.Mark(47) + +t := metrics.NewTimer() +metrics.Register("bang", t) +t.Time(func() {}) +t.Update(47) +``` + +Register() is not threadsafe. For threadsafe metric registration use +GetOrRegister: + +```go +t := metrics.GetOrRegisterTimer("account.create.latency", nil) +t.Time(func() {}) +t.Update(47) +``` + +**NOTE:** Be sure to unregister short-lived meters and timers otherwise they will +leak memory: + +```go +// Will call Stop() on the Meter to allow for garbage collection +metrics.Unregister("quux") +// Or similarly for a Timer that embeds a Meter +metrics.Unregister("bang") +``` + +Periodically log every metric in human-readable form to standard error: + +```go +go metrics.Log(metrics.DefaultRegistry, 5 * time.Second, log.New(os.Stderr, "metrics: ", log.Lmicroseconds)) +``` + +Periodically log every metric in slightly-more-parseable form to syslog: + +```go +w, _ := syslog.Dial("unixgram", "/dev/log", syslog.LOG_INFO, "metrics") +go metrics.Syslog(metrics.DefaultRegistry, 60e9, w) +``` + +Periodically emit every metric to Graphite using the [Graphite client](https://github.com/cyberdelia/go-metrics-graphite): + +```go + +import "github.com/cyberdelia/go-metrics-graphite" + +addr, _ := net.ResolveTCPAddr("tcp", "127.0.0.1:2003") +go graphite.Graphite(metrics.DefaultRegistry, 10e9, "metrics", addr) +``` + +Periodically emit every metric into InfluxDB: + +**NOTE:** this has been pulled out of the library due to constant fluctuations +in the InfluxDB API. In fact, all client libraries are on their way out. see +issues [#121](https://github.com/rcrowley/go-metrics/issues/121) and +[#124](https://github.com/rcrowley/go-metrics/issues/124) for progress and details. + +```go +import "github.com/vrischmann/go-metrics-influxdb" + +go influxdb.InfluxDB(metrics.DefaultRegistry, + 10e9, + "127.0.0.1:8086", + "database-name", + "username", + "password" +) +``` + +Periodically upload every metric to Librato using the [Librato client](https://github.com/mihasya/go-metrics-librato): + +**Note**: the client included with this repository under the `librato` package +has been deprecated and moved to the repository linked above. + +```go +import "github.com/mihasya/go-metrics-librato" + +go librato.Librato(metrics.DefaultRegistry, + 10e9, // interval + "example@example.com", // account owner email address + "token", // Librato API token + "hostname", // source + []float64{0.95}, // percentiles to send + time.Millisecond, // time unit +) +``` + +Periodically emit every metric to StatHat: + +```go +import "github.com/rcrowley/go-metrics/stathat" + +go stathat.Stathat(metrics.DefaultRegistry, 10e9, "example@example.com") +``` + +Maintain all metrics along with expvars at `/debug/metrics`: + +This uses the same mechanism as [the official expvar](http://golang.org/pkg/expvar/) +but exposed under `/debug/metrics`, which shows a json representation of all your usual expvars +as well as all your go-metrics. + + +```go +import "github.com/rcrowley/go-metrics/exp" + +exp.Exp(metrics.DefaultRegistry) +``` + +Installation +------------ + +```sh +go get github.com/rcrowley/go-metrics +``` + +StatHat support additionally requires their Go client: + +```sh +go get github.com/stathat/go +``` + +Publishing Metrics +------------------ + +Clients are available for the following destinations: + +* Librato - https://github.com/mihasya/go-metrics-librato +* Graphite - https://github.com/cyberdelia/go-metrics-graphite +* InfluxDB - https://github.com/vrischmann/go-metrics-influxdb +* Ganglia - https://github.com/appscode/metlia +* Prometheus - https://github.com/deathowl/go-metrics-prometheus +* DataDog - https://github.com/syntaqx/go-metrics-datadog +* SignalFX - https://github.com/pascallouisperez/go-metrics-signalfx diff --git a/metrics/counter.go b/metrics/counter.go new file mode 100644 index 000000000..c7f2b4bd3 --- /dev/null +++ b/metrics/counter.go @@ -0,0 +1,112 @@ +package metrics + +import "sync/atomic" + +// Counters hold an int64 value that can be incremented and decremented. +type Counter interface { + Clear() + Count() int64 + Dec(int64) + Inc(int64) + Snapshot() Counter +} + +// GetOrRegisterCounter returns an existing Counter or constructs and registers +// a new StandardCounter. +func GetOrRegisterCounter(name string, r Registry) Counter { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewCounter).(Counter) +} + +// NewCounter constructs a new StandardCounter. +func NewCounter() Counter { + if !Enabled { + return NilCounter{} + } + return &StandardCounter{0} +} + +// NewRegisteredCounter constructs and registers a new StandardCounter. +func NewRegisteredCounter(name string, r Registry) Counter { + c := NewCounter() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// CounterSnapshot is a read-only copy of another Counter. +type CounterSnapshot int64 + +// Clear panics. +func (CounterSnapshot) Clear() { + panic("Clear called on a CounterSnapshot") +} + +// Count returns the count at the time the snapshot was taken. +func (c CounterSnapshot) Count() int64 { return int64(c) } + +// Dec panics. +func (CounterSnapshot) Dec(int64) { + panic("Dec called on a CounterSnapshot") +} + +// Inc panics. +func (CounterSnapshot) Inc(int64) { + panic("Inc called on a CounterSnapshot") +} + +// Snapshot returns the snapshot. +func (c CounterSnapshot) Snapshot() Counter { return c } + +// NilCounter is a no-op Counter. +type NilCounter struct{} + +// Clear is a no-op. +func (NilCounter) Clear() {} + +// Count is a no-op. +func (NilCounter) Count() int64 { return 0 } + +// Dec is a no-op. +func (NilCounter) Dec(i int64) {} + +// Inc is a no-op. +func (NilCounter) Inc(i int64) {} + +// Snapshot is a no-op. +func (NilCounter) Snapshot() Counter { return NilCounter{} } + +// StandardCounter is the standard implementation of a Counter and uses the +// sync/atomic package to manage a single int64 value. +type StandardCounter struct { + count int64 +} + +// Clear sets the counter to zero. +func (c *StandardCounter) Clear() { + atomic.StoreInt64(&c.count, 0) +} + +// Count returns the current count. +func (c *StandardCounter) Count() int64 { + return atomic.LoadInt64(&c.count) +} + +// Dec decrements the counter by the given amount. +func (c *StandardCounter) Dec(i int64) { + atomic.AddInt64(&c.count, -i) +} + +// Inc increments the counter by the given amount. +func (c *StandardCounter) Inc(i int64) { + atomic.AddInt64(&c.count, i) +} + +// Snapshot returns a read-only copy of the counter. +func (c *StandardCounter) Snapshot() Counter { + return CounterSnapshot(c.Count()) +} diff --git a/metrics/counter_test.go b/metrics/counter_test.go new file mode 100644 index 000000000..dfb03b4e8 --- /dev/null +++ b/metrics/counter_test.go @@ -0,0 +1,77 @@ +package metrics + +import "testing" + +func BenchmarkCounter(b *testing.B) { + c := NewCounter() + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Inc(1) + } +} + +func TestCounterClear(t *testing.T) { + c := NewCounter() + c.Inc(1) + c.Clear() + if count := c.Count(); 0 != count { + t.Errorf("c.Count(): 0 != %v\n", count) + } +} + +func TestCounterDec1(t *testing.T) { + c := NewCounter() + c.Dec(1) + if count := c.Count(); -1 != count { + t.Errorf("c.Count(): -1 != %v\n", count) + } +} + +func TestCounterDec2(t *testing.T) { + c := NewCounter() + c.Dec(2) + if count := c.Count(); -2 != count { + t.Errorf("c.Count(): -2 != %v\n", count) + } +} + +func TestCounterInc1(t *testing.T) { + c := NewCounter() + c.Inc(1) + if count := c.Count(); 1 != count { + t.Errorf("c.Count(): 1 != %v\n", count) + } +} + +func TestCounterInc2(t *testing.T) { + c := NewCounter() + c.Inc(2) + if count := c.Count(); 2 != count { + t.Errorf("c.Count(): 2 != %v\n", count) + } +} + +func TestCounterSnapshot(t *testing.T) { + c := NewCounter() + c.Inc(1) + snapshot := c.Snapshot() + c.Inc(1) + if count := snapshot.Count(); 1 != count { + t.Errorf("c.Count(): 1 != %v\n", count) + } +} + +func TestCounterZero(t *testing.T) { + c := NewCounter() + if count := c.Count(); 0 != count { + t.Errorf("c.Count(): 0 != %v\n", count) + } +} + +func TestGetOrRegisterCounter(t *testing.T) { + r := NewRegistry() + NewRegisteredCounter("foo", r).Inc(47) + if c := GetOrRegisterCounter("foo", r); 47 != c.Count() { + t.Fatal(c) + } +} diff --git a/metrics/debug.go b/metrics/debug.go new file mode 100644 index 000000000..de4a2739f --- /dev/null +++ b/metrics/debug.go @@ -0,0 +1,76 @@ +package metrics + +import ( + "runtime/debug" + "time" +) + +var ( + debugMetrics struct { + GCStats struct { + LastGC Gauge + NumGC Gauge + Pause Histogram + //PauseQuantiles Histogram + PauseTotal Gauge + } + ReadGCStats Timer + } + gcStats debug.GCStats +) + +// Capture new values for the Go garbage collector statistics exported in +// debug.GCStats. This is designed to be called as a goroutine. +func CaptureDebugGCStats(r Registry, d time.Duration) { + for range time.Tick(d) { + CaptureDebugGCStatsOnce(r) + } +} + +// Capture new values for the Go garbage collector statistics exported in +// debug.GCStats. This is designed to be called in a background goroutine. +// Giving a registry which has not been given to RegisterDebugGCStats will +// panic. +// +// Be careful (but much less so) with this because debug.ReadGCStats calls +// the C function runtime·lock(runtime·mheap) which, while not a stop-the-world +// operation, isn't something you want to be doing all the time. +func CaptureDebugGCStatsOnce(r Registry) { + lastGC := gcStats.LastGC + t := time.Now() + debug.ReadGCStats(&gcStats) + debugMetrics.ReadGCStats.UpdateSince(t) + + debugMetrics.GCStats.LastGC.Update(gcStats.LastGC.UnixNano()) + debugMetrics.GCStats.NumGC.Update(gcStats.NumGC) + if lastGC != gcStats.LastGC && 0 < len(gcStats.Pause) { + debugMetrics.GCStats.Pause.Update(int64(gcStats.Pause[0])) + } + //debugMetrics.GCStats.PauseQuantiles.Update(gcStats.PauseQuantiles) + debugMetrics.GCStats.PauseTotal.Update(int64(gcStats.PauseTotal)) +} + +// Register metrics for the Go garbage collector statistics exported in +// debug.GCStats. The metrics are named by their fully-qualified Go symbols, +// i.e. debug.GCStats.PauseTotal. +func RegisterDebugGCStats(r Registry) { + debugMetrics.GCStats.LastGC = NewGauge() + debugMetrics.GCStats.NumGC = NewGauge() + debugMetrics.GCStats.Pause = NewHistogram(NewExpDecaySample(1028, 0.015)) + //debugMetrics.GCStats.PauseQuantiles = NewHistogram(NewExpDecaySample(1028, 0.015)) + debugMetrics.GCStats.PauseTotal = NewGauge() + debugMetrics.ReadGCStats = NewTimer() + + r.Register("debug.GCStats.LastGC", debugMetrics.GCStats.LastGC) + r.Register("debug.GCStats.NumGC", debugMetrics.GCStats.NumGC) + r.Register("debug.GCStats.Pause", debugMetrics.GCStats.Pause) + //r.Register("debug.GCStats.PauseQuantiles", debugMetrics.GCStats.PauseQuantiles) + r.Register("debug.GCStats.PauseTotal", debugMetrics.GCStats.PauseTotal) + r.Register("debug.ReadGCStats", debugMetrics.ReadGCStats) +} + +// Allocate an initial slice for gcStats.Pause to avoid allocations during +// normal operation. +func init() { + gcStats.Pause = make([]time.Duration, 11) +} diff --git a/metrics/debug_test.go b/metrics/debug_test.go new file mode 100644 index 000000000..07eb86784 --- /dev/null +++ b/metrics/debug_test.go @@ -0,0 +1,48 @@ +package metrics + +import ( + "runtime" + "runtime/debug" + "testing" + "time" +) + +func BenchmarkDebugGCStats(b *testing.B) { + r := NewRegistry() + RegisterDebugGCStats(r) + b.ResetTimer() + for i := 0; i < b.N; i++ { + CaptureDebugGCStatsOnce(r) + } +} + +func TestDebugGCStatsBlocking(t *testing.T) { + if g := runtime.GOMAXPROCS(0); g < 2 { + t.Skipf("skipping TestDebugGCMemStatsBlocking with GOMAXPROCS=%d\n", g) + return + } + ch := make(chan int) + go testDebugGCStatsBlocking(ch) + var gcStats debug.GCStats + t0 := time.Now() + debug.ReadGCStats(&gcStats) + t1 := time.Now() + t.Log("i++ during debug.ReadGCStats:", <-ch) + go testDebugGCStatsBlocking(ch) + d := t1.Sub(t0) + t.Log(d) + time.Sleep(d) + t.Log("i++ during time.Sleep:", <-ch) +} + +func testDebugGCStatsBlocking(ch chan int) { + i := 0 + for { + select { + case ch <- i: + return + default: + i++ + } + } +} diff --git a/metrics/ewma.go b/metrics/ewma.go new file mode 100644 index 000000000..3aecd4fa3 --- /dev/null +++ b/metrics/ewma.go @@ -0,0 +1,118 @@ +package metrics + +import ( + "math" + "sync" + "sync/atomic" +) + +// EWMAs continuously calculate an exponentially-weighted moving average +// based on an outside source of clock ticks. +type EWMA interface { + Rate() float64 + Snapshot() EWMA + Tick() + Update(int64) +} + +// NewEWMA constructs a new EWMA with the given alpha. +func NewEWMA(alpha float64) EWMA { + if !Enabled { + return NilEWMA{} + } + return &StandardEWMA{alpha: alpha} +} + +// NewEWMA1 constructs a new EWMA for a one-minute moving average. +func NewEWMA1() EWMA { + return NewEWMA(1 - math.Exp(-5.0/60.0/1)) +} + +// NewEWMA5 constructs a new EWMA for a five-minute moving average. +func NewEWMA5() EWMA { + return NewEWMA(1 - math.Exp(-5.0/60.0/5)) +} + +// NewEWMA15 constructs a new EWMA for a fifteen-minute moving average. +func NewEWMA15() EWMA { + return NewEWMA(1 - math.Exp(-5.0/60.0/15)) +} + +// EWMASnapshot is a read-only copy of another EWMA. +type EWMASnapshot float64 + +// Rate returns the rate of events per second at the time the snapshot was +// taken. +func (a EWMASnapshot) Rate() float64 { return float64(a) } + +// Snapshot returns the snapshot. +func (a EWMASnapshot) Snapshot() EWMA { return a } + +// Tick panics. +func (EWMASnapshot) Tick() { + panic("Tick called on an EWMASnapshot") +} + +// Update panics. +func (EWMASnapshot) Update(int64) { + panic("Update called on an EWMASnapshot") +} + +// NilEWMA is a no-op EWMA. +type NilEWMA struct{} + +// Rate is a no-op. +func (NilEWMA) Rate() float64 { return 0.0 } + +// Snapshot is a no-op. +func (NilEWMA) Snapshot() EWMA { return NilEWMA{} } + +// Tick is a no-op. +func (NilEWMA) Tick() {} + +// Update is a no-op. +func (NilEWMA) Update(n int64) {} + +// StandardEWMA is the standard implementation of an EWMA and tracks the number +// of uncounted events and processes them on each tick. It uses the +// sync/atomic package to manage uncounted events. +type StandardEWMA struct { + uncounted int64 // /!\ this should be the first member to ensure 64-bit alignment + alpha float64 + rate float64 + init bool + mutex sync.Mutex +} + +// Rate returns the moving average rate of events per second. +func (a *StandardEWMA) Rate() float64 { + a.mutex.Lock() + defer a.mutex.Unlock() + return a.rate * float64(1e9) +} + +// Snapshot returns a read-only copy of the EWMA. +func (a *StandardEWMA) Snapshot() EWMA { + return EWMASnapshot(a.Rate()) +} + +// Tick ticks the clock to update the moving average. It assumes it is called +// every five seconds. +func (a *StandardEWMA) Tick() { + count := atomic.LoadInt64(&a.uncounted) + atomic.AddInt64(&a.uncounted, -count) + instantRate := float64(count) / float64(5e9) + a.mutex.Lock() + defer a.mutex.Unlock() + if a.init { + a.rate += a.alpha * (instantRate - a.rate) + } else { + a.init = true + a.rate = instantRate + } +} + +// Update adds n uncounted events. +func (a *StandardEWMA) Update(n int64) { + atomic.AddInt64(&a.uncounted, n) +} diff --git a/metrics/ewma_test.go b/metrics/ewma_test.go new file mode 100644 index 000000000..0430fbd24 --- /dev/null +++ b/metrics/ewma_test.go @@ -0,0 +1,225 @@ +package metrics + +import "testing" + +func BenchmarkEWMA(b *testing.B) { + a := NewEWMA1() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Update(1) + a.Tick() + } +} + +func TestEWMA1(t *testing.T) { + a := NewEWMA1() + a.Update(3) + a.Tick() + if rate := a.Rate(); 0.6 != rate { + t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.22072766470286553 != rate { + t.Errorf("1 minute a.Rate(): 0.22072766470286553 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.08120116994196772 != rate { + t.Errorf("2 minute a.Rate(): 0.08120116994196772 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.029872241020718428 != rate { + t.Errorf("3 minute a.Rate(): 0.029872241020718428 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.01098938333324054 != rate { + t.Errorf("4 minute a.Rate(): 0.01098938333324054 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.004042768199451294 != rate { + t.Errorf("5 minute a.Rate(): 0.004042768199451294 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.0014872513059998212 != rate { + t.Errorf("6 minute a.Rate(): 0.0014872513059998212 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.0005471291793327122 != rate { + t.Errorf("7 minute a.Rate(): 0.0005471291793327122 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.00020127757674150815 != rate { + t.Errorf("8 minute a.Rate(): 0.00020127757674150815 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 7.404588245200814e-05 != rate { + t.Errorf("9 minute a.Rate(): 7.404588245200814e-05 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 2.7239957857491083e-05 != rate { + t.Errorf("10 minute a.Rate(): 2.7239957857491083e-05 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 1.0021020474147462e-05 != rate { + t.Errorf("11 minute a.Rate(): 1.0021020474147462e-05 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 3.6865274119969525e-06 != rate { + t.Errorf("12 minute a.Rate(): 3.6865274119969525e-06 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 1.3561976441886433e-06 != rate { + t.Errorf("13 minute a.Rate(): 1.3561976441886433e-06 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 4.989172314621449e-07 != rate { + t.Errorf("14 minute a.Rate(): 4.989172314621449e-07 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 1.8354139230109722e-07 != rate { + t.Errorf("15 minute a.Rate(): 1.8354139230109722e-07 != %v\n", rate) + } +} + +func TestEWMA5(t *testing.T) { + a := NewEWMA5() + a.Update(3) + a.Tick() + if rate := a.Rate(); 0.6 != rate { + t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.49123845184678905 != rate { + t.Errorf("1 minute a.Rate(): 0.49123845184678905 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.4021920276213837 != rate { + t.Errorf("2 minute a.Rate(): 0.4021920276213837 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.32928698165641596 != rate { + t.Errorf("3 minute a.Rate(): 0.32928698165641596 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.269597378470333 != rate { + t.Errorf("4 minute a.Rate(): 0.269597378470333 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.2207276647028654 != rate { + t.Errorf("5 minute a.Rate(): 0.2207276647028654 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.18071652714732128 != rate { + t.Errorf("6 minute a.Rate(): 0.18071652714732128 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.14795817836496392 != rate { + t.Errorf("7 minute a.Rate(): 0.14795817836496392 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.12113791079679326 != rate { + t.Errorf("8 minute a.Rate(): 0.12113791079679326 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.09917933293295193 != rate { + t.Errorf("9 minute a.Rate(): 0.09917933293295193 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.08120116994196763 != rate { + t.Errorf("10 minute a.Rate(): 0.08120116994196763 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.06648189501740036 != rate { + t.Errorf("11 minute a.Rate(): 0.06648189501740036 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.05443077197364752 != rate { + t.Errorf("12 minute a.Rate(): 0.05443077197364752 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.04456414692860035 != rate { + t.Errorf("13 minute a.Rate(): 0.04456414692860035 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.03648603757513079 != rate { + t.Errorf("14 minute a.Rate(): 0.03648603757513079 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.0298722410207183831020718428 != rate { + t.Errorf("15 minute a.Rate(): 0.0298722410207183831020718428 != %v\n", rate) + } +} + +func TestEWMA15(t *testing.T) { + a := NewEWMA15() + a.Update(3) + a.Tick() + if rate := a.Rate(); 0.6 != rate { + t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.5613041910189706 != rate { + t.Errorf("1 minute a.Rate(): 0.5613041910189706 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.5251039914257684 != rate { + t.Errorf("2 minute a.Rate(): 0.5251039914257684 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.4912384518467888184678905 != rate { + t.Errorf("3 minute a.Rate(): 0.4912384518467888184678905 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.459557003018789 != rate { + t.Errorf("4 minute a.Rate(): 0.459557003018789 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.4299187863442732 != rate { + t.Errorf("5 minute a.Rate(): 0.4299187863442732 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.4021920276213831 != rate { + t.Errorf("6 minute a.Rate(): 0.4021920276213831 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.37625345116383313 != rate { + t.Errorf("7 minute a.Rate(): 0.37625345116383313 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.3519877317060185 != rate { + t.Errorf("8 minute a.Rate(): 0.3519877317060185 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.3292869816564153165641596 != rate { + t.Errorf("9 minute a.Rate(): 0.3292869816564153165641596 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.3080502714195546 != rate { + t.Errorf("10 minute a.Rate(): 0.3080502714195546 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.2881831806538789 != rate { + t.Errorf("11 minute a.Rate(): 0.2881831806538789 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.26959737847033216 != rate { + t.Errorf("12 minute a.Rate(): 0.26959737847033216 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.2522102307052083 != rate { + t.Errorf("13 minute a.Rate(): 0.2522102307052083 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.23594443252115815 != rate { + t.Errorf("14 minute a.Rate(): 0.23594443252115815 != %v\n", rate) + } + elapseMinute(a) + if rate := a.Rate(); 0.2207276647028646247028654470286553 != rate { + t.Errorf("15 minute a.Rate(): 0.2207276647028646247028654470286553 != %v\n", rate) + } +} + +func elapseMinute(a EWMA) { + for i := 0; i < 12; i++ { + a.Tick() + } +} diff --git a/metrics/exp/exp.go b/metrics/exp/exp.go new file mode 100644 index 000000000..c19d00a94 --- /dev/null +++ b/metrics/exp/exp.go @@ -0,0 +1,156 @@ +// Hook go-metrics into expvar +// on any /debug/metrics request, load all vars from the registry into expvar, and execute regular expvar handler +package exp + +import ( + "expvar" + "fmt" + "net/http" + "sync" + + "github.com/ethereum/go-ethereum/metrics" +) + +type exp struct { + expvarLock sync.Mutex // expvar panics if you try to register the same var twice, so we must probe it safely + registry metrics.Registry +} + +func (exp *exp) expHandler(w http.ResponseWriter, r *http.Request) { + // load our variables into expvar + exp.syncToExpvar() + + // now just run the official expvar handler code (which is not publicly callable, so pasted inline) + w.Header().Set("Content-Type", "application/json; charset=utf-8") + fmt.Fprintf(w, "{\n") + first := true + expvar.Do(func(kv expvar.KeyValue) { + if !first { + fmt.Fprintf(w, ",\n") + } + first = false + fmt.Fprintf(w, "%q: %s", kv.Key, kv.Value) + }) + fmt.Fprintf(w, "\n}\n") +} + +// Exp will register an expvar powered metrics handler with http.DefaultServeMux on "/debug/vars" +func Exp(r metrics.Registry) { + h := ExpHandler(r) + // this would cause a panic: + // panic: http: multiple registrations for /debug/vars + // http.HandleFunc("/debug/vars", e.expHandler) + // haven't found an elegant way, so just use a different endpoint + http.Handle("/debug/metrics", h) +} + +// ExpHandler will return an expvar powered metrics handler. +func ExpHandler(r metrics.Registry) http.Handler { + e := exp{sync.Mutex{}, r} + return http.HandlerFunc(e.expHandler) +} + +func (exp *exp) getInt(name string) *expvar.Int { + var v *expvar.Int + exp.expvarLock.Lock() + p := expvar.Get(name) + if p != nil { + v = p.(*expvar.Int) + } else { + v = new(expvar.Int) + expvar.Publish(name, v) + } + exp.expvarLock.Unlock() + return v +} + +func (exp *exp) getFloat(name string) *expvar.Float { + var v *expvar.Float + exp.expvarLock.Lock() + p := expvar.Get(name) + if p != nil { + v = p.(*expvar.Float) + } else { + v = new(expvar.Float) + expvar.Publish(name, v) + } + exp.expvarLock.Unlock() + return v +} + +func (exp *exp) publishCounter(name string, metric metrics.Counter) { + v := exp.getInt(name) + v.Set(metric.Count()) +} + +func (exp *exp) publishGauge(name string, metric metrics.Gauge) { + v := exp.getInt(name) + v.Set(metric.Value()) +} +func (exp *exp) publishGaugeFloat64(name string, metric metrics.GaugeFloat64) { + exp.getFloat(name).Set(metric.Value()) +} + +func (exp *exp) publishHistogram(name string, metric metrics.Histogram) { + h := metric.Snapshot() + ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + exp.getInt(name + ".count").Set(h.Count()) + exp.getFloat(name + ".min").Set(float64(h.Min())) + exp.getFloat(name + ".max").Set(float64(h.Max())) + exp.getFloat(name + ".mean").Set(h.Mean()) + exp.getFloat(name + ".std-dev").Set(h.StdDev()) + exp.getFloat(name + ".50-percentile").Set(ps[0]) + exp.getFloat(name + ".75-percentile").Set(ps[1]) + exp.getFloat(name + ".95-percentile").Set(ps[2]) + exp.getFloat(name + ".99-percentile").Set(ps[3]) + exp.getFloat(name + ".999-percentile").Set(ps[4]) +} + +func (exp *exp) publishMeter(name string, metric metrics.Meter) { + m := metric.Snapshot() + exp.getInt(name + ".count").Set(m.Count()) + exp.getFloat(name + ".one-minute").Set(m.Rate1()) + exp.getFloat(name + ".five-minute").Set(m.Rate5()) + exp.getFloat(name + ".fifteen-minute").Set((m.Rate15())) + exp.getFloat(name + ".mean").Set(m.RateMean()) +} + +func (exp *exp) publishTimer(name string, metric metrics.Timer) { + t := metric.Snapshot() + ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + exp.getInt(name + ".count").Set(t.Count()) + exp.getFloat(name + ".min").Set(float64(t.Min())) + exp.getFloat(name + ".max").Set(float64(t.Max())) + exp.getFloat(name + ".mean").Set(t.Mean()) + exp.getFloat(name + ".std-dev").Set(t.StdDev()) + exp.getFloat(name + ".50-percentile").Set(ps[0]) + exp.getFloat(name + ".75-percentile").Set(ps[1]) + exp.getFloat(name + ".95-percentile").Set(ps[2]) + exp.getFloat(name + ".99-percentile").Set(ps[3]) + exp.getFloat(name + ".999-percentile").Set(ps[4]) + exp.getFloat(name + ".one-minute").Set(t.Rate1()) + exp.getFloat(name + ".five-minute").Set(t.Rate5()) + exp.getFloat(name + ".fifteen-minute").Set(t.Rate15()) + exp.getFloat(name + ".mean-rate").Set(t.RateMean()) +} + +func (exp *exp) syncToExpvar() { + exp.registry.Each(func(name string, i interface{}) { + switch i.(type) { + case metrics.Counter: + exp.publishCounter(name, i.(metrics.Counter)) + case metrics.Gauge: + exp.publishGauge(name, i.(metrics.Gauge)) + case metrics.GaugeFloat64: + exp.publishGaugeFloat64(name, i.(metrics.GaugeFloat64)) + case metrics.Histogram: + exp.publishHistogram(name, i.(metrics.Histogram)) + case metrics.Meter: + exp.publishMeter(name, i.(metrics.Meter)) + case metrics.Timer: + exp.publishTimer(name, i.(metrics.Timer)) + default: + panic(fmt.Sprintf("unsupported type for '%s': %T", name, i)) + } + }) +} diff --git a/metrics/gauge.go b/metrics/gauge.go new file mode 100644 index 000000000..0fbfdb860 --- /dev/null +++ b/metrics/gauge.go @@ -0,0 +1,120 @@ +package metrics + +import "sync/atomic" + +// Gauges hold an int64 value that can be set arbitrarily. +type Gauge interface { + Snapshot() Gauge + Update(int64) + Value() int64 +} + +// GetOrRegisterGauge returns an existing Gauge or constructs and registers a +// new StandardGauge. +func GetOrRegisterGauge(name string, r Registry) Gauge { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewGauge).(Gauge) +} + +// NewGauge constructs a new StandardGauge. +func NewGauge() Gauge { + if !Enabled { + return NilGauge{} + } + return &StandardGauge{0} +} + +// NewRegisteredGauge constructs and registers a new StandardGauge. +func NewRegisteredGauge(name string, r Registry) Gauge { + c := NewGauge() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// NewFunctionalGauge constructs a new FunctionalGauge. +func NewFunctionalGauge(f func() int64) Gauge { + if !Enabled { + return NilGauge{} + } + return &FunctionalGauge{value: f} +} + +// NewRegisteredFunctionalGauge constructs and registers a new StandardGauge. +func NewRegisteredFunctionalGauge(name string, r Registry, f func() int64) Gauge { + c := NewFunctionalGauge(f) + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// GaugeSnapshot is a read-only copy of another Gauge. +type GaugeSnapshot int64 + +// Snapshot returns the snapshot. +func (g GaugeSnapshot) Snapshot() Gauge { return g } + +// Update panics. +func (GaugeSnapshot) Update(int64) { + panic("Update called on a GaugeSnapshot") +} + +// Value returns the value at the time the snapshot was taken. +func (g GaugeSnapshot) Value() int64 { return int64(g) } + +// NilGauge is a no-op Gauge. +type NilGauge struct{} + +// Snapshot is a no-op. +func (NilGauge) Snapshot() Gauge { return NilGauge{} } + +// Update is a no-op. +func (NilGauge) Update(v int64) {} + +// Value is a no-op. +func (NilGauge) Value() int64 { return 0 } + +// StandardGauge is the standard implementation of a Gauge and uses the +// sync/atomic package to manage a single int64 value. +type StandardGauge struct { + value int64 +} + +// Snapshot returns a read-only copy of the gauge. +func (g *StandardGauge) Snapshot() Gauge { + return GaugeSnapshot(g.Value()) +} + +// Update updates the gauge's value. +func (g *StandardGauge) Update(v int64) { + atomic.StoreInt64(&g.value, v) +} + +// Value returns the gauge's current value. +func (g *StandardGauge) Value() int64 { + return atomic.LoadInt64(&g.value) +} + +// FunctionalGauge returns value from given function +type FunctionalGauge struct { + value func() int64 +} + +// Value returns the gauge's current value. +func (g FunctionalGauge) Value() int64 { + return g.value() +} + +// Snapshot returns the snapshot. +func (g FunctionalGauge) Snapshot() Gauge { return GaugeSnapshot(g.Value()) } + +// Update panics. +func (FunctionalGauge) Update(int64) { + panic("Update called on a FunctionalGauge") +} diff --git a/metrics/gauge_float64.go b/metrics/gauge_float64.go new file mode 100644 index 000000000..66819c957 --- /dev/null +++ b/metrics/gauge_float64.go @@ -0,0 +1,127 @@ +package metrics + +import "sync" + +// GaugeFloat64s hold a float64 value that can be set arbitrarily. +type GaugeFloat64 interface { + Snapshot() GaugeFloat64 + Update(float64) + Value() float64 +} + +// GetOrRegisterGaugeFloat64 returns an existing GaugeFloat64 or constructs and registers a +// new StandardGaugeFloat64. +func GetOrRegisterGaugeFloat64(name string, r Registry) GaugeFloat64 { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewGaugeFloat64()).(GaugeFloat64) +} + +// NewGaugeFloat64 constructs a new StandardGaugeFloat64. +func NewGaugeFloat64() GaugeFloat64 { + if !Enabled { + return NilGaugeFloat64{} + } + return &StandardGaugeFloat64{ + value: 0.0, + } +} + +// NewRegisteredGaugeFloat64 constructs and registers a new StandardGaugeFloat64. +func NewRegisteredGaugeFloat64(name string, r Registry) GaugeFloat64 { + c := NewGaugeFloat64() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// NewFunctionalGauge constructs a new FunctionalGauge. +func NewFunctionalGaugeFloat64(f func() float64) GaugeFloat64 { + if !Enabled { + return NilGaugeFloat64{} + } + return &FunctionalGaugeFloat64{value: f} +} + +// NewRegisteredFunctionalGauge constructs and registers a new StandardGauge. +func NewRegisteredFunctionalGaugeFloat64(name string, r Registry, f func() float64) GaugeFloat64 { + c := NewFunctionalGaugeFloat64(f) + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// GaugeFloat64Snapshot is a read-only copy of another GaugeFloat64. +type GaugeFloat64Snapshot float64 + +// Snapshot returns the snapshot. +func (g GaugeFloat64Snapshot) Snapshot() GaugeFloat64 { return g } + +// Update panics. +func (GaugeFloat64Snapshot) Update(float64) { + panic("Update called on a GaugeFloat64Snapshot") +} + +// Value returns the value at the time the snapshot was taken. +func (g GaugeFloat64Snapshot) Value() float64 { return float64(g) } + +// NilGauge is a no-op Gauge. +type NilGaugeFloat64 struct{} + +// Snapshot is a no-op. +func (NilGaugeFloat64) Snapshot() GaugeFloat64 { return NilGaugeFloat64{} } + +// Update is a no-op. +func (NilGaugeFloat64) Update(v float64) {} + +// Value is a no-op. +func (NilGaugeFloat64) Value() float64 { return 0.0 } + +// StandardGaugeFloat64 is the standard implementation of a GaugeFloat64 and uses +// sync.Mutex to manage a single float64 value. +type StandardGaugeFloat64 struct { + mutex sync.Mutex + value float64 +} + +// Snapshot returns a read-only copy of the gauge. +func (g *StandardGaugeFloat64) Snapshot() GaugeFloat64 { + return GaugeFloat64Snapshot(g.Value()) +} + +// Update updates the gauge's value. +func (g *StandardGaugeFloat64) Update(v float64) { + g.mutex.Lock() + defer g.mutex.Unlock() + g.value = v +} + +// Value returns the gauge's current value. +func (g *StandardGaugeFloat64) Value() float64 { + g.mutex.Lock() + defer g.mutex.Unlock() + return g.value +} + +// FunctionalGaugeFloat64 returns value from given function +type FunctionalGaugeFloat64 struct { + value func() float64 +} + +// Value returns the gauge's current value. +func (g FunctionalGaugeFloat64) Value() float64 { + return g.value() +} + +// Snapshot returns the snapshot. +func (g FunctionalGaugeFloat64) Snapshot() GaugeFloat64 { return GaugeFloat64Snapshot(g.Value()) } + +// Update panics. +func (FunctionalGaugeFloat64) Update(float64) { + panic("Update called on a FunctionalGaugeFloat64") +} diff --git a/metrics/gauge_float64_test.go b/metrics/gauge_float64_test.go new file mode 100644 index 000000000..99e62a403 --- /dev/null +++ b/metrics/gauge_float64_test.go @@ -0,0 +1,59 @@ +package metrics + +import "testing" + +func BenchmarkGuageFloat64(b *testing.B) { + g := NewGaugeFloat64() + b.ResetTimer() + for i := 0; i < b.N; i++ { + g.Update(float64(i)) + } +} + +func TestGaugeFloat64(t *testing.T) { + g := NewGaugeFloat64() + g.Update(float64(47.0)) + if v := g.Value(); float64(47.0) != v { + t.Errorf("g.Value(): 47.0 != %v\n", v) + } +} + +func TestGaugeFloat64Snapshot(t *testing.T) { + g := NewGaugeFloat64() + g.Update(float64(47.0)) + snapshot := g.Snapshot() + g.Update(float64(0)) + if v := snapshot.Value(); float64(47.0) != v { + t.Errorf("g.Value(): 47.0 != %v\n", v) + } +} + +func TestGetOrRegisterGaugeFloat64(t *testing.T) { + r := NewRegistry() + NewRegisteredGaugeFloat64("foo", r).Update(float64(47.0)) + t.Logf("registry: %v", r) + if g := GetOrRegisterGaugeFloat64("foo", r); float64(47.0) != g.Value() { + t.Fatal(g) + } +} + +func TestFunctionalGaugeFloat64(t *testing.T) { + var counter float64 + fg := NewFunctionalGaugeFloat64(func() float64 { + counter++ + return counter + }) + fg.Value() + fg.Value() + if counter != 2 { + t.Error("counter != 2") + } +} + +func TestGetOrRegisterFunctionalGaugeFloat64(t *testing.T) { + r := NewRegistry() + NewRegisteredFunctionalGaugeFloat64("foo", r, func() float64 { return 47 }) + if g := GetOrRegisterGaugeFloat64("foo", r); 47 != g.Value() { + t.Fatal(g) + } +} diff --git a/metrics/gauge_test.go b/metrics/gauge_test.go new file mode 100644 index 000000000..1f2603d33 --- /dev/null +++ b/metrics/gauge_test.go @@ -0,0 +1,68 @@ +package metrics + +import ( + "fmt" + "testing" +) + +func BenchmarkGuage(b *testing.B) { + g := NewGauge() + b.ResetTimer() + for i := 0; i < b.N; i++ { + g.Update(int64(i)) + } +} + +func TestGauge(t *testing.T) { + g := NewGauge() + g.Update(int64(47)) + if v := g.Value(); 47 != v { + t.Errorf("g.Value(): 47 != %v\n", v) + } +} + +func TestGaugeSnapshot(t *testing.T) { + g := NewGauge() + g.Update(int64(47)) + snapshot := g.Snapshot() + g.Update(int64(0)) + if v := snapshot.Value(); 47 != v { + t.Errorf("g.Value(): 47 != %v\n", v) + } +} + +func TestGetOrRegisterGauge(t *testing.T) { + r := NewRegistry() + NewRegisteredGauge("foo", r).Update(47) + if g := GetOrRegisterGauge("foo", r); 47 != g.Value() { + t.Fatal(g) + } +} + +func TestFunctionalGauge(t *testing.T) { + var counter int64 + fg := NewFunctionalGauge(func() int64 { + counter++ + return counter + }) + fg.Value() + fg.Value() + if counter != 2 { + t.Error("counter != 2") + } +} + +func TestGetOrRegisterFunctionalGauge(t *testing.T) { + r := NewRegistry() + NewRegisteredFunctionalGauge("foo", r, func() int64 { return 47 }) + if g := GetOrRegisterGauge("foo", r); 47 != g.Value() { + t.Fatal(g) + } +} + +func ExampleGetOrRegisterGauge() { + m := "server.bytes_sent" + g := GetOrRegisterGauge(m, nil) + g.Update(47) + fmt.Println(g.Value()) // Output: 47 +} diff --git a/metrics/graphite.go b/metrics/graphite.go new file mode 100644 index 000000000..142eec86b --- /dev/null +++ b/metrics/graphite.go @@ -0,0 +1,113 @@ +package metrics + +import ( + "bufio" + "fmt" + "log" + "net" + "strconv" + "strings" + "time" +) + +// GraphiteConfig provides a container with configuration parameters for +// the Graphite exporter +type GraphiteConfig struct { + Addr *net.TCPAddr // Network address to connect to + Registry Registry // Registry to be exported + FlushInterval time.Duration // Flush interval + DurationUnit time.Duration // Time conversion unit for durations + Prefix string // Prefix to be prepended to metric names + Percentiles []float64 // Percentiles to export from timers and histograms +} + +// Graphite is a blocking exporter function which reports metrics in r +// to a graphite server located at addr, flushing them every d duration +// and prepending metric names with prefix. +func Graphite(r Registry, d time.Duration, prefix string, addr *net.TCPAddr) { + GraphiteWithConfig(GraphiteConfig{ + Addr: addr, + Registry: r, + FlushInterval: d, + DurationUnit: time.Nanosecond, + Prefix: prefix, + Percentiles: []float64{0.5, 0.75, 0.95, 0.99, 0.999}, + }) +} + +// GraphiteWithConfig is a blocking exporter function just like Graphite, +// but it takes a GraphiteConfig instead. +func GraphiteWithConfig(c GraphiteConfig) { + log.Printf("WARNING: This go-metrics client has been DEPRECATED! It has been moved to https://github.com/cyberdelia/go-metrics-graphite and will be removed from rcrowley/go-metrics on August 12th 2015") + for range time.Tick(c.FlushInterval) { + if err := graphite(&c); nil != err { + log.Println(err) + } + } +} + +// GraphiteOnce performs a single submission to Graphite, returning a +// non-nil error on failed connections. This can be used in a loop +// similar to GraphiteWithConfig for custom error handling. +func GraphiteOnce(c GraphiteConfig) error { + log.Printf("WARNING: This go-metrics client has been DEPRECATED! It has been moved to https://github.com/cyberdelia/go-metrics-graphite and will be removed from rcrowley/go-metrics on August 12th 2015") + return graphite(&c) +} + +func graphite(c *GraphiteConfig) error { + now := time.Now().Unix() + du := float64(c.DurationUnit) + conn, err := net.DialTCP("tcp", nil, c.Addr) + if nil != err { + return err + } + defer conn.Close() + w := bufio.NewWriter(conn) + c.Registry.Each(func(name string, i interface{}) { + switch metric := i.(type) { + case Counter: + fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, metric.Count(), now) + case Gauge: + fmt.Fprintf(w, "%s.%s.value %d %d\n", c.Prefix, name, metric.Value(), now) + case GaugeFloat64: + fmt.Fprintf(w, "%s.%s.value %f %d\n", c.Prefix, name, metric.Value(), now) + case Histogram: + h := metric.Snapshot() + ps := h.Percentiles(c.Percentiles) + fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, h.Count(), now) + fmt.Fprintf(w, "%s.%s.min %d %d\n", c.Prefix, name, h.Min(), now) + fmt.Fprintf(w, "%s.%s.max %d %d\n", c.Prefix, name, h.Max(), now) + fmt.Fprintf(w, "%s.%s.mean %.2f %d\n", c.Prefix, name, h.Mean(), now) + fmt.Fprintf(w, "%s.%s.std-dev %.2f %d\n", c.Prefix, name, h.StdDev(), now) + for psIdx, psKey := range c.Percentiles { + key := strings.Replace(strconv.FormatFloat(psKey*100.0, 'f', -1, 64), ".", "", 1) + fmt.Fprintf(w, "%s.%s.%s-percentile %.2f %d\n", c.Prefix, name, key, ps[psIdx], now) + } + case Meter: + m := metric.Snapshot() + fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, m.Count(), now) + fmt.Fprintf(w, "%s.%s.one-minute %.2f %d\n", c.Prefix, name, m.Rate1(), now) + fmt.Fprintf(w, "%s.%s.five-minute %.2f %d\n", c.Prefix, name, m.Rate5(), now) + fmt.Fprintf(w, "%s.%s.fifteen-minute %.2f %d\n", c.Prefix, name, m.Rate15(), now) + fmt.Fprintf(w, "%s.%s.mean %.2f %d\n", c.Prefix, name, m.RateMean(), now) + case Timer: + t := metric.Snapshot() + ps := t.Percentiles(c.Percentiles) + fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, t.Count(), now) + fmt.Fprintf(w, "%s.%s.min %d %d\n", c.Prefix, name, t.Min()/int64(du), now) + fmt.Fprintf(w, "%s.%s.max %d %d\n", c.Prefix, name, t.Max()/int64(du), now) + fmt.Fprintf(w, "%s.%s.mean %.2f %d\n", c.Prefix, name, t.Mean()/du, now) + fmt.Fprintf(w, "%s.%s.std-dev %.2f %d\n", c.Prefix, name, t.StdDev()/du, now) + for psIdx, psKey := range c.Percentiles { + key := strings.Replace(strconv.FormatFloat(psKey*100.0, 'f', -1, 64), ".", "", 1) + fmt.Fprintf(w, "%s.%s.%s-percentile %.2f %d\n", c.Prefix, name, key, ps[psIdx], now) + } + fmt.Fprintf(w, "%s.%s.one-minute %.2f %d\n", c.Prefix, name, t.Rate1(), now) + fmt.Fprintf(w, "%s.%s.five-minute %.2f %d\n", c.Prefix, name, t.Rate5(), now) + fmt.Fprintf(w, "%s.%s.fifteen-minute %.2f %d\n", c.Prefix, name, t.Rate15(), now) + fmt.Fprintf(w, "%s.%s.mean-rate %.2f %d\n", c.Prefix, name, t.RateMean(), now) + } + w.Flush() + }) + return nil +} diff --git a/metrics/graphite_test.go b/metrics/graphite_test.go new file mode 100644 index 000000000..c797c781d --- /dev/null +++ b/metrics/graphite_test.go @@ -0,0 +1,22 @@ +package metrics + +import ( + "net" + "time" +) + +func ExampleGraphite() { + addr, _ := net.ResolveTCPAddr("net", ":2003") + go Graphite(DefaultRegistry, 1*time.Second, "some.prefix", addr) +} + +func ExampleGraphiteWithConfig() { + addr, _ := net.ResolveTCPAddr("net", ":2003") + go GraphiteWithConfig(GraphiteConfig{ + Addr: addr, + Registry: DefaultRegistry, + FlushInterval: 1 * time.Second, + DurationUnit: time.Millisecond, + Percentiles: []float64{0.5, 0.75, 0.99, 0.999}, + }) +} diff --git a/metrics/healthcheck.go b/metrics/healthcheck.go new file mode 100644 index 000000000..f1ae31e34 --- /dev/null +++ b/metrics/healthcheck.go @@ -0,0 +1,61 @@ +package metrics + +// Healthchecks hold an error value describing an arbitrary up/down status. +type Healthcheck interface { + Check() + Error() error + Healthy() + Unhealthy(error) +} + +// NewHealthcheck constructs a new Healthcheck which will use the given +// function to update its status. +func NewHealthcheck(f func(Healthcheck)) Healthcheck { + if !Enabled { + return NilHealthcheck{} + } + return &StandardHealthcheck{nil, f} +} + +// NilHealthcheck is a no-op. +type NilHealthcheck struct{} + +// Check is a no-op. +func (NilHealthcheck) Check() {} + +// Error is a no-op. +func (NilHealthcheck) Error() error { return nil } + +// Healthy is a no-op. +func (NilHealthcheck) Healthy() {} + +// Unhealthy is a no-op. +func (NilHealthcheck) Unhealthy(error) {} + +// StandardHealthcheck is the standard implementation of a Healthcheck and +// stores the status and a function to call to update the status. +type StandardHealthcheck struct { + err error + f func(Healthcheck) +} + +// Check runs the healthcheck function to update the healthcheck's status. +func (h *StandardHealthcheck) Check() { + h.f(h) +} + +// Error returns the healthcheck's status, which will be nil if it is healthy. +func (h *StandardHealthcheck) Error() error { + return h.err +} + +// Healthy marks the healthcheck as healthy. +func (h *StandardHealthcheck) Healthy() { + h.err = nil +} + +// Unhealthy marks the healthcheck as unhealthy. The error is stored and +// may be retrieved by the Error method. +func (h *StandardHealthcheck) Unhealthy(err error) { + h.err = err +} diff --git a/metrics/histogram.go b/metrics/histogram.go new file mode 100644 index 000000000..46f3bbd2f --- /dev/null +++ b/metrics/histogram.go @@ -0,0 +1,202 @@ +package metrics + +// Histograms calculate distribution statistics from a series of int64 values. +type Histogram interface { + Clear() + Count() int64 + Max() int64 + Mean() float64 + Min() int64 + Percentile(float64) float64 + Percentiles([]float64) []float64 + Sample() Sample + Snapshot() Histogram + StdDev() float64 + Sum() int64 + Update(int64) + Variance() float64 +} + +// GetOrRegisterHistogram returns an existing Histogram or constructs and +// registers a new StandardHistogram. +func GetOrRegisterHistogram(name string, r Registry, s Sample) Histogram { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, func() Histogram { return NewHistogram(s) }).(Histogram) +} + +// NewHistogram constructs a new StandardHistogram from a Sample. +func NewHistogram(s Sample) Histogram { + if !Enabled { + return NilHistogram{} + } + return &StandardHistogram{sample: s} +} + +// NewRegisteredHistogram constructs and registers a new StandardHistogram from +// a Sample. +func NewRegisteredHistogram(name string, r Registry, s Sample) Histogram { + c := NewHistogram(s) + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// HistogramSnapshot is a read-only copy of another Histogram. +type HistogramSnapshot struct { + sample *SampleSnapshot +} + +// Clear panics. +func (*HistogramSnapshot) Clear() { + panic("Clear called on a HistogramSnapshot") +} + +// Count returns the number of samples recorded at the time the snapshot was +// taken. +func (h *HistogramSnapshot) Count() int64 { return h.sample.Count() } + +// Max returns the maximum value in the sample at the time the snapshot was +// taken. +func (h *HistogramSnapshot) Max() int64 { return h.sample.Max() } + +// Mean returns the mean of the values in the sample at the time the snapshot +// was taken. +func (h *HistogramSnapshot) Mean() float64 { return h.sample.Mean() } + +// Min returns the minimum value in the sample at the time the snapshot was +// taken. +func (h *HistogramSnapshot) Min() int64 { return h.sample.Min() } + +// Percentile returns an arbitrary percentile of values in the sample at the +// time the snapshot was taken. +func (h *HistogramSnapshot) Percentile(p float64) float64 { + return h.sample.Percentile(p) +} + +// Percentiles returns a slice of arbitrary percentiles of values in the sample +// at the time the snapshot was taken. +func (h *HistogramSnapshot) Percentiles(ps []float64) []float64 { + return h.sample.Percentiles(ps) +} + +// Sample returns the Sample underlying the histogram. +func (h *HistogramSnapshot) Sample() Sample { return h.sample } + +// Snapshot returns the snapshot. +func (h *HistogramSnapshot) Snapshot() Histogram { return h } + +// StdDev returns the standard deviation of the values in the sample at the +// time the snapshot was taken. +func (h *HistogramSnapshot) StdDev() float64 { return h.sample.StdDev() } + +// Sum returns the sum in the sample at the time the snapshot was taken. +func (h *HistogramSnapshot) Sum() int64 { return h.sample.Sum() } + +// Update panics. +func (*HistogramSnapshot) Update(int64) { + panic("Update called on a HistogramSnapshot") +} + +// Variance returns the variance of inputs at the time the snapshot was taken. +func (h *HistogramSnapshot) Variance() float64 { return h.sample.Variance() } + +// NilHistogram is a no-op Histogram. +type NilHistogram struct{} + +// Clear is a no-op. +func (NilHistogram) Clear() {} + +// Count is a no-op. +func (NilHistogram) Count() int64 { return 0 } + +// Max is a no-op. +func (NilHistogram) Max() int64 { return 0 } + +// Mean is a no-op. +func (NilHistogram) Mean() float64 { return 0.0 } + +// Min is a no-op. +func (NilHistogram) Min() int64 { return 0 } + +// Percentile is a no-op. +func (NilHistogram) Percentile(p float64) float64 { return 0.0 } + +// Percentiles is a no-op. +func (NilHistogram) Percentiles(ps []float64) []float64 { + return make([]float64, len(ps)) +} + +// Sample is a no-op. +func (NilHistogram) Sample() Sample { return NilSample{} } + +// Snapshot is a no-op. +func (NilHistogram) Snapshot() Histogram { return NilHistogram{} } + +// StdDev is a no-op. +func (NilHistogram) StdDev() float64 { return 0.0 } + +// Sum is a no-op. +func (NilHistogram) Sum() int64 { return 0 } + +// Update is a no-op. +func (NilHistogram) Update(v int64) {} + +// Variance is a no-op. +func (NilHistogram) Variance() float64 { return 0.0 } + +// StandardHistogram is the standard implementation of a Histogram and uses a +// Sample to bound its memory use. +type StandardHistogram struct { + sample Sample +} + +// Clear clears the histogram and its sample. +func (h *StandardHistogram) Clear() { h.sample.Clear() } + +// Count returns the number of samples recorded since the histogram was last +// cleared. +func (h *StandardHistogram) Count() int64 { return h.sample.Count() } + +// Max returns the maximum value in the sample. +func (h *StandardHistogram) Max() int64 { return h.sample.Max() } + +// Mean returns the mean of the values in the sample. +func (h *StandardHistogram) Mean() float64 { return h.sample.Mean() } + +// Min returns the minimum value in the sample. +func (h *StandardHistogram) Min() int64 { return h.sample.Min() } + +// Percentile returns an arbitrary percentile of the values in the sample. +func (h *StandardHistogram) Percentile(p float64) float64 { + return h.sample.Percentile(p) +} + +// Percentiles returns a slice of arbitrary percentiles of the values in the +// sample. +func (h *StandardHistogram) Percentiles(ps []float64) []float64 { + return h.sample.Percentiles(ps) +} + +// Sample returns the Sample underlying the histogram. +func (h *StandardHistogram) Sample() Sample { return h.sample } + +// Snapshot returns a read-only copy of the histogram. +func (h *StandardHistogram) Snapshot() Histogram { + return &HistogramSnapshot{sample: h.sample.Snapshot().(*SampleSnapshot)} +} + +// StdDev returns the standard deviation of the values in the sample. +func (h *StandardHistogram) StdDev() float64 { return h.sample.StdDev() } + +// Sum returns the sum in the sample. +func (h *StandardHistogram) Sum() int64 { return h.sample.Sum() } + +// Update samples a new value. +func (h *StandardHistogram) Update(v int64) { h.sample.Update(v) } + +// Variance returns the variance of the values in the sample. +func (h *StandardHistogram) Variance() float64 { return h.sample.Variance() } diff --git a/metrics/histogram_test.go b/metrics/histogram_test.go new file mode 100644 index 000000000..d7f4f0171 --- /dev/null +++ b/metrics/histogram_test.go @@ -0,0 +1,95 @@ +package metrics + +import "testing" + +func BenchmarkHistogram(b *testing.B) { + h := NewHistogram(NewUniformSample(100)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + h.Update(int64(i)) + } +} + +func TestGetOrRegisterHistogram(t *testing.T) { + r := NewRegistry() + s := NewUniformSample(100) + NewRegisteredHistogram("foo", r, s).Update(47) + if h := GetOrRegisterHistogram("foo", r, s); 1 != h.Count() { + t.Fatal(h) + } +} + +func TestHistogram10000(t *testing.T) { + h := NewHistogram(NewUniformSample(100000)) + for i := 1; i <= 10000; i++ { + h.Update(int64(i)) + } + testHistogram10000(t, h) +} + +func TestHistogramEmpty(t *testing.T) { + h := NewHistogram(NewUniformSample(100)) + if count := h.Count(); 0 != count { + t.Errorf("h.Count(): 0 != %v\n", count) + } + if min := h.Min(); 0 != min { + t.Errorf("h.Min(): 0 != %v\n", min) + } + if max := h.Max(); 0 != max { + t.Errorf("h.Max(): 0 != %v\n", max) + } + if mean := h.Mean(); 0.0 != mean { + t.Errorf("h.Mean(): 0.0 != %v\n", mean) + } + if stdDev := h.StdDev(); 0.0 != stdDev { + t.Errorf("h.StdDev(): 0.0 != %v\n", stdDev) + } + ps := h.Percentiles([]float64{0.5, 0.75, 0.99}) + if 0.0 != ps[0] { + t.Errorf("median: 0.0 != %v\n", ps[0]) + } + if 0.0 != ps[1] { + t.Errorf("75th percentile: 0.0 != %v\n", ps[1]) + } + if 0.0 != ps[2] { + t.Errorf("99th percentile: 0.0 != %v\n", ps[2]) + } +} + +func TestHistogramSnapshot(t *testing.T) { + h := NewHistogram(NewUniformSample(100000)) + for i := 1; i <= 10000; i++ { + h.Update(int64(i)) + } + snapshot := h.Snapshot() + h.Update(0) + testHistogram10000(t, snapshot) +} + +func testHistogram10000(t *testing.T, h Histogram) { + if count := h.Count(); 10000 != count { + t.Errorf("h.Count(): 10000 != %v\n", count) + } + if min := h.Min(); 1 != min { + t.Errorf("h.Min(): 1 != %v\n", min) + } + if max := h.Max(); 10000 != max { + t.Errorf("h.Max(): 10000 != %v\n", max) + } + if mean := h.Mean(); 5000.5 != mean { + t.Errorf("h.Mean(): 5000.5 != %v\n", mean) + } + if stdDev := h.StdDev(); 2886.751331514372 != stdDev { + t.Errorf("h.StdDev(): 2886.751331514372 != %v\n", stdDev) + } + ps := h.Percentiles([]float64{0.5, 0.75, 0.99}) + if 5000.5 != ps[0] { + t.Errorf("median: 5000.5 != %v\n", ps[0]) + } + if 7500.75 != ps[1] { + t.Errorf("75th percentile: 7500.75 != %v\n", ps[1]) + } + if 9900.99 != ps[2] { + t.Errorf("99th percentile: 9900.99 != %v\n", ps[2]) + } +} diff --git a/metrics/influxdb/LICENSE b/metrics/influxdb/LICENSE new file mode 100644 index 000000000..e5bf20cdb --- /dev/null +++ b/metrics/influxdb/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2015 Vincent Rischmann + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/metrics/influxdb/README.md b/metrics/influxdb/README.md new file mode 100644 index 000000000..b76b1a3f9 --- /dev/null +++ b/metrics/influxdb/README.md @@ -0,0 +1,30 @@ +go-metrics-influxdb +=================== + +This is a reporter for the [go-metrics](https://github.com/rcrowley/go-metrics) library which will post the metrics to [InfluxDB](https://influxdb.com/). + +Note +---- + +This is only compatible with InfluxDB 0.9+. + +Usage +----- + +```go +import "github.com/vrischmann/go-metrics-influxdb" + +go influxdb.InfluxDB( + metrics.DefaultRegistry, // metrics registry + time.Second * 10, // interval + "http://localhost:8086", // the InfluxDB url + "mydb", // your InfluxDB database + "myuser", // your InfluxDB user + "mypassword", // your InfluxDB password +) +``` + +License +------- + +go-metrics-influxdb is licensed under the MIT license. See the LICENSE file for details. diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go new file mode 100644 index 000000000..d5cb4da66 --- /dev/null +++ b/metrics/influxdb/influxdb.go @@ -0,0 +1,227 @@ +package influxdb + +import ( + "fmt" + "log" + uurl "net/url" + "time" + + "github.com/ethereum/go-ethereum/metrics" + "github.com/influxdata/influxdb/client" +) + +type reporter struct { + reg metrics.Registry + interval time.Duration + + url uurl.URL + database string + username string + password string + namespace string + tags map[string]string + + client *client.Client + + cache map[string]int64 +} + +// InfluxDB starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval. +func InfluxDB(r metrics.Registry, d time.Duration, url, database, username, password, namespace string) { + InfluxDBWithTags(r, d, url, database, username, password, namespace, nil) +} + +// InfluxDBWithTags starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval with the specified tags +func InfluxDBWithTags(r metrics.Registry, d time.Duration, url, database, username, password, namespace string, tags map[string]string) { + u, err := uurl.Parse(url) + if err != nil { + log.Printf("unable to parse InfluxDB url %s. err=%v", url, err) + return + } + + rep := &reporter{ + reg: r, + interval: d, + url: *u, + database: database, + username: username, + password: password, + namespace: namespace, + tags: tags, + cache: make(map[string]int64), + } + if err := rep.makeClient(); err != nil { + log.Printf("unable to make InfluxDB client. err=%v", err) + return + } + + rep.run() +} + +func (r *reporter) makeClient() (err error) { + r.client, err = client.NewClient(client.Config{ + URL: r.url, + Username: r.username, + Password: r.password, + }) + + return +} + +func (r *reporter) run() { + intervalTicker := time.Tick(r.interval) + pingTicker := time.Tick(time.Second * 5) + + for { + select { + case <-intervalTicker: + if err := r.send(); err != nil { + log.Printf("unable to send to InfluxDB. err=%v", err) + } + case <-pingTicker: + _, _, err := r.client.Ping() + if err != nil { + log.Printf("got error while sending a ping to InfluxDB, trying to recreate client. err=%v", err) + + if err = r.makeClient(); err != nil { + log.Printf("unable to make InfluxDB client. err=%v", err) + } + } + } + } +} + +func (r *reporter) send() error { + var pts []client.Point + + r.reg.Each(func(name string, i interface{}) { + now := time.Now() + namespace := r.namespace + + switch metric := i.(type) { + case metrics.Counter: + v := metric.Count() + l := r.cache[name] + pts = append(pts, client.Point{ + Measurement: fmt.Sprintf("%s%s.count", namespace, name), + Tags: r.tags, + Fields: map[string]interface{}{ + "value": v - l, + }, + Time: now, + }) + r.cache[name] = v + case metrics.Gauge: + ms := metric.Snapshot() + pts = append(pts, client.Point{ + Measurement: fmt.Sprintf("%s%s.gauge", namespace, name), + Tags: r.tags, + Fields: map[string]interface{}{ + "value": ms.Value(), + }, + Time: now, + }) + case metrics.GaugeFloat64: + ms := metric.Snapshot() + pts = append(pts, client.Point{ + Measurement: fmt.Sprintf("%s%s.gauge", namespace, name), + Tags: r.tags, + Fields: map[string]interface{}{ + "value": ms.Value(), + }, + Time: now, + }) + case metrics.Histogram: + ms := metric.Snapshot() + ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + pts = append(pts, client.Point{ + Measurement: fmt.Sprintf("%s%s.histogram", namespace, name), + Tags: r.tags, + Fields: map[string]interface{}{ + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "stddev": ms.StdDev(), + "variance": ms.Variance(), + "p50": ps[0], + "p75": ps[1], + "p95": ps[2], + "p99": ps[3], + "p999": ps[4], + "p9999": ps[5], + }, + Time: now, + }) + case metrics.Meter: + ms := metric.Snapshot() + pts = append(pts, client.Point{ + Measurement: fmt.Sprintf("%s%s.meter", namespace, name), + Tags: r.tags, + Fields: map[string]interface{}{ + "count": ms.Count(), + "m1": ms.Rate1(), + "m5": ms.Rate5(), + "m15": ms.Rate15(), + "mean": ms.RateMean(), + }, + Time: now, + }) + case metrics.Timer: + ms := metric.Snapshot() + ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + pts = append(pts, client.Point{ + Measurement: fmt.Sprintf("%s%s.timer", namespace, name), + Tags: r.tags, + Fields: map[string]interface{}{ + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "stddev": ms.StdDev(), + "variance": ms.Variance(), + "p50": ps[0], + "p75": ps[1], + "p95": ps[2], + "p99": ps[3], + "p999": ps[4], + "p9999": ps[5], + "m1": ms.Rate1(), + "m5": ms.Rate5(), + "m15": ms.Rate15(), + "meanrate": ms.RateMean(), + }, + Time: now, + }) + case metrics.ResettingTimer: + t := metric.Snapshot() + + if len(t.Values()) > 0 { + ps := t.Percentiles([]float64{50, 95, 99}) + val := t.Values() + pts = append(pts, client.Point{ + Measurement: fmt.Sprintf("%s%s.span", namespace, name), + Tags: r.tags, + Fields: map[string]interface{}{ + "count": len(val), + "max": val[len(val)-1], + "mean": t.Mean(), + "min": val[0], + "p50": ps[0], + "p95": ps[1], + "p99": ps[2], + }, + Time: now, + }) + } + } + }) + + bps := client.BatchPoints{ + Points: pts, + Database: r.database, + } + + _, err := r.client.Write(bps) + return err +} diff --git a/metrics/init_test.go b/metrics/init_test.go new file mode 100644 index 000000000..43401e833 --- /dev/null +++ b/metrics/init_test.go @@ -0,0 +1,5 @@ +package metrics + +func init() { + Enabled = true +} diff --git a/metrics/json.go b/metrics/json.go new file mode 100644 index 000000000..2087d8211 --- /dev/null +++ b/metrics/json.go @@ -0,0 +1,31 @@ +package metrics + +import ( + "encoding/json" + "io" + "time" +) + +// MarshalJSON returns a byte slice containing a JSON representation of all +// the metrics in the Registry. +func (r *StandardRegistry) MarshalJSON() ([]byte, error) { + return json.Marshal(r.GetAll()) +} + +// WriteJSON writes metrics from the given registry periodically to the +// specified io.Writer as JSON. +func WriteJSON(r Registry, d time.Duration, w io.Writer) { + for range time.Tick(d) { + WriteJSONOnce(r, w) + } +} + +// WriteJSONOnce writes metrics from the given registry to the specified +// io.Writer as JSON. +func WriteJSONOnce(r Registry, w io.Writer) { + json.NewEncoder(w).Encode(r) +} + +func (p *PrefixedRegistry) MarshalJSON() ([]byte, error) { + return json.Marshal(p.GetAll()) +} diff --git a/metrics/json_test.go b/metrics/json_test.go new file mode 100644 index 000000000..cf70051f7 --- /dev/null +++ b/metrics/json_test.go @@ -0,0 +1,28 @@ +package metrics + +import ( + "bytes" + "encoding/json" + "testing" +) + +func TestRegistryMarshallJSON(t *testing.T) { + b := &bytes.Buffer{} + enc := json.NewEncoder(b) + r := NewRegistry() + r.Register("counter", NewCounter()) + enc.Encode(r) + if s := b.String(); "{\"counter\":{\"count\":0}}\n" != s { + t.Fatalf(s) + } +} + +func TestRegistryWriteJSONOnce(t *testing.T) { + r := NewRegistry() + r.Register("counter", NewCounter()) + b := &bytes.Buffer{} + WriteJSONOnce(r, b) + if s := b.String(); s != "{\"counter\":{\"count\":0}}\n" { + t.Fail() + } +} diff --git a/metrics/librato/client.go b/metrics/librato/client.go new file mode 100644 index 000000000..8c0c850e3 --- /dev/null +++ b/metrics/librato/client.go @@ -0,0 +1,102 @@ +package librato + +import ( + "bytes" + "encoding/json" + "fmt" + "io/ioutil" + "net/http" +) + +const Operations = "operations" +const OperationsShort = "ops" + +type LibratoClient struct { + Email, Token string +} + +// property strings +const ( + // display attributes + Color = "color" + DisplayMax = "display_max" + DisplayMin = "display_min" + DisplayUnitsLong = "display_units_long" + DisplayUnitsShort = "display_units_short" + DisplayStacked = "display_stacked" + DisplayTransform = "display_transform" + // special gauge display attributes + SummarizeFunction = "summarize_function" + Aggregate = "aggregate" + + // metric keys + Name = "name" + Period = "period" + Description = "description" + DisplayName = "display_name" + Attributes = "attributes" + + // measurement keys + MeasureTime = "measure_time" + Source = "source" + Value = "value" + + // special gauge keys + Count = "count" + Sum = "sum" + Max = "max" + Min = "min" + SumSquares = "sum_squares" + + // batch keys + Counters = "counters" + Gauges = "gauges" + + MetricsPostUrl = "https://metrics-api.librato.com/v1/metrics" +) + +type Measurement map[string]interface{} +type Metric map[string]interface{} + +type Batch struct { + Gauges []Measurement `json:"gauges,omitempty"` + Counters []Measurement `json:"counters,omitempty"` + MeasureTime int64 `json:"measure_time"` + Source string `json:"source"` +} + +func (self *LibratoClient) PostMetrics(batch Batch) (err error) { + var ( + js []byte + req *http.Request + resp *http.Response + ) + + if len(batch.Counters) == 0 && len(batch.Gauges) == 0 { + return nil + } + + if js, err = json.Marshal(batch); err != nil { + return + } + + if req, err = http.NewRequest("POST", MetricsPostUrl, bytes.NewBuffer(js)); err != nil { + return + } + + req.Header.Set("Content-Type", "application/json") + req.SetBasicAuth(self.Email, self.Token) + + if resp, err = http.DefaultClient.Do(req); err != nil { + return + } + + if resp.StatusCode != http.StatusOK { + var body []byte + if body, err = ioutil.ReadAll(resp.Body); err != nil { + body = []byte(fmt.Sprintf("(could not fetch response body for error: %s)", err)) + } + err = fmt.Errorf("Unable to post to Librato: %d %s %s", resp.StatusCode, resp.Status, string(body)) + } + return +} diff --git a/metrics/librato/librato.go b/metrics/librato/librato.go new file mode 100644 index 000000000..f8c8c9ecb --- /dev/null +++ b/metrics/librato/librato.go @@ -0,0 +1,235 @@ +package librato + +import ( + "fmt" + "log" + "math" + "regexp" + "time" + + "github.com/ethereum/go-ethereum/metrics" +) + +// a regexp for extracting the unit from time.Duration.String +var unitRegexp = regexp.MustCompile(`[^\\d]+$`) + +// a helper that turns a time.Duration into librato display attributes for timer metrics +func translateTimerAttributes(d time.Duration) (attrs map[string]interface{}) { + attrs = make(map[string]interface{}) + attrs[DisplayTransform] = fmt.Sprintf("x/%d", int64(d)) + attrs[DisplayUnitsShort] = string(unitRegexp.Find([]byte(d.String()))) + return +} + +type Reporter struct { + Email, Token string + Namespace string + Source string + Interval time.Duration + Registry metrics.Registry + Percentiles []float64 // percentiles to report on histogram metrics + TimerAttributes map[string]interface{} // units in which timers will be displayed + intervalSec int64 +} + +func NewReporter(r metrics.Registry, d time.Duration, e string, t string, s string, p []float64, u time.Duration) *Reporter { + return &Reporter{e, t, "", s, d, r, p, translateTimerAttributes(u), int64(d / time.Second)} +} + +func Librato(r metrics.Registry, d time.Duration, e string, t string, s string, p []float64, u time.Duration) { + NewReporter(r, d, e, t, s, p, u).Run() +} + +func (self *Reporter) Run() { + log.Printf("WARNING: This client has been DEPRECATED! It has been moved to https://github.com/mihasya/go-metrics-librato and will be removed from rcrowley/go-metrics on August 5th 2015") + ticker := time.Tick(self.Interval) + metricsApi := &LibratoClient{self.Email, self.Token} + for now := range ticker { + var metrics Batch + var err error + if metrics, err = self.BuildRequest(now, self.Registry); err != nil { + log.Printf("ERROR constructing librato request body %s", err) + continue + } + if err := metricsApi.PostMetrics(metrics); err != nil { + log.Printf("ERROR sending metrics to librato %s", err) + continue + } + } +} + +// calculate sum of squares from data provided by metrics.Histogram +// see http://en.wikipedia.org/wiki/Standard_deviation#Rapid_calculation_methods +func sumSquares(s metrics.Sample) float64 { + count := float64(s.Count()) + sumSquared := math.Pow(count*s.Mean(), 2) + sumSquares := math.Pow(count*s.StdDev(), 2) + sumSquared/count + if math.IsNaN(sumSquares) { + return 0.0 + } + return sumSquares +} +func sumSquaresTimer(t metrics.Timer) float64 { + count := float64(t.Count()) + sumSquared := math.Pow(count*t.Mean(), 2) + sumSquares := math.Pow(count*t.StdDev(), 2) + sumSquared/count + if math.IsNaN(sumSquares) { + return 0.0 + } + return sumSquares +} + +func (self *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Batch, err error) { + snapshot = Batch{ + // coerce timestamps to a stepping fn so that they line up in Librato graphs + MeasureTime: (now.Unix() / self.intervalSec) * self.intervalSec, + Source: self.Source, + } + snapshot.Gauges = make([]Measurement, 0) + snapshot.Counters = make([]Measurement, 0) + histogramGaugeCount := 1 + len(self.Percentiles) + r.Each(func(name string, metric interface{}) { + if self.Namespace != "" { + name = fmt.Sprintf("%s.%s", self.Namespace, name) + } + measurement := Measurement{} + measurement[Period] = self.Interval.Seconds() + switch m := metric.(type) { + case metrics.Counter: + if m.Count() > 0 { + measurement[Name] = fmt.Sprintf("%s.%s", name, "count") + measurement[Value] = float64(m.Count()) + measurement[Attributes] = map[string]interface{}{ + DisplayUnitsLong: Operations, + DisplayUnitsShort: OperationsShort, + DisplayMin: "0", + } + snapshot.Counters = append(snapshot.Counters, measurement) + } + case metrics.Gauge: + measurement[Name] = name + measurement[Value] = float64(m.Value()) + snapshot.Gauges = append(snapshot.Gauges, measurement) + case metrics.GaugeFloat64: + measurement[Name] = name + measurement[Value] = m.Value() + snapshot.Gauges = append(snapshot.Gauges, measurement) + case metrics.Histogram: + if m.Count() > 0 { + gauges := make([]Measurement, histogramGaugeCount) + s := m.Sample() + measurement[Name] = fmt.Sprintf("%s.%s", name, "hist") + measurement[Count] = uint64(s.Count()) + measurement[Max] = float64(s.Max()) + measurement[Min] = float64(s.Min()) + measurement[Sum] = float64(s.Sum()) + measurement[SumSquares] = sumSquares(s) + gauges[0] = measurement + for i, p := range self.Percentiles { + gauges[i+1] = Measurement{ + Name: fmt.Sprintf("%s.%.2f", measurement[Name], p), + Value: s.Percentile(p), + Period: measurement[Period], + } + } + snapshot.Gauges = append(snapshot.Gauges, gauges...) + } + case metrics.Meter: + measurement[Name] = name + measurement[Value] = float64(m.Count()) + snapshot.Counters = append(snapshot.Counters, measurement) + snapshot.Gauges = append(snapshot.Gauges, + Measurement{ + Name: fmt.Sprintf("%s.%s", name, "1min"), + Value: m.Rate1(), + Period: int64(self.Interval.Seconds()), + Attributes: map[string]interface{}{ + DisplayUnitsLong: Operations, + DisplayUnitsShort: OperationsShort, + DisplayMin: "0", + }, + }, + Measurement{ + Name: fmt.Sprintf("%s.%s", name, "5min"), + Value: m.Rate5(), + Period: int64(self.Interval.Seconds()), + Attributes: map[string]interface{}{ + DisplayUnitsLong: Operations, + DisplayUnitsShort: OperationsShort, + DisplayMin: "0", + }, + }, + Measurement{ + Name: fmt.Sprintf("%s.%s", name, "15min"), + Value: m.Rate15(), + Period: int64(self.Interval.Seconds()), + Attributes: map[string]interface{}{ + DisplayUnitsLong: Operations, + DisplayUnitsShort: OperationsShort, + DisplayMin: "0", + }, + }, + ) + case metrics.Timer: + measurement[Name] = name + measurement[Value] = float64(m.Count()) + snapshot.Counters = append(snapshot.Counters, measurement) + if m.Count() > 0 { + libratoName := fmt.Sprintf("%s.%s", name, "timer.mean") + gauges := make([]Measurement, histogramGaugeCount) + gauges[0] = Measurement{ + Name: libratoName, + Count: uint64(m.Count()), + Sum: m.Mean() * float64(m.Count()), + Max: float64(m.Max()), + Min: float64(m.Min()), + SumSquares: sumSquaresTimer(m), + Period: int64(self.Interval.Seconds()), + Attributes: self.TimerAttributes, + } + for i, p := range self.Percentiles { + gauges[i+1] = Measurement{ + Name: fmt.Sprintf("%s.timer.%2.0f", name, p*100), + Value: m.Percentile(p), + Period: int64(self.Interval.Seconds()), + Attributes: self.TimerAttributes, + } + } + snapshot.Gauges = append(snapshot.Gauges, gauges...) + snapshot.Gauges = append(snapshot.Gauges, + Measurement{ + Name: fmt.Sprintf("%s.%s", name, "rate.1min"), + Value: m.Rate1(), + Period: int64(self.Interval.Seconds()), + Attributes: map[string]interface{}{ + DisplayUnitsLong: Operations, + DisplayUnitsShort: OperationsShort, + DisplayMin: "0", + }, + }, + Measurement{ + Name: fmt.Sprintf("%s.%s", name, "rate.5min"), + Value: m.Rate5(), + Period: int64(self.Interval.Seconds()), + Attributes: map[string]interface{}{ + DisplayUnitsLong: Operations, + DisplayUnitsShort: OperationsShort, + DisplayMin: "0", + }, + }, + Measurement{ + Name: fmt.Sprintf("%s.%s", name, "rate.15min"), + Value: m.Rate15(), + Period: int64(self.Interval.Seconds()), + Attributes: map[string]interface{}{ + DisplayUnitsLong: Operations, + DisplayUnitsShort: OperationsShort, + DisplayMin: "0", + }, + }, + ) + } + } + }) + return +} diff --git a/metrics/log.go b/metrics/log.go new file mode 100644 index 000000000..0c8ea7c97 --- /dev/null +++ b/metrics/log.go @@ -0,0 +1,80 @@ +package metrics + +import ( + "time" +) + +type Logger interface { + Printf(format string, v ...interface{}) +} + +func Log(r Registry, freq time.Duration, l Logger) { + LogScaled(r, freq, time.Nanosecond, l) +} + +// Output each metric in the given registry periodically using the given +// logger. Print timings in `scale` units (eg time.Millisecond) rather than nanos. +func LogScaled(r Registry, freq time.Duration, scale time.Duration, l Logger) { + du := float64(scale) + duSuffix := scale.String()[1:] + + for range time.Tick(freq) { + r.Each(func(name string, i interface{}) { + switch metric := i.(type) { + case Counter: + l.Printf("counter %s\n", name) + l.Printf(" count: %9d\n", metric.Count()) + case Gauge: + l.Printf("gauge %s\n", name) + l.Printf(" value: %9d\n", metric.Value()) + case GaugeFloat64: + l.Printf("gauge %s\n", name) + l.Printf(" value: %f\n", metric.Value()) + case Healthcheck: + metric.Check() + l.Printf("healthcheck %s\n", name) + l.Printf(" error: %v\n", metric.Error()) + case Histogram: + h := metric.Snapshot() + ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + l.Printf("histogram %s\n", name) + l.Printf(" count: %9d\n", h.Count()) + l.Printf(" min: %9d\n", h.Min()) + l.Printf(" max: %9d\n", h.Max()) + l.Printf(" mean: %12.2f\n", h.Mean()) + l.Printf(" stddev: %12.2f\n", h.StdDev()) + l.Printf(" median: %12.2f\n", ps[0]) + l.Printf(" 75%%: %12.2f\n", ps[1]) + l.Printf(" 95%%: %12.2f\n", ps[2]) + l.Printf(" 99%%: %12.2f\n", ps[3]) + l.Printf(" 99.9%%: %12.2f\n", ps[4]) + case Meter: + m := metric.Snapshot() + l.Printf("meter %s\n", name) + l.Printf(" count: %9d\n", m.Count()) + l.Printf(" 1-min rate: %12.2f\n", m.Rate1()) + l.Printf(" 5-min rate: %12.2f\n", m.Rate5()) + l.Printf(" 15-min rate: %12.2f\n", m.Rate15()) + l.Printf(" mean rate: %12.2f\n", m.RateMean()) + case Timer: + t := metric.Snapshot() + ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + l.Printf("timer %s\n", name) + l.Printf(" count: %9d\n", t.Count()) + l.Printf(" min: %12.2f%s\n", float64(t.Min())/du, duSuffix) + l.Printf(" max: %12.2f%s\n", float64(t.Max())/du, duSuffix) + l.Printf(" mean: %12.2f%s\n", t.Mean()/du, duSuffix) + l.Printf(" stddev: %12.2f%s\n", t.StdDev()/du, duSuffix) + l.Printf(" median: %12.2f%s\n", ps[0]/du, duSuffix) + l.Printf(" 75%%: %12.2f%s\n", ps[1]/du, duSuffix) + l.Printf(" 95%%: %12.2f%s\n", ps[2]/du, duSuffix) + l.Printf(" 99%%: %12.2f%s\n", ps[3]/du, duSuffix) + l.Printf(" 99.9%%: %12.2f%s\n", ps[4]/du, duSuffix) + l.Printf(" 1-min rate: %12.2f\n", t.Rate1()) + l.Printf(" 5-min rate: %12.2f\n", t.Rate5()) + l.Printf(" 15-min rate: %12.2f\n", t.Rate15()) + l.Printf(" mean rate: %12.2f\n", t.RateMean()) + } + }) + } +} diff --git a/metrics/memory.md b/metrics/memory.md new file mode 100644 index 000000000..47454f54b --- /dev/null +++ b/metrics/memory.md @@ -0,0 +1,285 @@ +Memory usage +============ + +(Highly unscientific.) + +Command used to gather static memory usage: + +```sh +grep ^Vm "/proc/$(ps fax | grep [m]etrics-bench | awk '{print $1}')/status" +``` + +Program used to gather baseline memory usage: + +```go +package main + +import "time" + +func main() { + time.Sleep(600e9) +} +``` + +Baseline +-------- + +``` +VmPeak: 42604 kB +VmSize: 42604 kB +VmLck: 0 kB +VmHWM: 1120 kB +VmRSS: 1120 kB +VmData: 35460 kB +VmStk: 136 kB +VmExe: 1020 kB +VmLib: 1848 kB +VmPTE: 36 kB +VmSwap: 0 kB +``` + +Program used to gather metric memory usage (with other metrics being similar): + +```go +package main + +import ( + "fmt" + "metrics" + "time" +) + +func main() { + fmt.Sprintf("foo") + metrics.NewRegistry() + time.Sleep(600e9) +} +``` + +1000 counters registered +------------------------ + +``` +VmPeak: 44016 kB +VmSize: 44016 kB +VmLck: 0 kB +VmHWM: 1928 kB +VmRSS: 1928 kB +VmData: 36868 kB +VmStk: 136 kB +VmExe: 1024 kB +VmLib: 1848 kB +VmPTE: 40 kB +VmSwap: 0 kB +``` + +**1.412 kB virtual, TODO 0.808 kB resident per counter.** + +100000 counters registered +-------------------------- + +``` +VmPeak: 55024 kB +VmSize: 55024 kB +VmLck: 0 kB +VmHWM: 12440 kB +VmRSS: 12440 kB +VmData: 47876 kB +VmStk: 136 kB +VmExe: 1024 kB +VmLib: 1848 kB +VmPTE: 64 kB +VmSwap: 0 kB +``` + +**0.1242 kB virtual, 0.1132 kB resident per counter.** + +1000 gauges registered +---------------------- + +``` +VmPeak: 44012 kB +VmSize: 44012 kB +VmLck: 0 kB +VmHWM: 1928 kB +VmRSS: 1928 kB +VmData: 36868 kB +VmStk: 136 kB +VmExe: 1020 kB +VmLib: 1848 kB +VmPTE: 40 kB +VmSwap: 0 kB +``` + +**1.408 kB virtual, 0.808 kB resident per counter.** + +100000 gauges registered +------------------------ + +``` +VmPeak: 55020 kB +VmSize: 55020 kB +VmLck: 0 kB +VmHWM: 12432 kB +VmRSS: 12432 kB +VmData: 47876 kB +VmStk: 136 kB +VmExe: 1020 kB +VmLib: 1848 kB +VmPTE: 60 kB +VmSwap: 0 kB +``` + +**0.12416 kB virtual, 0.11312 resident per gauge.** + +1000 histograms with a uniform sample size of 1028 +-------------------------------------------------- + +``` +VmPeak: 72272 kB +VmSize: 72272 kB +VmLck: 0 kB +VmHWM: 16204 kB +VmRSS: 16204 kB +VmData: 65100 kB +VmStk: 136 kB +VmExe: 1048 kB +VmLib: 1848 kB +VmPTE: 80 kB +VmSwap: 0 kB +``` + +**29.668 kB virtual, TODO 15.084 resident per histogram.** + +10000 histograms with a uniform sample size of 1028 +--------------------------------------------------- + +``` +VmPeak: 256912 kB +VmSize: 256912 kB +VmLck: 0 kB +VmHWM: 146204 kB +VmRSS: 146204 kB +VmData: 249740 kB +VmStk: 136 kB +VmExe: 1048 kB +VmLib: 1848 kB +VmPTE: 448 kB +VmSwap: 0 kB +``` + +**21.4308 kB virtual, 14.5084 kB resident per histogram.** + +50000 histograms with a uniform sample size of 1028 +--------------------------------------------------- + +``` +VmPeak: 908112 kB +VmSize: 908112 kB +VmLck: 0 kB +VmHWM: 645832 kB +VmRSS: 645588 kB +VmData: 900940 kB +VmStk: 136 kB +VmExe: 1048 kB +VmLib: 1848 kB +VmPTE: 1716 kB +VmSwap: 1544 kB +``` + +**17.31016 kB virtual, 12.88936 kB resident per histogram.** + +1000 histograms with an exponentially-decaying sample size of 1028 and alpha of 0.015 +------------------------------------------------------------------------------------- + +``` +VmPeak: 62480 kB +VmSize: 62480 kB +VmLck: 0 kB +VmHWM: 11572 kB +VmRSS: 11572 kB +VmData: 55308 kB +VmStk: 136 kB +VmExe: 1048 kB +VmLib: 1848 kB +VmPTE: 64 kB +VmSwap: 0 kB +``` + +**19.876 kB virtual, 10.452 kB resident per histogram.** + +10000 histograms with an exponentially-decaying sample size of 1028 and alpha of 0.015 +-------------------------------------------------------------------------------------- + +``` +VmPeak: 153296 kB +VmSize: 153296 kB +VmLck: 0 kB +VmHWM: 101176 kB +VmRSS: 101176 kB +VmData: 146124 kB +VmStk: 136 kB +VmExe: 1048 kB +VmLib: 1848 kB +VmPTE: 240 kB +VmSwap: 0 kB +``` + +**11.0692 kB virtual, 10.0056 kB resident per histogram.** + +50000 histograms with an exponentially-decaying sample size of 1028 and alpha of 0.015 +-------------------------------------------------------------------------------------- + +``` +VmPeak: 557264 kB +VmSize: 557264 kB +VmLck: 0 kB +VmHWM: 501056 kB +VmRSS: 501056 kB +VmData: 550092 kB +VmStk: 136 kB +VmExe: 1048 kB +VmLib: 1848 kB +VmPTE: 1032 kB +VmSwap: 0 kB +``` + +**10.2932 kB virtual, 9.99872 kB resident per histogram.** + +1000 meters +----------- + +``` +VmPeak: 74504 kB +VmSize: 74504 kB +VmLck: 0 kB +VmHWM: 24124 kB +VmRSS: 24124 kB +VmData: 67340 kB +VmStk: 136 kB +VmExe: 1040 kB +VmLib: 1848 kB +VmPTE: 92 kB +VmSwap: 0 kB +``` + +**31.9 kB virtual, 23.004 kB resident per meter.** + +10000 meters +------------ + +``` +VmPeak: 278920 kB +VmSize: 278920 kB +VmLck: 0 kB +VmHWM: 227300 kB +VmRSS: 227300 kB +VmData: 271756 kB +VmStk: 136 kB +VmExe: 1040 kB +VmLib: 1848 kB +VmPTE: 488 kB +VmSwap: 0 kB +``` + +**23.6316 kB virtual, 22.618 kB resident per meter.** diff --git a/metrics/meter.go b/metrics/meter.go new file mode 100644 index 000000000..82b2141a6 --- /dev/null +++ b/metrics/meter.go @@ -0,0 +1,261 @@ +package metrics + +import ( + "sync" + "time" +) + +// Meters count events to produce exponentially-weighted moving average rates +// at one-, five-, and fifteen-minutes and a mean rate. +type Meter interface { + Count() int64 + Mark(int64) + Rate1() float64 + Rate5() float64 + Rate15() float64 + RateMean() float64 + Snapshot() Meter + Stop() +} + +// GetOrRegisterMeter returns an existing Meter or constructs and registers a +// new StandardMeter. +// Be sure to unregister the meter from the registry once it is of no use to +// allow for garbage collection. +func GetOrRegisterMeter(name string, r Registry) Meter { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewMeter).(Meter) +} + +// NewMeter constructs a new StandardMeter and launches a goroutine. +// Be sure to call Stop() once the meter is of no use to allow for garbage collection. +func NewMeter() Meter { + if !Enabled { + return NilMeter{} + } + m := newStandardMeter() + arbiter.Lock() + defer arbiter.Unlock() + arbiter.meters[m] = struct{}{} + if !arbiter.started { + arbiter.started = true + go arbiter.tick() + } + return m +} + +// NewMeter constructs and registers a new StandardMeter and launches a +// goroutine. +// Be sure to unregister the meter from the registry once it is of no use to +// allow for garbage collection. +func NewRegisteredMeter(name string, r Registry) Meter { + c := NewMeter() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// MeterSnapshot is a read-only copy of another Meter. +type MeterSnapshot struct { + count int64 + rate1, rate5, rate15, rateMean float64 +} + +// Count returns the count of events at the time the snapshot was taken. +func (m *MeterSnapshot) Count() int64 { return m.count } + +// Mark panics. +func (*MeterSnapshot) Mark(n int64) { + panic("Mark called on a MeterSnapshot") +} + +// Rate1 returns the one-minute moving average rate of events per second at the +// time the snapshot was taken. +func (m *MeterSnapshot) Rate1() float64 { return m.rate1 } + +// Rate5 returns the five-minute moving average rate of events per second at +// the time the snapshot was taken. +func (m *MeterSnapshot) Rate5() float64 { return m.rate5 } + +// Rate15 returns the fifteen-minute moving average rate of events per second +// at the time the snapshot was taken. +func (m *MeterSnapshot) Rate15() float64 { return m.rate15 } + +// RateMean returns the meter's mean rate of events per second at the time the +// snapshot was taken. +func (m *MeterSnapshot) RateMean() float64 { return m.rateMean } + +// Snapshot returns the snapshot. +func (m *MeterSnapshot) Snapshot() Meter { return m } + +// Stop is a no-op. +func (m *MeterSnapshot) Stop() {} + +// NilMeter is a no-op Meter. +type NilMeter struct{} + +// Count is a no-op. +func (NilMeter) Count() int64 { return 0 } + +// Mark is a no-op. +func (NilMeter) Mark(n int64) {} + +// Rate1 is a no-op. +func (NilMeter) Rate1() float64 { return 0.0 } + +// Rate5 is a no-op. +func (NilMeter) Rate5() float64 { return 0.0 } + +// Rate15is a no-op. +func (NilMeter) Rate15() float64 { return 0.0 } + +// RateMean is a no-op. +func (NilMeter) RateMean() float64 { return 0.0 } + +// Snapshot is a no-op. +func (NilMeter) Snapshot() Meter { return NilMeter{} } + +// Stop is a no-op. +func (NilMeter) Stop() {} + +// StandardMeter is the standard implementation of a Meter. +type StandardMeter struct { + lock sync.RWMutex + snapshot *MeterSnapshot + a1, a5, a15 EWMA + startTime time.Time + stopped bool +} + +func newStandardMeter() *StandardMeter { + return &StandardMeter{ + snapshot: &MeterSnapshot{}, + a1: NewEWMA1(), + a5: NewEWMA5(), + a15: NewEWMA15(), + startTime: time.Now(), + } +} + +// Stop stops the meter, Mark() will be a no-op if you use it after being stopped. +func (m *StandardMeter) Stop() { + m.lock.Lock() + stopped := m.stopped + m.stopped = true + m.lock.Unlock() + if !stopped { + arbiter.Lock() + delete(arbiter.meters, m) + arbiter.Unlock() + } +} + +// Count returns the number of events recorded. +func (m *StandardMeter) Count() int64 { + m.lock.RLock() + count := m.snapshot.count + m.lock.RUnlock() + return count +} + +// Mark records the occurrence of n events. +func (m *StandardMeter) Mark(n int64) { + m.lock.Lock() + defer m.lock.Unlock() + if m.stopped { + return + } + m.snapshot.count += n + m.a1.Update(n) + m.a5.Update(n) + m.a15.Update(n) + m.updateSnapshot() +} + +// Rate1 returns the one-minute moving average rate of events per second. +func (m *StandardMeter) Rate1() float64 { + m.lock.RLock() + rate1 := m.snapshot.rate1 + m.lock.RUnlock() + return rate1 +} + +// Rate5 returns the five-minute moving average rate of events per second. +func (m *StandardMeter) Rate5() float64 { + m.lock.RLock() + rate5 := m.snapshot.rate5 + m.lock.RUnlock() + return rate5 +} + +// Rate15 returns the fifteen-minute moving average rate of events per second. +func (m *StandardMeter) Rate15() float64 { + m.lock.RLock() + rate15 := m.snapshot.rate15 + m.lock.RUnlock() + return rate15 +} + +// RateMean returns the meter's mean rate of events per second. +func (m *StandardMeter) RateMean() float64 { + m.lock.RLock() + rateMean := m.snapshot.rateMean + m.lock.RUnlock() + return rateMean +} + +// Snapshot returns a read-only copy of the meter. +func (m *StandardMeter) Snapshot() Meter { + m.lock.RLock() + snapshot := *m.snapshot + m.lock.RUnlock() + return &snapshot +} + +func (m *StandardMeter) updateSnapshot() { + // should run with write lock held on m.lock + snapshot := m.snapshot + snapshot.rate1 = m.a1.Rate() + snapshot.rate5 = m.a5.Rate() + snapshot.rate15 = m.a15.Rate() + snapshot.rateMean = float64(snapshot.count) / time.Since(m.startTime).Seconds() +} + +func (m *StandardMeter) tick() { + m.lock.Lock() + defer m.lock.Unlock() + m.a1.Tick() + m.a5.Tick() + m.a15.Tick() + m.updateSnapshot() +} + +// meterArbiter ticks meters every 5s from a single goroutine. +// meters are references in a set for future stopping. +type meterArbiter struct { + sync.RWMutex + started bool + meters map[*StandardMeter]struct{} + ticker *time.Ticker +} + +var arbiter = meterArbiter{ticker: time.NewTicker(5e9), meters: make(map[*StandardMeter]struct{})} + +// Ticks meters on the scheduled interval +func (ma *meterArbiter) tick() { + for range ma.ticker.C { + ma.tickMeters() + } +} + +func (ma *meterArbiter) tickMeters() { + ma.RLock() + defer ma.RUnlock() + for meter := range ma.meters { + meter.tick() + } +} diff --git a/metrics/meter_test.go b/metrics/meter_test.go new file mode 100644 index 000000000..e88922260 --- /dev/null +++ b/metrics/meter_test.go @@ -0,0 +1,73 @@ +package metrics + +import ( + "testing" + "time" +) + +func BenchmarkMeter(b *testing.B) { + m := NewMeter() + b.ResetTimer() + for i := 0; i < b.N; i++ { + m.Mark(1) + } +} + +func TestGetOrRegisterMeter(t *testing.T) { + r := NewRegistry() + NewRegisteredMeter("foo", r).Mark(47) + if m := GetOrRegisterMeter("foo", r); 47 != m.Count() { + t.Fatal(m) + } +} + +func TestMeterDecay(t *testing.T) { + ma := meterArbiter{ + ticker: time.NewTicker(time.Millisecond), + meters: make(map[*StandardMeter]struct{}), + } + m := newStandardMeter() + ma.meters[m] = struct{}{} + go ma.tick() + m.Mark(1) + rateMean := m.RateMean() + time.Sleep(100 * time.Millisecond) + if m.RateMean() >= rateMean { + t.Error("m.RateMean() didn't decrease") + } +} + +func TestMeterNonzero(t *testing.T) { + m := NewMeter() + m.Mark(3) + if count := m.Count(); 3 != count { + t.Errorf("m.Count(): 3 != %v\n", count) + } +} + +func TestMeterStop(t *testing.T) { + l := len(arbiter.meters) + m := NewMeter() + if len(arbiter.meters) != l+1 { + t.Errorf("arbiter.meters: %d != %d\n", l+1, len(arbiter.meters)) + } + m.Stop() + if len(arbiter.meters) != l { + t.Errorf("arbiter.meters: %d != %d\n", l, len(arbiter.meters)) + } +} + +func TestMeterSnapshot(t *testing.T) { + m := NewMeter() + m.Mark(1) + if snapshot := m.Snapshot(); m.RateMean() != snapshot.RateMean() { + t.Fatal(snapshot) + } +} + +func TestMeterZero(t *testing.T) { + m := NewMeter() + if count := m.Count(); 0 != count { + t.Errorf("m.Count(): 0 != %v\n", count) + } +} diff --git a/metrics/metrics.go b/metrics/metrics.go index c82661d80..4e4e3ecb2 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -1,20 +1,8 @@ -// Copyright 2015 The go-ethereum Authors -// This file is part of the go-ethereum library. +// Go port of Coda Hale's Metrics library // -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. +// <https://github.com/rcrowley/go-metrics> // -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. - -// Package metrics provides general system and process level metrics collection. +// Coda Hale's original work: <https://github.com/codahale/metrics> package metrics import ( @@ -24,17 +12,19 @@ import ( "time" "github.com/ethereum/go-ethereum/log" - "github.com/rcrowley/go-metrics" - "github.com/rcrowley/go-metrics/exp" ) +// Enabled is checked by the constructor functions for all of the +// standard metrics. If it is true, the metric returned is a stub. +// +// This global kill-switch helps quantify the observer effect and makes +// for less cluttered pprof profiles. +var Enabled bool = false + // MetricsEnabledFlag is the CLI flag name to use to enable metrics collections. const MetricsEnabledFlag = "metrics" const DashboardEnabledFlag = "dashboard" -// Enabled is the flag specifying if metrics are enable or not. -var Enabled = false - // Init enables or disables the metrics system. Since we need this to run before // any other code gets to create meters and timers, we'll actually do an ugly hack // and peek into the command line args for the metrics flag. @@ -45,34 +35,7 @@ func init() { Enabled = true } } - exp.Exp(metrics.DefaultRegistry) -} - -// NewCounter create a new metrics Counter, either a real one of a NOP stub depending -// on the metrics flag. -func NewCounter(name string) metrics.Counter { - if !Enabled { - return new(metrics.NilCounter) - } - return metrics.GetOrRegisterCounter(name, metrics.DefaultRegistry) -} - -// NewMeter create a new metrics Meter, either a real one of a NOP stub depending -// on the metrics flag. -func NewMeter(name string) metrics.Meter { - if !Enabled { - return new(metrics.NilMeter) - } - return metrics.GetOrRegisterMeter(name, metrics.DefaultRegistry) -} - -// NewTimer create a new metrics Timer, either a real one of a NOP stub depending -// on the metrics flag. -func NewTimer(name string) metrics.Timer { - if !Enabled { - return new(metrics.NilTimer) - } - return metrics.GetOrRegisterTimer(name, metrics.DefaultRegistry) + //exp.Exp(DefaultRegistry) } // CollectProcessMetrics periodically collects various metrics about the running @@ -90,17 +53,17 @@ func CollectProcessMetrics(refresh time.Duration) { diskstats[i] = new(DiskStats) } // Define the various metrics to collect - memAllocs := metrics.GetOrRegisterMeter("system/memory/allocs", metrics.DefaultRegistry) - memFrees := metrics.GetOrRegisterMeter("system/memory/frees", metrics.DefaultRegistry) - memInuse := metrics.GetOrRegisterMeter("system/memory/inuse", metrics.DefaultRegistry) - memPauses := metrics.GetOrRegisterMeter("system/memory/pauses", metrics.DefaultRegistry) + memAllocs := GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) + memFrees := GetOrRegisterMeter("system/memory/frees", DefaultRegistry) + memInuse := GetOrRegisterMeter("system/memory/inuse", DefaultRegistry) + memPauses := GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) - var diskReads, diskReadBytes, diskWrites, diskWriteBytes metrics.Meter + var diskReads, diskReadBytes, diskWrites, diskWriteBytes Meter if err := ReadDiskStats(diskstats[0]); err == nil { - diskReads = metrics.GetOrRegisterMeter("system/disk/readcount", metrics.DefaultRegistry) - diskReadBytes = metrics.GetOrRegisterMeter("system/disk/readdata", metrics.DefaultRegistry) - diskWrites = metrics.GetOrRegisterMeter("system/disk/writecount", metrics.DefaultRegistry) - diskWriteBytes = metrics.GetOrRegisterMeter("system/disk/writedata", metrics.DefaultRegistry) + diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) + diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) + diskWrites = GetOrRegisterMeter("system/disk/writecount", DefaultRegistry) + diskWriteBytes = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry) } else { log.Debug("Failed to read disk metrics", "err", err) } diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go new file mode 100644 index 000000000..726fba347 --- /dev/null +++ b/metrics/metrics_test.go @@ -0,0 +1,124 @@ +package metrics + +import ( + "fmt" + "io/ioutil" + "log" + "sync" + "testing" +) + +const FANOUT = 128 + +// Stop the compiler from complaining during debugging. +var ( + _ = ioutil.Discard + _ = log.LstdFlags +) + +func BenchmarkMetrics(b *testing.B) { + r := NewRegistry() + c := NewRegisteredCounter("counter", r) + g := NewRegisteredGauge("gauge", r) + gf := NewRegisteredGaugeFloat64("gaugefloat64", r) + h := NewRegisteredHistogram("histogram", r, NewUniformSample(100)) + m := NewRegisteredMeter("meter", r) + t := NewRegisteredTimer("timer", r) + RegisterDebugGCStats(r) + RegisterRuntimeMemStats(r) + b.ResetTimer() + ch := make(chan bool) + + wgD := &sync.WaitGroup{} + /* + wgD.Add(1) + go func() { + defer wgD.Done() + //log.Println("go CaptureDebugGCStats") + for { + select { + case <-ch: + //log.Println("done CaptureDebugGCStats") + return + default: + CaptureDebugGCStatsOnce(r) + } + } + }() + //*/ + + wgR := &sync.WaitGroup{} + //* + wgR.Add(1) + go func() { + defer wgR.Done() + //log.Println("go CaptureRuntimeMemStats") + for { + select { + case <-ch: + //log.Println("done CaptureRuntimeMemStats") + return + default: + CaptureRuntimeMemStatsOnce(r) + } + } + }() + //*/ + + wgW := &sync.WaitGroup{} + /* + wgW.Add(1) + go func() { + defer wgW.Done() + //log.Println("go Write") + for { + select { + case <-ch: + //log.Println("done Write") + return + default: + WriteOnce(r, ioutil.Discard) + } + } + }() + //*/ + + wg := &sync.WaitGroup{} + wg.Add(FANOUT) + for i := 0; i < FANOUT; i++ { + go func(i int) { + defer wg.Done() + //log.Println("go", i) + for i := 0; i < b.N; i++ { + c.Inc(1) + g.Update(int64(i)) + gf.Update(float64(i)) + h.Update(int64(i)) + m.Mark(1) + t.Update(1) + } + //log.Println("done", i) + }(i) + } + wg.Wait() + close(ch) + wgD.Wait() + wgR.Wait() + wgW.Wait() +} + +func Example() { + c := NewCounter() + Register("money", c) + c.Inc(17) + + // Threadsafe registration + t := GetOrRegisterTimer("db.get.latency", nil) + t.Time(func() {}) + t.Update(1) + + fmt.Println(c.Count()) + fmt.Println(t.Min()) + // Output: 17 + // 1 +} diff --git a/metrics/opentsdb.go b/metrics/opentsdb.go new file mode 100644 index 000000000..df7f152ed --- /dev/null +++ b/metrics/opentsdb.go @@ -0,0 +1,119 @@ +package metrics + +import ( + "bufio" + "fmt" + "log" + "net" + "os" + "strings" + "time" +) + +var shortHostName string = "" + +// OpenTSDBConfig provides a container with configuration parameters for +// the OpenTSDB exporter +type OpenTSDBConfig struct { + Addr *net.TCPAddr // Network address to connect to + Registry Registry // Registry to be exported + FlushInterval time.Duration // Flush interval + DurationUnit time.Duration // Time conversion unit for durations + Prefix string // Prefix to be prepended to metric names +} + +// OpenTSDB is a blocking exporter function which reports metrics in r +// to a TSDB server located at addr, flushing them every d duration +// and prepending metric names with prefix. +func OpenTSDB(r Registry, d time.Duration, prefix string, addr *net.TCPAddr) { + OpenTSDBWithConfig(OpenTSDBConfig{ + Addr: addr, + Registry: r, + FlushInterval: d, + DurationUnit: time.Nanosecond, + Prefix: prefix, + }) +} + +// OpenTSDBWithConfig is a blocking exporter function just like OpenTSDB, +// but it takes a OpenTSDBConfig instead. +func OpenTSDBWithConfig(c OpenTSDBConfig) { + for range time.Tick(c.FlushInterval) { + if err := openTSDB(&c); nil != err { + log.Println(err) + } + } +} + +func getShortHostname() string { + if shortHostName == "" { + host, _ := os.Hostname() + if index := strings.Index(host, "."); index > 0 { + shortHostName = host[:index] + } else { + shortHostName = host + } + } + return shortHostName +} + +func openTSDB(c *OpenTSDBConfig) error { + shortHostname := getShortHostname() + now := time.Now().Unix() + du := float64(c.DurationUnit) + conn, err := net.DialTCP("tcp", nil, c.Addr) + if nil != err { + return err + } + defer conn.Close() + w := bufio.NewWriter(conn) + c.Registry.Each(func(name string, i interface{}) { + switch metric := i.(type) { + case Counter: + fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, metric.Count(), shortHostname) + case Gauge: + fmt.Fprintf(w, "put %s.%s.value %d %d host=%s\n", c.Prefix, name, now, metric.Value(), shortHostname) + case GaugeFloat64: + fmt.Fprintf(w, "put %s.%s.value %d %f host=%s\n", c.Prefix, name, now, metric.Value(), shortHostname) + case Histogram: + h := metric.Snapshot() + ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, h.Count(), shortHostname) + fmt.Fprintf(w, "put %s.%s.min %d %d host=%s\n", c.Prefix, name, now, h.Min(), shortHostname) + fmt.Fprintf(w, "put %s.%s.max %d %d host=%s\n", c.Prefix, name, now, h.Max(), shortHostname) + fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, h.Mean(), shortHostname) + fmt.Fprintf(w, "put %s.%s.std-dev %d %.2f host=%s\n", c.Prefix, name, now, h.StdDev(), shortHostname) + fmt.Fprintf(w, "put %s.%s.50-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[0], shortHostname) + fmt.Fprintf(w, "put %s.%s.75-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[1], shortHostname) + fmt.Fprintf(w, "put %s.%s.95-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[2], shortHostname) + fmt.Fprintf(w, "put %s.%s.99-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[3], shortHostname) + fmt.Fprintf(w, "put %s.%s.999-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[4], shortHostname) + case Meter: + m := metric.Snapshot() + fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, m.Count(), shortHostname) + fmt.Fprintf(w, "put %s.%s.one-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate1(), shortHostname) + fmt.Fprintf(w, "put %s.%s.five-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate5(), shortHostname) + fmt.Fprintf(w, "put %s.%s.fifteen-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate15(), shortHostname) + fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, m.RateMean(), shortHostname) + case Timer: + t := metric.Snapshot() + ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, t.Count(), shortHostname) + fmt.Fprintf(w, "put %s.%s.min %d %d host=%s\n", c.Prefix, name, now, t.Min()/int64(du), shortHostname) + fmt.Fprintf(w, "put %s.%s.max %d %d host=%s\n", c.Prefix, name, now, t.Max()/int64(du), shortHostname) + fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, t.Mean()/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.std-dev %d %.2f host=%s\n", c.Prefix, name, now, t.StdDev()/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.50-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[0]/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.75-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[1]/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.95-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[2]/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.99-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[3]/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.999-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[4]/du, shortHostname) + fmt.Fprintf(w, "put %s.%s.one-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate1(), shortHostname) + fmt.Fprintf(w, "put %s.%s.five-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate5(), shortHostname) + fmt.Fprintf(w, "put %s.%s.fifteen-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate15(), shortHostname) + fmt.Fprintf(w, "put %s.%s.mean-rate %d %.2f host=%s\n", c.Prefix, name, now, t.RateMean(), shortHostname) + } + w.Flush() + }) + return nil +} diff --git a/metrics/opentsdb_test.go b/metrics/opentsdb_test.go new file mode 100644 index 000000000..c43728960 --- /dev/null +++ b/metrics/opentsdb_test.go @@ -0,0 +1,21 @@ +package metrics + +import ( + "net" + "time" +) + +func ExampleOpenTSDB() { + addr, _ := net.ResolveTCPAddr("net", ":2003") + go OpenTSDB(DefaultRegistry, 1*time.Second, "some.prefix", addr) +} + +func ExampleOpenTSDBWithConfig() { + addr, _ := net.ResolveTCPAddr("net", ":2003") + go OpenTSDBWithConfig(OpenTSDBConfig{ + Addr: addr, + Registry: DefaultRegistry, + FlushInterval: 1 * time.Second, + DurationUnit: time.Millisecond, + }) +} diff --git a/metrics/registry.go b/metrics/registry.go new file mode 100644 index 000000000..cc34c9dfd --- /dev/null +++ b/metrics/registry.go @@ -0,0 +1,354 @@ +package metrics + +import ( + "fmt" + "reflect" + "strings" + "sync" +) + +// DuplicateMetric is the error returned by Registry.Register when a metric +// already exists. If you mean to Register that metric you must first +// Unregister the existing metric. +type DuplicateMetric string + +func (err DuplicateMetric) Error() string { + return fmt.Sprintf("duplicate metric: %s", string(err)) +} + +// A Registry holds references to a set of metrics by name and can iterate +// over them, calling callback functions provided by the user. +// +// This is an interface so as to encourage other structs to implement +// the Registry API as appropriate. +type Registry interface { + + // Call the given function for each registered metric. + Each(func(string, interface{})) + + // Get the metric by the given name or nil if none is registered. + Get(string) interface{} + + // GetAll metrics in the Registry. + GetAll() map[string]map[string]interface{} + + // Gets an existing metric or registers the given one. + // The interface can be the metric to register if not found in registry, + // or a function returning the metric for lazy instantiation. + GetOrRegister(string, interface{}) interface{} + + // Register the given metric under the given name. + Register(string, interface{}) error + + // Run all registered healthchecks. + RunHealthchecks() + + // Unregister the metric with the given name. + Unregister(string) + + // Unregister all metrics. (Mostly for testing.) + UnregisterAll() +} + +// The standard implementation of a Registry is a mutex-protected map +// of names to metrics. +type StandardRegistry struct { + metrics map[string]interface{} + mutex sync.Mutex +} + +// Create a new registry. +func NewRegistry() Registry { + return &StandardRegistry{metrics: make(map[string]interface{})} +} + +// Call the given function for each registered metric. +func (r *StandardRegistry) Each(f func(string, interface{})) { + for name, i := range r.registered() { + f(name, i) + } +} + +// Get the metric by the given name or nil if none is registered. +func (r *StandardRegistry) Get(name string) interface{} { + r.mutex.Lock() + defer r.mutex.Unlock() + return r.metrics[name] +} + +// Gets an existing metric or creates and registers a new one. Threadsafe +// alternative to calling Get and Register on failure. +// The interface can be the metric to register if not found in registry, +// or a function returning the metric for lazy instantiation. +func (r *StandardRegistry) GetOrRegister(name string, i interface{}) interface{} { + r.mutex.Lock() + defer r.mutex.Unlock() + if metric, ok := r.metrics[name]; ok { + return metric + } + if v := reflect.ValueOf(i); v.Kind() == reflect.Func { + i = v.Call(nil)[0].Interface() + } + r.register(name, i) + return i +} + +// Register the given metric under the given name. Returns a DuplicateMetric +// if a metric by the given name is already registered. +func (r *StandardRegistry) Register(name string, i interface{}) error { + r.mutex.Lock() + defer r.mutex.Unlock() + return r.register(name, i) +} + +// Run all registered healthchecks. +func (r *StandardRegistry) RunHealthchecks() { + r.mutex.Lock() + defer r.mutex.Unlock() + for _, i := range r.metrics { + if h, ok := i.(Healthcheck); ok { + h.Check() + } + } +} + +// GetAll metrics in the Registry +func (r *StandardRegistry) GetAll() map[string]map[string]interface{} { + data := make(map[string]map[string]interface{}) + r.Each(func(name string, i interface{}) { + values := make(map[string]interface{}) + switch metric := i.(type) { + case Counter: + values["count"] = metric.Count() + case Gauge: + values["value"] = metric.Value() + case GaugeFloat64: + values["value"] = metric.Value() + case Healthcheck: + values["error"] = nil + metric.Check() + if err := metric.Error(); nil != err { + values["error"] = metric.Error().Error() + } + case Histogram: + h := metric.Snapshot() + ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + values["count"] = h.Count() + values["min"] = h.Min() + values["max"] = h.Max() + values["mean"] = h.Mean() + values["stddev"] = h.StdDev() + values["median"] = ps[0] + values["75%"] = ps[1] + values["95%"] = ps[2] + values["99%"] = ps[3] + values["99.9%"] = ps[4] + case Meter: + m := metric.Snapshot() + values["count"] = m.Count() + values["1m.rate"] = m.Rate1() + values["5m.rate"] = m.Rate5() + values["15m.rate"] = m.Rate15() + values["mean.rate"] = m.RateMean() + case Timer: + t := metric.Snapshot() + ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + values["count"] = t.Count() + values["min"] = t.Min() + values["max"] = t.Max() + values["mean"] = t.Mean() + values["stddev"] = t.StdDev() + values["median"] = ps[0] + values["75%"] = ps[1] + values["95%"] = ps[2] + values["99%"] = ps[3] + values["99.9%"] = ps[4] + values["1m.rate"] = t.Rate1() + values["5m.rate"] = t.Rate5() + values["15m.rate"] = t.Rate15() + values["mean.rate"] = t.RateMean() + } + data[name] = values + }) + return data +} + +// Unregister the metric with the given name. +func (r *StandardRegistry) Unregister(name string) { + r.mutex.Lock() + defer r.mutex.Unlock() + r.stop(name) + delete(r.metrics, name) +} + +// Unregister all metrics. (Mostly for testing.) +func (r *StandardRegistry) UnregisterAll() { + r.mutex.Lock() + defer r.mutex.Unlock() + for name := range r.metrics { + r.stop(name) + delete(r.metrics, name) + } +} + +func (r *StandardRegistry) register(name string, i interface{}) error { + if _, ok := r.metrics[name]; ok { + return DuplicateMetric(name) + } + switch i.(type) { + case Counter, Gauge, GaugeFloat64, Healthcheck, Histogram, Meter, Timer, ResettingTimer: + r.metrics[name] = i + } + return nil +} + +func (r *StandardRegistry) registered() map[string]interface{} { + r.mutex.Lock() + defer r.mutex.Unlock() + metrics := make(map[string]interface{}, len(r.metrics)) + for name, i := range r.metrics { + metrics[name] = i + } + return metrics +} + +func (r *StandardRegistry) stop(name string) { + if i, ok := r.metrics[name]; ok { + if s, ok := i.(Stoppable); ok { + s.Stop() + } + } +} + +// Stoppable defines the metrics which has to be stopped. +type Stoppable interface { + Stop() +} + +type PrefixedRegistry struct { + underlying Registry + prefix string +} + +func NewPrefixedRegistry(prefix string) Registry { + return &PrefixedRegistry{ + underlying: NewRegistry(), + prefix: prefix, + } +} + +func NewPrefixedChildRegistry(parent Registry, prefix string) Registry { + return &PrefixedRegistry{ + underlying: parent, + prefix: prefix, + } +} + +// Call the given function for each registered metric. +func (r *PrefixedRegistry) Each(fn func(string, interface{})) { + wrappedFn := func(prefix string) func(string, interface{}) { + return func(name string, iface interface{}) { + if strings.HasPrefix(name, prefix) { + fn(name, iface) + } else { + return + } + } + } + + baseRegistry, prefix := findPrefix(r, "") + baseRegistry.Each(wrappedFn(prefix)) +} + +func findPrefix(registry Registry, prefix string) (Registry, string) { + switch r := registry.(type) { + case *PrefixedRegistry: + return findPrefix(r.underlying, r.prefix+prefix) + case *StandardRegistry: + return r, prefix + } + return nil, "" +} + +// Get the metric by the given name or nil if none is registered. +func (r *PrefixedRegistry) Get(name string) interface{} { + realName := r.prefix + name + return r.underlying.Get(realName) +} + +// Gets an existing metric or registers the given one. +// The interface can be the metric to register if not found in registry, +// or a function returning the metric for lazy instantiation. +func (r *PrefixedRegistry) GetOrRegister(name string, metric interface{}) interface{} { + realName := r.prefix + name + return r.underlying.GetOrRegister(realName, metric) +} + +// Register the given metric under the given name. The name will be prefixed. +func (r *PrefixedRegistry) Register(name string, metric interface{}) error { + realName := r.prefix + name + return r.underlying.Register(realName, metric) +} + +// Run all registered healthchecks. +func (r *PrefixedRegistry) RunHealthchecks() { + r.underlying.RunHealthchecks() +} + +// GetAll metrics in the Registry +func (r *PrefixedRegistry) GetAll() map[string]map[string]interface{} { + return r.underlying.GetAll() +} + +// Unregister the metric with the given name. The name will be prefixed. +func (r *PrefixedRegistry) Unregister(name string) { + realName := r.prefix + name + r.underlying.Unregister(realName) +} + +// Unregister all metrics. (Mostly for testing.) +func (r *PrefixedRegistry) UnregisterAll() { + r.underlying.UnregisterAll() +} + +var DefaultRegistry Registry = NewRegistry() + +// Call the given function for each registered metric. +func Each(f func(string, interface{})) { + DefaultRegistry.Each(f) +} + +// Get the metric by the given name or nil if none is registered. +func Get(name string) interface{} { + return DefaultRegistry.Get(name) +} + +// Gets an existing metric or creates and registers a new one. Threadsafe +// alternative to calling Get and Register on failure. +func GetOrRegister(name string, i interface{}) interface{} { + return DefaultRegistry.GetOrRegister(name, i) +} + +// Register the given metric under the given name. Returns a DuplicateMetric +// if a metric by the given name is already registered. +func Register(name string, i interface{}) error { + return DefaultRegistry.Register(name, i) +} + +// Register the given metric under the given name. Panics if a metric by the +// given name is already registered. +func MustRegister(name string, i interface{}) { + if err := Register(name, i); err != nil { + panic(err) + } +} + +// Run all registered healthchecks. +func RunHealthchecks() { + DefaultRegistry.RunHealthchecks() +} + +// Unregister the metric with the given name. +func Unregister(name string) { + DefaultRegistry.Unregister(name) +} diff --git a/metrics/registry_test.go b/metrics/registry_test.go new file mode 100644 index 000000000..a63e485fe --- /dev/null +++ b/metrics/registry_test.go @@ -0,0 +1,305 @@ +package metrics + +import ( + "testing" +) + +func BenchmarkRegistry(b *testing.B) { + r := NewRegistry() + r.Register("foo", NewCounter()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + r.Each(func(string, interface{}) {}) + } +} + +func TestRegistry(t *testing.T) { + r := NewRegistry() + r.Register("foo", NewCounter()) + i := 0 + r.Each(func(name string, iface interface{}) { + i++ + if "foo" != name { + t.Fatal(name) + } + if _, ok := iface.(Counter); !ok { + t.Fatal(iface) + } + }) + if 1 != i { + t.Fatal(i) + } + r.Unregister("foo") + i = 0 + r.Each(func(string, interface{}) { i++ }) + if 0 != i { + t.Fatal(i) + } +} + +func TestRegistryDuplicate(t *testing.T) { + r := NewRegistry() + if err := r.Register("foo", NewCounter()); nil != err { + t.Fatal(err) + } + if err := r.Register("foo", NewGauge()); nil == err { + t.Fatal(err) + } + i := 0 + r.Each(func(name string, iface interface{}) { + i++ + if _, ok := iface.(Counter); !ok { + t.Fatal(iface) + } + }) + if 1 != i { + t.Fatal(i) + } +} + +func TestRegistryGet(t *testing.T) { + r := NewRegistry() + r.Register("foo", NewCounter()) + if count := r.Get("foo").(Counter).Count(); 0 != count { + t.Fatal(count) + } + r.Get("foo").(Counter).Inc(1) + if count := r.Get("foo").(Counter).Count(); 1 != count { + t.Fatal(count) + } +} + +func TestRegistryGetOrRegister(t *testing.T) { + r := NewRegistry() + + // First metric wins with GetOrRegister + _ = r.GetOrRegister("foo", NewCounter()) + m := r.GetOrRegister("foo", NewGauge()) + if _, ok := m.(Counter); !ok { + t.Fatal(m) + } + + i := 0 + r.Each(func(name string, iface interface{}) { + i++ + if name != "foo" { + t.Fatal(name) + } + if _, ok := iface.(Counter); !ok { + t.Fatal(iface) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestRegistryGetOrRegisterWithLazyInstantiation(t *testing.T) { + r := NewRegistry() + + // First metric wins with GetOrRegister + _ = r.GetOrRegister("foo", NewCounter) + m := r.GetOrRegister("foo", NewGauge) + if _, ok := m.(Counter); !ok { + t.Fatal(m) + } + + i := 0 + r.Each(func(name string, iface interface{}) { + i++ + if name != "foo" { + t.Fatal(name) + } + if _, ok := iface.(Counter); !ok { + t.Fatal(iface) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestRegistryUnregister(t *testing.T) { + l := len(arbiter.meters) + r := NewRegistry() + r.Register("foo", NewCounter()) + r.Register("bar", NewMeter()) + r.Register("baz", NewTimer()) + if len(arbiter.meters) != l+2 { + t.Errorf("arbiter.meters: %d != %d\n", l+2, len(arbiter.meters)) + } + r.Unregister("foo") + r.Unregister("bar") + r.Unregister("baz") + if len(arbiter.meters) != l { + t.Errorf("arbiter.meters: %d != %d\n", l+2, len(arbiter.meters)) + } +} + +func TestPrefixedChildRegistryGetOrRegister(t *testing.T) { + r := NewRegistry() + pr := NewPrefixedChildRegistry(r, "prefix.") + + _ = pr.GetOrRegister("foo", NewCounter()) + + i := 0 + r.Each(func(name string, m interface{}) { + i++ + if name != "prefix.foo" { + t.Fatal(name) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestPrefixedRegistryGetOrRegister(t *testing.T) { + r := NewPrefixedRegistry("prefix.") + + _ = r.GetOrRegister("foo", NewCounter()) + + i := 0 + r.Each(func(name string, m interface{}) { + i++ + if name != "prefix.foo" { + t.Fatal(name) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestPrefixedRegistryRegister(t *testing.T) { + r := NewPrefixedRegistry("prefix.") + err := r.Register("foo", NewCounter()) + c := NewCounter() + Register("bar", c) + if err != nil { + t.Fatal(err.Error()) + } + + i := 0 + r.Each(func(name string, m interface{}) { + i++ + if name != "prefix.foo" { + t.Fatal(name) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestPrefixedRegistryUnregister(t *testing.T) { + r := NewPrefixedRegistry("prefix.") + + _ = r.Register("foo", NewCounter()) + + i := 0 + r.Each(func(name string, m interface{}) { + i++ + if name != "prefix.foo" { + t.Fatal(name) + } + }) + if i != 1 { + t.Fatal(i) + } + + r.Unregister("foo") + + i = 0 + r.Each(func(name string, m interface{}) { + i++ + }) + + if i != 0 { + t.Fatal(i) + } +} + +func TestPrefixedRegistryGet(t *testing.T) { + pr := NewPrefixedRegistry("prefix.") + name := "foo" + pr.Register(name, NewCounter()) + + fooCounter := pr.Get(name) + if fooCounter == nil { + t.Fatal(name) + } +} + +func TestPrefixedChildRegistryGet(t *testing.T) { + r := NewRegistry() + pr := NewPrefixedChildRegistry(r, "prefix.") + name := "foo" + pr.Register(name, NewCounter()) + fooCounter := pr.Get(name) + if fooCounter == nil { + t.Fatal(name) + } +} + +func TestChildPrefixedRegistryRegister(t *testing.T) { + r := NewPrefixedChildRegistry(DefaultRegistry, "prefix.") + err := r.Register("foo", NewCounter()) + c := NewCounter() + Register("bar", c) + if err != nil { + t.Fatal(err.Error()) + } + + i := 0 + r.Each(func(name string, m interface{}) { + i++ + if name != "prefix.foo" { + t.Fatal(name) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestChildPrefixedRegistryOfChildRegister(t *testing.T) { + r := NewPrefixedChildRegistry(NewRegistry(), "prefix.") + r2 := NewPrefixedChildRegistry(r, "prefix2.") + err := r.Register("foo2", NewCounter()) + if err != nil { + t.Fatal(err.Error()) + } + err = r2.Register("baz", NewCounter()) + c := NewCounter() + Register("bars", c) + + i := 0 + r2.Each(func(name string, m interface{}) { + i++ + if name != "prefix.prefix2.baz" { + //t.Fatal(name) + } + }) + if i != 1 { + t.Fatal(i) + } +} + +func TestWalkRegistries(t *testing.T) { + r := NewPrefixedChildRegistry(NewRegistry(), "prefix.") + r2 := NewPrefixedChildRegistry(r, "prefix2.") + err := r.Register("foo2", NewCounter()) + if err != nil { + t.Fatal(err.Error()) + } + err = r2.Register("baz", NewCounter()) + c := NewCounter() + Register("bars", c) + + _, prefix := findPrefix(r2, "") + if "prefix.prefix2." != prefix { + t.Fatal(prefix) + } + +} diff --git a/metrics/resetting_timer.go b/metrics/resetting_timer.go new file mode 100644 index 000000000..57bcb3134 --- /dev/null +++ b/metrics/resetting_timer.go @@ -0,0 +1,237 @@ +package metrics + +import ( + "math" + "sort" + "sync" + "time" +) + +// Initial slice capacity for the values stored in a ResettingTimer +const InitialResettingTimerSliceCap = 10 + +// ResettingTimer is used for storing aggregated values for timers, which are reset on every flush interval. +type ResettingTimer interface { + Values() []int64 + Snapshot() ResettingTimer + Percentiles([]float64) []int64 + Mean() float64 + Time(func()) + Update(time.Duration) + UpdateSince(time.Time) +} + +// GetOrRegisterResettingTimer returns an existing ResettingTimer or constructs and registers a +// new StandardResettingTimer. +func GetOrRegisterResettingTimer(name string, r Registry) ResettingTimer { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewResettingTimer).(ResettingTimer) +} + +// NewRegisteredResettingTimer constructs and registers a new StandardResettingTimer. +func NewRegisteredResettingTimer(name string, r Registry) ResettingTimer { + c := NewResettingTimer() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// NewResettingTimer constructs a new StandardResettingTimer +func NewResettingTimer() ResettingTimer { + if !Enabled { + return NilResettingTimer{} + } + return &StandardResettingTimer{ + values: make([]int64, 0, InitialResettingTimerSliceCap), + } +} + +// NilResettingTimer is a no-op ResettingTimer. +type NilResettingTimer struct { +} + +// Values is a no-op. +func (NilResettingTimer) Values() []int64 { return nil } + +// Snapshot is a no-op. +func (NilResettingTimer) Snapshot() ResettingTimer { return NilResettingTimer{} } + +// Time is a no-op. +func (NilResettingTimer) Time(func()) {} + +// Update is a no-op. +func (NilResettingTimer) Update(time.Duration) {} + +// Percentiles panics. +func (NilResettingTimer) Percentiles([]float64) []int64 { + panic("Percentiles called on a NilResettingTimer") +} + +// Mean panics. +func (NilResettingTimer) Mean() float64 { + panic("Mean called on a NilResettingTimer") +} + +// UpdateSince is a no-op. +func (NilResettingTimer) UpdateSince(time.Time) {} + +// StandardResettingTimer is the standard implementation of a ResettingTimer. +// and Meter. +type StandardResettingTimer struct { + values []int64 + mutex sync.Mutex +} + +// Values returns a slice with all measurements. +func (t *StandardResettingTimer) Values() []int64 { + return t.values +} + +// Snapshot resets the timer and returns a read-only copy of its contents. +func (t *StandardResettingTimer) Snapshot() ResettingTimer { + t.mutex.Lock() + defer t.mutex.Unlock() + currentValues := t.values + t.values = make([]int64, 0, InitialResettingTimerSliceCap) + + return &ResettingTimerSnapshot{ + values: currentValues, + } +} + +// Percentiles panics. +func (t *StandardResettingTimer) Percentiles([]float64) []int64 { + panic("Percentiles called on a StandardResettingTimer") +} + +// Mean panics. +func (t *StandardResettingTimer) Mean() float64 { + panic("Mean called on a StandardResettingTimer") +} + +// Record the duration of the execution of the given function. +func (t *StandardResettingTimer) Time(f func()) { + ts := time.Now() + f() + t.Update(time.Since(ts)) +} + +// Record the duration of an event. +func (t *StandardResettingTimer) Update(d time.Duration) { + t.mutex.Lock() + defer t.mutex.Unlock() + t.values = append(t.values, int64(d)) +} + +// Record the duration of an event that started at a time and ends now. +func (t *StandardResettingTimer) UpdateSince(ts time.Time) { + t.mutex.Lock() + defer t.mutex.Unlock() + t.values = append(t.values, int64(time.Since(ts))) +} + +// ResettingTimerSnapshot is a point-in-time copy of another ResettingTimer. +type ResettingTimerSnapshot struct { + values []int64 + mean float64 + thresholdBoundaries []int64 + calculated bool +} + +// Snapshot returns the snapshot. +func (t *ResettingTimerSnapshot) Snapshot() ResettingTimer { return t } + +// Time panics. +func (*ResettingTimerSnapshot) Time(func()) { + panic("Time called on a ResettingTimerSnapshot") +} + +// Update panics. +func (*ResettingTimerSnapshot) Update(time.Duration) { + panic("Update called on a ResettingTimerSnapshot") +} + +// UpdateSince panics. +func (*ResettingTimerSnapshot) UpdateSince(time.Time) { + panic("UpdateSince called on a ResettingTimerSnapshot") +} + +// Values returns all values from snapshot. +func (t *ResettingTimerSnapshot) Values() []int64 { + return t.values +} + +// Percentiles returns the boundaries for the input percentiles. +func (t *ResettingTimerSnapshot) Percentiles(percentiles []float64) []int64 { + t.calc(percentiles) + + return t.thresholdBoundaries +} + +// Mean returns the mean of the snapshotted values +func (t *ResettingTimerSnapshot) Mean() float64 { + if !t.calculated { + t.calc([]float64{}) + } + + return t.mean +} + +func (t *ResettingTimerSnapshot) calc(percentiles []float64) { + sort.Sort(Int64Slice(t.values)) + + count := len(t.values) + if count > 0 { + min := t.values[0] + max := t.values[count-1] + + cumulativeValues := make([]int64, count) + cumulativeValues[0] = min + for i := 1; i < count; i++ { + cumulativeValues[i] = t.values[i] + cumulativeValues[i-1] + } + + t.thresholdBoundaries = make([]int64, len(percentiles)) + + thresholdBoundary := max + + for i, pct := range percentiles { + if count > 1 { + var abs float64 + if pct >= 0 { + abs = pct + } else { + abs = 100 + pct + } + // poor man's math.Round(x): + // math.Floor(x + 0.5) + indexOfPerc := int(math.Floor(((abs / 100.0) * float64(count)) + 0.5)) + if pct >= 0 { + indexOfPerc -= 1 // index offset=0 + } + thresholdBoundary = t.values[indexOfPerc] + } + + t.thresholdBoundaries[i] = thresholdBoundary + } + + sum := cumulativeValues[count-1] + t.mean = float64(sum) / float64(count) + } else { + t.thresholdBoundaries = make([]int64, len(percentiles)) + t.mean = 0 + } + + t.calculated = true +} + +// Int64Slice attaches the methods of sort.Interface to []int64, sorting in increasing order. +type Int64Slice []int64 + +func (s Int64Slice) Len() int { return len(s) } +func (s Int64Slice) Less(i, j int) bool { return s[i] < s[j] } +func (s Int64Slice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } diff --git a/metrics/resetting_timer_test.go b/metrics/resetting_timer_test.go new file mode 100644 index 000000000..58fd47f35 --- /dev/null +++ b/metrics/resetting_timer_test.go @@ -0,0 +1,106 @@ +package metrics + +import ( + "testing" + "time" +) + +func TestResettingTimer(t *testing.T) { + tests := []struct { + values []int64 + start int + end int + wantP50 int64 + wantP95 int64 + wantP99 int64 + wantMean float64 + wantMin int64 + wantMax int64 + }{ + { + values: []int64{}, + start: 1, + end: 11, + wantP50: 5, wantP95: 10, wantP99: 10, + wantMin: 1, wantMax: 10, wantMean: 5.5, + }, + { + values: []int64{}, + start: 1, + end: 101, + wantP50: 50, wantP95: 95, wantP99: 99, + wantMin: 1, wantMax: 100, wantMean: 50.5, + }, + { + values: []int64{1}, + start: 0, + end: 0, + wantP50: 1, wantP95: 1, wantP99: 1, + wantMin: 1, wantMax: 1, wantMean: 1, + }, + { + values: []int64{0}, + start: 0, + end: 0, + wantP50: 0, wantP95: 0, wantP99: 0, + wantMin: 0, wantMax: 0, wantMean: 0, + }, + { + values: []int64{}, + start: 0, + end: 0, + wantP50: 0, wantP95: 0, wantP99: 0, + wantMin: 0, wantMax: 0, wantMean: 0, + }, + { + values: []int64{1, 10}, + start: 0, + end: 0, + wantP50: 1, wantP95: 10, wantP99: 10, + wantMin: 1, wantMax: 10, wantMean: 5.5, + }, + } + for ind, tt := range tests { + timer := NewResettingTimer() + + for i := tt.start; i < tt.end; i++ { + tt.values = append(tt.values, int64(i)) + } + + for _, v := range tt.values { + timer.Update(time.Duration(v)) + } + + snap := timer.Snapshot() + + ps := snap.Percentiles([]float64{50, 95, 99}) + + val := snap.Values() + + if len(val) > 0 { + if tt.wantMin != val[0] { + t.Fatalf("%d: min: got %d, want %d", ind, val[0], tt.wantMin) + } + + if tt.wantMax != val[len(val)-1] { + t.Fatalf("%d: max: got %d, want %d", ind, val[len(val)-1], tt.wantMax) + } + } + + if tt.wantMean != snap.Mean() { + t.Fatalf("%d: mean: got %.2f, want %.2f", ind, snap.Mean(), tt.wantMean) + } + + if tt.wantP50 != ps[0] { + t.Fatalf("%d: p50: got %d, want %d", ind, ps[0], tt.wantP50) + } + + if tt.wantP95 != ps[1] { + t.Fatalf("%d: p95: got %d, want %d", ind, ps[1], tt.wantP95) + } + + if tt.wantP99 != ps[2] { + t.Fatalf("%d: p99: got %d, want %d", ind, ps[2], tt.wantP99) + } + } +} diff --git a/metrics/runtime.go b/metrics/runtime.go new file mode 100644 index 000000000..9450c479b --- /dev/null +++ b/metrics/runtime.go @@ -0,0 +1,212 @@ +package metrics + +import ( + "runtime" + "runtime/pprof" + "time" +) + +var ( + memStats runtime.MemStats + runtimeMetrics struct { + MemStats struct { + Alloc Gauge + BuckHashSys Gauge + DebugGC Gauge + EnableGC Gauge + Frees Gauge + HeapAlloc Gauge + HeapIdle Gauge + HeapInuse Gauge + HeapObjects Gauge + HeapReleased Gauge + HeapSys Gauge + LastGC Gauge + Lookups Gauge + Mallocs Gauge + MCacheInuse Gauge + MCacheSys Gauge + MSpanInuse Gauge + MSpanSys Gauge + NextGC Gauge + NumGC Gauge + GCCPUFraction GaugeFloat64 + PauseNs Histogram + PauseTotalNs Gauge + StackInuse Gauge + StackSys Gauge + Sys Gauge + TotalAlloc Gauge + } + NumCgoCall Gauge + NumGoroutine Gauge + NumThread Gauge + ReadMemStats Timer + } + frees uint64 + lookups uint64 + mallocs uint64 + numGC uint32 + numCgoCalls int64 + + threadCreateProfile = pprof.Lookup("threadcreate") +) + +// Capture new values for the Go runtime statistics exported in +// runtime.MemStats. This is designed to be called as a goroutine. +func CaptureRuntimeMemStats(r Registry, d time.Duration) { + for range time.Tick(d) { + CaptureRuntimeMemStatsOnce(r) + } +} + +// Capture new values for the Go runtime statistics exported in +// runtime.MemStats. This is designed to be called in a background +// goroutine. Giving a registry which has not been given to +// RegisterRuntimeMemStats will panic. +// +// Be very careful with this because runtime.ReadMemStats calls the C +// functions runtime·semacquire(&runtime·worldsema) and runtime·stoptheworld() +// and that last one does what it says on the tin. +func CaptureRuntimeMemStatsOnce(r Registry) { + t := time.Now() + runtime.ReadMemStats(&memStats) // This takes 50-200us. + runtimeMetrics.ReadMemStats.UpdateSince(t) + + runtimeMetrics.MemStats.Alloc.Update(int64(memStats.Alloc)) + runtimeMetrics.MemStats.BuckHashSys.Update(int64(memStats.BuckHashSys)) + if memStats.DebugGC { + runtimeMetrics.MemStats.DebugGC.Update(1) + } else { + runtimeMetrics.MemStats.DebugGC.Update(0) + } + if memStats.EnableGC { + runtimeMetrics.MemStats.EnableGC.Update(1) + } else { + runtimeMetrics.MemStats.EnableGC.Update(0) + } + + runtimeMetrics.MemStats.Frees.Update(int64(memStats.Frees - frees)) + runtimeMetrics.MemStats.HeapAlloc.Update(int64(memStats.HeapAlloc)) + runtimeMetrics.MemStats.HeapIdle.Update(int64(memStats.HeapIdle)) + runtimeMetrics.MemStats.HeapInuse.Update(int64(memStats.HeapInuse)) + runtimeMetrics.MemStats.HeapObjects.Update(int64(memStats.HeapObjects)) + runtimeMetrics.MemStats.HeapReleased.Update(int64(memStats.HeapReleased)) + runtimeMetrics.MemStats.HeapSys.Update(int64(memStats.HeapSys)) + runtimeMetrics.MemStats.LastGC.Update(int64(memStats.LastGC)) + runtimeMetrics.MemStats.Lookups.Update(int64(memStats.Lookups - lookups)) + runtimeMetrics.MemStats.Mallocs.Update(int64(memStats.Mallocs - mallocs)) + runtimeMetrics.MemStats.MCacheInuse.Update(int64(memStats.MCacheInuse)) + runtimeMetrics.MemStats.MCacheSys.Update(int64(memStats.MCacheSys)) + runtimeMetrics.MemStats.MSpanInuse.Update(int64(memStats.MSpanInuse)) + runtimeMetrics.MemStats.MSpanSys.Update(int64(memStats.MSpanSys)) + runtimeMetrics.MemStats.NextGC.Update(int64(memStats.NextGC)) + runtimeMetrics.MemStats.NumGC.Update(int64(memStats.NumGC - numGC)) + runtimeMetrics.MemStats.GCCPUFraction.Update(gcCPUFraction(&memStats)) + + // <https://code.google.com/p/go/source/browse/src/pkg/runtime/mgc0.c> + i := numGC % uint32(len(memStats.PauseNs)) + ii := memStats.NumGC % uint32(len(memStats.PauseNs)) + if memStats.NumGC-numGC >= uint32(len(memStats.PauseNs)) { + for i = 0; i < uint32(len(memStats.PauseNs)); i++ { + runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) + } + } else { + if i > ii { + for ; i < uint32(len(memStats.PauseNs)); i++ { + runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) + } + i = 0 + } + for ; i < ii; i++ { + runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) + } + } + frees = memStats.Frees + lookups = memStats.Lookups + mallocs = memStats.Mallocs + numGC = memStats.NumGC + + runtimeMetrics.MemStats.PauseTotalNs.Update(int64(memStats.PauseTotalNs)) + runtimeMetrics.MemStats.StackInuse.Update(int64(memStats.StackInuse)) + runtimeMetrics.MemStats.StackSys.Update(int64(memStats.StackSys)) + runtimeMetrics.MemStats.Sys.Update(int64(memStats.Sys)) + runtimeMetrics.MemStats.TotalAlloc.Update(int64(memStats.TotalAlloc)) + + currentNumCgoCalls := numCgoCall() + runtimeMetrics.NumCgoCall.Update(currentNumCgoCalls - numCgoCalls) + numCgoCalls = currentNumCgoCalls + + runtimeMetrics.NumGoroutine.Update(int64(runtime.NumGoroutine())) + + runtimeMetrics.NumThread.Update(int64(threadCreateProfile.Count())) +} + +// Register runtimeMetrics for the Go runtime statistics exported in runtime and +// specifically runtime.MemStats. The runtimeMetrics are named by their +// fully-qualified Go symbols, i.e. runtime.MemStats.Alloc. +func RegisterRuntimeMemStats(r Registry) { + runtimeMetrics.MemStats.Alloc = NewGauge() + runtimeMetrics.MemStats.BuckHashSys = NewGauge() + runtimeMetrics.MemStats.DebugGC = NewGauge() + runtimeMetrics.MemStats.EnableGC = NewGauge() + runtimeMetrics.MemStats.Frees = NewGauge() + runtimeMetrics.MemStats.HeapAlloc = NewGauge() + runtimeMetrics.MemStats.HeapIdle = NewGauge() + runtimeMetrics.MemStats.HeapInuse = NewGauge() + runtimeMetrics.MemStats.HeapObjects = NewGauge() + runtimeMetrics.MemStats.HeapReleased = NewGauge() + runtimeMetrics.MemStats.HeapSys = NewGauge() + runtimeMetrics.MemStats.LastGC = NewGauge() + runtimeMetrics.MemStats.Lookups = NewGauge() + runtimeMetrics.MemStats.Mallocs = NewGauge() + runtimeMetrics.MemStats.MCacheInuse = NewGauge() + runtimeMetrics.MemStats.MCacheSys = NewGauge() + runtimeMetrics.MemStats.MSpanInuse = NewGauge() + runtimeMetrics.MemStats.MSpanSys = NewGauge() + runtimeMetrics.MemStats.NextGC = NewGauge() + runtimeMetrics.MemStats.NumGC = NewGauge() + runtimeMetrics.MemStats.GCCPUFraction = NewGaugeFloat64() + runtimeMetrics.MemStats.PauseNs = NewHistogram(NewExpDecaySample(1028, 0.015)) + runtimeMetrics.MemStats.PauseTotalNs = NewGauge() + runtimeMetrics.MemStats.StackInuse = NewGauge() + runtimeMetrics.MemStats.StackSys = NewGauge() + runtimeMetrics.MemStats.Sys = NewGauge() + runtimeMetrics.MemStats.TotalAlloc = NewGauge() + runtimeMetrics.NumCgoCall = NewGauge() + runtimeMetrics.NumGoroutine = NewGauge() + runtimeMetrics.NumThread = NewGauge() + runtimeMetrics.ReadMemStats = NewTimer() + + r.Register("runtime.MemStats.Alloc", runtimeMetrics.MemStats.Alloc) + r.Register("runtime.MemStats.BuckHashSys", runtimeMetrics.MemStats.BuckHashSys) + r.Register("runtime.MemStats.DebugGC", runtimeMetrics.MemStats.DebugGC) + r.Register("runtime.MemStats.EnableGC", runtimeMetrics.MemStats.EnableGC) + r.Register("runtime.MemStats.Frees", runtimeMetrics.MemStats.Frees) + r.Register("runtime.MemStats.HeapAlloc", runtimeMetrics.MemStats.HeapAlloc) + r.Register("runtime.MemStats.HeapIdle", runtimeMetrics.MemStats.HeapIdle) + r.Register("runtime.MemStats.HeapInuse", runtimeMetrics.MemStats.HeapInuse) + r.Register("runtime.MemStats.HeapObjects", runtimeMetrics.MemStats.HeapObjects) + r.Register("runtime.MemStats.HeapReleased", runtimeMetrics.MemStats.HeapReleased) + r.Register("runtime.MemStats.HeapSys", runtimeMetrics.MemStats.HeapSys) + r.Register("runtime.MemStats.LastGC", runtimeMetrics.MemStats.LastGC) + r.Register("runtime.MemStats.Lookups", runtimeMetrics.MemStats.Lookups) + r.Register("runtime.MemStats.Mallocs", runtimeMetrics.MemStats.Mallocs) + r.Register("runtime.MemStats.MCacheInuse", runtimeMetrics.MemStats.MCacheInuse) + r.Register("runtime.MemStats.MCacheSys", runtimeMetrics.MemStats.MCacheSys) + r.Register("runtime.MemStats.MSpanInuse", runtimeMetrics.MemStats.MSpanInuse) + r.Register("runtime.MemStats.MSpanSys", runtimeMetrics.MemStats.MSpanSys) + r.Register("runtime.MemStats.NextGC", runtimeMetrics.MemStats.NextGC) + r.Register("runtime.MemStats.NumGC", runtimeMetrics.MemStats.NumGC) + r.Register("runtime.MemStats.GCCPUFraction", runtimeMetrics.MemStats.GCCPUFraction) + r.Register("runtime.MemStats.PauseNs", runtimeMetrics.MemStats.PauseNs) + r.Register("runtime.MemStats.PauseTotalNs", runtimeMetrics.MemStats.PauseTotalNs) + r.Register("runtime.MemStats.StackInuse", runtimeMetrics.MemStats.StackInuse) + r.Register("runtime.MemStats.StackSys", runtimeMetrics.MemStats.StackSys) + r.Register("runtime.MemStats.Sys", runtimeMetrics.MemStats.Sys) + r.Register("runtime.MemStats.TotalAlloc", runtimeMetrics.MemStats.TotalAlloc) + r.Register("runtime.NumCgoCall", runtimeMetrics.NumCgoCall) + r.Register("runtime.NumGoroutine", runtimeMetrics.NumGoroutine) + r.Register("runtime.NumThread", runtimeMetrics.NumThread) + r.Register("runtime.ReadMemStats", runtimeMetrics.ReadMemStats) +} diff --git a/metrics/runtime_cgo.go b/metrics/runtime_cgo.go new file mode 100644 index 000000000..e3391f4e8 --- /dev/null +++ b/metrics/runtime_cgo.go @@ -0,0 +1,10 @@ +// +build cgo +// +build !appengine + +package metrics + +import "runtime" + +func numCgoCall() int64 { + return runtime.NumCgoCall() +} diff --git a/metrics/runtime_gccpufraction.go b/metrics/runtime_gccpufraction.go new file mode 100644 index 000000000..ca12c05ba --- /dev/null +++ b/metrics/runtime_gccpufraction.go @@ -0,0 +1,9 @@ +// +build go1.5 + +package metrics + +import "runtime" + +func gcCPUFraction(memStats *runtime.MemStats) float64 { + return memStats.GCCPUFraction +} diff --git a/metrics/runtime_no_cgo.go b/metrics/runtime_no_cgo.go new file mode 100644 index 000000000..616a3b475 --- /dev/null +++ b/metrics/runtime_no_cgo.go @@ -0,0 +1,7 @@ +// +build !cgo appengine + +package metrics + +func numCgoCall() int64 { + return 0 +} diff --git a/metrics/runtime_no_gccpufraction.go b/metrics/runtime_no_gccpufraction.go new file mode 100644 index 000000000..be96aa6f1 --- /dev/null +++ b/metrics/runtime_no_gccpufraction.go @@ -0,0 +1,9 @@ +// +build !go1.5 + +package metrics + +import "runtime" + +func gcCPUFraction(memStats *runtime.MemStats) float64 { + return 0 +} diff --git a/metrics/runtime_test.go b/metrics/runtime_test.go new file mode 100644 index 000000000..ebbfd501a --- /dev/null +++ b/metrics/runtime_test.go @@ -0,0 +1,88 @@ +package metrics + +import ( + "runtime" + "testing" + "time" +) + +func BenchmarkRuntimeMemStats(b *testing.B) { + r := NewRegistry() + RegisterRuntimeMemStats(r) + b.ResetTimer() + for i := 0; i < b.N; i++ { + CaptureRuntimeMemStatsOnce(r) + } +} + +func TestRuntimeMemStats(t *testing.T) { + r := NewRegistry() + RegisterRuntimeMemStats(r) + CaptureRuntimeMemStatsOnce(r) + zero := runtimeMetrics.MemStats.PauseNs.Count() // Get a "zero" since GC may have run before these tests. + runtime.GC() + CaptureRuntimeMemStatsOnce(r) + if count := runtimeMetrics.MemStats.PauseNs.Count(); 1 != count-zero { + t.Fatal(count - zero) + } + runtime.GC() + runtime.GC() + CaptureRuntimeMemStatsOnce(r) + if count := runtimeMetrics.MemStats.PauseNs.Count(); 3 != count-zero { + t.Fatal(count - zero) + } + for i := 0; i < 256; i++ { + runtime.GC() + } + CaptureRuntimeMemStatsOnce(r) + if count := runtimeMetrics.MemStats.PauseNs.Count(); 259 != count-zero { + t.Fatal(count - zero) + } + for i := 0; i < 257; i++ { + runtime.GC() + } + CaptureRuntimeMemStatsOnce(r) + if count := runtimeMetrics.MemStats.PauseNs.Count(); 515 != count-zero { // We lost one because there were too many GCs between captures. + t.Fatal(count - zero) + } +} + +func TestRuntimeMemStatsNumThread(t *testing.T) { + r := NewRegistry() + RegisterRuntimeMemStats(r) + CaptureRuntimeMemStatsOnce(r) + + if value := runtimeMetrics.NumThread.Value(); value < 1 { + t.Fatalf("got NumThread: %d, wanted at least 1", value) + } +} + +func TestRuntimeMemStatsBlocking(t *testing.T) { + if g := runtime.GOMAXPROCS(0); g < 2 { + t.Skipf("skipping TestRuntimeMemStatsBlocking with GOMAXPROCS=%d\n", g) + } + ch := make(chan int) + go testRuntimeMemStatsBlocking(ch) + var memStats runtime.MemStats + t0 := time.Now() + runtime.ReadMemStats(&memStats) + t1 := time.Now() + t.Log("i++ during runtime.ReadMemStats:", <-ch) + go testRuntimeMemStatsBlocking(ch) + d := t1.Sub(t0) + t.Log(d) + time.Sleep(d) + t.Log("i++ during time.Sleep:", <-ch) +} + +func testRuntimeMemStatsBlocking(ch chan int) { + i := 0 + for { + select { + case ch <- i: + return + default: + i++ + } + } +} diff --git a/metrics/sample.go b/metrics/sample.go new file mode 100644 index 000000000..5c4845a4f --- /dev/null +++ b/metrics/sample.go @@ -0,0 +1,616 @@ +package metrics + +import ( + "math" + "math/rand" + "sort" + "sync" + "time" +) + +const rescaleThreshold = time.Hour + +// Samples maintain a statistically-significant selection of values from +// a stream. +type Sample interface { + Clear() + Count() int64 + Max() int64 + Mean() float64 + Min() int64 + Percentile(float64) float64 + Percentiles([]float64) []float64 + Size() int + Snapshot() Sample + StdDev() float64 + Sum() int64 + Update(int64) + Values() []int64 + Variance() float64 +} + +// ExpDecaySample is an exponentially-decaying sample using a forward-decaying +// priority reservoir. See Cormode et al's "Forward Decay: A Practical Time +// Decay Model for Streaming Systems". +// +// <http://dimacs.rutgers.edu/~graham/pubs/papers/fwddecay.pdf> +type ExpDecaySample struct { + alpha float64 + count int64 + mutex sync.Mutex + reservoirSize int + t0, t1 time.Time + values *expDecaySampleHeap +} + +// NewExpDecaySample constructs a new exponentially-decaying sample with the +// given reservoir size and alpha. +func NewExpDecaySample(reservoirSize int, alpha float64) Sample { + if !Enabled { + return NilSample{} + } + s := &ExpDecaySample{ + alpha: alpha, + reservoirSize: reservoirSize, + t0: time.Now(), + values: newExpDecaySampleHeap(reservoirSize), + } + s.t1 = s.t0.Add(rescaleThreshold) + return s +} + +// Clear clears all samples. +func (s *ExpDecaySample) Clear() { + s.mutex.Lock() + defer s.mutex.Unlock() + s.count = 0 + s.t0 = time.Now() + s.t1 = s.t0.Add(rescaleThreshold) + s.values.Clear() +} + +// Count returns the number of samples recorded, which may exceed the +// reservoir size. +func (s *ExpDecaySample) Count() int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return s.count +} + +// Max returns the maximum value in the sample, which may not be the maximum +// value ever to be part of the sample. +func (s *ExpDecaySample) Max() int64 { + return SampleMax(s.Values()) +} + +// Mean returns the mean of the values in the sample. +func (s *ExpDecaySample) Mean() float64 { + return SampleMean(s.Values()) +} + +// Min returns the minimum value in the sample, which may not be the minimum +// value ever to be part of the sample. +func (s *ExpDecaySample) Min() int64 { + return SampleMin(s.Values()) +} + +// Percentile returns an arbitrary percentile of values in the sample. +func (s *ExpDecaySample) Percentile(p float64) float64 { + return SamplePercentile(s.Values(), p) +} + +// Percentiles returns a slice of arbitrary percentiles of values in the +// sample. +func (s *ExpDecaySample) Percentiles(ps []float64) []float64 { + return SamplePercentiles(s.Values(), ps) +} + +// Size returns the size of the sample, which is at most the reservoir size. +func (s *ExpDecaySample) Size() int { + s.mutex.Lock() + defer s.mutex.Unlock() + return s.values.Size() +} + +// Snapshot returns a read-only copy of the sample. +func (s *ExpDecaySample) Snapshot() Sample { + s.mutex.Lock() + defer s.mutex.Unlock() + vals := s.values.Values() + values := make([]int64, len(vals)) + for i, v := range vals { + values[i] = v.v + } + return &SampleSnapshot{ + count: s.count, + values: values, + } +} + +// StdDev returns the standard deviation of the values in the sample. +func (s *ExpDecaySample) StdDev() float64 { + return SampleStdDev(s.Values()) +} + +// Sum returns the sum of the values in the sample. +func (s *ExpDecaySample) Sum() int64 { + return SampleSum(s.Values()) +} + +// Update samples a new value. +func (s *ExpDecaySample) Update(v int64) { + s.update(time.Now(), v) +} + +// Values returns a copy of the values in the sample. +func (s *ExpDecaySample) Values() []int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + vals := s.values.Values() + values := make([]int64, len(vals)) + for i, v := range vals { + values[i] = v.v + } + return values +} + +// Variance returns the variance of the values in the sample. +func (s *ExpDecaySample) Variance() float64 { + return SampleVariance(s.Values()) +} + +// update samples a new value at a particular timestamp. This is a method all +// its own to facilitate testing. +func (s *ExpDecaySample) update(t time.Time, v int64) { + s.mutex.Lock() + defer s.mutex.Unlock() + s.count++ + if s.values.Size() == s.reservoirSize { + s.values.Pop() + } + s.values.Push(expDecaySample{ + k: math.Exp(t.Sub(s.t0).Seconds()*s.alpha) / rand.Float64(), + v: v, + }) + if t.After(s.t1) { + values := s.values.Values() + t0 := s.t0 + s.values.Clear() + s.t0 = t + s.t1 = s.t0.Add(rescaleThreshold) + for _, v := range values { + v.k = v.k * math.Exp(-s.alpha*s.t0.Sub(t0).Seconds()) + s.values.Push(v) + } + } +} + +// NilSample is a no-op Sample. +type NilSample struct{} + +// Clear is a no-op. +func (NilSample) Clear() {} + +// Count is a no-op. +func (NilSample) Count() int64 { return 0 } + +// Max is a no-op. +func (NilSample) Max() int64 { return 0 } + +// Mean is a no-op. +func (NilSample) Mean() float64 { return 0.0 } + +// Min is a no-op. +func (NilSample) Min() int64 { return 0 } + +// Percentile is a no-op. +func (NilSample) Percentile(p float64) float64 { return 0.0 } + +// Percentiles is a no-op. +func (NilSample) Percentiles(ps []float64) []float64 { + return make([]float64, len(ps)) +} + +// Size is a no-op. +func (NilSample) Size() int { return 0 } + +// Sample is a no-op. +func (NilSample) Snapshot() Sample { return NilSample{} } + +// StdDev is a no-op. +func (NilSample) StdDev() float64 { return 0.0 } + +// Sum is a no-op. +func (NilSample) Sum() int64 { return 0 } + +// Update is a no-op. +func (NilSample) Update(v int64) {} + +// Values is a no-op. +func (NilSample) Values() []int64 { return []int64{} } + +// Variance is a no-op. +func (NilSample) Variance() float64 { return 0.0 } + +// SampleMax returns the maximum value of the slice of int64. +func SampleMax(values []int64) int64 { + if 0 == len(values) { + return 0 + } + var max int64 = math.MinInt64 + for _, v := range values { + if max < v { + max = v + } + } + return max +} + +// SampleMean returns the mean value of the slice of int64. +func SampleMean(values []int64) float64 { + if 0 == len(values) { + return 0.0 + } + return float64(SampleSum(values)) / float64(len(values)) +} + +// SampleMin returns the minimum value of the slice of int64. +func SampleMin(values []int64) int64 { + if 0 == len(values) { + return 0 + } + var min int64 = math.MaxInt64 + for _, v := range values { + if min > v { + min = v + } + } + return min +} + +// SamplePercentiles returns an arbitrary percentile of the slice of int64. +func SamplePercentile(values int64Slice, p float64) float64 { + return SamplePercentiles(values, []float64{p})[0] +} + +// SamplePercentiles returns a slice of arbitrary percentiles of the slice of +// int64. +func SamplePercentiles(values int64Slice, ps []float64) []float64 { + scores := make([]float64, len(ps)) + size := len(values) + if size > 0 { + sort.Sort(values) + for i, p := range ps { + pos := p * float64(size+1) + if pos < 1.0 { + scores[i] = float64(values[0]) + } else if pos >= float64(size) { + scores[i] = float64(values[size-1]) + } else { + lower := float64(values[int(pos)-1]) + upper := float64(values[int(pos)]) + scores[i] = lower + (pos-math.Floor(pos))*(upper-lower) + } + } + } + return scores +} + +// SampleSnapshot is a read-only copy of another Sample. +type SampleSnapshot struct { + count int64 + values []int64 +} + +func NewSampleSnapshot(count int64, values []int64) *SampleSnapshot { + return &SampleSnapshot{ + count: count, + values: values, + } +} + +// Clear panics. +func (*SampleSnapshot) Clear() { + panic("Clear called on a SampleSnapshot") +} + +// Count returns the count of inputs at the time the snapshot was taken. +func (s *SampleSnapshot) Count() int64 { return s.count } + +// Max returns the maximal value at the time the snapshot was taken. +func (s *SampleSnapshot) Max() int64 { return SampleMax(s.values) } + +// Mean returns the mean value at the time the snapshot was taken. +func (s *SampleSnapshot) Mean() float64 { return SampleMean(s.values) } + +// Min returns the minimal value at the time the snapshot was taken. +func (s *SampleSnapshot) Min() int64 { return SampleMin(s.values) } + +// Percentile returns an arbitrary percentile of values at the time the +// snapshot was taken. +func (s *SampleSnapshot) Percentile(p float64) float64 { + return SamplePercentile(s.values, p) +} + +// Percentiles returns a slice of arbitrary percentiles of values at the time +// the snapshot was taken. +func (s *SampleSnapshot) Percentiles(ps []float64) []float64 { + return SamplePercentiles(s.values, ps) +} + +// Size returns the size of the sample at the time the snapshot was taken. +func (s *SampleSnapshot) Size() int { return len(s.values) } + +// Snapshot returns the snapshot. +func (s *SampleSnapshot) Snapshot() Sample { return s } + +// StdDev returns the standard deviation of values at the time the snapshot was +// taken. +func (s *SampleSnapshot) StdDev() float64 { return SampleStdDev(s.values) } + +// Sum returns the sum of values at the time the snapshot was taken. +func (s *SampleSnapshot) Sum() int64 { return SampleSum(s.values) } + +// Update panics. +func (*SampleSnapshot) Update(int64) { + panic("Update called on a SampleSnapshot") +} + +// Values returns a copy of the values in the sample. +func (s *SampleSnapshot) Values() []int64 { + values := make([]int64, len(s.values)) + copy(values, s.values) + return values +} + +// Variance returns the variance of values at the time the snapshot was taken. +func (s *SampleSnapshot) Variance() float64 { return SampleVariance(s.values) } + +// SampleStdDev returns the standard deviation of the slice of int64. +func SampleStdDev(values []int64) float64 { + return math.Sqrt(SampleVariance(values)) +} + +// SampleSum returns the sum of the slice of int64. +func SampleSum(values []int64) int64 { + var sum int64 + for _, v := range values { + sum += v + } + return sum +} + +// SampleVariance returns the variance of the slice of int64. +func SampleVariance(values []int64) float64 { + if 0 == len(values) { + return 0.0 + } + m := SampleMean(values) + var sum float64 + for _, v := range values { + d := float64(v) - m + sum += d * d + } + return sum / float64(len(values)) +} + +// A uniform sample using Vitter's Algorithm R. +// +// <http://www.cs.umd.edu/~samir/498/vitter.pdf> +type UniformSample struct { + count int64 + mutex sync.Mutex + reservoirSize int + values []int64 +} + +// NewUniformSample constructs a new uniform sample with the given reservoir +// size. +func NewUniformSample(reservoirSize int) Sample { + if !Enabled { + return NilSample{} + } + return &UniformSample{ + reservoirSize: reservoirSize, + values: make([]int64, 0, reservoirSize), + } +} + +// Clear clears all samples. +func (s *UniformSample) Clear() { + s.mutex.Lock() + defer s.mutex.Unlock() + s.count = 0 + s.values = make([]int64, 0, s.reservoirSize) +} + +// Count returns the number of samples recorded, which may exceed the +// reservoir size. +func (s *UniformSample) Count() int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return s.count +} + +// Max returns the maximum value in the sample, which may not be the maximum +// value ever to be part of the sample. +func (s *UniformSample) Max() int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SampleMax(s.values) +} + +// Mean returns the mean of the values in the sample. +func (s *UniformSample) Mean() float64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SampleMean(s.values) +} + +// Min returns the minimum value in the sample, which may not be the minimum +// value ever to be part of the sample. +func (s *UniformSample) Min() int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SampleMin(s.values) +} + +// Percentile returns an arbitrary percentile of values in the sample. +func (s *UniformSample) Percentile(p float64) float64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SamplePercentile(s.values, p) +} + +// Percentiles returns a slice of arbitrary percentiles of values in the +// sample. +func (s *UniformSample) Percentiles(ps []float64) []float64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SamplePercentiles(s.values, ps) +} + +// Size returns the size of the sample, which is at most the reservoir size. +func (s *UniformSample) Size() int { + s.mutex.Lock() + defer s.mutex.Unlock() + return len(s.values) +} + +// Snapshot returns a read-only copy of the sample. +func (s *UniformSample) Snapshot() Sample { + s.mutex.Lock() + defer s.mutex.Unlock() + values := make([]int64, len(s.values)) + copy(values, s.values) + return &SampleSnapshot{ + count: s.count, + values: values, + } +} + +// StdDev returns the standard deviation of the values in the sample. +func (s *UniformSample) StdDev() float64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SampleStdDev(s.values) +} + +// Sum returns the sum of the values in the sample. +func (s *UniformSample) Sum() int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SampleSum(s.values) +} + +// Update samples a new value. +func (s *UniformSample) Update(v int64) { + s.mutex.Lock() + defer s.mutex.Unlock() + s.count++ + if len(s.values) < s.reservoirSize { + s.values = append(s.values, v) + } else { + r := rand.Int63n(s.count) + if r < int64(len(s.values)) { + s.values[int(r)] = v + } + } +} + +// Values returns a copy of the values in the sample. +func (s *UniformSample) Values() []int64 { + s.mutex.Lock() + defer s.mutex.Unlock() + values := make([]int64, len(s.values)) + copy(values, s.values) + return values +} + +// Variance returns the variance of the values in the sample. +func (s *UniformSample) Variance() float64 { + s.mutex.Lock() + defer s.mutex.Unlock() + return SampleVariance(s.values) +} + +// expDecaySample represents an individual sample in a heap. +type expDecaySample struct { + k float64 + v int64 +} + +func newExpDecaySampleHeap(reservoirSize int) *expDecaySampleHeap { + return &expDecaySampleHeap{make([]expDecaySample, 0, reservoirSize)} +} + +// expDecaySampleHeap is a min-heap of expDecaySamples. +// The internal implementation is copied from the standard library's container/heap +type expDecaySampleHeap struct { + s []expDecaySample +} + +func (h *expDecaySampleHeap) Clear() { + h.s = h.s[:0] +} + +func (h *expDecaySampleHeap) Push(s expDecaySample) { + n := len(h.s) + h.s = h.s[0 : n+1] + h.s[n] = s + h.up(n) +} + +func (h *expDecaySampleHeap) Pop() expDecaySample { + n := len(h.s) - 1 + h.s[0], h.s[n] = h.s[n], h.s[0] + h.down(0, n) + + n = len(h.s) + s := h.s[n-1] + h.s = h.s[0 : n-1] + return s +} + +func (h *expDecaySampleHeap) Size() int { + return len(h.s) +} + +func (h *expDecaySampleHeap) Values() []expDecaySample { + return h.s +} + +func (h *expDecaySampleHeap) up(j int) { + for { + i := (j - 1) / 2 // parent + if i == j || !(h.s[j].k < h.s[i].k) { + break + } + h.s[i], h.s[j] = h.s[j], h.s[i] + j = i + } +} + +func (h *expDecaySampleHeap) down(i, n int) { + for { + j1 := 2*i + 1 + if j1 >= n || j1 < 0 { // j1 < 0 after int overflow + break + } + j := j1 // left child + if j2 := j1 + 1; j2 < n && !(h.s[j1].k < h.s[j2].k) { + j = j2 // = 2*i + 2 // right child + } + if !(h.s[j].k < h.s[i].k) { + break + } + h.s[i], h.s[j] = h.s[j], h.s[i] + i = j + } +} + +type int64Slice []int64 + +func (p int64Slice) Len() int { return len(p) } +func (p int64Slice) Less(i, j int) bool { return p[i] < p[j] } +func (p int64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } diff --git a/metrics/sample_test.go b/metrics/sample_test.go new file mode 100644 index 000000000..d60e99c5b --- /dev/null +++ b/metrics/sample_test.go @@ -0,0 +1,363 @@ +package metrics + +import ( + "math/rand" + "runtime" + "testing" + "time" +) + +// Benchmark{Compute,Copy}{1000,1000000} demonstrate that, even for relatively +// expensive computations like Variance, the cost of copying the Sample, as +// approximated by a make and copy, is much greater than the cost of the +// computation for small samples and only slightly less for large samples. +func BenchmarkCompute1000(b *testing.B) { + s := make([]int64, 1000) + for i := 0; i < len(s); i++ { + s[i] = int64(i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + SampleVariance(s) + } +} +func BenchmarkCompute1000000(b *testing.B) { + s := make([]int64, 1000000) + for i := 0; i < len(s); i++ { + s[i] = int64(i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + SampleVariance(s) + } +} +func BenchmarkCopy1000(b *testing.B) { + s := make([]int64, 1000) + for i := 0; i < len(s); i++ { + s[i] = int64(i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + sCopy := make([]int64, len(s)) + copy(sCopy, s) + } +} +func BenchmarkCopy1000000(b *testing.B) { + s := make([]int64, 1000000) + for i := 0; i < len(s); i++ { + s[i] = int64(i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + sCopy := make([]int64, len(s)) + copy(sCopy, s) + } +} + +func BenchmarkExpDecaySample257(b *testing.B) { + benchmarkSample(b, NewExpDecaySample(257, 0.015)) +} + +func BenchmarkExpDecaySample514(b *testing.B) { + benchmarkSample(b, NewExpDecaySample(514, 0.015)) +} + +func BenchmarkExpDecaySample1028(b *testing.B) { + benchmarkSample(b, NewExpDecaySample(1028, 0.015)) +} + +func BenchmarkUniformSample257(b *testing.B) { + benchmarkSample(b, NewUniformSample(257)) +} + +func BenchmarkUniformSample514(b *testing.B) { + benchmarkSample(b, NewUniformSample(514)) +} + +func BenchmarkUniformSample1028(b *testing.B) { + benchmarkSample(b, NewUniformSample(1028)) +} + +func TestExpDecaySample10(t *testing.T) { + rand.Seed(1) + s := NewExpDecaySample(100, 0.99) + for i := 0; i < 10; i++ { + s.Update(int64(i)) + } + if size := s.Count(); 10 != size { + t.Errorf("s.Count(): 10 != %v\n", size) + } + if size := s.Size(); 10 != size { + t.Errorf("s.Size(): 10 != %v\n", size) + } + if l := len(s.Values()); 10 != l { + t.Errorf("len(s.Values()): 10 != %v\n", l) + } + for _, v := range s.Values() { + if v > 10 || v < 0 { + t.Errorf("out of range [0, 10): %v\n", v) + } + } +} + +func TestExpDecaySample100(t *testing.T) { + rand.Seed(1) + s := NewExpDecaySample(1000, 0.01) + for i := 0; i < 100; i++ { + s.Update(int64(i)) + } + if size := s.Count(); 100 != size { + t.Errorf("s.Count(): 100 != %v\n", size) + } + if size := s.Size(); 100 != size { + t.Errorf("s.Size(): 100 != %v\n", size) + } + if l := len(s.Values()); 100 != l { + t.Errorf("len(s.Values()): 100 != %v\n", l) + } + for _, v := range s.Values() { + if v > 100 || v < 0 { + t.Errorf("out of range [0, 100): %v\n", v) + } + } +} + +func TestExpDecaySample1000(t *testing.T) { + rand.Seed(1) + s := NewExpDecaySample(100, 0.99) + for i := 0; i < 1000; i++ { + s.Update(int64(i)) + } + if size := s.Count(); 1000 != size { + t.Errorf("s.Count(): 1000 != %v\n", size) + } + if size := s.Size(); 100 != size { + t.Errorf("s.Size(): 100 != %v\n", size) + } + if l := len(s.Values()); 100 != l { + t.Errorf("len(s.Values()): 100 != %v\n", l) + } + for _, v := range s.Values() { + if v > 1000 || v < 0 { + t.Errorf("out of range [0, 1000): %v\n", v) + } + } +} + +// This test makes sure that the sample's priority is not amplified by using +// nanosecond duration since start rather than second duration since start. +// The priority becomes +Inf quickly after starting if this is done, +// effectively freezing the set of samples until a rescale step happens. +func TestExpDecaySampleNanosecondRegression(t *testing.T) { + rand.Seed(1) + s := NewExpDecaySample(100, 0.99) + for i := 0; i < 100; i++ { + s.Update(10) + } + time.Sleep(1 * time.Millisecond) + for i := 0; i < 100; i++ { + s.Update(20) + } + v := s.Values() + avg := float64(0) + for i := 0; i < len(v); i++ { + avg += float64(v[i]) + } + avg /= float64(len(v)) + if avg > 16 || avg < 14 { + t.Errorf("out of range [14, 16]: %v\n", avg) + } +} + +func TestExpDecaySampleRescale(t *testing.T) { + s := NewExpDecaySample(2, 0.001).(*ExpDecaySample) + s.update(time.Now(), 1) + s.update(time.Now().Add(time.Hour+time.Microsecond), 1) + for _, v := range s.values.Values() { + if v.k == 0.0 { + t.Fatal("v.k == 0.0") + } + } +} + +func TestExpDecaySampleSnapshot(t *testing.T) { + now := time.Now() + rand.Seed(1) + s := NewExpDecaySample(100, 0.99) + for i := 1; i <= 10000; i++ { + s.(*ExpDecaySample).update(now.Add(time.Duration(i)), int64(i)) + } + snapshot := s.Snapshot() + s.Update(1) + testExpDecaySampleStatistics(t, snapshot) +} + +func TestExpDecaySampleStatistics(t *testing.T) { + now := time.Now() + rand.Seed(1) + s := NewExpDecaySample(100, 0.99) + for i := 1; i <= 10000; i++ { + s.(*ExpDecaySample).update(now.Add(time.Duration(i)), int64(i)) + } + testExpDecaySampleStatistics(t, s) +} + +func TestUniformSample(t *testing.T) { + rand.Seed(1) + s := NewUniformSample(100) + for i := 0; i < 1000; i++ { + s.Update(int64(i)) + } + if size := s.Count(); 1000 != size { + t.Errorf("s.Count(): 1000 != %v\n", size) + } + if size := s.Size(); 100 != size { + t.Errorf("s.Size(): 100 != %v\n", size) + } + if l := len(s.Values()); 100 != l { + t.Errorf("len(s.Values()): 100 != %v\n", l) + } + for _, v := range s.Values() { + if v > 1000 || v < 0 { + t.Errorf("out of range [0, 100): %v\n", v) + } + } +} + +func TestUniformSampleIncludesTail(t *testing.T) { + rand.Seed(1) + s := NewUniformSample(100) + max := 100 + for i := 0; i < max; i++ { + s.Update(int64(i)) + } + v := s.Values() + sum := 0 + exp := (max - 1) * max / 2 + for i := 0; i < len(v); i++ { + sum += int(v[i]) + } + if exp != sum { + t.Errorf("sum: %v != %v\n", exp, sum) + } +} + +func TestUniformSampleSnapshot(t *testing.T) { + s := NewUniformSample(100) + for i := 1; i <= 10000; i++ { + s.Update(int64(i)) + } + snapshot := s.Snapshot() + s.Update(1) + testUniformSampleStatistics(t, snapshot) +} + +func TestUniformSampleStatistics(t *testing.T) { + rand.Seed(1) + s := NewUniformSample(100) + for i := 1; i <= 10000; i++ { + s.Update(int64(i)) + } + testUniformSampleStatistics(t, s) +} + +func benchmarkSample(b *testing.B, s Sample) { + var memStats runtime.MemStats + runtime.ReadMemStats(&memStats) + pauseTotalNs := memStats.PauseTotalNs + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.Update(1) + } + b.StopTimer() + runtime.GC() + runtime.ReadMemStats(&memStats) + b.Logf("GC cost: %d ns/op", int(memStats.PauseTotalNs-pauseTotalNs)/b.N) +} + +func testExpDecaySampleStatistics(t *testing.T, s Sample) { + if count := s.Count(); 10000 != count { + t.Errorf("s.Count(): 10000 != %v\n", count) + } + if min := s.Min(); 107 != min { + t.Errorf("s.Min(): 107 != %v\n", min) + } + if max := s.Max(); 10000 != max { + t.Errorf("s.Max(): 10000 != %v\n", max) + } + if mean := s.Mean(); 4965.98 != mean { + t.Errorf("s.Mean(): 4965.98 != %v\n", mean) + } + if stdDev := s.StdDev(); 2959.825156930727 != stdDev { + t.Errorf("s.StdDev(): 2959.825156930727 != %v\n", stdDev) + } + ps := s.Percentiles([]float64{0.5, 0.75, 0.99}) + if 4615 != ps[0] { + t.Errorf("median: 4615 != %v\n", ps[0]) + } + if 7672 != ps[1] { + t.Errorf("75th percentile: 7672 != %v\n", ps[1]) + } + if 9998.99 != ps[2] { + t.Errorf("99th percentile: 9998.99 != %v\n", ps[2]) + } +} + +func testUniformSampleStatistics(t *testing.T, s Sample) { + if count := s.Count(); 10000 != count { + t.Errorf("s.Count(): 10000 != %v\n", count) + } + if min := s.Min(); 37 != min { + t.Errorf("s.Min(): 37 != %v\n", min) + } + if max := s.Max(); 9989 != max { + t.Errorf("s.Max(): 9989 != %v\n", max) + } + if mean := s.Mean(); 4748.14 != mean { + t.Errorf("s.Mean(): 4748.14 != %v\n", mean) + } + if stdDev := s.StdDev(); 2826.684117548333 != stdDev { + t.Errorf("s.StdDev(): 2826.684117548333 != %v\n", stdDev) + } + ps := s.Percentiles([]float64{0.5, 0.75, 0.99}) + if 4599 != ps[0] { + t.Errorf("median: 4599 != %v\n", ps[0]) + } + if 7380.5 != ps[1] { + t.Errorf("75th percentile: 7380.5 != %v\n", ps[1]) + } + if 9986.429999999998 != ps[2] { + t.Errorf("99th percentile: 9986.429999999998 != %v\n", ps[2]) + } +} + +// TestUniformSampleConcurrentUpdateCount would expose data race problems with +// concurrent Update and Count calls on Sample when test is called with -race +// argument +func TestUniformSampleConcurrentUpdateCount(t *testing.T) { + if testing.Short() { + t.Skip("skipping in short mode") + } + s := NewUniformSample(100) + for i := 0; i < 100; i++ { + s.Update(int64(i)) + } + quit := make(chan struct{}) + go func() { + t := time.NewTicker(10 * time.Millisecond) + for { + select { + case <-t.C: + s.Update(rand.Int63()) + case <-quit: + t.Stop() + return + } + } + }() + for i := 0; i < 1000; i++ { + s.Count() + time.Sleep(5 * time.Millisecond) + } + quit <- struct{}{} +} diff --git a/metrics/syslog.go b/metrics/syslog.go new file mode 100644 index 000000000..a0ed4b1b2 --- /dev/null +++ b/metrics/syslog.go @@ -0,0 +1,78 @@ +// +build !windows + +package metrics + +import ( + "fmt" + "log/syslog" + "time" +) + +// Output each metric in the given registry to syslog periodically using +// the given syslogger. +func Syslog(r Registry, d time.Duration, w *syslog.Writer) { + for range time.Tick(d) { + r.Each(func(name string, i interface{}) { + switch metric := i.(type) { + case Counter: + w.Info(fmt.Sprintf("counter %s: count: %d", name, metric.Count())) + case Gauge: + w.Info(fmt.Sprintf("gauge %s: value: %d", name, metric.Value())) + case GaugeFloat64: + w.Info(fmt.Sprintf("gauge %s: value: %f", name, metric.Value())) + case Healthcheck: + metric.Check() + w.Info(fmt.Sprintf("healthcheck %s: error: %v", name, metric.Error())) + case Histogram: + h := metric.Snapshot() + ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + w.Info(fmt.Sprintf( + "histogram %s: count: %d min: %d max: %d mean: %.2f stddev: %.2f median: %.2f 75%%: %.2f 95%%: %.2f 99%%: %.2f 99.9%%: %.2f", + name, + h.Count(), + h.Min(), + h.Max(), + h.Mean(), + h.StdDev(), + ps[0], + ps[1], + ps[2], + ps[3], + ps[4], + )) + case Meter: + m := metric.Snapshot() + w.Info(fmt.Sprintf( + "meter %s: count: %d 1-min: %.2f 5-min: %.2f 15-min: %.2f mean: %.2f", + name, + m.Count(), + m.Rate1(), + m.Rate5(), + m.Rate15(), + m.RateMean(), + )) + case Timer: + t := metric.Snapshot() + ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + w.Info(fmt.Sprintf( + "timer %s: count: %d min: %d max: %d mean: %.2f stddev: %.2f median: %.2f 75%%: %.2f 95%%: %.2f 99%%: %.2f 99.9%%: %.2f 1-min: %.2f 5-min: %.2f 15-min: %.2f mean-rate: %.2f", + name, + t.Count(), + t.Min(), + t.Max(), + t.Mean(), + t.StdDev(), + ps[0], + ps[1], + ps[2], + ps[3], + ps[4], + t.Rate1(), + t.Rate5(), + t.Rate15(), + t.RateMean(), + )) + } + }) + } +} diff --git a/metrics/timer.go b/metrics/timer.go new file mode 100644 index 000000000..89e22208f --- /dev/null +++ b/metrics/timer.go @@ -0,0 +1,329 @@ +package metrics + +import ( + "sync" + "time" +) + +// Timers capture the duration and rate of events. +type Timer interface { + Count() int64 + Max() int64 + Mean() float64 + Min() int64 + Percentile(float64) float64 + Percentiles([]float64) []float64 + Rate1() float64 + Rate5() float64 + Rate15() float64 + RateMean() float64 + Snapshot() Timer + StdDev() float64 + Stop() + Sum() int64 + Time(func()) + Update(time.Duration) + UpdateSince(time.Time) + Variance() float64 +} + +// GetOrRegisterTimer returns an existing Timer or constructs and registers a +// new StandardTimer. +// Be sure to unregister the meter from the registry once it is of no use to +// allow for garbage collection. +func GetOrRegisterTimer(name string, r Registry) Timer { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewTimer).(Timer) +} + +// NewCustomTimer constructs a new StandardTimer from a Histogram and a Meter. +// Be sure to call Stop() once the timer is of no use to allow for garbage collection. +func NewCustomTimer(h Histogram, m Meter) Timer { + if !Enabled { + return NilTimer{} + } + return &StandardTimer{ + histogram: h, + meter: m, + } +} + +// NewRegisteredTimer constructs and registers a new StandardTimer. +// Be sure to unregister the meter from the registry once it is of no use to +// allow for garbage collection. +func NewRegisteredTimer(name string, r Registry) Timer { + c := NewTimer() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// NewTimer constructs a new StandardTimer using an exponentially-decaying +// sample with the same reservoir size and alpha as UNIX load averages. +// Be sure to call Stop() once the timer is of no use to allow for garbage collection. +func NewTimer() Timer { + if !Enabled { + return NilTimer{} + } + return &StandardTimer{ + histogram: NewHistogram(NewExpDecaySample(1028, 0.015)), + meter: NewMeter(), + } +} + +// NilTimer is a no-op Timer. +type NilTimer struct { + h Histogram + m Meter +} + +// Count is a no-op. +func (NilTimer) Count() int64 { return 0 } + +// Max is a no-op. +func (NilTimer) Max() int64 { return 0 } + +// Mean is a no-op. +func (NilTimer) Mean() float64 { return 0.0 } + +// Min is a no-op. +func (NilTimer) Min() int64 { return 0 } + +// Percentile is a no-op. +func (NilTimer) Percentile(p float64) float64 { return 0.0 } + +// Percentiles is a no-op. +func (NilTimer) Percentiles(ps []float64) []float64 { + return make([]float64, len(ps)) +} + +// Rate1 is a no-op. +func (NilTimer) Rate1() float64 { return 0.0 } + +// Rate5 is a no-op. +func (NilTimer) Rate5() float64 { return 0.0 } + +// Rate15 is a no-op. +func (NilTimer) Rate15() float64 { return 0.0 } + +// RateMean is a no-op. +func (NilTimer) RateMean() float64 { return 0.0 } + +// Snapshot is a no-op. +func (NilTimer) Snapshot() Timer { return NilTimer{} } + +// StdDev is a no-op. +func (NilTimer) StdDev() float64 { return 0.0 } + +// Stop is a no-op. +func (NilTimer) Stop() {} + +// Sum is a no-op. +func (NilTimer) Sum() int64 { return 0 } + +// Time is a no-op. +func (NilTimer) Time(func()) {} + +// Update is a no-op. +func (NilTimer) Update(time.Duration) {} + +// UpdateSince is a no-op. +func (NilTimer) UpdateSince(time.Time) {} + +// Variance is a no-op. +func (NilTimer) Variance() float64 { return 0.0 } + +// StandardTimer is the standard implementation of a Timer and uses a Histogram +// and Meter. +type StandardTimer struct { + histogram Histogram + meter Meter + mutex sync.Mutex +} + +// Count returns the number of events recorded. +func (t *StandardTimer) Count() int64 { + return t.histogram.Count() +} + +// Max returns the maximum value in the sample. +func (t *StandardTimer) Max() int64 { + return t.histogram.Max() +} + +// Mean returns the mean of the values in the sample. +func (t *StandardTimer) Mean() float64 { + return t.histogram.Mean() +} + +// Min returns the minimum value in the sample. +func (t *StandardTimer) Min() int64 { + return t.histogram.Min() +} + +// Percentile returns an arbitrary percentile of the values in the sample. +func (t *StandardTimer) Percentile(p float64) float64 { + return t.histogram.Percentile(p) +} + +// Percentiles returns a slice of arbitrary percentiles of the values in the +// sample. +func (t *StandardTimer) Percentiles(ps []float64) []float64 { + return t.histogram.Percentiles(ps) +} + +// Rate1 returns the one-minute moving average rate of events per second. +func (t *StandardTimer) Rate1() float64 { + return t.meter.Rate1() +} + +// Rate5 returns the five-minute moving average rate of events per second. +func (t *StandardTimer) Rate5() float64 { + return t.meter.Rate5() +} + +// Rate15 returns the fifteen-minute moving average rate of events per second. +func (t *StandardTimer) Rate15() float64 { + return t.meter.Rate15() +} + +// RateMean returns the meter's mean rate of events per second. +func (t *StandardTimer) RateMean() float64 { + return t.meter.RateMean() +} + +// Snapshot returns a read-only copy of the timer. +func (t *StandardTimer) Snapshot() Timer { + t.mutex.Lock() + defer t.mutex.Unlock() + return &TimerSnapshot{ + histogram: t.histogram.Snapshot().(*HistogramSnapshot), + meter: t.meter.Snapshot().(*MeterSnapshot), + } +} + +// StdDev returns the standard deviation of the values in the sample. +func (t *StandardTimer) StdDev() float64 { + return t.histogram.StdDev() +} + +// Stop stops the meter. +func (t *StandardTimer) Stop() { + t.meter.Stop() +} + +// Sum returns the sum in the sample. +func (t *StandardTimer) Sum() int64 { + return t.histogram.Sum() +} + +// Record the duration of the execution of the given function. +func (t *StandardTimer) Time(f func()) { + ts := time.Now() + f() + t.Update(time.Since(ts)) +} + +// Record the duration of an event. +func (t *StandardTimer) Update(d time.Duration) { + t.mutex.Lock() + defer t.mutex.Unlock() + t.histogram.Update(int64(d)) + t.meter.Mark(1) +} + +// Record the duration of an event that started at a time and ends now. +func (t *StandardTimer) UpdateSince(ts time.Time) { + t.mutex.Lock() + defer t.mutex.Unlock() + t.histogram.Update(int64(time.Since(ts))) + t.meter.Mark(1) +} + +// Variance returns the variance of the values in the sample. +func (t *StandardTimer) Variance() float64 { + return t.histogram.Variance() +} + +// TimerSnapshot is a read-only copy of another Timer. +type TimerSnapshot struct { + histogram *HistogramSnapshot + meter *MeterSnapshot +} + +// Count returns the number of events recorded at the time the snapshot was +// taken. +func (t *TimerSnapshot) Count() int64 { return t.histogram.Count() } + +// Max returns the maximum value at the time the snapshot was taken. +func (t *TimerSnapshot) Max() int64 { return t.histogram.Max() } + +// Mean returns the mean value at the time the snapshot was taken. +func (t *TimerSnapshot) Mean() float64 { return t.histogram.Mean() } + +// Min returns the minimum value at the time the snapshot was taken. +func (t *TimerSnapshot) Min() int64 { return t.histogram.Min() } + +// Percentile returns an arbitrary percentile of sampled values at the time the +// snapshot was taken. +func (t *TimerSnapshot) Percentile(p float64) float64 { + return t.histogram.Percentile(p) +} + +// Percentiles returns a slice of arbitrary percentiles of sampled values at +// the time the snapshot was taken. +func (t *TimerSnapshot) Percentiles(ps []float64) []float64 { + return t.histogram.Percentiles(ps) +} + +// Rate1 returns the one-minute moving average rate of events per second at the +// time the snapshot was taken. +func (t *TimerSnapshot) Rate1() float64 { return t.meter.Rate1() } + +// Rate5 returns the five-minute moving average rate of events per second at +// the time the snapshot was taken. +func (t *TimerSnapshot) Rate5() float64 { return t.meter.Rate5() } + +// Rate15 returns the fifteen-minute moving average rate of events per second +// at the time the snapshot was taken. +func (t *TimerSnapshot) Rate15() float64 { return t.meter.Rate15() } + +// RateMean returns the meter's mean rate of events per second at the time the +// snapshot was taken. +func (t *TimerSnapshot) RateMean() float64 { return t.meter.RateMean() } + +// Snapshot returns the snapshot. +func (t *TimerSnapshot) Snapshot() Timer { return t } + +// StdDev returns the standard deviation of the values at the time the snapshot +// was taken. +func (t *TimerSnapshot) StdDev() float64 { return t.histogram.StdDev() } + +// Stop is a no-op. +func (t *TimerSnapshot) Stop() {} + +// Sum returns the sum at the time the snapshot was taken. +func (t *TimerSnapshot) Sum() int64 { return t.histogram.Sum() } + +// Time panics. +func (*TimerSnapshot) Time(func()) { + panic("Time called on a TimerSnapshot") +} + +// Update panics. +func (*TimerSnapshot) Update(time.Duration) { + panic("Update called on a TimerSnapshot") +} + +// UpdateSince panics. +func (*TimerSnapshot) UpdateSince(time.Time) { + panic("UpdateSince called on a TimerSnapshot") +} + +// Variance returns the variance of the values at the time the snapshot was +// taken. +func (t *TimerSnapshot) Variance() float64 { return t.histogram.Variance() } diff --git a/metrics/timer_test.go b/metrics/timer_test.go new file mode 100644 index 000000000..f85c9b803 --- /dev/null +++ b/metrics/timer_test.go @@ -0,0 +1,101 @@ +package metrics + +import ( + "fmt" + "math" + "testing" + "time" +) + +func BenchmarkTimer(b *testing.B) { + tm := NewTimer() + b.ResetTimer() + for i := 0; i < b.N; i++ { + tm.Update(1) + } +} + +func TestGetOrRegisterTimer(t *testing.T) { + r := NewRegistry() + NewRegisteredTimer("foo", r).Update(47) + if tm := GetOrRegisterTimer("foo", r); 1 != tm.Count() { + t.Fatal(tm) + } +} + +func TestTimerExtremes(t *testing.T) { + tm := NewTimer() + tm.Update(math.MaxInt64) + tm.Update(0) + if stdDev := tm.StdDev(); 4.611686018427388e+18 != stdDev { + t.Errorf("tm.StdDev(): 4.611686018427388e+18 != %v\n", stdDev) + } +} + +func TestTimerStop(t *testing.T) { + l := len(arbiter.meters) + tm := NewTimer() + if len(arbiter.meters) != l+1 { + t.Errorf("arbiter.meters: %d != %d\n", l+1, len(arbiter.meters)) + } + tm.Stop() + if len(arbiter.meters) != l { + t.Errorf("arbiter.meters: %d != %d\n", l, len(arbiter.meters)) + } +} + +func TestTimerFunc(t *testing.T) { + tm := NewTimer() + tm.Time(func() { time.Sleep(50e6) }) + if max := tm.Max(); 45e6 > max || max > 55e6 { + t.Errorf("tm.Max(): 45e6 > %v || %v > 55e6\n", max, max) + } +} + +func TestTimerZero(t *testing.T) { + tm := NewTimer() + if count := tm.Count(); 0 != count { + t.Errorf("tm.Count(): 0 != %v\n", count) + } + if min := tm.Min(); 0 != min { + t.Errorf("tm.Min(): 0 != %v\n", min) + } + if max := tm.Max(); 0 != max { + t.Errorf("tm.Max(): 0 != %v\n", max) + } + if mean := tm.Mean(); 0.0 != mean { + t.Errorf("tm.Mean(): 0.0 != %v\n", mean) + } + if stdDev := tm.StdDev(); 0.0 != stdDev { + t.Errorf("tm.StdDev(): 0.0 != %v\n", stdDev) + } + ps := tm.Percentiles([]float64{0.5, 0.75, 0.99}) + if 0.0 != ps[0] { + t.Errorf("median: 0.0 != %v\n", ps[0]) + } + if 0.0 != ps[1] { + t.Errorf("75th percentile: 0.0 != %v\n", ps[1]) + } + if 0.0 != ps[2] { + t.Errorf("99th percentile: 0.0 != %v\n", ps[2]) + } + if rate1 := tm.Rate1(); 0.0 != rate1 { + t.Errorf("tm.Rate1(): 0.0 != %v\n", rate1) + } + if rate5 := tm.Rate5(); 0.0 != rate5 { + t.Errorf("tm.Rate5(): 0.0 != %v\n", rate5) + } + if rate15 := tm.Rate15(); 0.0 != rate15 { + t.Errorf("tm.Rate15(): 0.0 != %v\n", rate15) + } + if rateMean := tm.RateMean(); 0.0 != rateMean { + t.Errorf("tm.RateMean(): 0.0 != %v\n", rateMean) + } +} + +func ExampleGetOrRegisterTimer() { + m := "account.create.latency" + t := GetOrRegisterTimer(m, nil) + t.Update(47) + fmt.Println(t.Max()) // Output: 47 +} diff --git a/metrics/validate.sh b/metrics/validate.sh new file mode 100755 index 000000000..c4ae91e64 --- /dev/null +++ b/metrics/validate.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e + +# check there are no formatting issues +GOFMT_LINES=`gofmt -l . | wc -l | xargs` +test $GOFMT_LINES -eq 0 || echo "gofmt needs to be run, ${GOFMT_LINES} files have issues" + +# run the tests for the root package +go test -race . diff --git a/metrics/writer.go b/metrics/writer.go new file mode 100644 index 000000000..88521a80d --- /dev/null +++ b/metrics/writer.go @@ -0,0 +1,100 @@ +package metrics + +import ( + "fmt" + "io" + "sort" + "time" +) + +// Write sorts writes each metric in the given registry periodically to the +// given io.Writer. +func Write(r Registry, d time.Duration, w io.Writer) { + for range time.Tick(d) { + WriteOnce(r, w) + } +} + +// WriteOnce sorts and writes metrics in the given registry to the given +// io.Writer. +func WriteOnce(r Registry, w io.Writer) { + var namedMetrics namedMetricSlice + r.Each(func(name string, i interface{}) { + namedMetrics = append(namedMetrics, namedMetric{name, i}) + }) + + sort.Sort(namedMetrics) + for _, namedMetric := range namedMetrics { + switch metric := namedMetric.m.(type) { + case Counter: + fmt.Fprintf(w, "counter %s\n", namedMetric.name) + fmt.Fprintf(w, " count: %9d\n", metric.Count()) + case Gauge: + fmt.Fprintf(w, "gauge %s\n", namedMetric.name) + fmt.Fprintf(w, " value: %9d\n", metric.Value()) + case GaugeFloat64: + fmt.Fprintf(w, "gauge %s\n", namedMetric.name) + fmt.Fprintf(w, " value: %f\n", metric.Value()) + case Healthcheck: + metric.Check() + fmt.Fprintf(w, "healthcheck %s\n", namedMetric.name) + fmt.Fprintf(w, " error: %v\n", metric.Error()) + case Histogram: + h := metric.Snapshot() + ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + fmt.Fprintf(w, "histogram %s\n", namedMetric.name) + fmt.Fprintf(w, " count: %9d\n", h.Count()) + fmt.Fprintf(w, " min: %9d\n", h.Min()) + fmt.Fprintf(w, " max: %9d\n", h.Max()) + fmt.Fprintf(w, " mean: %12.2f\n", h.Mean()) + fmt.Fprintf(w, " stddev: %12.2f\n", h.StdDev()) + fmt.Fprintf(w, " median: %12.2f\n", ps[0]) + fmt.Fprintf(w, " 75%%: %12.2f\n", ps[1]) + fmt.Fprintf(w, " 95%%: %12.2f\n", ps[2]) + fmt.Fprintf(w, " 99%%: %12.2f\n", ps[3]) + fmt.Fprintf(w, " 99.9%%: %12.2f\n", ps[4]) + case Meter: + m := metric.Snapshot() + fmt.Fprintf(w, "meter %s\n", namedMetric.name) + fmt.Fprintf(w, " count: %9d\n", m.Count()) + fmt.Fprintf(w, " 1-min rate: %12.2f\n", m.Rate1()) + fmt.Fprintf(w, " 5-min rate: %12.2f\n", m.Rate5()) + fmt.Fprintf(w, " 15-min rate: %12.2f\n", m.Rate15()) + fmt.Fprintf(w, " mean rate: %12.2f\n", m.RateMean()) + case Timer: + t := metric.Snapshot() + ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) + fmt.Fprintf(w, "timer %s\n", namedMetric.name) + fmt.Fprintf(w, " count: %9d\n", t.Count()) + fmt.Fprintf(w, " min: %9d\n", t.Min()) + fmt.Fprintf(w, " max: %9d\n", t.Max()) + fmt.Fprintf(w, " mean: %12.2f\n", t.Mean()) + fmt.Fprintf(w, " stddev: %12.2f\n", t.StdDev()) + fmt.Fprintf(w, " median: %12.2f\n", ps[0]) + fmt.Fprintf(w, " 75%%: %12.2f\n", ps[1]) + fmt.Fprintf(w, " 95%%: %12.2f\n", ps[2]) + fmt.Fprintf(w, " 99%%: %12.2f\n", ps[3]) + fmt.Fprintf(w, " 99.9%%: %12.2f\n", ps[4]) + fmt.Fprintf(w, " 1-min rate: %12.2f\n", t.Rate1()) + fmt.Fprintf(w, " 5-min rate: %12.2f\n", t.Rate5()) + fmt.Fprintf(w, " 15-min rate: %12.2f\n", t.Rate15()) + fmt.Fprintf(w, " mean rate: %12.2f\n", t.RateMean()) + } + } +} + +type namedMetric struct { + name string + m interface{} +} + +// namedMetricSlice is a slice of namedMetrics that implements sort.Interface. +type namedMetricSlice []namedMetric + +func (nms namedMetricSlice) Len() int { return len(nms) } + +func (nms namedMetricSlice) Swap(i, j int) { nms[i], nms[j] = nms[j], nms[i] } + +func (nms namedMetricSlice) Less(i, j int) bool { + return nms[i].name < nms[j].name +} diff --git a/metrics/writer_test.go b/metrics/writer_test.go new file mode 100644 index 000000000..1aacc2871 --- /dev/null +++ b/metrics/writer_test.go @@ -0,0 +1,22 @@ +package metrics + +import ( + "sort" + "testing" +) + +func TestMetricsSorting(t *testing.T) { + var namedMetrics = namedMetricSlice{ + {name: "zzz"}, + {name: "bbb"}, + {name: "fff"}, + {name: "ggg"}, + } + + sort.Sort(namedMetrics) + for i, name := range []string{"bbb", "fff", "ggg", "zzz"} { + if namedMetrics[i].name != name { + t.Fail() + } + } +} |