diff --git a/config_example.toml b/config_example.toml index a5a3b53..1eedb6f 100644 --- a/config_example.toml +++ b/config_example.toml @@ -55,6 +55,12 @@ save_interval = "5s" # Set node to offline if not seen within this period offline_after = "10m" +## Verify that a node is really down by pinging its last seen address +# send x pings to verify that the node is offline (to disable, set count < 1) +ping_count = 3 +# timeout for pinging a node +ping_timeout = "1s" + ## [[nodes.output.example]] # Each output format has its own config block and needs to be enabled by adding: diff --git a/database/graphite/global.go b/database/graphite/global.go index 62fff66..0cd2cf6 100644 --- a/database/graphite/global.go +++ b/database/graphite/global.go @@ -36,6 +36,7 @@ func (c *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time, s func GlobalStatsFields(name string, stats *runtime.GlobalStats) []graphigo.Metric { return []graphigo.Metric{ {Name: name + ".nodes", Value: stats.Nodes}, + {Name: name + ".nodes.no_respondd", Value: stats.NodesNoRespondd}, {Name: name + ".gateways", Value: stats.Gateways}, {Name: name + ".clients.total", Value: stats.Clients}, {Name: name + ".clients.wifi", Value: stats.ClientsWifi}, diff --git a/database/influxdb/global.go b/database/influxdb/global.go index 6c4330e..90b82b9 100644 --- a/database/influxdb/global.go +++ b/database/influxdb/global.go @@ -42,12 +42,13 @@ func (conn *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time // GlobalStatsFields returns fields for InfluxDB func GlobalStatsFields(stats *runtime.GlobalStats) map[string]interface{} { return map[string]interface{}{ - "nodes": stats.Nodes, - "gateways": stats.Gateways, - "clients.total": stats.Clients, - "clients.wifi": stats.ClientsWifi, - "clients.wifi24": stats.ClientsWifi24, - "clients.wifi5": stats.ClientsWifi5, + "nodes": stats.Nodes, + "nodes.no_respondd": stats.NodesNoRespondd, + "gateways": stats.Gateways, + "clients.total": stats.Clients, 
+ "clients.wifi": stats.ClientsWifi, + "clients.wifi24": stats.ClientsWifi24, + "clients.wifi5": stats.ClientsWifi5, } } diff --git a/database/logging/file.go b/database/logging/file.go index c9745b0..4768616 100644 --- a/database/logging/file.go +++ b/database/logging/file.go @@ -50,7 +50,7 @@ func (conn *Connection) InsertLink(link *runtime.Link, time time.Time) { } func (conn *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time, site string, domain string) { - conn.log("InsertGlobals: [", time.String(), "] site: ", site, " domain: ", domain, ", nodes: ", stats.Nodes, ", clients: ", stats.Clients, " models: ", len(stats.Models)) + conn.log("InsertGlobals: [", time.String(), "] site: ", site, " domain: ", domain, ", nodes: ", stats.Nodes, " (no respondd: ", stats.NodesNoRespondd, "), clients: ", stats.Clients, " models: ", len(stats.Models)) } func (conn *Connection) PruneNodes(deleteAfter time.Duration) { diff --git a/docs/docs_configuration.md b/docs/docs_configuration.md index 4c5d406..1291dd4 100644 --- a/docs/docs_configuration.md +++ b/docs/docs_configuration.md @@ -203,6 +203,8 @@ state_path = "/var/lib/yanic/state.json" prune_after = "7d" save_interval = "5s" offline_after = "10m" +ping_count = 3 +ping_timeout = "1s" ``` {% endmethod %} @@ -246,6 +248,26 @@ offline_after = "10m" ``` {% endmethod %} +### ping_count +{% method %} +Verifies that a node is really down by pinging its last seen address. +Sends x pings to check whether the node is offline (to disable, set the count < 1). +{% sample lang="toml" %} +```toml +ping_count = 3 +``` +{% endmethod %} + + +### ping_timeout +{% method %} +Timeout for pinging a node. +{% sample lang="toml" %} +```toml +ping_timeout = "1s" +``` +{% endmethod %} + ## [[nodes.output.example]] {% method %} diff --git a/runtime/node.go b/runtime/node.go index 3e2059c..820c1ee 100644 --- a/runtime/node.go +++ b/runtime/node.go @@ -13,6 +13,7 @@ type Node struct { Firstseen jsontime.Time `json:"firstseen"` Lastseen 
jsontime.Time `json:"lastseen"` Online bool `json:"online"` + NoRespondd bool `json:"-"` Statistics *data.Statistics `json:"statistics"` Nodeinfo *data.NodeInfo `json:"nodeinfo"` Neighbours *data.Neighbours `json:"-"` diff --git a/runtime/nodes.go b/runtime/nodes.go index 91e8ac8..4b41d42 100644 --- a/runtime/nodes.go +++ b/runtime/nodes.go @@ -176,7 +176,16 @@ func (nodes *Nodes) expire() { delete(nodes.List, id) } else if node.Lastseen.Before(offlineAfter) { // set to offline - node.Online = false + if nodes.config.PingCount > 0 && nodes.ping(node) { + node.Online = true + node.NoRespondd = true + + node.Statistics = nil + node.Neighbours = nil + } else { + node.Online = false + node.NoRespondd = false + } } } } diff --git a/runtime/nodes_config.go b/runtime/nodes_config.go index 6d9520c..69dbf90 100644 --- a/runtime/nodes_config.go +++ b/runtime/nodes_config.go @@ -7,5 +7,7 @@ type NodesConfig struct { SaveInterval duration.Duration `toml:"save_interval"` // Save nodes periodically OfflineAfter duration.Duration `toml:"offline_after"` // Set node to offline if not seen within this period PruneAfter duration.Duration `toml:"prune_after"` // Remove nodes after n days of inactivity + PingCount int `toml:"ping_count"` // send x pings to verify if node is offline (for disable count < 1) + PingTimeout duration.Duration `toml:"ping_timeout"` // timeout of sending ping to a node Output map[string]interface{} } diff --git a/runtime/nodes_ping.go b/runtime/nodes_ping.go new file mode 100644 index 0000000..d5f7e53 --- /dev/null +++ b/runtime/nodes_ping.go @@ -0,0 +1,38 @@ +package runtime + +import ( + "github.com/bdlm/log" + "github.com/sparrc/go-ping" +) + +func (nodes *Nodes) ping(node *Node) bool { + logNode := log.WithField("node_id", "unknown") + if node.Nodeinfo != nil { + logNode = logNode.WithField("node_id", node.Nodeinfo.NodeID) + } + if node.Address == nil { + logNode.Debug("error no address found") + return false + } + addr := node.Address.IP.String() + if 
node.Address.IP.IsLinkLocalUnicast() { + addr += "%" + node.Address.Zone + } + + logAddr := logNode.WithField("addr", addr) + + pinger, err := ping.NewPinger(addr) + if err != nil { + logAddr.Debugf("error during ping: %s", err) + return false + } + //pinger.SetPrivileged(true) + pinger.Count = nodes.config.PingCount + pinger.Timeout = nodes.config.PingTimeout.Duration + pinger.Run() // blocks until finished + stats := pinger.Statistics() + logAddr.WithFields(map[string]interface{}{ + "pkg_lost": stats.PacketLoss, + }).Debug("pong") + return stats.PacketLoss < 100 +} diff --git a/runtime/nodes_ping_test.go b/runtime/nodes_ping_test.go new file mode 100644 index 0000000..170f4d1 --- /dev/null +++ b/runtime/nodes_ping_test.go @@ -0,0 +1,38 @@ +package runtime + +import ( + "net" + "testing" + "time" + + "github.com/bdlm/log" + "github.com/stretchr/testify/assert" + + "github.com/FreifunkBremen/yanic/data" +) + +func TestPing(t *testing.T) { + log.SetLevel(log.DebugLevel) + + assert := assert.New(t) + config := &NodesConfig{ + PingCount: 1, + } + config.OfflineAfter.Duration = time.Minute * 10 + // to get default (100%) path of testing + // config.PruneAfter.Duration = time.Hour * 24 * 6 + nodes := &Nodes{ + config: config, + List: make(map[string]*Node), + ifaceToNodeID: make(map[string]string), + } + + node := nodes.Update("expire", &data.ResponseData{NodeInfo: &data.NodeInfo{NodeID: "nodeID-Lola"}}) + node.Address = &net.UDPAddr{Zone: "bat0"} + // error during ping + assert.False(nodes.ping(node)) + + node.Address.IP = net.ParseIP("fe80::1") + // error during ping + assert.False(nodes.ping(node)) +} diff --git a/runtime/stats.go b/runtime/stats.go index 145d8bb..bdb7d29 100644 --- a/runtime/stats.go +++ b/runtime/stats.go @@ -11,12 +11,13 @@ type CounterMap map[string]uint32 // GlobalStats struct type GlobalStats struct { - Clients uint32 - ClientsWifi uint32 - ClientsWifi24 uint32 - ClientsWifi5 uint32 - Gateways uint32 - Nodes uint32 + Clients uint32 + 
ClientsWifi uint32 + ClientsWifi24 uint32 + ClientsWifi5 uint32 + Gateways uint32 + Nodes uint32 + NodesNoRespondd uint32 Firmwares CounterMap Models CounterMap @@ -81,6 +82,9 @@ func (s *GlobalStats) Add(node *Node) { s.ClientsWifi5 += stats.Clients.Wifi5 s.ClientsWifi += stats.Clients.Wifi } + if node.NoRespondd { + s.NodesNoRespondd++ + } if node.IsGateway() { s.Gateways++ } diff --git a/runtime/stats_test.go b/runtime/stats_test.go index 93363e8..7b1d0f4 100644 --- a/runtime/stats_test.go +++ b/runtime/stats_test.go @@ -22,6 +22,7 @@ func TestGlobalStats(t *testing.T) { //check GLOBAL_SITE stats assert.EqualValues(1, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Gateways) assert.EqualValues(3, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Nodes) + assert.EqualValues(1, stats[GLOBAL_SITE][GLOBAL_DOMAIN].NodesNoRespondd) assert.EqualValues(25, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Clients) // check models @@ -98,7 +99,8 @@ func createTestNodes() *Nodes { nodes.AddNode(nodeData) nodes.AddNode(&Node{ - Online: true, + Online: true, + NoRespondd: true, Statistics: &data.Statistics{ Clients: data.Clients{ Total: 2,