From 2f6e22f8cce36108dd719983e89f81db5a70aa2a Mon Sep 17 00:00:00 2001 From: Martin/Geno Date: Sat, 19 Jan 2019 15:45:35 +0100 Subject: [PATCH] [TASK] add respondd-crashed a tool to find wrong offline detected nodes. by pinging the nodes which does not answer by respondd --- contrib/respondd-crashed/README.md | 30 ++++++ contrib/respondd-crashed/helper.go | 24 +++++ contrib/respondd-crashed/helper_test.go | 1 + contrib/respondd-crashed/hook.go | 34 +++++++ contrib/respondd-crashed/main.go | 86 +++++++++++++++++ contrib/respondd-crashed/run.go | 119 ++++++++++++++++++++++++ contrib/respondd-crashed/status.go | 31 ++++++ 7 files changed, 325 insertions(+) create mode 100644 contrib/respondd-crashed/README.md create mode 100644 contrib/respondd-crashed/helper.go create mode 100644 contrib/respondd-crashed/helper_test.go create mode 100644 contrib/respondd-crashed/hook.go create mode 100644 contrib/respondd-crashed/main.go create mode 100644 contrib/respondd-crashed/run.go create mode 100644 contrib/respondd-crashed/status.go diff --git a/contrib/respondd-crashed/README.md b/contrib/respondd-crashed/README.md new file mode 100644 index 0000000..8692fa7 --- /dev/null +++ b/contrib/respondd-crashed/README.md @@ -0,0 +1,30 @@ +# respondd-crashed + +This tool pings every "offline" node at every IP address listed in a meshviewer.json to detect whether a respondd daemon is not running anymore. 
// statusError reports an HTTP response whose status code is outside the 2xx
// range.
type statusError struct {
	status string
}

// Error implements the error interface.
func (e *statusError) Error() string {
	return "unexpected http status: " + e.status
}

// JSONRequest fetches url with an HTTP GET and decodes the JSON response body
// into value.
//
// value must be a non-nil pointer to the destination; anything else is
// reported as a decode error instead of being silently discarded. An error is
// also returned when the request fails, when the server answers with a
// non-2xx status, or when the body is not valid JSON for value.
func JSONRequest(url string, value interface{}) error {
	// A bounded timeout keeps a hanging server from blocking the caller forever.
	netClient := &http.Client{
		Timeout: 20 * time.Second,
	}

	resp, err := netClient.Get(url)
	if err != nil {
		return err
	}
	// The body must always be closed, otherwise the underlying connection
	// leaks on every call.
	defer resp.Body.Close()

	// An error page must not be mistaken for the requested document.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return &statusError{status: resp.Status}
	}

	return json.NewDecoder(resp.Body).Decode(value)
}
entry.Logger.Out = os.Stderr + case log.WarnLevel: + entry.Logger.Out = os.Stdout + case log.InfoLevel: + entry.Logger.Out = os.Stdout + case log.DebugLevel: + entry.Logger.Out = os.Stdout + default: + } + + return nil +} + +func (hook *Hook) Levels() []stdLogger.Level { + return log.AllLevels +} diff --git a/contrib/respondd-crashed/main.go b/contrib/respondd-crashed/main.go new file mode 100644 index 0000000..6d7f49b --- /dev/null +++ b/contrib/respondd-crashed/main.go @@ -0,0 +1,86 @@ +package main + +import ( + "flag" + "os" + "os/signal" + "sync" + "syscall" + "time" + + "github.com/bdlm/log" + stdLogger "github.com/bdlm/std/logger" + "github.com/digineo/go-ping" +) + +var ( + timestamps bool + loglevel uint + + runEvery time.Duration + + iface string + + pingCount int + pingTimeout time.Duration + + meshviewerPATH string + statusPath string +) + +func main() { + flag.BoolVar(×tamps, "timestamps", false, "Enables timestamps for log output") + flag.UintVar(&loglevel, "loglevel", 40, "Show log message starting at level") + + flag.DurationVar(&runEvery, "run-every", time.Duration(time.Minute), "repeat check every") + + flag.StringVar(&iface, "ll-iface", "", "interface to ping linklocal-address") + + flag.IntVar(&pingCount, "ping-count", 3, "count of pings") + flag.DurationVar(&pingTimeout, "ping-timeout", time.Duration(time.Second*5), "timeout to wait for response") + + flag.StringVar(&statusPath, "status-path", "respondd-crashed.json", "path to store status") + flag.StringVar(&meshviewerPATH, "meshviewer-path", "meshviewer.json", "path to meshviewer.json from yanic") + + flag.Parse() + + log.AddHook(&Hook{}) + log.SetLevel(stdLogger.Level(loglevel)) + log.SetFormatter(&log.TextFormatter{ + DisableTimestamp: timestamps, + }) + + pinger, err := ping.New("", "::") + if err != nil { + log.Panicf("not able to bind pinger: %s", err) + } + + timer := time.NewTimer(runEvery) + + stop := false + + wg := sync.WaitGroup{} + + log.Info("start tester") + + func() { + 
wg.Add(1) + for !stop { + select { + case <-timer.C: + run(pinger) + timer.Reset(runEvery) + } + } + timer.Stop() + wg.Done() + }() + + sigs := make(chan os.Signal, 1) + signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) + sig := <-sigs + stop = true + wg.Wait() + log.Infof("stopped: %s", sig) + +} diff --git a/contrib/respondd-crashed/run.go b/contrib/respondd-crashed/run.go new file mode 100644 index 0000000..4710d9b --- /dev/null +++ b/contrib/respondd-crashed/run.go @@ -0,0 +1,119 @@ +package main + +import ( + "encoding/json" + "net" + "os" + "sync" + + "github.com/bdlm/log" + "github.com/digineo/go-ping" + + meshviewerFFRGB "github.com/FreifunkBremen/yanic/output/meshviewer-ffrgb" +) + +func pingNode(pinger *ping.Pinger, node *meshviewerFFRGB.Node, addrStr string) bool { + logNode := log.WithField("node_id", node.NodeID) + + addr, err := net.ResolveIPAddr("ip6", addrStr) + if err != nil { + logNode.Warnf("error parse ip address for ping: %s", err) + } + + if addrStr[:5] == "fe80:" { + if iface == "" { + logNode.Debug("skip ll-addr") + return false + } + addr.Zone = iface + } + logNode = logNode.WithField("addr", addr.String()) + + _, err = pinger.PingAttempts(addr, pingTimeout, pingCount) + + logNode.WithFields(map[string]interface{}{ + "success": err == nil, + }).Debug("pong") + return err == nil +} + +func run(pinger *ping.Pinger) { + status := &Status{NodesCrashed: []*Node{}} + var meshviewerjson meshviewerFFRGB.Meshviewer + + if meshviewerPATH[:4] == "http" { + if err := JSONRequest(meshviewerPATH, &meshviewerjson); err != nil { + status.Error = err.Error() + log.Errorf("error during fetch meshviewer.json: %s", err) + } + } else { + meshviewerFile, err := os.Open(meshviewerPATH) + if err != nil { + status.Error = err.Error() + log.Errorf("error during fetch meshviewer.json: %s", err) + } else if err := json.NewDecoder(meshviewerFile).Decode(&meshviewerjson); err != nil { + status.Error = err.Error() + log.Errorf("error during decode meshviewer.json: 
%s", err) + } + } + + log.Debug("fetched meshviewer.json") + + wg := sync.WaitGroup{} + wg.Add(len(meshviewerjson.Nodes)) + + offline := 0 + for _, node := range meshviewerjson.Nodes { + go func(node *meshviewerFFRGB.Node) { + defer wg.Done() + if node.IsOnline { + return + } + logNode := log.WithField("node", node.NodeID) + wgNode := sync.WaitGroup{} + wgNode.Add(len(node.Addresses)) + offline += 1 + notReachable := true + for _, addr := range node.Addresses { + go func(node *meshviewerFFRGB.Node, addr string) { + if ok := pingNode(pinger, node, addr); ok { + notReachable = false + } + wgNode.Done() + }(node, addr) + } + wgNode.Wait() + if !notReachable { + logNode.Info("add to crashed list") + status.AddNode(node) + } + }(node) + } + + wg.Wait() + + status.Lock() + status.NodesCount = len(meshviewerjson.Nodes) + status.NodesOfflineCount = offline + status.Unlock() + + tmpFile := statusPath + ".tmp" + statusFile, err := os.OpenFile(tmpFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + log.Warnf("unable to open status file: %s", err) + } + defer statusFile.Close() + + if err := json.NewEncoder(statusFile).Encode(status); err != nil { + log.Warnf("unable to write status json: %s", err) + } + if err := os.Rename(tmpFile, statusPath); err != nil { + log.Warnf("unable to move status file: %s", err) + } + + log.WithFields(map[string]interface{}{ + "count_meshviewer": status.NodesCount, + "count_offline": status.NodesOfflineCount, + "count_status": len(status.NodesCrashed), + }).Info("test complete") +} diff --git a/contrib/respondd-crashed/status.go b/contrib/respondd-crashed/status.go new file mode 100644 index 0000000..94de3bc --- /dev/null +++ b/contrib/respondd-crashed/status.go @@ -0,0 +1,31 @@ +package main + +import ( + "sync" + + meshviewerFFRGB "github.com/FreifunkBremen/yanic/output/meshviewer-ffrgb" +) + +type Node struct { + NodeID string `json:"node_id"` + Hostname string `json:"hostname"` + Addresses []string `json:"addresses"` +} + +type 
Status struct { + Error string `json:"error,omitempty"` + NodesCount int `json:"nodes_count"` + NodesOfflineCount int `json:"nodes_offline_count"` + NodesCrashed []*Node `json:"nodes_crashed"` + sync.Mutex +} + +func (s *Status) AddNode(node *meshviewerFFRGB.Node) { + s.Lock() + s.NodesCrashed = append(s.NodesCrashed, &Node{ + NodeID: node.NodeID, + Hostname: node.Hostname, + Addresses: node.Addresses, + }) + s.Unlock() +}