-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First Release (Flattened dev branch pre-public)
- Loading branch information
0 parents
commit 5168d51
Showing
29 changed files
with
2,164 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
ixp-xping | ||
.gopath | ||
build/ | ||
debian/files | ||
debian/ixp-xping.debhelper.log | ||
debian/ixp-xping.substvars | ||
tmp | ||
*.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Import path of the module and the installation prefix used by `make install`.
PKG = github.com/benjojo/ixp-xping
PREFIX = /usr

# Default target: build the binary into build/.
all: build/ixp-xping

# NOTE: This repo uses Go modules, and uses a synthetic GOPATH at
# $(CURDIR)/.gopath that is only used for the build cache. $GOPATH/src/ is
# empty.
GO = GOPATH=$(CURDIR)/.gopath GOBIN=$(CURDIR)/build go
GO_BUILDFLAGS =
GO_LDFLAGS = -s -w

# GOBIN points at build/, so `go install` drops the binary there.
build/ixp-xping: *.go
	$(GO) install $(GO_BUILDFLAGS) -ldflags "$(GO_LDFLAGS)" .

# Copy the built binary under $(DESTDIR)$(PREFIX)/bin.
install: build/ixp-xping
	install -D -m 0755 build/ixp-xping "$(DESTDIR)$(PREFIX)/bin/ixp-xping"

# Refresh go.mod/go.sum and the vendor/ directory.
vendor:
	$(GO) mod tidy
	$(GO) mod vendor

# `all` produces no file named "all", so it must be phony as well; otherwise a
# stray file of that name would stop the default target from building.
.PHONY: all install vendor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
The xping protocol works by flooding basic UDP packets to each switch specific IP address. | ||
|
||
Inside the packet is a two-byte magic number (set to 0x83 0x30; 8330 is the ASN of the company that commissioned this), | |
then a 64-bit unsigned integer for a sequence number (this is used to help calculate packet loss). | |
|
||
Once received, each packet is put into a receive ring buffer so that when the calculations for packet loss and latency are needed, the | |
ring buffer is iterated through to calculate missing packets (and average latency). | |
|
||
The program uses SOF_TIMESTAMPING_RX_SOFTWARE to help mitigate garbage collection spikes, however | ||
because the transmit path is not time stamped, spikes can still appear from time to time. | ||
|
||
Transmit-side timestamping was not implemented because of its inflexibility and extra complexity. | |
|
||
There is no handshake between hosts, as things are expected to be statically configured |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
ixp-xping | ||
=== | ||
|
||
ixp-xping is a prometheus exporter that aims to detect packet loss/latency that is impacting a limited number of network flows on an internet exchange (However it could be used outside of those environments as well). | ||
|
||
The stand-out feature of ixp-xping compared to other solutions is that it supports monitoring more than one IXP switch per system, meaning an IX deployment can have all of the switches tested with just one machine with the correct network card types to accommodate them all (Though the machine cannot monitor the inner sites traffic quality since the machine cannot monitor itself!) | |
|
||
## Theory | ||
|
||
Traditionally you would monitor things using ICMP between each host, forming a mesh. | ||
|
||
data:image/s3,"s3://crabby-images/14bc7/14bc7cd3bd919bc63df5b35a1c3186623bb6f2a2" alt="" | ||
|
||
However this overlooks how a lot of modern backbones are not a single set of physical links, but aggregations of links to build up larger capacities than current possible single port link speeds. | ||
|
||
data:image/s3,"s3://crabby-images/82940/82940c655cb8d5aff5d961c9c11fc7bb15be54d6" alt="" | ||
|
||
The way that routers in the path decide what link to send traffic down when they have more than one option is typically done through hashing, where the Source/Destination Port, the Source/Destination IP address and the IP protocol (the 5-tuple) are converted into a single number that is then used to select a link to forward the packet on to. | |
|
||
data:image/s3,"s3://crabby-images/6ffc7/6ffc7a367915b75d69f1ae41b6364b39f411eba6" alt="" | ||
|
||
ixp-xping works by creating a number of different UDP flows (aka, a UDP port) to every single peer xping instance and testing latency and loss over each one. Using the fact that modern day IP transport works with ECMP or other forms of packet hashing, this can correctly detect faulty link aggregation members, or overloaded links inside a link aggregation group. | ||
|
||
## Example Output | ||
|
||
The program outputs data in the form of a prometheus exporter: | ||
|
||
``` | ||
# HELP xping_peer_latency_per_flow aaa | ||
# TYPE xping_peer_latency_per_flow gauge | ||
xping_peer_latency_per_flow{peer="192.168.122.49",port="32736"} 844 | ||
… | ||
xping_peer_latency_per_flow{peer="192.168.122.49",port="32751"} 521 | ||
xping_peer_latency_per_flow{peer="192.168.122.50",port="32736"} 431 | ||
… | ||
xping_peer_latency_per_flow{peer="192.168.122.50",port="32751"} 338 | ||
``` | ||
|
||
That can then be used to create latency and loss graphs that take into account each link the data may be hashed on to between the two peers. | ||
|
||
data:image/s3,"s3://crabby-images/c237f/c237f43fe49cf65ca5fbe5560aaf65e290e545de" alt="" | ||
|
||
## Config | ||
|
||
The program is configured by a YAML file located in `/etc/ixp-xping.yaml`, however this file path can be changed with the `-cfg.path /foo/bar.yaml` option | ||
|
||
The file should contain configuration like: | |
|
||
```yaml | ||
listenhost: 0.0.0.0 | ||
listenportstart: 32736 | ||
pollratems: 250 | ||
peers: | ||
- 192.0.2.43 | ||
- 192.0.2.22 | ||
- 192.0.2.73 | ||
peersnames: | ||
192.0.2.73: thn14-cr1 | ||
192.0.2.22: the-sr2 | ||
192.0.2.43: sov-sr2 | ||
allowedcidrs: | ||
- 192.0.0.0/22 | ||
prometheusport: 9150 | ||
``` | ||
In addition, there is an automatic configuration mode that reads an internal LONAP style format. By default on start xping will look for `/usr/local/etc/lonap_mon_hosts.json` in the following format: | ||
|
||
```json | ||
{ | ||
"eqs-mon1": { | ||
"eqs-cr1": { | ||
"address": "192.0.2.94", | ||
"device": "ens2", | ||
"netmask": "255.255.252.0" | ||
}, | ||
"eqs-qr1": { | ||
"address": "192.0.2.93", | ||
"device": "ens2d1", | ||
"netmask": "255.255.252.0" | ||
} | ||
}, | ||
"hex-mon1": { | ||
"hex-cr1": { | ||
"address": "192.0.2.32", | ||
"device": "ens2", | ||
"netmask": "255.255.252.0" | ||
}, | ||
"hex-qr1": { | ||
"address": "192.0.2.36", | ||
"device": "ens2d1", | ||
"netmask": "255.255.252.0" | ||
}, | ||
"hex-sr1": { | ||
"address": "192.0.2.31", | ||
"device": "eno50", | ||
"netmask": "255.255.252.0" | ||
} | ||
} | ||
} | ||
``` | ||
|
||
and proceed to automatically add the allowedcidrs, peers, and peersnames. | |
|
||
|
||
### YAML configuration sections | ||
|
||
**listenhost** | ||
|
||
This is the IP address to bind to and listen on. It is recommended to bind on 0.0.0.0 | |
|
||
**listenportstart** | ||
|
||
This is the port number to start binding on, this number must be the same amongst all peers | ||
|
||
**pollratems** | ||
|
||
The number of milliseconds to wait before sending each probe | ||
|
||
**peers** | ||
|
||
A list of IP addresses that are also running xping. These peer lists are allowed to be different between peers, but care should be taken to ensure that the `allowedcidrs` section allows those peers to use the node. | |
|
||
**peersnames** | ||
|
||
This optionally renames IP addresses to host names, this makes it a lot easier on systems like AlertManager or Grafana to display the true device name rather than an opaque IP address. | ||
|
||
**allowedcidrs** | ||
|
||
This specifies what IP CIDRs the local xping instance is allowed to reply to pings from. If a packet arrives that is not on the allowedcidrs list then it will be dropped. | |
|
||
**prometheusport** | ||
|
||
What TCP port to listen on for HTTP requests for the prometheus endpoint `/metrics` | ||
|
||
## Alerting | ||
|
||
You could set up automated alerting for when packet loss is detected between two peers on the exchange by using [AlertManager](https://prometheus.io/docs/alerting/latest/alertmanager/). | ||
|
||
Here is an example AlertManager rule that would alert for more than 2% packet loss on the exchange for more than 300 seconds: | ||
|
||
``` | ||
- alert: xpingPacketLossOnFlow | ||
expr: xping_peer_loss_per_flow > 0.02 | ||
for: 5m | ||
annotations: | ||
summary: Loss between two endpoints detected | ||
description: Loss detected between {{$labels.local}} (Port {{$labels.port}}) and {{$labels.peer}} | |
``` | ||
|
||
## Building | ||
|
||
To build the deb file that you can then install onto a machine you can use the debuild ecosystem. | ||
|
||
If you have already built the binary, you should be able to just run: | |
|
||
``` | ||
debuild -b -uc -us | ||
``` | ||
|
||
A debian file should then be produced that can be installed to each machine. (in the directory above). | ||
|
||
If you are building this "fresh" you will need to setup go, debuild, and others. Here is a rough set | ||
of commands you will need to run. | ||
|
||
``` | ||
apt install devscripts lintian dh-golang build-essential | ||
|
||
wget https://go.dev/dl/go1.22.4.linux-amd64.tar.gz | ||
|
||
rm -rf /usr/local/go && tar -C /usr/local -xzf go1.22.4.linux-amd64.tar.gz | ||
|
||
export PATH=$PATH:/usr/local/go/bin | ||
|
||
make | ||
|
||
debuild -b -uc -us | ||
``` | ||
## Authorship / Commission | ||
This project was commissioned by the LONdon Access Point (LONAP) internet exchange |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
package configfile | ||
|
||
import ( | ||
"encoding/json" | ||
"flag" | ||
"fmt" | ||
"io" | ||
"log" | ||
"net" | ||
"os" | ||
|
||
"gopkg.in/yaml.v2" | ||
) | ||
|
||
// XPingConfig is the configuration of one xping instance. The exported fields
// are filled from the YAML configuration file; each tag spells out the key
// yaml.v2 already derives by lowercasing the field name.
type XPingConfig struct {
	// ListenHost is the IP address to bind on.
	ListenHost string `yaml:"listenhost"`
	// ListenPortStart is the port number to start binding on.
	ListenPortStart uint32 `yaml:"listenportstart"`
	// PollRateMS is the number of milliseconds to wait before sending each
	// probe.
	PollRateMS uint `yaml:"pollratems"`
	// Peers lists the IP addresses of the other xping instances.
	Peers []string `yaml:"peers"`
	// PeersNames optionally maps a peer IP address to a host name.
	PeersNames map[string]string `yaml:"peersnames"`
	// AllowedCIDRs lists, in CIDR notation, the networks IsAllowedCIDR
	// accepts addresses from.
	AllowedCIDRs []string `yaml:"allowedcidrs"`
	// PrometheusPort is the TCP port of the prometheus HTTP endpoint.
	PrometheusPort uint32 `yaml:"prometheusport"`

	// internalAllowedCIDRs caches the parsed form of AllowedCIDRs; it is
	// filled in by the first call to IsAllowedCIDR.
	internalAllowedCIDRs []*net.IPNet
}
|
||
func Parse(in io.Reader) (XPingConfig, error) { | ||
XPC := XPingConfig{} | ||
configBytes, err := io.ReadAll(in) | ||
if err != nil { | ||
return XPC, err | ||
} | ||
|
||
err = yaml.Unmarshal(configBytes, &XPC) | ||
if err != nil { | ||
return XPC, err | ||
} | ||
|
||
return XPC, nil | ||
} | ||
|
||
func (XPC *XPingConfig) ResolveFriendlyName(i net.IP) string { | ||
if XPC.PeersNames[i.String()] != "" { | ||
return XPC.PeersNames[i.String()] | ||
} | ||
return i.String() | ||
} | ||
|
||
func (XPC *XPingConfig) IsAllowedCIDR(i net.IP) bool { | ||
if XPC.internalAllowedCIDRs == nil { | ||
XPC.internalAllowedCIDRs = make([]*net.IPNet, 0) | ||
for _, v := range XPC.AllowedCIDRs { | ||
_, c, err := net.ParseCIDR(v) | ||
if err == nil { | ||
XPC.internalAllowedCIDRs = append(XPC.internalAllowedCIDRs, c) | ||
} else { | ||
log.Printf("Invalid AlllowedCIDR %v", v) | ||
} | ||
} | ||
|
||
if len(XPC.AllowedCIDRs) == 0 { | ||
log.Printf("WARNING: There are no AllowedCIDRs set, no packets are going to be accepted!") | ||
} | ||
} | ||
|
||
for _, v := range XPC.internalAllowedCIDRs { | ||
if v.Contains(i) { | ||
return true | ||
} | ||
} | ||
return false | ||
} | ||
|
||
// LONAPConfigEntry is a single switch entry of the LONAP style auto
// configuration file: the address, device and netmask given for that switch.
type LONAPConfigEntry struct {
	Address, Device, Netmask string
}
|
||
// lonapAutoConfigPath holds the value of the -cfg.lonap-auto-config flag: the
// path LONAPAutoConfig opens to find the LONAP style auto configuration file.
var lonapAutoConfigPath = flag.String("cfg.lonap-auto-config",
	"/usr/local/etc/lonap_mon_hosts.json",
	"Where to look for a LONAP style auto configuration file",
)
|
||
func (XPC *XPingConfig) LONAPAutoConfig() { | ||
XPC.PeersNames = make(map[string]string) | ||
lonapf, err := os.Open(*lonapAutoConfigPath) | ||
if err == nil { | ||
defer lonapf.Close() | ||
|
||
// Roll the peers into a map so we can avoid dupes | ||
PeerMap := make(map[string]bool) | ||
for _, v := range XPC.Peers { | ||
PeerMap[v] = true | ||
} | ||
AllowedCIDRsMap := make(map[string]bool) | ||
for _, v := range XPC.AllowedCIDRs { | ||
AllowedCIDRsMap[v] = true | ||
} | ||
|
||
lonapConfigStructure := make(map[string]map[string]LONAPConfigEntry) | ||
err = json.NewDecoder(lonapf).Decode(&lonapConfigStructure) | ||
if err == nil { | ||
for _, switches := range lonapConfigStructure { | ||
for switchName, switchInfo := range switches { | ||
XPC.PeersNames[switchInfo.Address] = switchName | ||
PeerMap[switchInfo.Address] = true | ||
AllowedCIDRsMap[quickCIDR(switchInfo.Address, switchInfo.Netmask).String()] = true | ||
} | ||
} | ||
} else { | ||
log.Printf("failed to parse lonap_mon_hosts.json: %v", err) | ||
} | ||
|
||
XPC.Peers = make([]string, 0) | ||
for k := range PeerMap { | ||
XPC.Peers = append(XPC.Peers, k) | ||
} | ||
XPC.AllowedCIDRs = make([]string, 0) | ||
for k := range AllowedCIDRsMap { | ||
XPC.AllowedCIDRs = append(XPC.AllowedCIDRs, k) | ||
} | ||
} | ||
} | ||
|
||
// quickCIDR returns the IPv4 network that contains the address IPStr under
// the dotted-decimal netmask NetmaskStr; for example ("192.0.2.94",
// "255.255.252.0") gives 192.0.0.0/22.
//
// It returns nil when IPStr or NetmaskStr is not a valid IPv4 address, or
// when the netmask is not a contiguous run of one bits followed by zero bits.
func quickCIDR(IPStr, NetmaskStr string) *net.IPNet {
	// To4 is safe on the nil IP that ParseIP returns for bad input, and it
	// yields nil for anything that is not IPv4.
	ip := net.ParseIP(IPStr).To4()
	nm := net.ParseIP(NetmaskStr).To4()
	if ip == nil || nm == nil {
		return nil
	}

	// Size reports 0, 0 for a mask that is not in canonical form. Treating
	// that as a prefix length of 0 would produce a network matching every
	// address, so such masks are rejected instead.
	ones, bits := net.IPv4Mask(nm[0], nm[1], nm[2], nm[3]).Size()
	if ones == 0 && bits == 0 {
		return nil
	}

	_, c, err := net.ParseCIDR(fmt.Sprintf("%s/%d", ip.String(), ones))
	if err != nil {
		return nil
	}
	return c
}
Oops, something went wrong.