端口监听情况exporter开发实践


关于custom exporter的开发方法,前面两篇文章已经讲得很清楚了,今天结合实际应用场景,开发一个开箱即用的port exporter.

前言

开始阅读这篇文章之前,需要你对linux系统有一定的理解,并且理解以下几个文件/目录的用途。

  • /proc/net/tcp
  • /proc/pid/fd

这部分内容篇幅过长,需要读者自己去学习理解。本篇文章主要讲述port exporter的开发。

代码

目录结构

1
2
3
4
5
6
7
8
9
10
11
12
exporter/
├── collector
│   ├── port.go
│   └── port_test.go
├── go.mod
├── go.sum
├── main.go
├── metrics
│   └── port
│       ├── port.go
│       ├── port_linux.go

main.go

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
package main

import (
	"fmt"
	"net/http"
	"os"

	"exporter/collector"
	prom "github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// main registers the custom port collector with the default Prometheus
// registry and serves the metrics on :8088/metrics.
func main() {
	prom.MustRegister(collector.NewCustomPortCollector())
	http.Handle("/metrics", promhttp.Handler())

	// ListenAndServe only returns on failure. Report the failure on stderr
	// and exit non-zero so a supervisor can tell the exporter did not start.
	if err := http.ListenAndServe(":8088", nil); err != nil {
		fmt.Fprintf(os.Stderr, "Error occur when start custom collector on %v %v\n", collector.HostName, err)
		os.Exit(1)
	}
}

根据前两篇讲的exporter开发套路,这里的main函数非常简单,只需要暴露一个固定的url给prometheus访问即可。另外,目前该custom exporter只有一个port exporter功能,所以不需要做其他的过度、超前设计,重点在于port exporter的功能实现和性能上。

核心代码

代码地址–custom exporter

采集:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
// Collect gathers every TCP socket in the Listen state and sends one
// "custom_port_exporter" sample per socket on c. Each sample is labelled
// with host, app (executable name), port ("tcp <addr>:<port>"), state and
// pid; the sample value is the owning process's pid as a float.
//
// If the socket table cannot be read, nothing is sent for this scrape.
func (p PortCollector) Collect(c chan<- prometheus.Metric) {
	tabs, err := port.TCPSockets(func(s *port.SocketEntry) bool {
		return s.State == port.Listen
	})
	if err != nil {
		// Best effort: skip this scrape rather than emit partial data.
		return
	}

	// The descriptor is the same for every sample, so build it once
	// instead of once per socket.
	desc := prometheus.NewDesc(
		"custom_port_exporter",
		"custom_port_exporter scrape node port state info",
		[]string{"host", "app", "port", "state", "pid"},
		nil,
	)

	// lookup renders a socket address as "<name-or-ip>:<port>". The address
	// is replaced by its first reverse-DNS name when one resolves, and is
	// truncated to IPv4Strlen bytes either way.
	lookup := func(skaddr *port.SocketAddr) string {
		const IPv4Strlen = 17
		addr := skaddr.IP.String()
		names, err := net.LookupAddr(addr)
		if err == nil && len(names) > 0 {
			addr = names[0]
		}
		if len(addr) > IPv4Strlen {
			addr = addr[:IPv4Strlen]
		}
		return fmt.Sprintf("%s:%d", addr, skaddr.Port)
	}

	for _, e := range tabs {
		// A socket may have no resolved owner (e.Process == nil). In that
		// case use an empty app/pid label and a zero value rather than
		// calling methods on the nil pointer.
		exec, pid, pidValue := "", "", 0.0
		if e.Process != nil {
			exec = e.Process.ExecName()
			pid = e.Process.PidValue()
			pidValue = e.Process.PidFloatValue()
		}
		c <- prometheus.MustNewConstMetric(
			desc,
			prometheus.UntypedValue,
			pidValue,
			HostName, exec, "tcp "+lookup(e.LocalAddr), e.State.String(), pid,
		)
	}
}

这个方法的核心是port.TCPSockets,经过层层穿透,port.TCPSockets的主要实现依赖于metrics/port/port_linux.go中的netstat(path string, fn filterFunc) ([]SocketEntry, error):

1
2
3
4
5
6
7
8
9
10
11
12
13
// netstat opens the socket table at path, parses it with readSocket using
// fn as the entry filter, and then calls getPidAndExec on the resulting
// entries. It returns a nil slice and the error if the file cannot be
// opened or parsed.
func netstat(path string, fn filterFunc) ([]SocketEntry, error) {
	// Scope the file handle to the parse step so it is closed before
	// getPidAndExec runs.
	entries, err := func() ([]SocketEntry, error) {
		file, openErr := os.Open(path)
		if openErr != nil {
			return nil, openErr
		}
		defer file.Close()
		return readSocket(file, fn)
	}()
	if err != nil {
		return nil, err
	}

	getPidAndExec(entries)
	return entries, nil
}

其中,readSocket主要是实现对/proc/net/tcp的读取和解析。内容结构参考这篇分析文章,具体的解析过程代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// readSocket parses a socket table from r, one entry per line, and returns
// the entries for which accept reports true.
//
// The first line is treated as a header and discarded. On each remaining
// line anything after a '#' is dropped, and lines that are empty after that
// are skipped. Every other line must have at least 12 whitespace-separated
// fields, of which these are used:
//
//	fields[1]  local address  (parsed by parseAddr)
//	fields[2]  remote address (parsed by parseAddr)
//	fields[3]  socket state, hexadecimal
//	fields[7]  owning UID, decimal
//	fields[9]  inode, kept as a string
//
// A malformed line aborts parsing and returns a nil slice with the error.
func readSocket(r io.Reader, accept filterFunc) ([]SocketEntry, error) {
	br := bufio.NewScanner(r)
	tab := make([]SocketEntry, 0, 4)

	// Discard the header line.
	br.Scan()

	for br.Scan() {
		line := br.Text()
		// Strip trailing comments.
		if i := strings.Index(line, "#"); i >= 0 {
			line = line[:i]
		}
		fields := strings.Fields(line)
		if len(fields) == 0 {
			// Blank or comment-only line: nothing to parse, and not an error.
			continue
		}
		if len(fields) < 12 {
			return nil, fmt.Errorf("netstat: not enough fields: %v, %v", len(fields), fields)
		}

		var e SocketEntry

		addr, err := parseAddr(fields[1])
		if err != nil {
			return nil, err
		}
		e.LocalAddr = addr

		addr, err = parseAddr(fields[2])
		if err != nil {
			return nil, err
		}
		e.RemoteAddr = addr

		// The state is a hexadecimal value that must fit in 8 bits.
		u, err := strconv.ParseUint(fields[3], 16, 8)
		if err != nil {
			return nil, err
		}
		e.State = SocketState(u)

		u, err = strconv.ParseUint(fields[7], 10, 32)
		if err != nil {
			return nil, err
		}
		e.UID = uint32(u)

		e.ino = fields[9]

		if accept(&e) {
			tab = append(tab, e)
		}
	}
	return tab, br.Err()
}

getPidAndExec主要是实现对/proc/pid/fd 的读取和解析。内容结构参考这篇分析文章,具体的解析过程代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
// readFdDir walks the "fd" directory under p.base and, for every descriptor
// that is a symlink to a socket, attaches the process to each entry of
// p.sktab whose inode matches.
//
// The process record (p.p) is built lazily on the first match by reading
// the "stat" file under p.base. Any failure to read the directory or the
// stat file ends the scan silently, leaving the entries matched so far
// populated.
func (p *procFd) readFdDir() {
	// link name is of the form socket:[5860846]
	fddir := path.Join(p.base, "/fd")
	fi, err := ioutil.ReadDir(fddir)
	if err != nil {
		return
	}
	var buf [128]byte

	for _, file := range fi {
		fd := path.Join(fddir, file.Name())
		lname, err := os.Readlink(fd)
		if err != nil || !strings.HasPrefix(lname, sockPrefix) {
			continue
		}

		for i := range p.sktab {
			sk := &p.sktab[i]
			ss := sockPrefix + sk.ino + "]"
			if ss != lname {
				continue
			}
			if p.p == nil {
				stat, err := os.Open(path.Join(p.base, "stat"))
				if err != nil {
					return
				}
				n, err := stat.Read(buf[:])
				stat.Close()
				if err != nil {
					return
				}
				// The process name is taken from the second space-separated
				// field. Bail out if the read was too short to contain it,
				// instead of indexing past the end of z.
				z := bytes.SplitN(buf[:n], []byte(" "), 3)
				if len(z) < 2 {
					return
				}
				name := getProcName(z[1])
				p.p = &Proc{p.pid, name}
			}
			sk.Process = p.p
		}
	}
}

使用

查看数据,访问ip:8088/metrics,如果能看到类似以下的数据,则正常:

1
2
3
custom_port_exporter{app="docker-proxy",host="ip",pid="1814",port="tcp 0.0.0.0:8080",state="10"} 1814
custom_port_exporter{app="etcd",host="ip",pid="3063",port="tcp localhost:2379",state="10"} 3063
custom_port_exporter{app="etcd",host="ip",pid="3063",port="tcp localhost:2381",state="10"} 3063

把port exporter的信息加入到prometheus的配置文件:

1
2
3
4
5
scrape_configs:
  - job_name: 'custom exporter'
    static_configs:
      - targets:
          - localhost:8088

以后如果有其他好玩的需求,会持续更新到custom exporter
(完)