node_exporter源码阅读

node_exporter源码阅读

基于prometheus的监控系统搭建起来之后,对于多样化的业务线来说,
定制化的采集监控指标必将是业务线产品经理的又一个需求方向,所以,作为平台开发者来说,
预判产品经理的预判也是一项基本的职业技能🤣,即:think deeper
基于此,先把node_exporter的源码一窥究竟.

本篇文章主要是源码阅读,概念和指标相关的介绍需要读者自行去了解,不过不了解也不影响本篇文章的阅读

附上node_exporter源码地址

1.目录结构

总的来说,node_exporter的目录结构还是非常简单的:

  • node_exporter.go文件即项目的主入口
  • collector文件夹即存放各种采集指标的文件夹
    其余的就是一些文档和辅助脚本,源码阅读时可以忽略;

2.项目主入口 – node_exporter.go

2.1定义了一个结构体–handler

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// handler的实例由newHandler来创建
// handler封装了一个http.Handler
// 对于带filters参数的请求,会动态创建一个http.Handler
// filters参数是针对采集指标的,如果不需要那么多采集指标,可以通过filters参数指定过滤掉,
// 那么创建出来的http.Handler也就是一个filtered http.Handler
// 这个创建出来的http.Handler会赋值给unfilteredHandler
type handler struct {
unfilteredHandler http.Handler
// node_exporter本身的指标也要采集的话,就会用到这个单独的registry
exporterMetricsRegistry *prometheus.Registry
// 是否要采集node_exporter本身的指标
includeExporterMetrics bool
// 最大并发请求数,默认40,可通过命令行传入
maxRequests int
logger log.Logger
}

prometheus对于node_exporter采集数据的获取是通过向node_exporter发送拉取请求来实现的,
而这个拉取请求就是通过http协议来交互的.所以这个handler和我们web项目中的handler是一个概念,
即都实现了http.Handler接口:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// ServeHTTP implements http.Handler.
func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
filters := r.URL.Query()["collect[]"]
level.Debug(h.logger).Log("msg", "collect query:", "filters", filters)

if len(filters) == 0 {
// No filters, use the prepared unfiltered handler.
h.unfilteredHandler.ServeHTTP(w, r)
return
}
// 这里会调用innerHandler来创建上面讲到的handler
filteredHandler, err := h.innerHandler(filters...)
if err != nil {
level.Warn(h.logger).Log("msg", "Couldn't create filtered metrics handler:", "err", err)
w.WriteHeader(http.StatusBadRequest)
w.Write([]byte(fmt.Sprintf("Couldn't create filtered metrics handler: %s", err)))
return
}
filteredHandler.ServeHTTP(w, r)
}

2.2从主入口函数main开始

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
func main() {
// 命令行参数处理
var (
listenAddress = kingpin.Flag(
"web.listen-address",
"Address on which to expose metrics and web interface.",
).Default(":9100").String()
metricsPath = kingpin.Flag(
"web.telemetry-path",
"Path under which to expose metrics.",
).Default("/metrics").String()
disableExporterMetrics = kingpin.Flag(
"web.disable-exporter-metrics",
"Exclude metrics about the exporter itself (promhttp_*, process_*, go_*).",
).Bool()
maxRequests = kingpin.Flag(
"web.max-requests",
"Maximum number of parallel scrape requests. Use 0 to disable.",
).Default("40").Int()
disableDefaultCollectors = kingpin.Flag(
"collector.disable-defaults",
"Set all collectors to disabled by default.",
).Default("false").Bool()
configFile = kingpin.Flag(
"web.config",
"[EXPERIMENTAL] Path to config yaml file that can enable TLS or authentication.",
).Default("").String()
)

promlogConfig := &promlog.Config{}
flag.AddFlags(kingpin.CommandLine, promlogConfig)
kingpin.Version(version.Print("node_exporter"))
kingpin.CommandLine.UsageWriter(os.Stdout)
kingpin.HelpFlag.Short('h')
kingpin.Parse()
logger := promlog.New(promlogConfig)
// 命令行参数处理结束

// 如果设置disableDefaultCollectors为true,将把除了显式从命令行传入的采集指标之外的所有指标采集器设置为不采集
if *disableDefaultCollectors {
collector.DisableDefaultCollectors()
}
level.Info(logger).Log("msg", "Starting node_exporter", "version", version.Info())
level.Info(logger).Log("msg", "Build context", "build_context", version.BuildContext())
// 用户
if user, err := user.Current(); err == nil && user.Uid == "0" {
level.Warn(logger).Log("msg", "Node Exporter is running as root user. This exporter is designed to run as unpriviledged user, root is not required.")
}

// newHandler是重点,后面会讲到
http.Handle(*metricsPath, newHandler(!*disableExporterMetrics, *maxRequests, logger))
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.Write([]byte(`<html>
<head><title>Node Exporter</title></head>
<body>
<h1>Node Exporter</h1>
<p><a href="` + *metricsPath + `">Metrics</a></p>
</body>
</html>`))
})

// 开启http服务,等待prometheus来抓取数据
level.Info(logger).Log("msg", "Listening on", "address", *listenAddress)
server := &http.Server{Addr: *listenAddress}
if err := web.ListenAndServe(server, *configFile, logger); err != nil {
level.Error(logger).Log("err", err)
os.Exit(1)
}
}

2.3handler的创建者–newHandler

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24

func newHandler(includeExporterMetrics bool, maxRequests int, logger log.Logger) *handler {
// handler的结构体成员赋值
h := &handler{
exporterMetricsRegistry: prometheus.NewRegistry(),
includeExporterMetrics: includeExporterMetrics,
maxRequests: maxRequests,
logger: logger,
}
if h.includeExporterMetrics {
h.exporterMetricsRegistry.MustRegister(
promcollectors.NewProcessCollector(promcollectors.ProcessCollectorOpts{}),
promcollectors.NewGoCollector(),
)
}
// innerHandler真正创建handler的地方
if innerHandler, err := h.innerHandler(); err != nil {
panic(fmt.Sprintf("Couldn't create metrics handler: %s", err))
} else {
h.unfilteredHandler = innerHandler
}
return h
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
// 根据filters入参创建对应的http.Handler
func (h *handler) innerHandler(filters ...string) (http.Handler, error) {
// 根据filters情况创建NodeCollector,后面会讲这个NodeCollector
nc, err := collector.NewNodeCollector(h.logger, filters...)
if err != nil {
return nil, fmt.Errorf("couldn't create collector: %s", err)
}

// Only log the creation of an unfiltered handler, which should happen
// only once upon startup.
if len(filters) == 0 {
level.Info(h.logger).Log("msg", "Enabled collectors")
collectors := []string{}
for n := range nc.Collectors {
collectors = append(collectors, n)
}
sort.Strings(collectors)
for _, c := range collectors {
level.Info(h.logger).Log("collector", c)
}
}

// 下面就是注册实现了prometheus.Collector接口的node_exporter/自研exporter的通用流程
// 在node_exporter这里,由NodeCollector实现prometheus.Collector接口
r := prometheus.NewRegistry()
r.MustRegister(version.NewCollector("node_exporter"))
if err := r.Register(nc); err != nil {
return nil, fmt.Errorf("couldn't register node collector: %s", err)
}
handler := promhttp.HandlerFor(
prometheus.Gatherers{h.exporterMetricsRegistry, r},
promhttp.HandlerOpts{
ErrorLog: stdlog.New(log.NewStdlibAdapter(level.Error(h.logger)), "", 0),
ErrorHandling: promhttp.ContinueOnError,
MaxRequestsInFlight: h.maxRequests,
Registry: h.exporterMetricsRegistry,
},
)
if h.includeExporterMetrics {
// Note that we have to use h.exporterMetricsRegistry here to
// use the same promhttp metrics for all expositions.
handler = promhttp.InstrumentMetricHandler(
h.exporterMetricsRegistry, handler,
)
}
return handler, nil
}

3.node_exporter/自研exporter必须要实现的一个接口–prometheus.Collector

这里涉及到github.com/prometheus/client_golang/prometheus的源码,留到讲自研exporter的时候会剖析,
client_golang是prometheus的官方go库,既可以用于集成现有应用,也可以作为连接Prometheus HTTP API的基础库

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
// Collector is the interface implemented by anything that can be used by
// Prometheus to collect metrics. A Collector has to be registered for
// collection. See Registerer.Register.
//
// The stock metrics provided by this package (Gauge, Counter, Summary,
// Histogram, Untyped) are also Collectors (which only ever collect one metric,
// namely itself). An implementer of Collector may, however, collect multiple
// metrics in a coordinated fashion and/or create metrics on the fly. Examples
// for collectors already implemented in this library are the metric vectors
// (i.e. collection of multiple instances of the same Metric but with different
// label values) like GaugeVec or SummaryVec, and the ExpvarCollector.
type Collector interface {
// Describe sends the super-set of all possible descriptors of metrics
// collected by this Collector to the provided channel and returns once
// the last descriptor has been sent. The sent descriptors fulfill the
// consistency and uniqueness requirements described in the Desc
// documentation.
//
// It is valid if one and the same Collector sends duplicate
// descriptors. Those duplicates are simply ignored. However, two
// different Collectors must not send duplicate descriptors.
//
// Sending no descriptor at all marks the Collector as “unchecked”,
// i.e. no checks will be performed at registration time, and the
// Collector may yield any Metric it sees fit in its Collect method.
//
// This method idempotently sends the same descriptors throughout the
// lifetime of the Collector. It may be called concurrently and
// therefore must be implemented in a concurrency safe way.
//
// If a Collector encounters an error while executing this method, it
// must send an invalid descriptor (created with NewInvalidDesc) to
// signal the error to the registry.
Describe(chan<- *Desc)
// Collect is called by the Prometheus registry when collecting
// metrics. The implementation sends each collected metric via the
// provided channel and returns once the last metric has been sent. The
// descriptor of each sent metric is one of those returned by Describe
// (unless the Collector is unchecked, see above). Returned metrics that
// share the same descriptor must differ in their variable label
// values.
//
// This method may be called concurrently and must therefore be
// implemented in a concurrency safe way. Blocking occurs at the expense
// of total performance of rendering all registered metrics. Ideally,
// Collector implementations support concurrent readers.
Collect(chan<- Metric)
}

3.1node_exporterprometheus.Collector接口的实现者–NodeCollector

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

// NodeCollector implements the prometheus.Collector interface.
type NodeCollector struct {
Collectors map[string]Collector
logger log.Logger
}

// Describe implements the prometheus.Collector interface.
func (n NodeCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- scrapeDurationDesc
ch <- scrapeSuccessDesc
}

// Collect implements the prometheus.Collector interface.
func (n NodeCollector) Collect(ch chan<- prometheus.Metric) {
wg := sync.WaitGroup{}
wg.Add(len(n.Collectors))
for name, c := range n.Collectors {
go func(name string, c Collector) {
// 执行各个采集指标的采集方法
execute(name, c, ch, n.logger)
wg.Done()
}(name, c)
}
wg.Wait()
}

4.结束语

整体来看node_exporter的源码可阅读性还是非常高的.接下来将会写一篇自研exporter的实践;