内容目录
效果图
1.依赖
已部署 Grafana、Prometheus 和 Pushgateway
因为是由 shell 脚本采集信息后推送给 Pushgateway
2.脚本
2.1 pidstat版本
#!/bin/bash
# Samples per-process CPU and memory usage with pidstat and pushes the result
# to a Prometheus Pushgateway every 15 seconds.
hostname=$(hostname)
url="http://192.168.0.8:9091/metrics/job/system_metrics/instance/$hostname"

# A private temp file replaces the fixed /tmp/metrics.txt, which was a
# predictable name that any other collector using it would overwrite.
metrics_file=$(mktemp "${TMPDIR:-/tmp}/metrics.XXXXXX") || exit 1
trap 'rm -f -- "$metrics_file"' EXIT

while true; do
  # LC_ALL=C keeps "." as the decimal separator whatever the system locale is.
  LC_ALL=C pidstat -u -r -h 1 1 | grep -v pidstat | awk '
    # Skip blank lines, the "Linux ..." banner, the "#" header row and any row
    # too short for the fields read below. NF is tested instead of the
    # gawk-only "\s" escape so that every awk implementation behaves the same.
    NF < 9 || /^Linux/ || /^#/ { next }
    {
      # A 12-hour clock prints the time as two fields (e.g. 02:14:05 PM),
      # which moves the PID one field to the right.
      pid = ($2 ~ /^(AM|PM)$/) ? $4 : $3
      cmd = $NF
      cpu = $(NF-7)
      mem = $(NF-1)
      # Rows whose columns did not line up (for example a command name that
      # contains a space) are dropped: one malformed sample would make the
      # Pushgateway reject the whole push.
      if (pid !~ /^[0-9]+$/ || cpu !~ /^[0-9.]+$/ || mem !~ /^[0-9.]+$/) next
      # Double quotes and backslashes are not allowed raw inside a label value.
      gsub(/["\\]/, "_", cmd)
      cpu_lines = cpu_lines sprintf("cpu_usage{process=\"%s\", pid=\"%s\"} %s\n", cmd, pid, cpu)
      mem_lines = mem_lines sprintf("mem_usage{process=\"%s\", pid=\"%s\"} %s\n", cmd, pid, mem)
    }
    END {
      # The exposition format wants each metric family as one contiguous
      # group, with its TYPE line first.
      print "# TYPE cpu_usage gauge"
      printf "%s", cpu_lines
      print "# TYPE mem_usage gauge"
      printf "%s", mem_lines
    }' > "$metrics_file"

  # Uncomment to print the payload for debugging:
  #echo "$hostname $(cat "$metrics_file")"

  # Push to the Pushgateway. -S keeps transport errors visible despite -s; a
  # failed push is reported and the loop carries on with the next sample.
  curl -sS -X POST --data-binary @"$metrics_file" "$url" \
    || printf 'push to %s failed\n' "$url" >&2
  sleep 15
done
2.2 top命令版本
#!/bin/bash
# Samples per-process CPU and memory usage with one batch-mode top run and
# pushes the result to a Prometheus Pushgateway every 15 seconds.
hostname=$(hostname)
url="http://192.168.0.8:9091/metrics/job/system_metrics/instance/$hostname"

# A private temp file replaces the fixed /tmp/metrics.txt, which was a
# predictable name that any other collector using it would overwrite.
metrics_file=$(mktemp "${TMPDIR:-/tmp}/metrics.XXXXXX") || exit 1
trap 'rm -f -- "$metrics_file"' EXIT

while true; do
  # One batch-mode snapshot from top. LC_ALL=C keeps "." as the decimal
  # separator whatever the system locale is.
  LC_ALL=C top -b -n 1 | awk '
    {
      # Field positions assume the default top column layout.
      pid = $1
      cpu = $9
      mem = $10
      process = $12
      # Only rows that start with a numeric PID and carry numeric CPU/MEM
      # values are process rows. This skips the summary area and the column
      # header without hard-coding how many lines they occupy, and keeps
      # malformed samples out of the payload.
      if (pid !~ /^[0-9]+$/ || cpu !~ /^[0-9.]+$/ || mem !~ /^[0-9.]+$/) next
      if (process == "") process = "Unknown"
      # Double quotes and backslashes are not allowed raw inside a label value.
      gsub(/["\\]/, "_", process)
      cpu_lines = cpu_lines sprintf("cpu_usage{process=\"%s\", pid=\"%s\"} %s\n", process, pid, cpu)
      mem_lines = mem_lines sprintf("mem_usage{process=\"%s\", pid=\"%s\"} %s\n", process, pid, mem)
    }
    END {
      # The exposition format wants each metric family as one contiguous
      # group, with its TYPE line first.
      print "# TYPE cpu_usage gauge"
      printf "%s", cpu_lines
      print "# TYPE mem_usage gauge"
      printf "%s", mem_lines
    }' > "$metrics_file"

  # Uncomment to print the payload for debugging:
  #echo "$hostname $(cat "$metrics_file")"

  # Push to the Pushgateway. -S keeps transport errors visible despite -s; a
  # failed push is reported and the loop carries on with the next sample.
  curl -sS -X POST --data-binary @"$metrics_file" "$url" \
    || printf 'push to %s failed\n' "$url" >&2
  sleep 15
done
3. 启动
3.1 后台启动脚本
# Start the collector in the background, immune to hangups, discarding all of its output.
nohup bash getbig.sh >/dev/null 2>&1 &
3.2 开机服务
vim /etc/systemd/system/getbig.service
# /etc/systemd/system/getbig.service
# Runs the collector script as a long-lived service once the network is up.
# ExecStart expects the script to be installed and executable at
# /usr/local/bin/getbig, for example:
#   install -m 0755 getbig.sh /usr/local/bin/getbig
[Unit]
Description=getbig push to prometheus
Wants=network-online.target
After=network-online.target
[Service]
Type=simple
ExecStart=/usr/local/bin/getbig
# The collector is meant to run forever; bring it back if it exits with an error.
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target
# Make systemd re-read its unit files so the new getbig.service is known.
systemctl daemon-reload
# Start the collector now.
systemctl start getbig
# Start it automatically on every boot.
systemctl enable getbig
4. grafana仪表盘json
- Dashboard → New → Import → 复制下面 JSON
- 选择你的 Prometheus 数据源
- 保存即可!
{
  "__inputs": [
    {
      "name": "DS_PROMETHEUS",
      "label": "Prometheus",
      "description": "",
      "type": "datasource",
      "pluginId": "prometheus",
      "pluginName": "Prometheus"
    }
  ],
  "title": "系统进程 Top15 CPU/内存资源占用监控",
  "timezone": "browser",
  "refresh": "10s",
  "panels": [
    {
      "type": "timeseries",
      "title": "Top15 CPU使用率(%)",
      "gridPos": { "h": 10, "w": 24, "x": 0, "y": 0 },
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "targets": [
        {
          "expr": "topk(15, cpu_usage{instance=\"$node\"})",
          "legendFormat": "{{process}} (pid={{pid}})",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percent",
          "custom": {
            "drawStyle": "lines",
            "lineInterpolation": "smooth",
            "fillOpacity": 20,
            "lineWidth": 2
          }
        }
      }
    },
    {
      "type": "timeseries",
      "title": "Top15 内存使用率(%)",
      "gridPos": { "h": 10, "w": 24, "x": 0, "y": 10 },
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "targets": [
        {
          "expr": "topk(15, mem_usage{instance=\"$node\"})",
          "legendFormat": "{{process}} (pid={{pid}})",
          "refId": "B"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percent",
          "custom": {
            "drawStyle": "lines",
            "lineInterpolation": "smooth",
            "fillOpacity": 20,
            "lineWidth": 2
          }
        }
      }
    }
  ],
  "templating": {
    "list": [
      {
        "type": "query",
        "name": "node",
        "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
        "query": {
          "query": "label_values(cpu_usage, instance)",
          "refId": "Prometheus-node"
        },
        "refresh": 1,
        "multi": false,
        "includeAll": false
      }
    ]
  },
  "schemaVersion": 39,
  "version": 1
}
5.整合到别的仪表盘中
原来的仪表盘json格式
"panels": [ {...原来第一个面板...}, {...原来第二个面板...}, {...Top15 CPU 面板...}, <-- 把这个加在这里 {...Top15 内存 面板...} <-- 还有这个 ],
5.1 添加json
注意:目标仪表盘里必须已有 $node 变量;如果没有,请把表达式中的 $node 改成该仪表盘里取值为被监控服务器 hostname 的变量,
不然变量值为空,肯定查不到结果。另外,datasource 里的 uid 也要换成你自己 Prometheus 数据源的 uid。
{
  "type": "timeseries",
  "title": "Top15 CPU 使用率(%)",
  "gridPos": {
    "h": 12,
    "w": 24,
    "x": 0,
    "y": 20
  },
  "datasource": {
    "type": "prometheus",
    "uid": "bdfpj6tvj77cwf"
  },
  "targets": [
    {
      "expr": "topk(15, cpu_usage{instance=\"$node\"})",
      "legendFormat": "{{process}} (pid={{pid}})",
      "refId": "A"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "percent",
      "custom": {
        "drawStyle": "lines",
        "lineInterpolation": "smooth",
        "fillOpacity": 20,
        "lineWidth": 2
      }
    }
  }
},
{
  "type": "timeseries",
  "title": "Top15 内存 使用率(%)",
  "gridPos": {
    "h": 12,
    "w": 24,
    "x": 0,
    "y": 32
  },
  "datasource": {
    "type": "prometheus",
    "uid": "bdfpj6tvj77cwf"
  },
  "targets": [
    {
      "expr": "topk(15, mem_usage{instance=\"$node\"})",
      "legendFormat": "{{process}} (pid={{pid}})",
      "refId": "B"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "percent",
      "custom": {
        "drawStyle": "lines",
        "lineInterpolation": "smooth",
        "fillOpacity": 20,
        "lineWidth": 2
      }
    }
  }
}
近期评论