21 commits
255f4c5
chore(grafana): format indent node-alert.json only
waitingsong Mar 23, 2025
5550a86
chore(grafana): format node-overview.json indent only
waitingsong Mar 23, 2025
c868abe
feat(node): zfs monitor and grafana ui
waitingsong Mar 26, 2025
282b11e
feat(node): add record zfs_exporter_up
waitingsong Apr 1, 2025
ad95f0f
feat(grafana): add zfs_exporter aliveness panels
waitingsong Apr 1, 2025
f39e7fa
chore(node): remove zfs_exporter installation task, use repo installa…
waitingsong Apr 6, 2025
5996ac0
chore(grafana): format node-cluster.json indent only
waitingsong Apr 7, 2025
33ac720
chore(grafana): render node-cluster.json Emoji
waitingsong Apr 7, 2025
20210c6
feat(prometheus): add records for ZFS ARC
waitingsong Apr 8, 2025
681393e
feat(grafana): update node-cluster.json adding ZFS ARC panels
waitingsong Apr 8, 2025
c8d66b2
refactor(prometheus): rename record name node:zfs:pool_metrics to nod…
waitingsong Apr 8, 2025
1db2e51
chore(grafana): format node-instance.json indent only
waitingsong Apr 8, 2025
c983000
chore(grafana): render node-instance.json Emoji and sync pluginVersion
waitingsong Apr 8, 2025
8aba4e4
chore(grafana): update node-instance.json sort Logs row position
waitingsong Apr 9, 2025
b02dfc5
feat(grafana): update node-instance.json adding ZFS ARC panels
waitingsong Apr 9, 2025
a9639b9
perf(grafana): update node-zfs.json expr
waitingsong Apr 11, 2025
ef53008
refactor(prometheus): rename record name node:zfs:dataset_metrics to …
waitingsong Apr 12, 2025
70e98b4
feat(prometheus): update node.yml adding alert ZARCCachePressure
waitingsong Apr 12, 2025
dd21605
chore(grafana): render node-alert.json Emoji and remove unnecessary e…
waitingsong Apr 12, 2025
4f14793
feat(grafana): update node-alert.json adding ZFS ARC panel
waitingsong Apr 12, 2025
e3fa30b
chore(grafana): update node-instance.json change ARC panel legend.cal…
waitingsong Apr 12, 2025
5,523 changes: 2,837 additions & 2,686 deletions files/grafana/node/node-alert.json

Large diffs are not rendered by default.

26,095 changes: 13,630 additions & 12,465 deletions files/grafana/node/node-cluster.json

Large diffs are not rendered by default.

32,183 changes: 16,551 additions & 15,632 deletions files/grafana/node/node-instance.json

Large diffs are not rendered by default.

9,793 changes: 5,502 additions & 4,291 deletions files/grafana/node/node-overview.json

Large diffs are not rendered by default.

3,184 changes: 3,184 additions & 0 deletions files/grafana/node/node-zfs.json

Large diffs are not rendered by default.

182 changes: 178 additions & 4 deletions files/prometheus/rules/node.yml
@@ -99,7 +99,7 @@ groups:
- record: node:cpu:time_irate1m
expr: irate(node_cpu_seconds_total[1m])

# {cpu} total time spent per second on single cpu
# {cpu} total time spent per second on single cpu
- record: node:cpu:total_time_irate1m
expr: sum without (mode) (node:cpu:time_irate1m)

@@ -247,7 +247,7 @@ groups:
node_memory_HardwareCorrupted_bytes +
node_memory_PageTables_bytes +
node_memory_Slab_bytes +
node_memory_PageTables_bytes) -
node_memory_PageTables_bytes) -
node_memory_Buffers_bytes -
(node_memory_Hugepagesize_bytes * node_memory_HugePages_Total) -
(node_memory_Cached_bytes - node_memory_Mapped_bytes ) -
@@ -443,6 +443,93 @@ groups:
- record: node:dev:disk_io_batch_1m
expr: node:dev:disk_io_bytes_rate1m / node:dev:disk_iops_1m

#--------------------------------#
# ZFS #
#--------------------------------#
# health [0: ONLINE, 1: DEGRADED, 2: FAULTED, 3: OFFLINE, 4: UNAVAIL, 5: REMOVED, 6: SUSPENDED].
- record: node:ins:zfs_pool_metrics
expr: |
label_replace(zfs_pool_ashift, "metric_type", "ashift", "", "") or
label_replace(zfs_pool_allocated_bytes / 1048576, "metric_type", "allocated_mib", "", "") or
label_replace(zfs_pool_autoexpand, "metric_type", "autoexpand", "", "") or
label_replace(zfs_pool_autoreplace, "metric_type", "autoreplace", "", "") or
label_replace(zfs_pool_autotrim, "metric_type", "autotrim", "", "") or
label_replace(
ceil( (zfs_pool_allocated_bytes / clamp_min(zfs_pool_size_bytes, 1) ) * 1000 ) / 1000,
"metric_type", "capacity", "", ""
) or
label_replace(zfs_pool_dedupratio, "metric_type", "dedupratio", "", "") or
label_replace(zfs_pool_free_bytes / 1048576, "metric_type", "free_mib", "", "") or
label_replace(zfs_pool_health, "metric_type", "health", "", "") or
label_replace(zfs_pool_leaked_bytes / 1024, "metric_type", "leaked_kib", "", "") or
label_replace(zfs_pool_fragmentation_ratio, "metric_type", "fragmentation_ratio", "", "") or
label_replace(zfs_pool_freeing_bytes / 1024, "metric_type", "freeing_kib", "", "") or
label_replace(zfs_pool_readonly, "metric_type", "readonly", "", "") or
label_replace(zfs_pool_size_bytes / 1073741824, "metric_type", "size_gib", "", "")

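# Usage sketch (illustrative only): the record above folds the individual zfs_pool_*
# series into one metric keyed by the synthetic "metric_type" label, so a single
# selector retrieves any facet, e.g.
#   node:ins:zfs_pool_metrics{metric_type="health"}   == 0    -> pools that are ONLINE
#   node:ins:zfs_pool_metrics{metric_type="capacity"} > 0.85  -> pools above 85% capacity
# The capacity term uses clamp_min(zfs_pool_size_bytes, 1) to avoid division by zero
# and ceil(x * 1000) / 1000 to round the ratio up to three decimal places.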
- record: node:ins:zfs_dataset_metrics
expr: |
label_replace(zfs_dataset_atime, "metric_type", "atime", "", "") or
label_replace(zfs_dataset_available_bytes / 1048576, "metric_type", "avail_mib", "", "") or
label_replace(
ceil( (zfs_dataset_used_bytes / clamp_min(zfs_dataset_used_bytes + zfs_dataset_available_bytes, 1) ) * 1000 ) / 1000,
"metric_type", "capacity", "", ""
) or
label_replace(zfs_dataset_compression, "metric_type", "compression", "", "") or
label_replace(zfs_dataset_compressratio, "metric_type", "compressratio", "", "") or
label_replace(zfs_dataset_creation * 1000, "metric_type", "creation", "", "") or
label_replace(zfs_dataset_exec, "metric_type", "exec", "", "") or
label_replace(zfs_dataset_logbias, "metric_type", "logbias", "", "") or
label_replace(zfs_dataset_logical_used_bytes / 1048576, "metric_type", "logical_used_mib", "", "") or
label_replace(zfs_dataset_mounted, "metric_type", "mounted", "", "") or
label_replace(zfs_dataset_primarycache, "metric_type", "primarycache", "", "") or
label_replace(zfs_dataset_quota_bytes / 1048576, "metric_type", "quota_mib", "", "") or
label_replace(zfs_dataset_recordsize / 1024, "metric_type", "recordsize_kib", "", "") or
label_replace(zfs_dataset_referenced_bytes, "metric_type", "refer_bytes", "", "") or
label_replace(zfs_dataset_relatime, "metric_type", "relatime", "", "") or
label_replace(zfs_dataset_sync, "metric_type", "sync", "", "") or
label_replace((zfs_dataset_used_bytes + zfs_dataset_available_bytes) / 1073741824, "metric_type", "size_gib", "", "") or
label_replace(zfs_dataset_used_bytes / 1048576, "metric_type", "used_mib", "", "") or
label_replace(zfs_dataset_written_bytes / 1048576, "metric_type", "written_mib", "", "")

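# Note on the dataset record above (a hedged reading of the expression, not stated in the PR):
# capacity is derived as used / (used + available) because a dataset has no fixed size,
# and zfs_dataset_creation is multiplied by 1000 presumably to turn the Unix timestamp in
# seconds into the millisecond timestamps Grafana expects.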
- record: node:ins:zfs_arc_utilization
expr: node_zfs_arc_c / node_zfs_arc_c_max

- record: node:ins:zfs_arc_usage_ratio
expr: node_zfs_arc_size / node_zfs_arc_c

- record: node:ins:zfs_arc_memory_ratio
expr: node_zfs_arc_size / node_memory_MemTotal_bytes

- record: node:ins:zfs_arc_meta_usage
expr: node_zfs_arc_arc_meta_used / node_zfs_arc_arc_meta_limit

- record: node:ins:zfs_arc_hit_ratio
expr: node_zfs_arc_hits / (node_zfs_arc_hits + node_zfs_arc_misses)

- record: node:ins:zfs_arc_hit_ratio_rate1m
expr: rate(node_zfs_arc_hits[1m]) / (rate(node_zfs_arc_hits[1m]) + rate(node_zfs_arc_misses[1m]))

- record: node:ins:zfs_arc_hit_ratio_rate5m
expr: rate(node_zfs_arc_hits[5m]) / (rate(node_zfs_arc_hits[5m]) + rate(node_zfs_arc_misses[5m]))
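# Illustrative arithmetic (hypothetical sample values): if the ARC serves 900 hits/s and
# 100 misses/s over the window, the ratio is 900 / (900 + 100) = 0.9. The rate()-based
# variants reflect recent behaviour, while zfs_arc_hit_ratio above divides the raw
# lifetime counters and is therefore dominated by history since boot.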

# Immediate intervention and adjustment are required if value > 0.8
# Monitor trends and prepare for scaling plans if value > 0.6
# The formula is a weighted sum of the following metrics:
# - capacity pressure: 40% weight
# - mem pressure: 30% weight
# - hit ratio pressure: 20% weight
# - evict failure pressure: 10% weight
- record: node:ins:zfs_arc_pressure_ratio
expr: |
(
clamp_max(node:ins:zfs_arc_usage_ratio / 0.8, 1) * 0.4
+ (1 - node:ins:mem_usage) * 0.3
+ (1 - node:ins:zfs_arc_hit_ratio_rate5m) * 0.2
+ (rate(node_zfs_arc_evict_skip[1h]) / (rate(node_zfs_arc_evict_skip[1h]) + 1)) * 0.1
) * 1.0

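# Worked example with hypothetical inputs (a sketch, not measured data): for
# zfs_arc_usage_ratio = 0.9, node:ins:mem_usage = 0.4, zfs_arc_hit_ratio_rate5m = 0.7
# and a negligible evict-skip rate, the expression evaluates to
#   min(0.9 / 0.8, 1) * 0.4 + (1 - 0.4) * 0.3 + (1 - 0.7) * 0.2 + 0 * 0.1
#   = 0.40 + 0.18 + 0.06 + 0.00 = 0.64
# which falls in the "monitor trends / prepare to scale" band (> 0.6) but stays below
# the immediate-intervention threshold (> 0.8).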

#--------------------------------#
# Filesystem #
#--------------------------------#
@@ -777,7 +864,7 @@ groups:
keepalived_up[ins={{ $labels.ins }}, instance={{ $labels.instance }}] = {{ $value }} < 1
http://g.pigsty/d/node-instance?var-ins={{ $labels.ins }}



#==============================================================#
# Node : CPU #
@@ -855,7 +942,7 @@ groups:

# filesystem usage > 90%
- alert: NodeFsSpaceFull
expr: node:fs:space_usage > 0.90
expr: node:fs:space_usage{fstype!="zfs"} > 0.90
for: 1m
labels: { level: 1, severity: WARN, category: node }
annotations:
@@ -883,6 +970,93 @@ groups:
description: |
node:ins:fd_usage[ins={{ $labels.ins }}] = {{ $value | printf "%.2f" }} > 90%

#==============================================================#
# Node : ZFS POOL #
#==============================================================#
# 0: ONLINE, 1: DEGRADED, 2: FAULTED, 3: OFFLINE, 4: UNAVAIL, 5: REMOVED, 6: SUSPENDED
- alert: ZPoolDegraded
expr: node:ins:zfs_pool_metrics{metric_type="health"} == 1
labels: { level: 0, severity: CRIT, category: node }
annotations:
summary: "CRIT ZFS Pool Degraded {{ $labels.pool }} {{ $labels.ins }}@{{ $labels.instance }}"

- alert: ZPoolFaulted
expr: node:ins:zfs_pool_metrics{metric_type="health"} == 2
labels: { level: 0, severity: CRIT, category: node }
annotations:
summary: "CRIT ZFS Pool Faulted {{ $labels.pool }} {{ $labels.ins }}@{{ $labels.instance }}"

- alert: ZPoolOffline
expr: node:ins:zfs_pool_metrics{metric_type="health"} == 3
labels: { level: 0, severity: CRIT, category: node }
annotations:
summary: "CRIT ZFS Pool Offline {{ $labels.pool }} {{ $labels.ins }}@{{ $labels.instance }}"

- alert: ZPoolUnavail
expr: node:ins:zfs_pool_metrics{metric_type="health"} == 4
labels: { level: 0, severity: CRIT, category: node }
annotations:
summary: "CRIT ZFS Pool Unavail {{ $labels.pool }} {{ $labels.ins }}@{{ $labels.instance }}"

- alert: ZPoolRemoved
expr: node:ins:zfs_pool_metrics{metric_type="health"} == 5
labels: { level: 0, severity: WARN, category: node }
annotations:
summary: "CRIT ZFS Pool Removed {{ $labels.pool }} {{ $labels.ins }}@{{ $labels.instance }}"

- alert: ZPoolSuspended
expr: node:ins:zfs_pool_metrics{metric_type="health"} == 6
labels: { level: 0, severity: CRIT, category: node }
annotations:
summary: "CRIT ZFS Pool Suspended {{ $labels.pool }} {{ $labels.ins }}@{{ $labels.instance }}"

- alert: ZPoolReadonly
expr: node:ins:zfs_pool_metrics{metric_type="readonly"} == 1
labels: { level: 0, severity: CRIT, category: node }
annotations:
summary: "WARN ZPool Readonly {{ $labels.pool }} {{ $labels.ins }}@{{ $labels.instance }}"

# > 85% according to https://openzfs.github.io/openzfs-docs/Performance%20and%20Tuning/Workload%20Tuning.html#free-space
- alert: ZPoolSpaceFull
expr: node:ins:zfs_pool_metrics{metric_type="capacity"} > 0.85
for: 1m
labels: { level: 0, severity: CRIT, category: node }
annotations:
summary: 'CRIT ZFS Pool SpaceFull {{ $labels.pool }} {{ $labels.ins }}@{{ $labels.instance }} {{ $value | printf "%.2f" }}'
description: |
node:ins:zfs_pool_metrics[metric_type=capacity, pool={{ $labels.pool }}, ins={{ $labels.ins }}] = {{ $value | printf "%.2f" }} > 85%

# > 85% according to https://openzfs.github.io/openzfs-docs/Performance%20and%20Tuning/Workload%20Tuning.html#free-space
- alert: ZDatasetSpaceFull
expr: node:ins:zfs_dataset_metrics{metric_type="capacity"} > 0.85
for: 1m
labels: { level: 1, severity: WARN, category: node }
annotations:
summary: 'WARN ZFS Dataset SpaceFull {{ $labels.name }} {{ $labels.ins }}@{{ $labels.instance }} {{ $value | printf "%.2f" }}'
description: |
node:ins:zfs_dataset_metrics[metric_type=capacity, name={{ $labels.name }}, ins={{ $labels.ins }}] = {{ $value | printf "%.2f" }} > 85%

# zfs_arc_pressure_ratio > 0.75: means ARC is not effective
# zfs_arc_usage_ratio > 0.95: means ARC usage is almost full
# zfs_arc_hit_ratio < 0.3: means ARC hits are very low
# rate(node_zfs_arc_evict_skip[10m]) > 50: means evict failures are high
- alert: ZARCCachePressure
expr: |
node:ins:zfs_arc_pressure_ratio > 0.75
or (
node:ins:zfs_arc_usage_ratio > 0.95
and node:ins:zfs_arc_hit_ratio < 0.3
and rate(node_zfs_arc_evict_skip[10m]) > 50
)
for: 10m
labels: { level: 1, severity: WARN, category: node }
annotations:
summary: 'WARN ZFS ARC Pressure {{ $labels.ins }}@{{ $labels.instance }} is high'
description: |
Check the values of node:ins:zfs_arc_pressure_ratio, node:ins:zfs_arc_usage_ratio, node:ins:zfs_arc_hit_ratio and rate(node_zfs_arc_evict_skip[10m]).
link: http://g.pigsty/d/node-instance/node-instance?var-id={{ $labels.ip }}&viewPanel=panel-283


# OPTIONAL: space predict 1d
# OPTIONAL: filesystem read-only
# OPTIONAL: fast release on disk space
4 changes: 4 additions & 0 deletions roles/infra/templates/prometheus/agent.yml.j2
@@ -62,4 +62,8 @@ groups:
# KAFKA
- record: kafka_exporter_up
expr: up{job="kafka"}

# ZFS
- record: zfs_exporter_up
expr: up{instance=~".*:{{ zfs_exporter_port|default('9134') }}"}
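# Illustrative only: with the default port the selector above matches any scrape target
# whose instance label ends in ":9134", e.g. up{instance="10.10.10.10:9134"}
# (hypothetical address), and records it as zfs_exporter_up.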
...
10 changes: 10 additions & 0 deletions roles/node_monitor/defaults/main.yml
@@ -6,6 +6,16 @@ node_exporter_enabled: true # setup node_exporter on this node?
node_exporter_port: 9100 # node exporter listen port, 9100 by default
node_exporter_options: '--no-collector.softnet --no-collector.nvme --collector.tcpstat --collector.processes'
#--------------------------------------------------------------#
# ZFS EXPORTER
#--------------------------------------------------------------#
zfs_exporter_enabled: false # setup zfs_exporter on this node?
zfs_exporter_version: 3.8.1
zfs_exporter_port: 9134 # zfs exporter listen port, 9134 by default
zfs_exporter_options: >
--deadline=5s
--properties.pool='allocated,ashift,autoexpand,autoreplace,autotrim,capacity,dedupratio,fragmentation,free,freeing,health,leaked,readonly,size'
--properties.dataset-filesystem='atime,available,compression,compressratio,creation,exec,logbias,logicalused,mounted,primarycache,quota,recordsize,referenced,relatime,sync,used,usedbydataset,volsize,written'
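# Example enablement (hypothetical inventory override, a sketch):
#   zfs_exporter_enabled: true   # turn the exporter on for a node or group
#   zfs_exporter_port: 9134      # keep the default unless it collides with another service
# The --properties.* flags above restrict collection to roughly the pool and dataset
# properties consumed by the node:ins:zfs_* recording rules.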
#--------------------------------------------------------------#
# PROMTAIL
#--------------------------------------------------------------#
promtail_enabled: true # enable promtail logging collector?
20 changes: 20 additions & 0 deletions roles/node_monitor/files/zfs_exporter.service
@@ -0,0 +1,20 @@
[Unit]
Description=Prometheus zfs_exporter for ZFS metrics
Documentation=https://github.com/waitingsong/zfs_exporter/
After=network.target

[Service]
EnvironmentFile=-/etc/default/zfs_exporter
User=root
ExecStart=/usr/bin/zfs_exporter $ZFS_EXPORTER_OPTS
Restart=on-failure
RestartSec=5s

# MemoryLimit= is deprecated in favor of MemoryMax=; kept for compatibility with older systemd
MemoryLimit=200M

CPUQuota=30%
MemoryMax=200M

[Install]
WantedBy=multi-user.target
38 changes: 34 additions & 4 deletions roles/node_monitor/tasks/main.yml
@@ -104,6 +104,33 @@
wait_for: host=127.0.0.1 port={{ node_exporter_port }} state=started timeout=10


#--------------------------------------------------------------#
# Config zfs_exporter [zfs_exporter_config]
#--------------------------------------------------------------#
- name: config zfs_exporter
tags: [ node_exporter, node_exporter_config, zfs_exporter, zfs_exporter_config ]
when: zfs_exporter_enabled|bool
block:
- name: config default zfs_exporter options
copy:
dest: /etc/default/zfs_exporter
content: |
ZFS_EXPORTER_OPTS="--web.listen-address=':{{ zfs_exporter_port }}' --web.telemetry-path='{{ exporter_metrics_path }}' {{ zfs_exporter_options }}"

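# Hypothetical rendered /etc/default/zfs_exporter with the role defaults, assuming
# exporter_metrics_path resolves to '/metrics' (illustration only):
#   ZFS_EXPORTER_OPTS="--web.listen-address=':9134' --web.telemetry-path='/metrics' --deadline=5s --properties.pool='...' --properties.dataset-filesystem='...'"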

#--------------------------------------------------------------#
# Launch zfs_exporter [zfs_exporter_launch]
#--------------------------------------------------------------#
- name: launch zfs_exporter
tags: [ node_exporter, node_exporter_launch, zfs_exporter, zfs_exporter_launch ]
when: zfs_exporter_enabled|bool and zfs_exporter_version is defined and zfs_exporter_version|length > 0
block:
- name: launch zfs_exporter systemd service
systemd: name=zfs_exporter state=restarted enabled=yes daemon_reload=yes
- name: wait for zfs_exporter service online
wait_for: host=127.0.0.1 port={{ zfs_exporter_port }} state=started timeout=10
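# Optional manual check after launch (a sketch, not part of this role): the exporter
# should answer on the configured port, e.g.
#   curl -s http://127.0.0.1:9134/metrics | grep -c '^zfs_'
# (hypothetical one-liner; 9134 is the default zfs_exporter_port)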


#--------------------------------------------------------------#
# Config keepalived_exporter [vip_exporter_config]
#--------------------------------------------------------------#
@@ -151,13 +178,16 @@
# {{ inventory_hostname }}
# node, haproxy, promtail
- labels: { ip: {{ inventory_hostname }} , ins: {{ nodename }} , cls: {{ node_cluster|default('nodes') }} }
targets: {% if not node_exporter_enabled|bool and not haproxy_enabled|bool and not promtail_enabled|bool %}[]{% endif %}
targets: {% if not node_exporter_enabled|bool and not zfs_exporter_enabled|bool and not haproxy_enabled|bool and not promtail_enabled|bool %}[]{% endif %}

{% if node_exporter_enabled|bool %}- {{ inventory_hostname }}:{{ node_exporter_port }}{% endif %}

{% if haproxy_enabled|bool %}- {{ inventory_hostname }}:{{ haproxy_exporter_port }}{% endif %}

{% if promtail_enabled|bool %}- {{ inventory_hostname }}:{{ promtail_port }}{% endif %}


{% if zfs_exporter_enabled|bool %}- {{ inventory_hostname }}:{{ zfs_exporter_port }}{% endif %}

{% if vip_enabled|bool and vip_address is defined and vip_address != '' %}
# keepalived
- labels: { ip: {{ inventory_hostname }} , ins: {{ nodename }} , cls: {{ node_cluster|default('nodes') }}, vip: {{ vip_address }} }