# Zabbix template export, copied from a repository web viewer (the viewer reported 2480 lines, 100 KiB).

# Root of the Zabbix configuration export; the export format version is 7.0.
zabbix_export:
version: '7.0'
# Template group referenced by the template's `groups` entry further down.
template_groups:
- uuid: 846977d1dfed4968bc5f8bdb363285bc
name: 'Templates/Operating systems'
# Template 'Linux by Prom': its description states that it is built on node exporter
# and lists known limitations for exporter versions below 0.16.0 and below 0.18.
templates:
- uuid: 2506b0ca01884903b547b1e19b76ce6d
template: 'Linux by Prom'
name: 'Linux by Prom'
description: |
Official Linux template using node exporter.
Known Issues:
Description: node_exporter v0.16.0 renamed many metrics. CPU utilization for 'guest' and 'guest_nice' metrics are not supported in this template with node_exporter < 0.16. Disk IO metrics are not supported. Other metrics provided as 'best effort'.
See https://github.com/prometheus/node_exporter/releases/tag/v0.16.0 for details.
Version: below 0.16.0
Description: metric node_network_info with label 'device' cannot be found, so network discovery is not possible.
Version: below 0.18
You can discuss this template or leave feedback on our forum https://www.zabbix.com/forum/zabbix-suggestions-and-feedback/387225-discussion-thread-for-official-zabbix-template-for-linux
Generated by official Zabbix template tool "Templator" 2.0.0
vendor:
name: Zabbix
version: 7.0-0
groups:
- name: 'Templates/Operating systems'
items:
# Dependent item: node_exporter version, taken from the `version` label of
# node_exporter_build_info in the node_exporter.get payload.
# Unchanged values are discarded, with a 1d heartbeat.
- uuid: 9a60d1e53caa4049a33aa52f3f55ad75
  name: 'Linux: Version of node_exporter running'
  type: DEPENDENT
  key: 'agent.version[node_exporter]'
  delay: '0'
  history: 7d
  trends: '0'
  value_type: CHAR
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - node_exporter_build_info
        - label
        - version
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1d
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: application
# Dependent item: value of node_filefd_allocated from the node_exporter.get payload.
- uuid: d3631bd7effc4cd1bc45c991b3c3038f
  name: 'Linux: Number of open file descriptors'
  type: DEPENDENT
  key: 'fd.open[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - node_filefd_allocated
        - value
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: system
# Dependent item: value of node_filefd_maximum from the node_exporter.get payload.
# Unchanged values are discarded, with a 1d heartbeat.
- uuid: a8b78e776e1d4011bbcb6dd54e32b40b
  name: 'Linux: Maximum number of open file descriptors'
  type: DEPENDENT
  key: 'kernel.maxfiles[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  description: 'It could be increased by using `sysctl` utility or modifying the file `/etc/sysctl.conf`.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - node_filefd_maximum
        - value
        - ''
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1d
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: system
  # INFO trigger: the configured limit is below {$KERNEL.MAXFILES.MIN}.
  # It depends on the 'Running out of file descriptors' trigger (open/max > 80%).
  triggers:
    - uuid: c0002ad071d7497ab32633c93624e0c2
      expression: 'last(/Linux by Prom/kernel.maxfiles[node_exporter])<{$KERNEL.MAXFILES.MIN}'
      name: 'Linux: Configured max number of open filedescriptors is too low'
      event_name: 'Linux: Configured max number of open filedescriptors is too low (< {$KERNEL.MAXFILES.MIN})'
      priority: INFO
      dependencies:
        - name: 'Linux: Running out of file descriptors'
          expression: 'last(/Linux by Prom/fd.open[node_exporter])/last(/Linux by Prom/kernel.maxfiles[node_exporter])*100>80'
      tags:
        - tag: scope
          value: performance
# Master item: HTTP agent request to the node_exporter /metrics endpoint.
# The raw text is kept for 1h without trends; the dependent items parse it.
- uuid: 8b2fffcba0b24ca8a687361645ffaa1d
  name: 'Linux: Get node_exporter metrics'
  type: HTTP_AGENT
  key: node_exporter.get
  history: 1h
  trends: '0'
  value_type: TEXT
  url: 'http://{HOST.CONN}:{$NODE_EXPORTER_PORT}/metrics'
  tags:
    - tag: component
      value: raw
  # WARNING trigger: no data received from this item for 30 minutes.
  triggers:
    - uuid: e33fafc8476e404aa4150d0d491f4c83
      expression: 'nodata(/Linux by Prom/node_exporter.get,30m)=1'
      name: 'Linux: node_exporter is not available'
      event_name: 'node_exporter is not available (or no data for 30m)'
      priority: WARNING
      description: 'Failed to fetch system metrics from node_exporter in time.'
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
# Dependent item: boot timestamp; the pattern accepts both the node_boot_time
# and node_boot_time_seconds metric names.
- uuid: 75afcb39d2d7496e9490cd14f1431600
  name: 'Linux: System boot time'
  type: DEPENDENT
  key: 'system.boottime[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: unixtime
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - '{__name__=~"^node_boot_time(?:_seconds)?$"}'
        - value
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: system
# Dependent item: CPU guest time in %.
# Pipeline: select the matching per-CPU series as JSON -> average their values
# in JavaScript -> change per second -> multiply by 100.
- uuid: a732c4df8af047fbb874e1fb363b5e9e
  name: 'Linux: CPU guest time'
  type: DEPENDENT
  key: 'system.cpu.guest[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: '%'
  description: 'Guest time - the time spent on running a virtual CPU for a guest operating system.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - '{__name__=~"^node_cpu(?:_guest_seconds_total)?$",cpu=~".+",mode=~"^(?:user|guest)$"}'
    - type: JAVASCRIPT
      parameters:
        - |
          //calculates average, all cpu utilization
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
          return acc + parseFloat(obj['value'])
          },0)/valueArr.length;
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: MULTIPLIER
      parameters:
        - '100'
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: CPU guest nice time in %.
# Pipeline: select the matching per-CPU series as JSON -> average their values
# in JavaScript -> change per second -> multiply by 100.
- uuid: 759809ee6c9e43cf8ff44f987221a90e
  name: 'Linux: CPU guest nice time'
  type: DEPENDENT
  key: 'system.cpu.guest_nice[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: '%'
  description: 'The time spent on running a niced guest (a virtual CPU for guest operating systems under the control of the Linux kernel).'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - '{__name__=~"^node_cpu(?:_guest_seconds_total)?$",cpu=~".+",mode=~"^(?:nice|guest_nice)$"}'
    - type: JAVASCRIPT
      parameters:
        - |
          //calculates average, all cpu utilization
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
          return acc + parseFloat(obj['value'])
          },0)/valueArr.length;
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: MULTIPLIER
      parameters:
        - '100'
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: CPU idle time in % (mode="idle").
# Pipeline: per-CPU series as JSON -> JavaScript average -> change per second -> x100.
# This item is also the master item of system.cpu.util[node_exporter].
- uuid: 7bb99c606c6b497aaef279663b60829b
  name: 'Linux: CPU idle time'
  type: DEPENDENT
  key: 'system.cpu.idle[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: '%'
  description: 'The time the CPU has spent doing nothing.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - '{__name__=~"^node_cpu(?:_seconds_total)?$",cpu=~".+",mode="idle"}'
    - type: JAVASCRIPT
      parameters:
        - |
          //calculates average, all cpu utilization
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
          return acc + parseFloat(obj['value'])
          },0)/valueArr.length;
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: MULTIPLIER
      parameters:
        - '100'
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: CPU hardware-interrupt time in % (mode="irq").
# Pipeline: per-CPU series as JSON -> JavaScript average -> change per second -> x100.
- uuid: 4d3f64c79c684d55bcde14ca5bf3022a
  name: 'Linux: CPU interrupt time'
  type: DEPENDENT
  key: 'system.cpu.interrupt[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: '%'
  description: 'The amount of time the CPU has been servicing hardware interrupts.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - '{__name__=~"^node_cpu(?:_seconds_total)?$",cpu=~".+",mode="irq"}'
    - type: JAVASCRIPT
      parameters:
        - |
          //calculates average, all cpu utilization
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
          return acc + parseFloat(obj['value'])
          },0)/valueArr.length;
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: MULTIPLIER
      parameters:
        - '100'
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: interrupts per second, i.e. the per-second change of the
# metric whose name matches "node_intr".
- uuid: 76206cf435fd4a979dbbf74807b532de
  name: 'Linux: Interrupts per second'
  type: DEPENDENT
  key: 'system.cpu.intr[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - '{__name__=~"node_intr"}'
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: CPU iowait time in % (mode="iowait").
# Pipeline: per-CPU series as JSON -> JavaScript average -> change per second -> x100.
- uuid: 6a727e148a2c4956bcfb6de0a3486c2d
  name: 'Linux: CPU iowait time'
  type: DEPENDENT
  key: 'system.cpu.iowait[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: '%'
  description: 'The amount of time the CPU has been waiting for I/O to complete.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - '{__name__=~"^node_cpu(?:_seconds_total)?$",cpu=~".+",mode="iowait"}'
    - type: JAVASCRIPT
      parameters:
        - |
          //calculates average, all cpu utilization
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
          return acc + parseFloat(obj['value'])
          },0)/valueArr.length;
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: MULTIPLIER
      parameters:
        - '100'
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: 1-minute load average (value of node_load1).
- uuid: 78fb393fe3d64cd7bf79be5427715add
  name: 'Linux: Load average (1m avg)'
  type: DEPENDENT
  key: 'system.cpu.load.avg1[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - node_load1
        - value
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: 5-minute load average (value of node_load5).
- uuid: 7fc62b26b9ff43429632f1985dc07851
  name: 'Linux: Load average (5m avg)'
  type: DEPENDENT
  key: 'system.cpu.load.avg5[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - node_load5
        - value
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: 15-minute load average (value of node_load15).
- uuid: 35fb0319c599403bbea0e616c9066e27
  name: 'Linux: Load average (15m avg)'
  type: DEPENDENT
  key: 'system.cpu.load.avg15[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - node_load15
        - value
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: CPU nice time in % (mode="nice").
# Pipeline: per-CPU series as JSON -> JavaScript average -> change per second -> x100.
- uuid: 86de794832794f57ba428cca31031005
  name: 'Linux: CPU nice time'
  type: DEPENDENT
  key: 'system.cpu.nice[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: '%'
  description: 'The time the CPU has spent running users'' processes that have been niced.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - '{__name__=~"^node_cpu(?:_seconds_total)?$",cpu=~".+",mode="nice"}'
    - type: JAVASCRIPT
      parameters:
        - |
          //calculates average, all cpu utilization
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
          return acc + parseFloat(obj['value'])
          },0)/valueArr.length;
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: MULTIPLIER
      parameters:
        - '100'
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: number of CPUs, computed as the count of per-CPU series
# returned for mode="idle".
- uuid: d7db65cfeaac4541bd98d06dc35f89e2
  name: 'Linux: Number of CPUs'
  type: DEPENDENT
  key: 'system.cpu.num[node_exporter]'
  delay: '0'
  history: 7d
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - '{__name__=~"^node_cpu(?:_seconds_total)?$",cpu=~".+",mode="idle"}'
    - type: JAVASCRIPT
      parameters:
        - |
          //count the number of cores
          return JSON.parse(value).length
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: CPU software-interrupt time in % (mode="softirq").
# Pipeline: per-CPU series as JSON -> JavaScript average -> change per second -> x100.
- uuid: cf3cd5840b91477d8e9254820896b978
  name: 'Linux: CPU softirq time'
  type: DEPENDENT
  key: 'system.cpu.softirq[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: '%'
  description: 'The amount of time the CPU has been servicing software interrupts.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - '{__name__=~"^node_cpu(?:_seconds_total)?$",cpu=~".+",mode="softirq"}'
    - type: JAVASCRIPT
      parameters:
        - |
          //calculates average, all cpu utilization
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
          return acc + parseFloat(obj['value'])
          },0)/valueArr.length;
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: MULTIPLIER
      parameters:
        - '100'
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: CPU steal time in % (mode="steal").
# Pipeline: per-CPU series as JSON -> JavaScript average -> change per second -> x100.
- uuid: 2ee5885c4de646c0926f0c1a48890e68
  name: 'Linux: CPU steal time'
  type: DEPENDENT
  key: 'system.cpu.steal[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: '%'
  description: 'The amount of "stolen" CPU from this virtual machine by the hypervisor for other tasks, such as running another virtual machine.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - '{__name__=~"^node_cpu(?:_seconds_total)?$",cpu=~".+",mode="steal"}'
    - type: JAVASCRIPT
      parameters:
        - |
          //calculates average, all cpu utilization
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
          return acc + parseFloat(obj['value'])
          },0)/valueArr.length;
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: MULTIPLIER
      parameters:
        - '100'
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: context switches per second, i.e. the per-second change of
# the metric whose name matches "node_context_switches".
- uuid: b873e0d178d144fca3597d617fc119e7
  name: 'Linux: Context switches per second'
  type: DEPENDENT
  key: 'system.cpu.switches[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - '{__name__=~"node_context_switches"}'
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: CPU system (kernel) time in % (mode="system").
# Pipeline: per-CPU series as JSON -> JavaScript average -> change per second -> x100.
- uuid: e296be976c5944b19a7178304ed6140c
  name: 'Linux: CPU system time'
  type: DEPENDENT
  key: 'system.cpu.system[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: '%'
  description: 'The time the CPU has spent running the kernel and its processes.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - '{__name__=~"^node_cpu(?:_seconds_total)?$",cpu=~".+",mode="system"}'
    - type: JAVASCRIPT
      parameters:
        - |
          //calculates average, all cpu utilization
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
          return acc + parseFloat(obj['value'])
          },0)/valueArr.length;
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: MULTIPLIER
      parameters:
        - '100'
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: CPU user time in % (mode="user").
# Pipeline: per-CPU series as JSON -> JavaScript average -> change per second -> x100.
- uuid: 832698e707d14bad8171ca71a2533350
  name: 'Linux: CPU user time'
  type: DEPENDENT
  key: 'system.cpu.user[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: '%'
  description: 'The time the CPU has spent running users'' processes that are not niced.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - '{__name__=~"^node_cpu(?:_seconds_total)?$",cpu=~".+",mode="user"}'
    - type: JAVASCRIPT
      parameters:
        - |
          //calculates average, all cpu utilization
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
          return acc + parseFloat(obj['value'])
          },0)/valueArr.length;
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: MULTIPLIER
      parameters:
        - '100'
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: cpu
# Dependent item: CPU utilization in %, derived from the idle-time item
# (system.cpu.idle[node_exporter]) as 100 - idle.
- uuid: 76f60c7e4f1a482198ff4f101020bbcc
  name: 'Linux: CPU utilization'
  type: DEPENDENT
  key: 'system.cpu.util[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: '%'
  description: 'The CPU utilization expressed in %.'
  preprocessing:
    - type: JAVASCRIPT
      parameters:
        - |
          //Calculate utilization
          return (100 - value)
  master_item:
    key: 'system.cpu.idle[node_exporter]'
  tags:
    - tag: component
      value: cpu
  # WARNING trigger: the 5-minute minimum of utilization exceeds {$CPU.UTIL.CRIT}.
  # It depends on the 'Load average is too high' trigger.
  triggers:
    - uuid: 4c924f8243a7431886a2d69368cb7142
      expression: 'min(/Linux by Prom/system.cpu.util[node_exporter],5m)>{$CPU.UTIL.CRIT}'
      name: 'Linux: High CPU utilization'
      event_name: 'Linux: High CPU utilization (over {$CPU.UTIL.CRIT}% for 5m)'
      opdata: 'Current utilization: {ITEM.LASTVALUE1}'
      priority: WARNING
      description: 'The CPU utilization is too high. The system might be slow to respond.'
      dependencies:
        - name: 'Linux: Load average is too high'
          expression: |
            min(/Linux by Prom/system.cpu.load.avg1[node_exporter],5m)/last(/Linux by Prom/system.cpu.num[node_exporter])>{$LOAD_AVG_PER_CPU.MAX.WARN}
            and last(/Linux by Prom/system.cpu.load.avg5[node_exporter])>0
            and last(/Linux by Prom/system.cpu.load.avg15[node_exporter])>0
      tags:
        - tag: scope
          value: performance
# Dependent item: system description string assembled in JavaScript from the
# sysname, release and version labels of the first node_uname_info series.
# Unchanged values are discarded, with a 1d heartbeat.
# This item is also the master item of system.sw.os[node_exporter].
- uuid: c47995760ef3458eb6237e434ef4eef8
  name: 'Linux: System description'
  type: DEPENDENT
  key: 'system.descr[node_exporter]'
  delay: '0'
  history: 2w
  trends: '0'
  value_type: CHAR
  description: 'Labeled system information as provided by the uname system call.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - node_uname_info
    - type: JAVASCRIPT
      parameters:
        - |
          var info = JSON.parse(value)[0];
          return info.labels.sysname+' version: '+info.labels.release+' '+info.labels.version
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1d
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: system
# Dependent item: host time as a unix timestamp; the pattern accepts both the
# node_time and node_time_seconds metric names.
- uuid: 9fcb3dd70dc244f2b2ff86560b37ec7e
  name: 'Linux: System local time'
  type: DEPENDENT
  key: 'system.localtime[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: unixtime
  description: 'The local system time of the host.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - '{__name__=~"^node_time(?:_seconds)?$"}'
        - value
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: system
  # WARNING trigger: fuzzytime() reports a difference larger than
  # {$SYSTEM.FUZZYTIME.MAX}; it can be closed manually.
  triggers:
    - uuid: e91c10be1c4a4c3fadfe06aec6c1e4e4
      expression: 'fuzzytime(/Linux by Prom/system.localtime[node_exporter],{$SYSTEM.FUZZYTIME.MAX})=0'
      name: 'Linux: System time is out of sync'
      event_name: 'Linux: System time is out of sync (diff with Zabbix server > {$SYSTEM.FUZZYTIME.MAX}s)'
      priority: WARNING
      description: 'The host''s system time is different from Zabbix server time.'
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
        - tag: scope
          value: performance
# Dependent item: host name from the `nodename` label of node_uname_info;
# linked to the NAME inventory field. Unchanged values are discarded, with a
# 1d heartbeat.
- uuid: c70cfa2ad5754089935883c5b3172656
  name: 'Linux: System name'
  type: DEPENDENT
  key: 'system.name[node_exporter]'
  delay: '0'
  history: 2w
  trends: '0'
  value_type: CHAR
  description: 'The host name of the system.'
  inventory_link: NAME
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - node_uname_info
        - label
        - nodename
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1d
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: system
  # INFO trigger: the two most recent values differ and the latest is non-empty.
  triggers:
    - uuid: e827e944568b46e284820630482317bd
      expression: 'last(/Linux by Prom/system.name[node_exporter],#1)<>last(/Linux by Prom/system.name[node_exporter],#2) and length(last(/Linux by Prom/system.name[node_exporter]))>0'
      name: 'Linux: System name has changed'
      event_name: 'Linux: System name has changed (new name: {ITEM.VALUE})'
      priority: INFO
      description: 'The name of the system has changed. Acknowledge to close the problem manually.'
      manual_close: 'YES'
      tags:
        - tag: scope
          value: notice
        - tag: scope
          value: security
# Dependent item: OS architecture from the `machine` label of node_uname_info.
# Unchanged values are discarded, with a 1d heartbeat.
- uuid: ce4bb6c485cc4b739dce31b631634790
  name: 'Linux: Operating system architecture'
  type: DEPENDENT
  key: 'system.sw.arch[node_exporter]'
  delay: '0'
  history: 2w
  trends: '0'
  value_type: CHAR
  description: 'The architecture of the operating system.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - node_uname_info
        - label
        - machine
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1d
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: os
# Dependent item: copies the value of system.descr[node_exporter] (its master
# item) and links it to the OS inventory field. Unchanged values are
# discarded, with a 1d heartbeat.
- uuid: 11ff4e02245f4c2e9ce00e55c1a77b39
  name: 'Linux: Operating system'
  type: DEPENDENT
  key: 'system.sw.os[node_exporter]'
  delay: '0'
  history: 2w
  trends: '0'
  value_type: CHAR
  inventory_link: OS
  preprocessing:
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1d
  master_item:
    key: 'system.descr[node_exporter]'
  tags:
    - tag: component
      value: os
  # INFO trigger: the two most recent values differ and the latest is non-empty.
  # It depends on the 'System name has changed' trigger.
  triggers:
    - uuid: 00974d5b128c4b1db64ca100770fb11b
      expression: 'last(/Linux by Prom/system.sw.os[node_exporter],#1)<>last(/Linux by Prom/system.sw.os[node_exporter],#2) and length(last(/Linux by Prom/system.sw.os[node_exporter]))>0'
      name: 'Linux: Operating system description has changed'
      priority: INFO
      description: 'The description of the operating system has changed. Possible reasons are that the system has been updated or replaced. Acknowledge to close the problem manually.'
      manual_close: 'YES'
      dependencies:
        - name: 'Linux: System name has changed'
          expression: 'last(/Linux by Prom/system.name[node_exporter],#1)<>last(/Linux by Prom/system.name[node_exporter],#2) and length(last(/Linux by Prom/system.name[node_exporter]))>0'
      tags:
        - tag: scope
          value: notice
# Dependent item: free swap in bytes, from the metric whose name matches
# "node_memory_SwapFree".
- uuid: ccc3c4d69fef412aa53c6a7320ef44fb
  name: 'Linux: Free swap space'
  type: DEPENDENT
  key: 'system.swap.free[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: B
  description: 'The free space of the swap volume/file expressed in bytes.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - '{__name__=~"node_memory_SwapFree"}'
        - value
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: memory
    - tag: component
      value: storage
# Calculated item: free swap in %, computed as free / total * 100 from the
# two swap items of the same host.
# NOTE(review): the formula divides by the swap total; how a host with zero
# swap is handled is not visible in this chunk - confirm.
- uuid: 99feb76b7da04b00a0d191b92bf979a1
  name: 'Linux: Free swap space in %'
  type: CALCULATED
  key: 'system.swap.pfree[node_exporter]'
  history: 7d
  value_type: FLOAT
  units: '%'
  params: 'last(//system.swap.free[node_exporter])/last(//system.swap.total[node_exporter])*100'
  description: 'The free space of the swap volume/file expressed in %.'
  tags:
    - tag: component
      value: memory
    - tag: component
      value: storage
# Dependent item: total swap in bytes, from the metric whose name matches
# "node_memory_SwapTotal".
- uuid: d0c1ef0a679546d780df30a02a27df44
  name: 'Linux: Total swap space'
  type: DEPENDENT
  key: 'system.swap.total[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: B
  description: 'The total space of the swap volume/file expressed in bytes.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - '{__name__=~"node_memory_SwapTotal"}'
        - value
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: memory
    - tag: component
      value: storage
# Dependent item: uptime in seconds, computed in JavaScript as the current
# time (Date.now()/1000, floored) minus the boot timestamp extracted from the
# node_boot_time / node_boot_time_seconds metric.
- uuid: 7eeabdb6bf44483bab91faaab8bb24d5
  name: 'Linux: System uptime'
  type: DEPENDENT
  key: 'system.uptime[node_exporter]'
  delay: '0'
  history: 2w
  trends: '0'
  units: uptime
  description: 'The system uptime expressed in the following format: "N days, hh:mm:ss".'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - '{__name__=~"^node_boot_time(?:_seconds)?$"}'
        - value
        - ''
    - type: JAVASCRIPT
      parameters:
        - |
          //use boottime to calculate uptime
          return (Math.floor(Date.now()/1000)-Number(value));
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: system
  # WARNING trigger: the latest uptime is below 10 minutes; it can be closed manually.
  triggers:
    - uuid: 83311a7c73c441d39d88fe0fc7b55af9
      expression: 'last(/Linux by Prom/system.uptime[node_exporter])<10m'
      name: 'Linux: {HOST.NAME} has been restarted'
      event_name: 'Linux: {HOST.NAME} has been restarted (uptime < 10m)'
      priority: WARNING
      description: 'The device uptime is less than 10 minutes.'
      manual_close: 'YES'
      tags:
        - tag: scope
          value: notice
# Dependent item: available memory in bytes, from the metric whose name
# matches "node_memory_MemAvailable".
- uuid: 70a75efafbc84cbebd1bb262ec797523
  name: 'Linux: Available memory'
  type: DEPENDENT
  key: 'vm.memory.available[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: B
  description: |
    The available memory:
    - in Linux - available = free + buffers + cache;
    - on other platforms calculation may vary.
    See also Appendixes in Zabbix Documentation about parameters of the `vm.memory.size` item.
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - '{__name__=~"node_memory_MemAvailable"}'
        - value
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: memory
# Dependent item: total memory in bytes, from the metric whose name matches
# "node_memory_MemTotal".
- uuid: e3ad296f16084e9caa923b449d8c725f
  name: 'Linux: Total memory'
  type: DEPENDENT
  key: 'vm.memory.total[node_exporter]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: B
  description: 'The total memory expressed in bytes.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - '{__name__=~"node_memory_MemTotal"}'
        - value
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: memory
# Calculated item: memory utilization in %, computed as
# (total - available) / total * 100 from the two memory items of the same host.
- uuid: 60c716c692fb482f9abffb0fc9ce4324
  name: 'Linux: Memory utilization'
  type: CALCULATED
  key: 'vm.memory.util[node_exporter]'
  history: 7d
  value_type: FLOAT
  units: '%'
  params: '(last(//vm.memory.total[node_exporter])-last(//vm.memory.available[node_exporter]))/last(//vm.memory.total[node_exporter])*100'
  description: 'Memory used percentage is calculated as (total-available)/total*100.'
  tags:
    - tag: component
      value: memory
  # AVERAGE trigger: the 5-minute minimum of utilization exceeds {$MEMORY.UTIL.MAX}.
  # It depends on the 'Lack of available memory' trigger.
  triggers:
    - uuid: 72304b3a167e41ccbcdb75568c65294a
      expression: 'min(/Linux by Prom/vm.memory.util[node_exporter],5m)>{$MEMORY.UTIL.MAX}'
      name: 'Linux: High memory utilization'
      event_name: 'Linux: High memory utilization (>{$MEMORY.UTIL.MAX}% for 5m)'
      priority: AVERAGE
      description: 'The system is running out of free memory.'
      dependencies:
        - name: 'Linux: Lack of available memory'
          expression: 'max(/Linux by Prom/vm.memory.available[node_exporter],5m)<{$MEMORY.AVAILABLE.MIN} and last(/Linux by Prom/vm.memory.total[node_exporter])>0'
      tags:
        - tag: scope
          value: capacity
        - tag: scope
          value: performance
# Low-level discovery rule for network interfaces (dependent rule; its master
# item is not visible in this part of the file).
discovery_rules:
- uuid: 91531a7a68564860976096df6899fe88
name: 'Network interface discovery'
type: DEPENDENT
key: 'net.if.discovery[node_exporter]'
delay: '0'
# Filter: all six conditions must hold (evaltype AND). Each of {#IFNAME},
# {#IFALIAS} and {#IFOPERSTATUS} is checked against a *.MATCHES macro and,
# with NOT_MATCHES_REGEX, against a *.NOT_MATCHES macro.
# NOTE(review): conditions without an `operator` presumably use the default
# regex-match operator - confirm against the Zabbix export format.
filter:
evaltype: AND
conditions:
- macro: '{#IFNAME}'
value: '{$NET.IF.IFNAME.MATCHES}'
formulaid: C
- macro: '{#IFNAME}'
value: '{$NET.IF.IFNAME.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: D
- macro: '{#IFALIAS}'
value: '{$NET.IF.IFALIAS.MATCHES}'
formulaid: A
- macro: '{#IFALIAS}'
value: '{$NET.IF.IFALIAS.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: B
- macro: '{#IFOPERSTATUS}'
value: '{$NET.IF.IFOPERSTATUS.MATCHES}'
formulaid: E
- macro: '{#IFOPERSTATUS}'
value: '{$NET.IF.IFOPERSTATUS.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: F
description: 'Discovery of network interfaces. Requires node_exporter v0.18 and up.'
item_prototypes:
# Item prototype: inbound dropped packets per second for {#IFNAME}
# (per-second change of node_network_receive_drop_total).
- uuid: ee28aff27b4f40eaa8e903ce3fab8555
  name: 'Interface {#IFNAME}({#IFALIAS}): Inbound packets discarded'
  type: DEPENDENT
  key: 'net.if.in.discards[node_exporter,"{#IFNAME}"]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - 'node_network_receive_drop_total{device="{#IFNAME}"}'
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: network
    - tag: description
      value: '{#IFALIAS}'
    - tag: interface
      value: '{#IFNAME}'
# Item prototype: inbound packet errors per second for {#IFNAME}
# (per-second change of node_network_receive_errs_total).
- uuid: f6b39f743bb6438d876236ae651f68ee
  name: 'Interface {#IFNAME}({#IFALIAS}): Inbound packets with errors'
  type: DEPENDENT
  key: 'net.if.in.errors[node_exporter,"{#IFNAME}"]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - 'node_network_receive_errs_total{device="{#IFNAME}"}'
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: network
    - tag: description
      value: '{#IFALIAS}'
    - tag: interface
      value: '{#IFNAME}'
# Item prototype: inbound traffic for {#IFNAME} in bits per second
# (per-second change of node_network_receive_bytes_total, multiplied by 8).
- uuid: eea37c3779994ac5b35bb3b125429340
  name: 'Interface {#IFNAME}({#IFALIAS}): Bits received'
  type: DEPENDENT
  key: 'net.if.in[node_exporter,"{#IFNAME}"]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: bps
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - 'node_network_receive_bytes_total{device="{#IFNAME}"}'
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: MULTIPLIER
      parameters:
        - '8'
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: network
    - tag: description
      value: '{#IFALIAS}'
    - tag: interface
      value: '{#IFNAME}'
# Item prototype: outbound dropped packets per second for {#IFNAME}
# (per-second change of node_network_transmit_drop_total).
- uuid: aec995b7fe724823bdb41f134594b648
  name: 'Interface {#IFNAME}({#IFALIAS}): Outbound packets discarded'
  type: DEPENDENT
  key: 'net.if.out.discards[node_exporter,"{#IFNAME}"]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - 'node_network_transmit_drop_total{device="{#IFNAME}"}'
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: network
    - tag: description
      value: '{#IFALIAS}'
    - tag: interface
      value: '{#IFNAME}'
# Item prototype: outbound packet errors per second for {#IFNAME}
# (per-second change of node_network_transmit_errs_total).
# NOTE(review): unlike the sibling net.if.* keys, this key has no comma between
# `node_exporter` and `"{#IFNAME}"`. It is left unchanged here because anything
# outside this chunk that references the key would have to change with it -
# confirm before renaming.
- uuid: 4ffe74b94eb14a4ea61c0df790d0920f
  name: 'Interface {#IFNAME}({#IFALIAS}): Outbound packets with errors'
  type: DEPENDENT
  key: 'net.if.out.errors[node_exporter"{#IFNAME}"]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - 'node_network_transmit_errs_total{device="{#IFNAME}"}'
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: network
    - tag: description
      value: '{#IFALIAS}'
    - tag: interface
      value: '{#IFNAME}'
# Item prototype: outbound traffic for {#IFNAME} in bits per second
# (per-second change of node_network_transmit_bytes_total, multiplied by 8).
- uuid: ddc4111fefec473e8d8b079f6eb50c05
  name: 'Interface {#IFNAME}({#IFALIAS}): Bits sent'
  type: DEPENDENT
  key: 'net.if.out[node_exporter,"{#IFNAME}"]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: bps
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - 'node_network_transmit_bytes_total{device="{#IFNAME}"}'
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: MULTIPLIER
      parameters:
        - '8'
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: network
    - tag: description
      value: '{#IFALIAS}'
    - tag: interface
      value: '{#IFNAME}'
# Item prototype: link speed of {#IFNAME} in bits per second
# (node_network_speed_bytes multiplied by 8). If the metric cannot be
# extracted, the extraction step substitutes the custom value 0.
- uuid: 51f9a39ad6d940ae914cd0a350d83dab
  name: 'Interface {#IFNAME}({#IFALIAS}): Speed'
  type: DEPENDENT
  key: 'net.if.speed[node_exporter,"{#IFNAME}"]'
  delay: '0'
  history: 7d
  trends: '0'
  units: bps
  description: 'Sets value to 0 if metric is missing in node_exporter output.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - 'node_network_speed_bytes{device="{#IFNAME}"}'
        - value
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
    - type: MULTIPLIER
      parameters:
        - '8'
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: network
    - tag: description
      value: '{#IFALIAS}'
    - tag: interface
      value: '{#IFNAME}'
# Item prototype: operational status of {#IFNAME}. The `operstate` label of
# node_network_info is mapped by the JavaScript step to a numeric code:
# unknown=0, notpresent=1, down=2, lowerlayerdown=3, testing=4, dormant=5, up=6.
# Any other label text makes the step return an explanatory string instead.
- uuid: 4097f76d86cd417db586c5973e1da90b
  name: 'Interface {#IFNAME}({#IFALIAS}): Operational status'
  type: DEPENDENT
  key: 'net.if.status[node_exporter,"{#IFNAME}"]'
  delay: '0'
  history: 7d
  trends: '0'
  description: 'Reference: https://www.kernel.org/doc/Documentation/networking/operstates.txt'
  valuemap:
    name: ifOperStatus
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - 'node_network_info{device="{#IFNAME}"}'
        - label
        - operstate
    - type: JAVASCRIPT
      parameters:
        - |
          var newvalue;
          switch(value) {
          case "unknown":
          newvalue = 0;
          break;
          case "notpresent":
          newvalue = 1;
          break;
          case "down":
          newvalue = 2;
          break;
          case "lowerlayerdown":
          newvalue = 3;
          break;
          case "testing":
          newvalue = 4;
          break;
          case "dormant":
          newvalue = 5;
          break;
          case "up":
          newvalue = 6;
          break; default:
          newvalue = "Problem parsing interface operstate in JS";
          }
          return newvalue;
  master_item:
    key: node_exporter.get
  tags:
    - tag: component
      value: network
    - tag: description
      value: '{#IFALIAS}'
    - tag: interface
      value: '{#IFNAME}'
  # AVERAGE trigger prototype: fires when {$IFCONTROL} is 1, the latest status
  # is 2 (down) and it differs from the previous value. It recovers when the
  # status is no longer 2 or {$IFCONTROL} is set to 0.
  # The description's third point and warning were rewritten to match this
  # expression; they previously cited a `.diff()` form and "up (1)", neither
  # of which appears in the expression or in the status mapping above.
  trigger_prototypes:
    - uuid: e35b59dc09754451a721793831edc3d4
      expression: '{$IFCONTROL:"{#IFNAME}"}=1 and last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])=2 and (last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"],#1)<>last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"],#2))'
      recovery_mode: RECOVERY_EXPRESSION
      recovery_expression: 'last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])<>2 or {$IFCONTROL:"{#IFNAME}"}=0'
      name: 'Interface {#IFNAME}({#IFALIAS}): Link down'
      opdata: 'Current state: {ITEM.LASTVALUE1}'
      priority: AVERAGE
      description: |
        This trigger expression works as follows:
        1. It can be triggered if the operations status is down.
        2. `{$IFCONTROL:"{#IFNAME}"}=1` - a user can redefine context macro to value - 0. That marks this interface as not important. No new trigger will be fired if this interface is down.
        3. `last(...,#1)<>last(...,#2)` - the trigger fires only if the operational status has just changed, i.e. it was not down (2) on the previous poll (so, do not fire for the 'eternal off' interfaces.)
        WARNING: if closed manually - it will not fire again on the next poll, because the status has not changed since the previous value.
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
# Item prototype: protocol type of {#IFNAME}, taken as the raw value of
# node_network_protocol_type and displayed through the
# 'Linux::Interface protocol types' value map (e.g. 1 = Ethernet).
- uuid: ce9cfd9880c44aaea18cfa2ab2fdf0e1
name: 'Interface {#IFNAME}({#IFALIAS}): Interface type'
type: DEPENDENT
key: 'net.if.type[node_exporter,"{#IFNAME}"]'
delay: '0'
history: 7d
trends: '0'
description: 'node_network_protocol_type protocol_type value of /sys/class/net/<iface>.'
valuemap:
name: 'Linux::Interface protocol types'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'node_network_protocol_type{device="{#IFNAME}"}'
- value
- ''
master_item:
key: node_exporter.get
tags:
- tag: component
value: network
- tag: description
value: '{#IFALIAS}'
- tag: interface
value: '{#IFNAME}'
trigger_prototypes:
# Trigger prototype: the reported link speed dropped (change(speed)<0) while still > 0,
# the interface type is one of 6/7/11/62/69/117, and the link is not down (status <> 2).
# Recovers when the speed rises again or the link goes down. Depends on "Link down".
# NOTE(review): 6/7/11/62/69/117 look like SNMP ifType codes. The value map attached to
# net.if.type in this template maps 1 to Ethernet, 6 to 'IEEE 802.2 Ethernet/TR/TB' and
# 7 to ARCnet, and the visible part of the map has no entries for 11, 62, 69 or 117 -
# confirm that this type list is intended for node_exporter data.
- uuid: 96bbb9726ef149a5b97f96d54502593a
expression: |
change(/Linux by Prom/net.if.speed[node_exporter,"{#IFNAME}"])<0 and last(/Linux by Prom/net.if.speed[node_exporter,"{#IFNAME}"])>0
and (
last(/Linux by Prom/net.if.type[node_exporter,"{#IFNAME}"])=6 or
last(/Linux by Prom/net.if.type[node_exporter,"{#IFNAME}"])=7 or
last(/Linux by Prom/net.if.type[node_exporter,"{#IFNAME}"])=11 or
last(/Linux by Prom/net.if.type[node_exporter,"{#IFNAME}"])=62 or
last(/Linux by Prom/net.if.type[node_exporter,"{#IFNAME}"])=69 or
last(/Linux by Prom/net.if.type[node_exporter,"{#IFNAME}"])=117
)
and
(last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])<>2)
recovery_mode: RECOVERY_EXPRESSION
recovery_expression: |
(change(/Linux by Prom/net.if.speed[node_exporter,"{#IFNAME}"])>0 and last(/Linux by Prom/net.if.speed[node_exporter,"{#IFNAME}"],#2)>0) or
(last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])=2)
name: 'Interface {#IFNAME}({#IFALIAS}): Ethernet has changed to lower speed than it was before'
opdata: 'Current reported speed: {ITEM.LASTVALUE1}'
priority: INFO
description: 'This Ethernet connection has transitioned down from its known maximum speed. This might be a sign of autonegotiation issues. Acknowledge to close the problem manually.'
manual_close: 'YES'
dependencies:
- name: 'Interface {#IFNAME}({#IFALIAS}): Link down'
expression: '{$IFCONTROL:"{#IFNAME}"}=1 and last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])=2 and (last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"],#1)<>last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"],#2))'
recovery_expression: 'last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])<>2 or {$IFCONTROL:"{#IFNAME}"}=0'
tags:
- tag: scope
value: performance
# Trigger prototype: the reported link speed of an Ethernet-type interface dropped.
# Fires when net.if.speed decreased compared with the previous value and is still > 0,
# the interface type is 6 or 1, and the link is not down (status <> 2).
# Recovers when the speed rises again (previous value > 0) or the link goes down.
# The speed item must be the first item referenced in the expression because opdata
# shows {ITEM.LASTVALUE1} as "Current reported speed".
- uuid: 08d5857e474042b5906288156c8c5d52
expression: |
change(/Linux by Prom/net.if.speed[node_exporter,"{#IFNAME}"])<0 and last(/Linux by Prom/net.if.speed[node_exporter,"{#IFNAME}"])>0
and
(last(/Linux by Prom/net.if.type[node_exporter,"{#IFNAME}"])=6
or last(/Linux by Prom/net.if.type[node_exporter,"{#IFNAME}"])=1)
and
(last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])<>2)
recovery_mode: RECOVERY_EXPRESSION
recovery_expression: |
(change(/Linux by Prom/net.if.speed[node_exporter,"{#IFNAME}"])>0 and last(/Linux by Prom/net.if.speed[node_exporter,"{#IFNAME}"],#2)>0) or
(last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])=2)
name: 'Interface {#IFNAME}({#IFALIAS}): Ethernet has changed to lower speed than it was before'
opdata: 'Current reported speed: {ITEM.LASTVALUE1}'
priority: INFO
description: 'This Ethernet connection has transitioned down from its known maximum speed. This might be a sign of autonegotiation issues. Acknowledge to close the problem manually.'
manual_close: 'YES'
dependencies:
- name: 'Interface {#IFNAME}({#IFALIAS}): Link down'
expression: '{$IFCONTROL:"{#IFNAME}"}=1 and last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])=2 and (last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"],#1)<>last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"],#2))'
recovery_expression: 'last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])<>2 or {$IFCONTROL:"{#IFNAME}"}=0'
tags:
- tag: scope
value: performance
# Trigger prototype: 15-minute average of inbound or outbound traffic exceeds
# {$IF.UTIL.MAX}% of the reported link speed; evaluated only when the speed is > 0.
# Recovery uses a threshold 3 percentage points lower, and both directions must be below it.
# In opdata the items are numbered in order of first appearance in the expression:
# 1 = net.if.in, 2 = net.if.speed, 3 = net.if.out.
- uuid: 11e2c8023463482da878cdad5bb7de76
expression: |
(avg(/Linux by Prom/net.if.in[node_exporter,"{#IFNAME}"],15m)>({$IF.UTIL.MAX:"{#IFNAME}"}/100)*last(/Linux by Prom/net.if.speed[node_exporter,"{#IFNAME}"]) or
avg(/Linux by Prom/net.if.out[node_exporter,"{#IFNAME}"],15m)>({$IF.UTIL.MAX:"{#IFNAME}"}/100)*last(/Linux by Prom/net.if.speed[node_exporter,"{#IFNAME}"])) and
last(/Linux by Prom/net.if.speed[node_exporter,"{#IFNAME}"])>0
recovery_mode: RECOVERY_EXPRESSION
recovery_expression: |
avg(/Linux by Prom/net.if.in[node_exporter,"{#IFNAME}"],15m)<(({$IF.UTIL.MAX:"{#IFNAME}"}-3)/100)*last(/Linux by Prom/net.if.speed[node_exporter,"{#IFNAME}"]) and
avg(/Linux by Prom/net.if.out[node_exporter,"{#IFNAME}"],15m)<(({$IF.UTIL.MAX:"{#IFNAME}"}-3)/100)*last(/Linux by Prom/net.if.speed[node_exporter,"{#IFNAME}"])
name: 'Interface {#IFNAME}({#IFALIAS}): High bandwidth usage'
event_name: 'Interface {#IFNAME}({#IFALIAS}): High bandwidth usage (>{$IF.UTIL.MAX:"{#IFNAME}"}%)'
opdata: 'In: {ITEM.LASTVALUE1}, out: {ITEM.LASTVALUE3}, speed: {ITEM.LASTVALUE2}'
priority: WARNING
description: 'The utilization of the network interface is close to its estimated maximum bandwidth.'
manual_close: 'YES'
dependencies:
- name: 'Interface {#IFNAME}({#IFALIAS}): Link down'
expression: '{$IFCONTROL:"{#IFNAME}"}=1 and last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])=2 and (last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"],#1)<>last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"],#2))'
recovery_expression: 'last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])<>2 or {$IFCONTROL:"{#IFNAME}"}=0'
tags:
- tag: scope
value: performance
# Trigger prototype: inbound or outbound error rate stayed above {$IF.ERRORS.WARN}
# for 5 minutes (min over 5m). Recovers when both directions stay below 80% of the
# threshold for 5 minutes (max over 5m). Depends on "Link down".
# NOTE(review): the outbound key is written `net.if.out.errors[node_exporter"{#IFNAME}"]`
# (no comma after node_exporter) here and in the 'Network traffic' graph prototype.
# Presumably this mirrors the item prototype key, which is defined outside this section -
# verify against that key before normalizing, otherwise the references will break.
- uuid: 6e8269b2260e42de97aec08043a768df
expression: |
min(/Linux by Prom/net.if.in.errors[node_exporter,"{#IFNAME}"],5m)>{$IF.ERRORS.WARN:"{#IFNAME}"}
or min(/Linux by Prom/net.if.out.errors[node_exporter"{#IFNAME}"],5m)>{$IF.ERRORS.WARN:"{#IFNAME}"}
recovery_mode: RECOVERY_EXPRESSION
recovery_expression: |
max(/Linux by Prom/net.if.in.errors[node_exporter,"{#IFNAME}"],5m)<{$IF.ERRORS.WARN:"{#IFNAME}"}*0.8
and max(/Linux by Prom/net.if.out.errors[node_exporter"{#IFNAME}"],5m)<{$IF.ERRORS.WARN:"{#IFNAME}"}*0.8
name: 'Interface {#IFNAME}({#IFALIAS}): High error rate'
event_name: 'Interface {#IFNAME}({#IFALIAS}): High error rate (>{$IF.ERRORS.WARN:"{#IFNAME}"} for 5m)'
opdata: 'errors in: {ITEM.LASTVALUE1}, errors out: {ITEM.LASTVALUE2}'
priority: WARNING
description: 'It recovers when it is below 80% of the `{$IF.ERRORS.WARN:"{#IFNAME}"}` threshold.'
manual_close: 'YES'
dependencies:
- name: 'Interface {#IFNAME}({#IFALIAS}): Link down'
expression: '{$IFCONTROL:"{#IFNAME}"}=1 and last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])=2 and (last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"],#1)<>last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"],#2))'
recovery_expression: 'last(/Linux by Prom/net.if.status[node_exporter,"{#IFNAME}"])<>2 or {$IFCONTROL:"{#IFNAME}"}=0'
tags:
- tag: scope
value: availability
- tag: scope
value: performance
graph_prototypes:
# Graph prototype: per-interface traffic. Bits received/sent are drawn on the default
# axis; the error and discard series are placed on the right-hand axis (yaxisside: RIGHT).
# The out.errors key is spelled without a comma after node_exporter, the same way the
# "High error rate" trigger references it.
- uuid: cf143a7bb6d548fc9d3b089dcbb47ccf
name: 'Interface {#IFNAME}({#IFALIAS}): Network traffic'
graph_items:
- drawtype: GRADIENT_LINE
color: 199C0D
item:
host: 'Linux by Prom'
key: 'net.if.in[node_exporter,"{#IFNAME}"]'
- sortorder: '1'
drawtype: BOLD_LINE
color: F63100
item:
host: 'Linux by Prom'
key: 'net.if.out[node_exporter,"{#IFNAME}"]'
- sortorder: '2'
color: 00611C
yaxisside: RIGHT
item:
host: 'Linux by Prom'
key: 'net.if.out.errors[node_exporter"{#IFNAME}"]'
- sortorder: '3'
color: F7941D
yaxisside: RIGHT
item:
host: 'Linux by Prom'
key: 'net.if.in.errors[node_exporter,"{#IFNAME}"]'
- sortorder: '4'
color: FC6EA3
yaxisside: RIGHT
item:
host: 'Linux by Prom'
key: 'net.if.out.discards[node_exporter,"{#IFNAME}"]'
- sortorder: '5'
color: 6C59DC
yaxisside: RIGHT
item:
host: 'Linux by Prom'
key: 'net.if.in.discards[node_exporter,"{#IFNAME}"]'
master_item:
key: node_exporter.get
lld_macro_paths:
- lld_macro: '{#IFNAME}'
path: $.labels.device
- lld_macro: '{#HELP}'
path: $.help
- lld_macro: '{#IFALIAS}'
path: $.labels.ifalias
- lld_macro: '{#IFOPERSTATUS}'
path: $.labels.operstate
preprocessing:
- type: PROMETHEUS_TO_JSON
parameters:
- '{__name__=~"^node_network_info$"}'
# LLD rule: block device discovery from node_exporter output.
# A device is kept only when {#DEVNAME} satisfies both conditions (evaltype AND):
# the {$VFS.DEV.DEVNAME.MATCHES} condition (no operator given - presumably the default
# regex match) and NOT matching {$VFS.DEV.DEVNAME.NOT_MATCHES}.
- uuid: 920f367e232945b8992d0f32e6f9324d
name: 'Block devices discovery'
type: DEPENDENT
key: 'vfs.dev.discovery[node_exporter]'
delay: '0'
filter:
evaltype: AND
conditions:
- macro: '{#DEVNAME}'
value: '{$VFS.DEV.DEVNAME.MATCHES}'
formulaid: A
- macro: '{#DEVNAME}'
value: '{$VFS.DEV.DEVNAME.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: B
item_prototypes:
# Item prototype: average disk queue size for {#DEVNAME}, computed as the per-second
# rate of the node_disk_io_time_weighted_seconds_total counter.
- uuid: 0a0949bcdf754fbe82e772ffbd69b5ba
name: '{#DEVNAME}: Disk average queue size (avgqu-sz)'
type: DEPENDENT
key: 'vfs.dev.queue_size[node_exporter,"{#DEVNAME}"]'
delay: '0'
history: 7d
value_type: FLOAT
description: 'The current average disk queue; the number of requests outstanding on the disk while the performance data is being collected.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'node_disk_io_time_weighted_seconds_total{device="{#DEVNAME}"}'
- value
- ''
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: node_exporter.get
tags:
- tag: component
value: storage
- tag: disk
value: '{#DEVNAME}'
# Calculated item prototype: average read wait per request, in milliseconds.
# Formula: read_time_rate / read_rate * 1000. The `(read_rate=0)` term adds 1 to the
# divisor when the rate is 0 (no division by zero) and the trailing `(read_rate > 0)`
# factor forces the result to 0 in that case.
- uuid: 9d158ecffdaa43f6bd0ab867ca68620b
name: '{#DEVNAME}: Disk read request avg waiting time (r_await)'
type: CALCULATED
key: 'vfs.dev.read.await[node_exporter,"{#DEVNAME}"]'
history: 7d
value_type: FLOAT
units: '!ms'
params: '(last(//vfs.dev.read.time.rate[node_exporter,"{#DEVNAME}"])/(last(//vfs.dev.read.rate[node_exporter,"{#DEVNAME}"])+(last(//vfs.dev.read.rate[node_exporter,"{#DEVNAME}"])=0)))*1000*(last(//vfs.dev.read.rate[node_exporter,"{#DEVNAME}"]) > 0)'
description: 'This formula contains two Boolean expressions that evaluates to 1 or 0 in order to set calculated metric to zero and to avoid division by zero exception.'
tags:
- tag: component
value: storage
- tag: disk
value: '{#DEVNAME}'
# Item prototype: completed read requests per second for {#DEVNAME}
# (per-second rate of node_disk_reads_completed_total). Also the divisor of r_await.
- uuid: dcd6f61ffb2f491ba78845514654a8cf
name: '{#DEVNAME}: Disk read rate'
type: DEPENDENT
key: 'vfs.dev.read.rate[node_exporter,"{#DEVNAME}"]'
delay: '0'
history: 7d
value_type: FLOAT
units: '!r/s'
description: 'r/s. The number (after merges) of read requests completed per second for the device.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'node_disk_reads_completed_total{device="{#DEVNAME}"}'
- value
- ''
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: node_exporter.get
tags:
- tag: component
value: storage
- tag: disk
value: '{#DEVNAME}'
# Item prototype: per-second rate of node_disk_read_time_seconds_total for {#DEVNAME}.
# Helper value: it is the numerator of the r_await calculated item.
- uuid: 73940f30ea2b4b88b0047e54b773b747
name: '{#DEVNAME}: Disk read time (rate)'
type: DEPENDENT
key: 'vfs.dev.read.time.rate[node_exporter,"{#DEVNAME}"]'
delay: '0'
history: 7d
value_type: FLOAT
description: 'Rate of total read time counter. Used in `r_await` calculation.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'node_disk_read_time_seconds_total{device="{#DEVNAME}"}'
- value
- ''
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: node_exporter.get
tags:
- tag: component
value: storage
- tag: disk
value: '{#DEVNAME}'
# Item prototype: disk busy time of {#DEVNAME} in percent.
# The per-second rate of node_disk_io_time_seconds_total (seconds busy per second)
# is multiplied by 100 to express it as a percentage.
- uuid: d087a0525500450c9a307ff21f58482a
name: '{#DEVNAME}: Disk utilization'
type: DEPENDENT
key: 'vfs.dev.util[node_exporter,"{#DEVNAME}"]'
delay: '0'
history: 7d
value_type: FLOAT
units: '%'
description: 'This item is the percentage of elapsed time during which the selected disk drive was busy while servicing read or write requests.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'node_disk_io_time_seconds_total{device="{#DEVNAME}"}'
- value
- ''
- type: CHANGE_PER_SECOND
parameters:
- ''
- type: MULTIPLIER
parameters:
- '100'
master_item:
key: node_exporter.get
tags:
- tag: component
value: storage
- tag: disk
value: '{#DEVNAME}'
# Calculated item prototype: average write wait per request, in milliseconds.
# Formula: write_time_rate / write_rate * 1000. The `(write_rate=0)` term adds 1 to the
# divisor when the rate is 0 (no division by zero) and the trailing `(write_rate > 0)`
# factor forces the result to 0 in that case.
- uuid: 94203a49081047b6bc76cd66aecd55ce
name: '{#DEVNAME}: Disk write request avg waiting time (w_await)'
type: CALCULATED
key: 'vfs.dev.write.await[node_exporter,"{#DEVNAME}"]'
history: 7d
value_type: FLOAT
units: '!ms'
params: '(last(//vfs.dev.write.time.rate[node_exporter,"{#DEVNAME}"])/(last(//vfs.dev.write.rate[node_exporter,"{#DEVNAME}"])+(last(//vfs.dev.write.rate[node_exporter,"{#DEVNAME}"])=0)))*1000*(last(//vfs.dev.write.rate[node_exporter,"{#DEVNAME}"]) > 0)'
description: 'This formula contains two Boolean expressions that evaluates to 1 or 0 in order to set calculated metric to zero and to avoid division by zero exception.'
tags:
- tag: component
value: storage
- tag: disk
value: '{#DEVNAME}'
# Item prototype: completed write requests per second for {#DEVNAME}
# (per-second rate of node_disk_writes_completed_total). Also the divisor of w_await.
- uuid: 806ba0a56fab4b5094008c98bb817e2b
name: '{#DEVNAME}: Disk write rate'
type: DEPENDENT
key: 'vfs.dev.write.rate[node_exporter,"{#DEVNAME}"]'
delay: '0'
history: 7d
value_type: FLOAT
units: '!w/s'
description: 'w/s. The number (after merges) of write requests completed per second for the device.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'node_disk_writes_completed_total{device="{#DEVNAME}"}'
- value
- ''
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: node_exporter.get
tags:
- tag: component
value: storage
- tag: disk
value: '{#DEVNAME}'
# Item prototype: per-second rate of node_disk_write_time_seconds_total for {#DEVNAME}.
# Helper value: it is the numerator of the w_await calculated item.
- uuid: 601cc30457934b7f8f050b2cea13ccea
name: '{#DEVNAME}: Disk write time (rate)'
type: DEPENDENT
key: 'vfs.dev.write.time.rate[node_exporter,"{#DEVNAME}"]'
delay: '0'
history: 7d
value_type: FLOAT
description: 'Rate of total write time counter. Used in `w_await` calculation.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'node_disk_write_time_seconds_total{device="{#DEVNAME}"}'
- value
- ''
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: node_exporter.get
tags:
- tag: component
value: storage
- tag: disk
value: '{#DEVNAME}'
trigger_prototypes:
# Trigger prototype: read or write await stayed above its threshold for the whole
# 15-minute window (min over 15m), with separate per-device thresholds for read and write.
# No recovery expression is defined, so the problem expression alone decides the state.
- uuid: 412381f71bba441b955898839f81d51c
expression: 'min(/Linux by Prom/vfs.dev.read.await[node_exporter,"{#DEVNAME}"],15m) > {$VFS.DEV.READ.AWAIT.WARN:"{#DEVNAME}"} or min(/Linux by Prom/vfs.dev.write.await[node_exporter,"{#DEVNAME}"],15m) > {$VFS.DEV.WRITE.AWAIT.WARN:"{#DEVNAME}"}'
name: '{#DEVNAME}: Disk read/write request responses are too high'
event_name: '{#DEVNAME}: Disk read/write request responses are too high (read > {$VFS.DEV.READ.AWAIT.WARN:"{#DEVNAME}"} ms for 15m or write > {$VFS.DEV.WRITE.AWAIT.WARN:"{#DEVNAME}"} ms for 15m)'
priority: WARNING
description: 'This trigger might indicate the disk {#DEVNAME} saturation.'
manual_close: 'YES'
tags:
- tag: scope
value: performance
graph_prototypes:
# Graph prototypes for each discovered block device:
#   1. read vs. write await,
#   2. read vs. write request rates,
#   3. utilization together with queue size (queue size on the right-hand axis).
# The names are referenced by the 'System performance' dashboard widgets.
- uuid: 13a5a74a99734449b08075bbb25bb498
name: '{#DEVNAME}: Disk average waiting time'
graph_items:
- color: 199C0D
item:
host: 'Linux by Prom'
key: 'vfs.dev.read.await[node_exporter,"{#DEVNAME}"]'
- sortorder: '1'
drawtype: GRADIENT_LINE
color: F63100
item:
host: 'Linux by Prom'
key: 'vfs.dev.write.await[node_exporter,"{#DEVNAME}"]'
- uuid: bad3d3ba32534ef194a4837d666ddccb
name: '{#DEVNAME}: Disk read/write rates'
graph_items:
- color: 199C0D
item:
host: 'Linux by Prom'
key: 'vfs.dev.read.rate[node_exporter,"{#DEVNAME}"]'
- sortorder: '1'
drawtype: GRADIENT_LINE
color: F63100
item:
host: 'Linux by Prom'
key: 'vfs.dev.write.rate[node_exporter,"{#DEVNAME}"]'
- uuid: aa53b66adaac4e62863ea2dd59ba3ea1
name: '{#DEVNAME}: Disk utilization and queue'
graph_items:
- color: 199C0D
yaxisside: RIGHT
item:
host: 'Linux by Prom'
key: 'vfs.dev.queue_size[node_exporter,"{#DEVNAME}"]'
- sortorder: '1'
drawtype: GRADIENT_LINE
color: F63100
item:
host: 'Linux by Prom'
key: 'vfs.dev.util[node_exporter,"{#DEVNAME}"]'
# Data source of the block device LLD rule: every node_disk_io_now sample with a
# non-empty device label is converted to JSON, and {#DEVNAME} is read from that label.
master_item:
key: node_exporter.get
lld_macro_paths:
- lld_macro: '{#DEVNAME}'
path: $.labels.device
- lld_macro: '{#HELP}'
path: $.help
preprocessing:
- type: PROMETHEUS_TO_JSON
parameters:
- 'node_disk_io_now{device=~".+"}'
# LLD rule: mounted filesystem discovery from node_exporter output.
# All six conditions must hold (evaltype AND): filesystem type, mount point and device
# each have a MATCHES and a NOT_MATCHES user macro.
# The {$VFS.FS.FSDEVICE.MATCHES} condition is evaluated against {#FSDEVICE} (the device
# label), so overriding that macro filters by device rather than by mount point.
- uuid: 85d047920c1342b992fa1905eb489a44
name: 'Mounted filesystem discovery'
type: DEPENDENT
key: 'vfs.fs.discovery[node_exporter]'
delay: '0'
filter:
evaltype: AND
conditions:
- macro: '{#FSTYPE}'
value: '{$VFS.FS.FSTYPE.MATCHES}'
formulaid: E
- macro: '{#FSTYPE}'
value: '{$VFS.FS.FSTYPE.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: F
- macro: '{#FSNAME}'
value: '{$VFS.FS.FSNAME.MATCHES}'
formulaid: B
- macro: '{#FSNAME}'
value: '{$VFS.FS.FSNAME.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: C
- macro: '{#FSDEVICE}'
value: '{$VFS.FS.FSDEVICE.MATCHES}'
formulaid: D
- macro: '{#FSDEVICE}'
value: '{$VFS.FS.FSDEVICE.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: A
description: 'Discovery of file systems of different types.'
item_prototypes:
# Item prototype: available space on {#FSNAME} in bytes.
# The name regex accepts both node_filesystem_avail and node_filesystem_avail_bytes.
- uuid: 3a3edb8bf5a74a63baa27c64d1a44493
name: '{#FSNAME}: Free space'
type: DEPENDENT
key: 'vfs.fs.free[node_exporter,"{#FSNAME}"]'
delay: '0'
history: 7d
value_type: FLOAT
units: B
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- '{__name__=~"^node_filesystem_avail(?:_bytes)?$", mountpoint="{#FSNAME}"}'
- value
- ''
master_item:
key: node_exporter.get
tags:
- tag: component
value: storage
- tag: filesystem
value: '{#FSNAME}'
# Item prototype: percentage of free inodes on {#FSNAME}.
# PROMETHEUS_TO_JSON selects the node_filesystem_files* samples for the mount point;
# the JavaScript step picks node_filesystem_files (total) and node_filesystem_files_free
# and returns free/total*100.
# NOTE(review): when the total is 0 or either metric is missing, the division does not
# yield a usable number - confirm that this is acceptable for filesystems that report no inodes.
- uuid: ee1dad3677214561baafa6d930358ee0
name: '{#FSNAME}: Free inodes in %'
type: DEPENDENT
key: 'vfs.fs.inode.pfree[node_exporter,"{#FSNAME}"]'
delay: '0'
history: 7d
value_type: FLOAT
units: '%'
preprocessing:
- type: PROMETHEUS_TO_JSON
parameters:
- '{__name__=~"node_filesystem_files.*",mountpoint="{#FSNAME}"}'
- type: JAVASCRIPT
parameters:
- |
//count vfs.fs.inode.pfree
var inode_free;
var inode_total;
JSON.parse(value).forEach(function(metric) {
if (metric['name'] == 'node_filesystem_files'){
inode_total = metric['value'];
} else if (metric['name'] == 'node_filesystem_files_free'){
inode_free = metric['value'];
}
});
return (inode_free/inode_total)*100;
master_item:
key: node_exporter.get
tags:
- tag: component
value: storage
- tag: filesystem
value: '{#FSNAME}'
trigger_prototypes:
# Trigger prototype (AVERAGE): free inodes stayed below {$VFS.FS.INODE.PFREE.MIN.CRIT}%
# for 5 minutes (min over 5m). The WARNING-level trigger with the same name depends on this one.
- uuid: 27cabcc9d9644ba6b7bd8c92a740e3cc
expression: 'min(/Linux by Prom/vfs.fs.inode.pfree[node_exporter,"{#FSNAME}"],5m)<{$VFS.FS.INODE.PFREE.MIN.CRIT:"{#FSNAME}"}'
name: '{#FSNAME}: Running out of free inodes'
event_name: '{#FSNAME}: Running out of free inodes (free < {$VFS.FS.INODE.PFREE.MIN.CRIT:"{#FSNAME}"}%)'
opdata: 'Free inodes: {ITEM.LASTVALUE1}'
priority: AVERAGE
description: |
It may become impossible to write to a disk if there are no index nodes left.
The following error messages may be returned as symptoms, even though the free space is available:
- 'No space left on device';
- 'Disk is full'.
tags:
- tag: scope
value: capacity
- tag: scope
value: performance
# Trigger prototype (WARNING): free inodes stayed below {$VFS.FS.INODE.PFREE.MIN.WARN}%
# for 5 minutes. It shares its name with the AVERAGE-level trigger and lists that trigger
# (identified by name + CRIT expression) as a dependency.
- uuid: c639215f6dd94c71aebf97265a282a0d
expression: 'min(/Linux by Prom/vfs.fs.inode.pfree[node_exporter,"{#FSNAME}"],5m)<{$VFS.FS.INODE.PFREE.MIN.WARN:"{#FSNAME}"}'
name: '{#FSNAME}: Running out of free inodes'
event_name: '{#FSNAME}: Running out of free inodes (free < {$VFS.FS.INODE.PFREE.MIN.WARN:"{#FSNAME}"}%)'
opdata: 'Free inodes: {ITEM.LASTVALUE1}'
priority: WARNING
description: |
It may become impossible to write to a disk if there are no index nodes left.
The following error messages may be returned as symptoms, even though the free space is available:
- 'No space left on device';
- 'Disk is full'.
dependencies:
- name: '{#FSNAME}: Running out of free inodes'
expression: 'min(/Linux by Prom/vfs.fs.inode.pfree[node_exporter,"{#FSNAME}"],5m)<{$VFS.FS.INODE.PFREE.MIN.CRIT:"{#FSNAME}"}'
tags:
- tag: scope
value: capacity
- tag: scope
value: performance
# Calculated item prototype: used space of {#FSNAME} in percent, used/total*100,
# built from the vfs.fs.used and vfs.fs.total item prototypes of the same host.
- uuid: 89afe41bb0d74c579a9cff8b036d30dc
name: '{#FSNAME}: Space utilization'
type: CALCULATED
key: 'vfs.fs.pused[node_exporter,"{#FSNAME}"]'
history: 7d
value_type: FLOAT
units: '%'
params: '(last(//vfs.fs.used[node_exporter,"{#FSNAME}"])/last(//vfs.fs.total[node_exporter,"{#FSNAME}"]))*100'
description: 'The space utilization expressed in % for {#FSNAME}.'
tags:
- tag: component
value: storage
- tag: filesystem
value: '{#FSNAME}'
# Item prototype: total size of {#FSNAME} in bytes.
# The name regex accepts both node_filesystem_size and node_filesystem_size_bytes.
- uuid: acb3d42ae23b48cbacd0e99c27697798
name: '{#FSNAME}: Total space'
type: DEPENDENT
key: 'vfs.fs.total[node_exporter,"{#FSNAME}"]'
delay: '0'
history: 7d
value_type: FLOAT
units: B
description: 'Total space in bytes'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- '{__name__=~"^node_filesystem_size(?:_bytes)?$", mountpoint="{#FSNAME}"}'
- value
- ''
master_item:
key: node_exporter.get
tags:
- tag: component
value: storage
- tag: filesystem
value: '{#FSNAME}'
# Calculated item prototype: used space of {#FSNAME} in bytes, derived as
# total minus free (vfs.fs.total - vfs.fs.free) rather than read from a metric.
- uuid: 89e559a7ff364dd78b638454896692c5
name: '{#FSNAME}: Used space'
type: CALCULATED
key: 'vfs.fs.used[node_exporter,"{#FSNAME}"]'
history: 7d
value_type: FLOAT
units: B
params: '(last(//vfs.fs.total[node_exporter,"{#FSNAME}"])-last(//vfs.fs.free[node_exporter,"{#FSNAME}"]))'
description: 'Used storage in bytes'
tags:
- tag: component
value: storage
- tag: filesystem
value: '{#FSNAME}'
trigger_prototypes:
# Trigger prototype (AVERAGE): utilization is above {$VFS.FS.PUSED.MAX.CRIT}% AND either
# free space (total - used) is below {$VFS.FS.FREE.MIN.CRIT} or the 1h trend predicts
# 100% utilization in under a day.
# In opdata the items are numbered in order of first appearance in the expression:
# 1 = vfs.fs.pused, 2 = vfs.fs.total, 3 = vfs.fs.used.
- uuid: d5687d7aa0484b389f0bd168d50ee1e6
expression: |
last(/Linux by Prom/vfs.fs.pused[node_exporter,"{#FSNAME}"])>{$VFS.FS.PUSED.MAX.CRIT:"{#FSNAME}"} and
((last(/Linux by Prom/vfs.fs.total[node_exporter,"{#FSNAME}"])-last(/Linux by Prom/vfs.fs.used[node_exporter,"{#FSNAME}"]))<{$VFS.FS.FREE.MIN.CRIT:"{#FSNAME}"} or timeleft(/Linux by Prom/vfs.fs.pused[node_exporter,"{#FSNAME}"],1h,100)<1d)
name: '{#FSNAME}: Disk space is critically low'
event_name: '{#FSNAME}: Disk space is critically low (used > {$VFS.FS.PUSED.MAX.CRIT:"{#FSNAME}"}%)'
opdata: 'Space used: {ITEM.LASTVALUE3} of {ITEM.LASTVALUE2} ({ITEM.LASTVALUE1})'
priority: AVERAGE
description: |
Two conditions should match:
1. The first condition - utilization of the space should be above `{$VFS.FS.PUSED.MAX.CRIT:"{#FSNAME}"}`.
2. The second condition should be one of the following:
- the disk free space is less than `{$VFS.FS.FREE.MIN.CRIT:"{#FSNAME}"}`;
- the disk will be full in less than 24 hours.
manual_close: 'YES'
tags:
- tag: scope
value: availability
- tag: scope
value: capacity
# Trigger prototype (WARNING): same structure as "Disk space is critically low" but with
# the *.WARN macros. It depends on the critical trigger; the dependency block repeats that
# trigger's name and expression verbatim, so the two must be kept in sync.
- uuid: 8f765148cfd64d5ebda93f39d0b20e36
expression: |
last(/Linux by Prom/vfs.fs.pused[node_exporter,"{#FSNAME}"])>{$VFS.FS.PUSED.MAX.WARN:"{#FSNAME}"} and
((last(/Linux by Prom/vfs.fs.total[node_exporter,"{#FSNAME}"])-last(/Linux by Prom/vfs.fs.used[node_exporter,"{#FSNAME}"]))<{$VFS.FS.FREE.MIN.WARN:"{#FSNAME}"} or timeleft(/Linux by Prom/vfs.fs.pused[node_exporter,"{#FSNAME}"],1h,100)<1d)
name: '{#FSNAME}: Disk space is low'
event_name: '{#FSNAME}: Disk space is low (used > {$VFS.FS.PUSED.MAX.WARN:"{#FSNAME}"}%)'
opdata: 'Space used: {ITEM.LASTVALUE3} of {ITEM.LASTVALUE2} ({ITEM.LASTVALUE1})'
priority: WARNING
description: |
Two conditions should match:
1. The first condition - utilization of the space should be above `{$VFS.FS.PUSED.MAX.WARN:"{#FSNAME}"}`.
2. The second condition should be one of the following:
- the disk free space is less than `{$VFS.FS.FREE.MIN.WARN:"{#FSNAME}"}`;
- the disk will be full in less than 24 hours.
manual_close: 'YES'
dependencies:
- name: '{#FSNAME}: Disk space is critically low'
expression: |
last(/Linux by Prom/vfs.fs.pused[node_exporter,"{#FSNAME}"])>{$VFS.FS.PUSED.MAX.CRIT:"{#FSNAME}"} and
((last(/Linux by Prom/vfs.fs.total[node_exporter,"{#FSNAME}"])-last(/Linux by Prom/vfs.fs.used[node_exporter,"{#FSNAME}"]))<{$VFS.FS.FREE.MIN.CRIT:"{#FSNAME}"} or timeleft(/Linux by Prom/vfs.fs.pused[node_exporter,"{#FSNAME}"],1h,100)<1d)
tags:
- tag: scope
value: availability
- tag: scope
value: capacity
graph_prototypes:
# Graph prototype: 3D pie chart of space usage for {#FSNAME}.
# The total-space item is the GRAPH_SUM item (the whole pie); used space is drawn as a
# slice of it. Both use the last value.
- uuid: aa350a426fcd40af96cd15f778cdf62d
name: '{#FSNAME}: Disk space usage'
width: '600'
height: '340'
type: PIE
show_3d: 'YES'
graph_items:
- color: '969696'
calc_fnc: LAST
type: GRAPH_SUM
item:
host: 'Linux by Prom'
key: 'vfs.fs.total[node_exporter,"{#FSNAME}"]'
- sortorder: '1'
color: C80000
calc_fnc: LAST
item:
host: 'Linux by Prom'
key: 'vfs.fs.used[node_exporter,"{#FSNAME}"]'
# Data source of the filesystem LLD rule: every node_filesystem_size(_bytes) sample with
# a non-empty mountpoint label is converted to JSON; the LLD macros are read from the
# fstype, mountpoint and device labels of each sample.
master_item:
key: node_exporter.get
lld_macro_paths:
- lld_macro: '{#FSTYPE}'
path: $.labels.fstype
- lld_macro: '{#FSNAME}'
path: $.labels.mountpoint
- lld_macro: '{#FSDEVICE}'
path: $.labels.device
- lld_macro: '{#HELP}'
path: $.help
preprocessing:
- type: PROMETHEUS_TO_JSON
parameters:
- '{__name__=~"^node_filesystem_size(?:_bytes)?$", mountpoint=~".+"}'
tags:
- tag: class
value: os
- tag: target
value: linux
macros:
- macro: '{$CPU.UTIL.CRIT}'
value: '90'
- macro: '{$IF.ERRORS.WARN}'
value: '2'
- macro: '{$IF.UTIL.MAX}'
value: '90'
- macro: '{$IFCONTROL}'
value: '1'
- macro: '{$KERNEL.MAXFILES.MIN}'
value: '256'
- macro: '{$LOAD_AVG_PER_CPU.MAX.WARN}'
value: '1.5'
description: 'Load per CPU considered sustainable. Tune if needed.'
- macro: '{$MEMORY.AVAILABLE.MIN}'
value: 20M
- macro: '{$MEMORY.UTIL.MAX}'
value: '90'
- macro: '{$NET.IF.IFALIAS.MATCHES}'
value: '^.*$'
- macro: '{$NET.IF.IFALIAS.NOT_MATCHES}'
value: CHANGE_IF_NEEDED
- macro: '{$NET.IF.IFNAME.MATCHES}'
value: '^.*$'
- macro: '{$NET.IF.IFNAME.NOT_MATCHES}'
value: '(^Software Loopback Interface|^NULL[0-9.]*$|^[Ll]o[0-9.]*$|^[Ss]ystem$|^Nu[0-9.]*$|^veth[0-9A-z]+$|docker[0-9]+|br-[a-z0-9]{12})'
description: 'Filter out loopbacks, nulls, docker veth links and docker0 bridge by default.'
- macro: '{$NET.IF.IFOPERSTATUS.MATCHES}'
value: '^.*$'
# NOTE(review): in this template {#IFOPERSTATUS} is read from the `operstate` label
# (string values such as "up", "down", "notpresent"), and the numeric mapping used by
# net.if.status assigns notpresent = 1. A value of ^7$ therefore presumably never
# excludes anything - confirm against the network discovery filter and consider
# ^notpresent$ if ignoring such interfaces is really intended.
- macro: '{$NET.IF.IFOPERSTATUS.NOT_MATCHES}'
value: ^7$
description: 'Ignore notPresent(7).'
- macro: '{$NODE_EXPORTER_PORT}'
value: '9100'
description: 'TCP Port node_exporter is listening on.'
- macro: '{$SWAP.PFREE.MIN.WARN}'
value: '50'
- macro: '{$SYSTEM.FUZZYTIME.MAX}'
value: '60'
- macro: '{$VFS.DEV.DEVNAME.MATCHES}'
value: .+
description: 'This macro is used in block devices discovery. Can be overridden on the host or linked template level.'
- macro: '{$VFS.DEV.DEVNAME.NOT_MATCHES}'
value: '^(loop[0-9]*|sd[a-z][0-9]+|nbd[0-9]+|sr[0-9]+|fd[0-9]+|dm-[0-9]+|ram[0-9]+|ploop[a-z0-9]+|md[0-9]*|hcp[0-9]*|zram[0-9]*)'
description: 'This macro is used in block devices discovery. Can be overridden on the host or linked template level.'
- macro: '{$VFS.DEV.READ.AWAIT.WARN}'
value: '20'
description: 'Disk read average response time (in ms) before the trigger would fire.'
- macro: '{$VFS.DEV.WRITE.AWAIT.WARN}'
value: '20'
description: 'Disk write average response time (in ms) before the trigger would fire.'
# Minimum free space thresholds. The disk space triggers compare (total - used) against
# these values, so they are amounts of free space, not utilization percentages.
- macro: '{$VFS.FS.FREE.MIN.CRIT}'
value: 5G
description: 'The critical threshold of the filesystem free space.'
- macro: '{$VFS.FS.FREE.MIN.WARN}'
value: 10G
description: 'The warning threshold of the filesystem free space.'
- macro: '{$VFS.FS.FSDEVICE.MATCHES}'
value: ^.+$
description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level.'
- macro: '{$VFS.FS.FSDEVICE.NOT_MATCHES}'
value: ^\s$
description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level.'
- macro: '{$VFS.FS.FSNAME.MATCHES}'
value: .+
description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level.'
- macro: '{$VFS.FS.FSNAME.NOT_MATCHES}'
value: ^(/dev|/sys|/run|/proc|.+/shm$)
description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level.'
- macro: '{$VFS.FS.FSTYPE.MATCHES}'
value: ^(btrfs|ext2|ext3|ext4|reiser|xfs|ffs|ufs|jfs|jfs2|vxfs|hfs|apfs|refs|ntfs|fat32|zfs)$
description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level.'
- macro: '{$VFS.FS.FSTYPE.NOT_MATCHES}'
value: ^\s$
description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level.'
- macro: '{$VFS.FS.INODE.PFREE.MIN.CRIT}'
value: '10'
- macro: '{$VFS.FS.INODE.PFREE.MIN.WARN}'
value: '20'
- macro: '{$VFS.FS.PUSED.MAX.CRIT}'
value: '90'
- macro: '{$VFS.FS.PUSED.MAX.WARN}'
value: '80'
dashboards:
# Dashboard: one full-width graph-prototype widget (1 column x 1 row) showing the
# 'Network traffic' graph of each discovered interface.
# NOTE(review): width '24' appears to assume a 24-column dashboard grid - confirm this
# matches the grid of the Zabbix version named in the export header before importing.
- uuid: 19dac6b780aa49558bf4a3782ba4b3b6
name: 'Network interfaces'
pages:
- widgets:
- type: graphprototype
width: '24'
height: '5'
fields:
- type: INTEGER
name: columns
value: '1'
- type: INTEGER
name: rows
value: '1'
- type: GRAPH_PROTOTYPE
name: graphid
value:
host: 'Linux by Prom'
name: 'Interface {#IFNAME}({#IFALIAS}): Network traffic'
# Dashboard: overview page. Layout (x/y omitted means 0):
#   row y=0 : System load | CPU usage        (two half-width graphs)
#   row y=5 : Memory usage | Swap usage      (two half-width graphs)
#   y=10..30: full-width graph-prototype widgets for filesystem space, disk rates,
#             disk await, disk utilization/queue and interface traffic.
# Widgets refer to graphs and graph prototypes by host + name, so renaming a graph
# requires updating the matching entry here.
- uuid: 558606056f464970a7c544ba75d544f2
name: 'System performance'
pages:
- widgets:
- type: graph
width: '12'
height: '5'
fields:
- type: GRAPH
name: graphid
value:
host: 'Linux by Prom'
name: 'Linux: System load'
- type: graph
x: '12'
width: '12'
height: '5'
fields:
- type: GRAPH
name: graphid
value:
host: 'Linux by Prom'
name: 'Linux: CPU usage'
- type: graph
'y': '5'
width: '12'
height: '5'
fields:
- type: GRAPH
name: graphid
value:
host: 'Linux by Prom'
name: 'Linux: Memory usage'
- type: graph
x: '12'
'y': '5'
width: '12'
height: '5'
fields:
- type: GRAPH
name: graphid
value:
host: 'Linux by Prom'
name: 'Linux: Swap usage'
- type: graphprototype
'y': '10'
width: '24'
height: '5'
fields:
- type: INTEGER
name: columns
value: '1'
- type: INTEGER
name: rows
value: '1'
- type: GRAPH_PROTOTYPE
name: graphid
value:
host: 'Linux by Prom'
name: '{#FSNAME}: Disk space usage'
- type: graphprototype
'y': '15'
width: '24'
height: '5'
fields:
- type: INTEGER
name: columns
value: '1'
- type: INTEGER
name: rows
value: '1'
- type: GRAPH_PROTOTYPE
name: graphid
value:
host: 'Linux by Prom'
name: '{#DEVNAME}: Disk read/write rates'
- type: graphprototype
'y': '20'
width: '24'
height: '5'
fields:
- type: INTEGER
name: columns
value: '1'
- type: INTEGER
name: rows
value: '1'
- type: GRAPH_PROTOTYPE
name: graphid
value:
host: 'Linux by Prom'
name: '{#DEVNAME}: Disk average waiting time'
- type: graphprototype
'y': '25'
width: '24'
height: '5'
fields:
- type: INTEGER
name: columns
value: '1'
- type: INTEGER
name: rows
value: '1'
- type: GRAPH_PROTOTYPE
name: graphid
value:
host: 'Linux by Prom'
name: '{#DEVNAME}: Disk utilization and queue'
- type: graphprototype
'y': '30'
width: '24'
height: '5'
fields:
- type: INTEGER
name: columns
value: '1'
- type: INTEGER
name: rows
value: '1'
- type: GRAPH_PROTOTYPE
name: graphid
value:
host: 'Linux by Prom'
name: 'Interface {#IFNAME}({#IFALIAS}): Network traffic'
valuemaps:
# Value map for net.if.status. The numeric codes are the ones produced by that item's
# JavaScript preprocessing step, so the two lists must stay in sync
# (triggers compare against 2 = down).
- uuid: 4827063819f14d539f509552c84f5f94
name: ifOperStatus
mappings:
- value: '0'
newvalue: unknown
- value: '1'
newvalue: notpresent
- value: '2'
newvalue: down
- value: '3'
newvalue: lowerlayerdown
- value: '4'
newvalue: testing
- value: '5'
newvalue: dormant
- value: '6'
newvalue: up
# Linux::Interface protocol types: maps a numeric interface protocol type
# code to a descriptive name. The list is sparse (codes such as 9-14 have no
# entry), so unmapped codes have no label here.
# NOTE(review): the ARPHRD_* labels suggest these are the Linux ARP hardware
# type identifiers from <linux/if_arp.h> - confirm against the kernel header
# before adding or renumbering entries.
- uuid: 4d912f1ee95942038f306ddefb3a57b3
name: 'Linux::Interface protocol types'
mappings:
- value: '0'
newvalue: 'from KA9Q: NET/ROM pseudo'
- value: '1'
newvalue: Ethernet
- value: '2'
newvalue: 'Experimental Ethernet'
- value: '3'
newvalue: 'AX.25 Level 2'
- value: '4'
newvalue: 'PROnet token ring'
- value: '5'
newvalue: Chaosnet
- value: '6'
newvalue: 'IEEE 802.2 Ethernet/TR/TB'
- value: '7'
newvalue: ARCnet
- value: '8'
newvalue: APPLEtalk
- value: '15'
newvalue: 'Frame Relay DLCI'
- value: '19'
newvalue: ATM
- value: '23'
newvalue: 'Metricom STRIP (new IANA id)'
- value: '24'
newvalue: 'IEEE 1394 IPv4 - RFC 2734'
- value: '27'
newvalue: EUI-64
- value: '32'
newvalue: InfiniBand
- value: '256'
newvalue: ARPHRD_SLIP
- value: '257'
newvalue: ARPHRD_CSLIP
- value: '258'
newvalue: ARPHRD_SLIP6
- value: '259'
newvalue: ARPHRD_CSLIP6
- value: '260'
newvalue: 'Notional KISS type'
- value: '264'
newvalue: ARPHRD_ADAPT
- value: '270'
newvalue: ARPHRD_ROSE
- value: '271'
newvalue: 'CCITT X.25'
- value: '272'
newvalue: 'Boards with X.25 in firmware'
- value: '280'
newvalue: 'Controller Area Network'
- value: '512'
newvalue: ARPHRD_PPP
- value: '513'
newvalue: 'Cisco HDLC'
- value: '516'
newvalue: LAPB
- value: '517'
newvalue: 'Digital''s DDCMP protocol'
- value: '518'
newvalue: 'Raw HDLC'
- value: '519'
newvalue: 'Raw IP'
- value: '768'
newvalue: 'IPIP tunnel'
- value: '769'
newvalue: 'IP6IP6 tunnel'
- value: '770'
newvalue: 'Frame Relay Access Device'
- value: '771'
newvalue: 'SKIP vif'
- value: '772'
newvalue: 'Loopback device'
- value: '773'
newvalue: 'Localtalk device'
- value: '774'
newvalue: 'Fiber Distributed Data Interface'
- value: '775'
newvalue: 'AP1000 BIF'
- value: '776'
newvalue: 'sit0 device - IPv6-in-IPv4'
- value: '777'
newvalue: 'IP over DDP tunneller'
- value: '778'
newvalue: 'GRE over IP'
- value: '779'
newvalue: 'PIMSM register interface'
- value: '780'
newvalue: 'High Performance Parallel Interface'
- value: '781'
newvalue: 'Nexus 64Mbps Ash'
- value: '782'
newvalue: 'Acorn Econet'
- value: '783'
newvalue: Linux-IrDA
- value: '784'
newvalue: 'Point to point fibrechannel'
- value: '785'
newvalue: 'Fibrechannel arbitrated loop'
- value: '786'
newvalue: 'Fibrechannel public loop'
- value: '787'
newvalue: 'Fibrechannel fabric'
- value: '800'
newvalue: 'Magic type ident for TR'
- value: '801'
newvalue: 'IEEE 802.11'
- value: '802'
newvalue: 'IEEE 802.11 + Prism2 header'
- value: '803'
newvalue: 'IEEE 802.11 + radiotap header'
- value: '804'
newvalue: ARPHRD_IEEE802154
- value: '805'
newvalue: 'IEEE 802.15.4 network monitor'
- value: '820'
newvalue: 'PhoNet media type'
- value: '821'
newvalue: 'PhoNet pipe header'
- value: '822'
newvalue: 'CAIF media type'
- value: '823'
newvalue: 'GRE over IPv6'
- value: '824'
newvalue: 'Netlink header'
- value: '825'
newvalue: 'IPv6 over LoWPAN'
- value: '826'
newvalue: 'Vsock monitor header'
# Template-level triggers.
triggers:
# High swap space usage (WARNING): true when the 5-minute maximum of
# system.swap.pfree stays below {$SWAP.PFREE.MIN.WARN} and the last value of
# system.swap.total is greater than 0; the second condition is what makes the
# trigger a no-op on hosts without swap (see description).
# Declares dependencies on 'High memory utilization' and 'Lack of available
# memory'; each dependency is referenced by trigger name plus its exact
# expression, so those strings must stay in sync with the referenced triggers.
- uuid: 16e6a4d340164dee9379a3fc0aac0576
expression: 'max(/Linux by Prom/system.swap.pfree[node_exporter],5m)<{$SWAP.PFREE.MIN.WARN} and last(/Linux by Prom/system.swap.total[node_exporter])>0'
name: 'Linux: High swap space usage'
event_name: 'Linux: High swap space usage (less than {$SWAP.PFREE.MIN.WARN}% free)'
opdata: 'Free: {ITEM.LASTVALUE1}, total: {ITEM.LASTVALUE2}'
priority: WARNING
description: 'If there is no swap configured, this trigger is ignored.'
dependencies:
- name: 'Linux: High memory utilization'
expression: 'min(/Linux by Prom/vm.memory.util[node_exporter],5m)>{$MEMORY.UTIL.MAX}'
- name: 'Linux: Lack of available memory'
expression: 'max(/Linux by Prom/vm.memory.available[node_exporter],5m)<{$MEMORY.AVAILABLE.MIN} and last(/Linux by Prom/vm.memory.total[node_exporter])>0'
tags:
- tag: scope
value: capacity
# Lack of available memory (AVERAGE): true when the 5-minute maximum of
# vm.memory.available stays below {$MEMORY.AVAILABLE.MIN} and the last value
# of vm.memory.total is greater than 0.
# The 'High swap space usage' trigger lists this trigger as a dependency using
# this exact expression string, so keep the two in sync.
# Carries two 'scope' tags: capacity and performance.
- uuid: f78ed17586964a46a9de0c4f183984f6
expression: 'max(/Linux by Prom/vm.memory.available[node_exporter],5m)<{$MEMORY.AVAILABLE.MIN} and last(/Linux by Prom/vm.memory.total[node_exporter])>0'
name: 'Linux: Lack of available memory'
event_name: 'Linux: Lack of available memory (<{$MEMORY.AVAILABLE.MIN} of {ITEM.VALUE2})'
opdata: 'Available: {ITEM.LASTVALUE1}, total: {ITEM.LASTVALUE2}'
priority: AVERAGE
tags:
- tag: scope
value: capacity
- tag: scope
value: performance
# Load average is too high (AVERAGE): true when the 5-minute minimum of the
# 1-minute load average divided by the last system.cpu.num value exceeds
# {$LOAD_AVG_PER_CPU.MAX.WARN}, and the last 5-minute and 15-minute load
# averages are both greater than 0.
# Item order in the expression is avg1, cpu.num, avg5, avg15, which is why
# opdata prints {ITEM.LASTVALUE1} {ITEM.LASTVALUE3} {ITEM.LASTVALUE4} as the
# three load averages and {ITEM.LASTVALUE2} as the CPU count.
# The expression is a literal block scalar; do not place comments inside it.
- uuid: e5897641e5594bf6b7b18ae4e107bd50
expression: |
min(/Linux by Prom/system.cpu.load.avg1[node_exporter],5m)/last(/Linux by Prom/system.cpu.num[node_exporter])>{$LOAD_AVG_PER_CPU.MAX.WARN}
and last(/Linux by Prom/system.cpu.load.avg5[node_exporter])>0
and last(/Linux by Prom/system.cpu.load.avg15[node_exporter])>0
name: 'Linux: Load average is too high'
event_name: 'Linux: Load average is too high (per CPU load over {$LOAD_AVG_PER_CPU.MAX.WARN} for 5m)'
opdata: 'Load averages(1m 5m 15m): ({ITEM.LASTVALUE1} {ITEM.LASTVALUE3} {ITEM.LASTVALUE4}), # of CPUs: {ITEM.LASTVALUE2}'
priority: AVERAGE
description: 'The load average per CPU is too high. The system may be slow to respond.'
tags:
- tag: scope
value: capacity
- tag: scope
value: performance
# Running out of file descriptors (WARNING): true when the last value of
# fd.open divided by the last value of kernel.maxfiles, times 100, exceeds 80,
# i.e. more than 80% of the descriptors are in use (less than 20% free).
# event_name previously read "less than < 20% free", doubling the comparison;
# it now states the threshold once.
- uuid: 7effa63c9c994a6786279802aa7328ad
expression: 'last(/Linux by Prom/fd.open[node_exporter])/last(/Linux by Prom/kernel.maxfiles[node_exporter])*100>80'
name: 'Linux: Running out of file descriptors'
event_name: 'Linux: Running out of file descriptors (less than 20% free)'
opdata: '{ITEM.LASTVALUE1} of {ITEM.LASTVALUE2} file descriptors are in use.'
priority: WARNING
tags:
- tag: scope
value: performance
# Template-level graphs. Each graph_items entry references an item by host
# and key; sortorder gives the position of entries after the first.
graphs:
# CPU jumps: plots system.cpu.switches[node_exporter] and
# system.cpu.intr[node_exporter] on one graph.
- uuid: bb8bb267b32b46bfa2a6277b65730396
name: 'Linux: CPU jumps'
graph_items:
- color: 199C0D
item:
host: 'Linux by Prom'
key: 'system.cpu.switches[node_exporter]'
- sortorder: '1'
color: F63100
item:
host: 'Linux by Prom'
key: 'system.cpu.intr[node_exporter]'
# CPU usage: stacked graph (type: STACKED) with fixed Y-axis minimum and
# maximum. Stacks nine system.cpu.* items in sortorder: system, user, nice,
# iowait, steal, interrupt, softirq, guest, guest_nice.
- uuid: 09ebf28dc8504ab2bfd07130d1f6ca91
name: 'Linux: CPU usage'
type: STACKED
ymin_type_1: FIXED
ymax_type_1: FIXED
graph_items:
- color: 199C0D
item:
host: 'Linux by Prom'
key: 'system.cpu.system[node_exporter]'
- sortorder: '1'
color: F63100
item:
host: 'Linux by Prom'
key: 'system.cpu.user[node_exporter]'
- sortorder: '2'
color: 00611C
item:
host: 'Linux by Prom'
key: 'system.cpu.nice[node_exporter]'
- sortorder: '3'
color: F7941D
item:
host: 'Linux by Prom'
key: 'system.cpu.iowait[node_exporter]'
- sortorder: '4'
color: FC6EA3
item:
host: 'Linux by Prom'
key: 'system.cpu.steal[node_exporter]'
- sortorder: '5'
color: 6C59DC
item:
host: 'Linux by Prom'
key: 'system.cpu.interrupt[node_exporter]'
- sortorder: '6'
color: C7A72D
item:
host: 'Linux by Prom'
key: 'system.cpu.softirq[node_exporter]'
- sortorder: '7'
color: BA2A5D
item:
host: 'Linux by Prom'
key: 'system.cpu.guest[node_exporter]'
- sortorder: '8'
color: F230E0
item:
host: 'Linux by Prom'
key: 'system.cpu.guest_nice[node_exporter]'
# CPU utilization: single-series graph of system.cpu.util[node_exporter],
# drawn as a gradient line with fixed Y-axis minimum and maximum.
- uuid: 9cd184535b6b4fd5978a302bdedeac0b
name: 'Linux: CPU utilization'
ymin_type_1: FIXED
ymax_type_1: FIXED
graph_items:
- drawtype: GRADIENT_LINE
color: 199C0D
item:
host: 'Linux by Prom'
key: 'system.cpu.util[node_exporter]'
# Memory usage: plots vm.memory.total[node_exporter] as a bold line and
# vm.memory.available[node_exporter] as a gradient line. Only the Y-axis
# minimum is fixed; no ymax_type_1 is set for this graph.
- uuid: 1383a8b57e6743de946c8a43016ee2f7
name: 'Linux: Memory usage'
ymin_type_1: FIXED
graph_items:
- drawtype: BOLD_LINE
color: 199C0D
item:
host: 'Linux by Prom'
key: 'vm.memory.total[node_exporter]'
- sortorder: '1'
drawtype: GRADIENT_LINE
color: F63100
item:
host: 'Linux by Prom'
key: 'vm.memory.available[node_exporter]'
# Memory utilization: single-series graph of vm.memory.util[node_exporter],
# drawn as a gradient line with fixed Y-axis minimum and maximum.
- uuid: 81e79ef1219a4c48af1f740017bbde6a
name: 'Linux: Memory utilization'
ymin_type_1: FIXED
ymax_type_1: FIXED
graph_items:
- drawtype: GRADIENT_LINE
color: 199C0D
item:
host: 'Linux by Prom'
key: 'vm.memory.util[node_exporter]'
# Swap usage: plots system.swap.free[node_exporter] and
# system.swap.total[node_exporter] on one graph; no axis or draw-type
# settings are specified for it.
- uuid: 567684533d2442b286119c7d392e2bb7
name: 'Linux: Swap usage'
graph_items:
- color: 199C0D
item:
host: 'Linux by Prom'
key: 'system.swap.free[node_exporter]'
- sortorder: '1'
color: F63100
item:
host: 'Linux by Prom'
key: 'system.swap.total[node_exporter]'
# System load: plots the 1-, 5- and 15-minute load average items
# (system.cpu.load.avg1/avg5/avg15) with a fixed Y-axis minimum, plus
# system.cpu.num[node_exporter] on the right-hand Y axis (yaxisside: RIGHT).
- uuid: f9cbf136d01040ffa18753fb40d55f79
name: 'Linux: System load'
ymin_type_1: FIXED
graph_items:
- color: 199C0D
item:
host: 'Linux by Prom'
key: 'system.cpu.load.avg1[node_exporter]'
- sortorder: '1'
color: F63100
item:
host: 'Linux by Prom'
key: 'system.cpu.load.avg5[node_exporter]'
- sortorder: '2'
color: 00611C
item:
host: 'Linux by Prom'
key: 'system.cpu.load.avg15[node_exporter]'
- sortorder: '3'
color: F7941D
yaxisside: RIGHT
item:
host: 'Linux by Prom'
key: 'system.cpu.num[node_exporter]'