You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1484 lines
51 KiB
1484 lines
51 KiB
zabbix_export:
|
|
version: '7.0'
|
|
template_groups:
|
|
- uuid: a571c0d144b14fd4a87a9d9b2aa9fcd6
|
|
name: Templates/Applications
|
|
templates:
|
|
- uuid: b25b8b517a4743c48037bfa10af3dc3c
|
|
template: 'Etcd by HTTP'
|
|
name: 'Etcd by HTTP'
|
|
description: |
|
|
Get Etcd metrics by HTTP agent.
|
|
|
|
Generated by official Zabbix template tool "Templator" 2.0.0
|
|
vendor:
|
|
name: Zabbix
|
|
version: 7.0-0
|
|
groups:
|
|
- name: Templates/Applications
|
|
items:
|
|
# Cluster version: text item derived from the `etcdcluster` field of the JSON
# collected by the `etcd.get_version` master item.
- uuid: 32a59c8e93e141d6a471266df6dbfbd3
  name: 'Etcd: Cluster version'
  type: DEPENDENT
  key: etcd.cluster.version
  delay: '0'
  history: 7d
  trends: '0'
  value_type: CHAR
  description: 'The version of the `etcd cluster`.'
  preprocessing:
    # Pick the cluster version out of the master item's JSON.
    - type: JSONPATH
      parameters:
        - $.etcdcluster
    # Discard unchanged values, with a 1d heartbeat.
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1d
  master_item:
    key: etcd.get_version
  tags:
    - tag: component
      value: application
  triggers:
    # Fires when the two latest values differ and the latest one is non-empty.
    - uuid: 7c87424c4fb34c56bc9b976755b4ec02
      expression: 'last(/Etcd by HTTP/etcd.cluster.version,#1)<>last(/Etcd by HTTP/etcd.cluster.version,#2) and length(last(/Etcd by HTTP/etcd.cluster.version))>0'
      name: 'Etcd: Cluster version has changed'
      event_name: 'Etcd: Cluster version has changed (new version: {ITEM.VALUE})'
      priority: INFO
      description: 'Etcd version has changed. Acknowledge to close the problem manually.'
      manual_close: 'YES'
      tags:
        - tag: scope
          value: notice
|
|
# CPU: `process_cpu_seconds_total` from the metrics master item, converted to
# a per-second change.
- uuid: 04b0fa552b7d4267b4c5b67ee82ef5f1
  name: 'Etcd: CPU'
  type: DEPENDENT
  key: etcd.cpu.util
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: s
  description: 'The total user and system CPU time spent in seconds.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - process_cpu_seconds_total
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: cpu
|
|
# DB size: raw value of `etcd_mvcc_db_total_size_in_bytes` from the metrics
# master item (no rate conversion).
- uuid: bfa5bd42637642808802f7b2485a0c4d
  name: 'Etcd: DB size'
  type: DEPENDENT
  key: etcd.db.size
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: B
  description: 'The total size of the underlying database.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_mvcc_db_total_size_in_bytes
        - value
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: database
|
|
# Deletes per second: `etcd_mvcc_delete_total` converted to a per-second
# change.
- uuid: a5bc7ffb090641ab92f537b38b6055e5
  name: 'Etcd: Deletes per second'
  type: DEPENDENT
  key: etcd.delete.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of deletes seen by this member per second.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_mvcc_delete_total
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: operations
|
|
# Pending events: raw value of `etcd_debugging_mvcc_pending_events_total`.
# NOTE(review): the key says "sent.rate" although no rate step is applied and
# the metric counts pending events; the key is left as-is because triggers,
# graphs or dashboards outside this view may reference it.
- uuid: 183843bd93f84dc887a03fb638b2d323
  name: 'Etcd: Pending events'
  type: DEPENDENT
  key: etcd.events.sent.rate
  delay: '0'
  history: 7d
  description: 'The total number of pending events to be sent.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_debugging_mvcc_pending_events_total
        - value
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: events
|
|
# Master item: fetches the `/metrics` endpoint as text. History and trends are
# disabled; the dependent items that name `etcd.get_metrics` as their
# master_item parse this payload.
- uuid: 34ffab33275a400ab88e5217dee5ef96
  name: 'Etcd: Get node metrics'
  type: HTTP_AGENT
  key: etcd.get_metrics
  history: '0'
  trends: '0'
  value_type: TEXT
  # Basic auth credentials come from user macros.
  authtype: BASIC
  username: '{$ETCD.USER}'
  password: '{$ETCD.PASSWORD}'
  url: '{$ETCD.SCHEME}://{HOST.CONN}:{$ETCD.PORT}/metrics'
  tags:
    - tag: component
      value: raw
|
|
# Master item: fetches the `/version` endpoint as text for the dependent
# version items. History and trends are disabled.
# NOTE(review): unlike `etcd.get_metrics` and `etcd.health`, no
# authtype/username/password is set here - confirm `/version` is meant to be
# polled without credentials.
- uuid: fdf7593420ab42b2a5af8f8b8030b517
  name: 'Etcd: Get version'
  type: HTTP_AGENT
  key: etcd.get_version
  history: '0'
  trends: '0'
  value_type: TEXT
  url: '{$ETCD.SCHEME}://{HOST.CONN}:{$ETCD.PORT}/version'
  tags:
    - tag: component
      value: application
|
|
# RPCs received per second: sums every `grpc_server_msg_received_total`
# series and converts the sum to a per-second change.
- uuid: d23baf75628043e193ba0a607e1b4215
  name: 'Etcd: RPCs received per second'
  type: DEPENDENT
  key: etcd.grpc.received.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of RPC stream messages received on the server.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - grpc_server_msg_received_total
    # Add up the `value` field of all returned series.
    - type: JAVASCRIPT
      parameters:
        - |
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
              return acc + parseFloat(obj['value'])
          },0);
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: grpc
|
|
# RPCs sent per second: sums every `grpc_server_msg_sent_total` series and
# converts the sum to a per-second change.
- uuid: eda81182710e47e1b5f2f21bb05b4775
  name: 'Etcd: RPCs sent per second'
  type: DEPENDENT
  key: etcd.grpc.sent.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of gRPC stream messages sent by the server.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - grpc_server_msg_sent_total
    # Add up the `value` field of all returned series.
    - type: JAVASCRIPT
      parameters:
        - |
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
              return acc + parseFloat(obj['value'])
          },0);
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: grpc
|
|
# RPCs started per second: sums every `grpc_server_started_total` series and
# converts the sum to a per-second change.
- uuid: bf59a130b20d480d93eb9330750e8e28
  name: 'Etcd: RPCs started per second'
  type: DEPENDENT
  key: etcd.grpc.started.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of RPCs started on the server.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - grpc_server_started_total
    # Add up the `value` field of all returned series.
    - type: JAVASCRIPT
      parameters:
        - |
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
              return acc + parseFloat(obj['value'])
          },0);
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: grpc
|
|
# Server has a leader: value of `etcd_server_has_leader`, rendered through the
# 'Etcd leader' value map. A trigger fires when the last value is 0.
- uuid: e03575f4c472410eb6fbcf731ac6aab2
  name: 'Etcd: Server has a leader'
  type: DEPENDENT
  key: etcd.has.leader
  delay: '0'
  history: 7d
  description: |
    It defines - whether or not a leader exists:
    1 - it exists;
    0 - it does not.
  valuemap:
    name: 'Etcd leader'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_server_has_leader
        - value
        - ''
    # Discard unchanged values, with a 10m heartbeat.
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 10m
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: leader
  triggers:
    - uuid: 20165719d521453bb239d818ac57805c
      expression: 'last(/Etcd by HTTP/etcd.has.leader)=0'
      name: 'Etcd: Member has no leader'
      priority: AVERAGE
      description: 'If a member does not have a leader, it is totally unavailable.'
      tags:
        - tag: scope
          value: availability
|
|
# Node health: polls `/health` directly (HTTP agent, basic auth from macros),
# extracts `$.health` and converts the boolean to a decimal.
- uuid: 3fde4db8b9684ba4b56ba915e48957b5
  name: 'Etcd: Node health'
  type: HTTP_AGENT
  key: etcd.health
  history: 7d
  authtype: BASIC
  username: '{$ETCD.USER}'
  password: '{$ETCD.PASSWORD}'
  valuemap:
    name: 'Etcd healthcheck'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.health
    # If the boolean conversion fails, the value is set to 0.
    - type: BOOL_TO_DECIMAL
      parameters:
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
    # Discard unchanged values, with a 10m heartbeat.
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 10m
  url: '{$ETCD.SCHEME}://{HOST.CONN}:{$ETCD.PORT}/health'
  tags:
    - tag: component
      value: health
  triggers:
    # Fires when the last health value is 0; depends on the
    # 'Etcd: Service is unavailable' trigger.
    - uuid: 6acda7bdc9df4a4ab5b7cca76c6369f0
      expression: 'last(/Etcd by HTTP/etcd.health)=0'
      name: 'Etcd: Node healthcheck failed'
      opdata: 'Current state: {ITEM.LASTVALUE1}'
      priority: AVERAGE
      description: 'See more details on https://etcd.io/docs/v3.5/op-guide/monitoring/#health-check.'
      dependencies:
        - name: 'Etcd: Service is unavailable'
          expression: 'last(/Etcd by HTTP/net.tcp.service["{$ETCD.SCHEME}","{HOST.CONN}","{$ETCD.PORT}"])=0'
      tags:
        - tag: scope
          value: availability
|
|
# HTTP 4XX: sums all `etcd_http_failed_total` series whose `code` label
# matches `4.+` and converts the sum to a per-second change.
- uuid: 923a408dd4514e808b6e2137a94f8140
  name: 'Etcd: HTTP 4XX'
  type: DEPENDENT
  key: etcd.http.requests.4xx.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of handled failures of requests (non-watches), by the method (`GET/PUT` etc.), and the code `4XX`.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - 'etcd_http_failed_total{code=~"4.+"}'
    # Add up the `value` field of all returned series.
    - type: JAVASCRIPT
      parameters:
        - |
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
              return acc + parseFloat(obj['value'])
          },0);
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: http
    - tag: http-code
      value: 4xx
|
|
# HTTP 5XX: sums all `etcd_http_failed_total` series whose `code` label
# matches `5.+` and converts the sum to a per-second change.
- uuid: c0f27d4bfba344079a31ce8c10b22683
  name: 'Etcd: HTTP 5XX'
  type: DEPENDENT
  key: etcd.http.requests.5xx.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of handled failures of requests (non-watches), by the method (`GET/PUT` etc.), and the code `5XX`.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - 'etcd_http_failed_total{code=~"5.+"}'
    # Add up the `value` field of all returned series.
    - type: JAVASCRIPT
      parameters:
        - |
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
              return acc + parseFloat(obj['value'])
          },0);
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: http
    - tag: http-code
      value: 5xx
  triggers:
    # Fires when the 5-minute minimum of the rate exceeds the macro threshold.
    - uuid: 0302957e0f6b43389546e1cfb958ed9c
      expression: 'min(/Etcd by HTTP/etcd.http.requests.5xx.rate,5m)>{$ETCD.HTTP.FAIL.MAX.WARN}'
      name: 'Etcd: Too many HTTP requests failures'
      event_name: 'Etcd: Too many HTTP requests failures (over {$ETCD.HTTP.FAIL.MAX.WARN} for 5m)'
      priority: WARNING
      description: 'Too many requests failed on `etcd` instance with the `5xx HTTP code`.'
      tags:
        - tag: scope
          value: availability
|
|
# HTTP requests received: sums every `etcd_http_received_total` series and
# converts the sum to a per-second change.
- uuid: 2a19db1c58ee4a509061fcb1b557c1a3
  name: 'Etcd: HTTP requests received'
  type: DEPENDENT
  key: etcd.http.requests.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of requests received into the system (successfully parsed and `authd`).'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - etcd_http_received_total
    # Add up the `value` field of all returned series.
    - type: JAVASCRIPT
      parameters:
        - |
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
              return acc + parseFloat(obj['value'])
          },0);
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: http
|
|
# Server is a leader: value of `etcd_server_is_leader`, rendered through the
# 'Etcd leader' value map. Its trigger watches for missing data.
- uuid: b3760811472440baad6a338f481ba13a
  name: 'Etcd: Server is a leader'
  type: DEPENDENT
  key: etcd.is.leader
  delay: '0'
  history: 7d
  description: |
    It defines - whether or not this member is a leader:
    1 - it is;
    0 - otherwise.
  valuemap:
    name: 'Etcd leader'
  preprocessing:
    # If the pattern step fails, the value is set to 0.
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_server_is_leader
        - value
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
    # Discard unchanged values, with a 10m heartbeat.
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 10m
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: leader
  triggers:
    # Fires when this item has received no data for 30m; depends on the
    # 'Etcd: Service is unavailable' trigger.
    - uuid: 44d66eb1a332418daf4c3a1110db5458
      expression: 'nodata(/Etcd by HTTP/etcd.is.leader,30m)=1'
      name: 'Etcd: Failed to fetch info data'
      event_name: 'Etcd: Failed to fetch info data (or no data for 30m)'
      priority: WARNING
      description: 'Zabbix has not received any data for items for the last 30 minutes.'
      manual_close: 'YES'
      dependencies:
        - name: 'Etcd: Service is unavailable'
          expression: 'last(/Etcd by HTTP/net.tcp.service["{$ETCD.SCHEME}","{HOST.CONN}","{$ETCD.PORT}"])=0'
      tags:
        - tag: scope
          value: notice
|
|
# Keys compacted per second: `etcd_debugging_mvcc_db_compaction_keys_total`
# converted to a per-second change.
- uuid: ecd1ae9c038f4fc2b720ad562ced0191
  name: 'Etcd: Keys compacted per second'
  type: DEPENDENT
  key: etcd.keys.compacted.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  description: 'The number of DB keys compacted per second.'
  preprocessing:
    # If the pattern step fails, the value is set to 0.
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_debugging_mvcc_db_compaction_keys_total
        - value
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: keys
|
|
# Keys expired per second: `etcd_debugging_store_expires_total` converted to a
# per-second change.
- uuid: a3f910efb0a04cc494c07b8703f9d2ec
  name: 'Etcd: Keys expired per second'
  type: DEPENDENT
  key: etcd.keys.expired.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  description: 'The number of expired keys per second.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_debugging_store_expires_total
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: keys
|
|
# Keys total: raw value of `etcd_debugging_mvcc_keys_total` (no rate step).
- uuid: fbda737014544cf1bcf544a48aa6e48b
  name: 'Etcd: Keys total'
  type: DEPENDENT
  key: etcd.keys.total
  delay: '0'
  history: 7d
  description: 'The total number of keys.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_debugging_mvcc_keys_total
        - value
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: keys
|
|
# Leader changes: raw value of `etcd_server_leader_changes_seen_total`.
# The trigger compares the spread (max - min) over 15m against a macro
# threshold.
- uuid: e45ba61d99b8432b86f5797a2cfdb416
  name: 'Etcd: Leader changes'
  type: DEPENDENT
  key: etcd.leader.changes
  delay: '0'
  history: 7d
  description: 'The number of leader changes the member has seen since its start.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_server_leader_changes_seen_total
        - value
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: leader
  triggers:
    - uuid: 1ea623927179489890a5a73eeb8177f7
      expression: '(max(/Etcd by HTTP/etcd.leader.changes,15m)-min(/Etcd by HTTP/etcd.leader.changes,15m))>{$ETCD.LEADER.CHANGES.MAX.WARN}'
      name: 'Etcd: Instance has seen too many leader changes'
      # The scalar previously ended in ''' which YAML decodes as a trailing
      # literal apostrophe in the event name; closed with a single quote now.
      event_name: 'Etcd: Instance has seen too many leader changes (over {$ETCD.LEADER.CHANGES.MAX.WARN} for 15m)'
      priority: WARNING
      description: 'Rapid leadership changes impact the performance of `etcd` significantly. It also signals that the leader is unstable, perhaps due to network connectivity issues or excessive load hitting the `etcd cluster`.'
      tags:
        - tag: scope
          value: availability
|
|
# Maximum open file descriptors: raw value of `process_max_fds`.
- uuid: 348e15d2ec3a4bb88e2ca371f96c2f00
  name: 'Etcd: Maximum open file descriptors'
  type: DEPENDENT
  key: etcd.max.fds
  delay: '0'
  history: 7d
  value_type: FLOAT
  description: 'The Maximum number of open file descriptors.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - process_max_fds
        - value
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: fds
|
|
# Client gRPC received bytes per second:
# `etcd_network_client_grpc_received_bytes_total` converted to a per-second
# change.
- uuid: d016b8674ebd4251943f2e94b22f5ff2
  name: 'Etcd: Client gRPC received bytes per second'
  type: DEPENDENT
  key: etcd.network.grpc.received.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: Bps
  description: 'The number of bytes received from gRPC clients per second.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_network_client_grpc_received_bytes_total
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: grpc
|
|
# Client gRPC sent bytes per second:
# `etcd_network_client_grpc_sent_bytes_total` converted to a per-second
# change.
- uuid: e50d2d088c6448dbb3ecaeebc3b2b8f1
  name: 'Etcd: Client gRPC sent bytes per second'
  type: DEPENDENT
  key: etcd.network.grpc.sent.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: Bps
  # Direction corrected: this metric counts bytes the server sends TO gRPC
  # clients (the "received" item covers the from-clients direction).
  description: 'The number of bytes sent to gRPC clients per second.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_network_client_grpc_sent_bytes_total
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: grpc
|
|
# Open file descriptors: raw value of `process_open_fds`.
- uuid: d5099d4cdb3044ba95935c2aea2b6352
  name: 'Etcd: Open file descriptors'
  type: DEPENDENT
  key: etcd.open.fds
  delay: '0'
  history: 7d
  value_type: FLOAT
  description: 'The number of open file descriptors.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - process_open_fds
        - value
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: fds
|
|
# Proposals applied per second: `etcd_server_proposals_applied_total`
# converted to a per-second change.
- uuid: a2927b1e85af41cab9c28b1b79c229ea
  name: 'Etcd: Proposals applied per second'
  type: DEPENDENT
  key: etcd.proposals.applied.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  description: 'The number of consensus proposals applied.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_server_proposals_applied_total
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: proposals
|
|
# Proposals committed per second: `etcd_server_proposals_committed_total`
# converted to a per-second change.
- uuid: e829f3df055e42dfbce5f27eb7ca487c
  name: 'Etcd: Proposals committed per second'
  type: DEPENDENT
  key: etcd.proposals.committed.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  description: 'The number of consensus proposals committed.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_server_proposals_committed_total
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: proposals
|
|
# Proposals failed per second: `etcd_server_proposals_failed_total` converted
# to a per-second change. The trigger fires when the 5-minute minimum of the
# rate exceeds a macro threshold.
- uuid: 6b45b99526394a219d31b5c22cb98c85
  name: 'Etcd: Proposals failed per second'
  type: DEPENDENT
  key: etcd.proposals.failed.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  description: 'The number of failed proposals seen.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_server_proposals_failed_total
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: proposals
  triggers:
    - uuid: 432cea8bddd742ca98528be2fbc7e35e
      expression: 'min(/Etcd by HTTP/etcd.proposals.failed.rate,5m)>{$ETCD.PROPOSAL.FAIL.MAX.WARN}'
      name: 'Etcd: Too many proposal failures'
      # The scalar previously ended in ''' which YAML decodes as a trailing
      # literal apostrophe in the event name; closed with a single quote now.
      event_name: 'Etcd: Too many proposal failures (over {$ETCD.PROPOSAL.FAIL.MAX.WARN} for 5m)'
      priority: WARNING
      description: 'Normally related to two issues: temporary failures related to a leader election or longer downtime caused by a loss of quorum in the cluster.'
      tags:
        - tag: scope
          value: performance
|
|
# Proposals pending: raw value of `etcd_server_proposals_pending`. The trigger
# fires when the 5-minute minimum exceeds a macro threshold.
- uuid: 1c506ff69e7b4564a6d95fd35b1a11fd
  name: 'Etcd: Proposals pending'
  type: DEPENDENT
  key: etcd.proposals.pending
  delay: '0'
  history: 7d
  description: 'The current number of pending proposals to commit.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_server_proposals_pending
        - value
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: proposals
  triggers:
    - uuid: 5feefc4dd5d14fe2b56dd63029b57026
      expression: 'min(/Etcd by HTTP/etcd.proposals.pending,5m)>{$ETCD.PROPOSAL.PENDING.MAX.WARN}'
      name: 'Etcd: Too many proposals are queued to commit'
      # The scalar previously ended in ''' which YAML decodes as a trailing
      # literal apostrophe in the event name; closed with a single quote now.
      event_name: 'Etcd: Too many proposals are queued to commit (over {$ETCD.PROPOSAL.PENDING.MAX.WARN} for 5m)'
      priority: WARNING
      description: 'Rising pending proposals suggests there is a high client load, or the member cannot commit proposals.'
      tags:
        - tag: scope
          value: performance
|
|
# PUT per second: `etcd_mvcc_put_total` converted to a per-second change.
- uuid: bd7398507c274bfab53339380df16761
  name: 'Etcd: PUT per second'
  type: DEPENDENT
  key: etcd.put.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of puts seen by this member per second.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_mvcc_put_total
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: operations
|
|
# Range per second: `etcd_debugging_mvcc_range_total` converted to a
# per-second change.
- uuid: b744c07f3290467b96b21ea38ad5d497
  name: 'Etcd: Range per second'
  type: DEPENDENT
  key: etcd.range.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of ranges seen by this member per second.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_debugging_mvcc_range_total
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: operations
|
|
# Reads per second: sums every `etcd_debugging_store_reads_total` series and
# converts the sum to a per-second change.
- uuid: 88c91b36eca94fd2b357a67d171dc621
  name: 'Etcd: Reads per second'
  type: DEPENDENT
  key: etcd.reads.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of read actions by `get/getRecursive`, local to this member.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - etcd_debugging_store_reads_total
    # Add up the `value` field of all returned series.
    - type: JAVASCRIPT
      parameters:
        - |
          //calculates total reads
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
              return acc + parseFloat(obj['value'])
          },0);
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: store
|
|
# Resident memory: raw value of `process_resident_memory_bytes`.
- uuid: 4b881e32094e4f478c5d0849cb5d07a7
  name: 'Etcd: Resident memory'
  type: DEPENDENT
  key: etcd.res.bytes
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: B
  description: 'The size of resident memory expressed in bytes.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - process_resident_memory_bytes
        - value
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: memory
|
|
# Server version: text item derived from the `etcdserver` field of the JSON
# collected by the `etcd.get_version` master item.
- uuid: dee9ed8897cf4d3582957707ea09cdf8
  name: 'Etcd: Server version'
  type: DEPENDENT
  key: etcd.server.version
  delay: '0'
  history: 7d
  trends: '0'
  value_type: CHAR
  description: 'The version of the `etcd server`.'
  preprocessing:
    # Pick the server version out of the master item's JSON.
    - type: JSONPATH
      parameters:
        - $.etcdserver
    # Discard unchanged values, with a 1d heartbeat.
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1d
  master_item:
    key: etcd.get_version
  tags:
    - tag: component
      value: application
  triggers:
    # Fires when the two latest values differ and the latest one is non-empty.
    - uuid: cfb2fc467b224ef694d59b5c081ed965
      expression: 'last(/Etcd by HTTP/etcd.server.version,#1)<>last(/Etcd by HTTP/etcd.server.version,#2) and length(last(/Etcd by HTTP/etcd.server.version))>0'
      name: 'Etcd: Server version has changed'
      event_name: 'Etcd: Server version has changed (new version: {ITEM.VALUE})'
      priority: INFO
      description: 'Etcd version has changed. Acknowledge to close the problem manually.'
      manual_close: 'YES'
      tags:
        - tag: scope
          value: notice
|
|
# Transaction per second: the mvcc transaction counter converted to a
# per-second change.
- uuid: b14c787c716146e990bc388d277a2803
  name: 'Etcd: Transaction per second'
  type: DEPENDENT
  key: etcd.txn.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of transactions seen by this member per second.'
  preprocessing:
    # Previously read `etcd_debugging_mvcc_range_total`, the same counter as
    # the `etcd.range.rate` item, so this item duplicated the range rate.
    # Switched to the transaction counter.
    - type: PROMETHEUS_PATTERN
      parameters:
        - etcd_debugging_mvcc_txn_total
        - value
        - ''
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: transactions
|
|
# Uptime: current epoch seconds minus `process_start_time_seconds`. The
# trigger fires when the last value is below 10m.
- uuid: 98ec9085d621446aa462efc86cf93905
  name: 'Etcd: Uptime'
  type: DEPENDENT
  key: etcd.uptime
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: s
  description: '`Etcd` server uptime.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - process_start_time_seconds
        - value
        - ''
    # Turn the start timestamp into an elapsed-seconds figure.
    - type: JAVASCRIPT
      parameters:
        - |
          //use boottime to calculate uptime
          return (Math.floor(Date.now()/1000)-Number(value));
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: application
  triggers:
    - uuid: 6843369969f5410a840494104d71fe1f
      expression: 'last(/Etcd by HTTP/etcd.uptime)<10m'
      name: 'Etcd: Host has been restarted'
      event_name: 'Etcd: {HOST.NAME} has been restarted (uptime < 10m)'
      priority: INFO
      description: 'Uptime is less than 10 minutes.'
      manual_close: 'YES'
      tags:
        - tag: scope
          value: notice
|
|
# Virtual memory: raw value of `process_virtual_memory_bytes`.
- uuid: c35810b8b7bc4a62970b5293fb2d8fb2
  name: 'Etcd: Virtual memory'
  type: DEPENDENT
  key: etcd.virtual.bytes
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: B
  description: 'The size of virtual memory expressed in bytes.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - process_virtual_memory_bytes
        - value
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: memory
|
|
# Writes per second: sums every `etcd_debugging_store_writes_total` series and
# converts the sum to a per-second change.
- uuid: 16c041fc189248bfaaa5826ffaf38459
  name: 'Etcd: Writes per second'
  type: DEPENDENT
  key: etcd.writes.rate
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of writes (e.g., `set/compareAndDelete`) seen by this member.'
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - etcd_debugging_store_writes_total
    # Add up the `value` field of all returned series.
    - type: JAVASCRIPT
      parameters:
        - |
          var valueArr = JSON.parse(value);
          return valueArr.reduce(function(acc,obj){
              return acc + parseFloat(obj['value'])
          },0);
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: etcd.get_metrics
  tags:
    - tag: component
      value: store
|
|
# Service's TCP port state: simple check built from the scheme, host
# connection and port macros, rendered through the 'Service state' value map.
# Its trigger is the one other triggers in this template depend on.
- uuid: a0f94f429b99432e86d15ffa74d6eada
  name: 'Etcd: Service''s TCP port state'
  type: SIMPLE
  key: 'net.tcp.service["{$ETCD.SCHEME}","{HOST.CONN}","{$ETCD.PORT}"]'
  history: 7d
  valuemap:
    name: 'Service state'
  preprocessing:
    # Discard unchanged values, with a 10m heartbeat.
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 10m
  tags:
    - tag: component
      value: health
    - tag: component
      value: network
  triggers:
    # Fires when the last check value is 0.
    - uuid: 74164f0783ae4227ba44f3e865fee3bd
      expression: 'last(/Etcd by HTTP/net.tcp.service["{$ETCD.SCHEME}","{HOST.CONN}","{$ETCD.PORT}"])=0'
      name: 'Etcd: Service is unavailable'
      priority: AVERAGE
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
|
|
discovery_rules:
|
|
# gRPC codes discovery: builds one {#GRPC.CODE} entry per distinct `grpc_code`
# label found on `grpc_server_handled_total`, then creates a per-code rate
# item and a trigger prototype for each entry that passes the filter.
- uuid: 5e6121383e5d4f3eb1150a2068a4633b
  name: 'gRPC codes discovery'
  type: DEPENDENT
  key: etcd.grpc_code.discovery
  delay: '0'
  # Both conditions must hold: the code must not match the NOT_MATCHES macro
  # (condition A) and is also tested against the MATCHES macro (condition B).
  filter:
    evaltype: AND
    conditions:
      - macro: '{#GRPC.CODE}'
        value: '{$ETCD.GRPC_CODE.NOT_MATCHES}'
        operator: NOT_MATCHES_REGEX
        formulaid: A
      - macro: '{#GRPC.CODE}'
        value: '{$ETCD.GRPC_CODE.MATCHES}'
        formulaid: B
  item_prototypes:
    - uuid: 7d316cbec2ce4718ac133d90b7a89585
      name: 'Etcd: RPCs completed with code {#GRPC.CODE}'
      type: DEPENDENT
      key: 'etcd.grpc.handled.rate[{#GRPC.CODE}]'
      delay: '0'
      history: 7d
      value_type: FLOAT
      units: rps
      description: 'The number of RPCs completed on the server with grpc_code {#GRPC.CODE}.'
      preprocessing:
        # Select by the `grpc_code` label: {#GRPC.CODE} is filled from
        # `labels.grpc_code` by the discovery script below, so the previous
        # `grpc_method=` selector could never match a discovered value.
        - type: PROMETHEUS_TO_JSON
          parameters:
            - 'grpc_server_handled_total{grpc_code="{#GRPC.CODE}"}'
        # Add up the `value` field of all returned series.
        - type: JAVASCRIPT
          parameters:
            - |
              var valueArr = JSON.parse(value);
              return valueArr.reduce(function(acc,obj){
                  return acc + parseFloat(obj['value'])
              },0);
        - type: CHANGE_PER_SECOND
          parameters:
            - ''
      master_item:
        key: etcd.get_metrics
      tags:
        - tag: component
          value: grpc
        - tag: grpc-code
          value: '{#GRPC.CODE}'
      trigger_prototypes:
        # Fires when the 5-minute minimum of the per-code rate exceeds the
        # macro threshold.
        - uuid: 459b6ee5735047d597a6a4ab41b76e21
          expression: 'min(/Etcd by HTTP/etcd.grpc.handled.rate[{#GRPC.CODE}],5m)>{$ETCD.GRPC.ERRORS.MAX.WARN}'
          name: 'Etcd: Too many failed gRPC requests with code: {#GRPC.CODE}'
          event_name: 'Etcd: Too many failed gRPC requests with code: {#GRPC.CODE} (over {$ETCD.GRPC.ERRORS.MAX.WARN} in 5m)'
          priority: WARNING
          tags:
            - tag: scope
              value: availability
  master_item:
    key: etcd.get_metrics
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - grpc_server_handled_total
    # Emit one {#GRPC.CODE} row per distinct `grpc_code` label value.
    - type: JAVASCRIPT
      parameters:
        - |
          var data = JSON.parse(value),
              lookup = {},
              result = [];
          for (var item, i = 0; item = data[i++];) {
              var code = item.labels.grpc_code;
              if (!(code in lookup)) {
                  lookup[code] = 1;
                  result.push({ "{#GRPC.CODE}": code });
              }
          }
          return JSON.stringify(result);
    # Discard unchanged discovery output, with a 1h heartbeat.
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  overrides:
    # For codes matching the TRIGGER.MATCHES macro, enable and discover the
    # trigger prototypes whose name contains the given text.
    - name: trigger
      step: '1'
      filter:
        conditions:
          - macro: '{#GRPC.CODE}'
            value: '{$ETCD.GRPC_CODE.TRIGGER.MATCHES}'
            formulaid: A
      operations:
        - operationobject: TRIGGER_PROTOTYPE
          operator: LIKE
          value: 'Too many failed gRPC requests'
          status: ENABLED
          discover: DISCOVER
|
|
# Low-level discovery rule for etcd cluster peers.
# Dependent on etcd.get_metrics: the rule's preprocessing converts
# etcd_network_peer_sent_bytes_total to JSON and {#ETCD.PEER} is read from
# each series' `To` label (see lld_macro_paths at the end of this entry).
# Sent-side prototypes select on `To`, received-side prototypes on `From`.
- uuid: b7b527ee30b84a569afcd1f85b705810
  name: 'Peers discovery'
  type: DEPENDENT
  key: etcd.peer.discovery
  delay: '0'
  item_prototypes:
    # Every prototype below follows the same pipeline: extract one series
    # with PROMETHEUS_PATTERN, substitute 0 if that step fails
    # (error_handler CUSTOM_VALUE), then convert the counter to a rate.
    - uuid: 4129aa7b8acf4ca3b5476461fe5275c9
      name: 'Etcd: Etcd peer {#ETCD.PEER}: Bytes received'
      type: DEPENDENT
      key: 'etcd.bytes.received.rate[{#ETCD.PEER}]'
      delay: '0'
      history: 7d
      value_type: FLOAT
      units: Bps
      description: 'The number of bytes received from a peer with the ID `{#ETCD.PEER}`.'
      preprocessing:
        - type: PROMETHEUS_PATTERN
          parameters:
            - 'etcd_network_peer_received_bytes_total{From="{#ETCD.PEER}"}'
            - value
            - ''
          error_handler: CUSTOM_VALUE
          error_handler_params: '0'
        - type: CHANGE_PER_SECOND
          parameters:
            - ''
      master_item:
        key: etcd.get_metrics
      tags:
        - tag: component
          value: network
        - tag: component
          value: peers
        - tag: peer
          value: '{#ETCD.PEER}'
    - uuid: 8f5fecbabe474baaab40df46879401af
      name: 'Etcd: Etcd peer {#ETCD.PEER}: Bytes sent'
      type: DEPENDENT
      key: 'etcd.bytes.sent.rate[{#ETCD.PEER}]'
      delay: '0'
      history: 7d
      value_type: FLOAT
      units: Bps
      description: 'The number of bytes sent to a peer with the ID `{#ETCD.PEER}`.'
      preprocessing:
        - type: PROMETHEUS_PATTERN
          parameters:
            - 'etcd_network_peer_sent_bytes_total{To="{#ETCD.PEER}"}'
            - value
            - ''
          error_handler: CUSTOM_VALUE
          error_handler_params: '0'
        - type: CHANGE_PER_SECOND
          parameters:
            - ''
      master_item:
        key: etcd.get_metrics
      tags:
        - tag: component
          value: network
        - tag: component
          value: peers
        - tag: peer
          value: '{#ETCD.PEER}'
    - uuid: 2521ccfc16fc43069001883b85aa0243
      name: 'Etcd: Etcd peer {#ETCD.PEER}: Receive failures'
      type: DEPENDENT
      key: 'etcd.received.fail.rate[{#ETCD.PEER}]'
      delay: '0'
      history: 7d
      value_type: FLOAT
      units: rps
      description: 'The number of received failures from a peer with the ID `{#ETCD.PEER}`.'
      preprocessing:
        # Received-side metric: the remote peer is carried in the `From`
        # label, as in the received-bytes prototype above. Selecting on `To`
        # never matched, so the error handler silently reported 0.
        - type: PROMETHEUS_PATTERN
          parameters:
            - 'etcd_network_peer_received_failures_total{From="{#ETCD.PEER}"}'
            - value
            - ''
          error_handler: CUSTOM_VALUE
          error_handler_params: '0'
        - type: CHANGE_PER_SECOND
          parameters:
            - ''
      master_item:
        key: etcd.get_metrics
      tags:
        - tag: component
          value: network
        - tag: component
          value: peers
        - tag: peer
          value: '{#ETCD.PEER}'
    - uuid: 5756f1a16e5c42b79f6d6225c5382599
      name: 'Etcd: Etcd peer {#ETCD.PEER}: Send failures'
      type: DEPENDENT
      key: 'etcd.sent.fail.rate[{#ETCD.PEER}]'
      delay: '0'
      history: 7d
      value_type: FLOAT
      units: rps
      description: 'The number of sent failures from a peer with the ID `{#ETCD.PEER}`.'
      preprocessing:
        - type: PROMETHEUS_PATTERN
          parameters:
            - 'etcd_network_peer_sent_failures_total{To="{#ETCD.PEER}"}'
            - value
            - ''
          error_handler: CUSTOM_VALUE
          error_handler_params: '0'
        - type: CHANGE_PER_SECOND
          parameters:
            - ''
      master_item:
        key: etcd.get_metrics
      tags:
        - tag: component
          value: network
        - tag: component
          value: peers
        - tag: peer
          value: '{#ETCD.PEER}'
  master_item:
    key: etcd.get_metrics
  lld_macro_paths:
    - lld_macro: '{#ETCD.PEER}'
      path: $.labels.To
  preprocessing:
    - type: PROMETHEUS_TO_JSON
      parameters:
        - etcd_network_peer_sent_bytes_total
|
|
# Template-level tags: class=application, target=etcd.
tags:
  - tag: class
    value: application
  - tag: target
    value: etcd
|
|
# Template-level user macros: trigger thresholds, gRPC-code discovery
# filters and endpoint settings. {$ETCD.PASSWORD} and {$ETCD.USER} are
# declared with neither a value nor a description.
macros:
  - macro: '{$ETCD.GRPC.ERRORS.MAX.WARN}'
    value: '1'
    description: 'The maximum number of gRPC request failures.'
  - macro: '{$ETCD.GRPC_CODE.MATCHES}'
    value: '.*'
    description: 'The filter of discoverable gRPC codes. See more details on https://github.com/grpc/grpc/blob/master/doc/statuscodes.md.'
  - macro: '{$ETCD.GRPC_CODE.NOT_MATCHES}'
    value: CHANGE_IF_NEEDED
    description: 'The filter to exclude discovered gRPC codes. See more details on https://github.com/grpc/grpc/blob/master/doc/statuscodes.md.'
  - macro: '{$ETCD.GRPC_CODE.TRIGGER.MATCHES}'
    value: Aborted|Unavailable
    description: 'The filter of discoverable gRPC codes, which will create triggers.'
  - macro: '{$ETCD.HTTP.FAIL.MAX.WARN}'
    value: '2'
    description: 'The maximum number of HTTP request failures.'
  - macro: '{$ETCD.LEADER.CHANGES.MAX.WARN}'
    value: '5'
    description: 'The maximum number of leader changes.'
  - macro: '{$ETCD.OPEN.FDS.MAX.WARN}'
    value: '90'
    description: 'The maximum percentage of used file descriptors.'
  - macro: '{$ETCD.PASSWORD}'
  - macro: '{$ETCD.PORT}'
    value: '2379'
    description: 'The port of `etcd` API endpoint.'
  - macro: '{$ETCD.PROPOSAL.FAIL.MAX.WARN}'
    value: '2'
    description: 'The maximum number of proposal failures.'
  - macro: '{$ETCD.PROPOSAL.PENDING.MAX.WARN}'
    value: '5'
    description: 'The maximum number of proposals in queue.'
  - macro: '{$ETCD.SCHEME}'
    value: http
    description: 'The request scheme which may be `http` or `https`.'
  - macro: '{$ETCD.USER}'
|
|
# 'Etcd Overview' dashboard: a single page of seven graph widgets, each bound
# to a graph by host and graph name. Six widgets are 12 wide and sit in pairs
# on rows y=0/5/10; the last one is 24 wide on row y=15. Widgets that omit
# x or 'y' are reproduced as exported (presumably defaulting to 0 - confirm).
dashboards:
  - uuid: 5b0ffbb731cd4415a2edbc74978e0276
    name: 'Etcd Overview'
    pages:
      - widgets:
          # Row 1
          - type: graph
            width: '12'
            height: '5'
            fields:
              - type: GRAPH
                name: graphid
                value:
                  host: 'Etcd by HTTP'
                  name: 'Etcd: Proposals rate'
          - type: graph
            x: '12'
            width: '12'
            height: '5'
            fields:
              - type: GRAPH
                name: graphid
                value:
                  host: 'Etcd by HTTP'
                  name: 'Etcd: Read/Write rate'
          # Row 2
          - type: graph
            'y': '5'
            width: '12'
            height: '5'
            fields:
              - type: GRAPH
                name: graphid
                value:
                  host: 'Etcd by HTTP'
                  name: 'Etcd: gRPC client traffic'
          - type: graph
            x: '12'
            'y': '5'
            width: '12'
            height: '5'
            fields:
              - type: GRAPH
                name: graphid
                value:
                  host: 'Etcd by HTTP'
                  name: 'Etcd: HTTP requests rate'
          # Row 3
          - type: graph
            'y': '10'
            width: '12'
            height: '5'
            fields:
              - type: GRAPH
                name: graphid
                value:
                  host: 'Etcd by HTTP'
                  name: 'Etcd: gRPC requests rate'
          - type: graph
            x: '12'
            'y': '10'
            width: '12'
            height: '5'
            fields:
              - type: GRAPH
                name: graphid
                value:
                  host: 'Etcd by HTTP'
                  name: 'Etcd: Memory usage'
          # Row 4 (full width)
          - type: graph
            'y': '15'
            width: '24'
            height: '5'
            fields:
              - type: GRAPH
                name: graphid
                value:
                  host: 'Etcd by HTTP'
                  name: 'Etcd: File descriptors'
|
|
# Value maps translating raw 0/1 item values into display labels.
valuemaps:
  - uuid: f25e21a70baa4e009bdbcb44acb1a22e
    name: 'Etcd healthcheck'
    mappings:
      - value: '0'
        newvalue: Failed
      - value: '1'
        newvalue: Ok
  # 'No' and 'Yes' stay quoted so they are read as strings, not booleans.
  - uuid: 7bcaf8a520e24613a96d49e63a91a55b
    name: 'Etcd leader'
    mappings:
      - value: '0'
        newvalue: 'No'
      - value: '1'
        newvalue: 'Yes'
  - uuid: 1735a8d251b24c3fbab32e766064536b
    name: 'Service state'
    mappings:
      - value: '0'
        newvalue: Down
      - value: '1'
        newvalue: Up
|
|
# Triggers that reference more than one item.
triggers:
  # Compares the 5-minute minimum of etcd.open.fds, as a percentage of the
  # latest etcd.max.fds, with {$ETCD.OPEN.FDS.MAX.WARN}.
  - uuid: c45583928d204c04ad8884115e1e35c5
    expression: 'min(/Etcd by HTTP/etcd.open.fds,5m)/last(/Etcd by HTTP/etcd.max.fds)*100>{$ETCD.OPEN.FDS.MAX.WARN}'
    name: 'Etcd: Current number of open files is too high'
    event_name: 'Etcd: Current number of open files is too high (over {$ETCD.OPEN.FDS.MAX.WARN}% for 5m)'
    priority: WARNING
    description: |
      Heavy usage of a file descriptor (i.e., near the limit of the process's file descriptor) indicates a potential file descriptor exhaustion issue.
      If the file descriptors are exhausted, `etcd` may panic because it cannot create new WAL files.
    tags:
      - tag: scope
        value: capacity
|
|
# Static graphs for host 'Etcd by HTTP'. Within each graph the first series
# has no explicit sortorder and later ones are numbered from '1'. Series
# colours recur in the order 199C0D, F63100, 00611C, F7941D.
graphs:
  # Open descriptors against the descriptor limit.
  - uuid: 18baccd03c0f4814a42d32b51334787d
    name: 'Etcd: File descriptors'
    graph_items:
      - drawtype: GRADIENT_LINE
        color: 199C0D
        item:
          host: 'Etcd by HTTP'
          key: etcd.open.fds
      - sortorder: '1'
        drawtype: BOLD_LINE
        color: F63100
        item:
          host: 'Etcd by HTTP'
          key: etcd.max.fds
  - uuid: eefd07cf30d84cc4b84f802468363200
    name: 'Etcd: gRPC client traffic'
    graph_items:
      - color: 199C0D
        item:
          host: 'Etcd by HTTP'
          key: etcd.network.grpc.received.rate
      - sortorder: '1'
        color: F63100
        item:
          host: 'Etcd by HTTP'
          key: etcd.network.grpc.sent.rate
  - uuid: c53ee0dba42d4a1f8afedbe0f6e42785
    name: 'Etcd: gRPC requests rate'
    graph_items:
      - color: 199C0D
        item:
          host: 'Etcd by HTTP'
          key: etcd.grpc.received.rate
      - sortorder: '1'
        color: F63100
        item:
          host: 'Etcd by HTTP'
          key: etcd.grpc.sent.rate
      - sortorder: '2'
        color: 00611C
        item:
          host: 'Etcd by HTTP'
          key: etcd.grpc.started.rate
  - uuid: 520ff92815d84e0f84e9296d249c04ae
    name: 'Etcd: HTTP requests rate'
    graph_items:
      - color: 199C0D
        item:
          host: 'Etcd by HTTP'
          key: etcd.http.requests.4xx.rate
      - sortorder: '1'
        color: F63100
        item:
          host: 'Etcd by HTTP'
          key: etcd.http.requests.5xx.rate
      - sortorder: '2'
        color: 00611C
        item:
          host: 'Etcd by HTTP'
          key: etcd.http.requests.rate
  - uuid: 90af5b2f75b7402693bad7a8f371ab8e
    name: 'Etcd: Memory usage'
    graph_items:
      - drawtype: GRADIENT_LINE
        color: 199C0D
        item:
          host: 'Etcd by HTTP'
          key: etcd.res.bytes
      - sortorder: '1'
        drawtype: GRADIENT_LINE
        color: F63100
        item:
          host: 'Etcd by HTTP'
          key: etcd.virtual.bytes
  # The three rate series share the default axis; the pending-proposals
  # series is drawn bold against the right-hand axis.
  - uuid: 59cd15292ad04ebd902a7d3080b53838
    name: 'Etcd: Proposals rate'
    graph_items:
      - color: 199C0D
        item:
          host: 'Etcd by HTTP'
          key: etcd.proposals.failed.rate
      - sortorder: '1'
        color: F63100
        item:
          host: 'Etcd by HTTP'
          key: etcd.proposals.committed.rate
      - sortorder: '2'
        color: 00611C
        item:
          host: 'Etcd by HTTP'
          key: etcd.proposals.applied.rate
      - sortorder: '3'
        drawtype: BOLD_LINE
        color: F7941D
        yaxisside: RIGHT
        item:
          host: 'Etcd by HTTP'
          key: etcd.proposals.pending
  - uuid: b374fab55bcc452e9279214ddb2c8024
    name: 'Etcd: Read/Write rate'
    graph_items:
      - color: 199C0D
        item:
          host: 'Etcd by HTTP'
          key: etcd.reads.rate
      - sortorder: '1'
        color: F63100
        item:
          host: 'Etcd by HTTP'
          key: etcd.writes.rate
|