# Zabbix 7.0 template export: "Etcd by HTTP".
#
# Two HTTP agent master items feed the dependent items:
#   etcd.get_metrics - raw Prometheus text from /metrics
#   etcd.get_version - JSON from /version
# Two low-level discovery rules (gRPC codes, peers) are also dependent on
# etcd.get_metrics. Connection details come from the {$ETCD.*} user macros
# declared in the "macros" section.
zabbix_export:
  version: '7.0'
  template_groups:
    - uuid: a571c0d144b14fd4a87a9d9b2aa9fcd6
      name: Templates/Applications
  templates:
    - uuid: b25b8b517a4743c48037bfa10af3dc3c
      template: 'Etcd by HTTP'
      name: 'Etcd by HTTP'
      description: |
        Get Etcd metrics by HTTP agent.

        Generated by official Zabbix template tool "Templator" 2.0.0
      vendor:
        name: Zabbix
        version: 7.0-0
      groups:
        - name: Templates/Applications
      items:
        - uuid: 32a59c8e93e141d6a471266df6dbfbd3
          name: 'Etcd: Cluster version'
          type: DEPENDENT
          key: etcd.cluster.version
          delay: '0'
          history: 7d
          trends: '0'
          value_type: CHAR
          description: 'The version of the `etcd cluster`.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.etcdcluster
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1d
          master_item:
            key: etcd.get_version
          tags:
            - tag: component
              value: application
          triggers:
            - uuid: 7c87424c4fb34c56bc9b976755b4ec02
              expression: 'last(/Etcd by HTTP/etcd.cluster.version,#1)<>last(/Etcd by HTTP/etcd.cluster.version,#2) and length(last(/Etcd by HTTP/etcd.cluster.version))>0'
              name: 'Etcd: Cluster version has changed'
              event_name: 'Etcd: Cluster version has changed (new version: {ITEM.VALUE})'
              priority: INFO
              description: 'Etcd version has changed. Acknowledge to close the problem manually.'
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
        - uuid: 04b0fa552b7d4267b4c5b67ee82ef5f1
          name: 'Etcd: CPU'
          type: DEPENDENT
          key: etcd.cpu.util
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: s
          description: 'The total user and system CPU time spent in seconds.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - process_cpu_seconds_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: cpu
        - uuid: bfa5bd42637642808802f7b2485a0c4d
          name: 'Etcd: DB size'
          type: DEPENDENT
          key: etcd.db.size
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: B
          description: 'The total size of the underlying database.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_mvcc_db_total_size_in_bytes
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: database
        - uuid: a5bc7ffb090641ab92f537b38b6055e5
          name: 'Etcd: Deletes per second'
          type: DEPENDENT
          key: etcd.delete.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of deletes seen by this member per second.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_mvcc_delete_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: operations
        # NOTE(review): the key says "events.sent.rate" but the item stores the
        # pending-events gauge. The key is kept as-is because renaming it would
        # break existing hosts and history.
        - uuid: 183843bd93f84dc887a03fb638b2d323
          name: 'Etcd: Pending events'
          type: DEPENDENT
          key: etcd.events.sent.rate
          delay: '0'
          history: 7d
          description: 'The total number of pending events to be sent.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_mvcc_pending_events_total
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: events
        # Master item: raw Prometheus exposition text, not stored (history 0).
        - uuid: 34ffab33275a400ab88e5217dee5ef96
          name: 'Etcd: Get node metrics'
          type: HTTP_AGENT
          key: etcd.get_metrics
          history: '0'
          trends: '0'
          value_type: TEXT
          authtype: BASIC
          username: '{$ETCD.USER}'
          password: '{$ETCD.PASSWORD}'
          url: '{$ETCD.SCHEME}://{HOST.CONN}:{$ETCD.PORT}/metrics'
          tags:
            - tag: component
              value: raw
        # Master item: JSON from /version, not stored (history 0).
        # Uses the same BASIC credentials as the other HTTP agent items so that
        # all three endpoints are queried consistently.
        - uuid: fdf7593420ab42b2a5af8f8b8030b517
          name: 'Etcd: Get version'
          type: HTTP_AGENT
          key: etcd.get_version
          history: '0'
          trends: '0'
          value_type: TEXT
          authtype: BASIC
          username: '{$ETCD.USER}'
          password: '{$ETCD.PASSWORD}'
          url: '{$ETCD.SCHEME}://{HOST.CONN}:{$ETCD.PORT}/version'
          tags:
            - tag: component
              value: application
        - uuid: d23baf75628043e193ba0a607e1b4215
          name: 'Etcd: RPCs received per second'
          type: DEPENDENT
          key: etcd.grpc.received.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of RPC stream messages received on the server.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - grpc_server_msg_received_total
            # Sum the value of every matched series into one counter.
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                    return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: grpc
        - uuid: eda81182710e47e1b5f2f21bb05b4775
          name: 'Etcd: RPCs sent per second'
          type: DEPENDENT
          key: etcd.grpc.sent.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of gRPC stream messages sent by the server.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - grpc_server_msg_sent_total
            # Sum the value of every matched series into one counter.
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                    return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: grpc
        - uuid: bf59a130b20d480d93eb9330750e8e28
          name: 'Etcd: RPCs started per second'
          type: DEPENDENT
          key: etcd.grpc.started.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of RPCs started on the server.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - grpc_server_started_total
            # Sum the value of every matched series into one counter.
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                    return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: grpc
        - uuid: e03575f4c472410eb6fbcf731ac6aab2
          name: 'Etcd: Server has a leader'
          type: DEPENDENT
          key: etcd.has.leader
          delay: '0'
          history: 7d
          description: |
            It defines - whether or not a leader exists:
            1 - it exists;
            0 - it does not.
          valuemap:
            name: 'Etcd leader'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_has_leader
                - value
                - ''
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 10m
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: leader
          triggers:
            - uuid: 20165719d521453bb239d818ac57805c
              expression: 'last(/Etcd by HTTP/etcd.has.leader)=0'
              name: 'Etcd: Member has no leader'
              priority: AVERAGE
              description: 'If a member does not have a leader, it is totally unavailable.'
              tags:
                - tag: scope
                  value: availability
        - uuid: 3fde4db8b9684ba4b56ba915e48957b5
          name: 'Etcd: Node health'
          type: HTTP_AGENT
          key: etcd.health
          history: 7d
          authtype: BASIC
          username: '{$ETCD.USER}'
          password: '{$ETCD.PASSWORD}'
          valuemap:
            name: 'Etcd healthcheck'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.health
            # A value that cannot be converted to a boolean is stored as 0.
            - type: BOOL_TO_DECIMAL
              parameters:
                - ''
              error_handler: CUSTOM_VALUE
              error_handler_params: '0'
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 10m
          url: '{$ETCD.SCHEME}://{HOST.CONN}:{$ETCD.PORT}/health'
          tags:
            - tag: component
              value: health
          triggers:
            - uuid: 6acda7bdc9df4a4ab5b7cca76c6369f0
              expression: 'last(/Etcd by HTTP/etcd.health)=0'
              name: 'Etcd: Node healthcheck failed'
              opdata: 'Current state: {ITEM.LASTVALUE1}'
              priority: AVERAGE
              description: 'See more details on https://etcd.io/docs/v3.5/op-guide/monitoring/#health-check.'
              dependencies:
                - name: 'Etcd: Service is unavailable'
                  expression: 'last(/Etcd by HTTP/net.tcp.service["{$ETCD.SCHEME}","{HOST.CONN}","{$ETCD.PORT}"])=0'
              tags:
                - tag: scope
                  value: availability
        - uuid: 923a408dd4514e808b6e2137a94f8140
          name: 'Etcd: HTTP 4XX'
          type: DEPENDENT
          key: etcd.http.requests.4xx.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of handled failures of requests (non-watches), by the method (`GET/PUT` etc.), and the code `4XX`.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - 'etcd_http_failed_total{code=~"4.+"}'
            # Sum the value of every matched series into one counter.
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                    return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: http
            - tag: http-code
              value: 4xx
        - uuid: c0f27d4bfba344079a31ce8c10b22683
          name: 'Etcd: HTTP 5XX'
          type: DEPENDENT
          key: etcd.http.requests.5xx.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of handled failures of requests (non-watches), by the method (`GET/PUT` etc.), and the code `5XX`.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - 'etcd_http_failed_total{code=~"5.+"}'
            # Sum the value of every matched series into one counter.
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                    return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: http
            - tag: http-code
              value: 5xx
          triggers:
            - uuid: 0302957e0f6b43389546e1cfb958ed9c
              expression: 'min(/Etcd by HTTP/etcd.http.requests.5xx.rate,5m)>{$ETCD.HTTP.FAIL.MAX.WARN}'
              name: 'Etcd: Too many HTTP requests failures'
              event_name: 'Etcd: Too many HTTP requests failures (over {$ETCD.HTTP.FAIL.MAX.WARN} for 5m)'
              priority: WARNING
              description: 'Too many requests failed on `etcd` instance with the `5xx HTTP code`.'
              tags:
                - tag: scope
                  value: availability
        - uuid: 2a19db1c58ee4a509061fcb1b557c1a3
          name: 'Etcd: HTTP requests received'
          type: DEPENDENT
          key: etcd.http.requests.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of requests received into the system (successfully parsed and `authd`).'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - etcd_http_received_total
            # Sum the value of every matched series into one counter.
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                    return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: http
        - uuid: b3760811472440baad6a338f481ba13a
          name: 'Etcd: Server is a leader'
          type: DEPENDENT
          key: etcd.is.leader
          delay: '0'
          history: 7d
          description: |
            It defines - whether or not this member is a leader:
            1 - it is;
            0 - otherwise.
          valuemap:
            name: 'Etcd leader'
          preprocessing:
            # If the pattern step fails, the item is stored as 0.
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_is_leader
                - value
                - ''
              error_handler: CUSTOM_VALUE
              error_handler_params: '0'
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 10m
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: leader
          triggers:
            - uuid: 44d66eb1a332418daf4c3a1110db5458
              expression: 'nodata(/Etcd by HTTP/etcd.is.leader,30m)=1'
              name: 'Etcd: Failed to fetch info data'
              event_name: 'Etcd: Failed to fetch info data (or no data for 30m)'
              priority: WARNING
              description: 'Zabbix has not received any data for items for the last 30 minutes.'
              manual_close: 'YES'
              dependencies:
                - name: 'Etcd: Service is unavailable'
                  expression: 'last(/Etcd by HTTP/net.tcp.service["{$ETCD.SCHEME}","{HOST.CONN}","{$ETCD.PORT}"])=0'
              tags:
                - tag: scope
                  value: notice
        - uuid: ecd1ae9c038f4fc2b720ad562ced0191
          name: 'Etcd: Keys compacted per second'
          type: DEPENDENT
          key: etcd.keys.compacted.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of DB keys compacted per second.'
          preprocessing:
            # If the pattern step fails, the counter is taken as 0.
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_mvcc_db_compaction_keys_total
                - value
                - ''
              error_handler: CUSTOM_VALUE
              error_handler_params: '0'
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: keys
        - uuid: a3f910efb0a04cc494c07b8703f9d2ec
          name: 'Etcd: Keys expired per second'
          type: DEPENDENT
          key: etcd.keys.expired.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of expired keys per second.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_store_expires_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: keys
        - uuid: fbda737014544cf1bcf544a48aa6e48b
          name: 'Etcd: Keys total'
          type: DEPENDENT
          key: etcd.keys.total
          delay: '0'
          history: 7d
          description: 'The total number of keys.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_mvcc_keys_total
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: keys
        - uuid: e45ba61d99b8432b86f5797a2cfdb416
          name: 'Etcd: Leader changes'
          type: DEPENDENT
          key: etcd.leader.changes
          delay: '0'
          history: 7d
          description: 'The number of leader changes the member has seen since its start.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_leader_changes_seen_total
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: leader
          triggers:
            - uuid: 1ea623927179489890a5a73eeb8177f7
              expression: '(max(/Etcd by HTTP/etcd.leader.changes,15m)-min(/Etcd by HTTP/etcd.leader.changes,15m))>{$ETCD.LEADER.CHANGES.MAX.WARN}'
              name: 'Etcd: Instance has seen too many leader changes'
              event_name: 'Etcd: Instance has seen too many leader changes (over {$ETCD.LEADER.CHANGES.MAX.WARN} for 15m)'
              priority: WARNING
              description: 'Rapid leadership changes impact the performance of `etcd` significantly. It also signals that the leader is unstable, perhaps due to network connectivity issues or excessive load hitting the `etcd cluster`.'
              tags:
                - tag: scope
                  value: availability
        - uuid: 348e15d2ec3a4bb88e2ca371f96c2f00
          name: 'Etcd: Maximum open file descriptors'
          type: DEPENDENT
          key: etcd.max.fds
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The Maximum number of open file descriptors.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - process_max_fds
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: fds
        - uuid: d016b8674ebd4251943f2e94b22f5ff2
          name: 'Etcd: Client gRPC received bytes per second'
          type: DEPENDENT
          key: etcd.network.grpc.received.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Bps
          description: 'The number of bytes received from gRPC clients per second.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_network_client_grpc_received_bytes_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: grpc
        - uuid: e50d2d088c6448dbb3ecaeebc3b2b8f1
          name: 'Etcd: Client gRPC sent bytes per second'
          type: DEPENDENT
          key: etcd.network.grpc.sent.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: Bps
          description: 'The number of bytes sent to gRPC clients per second.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_network_client_grpc_sent_bytes_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: grpc
        - uuid: d5099d4cdb3044ba95935c2aea2b6352
          name: 'Etcd: Open file descriptors'
          type: DEPENDENT
          key: etcd.open.fds
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of open file descriptors.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - process_open_fds
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: fds
        - uuid: a2927b1e85af41cab9c28b1b79c229ea
          name: 'Etcd: Proposals applied per second'
          type: DEPENDENT
          key: etcd.proposals.applied.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of consensus proposals applied.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_proposals_applied_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: proposals
        - uuid: e829f3df055e42dfbce5f27eb7ca487c
          name: 'Etcd: Proposals committed per second'
          type: DEPENDENT
          key: etcd.proposals.committed.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of consensus proposals committed.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_proposals_committed_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: proposals
        - uuid: 6b45b99526394a219d31b5c22cb98c85
          name: 'Etcd: Proposals failed per second'
          type: DEPENDENT
          key: etcd.proposals.failed.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          description: 'The number of failed proposals seen.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_proposals_failed_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: proposals
          triggers:
            - uuid: 432cea8bddd742ca98528be2fbc7e35e
              expression: 'min(/Etcd by HTTP/etcd.proposals.failed.rate,5m)>{$ETCD.PROPOSAL.FAIL.MAX.WARN}'
              name: 'Etcd: Too many proposal failures'
              event_name: 'Etcd: Too many proposal failures (over {$ETCD.PROPOSAL.FAIL.MAX.WARN} for 5m)'
              priority: WARNING
              description: 'Normally related to two issues: temporary failures related to a leader election or longer downtime caused by a loss of quorum in the cluster.'
              tags:
                - tag: scope
                  value: performance
        - uuid: 1c506ff69e7b4564a6d95fd35b1a11fd
          name: 'Etcd: Proposals pending'
          type: DEPENDENT
          key: etcd.proposals.pending
          delay: '0'
          history: 7d
          description: 'The current number of pending proposals to commit.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_server_proposals_pending
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: proposals
          triggers:
            - uuid: 5feefc4dd5d14fe2b56dd63029b57026
              expression: 'min(/Etcd by HTTP/etcd.proposals.pending,5m)>{$ETCD.PROPOSAL.PENDING.MAX.WARN}'
              name: 'Etcd: Too many proposals are queued to commit'
              event_name: 'Etcd: Too many proposals are queued to commit (over {$ETCD.PROPOSAL.PENDING.MAX.WARN} for 5m)'
              priority: WARNING
              description: 'Rising pending proposals suggests there is a high client load, or the member cannot commit proposals.'
              tags:
                - tag: scope
                  value: performance
        - uuid: bd7398507c274bfab53339380df16761
          name: 'Etcd: PUT per second'
          type: DEPENDENT
          key: etcd.put.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of puts seen by this member per second.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_mvcc_put_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: operations
        - uuid: b744c07f3290467b96b21ea38ad5d497
          name: 'Etcd: Range per second'
          type: DEPENDENT
          key: etcd.range.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of ranges seen by this member per second.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_mvcc_range_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: operations
        - uuid: 88c91b36eca94fd2b357a67d171dc621
          name: 'Etcd: Reads per second'
          type: DEPENDENT
          key: etcd.reads.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of read actions by `get/getRecursive`, local to this member.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - etcd_debugging_store_reads_total
            - type: JAVASCRIPT
              parameters:
                - |
                  //calculates total reads
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                    return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: store
        - uuid: 4b881e32094e4f478c5d0849cb5d07a7
          name: 'Etcd: Resident memory'
          type: DEPENDENT
          key: etcd.res.bytes
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: B
          description: 'The size of resident memory expressed in bytes.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - process_resident_memory_bytes
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: memory
        - uuid: dee9ed8897cf4d3582957707ea09cdf8
          name: 'Etcd: Server version'
          type: DEPENDENT
          key: etcd.server.version
          delay: '0'
          history: 7d
          trends: '0'
          value_type: CHAR
          description: 'The version of the `etcd server`.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.etcdserver
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1d
          master_item:
            key: etcd.get_version
          tags:
            - tag: component
              value: application
          triggers:
            - uuid: cfb2fc467b224ef694d59b5c081ed965
              expression: 'last(/Etcd by HTTP/etcd.server.version,#1)<>last(/Etcd by HTTP/etcd.server.version,#2) and length(last(/Etcd by HTTP/etcd.server.version))>0'
              name: 'Etcd: Server version has changed'
              event_name: 'Etcd: Server version has changed (new version: {ITEM.VALUE})'
              priority: INFO
              description: 'Etcd version has changed. Acknowledge to close the problem manually.'
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
        - uuid: b14c787c716146e990bc388d277a2803
          name: 'Etcd: Transaction per second'
          type: DEPENDENT
          key: etcd.txn.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of transactions seen by this member per second.'
          preprocessing:
            # Must read the txn counter; the range counter belongs to
            # etcd.range.rate and would duplicate that item here.
            - type: PROMETHEUS_PATTERN
              parameters:
                - etcd_debugging_mvcc_txn_total
                - value
                - ''
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: transactions
        - uuid: 98ec9085d621446aa462efc86cf93905
          name: 'Etcd: Uptime'
          type: DEPENDENT
          key: etcd.uptime
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: s
          description: '`Etcd` server uptime.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - process_start_time_seconds
                - value
                - ''
            - type: JAVASCRIPT
              parameters:
                - |
                  //use boottime to calculate uptime
                  return (Math.floor(Date.now()/1000)-Number(value));
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: application
          triggers:
            - uuid: 6843369969f5410a840494104d71fe1f
              expression: 'last(/Etcd by HTTP/etcd.uptime)<10m'
              name: 'Etcd: Host has been restarted'
              event_name: 'Etcd: {HOST.NAME} has been restarted (uptime < 10m)'
              priority: INFO
              description: 'Uptime is less than 10 minutes.'
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
        - uuid: c35810b8b7bc4a62970b5293fb2d8fb2
          name: 'Etcd: Virtual memory'
          type: DEPENDENT
          key: etcd.virtual.bytes
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: B
          description: 'The size of virtual memory expressed in bytes.'
          preprocessing:
            - type: PROMETHEUS_PATTERN
              parameters:
                - process_virtual_memory_bytes
                - value
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: memory
        - uuid: 16c041fc189248bfaaa5826ffaf38459
          name: 'Etcd: Writes per second'
          type: DEPENDENT
          key: etcd.writes.rate
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: rps
          description: 'The number of writes (e.g., `set/compareAndDelete`) seen by this member.'
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - etcd_debugging_store_writes_total
            # Sum the value of every matched series into one counter.
            - type: JAVASCRIPT
              parameters:
                - |
                  var valueArr = JSON.parse(value);
                  return valueArr.reduce(function(acc,obj){
                    return acc + parseFloat(obj['value'])
                  },0);
            - type: CHANGE_PER_SECOND
              parameters:
                - ''
          master_item:
            key: etcd.get_metrics
          tags:
            - tag: component
              value: store
        # Simple check; its trigger is the dependency target of the
        # "Node healthcheck failed" and "Failed to fetch info data" triggers.
        - uuid: a0f94f429b99432e86d15ffa74d6eada
          name: 'Etcd: Service''s TCP port state'
          type: SIMPLE
          key: 'net.tcp.service["{$ETCD.SCHEME}","{HOST.CONN}","{$ETCD.PORT}"]'
          history: 7d
          valuemap:
            name: 'Service state'
          preprocessing:
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 10m
          tags:
            - tag: component
              value: health
            - tag: component
              value: network
          triggers:
            - uuid: 74164f0783ae4227ba44f3e865fee3bd
              expression: 'last(/Etcd by HTTP/net.tcp.service["{$ETCD.SCHEME}","{HOST.CONN}","{$ETCD.PORT}"])=0'
              name: 'Etcd: Service is unavailable'
              priority: AVERAGE
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: availability
      discovery_rules:
        - uuid: 5e6121383e5d4f3eb1150a2068a4633b
          name: 'gRPC codes discovery'
          type: DEPENDENT
          key: etcd.grpc_code.discovery
          delay: '0'
          filter:
            evaltype: AND
            conditions:
              - macro: '{#GRPC.CODE}'
                value: '{$ETCD.GRPC_CODE.NOT_MATCHES}'
                operator: NOT_MATCHES_REGEX
                formulaid: A
              - macro: '{#GRPC.CODE}'
                value: '{$ETCD.GRPC_CODE.MATCHES}'
                formulaid: B
          item_prototypes:
            - uuid: 7d316cbec2ce4718ac133d90b7a89585
              name: 'Etcd: RPCs completed with code {#GRPC.CODE}'
              type: DEPENDENT
              key: 'etcd.grpc.handled.rate[{#GRPC.CODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of RPCs completed on the server with grpc_code {#GRPC.CODE}.'
              preprocessing:
                # {#GRPC.CODE} is filled from the grpc_code label by the
                # discovery script below, so the filter must use that label.
                - type: PROMETHEUS_TO_JSON
                  parameters:
                    - 'grpc_server_handled_total{grpc_code="{#GRPC.CODE}"}'
                # Sum the value of every matched series into one counter.
                - type: JAVASCRIPT
                  parameters:
                    - |
                      var valueArr = JSON.parse(value);
                      return valueArr.reduce(function(acc,obj){
                        return acc + parseFloat(obj['value'])
                      },0);
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: etcd.get_metrics
              tags:
                - tag: component
                  value: grpc
                - tag: grpc-code
                  value: '{#GRPC.CODE}'
              trigger_prototypes:
                - uuid: 459b6ee5735047d597a6a4ab41b76e21
                  expression: 'min(/Etcd by HTTP/etcd.grpc.handled.rate[{#GRPC.CODE}],5m)>{$ETCD.GRPC.ERRORS.MAX.WARN}'
                  name: 'Etcd: Too many failed gRPC requests with code: {#GRPC.CODE}'
                  event_name: 'Etcd: Too many failed gRPC requests with code: {#GRPC.CODE} (over {$ETCD.GRPC.ERRORS.MAX.WARN} in 5m)'
                  priority: WARNING
                  tags:
                    - tag: scope
                      value: availability
          master_item:
            key: etcd.get_metrics
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - grpc_server_handled_total
            # Emit one LLD row per distinct grpc_code label value.
            - type: JAVASCRIPT
              parameters:
                - |
                  var data = JSON.parse(value),
                      lookup = {},
                      result = [];

                  for (var item, i = 0; item = data[i++];) {
                      var code = item.labels.grpc_code;
                      if (!(code in lookup)) {
                          lookup[code] = 1;
                          result.push({ "{#GRPC.CODE}": code });
                      }
                  }

                  return JSON.stringify(result);
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          overrides:
            - name: trigger
              step: '1'
              filter:
                conditions:
                  - macro: '{#GRPC.CODE}'
                    value: '{$ETCD.GRPC_CODE.TRIGGER.MATCHES}'
                    formulaid: A
              operations:
                - operationobject: TRIGGER_PROTOTYPE
                  operator: LIKE
                  value: 'Too many failed gRPC requests'
                  status: ENABLED
                  discover: DISCOVER
        - uuid: b7b527ee30b84a569afcd1f85b705810
          name: 'Peers discovery'
          type: DEPENDENT
          key: etcd.peer.discovery
          delay: '0'
          item_prototypes:
            - uuid: 4129aa7b8acf4ca3b5476461fe5275c9
              name: 'Etcd: Etcd peer {#ETCD.PEER}: Bytes received'
              type: DEPENDENT
              key: 'etcd.bytes.received.rate[{#ETCD.PEER}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: Bps
              description: 'The number of bytes received from a peer with the ID `{#ETCD.PEER}`.'
              preprocessing:
                - type: PROMETHEUS_PATTERN
                  parameters:
                    - 'etcd_network_peer_received_bytes_total{From="{#ETCD.PEER}"}'
                    - value
                    - ''
                  error_handler: CUSTOM_VALUE
                  error_handler_params: '0'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: etcd.get_metrics
              tags:
                - tag: component
                  value: network
                - tag: component
                  value: peers
                - tag: peer
                  value: '{#ETCD.PEER}'
            - uuid: 8f5fecbabe474baaab40df46879401af
              name: 'Etcd: Etcd peer {#ETCD.PEER}: Bytes sent'
              type: DEPENDENT
              key: 'etcd.bytes.sent.rate[{#ETCD.PEER}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: Bps
              description: 'The number of bytes sent to a peer with the ID `{#ETCD.PEER}`.'
              preprocessing:
                - type: PROMETHEUS_PATTERN
                  parameters:
                    - 'etcd_network_peer_sent_bytes_total{To="{#ETCD.PEER}"}'
                    - value
                    - ''
                  error_handler: CUSTOM_VALUE
                  error_handler_params: '0'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: etcd.get_metrics
              tags:
                - tag: component
                  value: network
                - tag: component
                  value: peers
                - tag: peer
                  value: '{#ETCD.PEER}'
            - uuid: 2521ccfc16fc43069001883b85aa0243
              name: 'Etcd: Etcd peer {#ETCD.PEER}: Receive failures'
              type: DEPENDENT
              key: 'etcd.received.fail.rate[{#ETCD.PEER}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of received failures from a peer with the ID `{#ETCD.PEER}`.'
              preprocessing:
                # Receive-side metric: filtered by "From", like the
                # "Bytes received" prototype above.
                - type: PROMETHEUS_PATTERN
                  parameters:
                    - 'etcd_network_peer_received_failures_total{From="{#ETCD.PEER}"}'
                    - value
                    - ''
                  error_handler: CUSTOM_VALUE
                  error_handler_params: '0'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: etcd.get_metrics
              tags:
                - tag: component
                  value: network
                - tag: component
                  value: peers
                - tag: peer
                  value: '{#ETCD.PEER}'
            - uuid: 5756f1a16e5c42b79f6d6225c5382599
              name: 'Etcd: Etcd peer {#ETCD.PEER}: Send failures'
              type: DEPENDENT
              key: 'etcd.sent.fail.rate[{#ETCD.PEER}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of sent failures from a peer with the ID `{#ETCD.PEER}`.'
              preprocessing:
                - type: PROMETHEUS_PATTERN
                  parameters:
                    - 'etcd_network_peer_sent_failures_total{To="{#ETCD.PEER}"}'
                    - value
                    - ''
                  error_handler: CUSTOM_VALUE
                  error_handler_params: '0'
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: etcd.get_metrics
              tags:
                - tag: component
                  value: network
                - tag: component
                  value: peers
                - tag: peer
                  value: '{#ETCD.PEER}'
          master_item:
            key: etcd.get_metrics
          # Peer IDs are taken from the "To" label of the sent-bytes metric.
          lld_macro_paths:
            - lld_macro: '{#ETCD.PEER}'
              path: $.labels.To
          preprocessing:
            - type: PROMETHEUS_TO_JSON
              parameters:
                - etcd_network_peer_sent_bytes_total
      tags:
        - tag: class
          value: application
        - tag: target
          value: etcd
      macros:
        - macro: '{$ETCD.GRPC.ERRORS.MAX.WARN}'
          value: '1'
          description: 'The maximum number of gRPC request failures.'
        - macro: '{$ETCD.GRPC_CODE.MATCHES}'
          value: '.*'
          description: 'The filter of discoverable gRPC codes. See more details on https://github.com/grpc/grpc/blob/master/doc/statuscodes.md.'
        - macro: '{$ETCD.GRPC_CODE.NOT_MATCHES}'
          value: CHANGE_IF_NEEDED
          description: 'The filter to exclude discovered gRPC codes. See more details on https://github.com/grpc/grpc/blob/master/doc/statuscodes.md.'
        - macro: '{$ETCD.GRPC_CODE.TRIGGER.MATCHES}'
          value: Aborted|Unavailable
          description: 'The filter of discoverable gRPC codes, which will create triggers.'
        - macro: '{$ETCD.HTTP.FAIL.MAX.WARN}'
          value: '2'
          description: 'The maximum number of HTTP request failures.'
        - macro: '{$ETCD.LEADER.CHANGES.MAX.WARN}'
          value: '5'
          description: 'The maximum number of leader changes.'
        - macro: '{$ETCD.OPEN.FDS.MAX.WARN}'
          value: '90'
          description: 'The maximum percentage of used file descriptors.'
        - macro: '{$ETCD.PASSWORD}'
        - macro: '{$ETCD.PORT}'
          value: '2379'
          description: 'The port of `etcd` API endpoint.'
        - macro: '{$ETCD.PROPOSAL.FAIL.MAX.WARN}'
          value: '2'
          description: 'The maximum number of proposal failures.'
        - macro: '{$ETCD.PROPOSAL.PENDING.MAX.WARN}'
          value: '5'
          description: 'The maximum number of proposals in queue.'
        - macro: '{$ETCD.SCHEME}'
          value: http
          description: 'The request scheme which may be `http` or `https`.'
        - macro: '{$ETCD.USER}'
      dashboards:
        - uuid: 5b0ffbb731cd4415a2edbc74978e0276
          name: 'Etcd Overview'
          pages:
            - widgets:
                - type: graph
                  width: '12'
                  height: '5'
                  fields:
                    - type: GRAPH
                      name: graphid
                      value:
                        host: 'Etcd by HTTP'
                        name: 'Etcd: Proposals rate'
                - type: graph
                  x: '12'
                  width: '12'
                  height: '5'
                  fields:
                    - type: GRAPH
                      name: graphid
                      value:
                        host: 'Etcd by HTTP'
                        name: 'Etcd: Read/Write rate'
                - type: graph
                  'y': '5'
                  width: '12'
                  height: '5'
                  fields:
                    - type: GRAPH
                      name: graphid
                      value:
                        host: 'Etcd by HTTP'
                        name: 'Etcd: gRPC client traffic'
                - type: graph
                  x: '12'
                  'y': '5'
                  width: '12'
                  height: '5'
                  fields:
                    - type: GRAPH
                      name: graphid
                      value:
                        host: 'Etcd by HTTP'
                        name: 'Etcd: HTTP requests rate'
                - type: graph
                  'y': '10'
                  width: '12'
                  height: '5'
                  fields:
                    - type: GRAPH
                      name: graphid
                      value:
                        host: 'Etcd by HTTP'
                        name: 'Etcd: gRPC requests rate'
                - type: graph
                  x: '12'
                  'y': '10'
                  width: '12'
                  height: '5'
                  fields:
                    - type: GRAPH
                      name: graphid
                      value:
                        host: 'Etcd by HTTP'
                        name: 'Etcd: Memory usage'
                - type: graph
                  'y': '15'
                  width: '24'
                  height: '5'
                  fields:
                    - type: GRAPH
                      name: graphid
                      value:
                        host: 'Etcd by HTTP'
                        name: 'Etcd: File descriptors'
      valuemaps:
        - uuid: f25e21a70baa4e009bdbcb44acb1a22e
          name: 'Etcd healthcheck'
          mappings:
            - value: '0'
              newvalue: Failed
            - value: '1'
              newvalue: Ok
        - uuid: 7bcaf8a520e24613a96d49e63a91a55b
          name: 'Etcd leader'
          mappings:
            - value: '0'
              newvalue: 'No'
            - value: '1'
              newvalue: 'Yes'
        - uuid: 1735a8d251b24c3fbab32e766064536b
          name: 'Service state'
          mappings:
            - value: '0'
              newvalue: Down
            - value: '1'
              newvalue: Up
  # This trigger spans two items (open and maximum file descriptors), so it is
  # declared at export level rather than under a single item.
  triggers:
    - uuid: c45583928d204c04ad8884115e1e35c5
      expression: 'min(/Etcd by HTTP/etcd.open.fds,5m)/last(/Etcd by HTTP/etcd.max.fds)*100>{$ETCD.OPEN.FDS.MAX.WARN}'
      name: 'Etcd: Current number of open files is too high'
      event_name: 'Etcd: Current number of open files is too high (over {$ETCD.OPEN.FDS.MAX.WARN}% for 5m)'
      priority: WARNING
      description: |
        Heavy usage of a file descriptor (i.e., near the limit of the process's file descriptor) indicates a potential file descriptor exhaustion issue.
        If the file descriptors are exhausted, `etcd` may panic because it cannot create new WAL files.
      tags:
        - tag: scope
          value: capacity
  graphs:
    - uuid: 18baccd03c0f4814a42d32b51334787d
      name: 'Etcd: File descriptors'
      graph_items:
        - drawtype: GRADIENT_LINE
          color: 199C0D
          item:
            host: 'Etcd by HTTP'
            key: etcd.open.fds
        - sortorder: '1'
          drawtype: BOLD_LINE
          color: F63100
          item:
            host: 'Etcd by HTTP'
            key: etcd.max.fds
    - uuid: eefd07cf30d84cc4b84f802468363200
      name: 'Etcd: gRPC client traffic'
      graph_items:
        - color: 199C0D
          item:
            host: 'Etcd by HTTP'
            key: etcd.network.grpc.received.rate
        - sortorder: '1'
          color: F63100
          item:
            host: 'Etcd by HTTP'
            key: etcd.network.grpc.sent.rate
    - uuid: c53ee0dba42d4a1f8afedbe0f6e42785
      name: 'Etcd: gRPC requests rate'
      graph_items:
        - color: 199C0D
          item:
            host: 'Etcd by HTTP'
            key: etcd.grpc.received.rate
        - sortorder: '1'
          color: F63100
          item:
            host: 'Etcd by HTTP'
            key: etcd.grpc.sent.rate
        - sortorder: '2'
          color: 00611C
          item:
            host: 'Etcd by HTTP'
            key: etcd.grpc.started.rate
    - uuid: 520ff92815d84e0f84e9296d249c04ae
      name: 'Etcd: HTTP requests rate'
      graph_items:
        - color: 199C0D
          item:
            host: 'Etcd by HTTP'
            key: etcd.http.requests.4xx.rate
        - sortorder: '1'
          color: F63100
          item:
            host: 'Etcd by HTTP'
            key: etcd.http.requests.5xx.rate
        - sortorder: '2'
          color: 00611C
          item:
            host: 'Etcd by HTTP'
            key: etcd.http.requests.rate
    - uuid: 90af5b2f75b7402693bad7a8f371ab8e
      name: 'Etcd: Memory usage'
      graph_items:
        - drawtype: GRADIENT_LINE
          color: 199C0D
          item:
            host: 'Etcd by HTTP'
            key: etcd.res.bytes
        - sortorder: '1'
          drawtype: GRADIENT_LINE
          color: F63100
          item:
            host: 'Etcd by HTTP'
            key: etcd.virtual.bytes
    - uuid: 59cd15292ad04ebd902a7d3080b53838
      name: 'Etcd: Proposals rate'
      graph_items:
        - color: 199C0D
          item:
            host: 'Etcd by HTTP'
            key: etcd.proposals.failed.rate
        - sortorder: '1'
          color: F63100
          item:
            host: 'Etcd by HTTP'
            key: etcd.proposals.committed.rate
        - sortorder: '2'
          color: 00611C
          item:
            host: 'Etcd by HTTP'
            key: etcd.proposals.applied.rate
        - sortorder: '3'
          drawtype: BOLD_LINE
          color: F7941D
          yaxisside: RIGHT
          item:
            host: 'Etcd by HTTP'
            key: etcd.proposals.pending
    - uuid: b374fab55bcc452e9279214ddb2c8024
      name: 'Etcd: Read/Write rate'
      graph_items:
        - color: 199C0D
          item:
            host: 'Etcd by HTTP'
            key: etcd.reads.rate
        - sortorder: '1'
          color: F63100
          item:
            host: 'Etcd by HTTP'
            key: etcd.writes.rate