# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

# 1891 lines
# 78 KiB

zabbix_export:
version: '7.0'
template_groups:
- uuid: a571c0d144b14fd4a87a9d9b2aa9fcd6
name: Templates/Applications
templates:
- uuid: 52b2664578884d9eba62e47375c99f8e
template: 'Elasticsearch Cluster by HTTP'
name: 'Elasticsearch Cluster by HTTP'
description: |
The template to monitor Elasticsearch by Zabbix that works without any external scripts.
It works with both standalone and cluster instances.
The metrics are collected in one pass remotely using an HTTP agent.
They are getting values from REST API _cluster/health, _cluster/stats, _nodes/stats requests.
You can set {$ELASTICSEARCH.USERNAME} and {$ELASTICSEARCH.PASSWORD} macros in the template for using on the host level.
If the ES API is served from a non-default location, don't forget to change the macros {$ELASTICSEARCH.SCHEME} and {$ELASTICSEARCH.PORT}.
You can discuss this template or leave feedback on our forum https://www.zabbix.com/forum/zabbix-suggestions-and-feedback/399473-discussion-thread-for-official-zabbix-template-for-elasticsearch
Generated by official Zabbix template tool "Templator" 2.0.0
vendor:
name: Zabbix
version: 7.0-0
groups:
- name: Templates/Applications
items:
# Dependent item: reads delayed_unassigned_shards from the es.cluster.get_health master item.
- uuid: f3531c005c7f477b9916b4bf1ad273c2
  name: 'ES: Delayed unassigned shards'
  type: DEPENDENT
  key: es.cluster.delayed_unassigned_shards
  delay: '0'
  history: 7d
  description: 'The number of shards whose allocation has been delayed by the timeout settings.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.delayed_unassigned_shards
  master_item:
    key: es.cluster.get_health
  tags:
    - tag: component
      value: shards
# Master item: HTTP agent request to the _cluster/health endpoint using basic auth.
# Its TEXT result is referenced by the dependent items whose master_item key is es.cluster.get_health.
- uuid: 7f587fd270be4eb68d81ae1de2a3ca1f
  name: 'ES: Get cluster health'
  type: HTTP_AGENT
  key: es.cluster.get_health
  history: '0'
  trends: '0'
  value_type: TEXT
  authtype: BASIC
  username: '{$ELASTICSEARCH.USERNAME}'
  password: '{$ELASTICSEARCH.PASSWORD}'
  description: 'Returns the health status of a cluster.'
  timeout: 15s
  url: '{$ELASTICSEARCH.SCHEME}://{HOST.CONN}:{$ELASTICSEARCH.PORT}/_cluster/health?timeout=5s'
  tags:
    - tag: component
      value: raw
# Master item: HTTP agent request to the _cluster/stats endpoint using basic auth.
# Its TEXT result is referenced by the dependent items whose master_item key is es.cluster.get_stats.
- uuid: 7066a66f352e4d79ba4aec11c0c5c611
  name: 'ES: Get cluster stats'
  type: HTTP_AGENT
  key: es.cluster.get_stats
  history: '0'
  trends: '0'
  value_type: TEXT
  authtype: BASIC
  username: '{$ELASTICSEARCH.USERNAME}'
  password: '{$ELASTICSEARCH.PASSWORD}'
  description: 'Returns cluster statistics.'
  timeout: 15s
  url: '{$ELASTICSEARCH.SCHEME}://{HOST.CONN}:{$ELASTICSEARCH.PORT}/_cluster/stats'
  tags:
    - tag: component
      value: raw
# Dependent item: derives the inactive percentage as 100 minus active_shards_percent_as_number.
- uuid: 500a763b9bfd4044b2d3bc95d3a0586c
  name: 'ES: Inactive shards percentage'
  type: DEPENDENT
  key: es.cluster.inactive_shards_percent_as_number
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: '%'
  description: 'The ratio of inactive shards in the cluster expressed as a percentage.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.active_shards_percent_as_number
    - type: JAVASCRIPT
      parameters:
        - 'return (100 - value)'
  master_item:
    key: es.cluster.get_health
  tags:
    - tag: component
      value: shards
# Dependent item: reads initializing_shards from the cluster health response.
# The attached trigger fires when the 10-minute minimum stays above zero.
- uuid: e0b684d5992a496981cf6fb9bb85be62
  name: 'ES: Number of initializing shards'
  type: DEPENDENT
  key: es.cluster.initializing_shards
  delay: '0'
  history: 7d
  description: 'The number of shards that are under initialization.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.initializing_shards
  master_item:
    key: es.cluster.get_health
  tags:
    - tag: component
      value: shards
  triggers:
    - uuid: ee59684bb2044540ad0306a5b6deb408
      expression: 'min(/Elasticsearch Cluster by HTTP/es.cluster.initializing_shards,10m)>0'
      name: 'ES: Cluster has the initializing shards'
      priority: AVERAGE
      description: 'The cluster has the initializing shards longer than 10 minutes.'
      tags:
        - tag: scope
          value: notice
# Dependent item: reads number_of_data_nodes from the cluster health response.
# Unchanged values are discarded, with a 1h heartbeat.
- uuid: 7d3c87e2fcae49438a14380f7d5faa81
  name: 'ES: Number of data nodes'
  type: DEPENDENT
  key: es.cluster.number_of_data_nodes
  delay: '0'
  history: 7d
  description: 'The number of nodes that are dedicated data nodes.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.number_of_data_nodes
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: es.cluster.get_health
  tags:
    - tag: component
      value: nodes
# Dependent item: reads number_of_nodes from the cluster health response.
# Two INFO triggers report a decrease (change < 0) or an increase (change > 0).
- uuid: 4ec7496b441643f39df3e25c0225e6ec
  name: 'ES: Number of nodes'
  type: DEPENDENT
  key: es.cluster.number_of_nodes
  delay: '0'
  history: 7d
  description: 'The number of nodes within the cluster.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.number_of_nodes
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: es.cluster.get_health
  tags:
    - tag: component
      value: nodes
  triggers:
    - uuid: 5be4cb40e72442aca18ea014adbead23
      expression: 'change(/Elasticsearch Cluster by HTTP/es.cluster.number_of_nodes)<0'
      name: 'ES: The number of nodes within the cluster has decreased'
      priority: INFO
      manual_close: 'YES'
      tags:
        - tag: scope
          value: notice
    - uuid: 2fba2b29425b4ff9a4008db9bde49d7f
      expression: 'change(/Elasticsearch Cluster by HTTP/es.cluster.number_of_nodes)>0'
      name: 'ES: The number of nodes within the cluster has increased'
      priority: INFO
      manual_close: 'YES'
      tags:
        - tag: scope
          value: notice
# Dependent item: reads number_of_pending_tasks from the cluster health response.
- uuid: 2c84eea7f4a642f8892cb7f50febb562
  name: 'ES: Number of pending tasks'
  type: DEPENDENT
  key: es.cluster.number_of_pending_tasks
  delay: '0'
  history: 7d
  description: 'The number of cluster-level changes that have not yet been executed.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.number_of_pending_tasks
  master_item:
    key: es.cluster.get_health
  tags:
    - tag: component
      value: tasks
# Dependent item: reads relocating_shards from the cluster health response.
- uuid: 981818f43a3c4b36b36e4b3c4e3468e9
  name: 'ES: Number of relocating shards'
  type: DEPENDENT
  key: es.cluster.relocating_shards
  delay: '0'
  history: 7d
  description: 'The number of shards that are under relocation.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.relocating_shards
  master_item:
    key: es.cluster.get_health
  tags:
    - tag: component
      value: shards
# Dependent item: converts the textual cluster status into a number via JavaScript
# (index in ['green', 'yellow', 'red'], or 255 when the value is not in that list).
# The triggers below match on 2 (RED), 255 (UNKNOWN) and 1 (YELLOW).
- uuid: b9568a6370dc40efae45ac1e0b719dd7
  name: 'ES: Cluster health status'
  type: DEPENDENT
  key: es.cluster.status
  delay: '0'
  history: 7d
  description: |
    Health status of the cluster, based on the state of its primary and replica shards. Statuses are:
    green
    All shards are assigned.
    yellow
    All primary shards are assigned, but one or more replica shards are unassigned. If a node in the cluster fails, some data could be unavailable until that node is repaired.
    red
    One or more primary shards are unassigned, so some data is unavailable. This can occur briefly during cluster startup as primary shards are assigned.
  valuemap:
    name: 'ES cluster state'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.status
    - type: JAVASCRIPT
      parameters:
        - |
          var state = ['green', 'yellow', 'red'];
          return state.indexOf(value.trim()) === -1 ? 255 : state.indexOf(value.trim());
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: es.cluster.get_health
  tags:
    - tag: component
      value: health
  triggers:
    - uuid: f3bb7e96f6074063bad76521e1dce24f
      expression: 'last(/Elasticsearch Cluster by HTTP/es.cluster.status)=2'
      name: 'ES: Health is RED'
      priority: HIGH
      description: |
        One or more primary shards are unassigned, so some data is unavailable.
        This can occur briefly during cluster startup as primary shards are assigned.
      tags:
        - tag: scope
          value: availability
    - uuid: 33a6ef13f7b240768841919482709411
      expression: 'last(/Elasticsearch Cluster by HTTP/es.cluster.status)=255'
      name: 'ES: Health is UNKNOWN'
      priority: HIGH
      description: 'The health status of the cluster is unknown or cannot be obtained.'
      tags:
        - tag: scope
          value: availability
    - uuid: 99f36c2aa5d64248b96d1fc97c3b3065
      expression: 'last(/Elasticsearch Cluster by HTTP/es.cluster.status)=1'
      name: 'ES: Health is YELLOW'
      priority: AVERAGE
      description: |
        All primary shards are assigned, but one or more replica shards are unassigned.
        If a node in the cluster fails, some data could be unavailable until that node is repaired.
      tags:
        - tag: scope
          value: availability
# Dependent item: reads task_max_waiting_in_queue_millis and multiplies by 0.001 to report seconds.
- uuid: 2295e2ba3d4949feb3519ac85ba6ff86
  name: 'ES: Task max waiting in queue'
  type: DEPENDENT
  key: es.cluster.task_max_waiting_in_queue
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: s
  description: 'The time expressed in seconds since the earliest initiated task is waiting for being performed.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.task_max_waiting_in_queue_millis
    - type: MULTIPLIER
      parameters:
        - '0.001'
  master_item:
    key: es.cluster.get_health
  tags:
    - tag: component
      value: tasks
# Dependent item: reads unassigned_shards from the cluster health response.
# The attached trigger fires when the 10-minute minimum stays above zero.
- uuid: 85e82cba1e5c479caede2a94c58239be
  name: 'ES: Number of unassigned shards'
  type: DEPENDENT
  key: es.cluster.unassigned_shards
  delay: '0'
  history: 7d
  description: 'The number of shards that are not allocated.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.unassigned_shards
  master_item:
    key: es.cluster.get_health
  tags:
    - tag: component
      value: shards
  triggers:
    - uuid: 35bdd54f27a64009b96a06a12508f99c
      expression: 'min(/Elasticsearch Cluster by HTTP/es.cluster.unassigned_shards,10m)>0'
      name: 'ES: Cluster has the unassigned shards'
      priority: AVERAGE
      description: 'The cluster has the unassigned shards longer than 10 minutes.'
      tags:
        - tag: scope
          value: notice
# Dependent item: reads indices.count from the es.cluster.get_stats master item.
- uuid: db2f3161eaff4eb0b25d4191b423c733
  name: 'ES: Indices with shards assigned to nodes'
  type: DEPENDENT
  key: es.indices.count
  delay: '0'
  history: 7d
  description: 'The total number of indices with shards assigned to the selected nodes.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.indices.count
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: es.cluster.get_stats
  tags:
    - tag: component
      value: indices
# Dependent item: reads indices.docs.count from the es.cluster.get_stats master item.
- uuid: 25cb1e0203334efd96d3e626f81b4670
  name: 'ES: Number of non-deleted documents'
  type: DEPENDENT
  key: es.indices.docs.count
  delay: '0'
  history: 7d
  description: |
    The total number of non-deleted documents across all primary shards assigned to the selected nodes.
    This number is based on the documents in Lucene segments and may include the documents from nested fields.
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.indices.docs.count
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: es.cluster.get_stats
  tags:
    - tag: component
      value: documents
# Dependent item: reads nodes.count.data from the es.cluster.get_stats master item.
- uuid: 54f51a653e014290aa3f91deaca44e47
  name: 'ES: Nodes with the data role'
  type: DEPENDENT
  key: es.nodes.count.data
  delay: '0'
  history: 7d
  description: 'The number of selected nodes with the data role.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.nodes.count.data
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: es.cluster.get_stats
  tags:
    - tag: component
      value: nodes
# Dependent item: reads nodes.count.ingest from the es.cluster.get_stats master item.
- uuid: 613ab3469f234e278af99d61e57b46bd
  name: 'ES: Nodes with the ingest role'
  type: DEPENDENT
  key: es.nodes.count.ingest
  delay: '0'
  history: 7d
  description: 'The number of selected nodes with the ingest role.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.nodes.count.ingest
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: es.cluster.get_stats
  tags:
    - tag: component
      value: nodes
# Dependent item: reads nodes.count.master from the es.cluster.get_stats master item.
# The attached trigger fires when the last value equals exactly 2.
- uuid: e7b36f37b86845339a306dacf874164a
  name: 'ES: Nodes with the master role'
  type: DEPENDENT
  key: es.nodes.count.master
  delay: '0'
  history: 7d
  description: 'The number of selected nodes with the master role.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.nodes.count.master
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: es.cluster.get_stats
  tags:
    - tag: component
      value: nodes
  triggers:
    - uuid: 8011883baef04aa0bed343d1bb0df288
      expression: 'last(/Elasticsearch Cluster by HTTP/es.nodes.count.master)=2'
      name: 'ES: Cluster has only two master nodes'
      priority: DISASTER
      description: 'The cluster has only two nodes with a master role and will be unavailable if one of them breaks.'
      tags:
        - tag: scope
          value: notice
# Dependent item: reads nodes.fs.available_in_bytes from the es.cluster.get_stats master item.
# Unchanged values are discarded, with a 1h heartbeat.
- uuid: c3ea99e5897b4663a3239d0edd66f1f0
  name: 'ES: Total available size to JVM in all file stores'
  type: DEPENDENT
  key: es.nodes.fs.available_in_bytes
  delay: '0'
  history: 7d
  units: B
  description: |
    The total number of bytes available to JVM in the file stores across all selected nodes.
    Depending on OS or process-level restrictions, this number may be less than nodes.fs.free_in_bytes.
    This is the actual amount of free disk space the selected Elasticsearch nodes can use.
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.nodes.fs.available_in_bytes
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: es.cluster.get_stats
  tags:
    - tag: component
      value: storage
# Dependent item: reads nodes.fs.total_in_bytes from the es.cluster.get_stats master item.
- uuid: b8c3c5d8866d4a6b9c6847b5c8da0631
  name: 'ES: Total size of all file stores'
  type: DEPENDENT
  key: es.nodes.fs.total_in_bytes
  delay: '0'
  history: 7d
  units: B
  description: 'The total size in bytes of all file stores across all selected nodes.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.nodes.fs.total_in_bytes
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: es.cluster.get_stats
  tags:
    - tag: component
      value: storage
# Master item: HTTP agent request to the _nodes/stats endpoint using basic auth (30s timeout).
# Its TEXT result is the master_item of the per-node 'es.node.get.data[{#ES.NODE}]' prototype.
- uuid: 66c22b8b2b8b40fda7ac6f0ae472befd
  name: 'ES: Get nodes stats'
  type: HTTP_AGENT
  key: es.nodes.get_stats
  history: '0'
  trends: '0'
  value_type: TEXT
  authtype: BASIC
  username: '{$ELASTICSEARCH.USERNAME}'
  password: '{$ELASTICSEARCH.PASSWORD}'
  description: 'Returns cluster nodes statistics.'
  timeout: 30s
  url: '{$ELASTICSEARCH.SCHEME}://{HOST.CONN}:{$ELASTICSEARCH.PORT}/_nodes/stats'
  tags:
    - tag: component
      value: raw
# Dependent item: reads nodes.jvm.max_uptime_in_millis from the cluster stats response
# and multiplies by 0.001 to report seconds. The trigger fires while the value is below 10m.
- uuid: 2dcf54f21cbd4af9a7931e3a2522685c
  name: 'ES: Cluster uptime'
  type: DEPENDENT
  key: es.nodes.jvm.max_uptime
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: s
  description: 'Uptime duration in seconds since JVM has last started.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.nodes.jvm.max_uptime_in_millis
    - type: MULTIPLIER
      parameters:
        - '0.001'
  master_item:
    key: es.cluster.get_stats
  tags:
    - tag: component
      value: system
  triggers:
    - uuid: d28aa18c85cb4f48a9e7b8ba55d65400
      expression: 'last(/Elasticsearch Cluster by HTTP/es.nodes.jvm.max_uptime)<10m'
      name: 'ES: Cluster has been restarted'
      event_name: 'ES: Cluster has been restarted (uptime < 10m)'
      priority: INFO
      description: 'Uptime is less than 10 minutes.'
      manual_close: 'YES'
      tags:
        - tag: scope
          value: notice
# Simple check: net.tcp.service.perf against the configured scheme, host and port.
# The trigger compares the 5-minute minimum with {$ELASTICSEARCH.RESPONSE_TIME.MAX.WARN}
# and depends on the 'ES: Service is down' trigger.
- uuid: 671888d47c724e54aca78fbe1b3ecaed
  name: 'ES: Service response time'
  type: SIMPLE
  key: 'net.tcp.service.perf["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"]'
  history: 7d
  value_type: FLOAT
  units: s
  description: 'Checks performance of the TCP service.'
  tags:
    - tag: component
      value: network
  triggers:
    - uuid: 2a3f3b96e5dd47de998ccc17f109e149
      expression: 'min(/Elasticsearch Cluster by HTTP/net.tcp.service.perf["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"],5m)>{$ELASTICSEARCH.RESPONSE_TIME.MAX.WARN}'
      name: 'ES: Service response time is too high'
      event_name: 'ES: Service response time is too high (over {$ELASTICSEARCH.RESPONSE_TIME.MAX.WARN} for 5m)'
      priority: WARNING
      description: 'The performance of the TCP service is very low.'
      manual_close: 'YES'
      dependencies:
        - name: 'ES: Service is down'
          expression: 'last(/Elasticsearch Cluster by HTTP/net.tcp.service["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"])=0'
      tags:
        - tag: scope
          value: performance
# Simple check: net.tcp.service against the configured scheme, host and port.
# Unchanged values are discarded with a 10m heartbeat; the trigger fires when the last value is 0.
- uuid: d0d38ce55f844a51a0f2131c86bec1ae
  name: 'ES: Service status'
  type: SIMPLE
  key: 'net.tcp.service["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"]'
  history: 7d
  description: 'Checks if the service is running and accepting TCP connections.'
  valuemap:
    name: 'Service state'
  preprocessing:
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 10m
  tags:
    - tag: component
      value: health
    - tag: component
      value: network
  triggers:
    - uuid: b4d76f68ce94492f96bbfbb778d1f144
      expression: 'last(/Elasticsearch Cluster by HTTP/net.tcp.service["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"])=0'
      name: 'ES: Service is down'
      priority: AVERAGE
      description: 'The service is unavailable or does not accept TCP connections.'
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
discovery_rules:
- uuid: 5105173f24d941b2969fe3d190d22e82
name: 'Cluster nodes discovery'
type: HTTP_AGENT
key: es.nodes.discovery
delay: 1h
authtype: BASIC
username: '{$ELASTICSEARCH.USERNAME}'
password: '{$ELASTICSEARCH.PASSWORD}'
description: 'Discovery ES cluster nodes.'
item_prototypes:
# Item prototype: reads fs.total.available_in_bytes from the per-node 'es.node.get.data[{#ES.NODE}]' item.
- uuid: b4e87d039e9d4feeb03e0e33f14b2c82
  name: 'ES {#ES.NODE}: Total available size'
  type: DEPENDENT
  key: 'es.node.fs.total.available_in_bytes[{#ES.NODE}]'
  delay: '0'
  history: 7d
  units: B
  description: |
    The total number of bytes available to this Java virtual machine on all file stores.
    Depending on OS or process level restrictions, this might appear less than fs.total.free_in_bytes.
    This is the actual amount of free disk space the Elasticsearch node can utilize.
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..fs.total.available_in_bytes.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: storage
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads fs.total.total_in_bytes from the per-node data item.
# Unchanged values are discarded, with a 1d heartbeat.
- uuid: 4f315fdf62884b0284bf04f1a85aeb98
  name: 'ES {#ES.NODE}: Total size'
  type: DEPENDENT
  key: 'es.node.fs.total.total_in_bytes[{#ES.NODE}]'
  delay: '0'
  history: 7d
  units: B
  description: 'Total size (in bytes) of all file stores.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..fs.total.total_in_bytes.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1d
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: storage
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: selects, from the es.nodes.get_stats response, the first element whose
# name equals {#ES.NODE}. The other per-node prototypes use this item as their master_item.
- uuid: 979eddbb814f44ef9ce34d19cc79ecd0
  name: 'ES {#ES.NODE}: Get data'
  type: DEPENDENT
  key: 'es.node.get.data[{#ES.NODE}]'
  delay: '0'
  history: '0'
  trends: '0'
  value_type: TEXT
  description: 'Returns cluster nodes statistics.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - '$..[?(@.name==''{#ES.NODE}'')].first()'
  master_item:
    key: es.nodes.get_stats
  tags:
    - tag: component
      value: raw
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads http.current_open from the per-node data item.
- uuid: 2ba1fc7e2dad4d0ab1807221fb1e4fca
  name: 'ES {#ES.NODE}: Number of open HTTP connections'
  type: DEPENDENT
  key: 'es.node.http.current_open[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The number of currently open HTTP connections for the node.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..http.current_open.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: connections
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads http.total_opened and applies CHANGE_PER_SECOND to report a rate.
- uuid: fcf163d5db9b455fa38823e8ad16e578
  name: 'ES {#ES.NODE}: Rate of HTTP connections opened'
  type: DEPENDENT
  key: 'es.node.http.opened.rate[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of HTTP connections opened for the node per second.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..http.total_opened.first()
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: connections
    - tag: node
      value: '{#ES.NODE}'
# Calculated item prototype: change in flush total_time_in_millis divided by change in flush total.
# The '(change(...) = 0)' term adds 1 to the divisor when the count did not change.
- uuid: c530311329e348cca74c38fa2260236b
  name: 'ES {#ES.NODE}: Flush latency'
  type: CALCULATED
  key: 'es.node.indices.flush.latency[{#ES.NODE}]'
  history: 7d
  value_type: FLOAT
  units: ms
  params: 'change(//es.node.indices.flush.total_time_in_millis[{#ES.NODE}]) / ( change(//es.node.indices.flush.total[{#ES.NODE}]) + (change(//es.node.indices.flush.total[{#ES.NODE}]) = 0) )'
  description: 'The average flush latency calculated from the available flush.total and flush.total_time_in_millis metrics.'
  tags:
    - tag: component
      value: operations
    - tag: node
      value: '{#ES.NODE}'
  trigger_prototypes:
    - uuid: 6c2ea291374b4de1ab06ddeccba635b1
      expression: 'min(/Elasticsearch Cluster by HTTP/es.node.indices.flush.latency[{#ES.NODE}],5m)>{$ELASTICSEARCH.FLUSH_LATENCY.MAX.WARN}'
      name: 'ES {#ES.NODE}: Flush latency is too high'
      event_name: 'ES {#ES.NODE}: Flush latency is too high (over {$ELASTICSEARCH.FLUSH_LATENCY.MAX.WARN}ms for 5m)'
      priority: WARNING
      description: |
        If you see this metric increasing steadily, it may indicate a problem with slow disks; this problem may escalate
        and eventually prevent you from being able to add new information to your index.
      tags:
        - tag: scope
          value: notice
# Item prototype: reads indices.flush.total from the per-node data item.
- uuid: 8e270dfff9c84d2a96a134dd6d86533b
  name: 'ES {#ES.NODE}: Total number of index flushes to disk'
  type: DEPENDENT
  key: 'es.node.indices.flush.total[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The total number of flush operations.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.flush.total.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: operations
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads indices.flush.total_time_in_millis from the per-node data item.
- uuid: 7e18149dcaee47748e4073f4ce814c03
  name: 'ES {#ES.NODE}: Total time spent on flushing indices to disk'
  type: DEPENDENT
  key: 'es.node.indices.flush.total_time_in_millis[{#ES.NODE}]'
  delay: '0'
  history: 7d
  units: ms
  description: 'Total time in milliseconds spent performing flush operations.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.flush.total_time_in_millis.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: operations
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads indices.indexing.index_current from the per-node data item.
- uuid: e91cc33c088a4f56a9176fd6a09f2411
  name: 'ES {#ES.NODE}: Current indexing operations'
  type: DEPENDENT
  key: 'es.node.indices.indexing.index_current[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The number of indexing operations currently running.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.indexing.index_current.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: operations
    - tag: node
      value: '{#ES.NODE}'
# Calculated item prototype: change in index_time_in_millis divided by change in index_total.
# The '(change(...) = 0)' term adds 1 to the divisor when the count did not change.
- uuid: 23f54c51c30a4dbdbef48611a7907db6
  name: 'ES {#ES.NODE}: Indexing latency'
  type: CALCULATED
  key: 'es.node.indices.indexing.index_latency[{#ES.NODE}]'
  history: 7d
  value_type: FLOAT
  units: ms
  params: 'change(//es.node.indices.indexing.index_time_in_millis[{#ES.NODE}]) / ( change(//es.node.indices.indexing.index_total[{#ES.NODE}]) + (change(//es.node.indices.indexing.index_total[{#ES.NODE}]) = 0) )'
  description: 'The average indexing latency calculated from the available index_total and index_time_in_millis metrics.'
  tags:
    - tag: component
      value: operations
    - tag: node
      value: '{#ES.NODE}'
  trigger_prototypes:
    - uuid: 2755b3f8811a45fba7c48347707ceaf8
      expression: 'min(/Elasticsearch Cluster by HTTP/es.node.indices.indexing.index_latency[{#ES.NODE}],5m)>{$ELASTICSEARCH.INDEXING_LATENCY.MAX.WARN}'
      name: 'ES {#ES.NODE}: Indexing latency is too high'
      event_name: 'ES {#ES.NODE}: Indexing latency is too high (over {$ELASTICSEARCH.INDEXING_LATENCY.MAX.WARN}ms for 5m)'
      priority: WARNING
      description: |
        If the latency is increasing, it may indicate that you are indexing too many documents at the same time (Elasticsearch's documentation
        recommends starting with a bulk indexing size of 5 to 15 megabytes and increasing slowly from there).
      tags:
        - tag: scope
          value: notice
# Item prototype: reads indices.indexing.index_time_in_millis from the per-node data item.
- uuid: f471dad45ff149b09a479963cb616fc2
  name: 'ES {#ES.NODE}: Total time spent performing indexing'
  type: DEPENDENT
  key: 'es.node.indices.indexing.index_time_in_millis[{#ES.NODE}]'
  delay: '0'
  history: 7d
  units: ms
  description: 'Total time in milliseconds spent performing indexing operations.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.indexing.index_time_in_millis.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: operations
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads indices.indexing.index_total from the per-node data item.
- uuid: ad36b8495eca49c48e7d8a7877a325c2
  name: 'ES {#ES.NODE}: Total number of indexing'
  type: DEPENDENT
  key: 'es.node.indices.indexing.index_total[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The total number of indexing operations.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.indexing.index_total.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: operations
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads indexing throttle_time_in_millis, multiplies by 0.001 (seconds),
# then applies SIMPLE_CHANGE so the stored value is the difference from the previous sample.
- uuid: bb0cb2196b14483f8807a855f2f531a7
  name: 'ES {#ES.NODE}: Time spent throttling operations'
  type: DEPENDENT
  key: 'es.node.indices.indexing.throttle_time[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: s
  description: 'Time in seconds spent throttling operations for the last measuring span.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.indexing.throttle_time_in_millis.first()
    - type: MULTIPLIER
      parameters:
        - '0.001'
    - type: SIMPLE_CHANGE
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: operations
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads merges total_throttled_time_in_millis, multiplies by 0.001 (seconds),
# then applies SIMPLE_CHANGE so the stored value is the difference from the previous sample.
- uuid: 5f3b7dca802343cd905d54e66ac0e113
  name: 'ES {#ES.NODE}: Time spent throttling merge operations'
  type: DEPENDENT
  key: 'es.node.indices.merges.total_throttled_time[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: s
  description: 'Time in seconds spent throttling merge operations for the last measuring span.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.merges.total_throttled_time_in_millis.first()
    - type: MULTIPLIER
      parameters:
        - '0.001'
    - type: SIMPLE_CHANGE
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: operations
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads recovery throttle_time_in_millis, multiplies by 0.001 (seconds),
# then applies SIMPLE_CHANGE so the stored value is the difference from the previous sample.
- uuid: 9c82da45a63947dd91a4e19e6f2d121d
  name: 'ES {#ES.NODE}: Time spent throttling recovery operations'
  type: DEPENDENT
  key: 'es.node.indices.recovery.throttle_time[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: s
  description: 'Time in seconds spent throttling recovery operations for the last measuring span.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.recovery.throttle_time_in_millis.first()
    - type: MULTIPLIER
      parameters:
        - '0.001'
    - type: SIMPLE_CHANGE
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: operations
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads indices.refresh.total and applies CHANGE_PER_SECOND to report a rate.
- uuid: e27361fdce5a4635854960066ac050ca
  name: 'ES {#ES.NODE}: Rate of index refreshes'
  type: DEPENDENT
  key: 'es.node.indices.refresh.rate[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of refresh operations per second.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.refresh.total.first()
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: operations
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads refresh total_time_in_millis, multiplies by 0.001 (seconds),
# then applies SIMPLE_CHANGE so the stored value is the difference from the previous sample.
- uuid: fd20bbc5012d4c5693710b321e252193
  name: 'ES {#ES.NODE}: Time spent performing refresh'
  type: DEPENDENT
  key: 'es.node.indices.refresh.time[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: s
  description: 'Time in seconds spent performing refresh operations for the last measuring span.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.refresh.total_time_in_millis.first()
    - type: MULTIPLIER
      parameters:
        - '0.001'
    - type: SIMPLE_CHANGE
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: operations
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads indices.search.fetch_total and applies CHANGE_PER_SECOND to report a rate.
- uuid: 6d3b074aecb44a08a3573aba0ff006f9
  name: 'ES {#ES.NODE}: Rate of fetch'
  type: DEPENDENT
  key: 'es.node.indices.search.fetch.rate[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of fetch operations per second.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.search.fetch_total.first()
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: fetches
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads indices.search.fetch_current from the per-node data item.
- uuid: 36b2ffa3ed9f4c9781ccded273c395d7
  name: 'ES {#ES.NODE}: Current fetch operations'
  type: DEPENDENT
  key: 'es.node.indices.search.fetch_current[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The number of fetch operations currently running.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.search.fetch_current.first()
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: fetches
    - tag: node
      value: '{#ES.NODE}'
# Calculated item prototype: change in fetch_time_in_millis divided by change in fetch_total.
# The '(change(...) = 0)' term adds 1 to the divisor when the count did not change.
- uuid: 79b5fd5e04e249da9f73fbe68915fdb1
  name: 'ES {#ES.NODE}: Fetch latency'
  type: CALCULATED
  key: 'es.node.indices.search.fetch_latency[{#ES.NODE}]'
  history: 7d
  value_type: FLOAT
  units: ms
  params: 'change(//es.node.indices.search.fetch_time_in_millis[{#ES.NODE}]) / ( change(//es.node.indices.search.fetch_total[{#ES.NODE}]) + (change(//es.node.indices.search.fetch_total[{#ES.NODE}]) = 0) )'
  description: 'The average fetch latency calculated by sampling the total number of fetches and the total elapsed time at regular intervals.'
  tags:
    - tag: component
      value: fetches
    - tag: node
      value: '{#ES.NODE}'
  trigger_prototypes:
    - uuid: 6edd6851f31842659a57e29c444a9b32
      expression: 'min(/Elasticsearch Cluster by HTTP/es.node.indices.search.fetch_latency[{#ES.NODE}],5m)>{$ELASTICSEARCH.FETCH_LATENCY.MAX.WARN}'
      name: 'ES {#ES.NODE}: Fetch latency is too high'
      event_name: 'ES {#ES.NODE}: Fetch latency is too high (over {$ELASTICSEARCH.FETCH_LATENCY.MAX.WARN}ms for 5m)'
      priority: WARNING
      description: |
        The fetch phase should typically take much less time than the query phase. If you notice this metric consistently increasing,
        this could indicate a problem with slow disks, enriching of documents (highlighting the relevant text in search results, etc.),
        or requesting too many results.
      tags:
        - tag: scope
          value: notice
# Item prototype: reads search fetch_time_in_millis, multiplies by 0.001 (seconds),
# then applies SIMPLE_CHANGE so the stored value is the difference from the previous sample.
- uuid: a10e7dca72c8411a9b7fdcbeb676017e
  name: 'ES {#ES.NODE}: Time spent performing fetch'
  type: DEPENDENT
  key: 'es.node.indices.search.fetch_time[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: s
  description: 'Time in seconds spent performing fetch operations for the last measuring span.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.search.fetch_time_in_millis.first()
    - type: MULTIPLIER
      parameters:
        - '0.001'
    - type: SIMPLE_CHANGE
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: fetches
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads indices.search.fetch_time_in_millis (raw milliseconds) from the per-node data item.
- uuid: f35e3e9773394632b422dbc4e4442171
  name: 'ES {#ES.NODE}: Total time spent performing fetch'
  type: DEPENDENT
  key: 'es.node.indices.search.fetch_time_in_millis[{#ES.NODE}]'
  delay: '0'
  history: 7d
  units: ms
  description: 'Time in milliseconds spent performing fetch operations.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.search.fetch_time_in_millis.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: fetches
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads indices.search.fetch_total from the per-node data item.
- uuid: b3ac68f3531f478b9ad28fb1988df257
  name: 'ES {#ES.NODE}: Total number of fetch'
  type: DEPENDENT
  key: 'es.node.indices.search.fetch_total[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The total number of fetch operations.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.search.fetch_total.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: fetches
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads indices.search.query_total and applies CHANGE_PER_SECOND to report a rate.
- uuid: d347df9c9eee4aa89ccfb9147143b5d5
  name: 'ES {#ES.NODE}: Rate of queries'
  type: DEPENDENT
  key: 'es.node.indices.search.query.rate[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of query operations per second.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.search.query_total.first()
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: queries
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: reads indices.search.query_current from the per-node data item.
- uuid: 20fb738abf7a4aa1bf3ccb84790a26c9
  name: 'ES {#ES.NODE}: Current query operations'
  type: DEPENDENT
  key: 'es.node.indices.search.query_current[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The number of query operations currently running.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.search.query_current.first()
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: queries
    - tag: node
      value: '{#ES.NODE}'
# Calculated item prototype: change in query_time_in_millis divided by change in query_total.
# The '(change(...) = 0)' term adds 1 to the divisor when the count did not change.
# The formula is kept as a literal block scalar, exactly as in the source.
- uuid: fadb6fe530ab4b1296d4c17728667b12
  name: 'ES {#ES.NODE}: Query latency'
  type: CALCULATED
  key: 'es.node.indices.search.query_latency[{#ES.NODE}]'
  history: 7d
  value_type: FLOAT
  units: ms
  params: |
    change(//es.node.indices.search.query_time_in_millis[{#ES.NODE}]) /
    ( change(//es.node.indices.search.query_total[{#ES.NODE}]) + (change(//es.node.indices.search.query_total[{#ES.NODE}]) = 0) )
  description: 'The average query latency calculated by sampling the total number of queries and the total elapsed time at regular intervals.'
  tags:
    - tag: component
      value: queries
    - tag: node
      value: '{#ES.NODE}'
  trigger_prototypes:
    - uuid: 1892000290c843e887d128bf9f97c869
      expression: 'min(/Elasticsearch Cluster by HTTP/es.node.indices.search.query_latency[{#ES.NODE}],5m)>{$ELASTICSEARCH.QUERY_LATENCY.MAX.WARN}'
      name: 'ES {#ES.NODE}: Query latency is too high'
      event_name: 'ES {#ES.NODE}: Query latency is too high (over {$ELASTICSEARCH.QUERY_LATENCY.MAX.WARN}ms for 5m)'
      priority: WARNING
      description: 'If latency exceeds a threshold, look for potential resource bottlenecks, or investigate whether you need to optimize your queries.'
      tags:
        - tag: scope
          value: notice
# Item prototype: seconds spent on query operations since the previous sample,
# for one discovered node. Pipeline: take query_time_in_millis from the master
# item, scale milliseconds to seconds (x 0.001), then keep only the difference
# to the previous value (SIMPLE_CHANGE).
- uuid: ce807b641b0b4501b6a8e253d8403ce4
  name: 'ES {#ES.NODE}: Time spent performing query'
  type: DEPENDENT
  key: 'es.node.indices.search.query_time[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: s
  description: 'Time in seconds spent performing query operations for the last measuring span.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.search.query_time_in_millis.first()
    - type: MULTIPLIER
      parameters:
        - '0.001'
    - type: SIMPLE_CHANGE
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: queries
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: cumulative time (ms) spent on query operations on one
# discovered node. Taken as-is from the per-node master item; unchanged
# readings are discarded, with a 1h heartbeat.
- uuid: 7afc767463c64bbb9290975a8cef3cec
  name: 'ES {#ES.NODE}: Total time spent performing query'
  type: DEPENDENT
  key: 'es.node.indices.search.query_time_in_millis[{#ES.NODE}]'
  delay: '0'
  history: 7d
  units: ms
  description: 'Time in milliseconds spent performing query operations.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.search.query_time_in_millis.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: queries
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: cumulative count of query operations on one discovered node.
# Extracted from the per-node master item; unchanged readings are discarded,
# with a 1h heartbeat.
- uuid: 7279b682fa3e4661a1600d6da25e0fc7
  name: 'ES {#ES.NODE}: Total number of query'
  type: DEPENDENT
  key: 'es.node.indices.search.query_total[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The total number of query operations.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..indices.search.query_total.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: queries
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: committed JVM heap (bytes) of one discovered node.
# Extracted from the per-node master item; unchanged readings are discarded,
# with a 1h heartbeat.
- uuid: 269cc21be4c94ba58a52b9fca9590632
  name: 'ES {#ES.NODE}: Amount of JVM heap committed'
  type: DEPENDENT
  key: 'es.node.jvm.mem.heap_committed_in_bytes[{#ES.NODE}]'
  delay: '0'
  history: 7d
  units: B
  description: 'The amount of memory, in bytes, available for use by the heap.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..jvm.mem.heap_committed_in_bytes.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: memory
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: maximum JVM heap (bytes) of one discovered node.
# Extracted from the per-node master item; unchanged readings are discarded,
# here with a longer 1d heartbeat.
- uuid: 7f205e21644a4f629e4a419c42670158
  name: 'ES {#ES.NODE}: Maximum JVM memory available for use'
  type: DEPENDENT
  key: 'es.node.jvm.mem.heap_max_in_bytes[{#ES.NODE}]'
  delay: '0'
  history: 7d
  units: B
  description: 'The maximum amount of memory, in bytes, available for use by the heap.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..jvm.mem.heap_max_in_bytes.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1d
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: memory
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: JVM heap currently in use (bytes) on one discovered node.
# Extracted from the per-node master item; unchanged readings are discarded,
# with a 1h heartbeat.
- uuid: bb55c78c2aca49e49f6200e14c25dee8
  name: 'ES {#ES.NODE}: Amount of JVM heap currently in use'
  type: DEPENDENT
  key: 'es.node.jvm.mem.heap_used_in_bytes[{#ES.NODE}]'
  delay: '0'
  history: 7d
  units: B
  description: 'The memory, in bytes, currently in use by the heap.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..jvm.mem.heap_used_in_bytes.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: memory
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: JVM heap usage in percent for one discovered node.
# Extracted from the per-node master item; unchanged readings are discarded,
# with a 1h heartbeat. Two trigger prototypes compare the 1h minimum against
# {$ELASTICSEARCH.HEAP_USED.MAX.CRIT} and {$ELASTICSEARCH.HEAP_USED.MAX.WARN}.
- uuid: 71fca039cad847da9623aaeb722168f1
  name: 'ES {#ES.NODE}: Percent of JVM heap currently in use'
  type: DEPENDENT
  key: 'es.node.jvm.mem.heap_used_percent[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: '%'
  description: 'The percentage of memory currently in use by the heap.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..jvm.mem.heap_used_percent.first()
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: memory
    - tag: node
      value: '{#ES.NODE}'
  trigger_prototypes:
    # Critical level. The description no longer points to guidelines "stated
    # above", because no such text exists in the trigger description.
    - uuid: 9d199607ddda48eebe9f93cae6d6575d
      expression: 'min(/Elasticsearch Cluster by HTTP/es.node.jvm.mem.heap_used_percent[{#ES.NODE}],1h)>{$ELASTICSEARCH.HEAP_USED.MAX.CRIT}'
      name: 'ES {#ES.NODE}: Percent of JVM heap in use is critical'
      event_name: 'ES {#ES.NODE}: Percent of JVM heap in use is critical (over {$ELASTICSEARCH.HEAP_USED.MAX.CRIT}% for 1h)'
      priority: HIGH
      description: |
        This indicates that the rate of garbage collection isn't keeping up with the rate of garbage creation.
        To address this problem, you can either increase your heap size (as long as it remains within the heap
        sizing guidelines recommended by Elasticsearch), or scale out the cluster by adding more nodes.
      tags:
        - tag: scope
          value: performance
    # Warning level. Depends on the critical trigger above, so only the more
    # severe problem is raised when both thresholds are exceeded.
    - uuid: bbba4a577a2c4328b2392fdeb1ff9bb4
      expression: 'min(/Elasticsearch Cluster by HTTP/es.node.jvm.mem.heap_used_percent[{#ES.NODE}],1h)>{$ELASTICSEARCH.HEAP_USED.MAX.WARN}'
      name: 'ES {#ES.NODE}: Percent of JVM heap in use is high'
      event_name: 'ES {#ES.NODE}: Percent of JVM heap in use is high (over {$ELASTICSEARCH.HEAP_USED.MAX.WARN}% for 1h)'
      priority: WARNING
      description: |
        This indicates that the rate of garbage collection isn't keeping up with the rate of garbage creation.
        To address this problem, you can either increase your heap size (as long as it remains within the heap
        sizing guidelines recommended by Elasticsearch), or scale out the cluster by adding more nodes.
      dependencies:
        - name: 'ES {#ES.NODE}: Percent of JVM heap in use is critical'
          expression: 'min(/Elasticsearch Cluster by HTTP/es.node.jvm.mem.heap_used_percent[{#ES.NODE}],1h)>{$ELASTICSEARCH.HEAP_USED.MAX.CRIT}'
      tags:
        - tag: scope
          value: performance
# Item prototype: JVM uptime of one discovered node, in seconds.
# The source value is in milliseconds (uptime_in_millis) and is scaled by 0.001.
# The trigger prototype raises an informational, manually closable event while
# the uptime is below 10 minutes.
- uuid: 3c2f512ae6ff4221a7de4e5dbff2ed48
  name: 'ES {#ES.NODE}: Node uptime'
  type: DEPENDENT
  key: 'es.node.jvm.uptime[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: s
  description: 'JVM uptime in seconds.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..jvm.uptime_in_millis.first()
    - type: MULTIPLIER
      parameters:
        - '0.001'
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: system
    - tag: node
      value: '{#ES.NODE}'
  trigger_prototypes:
    - uuid: 580ae8aadf994fbcb34c9c8ad7cd5fad
      expression: 'last(/Elasticsearch Cluster by HTTP/es.node.jvm.uptime[{#ES.NODE}])<10m'
      name: 'ES {#ES.NODE}: has been restarted'
      event_name: 'ES {#ES.NODE}: has been restarted (uptime < 10m)'
      priority: INFO
      description: 'Uptime is less than 10 minutes.'
      manual_close: 'YES'
      tags:
        - tag: scope
          value: notice
# Item prototype: active threads in the refresh thread pool of one discovered
# node, extracted from the per-node master item with JSONPath.
- uuid: 14ea2732fbfc40ceaafadfff1830ac4d
  name: 'ES {#ES.NODE}: Refresh thread pool active threads'
  type: DEPENDENT
  key: 'es.node.thread_pool.refresh.active[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The number of active threads in the refresh thread pool.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..thread_pool.refresh.active.first()
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: refresh-thread-pool
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: refresh thread pool completed tasks for one discovered node.
# The cumulative "completed" counter is turned into a per-second rate by the
# CHANGE_PER_SECOND step, hence the FLOAT type and "rps" units.
- uuid: b6496d2878914b06bf6301630b4609a8
  name: 'ES {#ES.NODE}: Refresh thread pool executor tasks completed'
  type: DEPENDENT
  key: 'es.node.thread_pool.refresh.completed.rate[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of tasks completed by the refresh thread pool executor.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..thread_pool.refresh.completed.first()
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: refresh-thread-pool
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: tasks waiting in the refresh thread pool queue of one
# discovered node, extracted from the per-node master item with JSONPath.
- uuid: 4b748eeb937e45308e58d699b713cf01
  name: 'ES {#ES.NODE}: Refresh thread pool tasks in queue'
  type: DEPENDENT
  key: 'es.node.thread_pool.refresh.queue[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The number of tasks in queue for the refresh thread pool.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..thread_pool.refresh.queue.first()
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: refresh-thread-pool
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: refresh thread pool rejected tasks for one discovered node.
# The cumulative "rejected" counter is turned into a per-second rate by the
# CHANGE_PER_SECOND step. The trigger prototype warns when the 5-minute minimum
# of that rate is above zero.
- uuid: 593514af005044ce8e6207a5616270f2
  name: 'ES {#ES.NODE}: Refresh thread pool executor tasks rejected'
  type: DEPENDENT
  key: 'es.node.thread_pool.refresh.rejected.rate[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of tasks rejected by the refresh thread pool executor.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..thread_pool.refresh.rejected.first()
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: refresh-thread-pool
    - tag: node
      value: '{#ES.NODE}'
  trigger_prototypes:
    - uuid: b416941e8e0141c6af27c62f052ac860
      expression: 'min(/Elasticsearch Cluster by HTTP/es.node.thread_pool.refresh.rejected.rate[{#ES.NODE}],5m)>0'
      name: 'ES {#ES.NODE}: Refresh thread pool executor has the rejected tasks'
      event_name: 'ES {#ES.NODE}: Refresh thread pool executor has the rejected tasks (for 5m)'
      priority: WARNING
      description: 'The number of tasks rejected by the refresh thread pool executor is over 0 for 5m.'
      tags:
        - tag: scope
          value: notice
# Item prototype: active threads in the search thread pool of one discovered
# node, extracted from the per-node master item with JSONPath.
- uuid: 63875f40cf7c4f8f842562be2adfb7c7
  name: 'ES {#ES.NODE}: Search thread pool active threads'
  type: DEPENDENT
  key: 'es.node.thread_pool.search.active[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The number of active threads in the search thread pool.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..thread_pool.search.active.first()
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: search-thread-pool
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: search thread pool completed tasks for one discovered node.
# The cumulative "completed" counter is turned into a per-second rate by the
# CHANGE_PER_SECOND step, hence the FLOAT type and "rps" units.
- uuid: dc67ab311d4945aaae3347464785abb1
  name: 'ES {#ES.NODE}: Search thread pool executor tasks completed'
  type: DEPENDENT
  key: 'es.node.thread_pool.search.completed.rate[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of tasks completed by the search thread pool executor.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..thread_pool.search.completed.first()
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: search-thread-pool
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: tasks waiting in the search thread pool queue of one
# discovered node, extracted from the per-node master item with JSONPath.
- uuid: d11933b62131425d83ab09c6d5fd5e85
  name: 'ES {#ES.NODE}: Search thread pool tasks in queue'
  type: DEPENDENT
  key: 'es.node.thread_pool.search.queue[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The number of tasks in queue for the search thread pool.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..thread_pool.search.queue.first()
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: search-thread-pool
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: search thread pool rejected tasks for one discovered node.
# The cumulative "rejected" counter is turned into a per-second rate by the
# CHANGE_PER_SECOND step. The trigger prototype warns when the 5-minute minimum
# of that rate is above zero.
- uuid: 820ed330abc845919b1dada3cfa81387
  name: 'ES {#ES.NODE}: Search thread pool executor tasks rejected'
  type: DEPENDENT
  key: 'es.node.thread_pool.search.rejected.rate[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of tasks rejected by the search thread pool executor.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..thread_pool.search.rejected.first()
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: search-thread-pool
    - tag: node
      value: '{#ES.NODE}'
  trigger_prototypes:
    - uuid: 5cfd3f9924614a4283f1255752b672f0
      expression: 'min(/Elasticsearch Cluster by HTTP/es.node.thread_pool.search.rejected.rate[{#ES.NODE}],5m)>0'
      name: 'ES {#ES.NODE}: Search thread pool executor has the rejected tasks'
      event_name: 'ES {#ES.NODE}: Search thread pool executor has the rejected tasks (for 5m)'
      priority: WARNING
      description: 'The number of tasks rejected by the search thread pool executor is over 0 for 5m.'
      tags:
        - tag: scope
          value: notice
# Item prototype: active threads in the write thread pool of one discovered
# node, extracted from the per-node master item with JSONPath.
- uuid: 4b7dc34d78a64b24a8fd19af95e0f0bd
  name: 'ES {#ES.NODE}: Write thread pool active threads'
  type: DEPENDENT
  key: 'es.node.thread_pool.write.active[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The number of active threads in the write thread pool.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..thread_pool.write.active.first()
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: write-thread-pool
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: write thread pool completed tasks for one discovered node.
# The cumulative "completed" counter is turned into a per-second rate by the
# CHANGE_PER_SECOND step, hence the FLOAT type and "rps" units.
- uuid: 5b5ac74702564bd490c7378adcf75c28
  name: 'ES {#ES.NODE}: Write thread pool executor tasks completed'
  type: DEPENDENT
  key: 'es.node.thread_pool.write.completed.rate[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of tasks completed by the write thread pool executor.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..thread_pool.write.completed.first()
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: write-thread-pool
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: tasks waiting in the write thread pool queue of one
# discovered node, extracted from the per-node master item with JSONPath.
- uuid: a0d8f8d896a546d1ade07c355992308d
  name: 'ES {#ES.NODE}: Write thread pool tasks in queue'
  type: DEPENDENT
  key: 'es.node.thread_pool.write.queue[{#ES.NODE}]'
  delay: '0'
  history: 7d
  description: 'The number of tasks in queue for the write thread pool.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..thread_pool.write.queue.first()
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: write-thread-pool
    - tag: node
      value: '{#ES.NODE}'
# Item prototype: write thread pool rejected tasks for one discovered node.
# The cumulative "rejected" counter is turned into a per-second rate by the
# CHANGE_PER_SECOND step. The trigger prototype warns when the 5-minute minimum
# of that rate is above zero.
- uuid: a21213815a30485a88b183e7b40a4e7e
  name: 'ES {#ES.NODE}: Write thread pool executor tasks rejected'
  type: DEPENDENT
  key: 'es.node.thread_pool.write.rejected.rate[{#ES.NODE}]'
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: rps
  description: 'The number of tasks rejected by the write thread pool executor.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..thread_pool.write.rejected.first()
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: 'es.node.get.data[{#ES.NODE}]'
  tags:
    - tag: component
      value: write-thread-pool
    - tag: node
      value: '{#ES.NODE}'
  trigger_prototypes:
    - uuid: 1f5e30a34bb84cef831670974ec7c3e8
      expression: 'min(/Elasticsearch Cluster by HTTP/es.node.thread_pool.write.rejected.rate[{#ES.NODE}],5m)>0'
      name: 'ES {#ES.NODE}: Write thread pool executor has the rejected tasks'
      event_name: 'ES {#ES.NODE}: Write thread pool executor has the rejected tasks (for 5m)'
      priority: WARNING
      description: 'The number of tasks rejected by the write thread pool executor is over 0 for 5m.'
      tags:
        - tag: scope
          value: notice
graph_prototypes:
# Graph prototype: query, indexing, fetch and flush latency of one discovered
# node on a single chart. "sortorder" sets the drawing order; the first series
# carries no explicit sortorder.
- uuid: 0725d91f63b64346bbef2c20d2901e9b
  name: 'ES {#ES.NODE}: Latency'
  graph_items:
    - color: 199C0D
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.indices.search.query_latency[{#ES.NODE}]'
    - sortorder: '1'
      color: F63100
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.indices.indexing.index_latency[{#ES.NODE}]'
    - sortorder: '2'
      color: 00611C
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.indices.search.fetch_latency[{#ES.NODE}]'
    - sortorder: '3'
      color: F7941D
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.indices.flush.latency[{#ES.NODE}]'
# Graph prototype: fetch and query operations currently running on one
# discovered node.
- uuid: 5196ed75f24d48f7ae55b564b8d925e5
  name: 'ES {#ES.NODE}: Query load'
  graph_items:
    - color: 199C0D
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.indices.search.fetch_current[{#ES.NODE}]'
    - sortorder: '1'
      color: F63100
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.indices.search.query_current[{#ES.NODE}]'
# Graph prototype: refresh thread pool of one discovered node, showing active
# threads, queued tasks, and the completed and rejected task rates.
- uuid: 6d0e4fde0eeb47b6af4c9f40311b384d
  name: 'ES {#ES.NODE}: Refresh thread pool'
  graph_items:
    - color: 199C0D
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.thread_pool.refresh.active[{#ES.NODE}]'
    - sortorder: '1'
      color: F63100
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.thread_pool.refresh.queue[{#ES.NODE}]'
    - sortorder: '2'
      color: 00611C
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.thread_pool.refresh.completed.rate[{#ES.NODE}]'
    - sortorder: '3'
      color: F7941D
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.thread_pool.refresh.rejected.rate[{#ES.NODE}]'
# Graph prototype: search thread pool of one discovered node, showing active
# threads, queued tasks, and the completed and rejected task rates.
- uuid: 9abe085b960d40bf8a99c88ec3564a8d
  name: 'ES {#ES.NODE}: Search thread pool'
  graph_items:
    - color: 199C0D
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.thread_pool.search.active[{#ES.NODE}]'
    - sortorder: '1'
      color: F63100
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.thread_pool.search.queue[{#ES.NODE}]'
    - sortorder: '2'
      color: 00611C
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.thread_pool.search.completed.rate[{#ES.NODE}]'
    - sortorder: '3'
      color: F7941D
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.thread_pool.search.rejected.rate[{#ES.NODE}]'
# Graph prototype: write thread pool of one discovered node, showing active
# threads, queued tasks, and the completed and rejected task rates.
- uuid: 689492a2614642a1a5d3279c42b29e85
  name: 'ES {#ES.NODE}: Write thread pool'
  graph_items:
    - color: 199C0D
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.thread_pool.write.active[{#ES.NODE}]'
    - sortorder: '1'
      color: F63100
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.thread_pool.write.queue[{#ES.NODE}]'
    - sortorder: '2'
      color: 00611C
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.thread_pool.write.completed.rate[{#ES.NODE}]'
    - sortorder: '3'
      color: F7941D
      item:
        host: 'Elasticsearch Cluster by HTTP'
        key: 'es.node.thread_pool.write.rejected.rate[{#ES.NODE}]'
# HTTP request and LLD settings. NOTE(review): these presumably belong to the
# node discovery rule whose header precedes this section - confirm nesting.
# The response's "nodes" members are selected with JSONPath, {#ES.NODE} is
# filled from each member's "name", and an unchanged result is discarded with
# a 1d heartbeat.
timeout: 15s
url: '{$ELASTICSEARCH.SCHEME}://{HOST.CONN}:{$ELASTICSEARCH.PORT}/_nodes/_all/nodes'
lld_macro_paths:
  - lld_macro: '{#ES.NODE}'
    path: $..name.first()
preprocessing:
  - type: JSONPATH
    parameters:
      - '$.nodes.[*]'
  - type: DISCARD_UNCHANGED_HEARTBEAT
    parameters:
      - 1d
# Tags declared at this level: class=software and target=elasticsearch.
tags:
  - tag: class
    value: software
  - tag: target
    value: elasticsearch
# User macros with their default values. The *.MAX.WARN / *.MAX.CRIT macros are
# thresholds referenced by trigger expressions; SCHEME and PORT build the request
# URL; USERNAME and PASSWORD have no default value and are meant to be set by
# the user.
macros:
  - macro: '{$ELASTICSEARCH.FETCH_LATENCY.MAX.WARN}'
    value: '100'
    description: 'Maximum of fetch latency in milliseconds for trigger expression.'
  - macro: '{$ELASTICSEARCH.FLUSH_LATENCY.MAX.WARN}'
    value: '100'
    description: 'Maximum of flush latency in milliseconds for trigger expression.'
  - macro: '{$ELASTICSEARCH.HEAP_USED.MAX.CRIT}'
    value: '95'
    # Wording fixed: "critically trigger expression" -> "critical trigger expression".
    description: 'The maximum percent in the use of JVM heap for critical trigger expression.'
  - macro: '{$ELASTICSEARCH.HEAP_USED.MAX.WARN}'
    value: '85'
    description: 'The maximum percent in the use of JVM heap for warning trigger expression.'
  - macro: '{$ELASTICSEARCH.INDEXING_LATENCY.MAX.WARN}'
    value: '100'
    description: 'Maximum of indexing latency in milliseconds for trigger expression.'
  - macro: '{$ELASTICSEARCH.PASSWORD}'
    description: 'The password of the Elasticsearch.'
  - macro: '{$ELASTICSEARCH.PORT}'
    value: '9200'
    description: 'The port of the Elasticsearch host.'
  - macro: '{$ELASTICSEARCH.QUERY_LATENCY.MAX.WARN}'
    value: '100'
    description: 'Maximum of query latency in milliseconds for trigger expression.'
  - macro: '{$ELASTICSEARCH.RESPONSE_TIME.MAX.WARN}'
    value: 10s
    description: 'The ES cluster maximum response time in seconds for trigger expression.'
  - macro: '{$ELASTICSEARCH.SCHEME}'
    value: http
    description: 'The scheme of the Elasticsearch (http/https).'
  - macro: '{$ELASTICSEARCH.USERNAME}'
    description: 'The username of the Elasticsearch.'
# Value maps translating numeric states into labels:
#   'ES cluster state': 0=green, 1=yellow, 2=red, 255=unknown
#   'Service state':    0=Down, 1=Up
valuemaps:
  - uuid: d651bdf75d0849d5ab2b0802fab76e22
    name: 'ES cluster state'
    mappings:
      - value: '0'
        newvalue: green
      - value: '1'
        newvalue: yellow
      - value: '2'
        newvalue: red
      - value: '255'
        newvalue: unknown
  - uuid: 15d416d869894fdb959ca2cda2c5e37c
    name: 'Service state'
    mappings:
      - value: '0'
        newvalue: Down
      - value: '1'
        newvalue: Up
# Capacity trigger: fires when the used space (total - available), spread over
# the remaining data nodes (count - 1), exceeds the space currently available.
#
# With exactly one data node the divisor "count - 1" is zero. The extra
# "+ (count = 1)" term adds 1 in that case only, so the expression can always
# be evaluated, and the trailing "and count > 1" keeps the trigger from firing
# on a standalone node, which has no other node to reshard onto.
triggers:
  - uuid: a2f33888d2774325b7955b633a7aae81
    expression: '(last(/Elasticsearch Cluster by HTTP/es.nodes.fs.total_in_bytes)-last(/Elasticsearch Cluster by HTTP/es.nodes.fs.available_in_bytes))/(last(/Elasticsearch Cluster by HTTP/es.cluster.number_of_data_nodes)-1+(last(/Elasticsearch Cluster by HTTP/es.cluster.number_of_data_nodes)=1))>last(/Elasticsearch Cluster by HTTP/es.nodes.fs.available_in_bytes) and last(/Elasticsearch Cluster by HTTP/es.cluster.number_of_data_nodes)>1'
    name: 'ES: Cluster does not have enough space for resharding'
    priority: HIGH
    description: 'There is not enough disk space for index resharding.'
    tags:
      - tag: scope
        value: capacity
# Cluster-level graph combining the shard-state and task-queue items:
# inactive shard percentage, relocating / initializing / unassigned / delayed
# unassigned shards, pending tasks, and the maximum task wait time in queue.
graphs:
  - uuid: 50f90b092fa24658b83b131fa7a3f2ce
    name: 'ES: Cluster health'
    graph_items:
      - color: 199C0D
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.inactive_shards_percent_as_number
      - sortorder: '1'
        color: F63100
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.relocating_shards
      - sortorder: '2'
        color: 00611C
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.initializing_shards
      - sortorder: '3'
        color: F7941D
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.unassigned_shards
      - sortorder: '4'
        color: FC6EA3
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.delayed_unassigned_shards
      - sortorder: '5'
        color: 6C59DC
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.number_of_pending_tasks
      - sortorder: '6'
        color: C7A72D
        item:
          host: 'Elasticsearch Cluster by HTTP'
          key: es.cluster.task_max_waiting_in_queue