zabbix_export:
  version: '7.0'
  template_groups:
    - uuid: a571c0d144b14fd4a87a9d9b2aa9fcd6
      name: Templates/Applications
  templates:
    - uuid: 52b2664578884d9eba62e47375c99f8e
      template: 'Elasticsearch Cluster by HTTP'
      name: 'Elasticsearch Cluster by HTTP'
      description: |
        The template to monitor Elasticsearch by Zabbix that works without any external scripts.
        It works with both standalone and cluster instances.
        The metrics are collected in one pass remotely using an HTTP agent.
        Values are retrieved from the _cluster/health, _cluster/stats and _nodes/stats REST API requests.
        You can set the {$ELASTICSEARCH.USERNAME} and {$ELASTICSEARCH.PASSWORD} macros in the template to use them at the host level.
        If the ES API is served from a non-default location, don't forget to change the {$ELASTICSEARCH.SCHEME} and {$ELASTICSEARCH.PORT} macros.

        You can discuss this template or leave feedback on our forum https://www.zabbix.com/forum/zabbix-suggestions-and-feedback/399473-discussion-thread-for-official-zabbix-template-for-elasticsearch

        Generated by official Zabbix template tool "Templator" 2.0.0
      vendor:
        name: Zabbix
        version: 7.0-0
      groups:
        - name: Templates/Applications
      items:
        - uuid: f3531c005c7f477b9916b4bf1ad273c2
          name: 'ES: Delayed unassigned shards'
          type: DEPENDENT
          key: es.cluster.delayed_unassigned_shards
          delay: '0'
          history: 7d
          description: 'The number of shards whose allocation has been delayed by the timeout settings.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.delayed_unassigned_shards
          master_item:
            key: es.cluster.get_health
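          # All es.cluster.* dependent items below are parsed from the cached response of
          # this master item, so the health endpoint is queried once rather than per metric.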
          tags:
            - tag: component
              value: shards
        - uuid: 7f587fd270be4eb68d81ae1de2a3ca1f
          name: 'ES: Get cluster health'
          type: HTTP_AGENT
          key: es.cluster.get_health
          history: '0'
          trends: '0'
          value_type: TEXT
          authtype: BASIC
          username: '{$ELASTICSEARCH.USERNAME}'
          password: '{$ELASTICSEARCH.PASSWORD}'
          description: 'Returns the health status of a cluster.'
          timeout: 15s
          url: '{$ELASTICSEARCH.SCHEME}://{HOST.CONN}:{$ELASTICSEARCH.PORT}/_cluster/health?timeout=5s'
          tags:
            - tag: component
              value: raw
        - uuid: 7066a66f352e4d79ba4aec11c0c5c611
          name: 'ES: Get cluster stats'
          type: HTTP_AGENT
          key: es.cluster.get_stats
          history: '0'
          trends: '0'
          value_type: TEXT
          authtype: BASIC
          username: '{$ELASTICSEARCH.USERNAME}'
          password: '{$ELASTICSEARCH.PASSWORD}'
          description: 'Returns cluster statistics.'
          timeout: 15s
          url: '{$ELASTICSEARCH.SCHEME}://{HOST.CONN}:{$ELASTICSEARCH.PORT}/_cluster/stats'
          tags:
            - tag: component
              value: raw
        - uuid: 500a763b9bfd4044b2d3bc95d3a0586c
          name: 'ES: Inactive shards percentage'
          type: DEPENDENT
          key: es.cluster.inactive_shards_percent_as_number
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: '%'
          description: 'The ratio of inactive shards in the cluster expressed as a percentage.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.active_shards_percent_as_number
            - type: JAVASCRIPT
              parameters:
                - 'return (100 - value)'
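                # The JavaScript step inverts the active-shard percentage reported by the API
                # (100 - active) to obtain the inactive share exposed by this item.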
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: shards
        - uuid: e0b684d5992a496981cf6fb9bb85be62
          name: 'ES: Number of initializing shards'
          type: DEPENDENT
          key: es.cluster.initializing_shards
          delay: '0'
          history: 7d
          description: 'The number of shards that are under initialization.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.initializing_shards
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: shards
          triggers:
            - uuid: ee59684bb2044540ad0306a5b6deb408
              expression: 'min(/Elasticsearch Cluster by HTTP/es.cluster.initializing_shards,10m)>0'
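              # min(...,10m)>0 fires only if the value stayed above zero for the entire
              # 10-minute window, which filters out short-lived shard initialization.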
              name: 'ES: Cluster has initializing shards'
              priority: AVERAGE
              description: 'The cluster has had initializing shards for longer than 10 minutes.'
              tags:
                - tag: scope
                  value: notice
        - uuid: 7d3c87e2fcae49438a14380f7d5faa81
          name: 'ES: Number of data nodes'
          type: DEPENDENT
          key: es.cluster.number_of_data_nodes
          delay: '0'
          history: 7d
          description: 'The number of nodes that are dedicated data nodes.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.number_of_data_nodes
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: nodes
        - uuid: 4ec7496b441643f39df3e25c0225e6ec
          name: 'ES: Number of nodes'
          type: DEPENDENT
          key: es.cluster.number_of_nodes
          delay: '0'
          history: 7d
          description: 'The number of nodes within the cluster.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.number_of_nodes
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: nodes
          triggers:
            - uuid: 5be4cb40e72442aca18ea014adbead23
              expression: 'change(/Elasticsearch Cluster by HTTP/es.cluster.number_of_nodes)<0'
              name: 'ES: The number of nodes within the cluster has decreased'
              priority: INFO
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
            - uuid: 2fba2b29425b4ff9a4008db9bde49d7f
              expression: 'change(/Elasticsearch Cluster by HTTP/es.cluster.number_of_nodes)>0'
              name: 'ES: The number of nodes within the cluster has increased'
              priority: INFO
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
        - uuid: 2c84eea7f4a642f8892cb7f50febb562
          name: 'ES: Number of pending tasks'
          type: DEPENDENT
          key: es.cluster.number_of_pending_tasks
          delay: '0'
          history: 7d
          description: 'The number of cluster-level changes that have not yet been executed.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.number_of_pending_tasks
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: tasks
        - uuid: 981818f43a3c4b36b36e4b3c4e3468e9
          name: 'ES: Number of relocating shards'
          type: DEPENDENT
          key: es.cluster.relocating_shards
          delay: '0'
          history: 7d
          description: 'The number of shards that are under relocation.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.relocating_shards
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: shards
        - uuid: b9568a6370dc40efae45ac1e0b719dd7
          name: 'ES: Cluster health status'
          type: DEPENDENT
          key: es.cluster.status
          delay: '0'
          history: 7d
          description: |
            Health status of the cluster, based on the state of its primary and replica shards. Statuses are:
            green
            All shards are assigned.
            yellow
            All primary shards are assigned, but one or more replica shards are unassigned. If a node in the cluster fails, some data could be unavailable until that node is repaired.
            red
            One or more primary shards are unassigned, so some data is unavailable. This can occur briefly during cluster startup as primary shards are assigned.
          valuemap:
            name: 'ES cluster state'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.status
            - type: JAVASCRIPT
              parameters:
                - |
                  var state = ['green', 'yellow', 'red'];
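                  // Map the textual status to a number: green=0, yellow=1, red=2;
                  // anything else maps to 255 ("unknown" in the ES cluster state value map).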

                  return state.indexOf(value.trim()) === -1 ? 255 : state.indexOf(value.trim());
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: health
          triggers:
            - uuid: f3bb7e96f6074063bad76521e1dce24f
              expression: 'last(/Elasticsearch Cluster by HTTP/es.cluster.status)=2'
              name: 'ES: Health is RED'
              priority: HIGH
              description: |
                One or more primary shards are unassigned, so some data is unavailable.
                This can occur briefly during cluster startup as primary shards are assigned.
              tags:
                - tag: scope
                  value: availability
            - uuid: 33a6ef13f7b240768841919482709411
              expression: 'last(/Elasticsearch Cluster by HTTP/es.cluster.status)=255'
              name: 'ES: Health is UNKNOWN'
              priority: HIGH
              description: 'The health status of the cluster is unknown or cannot be obtained.'
              tags:
                - tag: scope
                  value: availability
            - uuid: 99f36c2aa5d64248b96d1fc97c3b3065
              expression: 'last(/Elasticsearch Cluster by HTTP/es.cluster.status)=1'
              name: 'ES: Health is YELLOW'
              priority: AVERAGE
              description: |
                All primary shards are assigned, but one or more replica shards are unassigned.
                If a node in the cluster fails, some data could be unavailable until that node is repaired.
              tags:
                - tag: scope
                  value: availability
        - uuid: 2295e2ba3d4949feb3519ac85ba6ff86
          name: 'ES: Task max waiting in queue'
          type: DEPENDENT
          key: es.cluster.task_max_waiting_in_queue
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: s
          description: 'The time in seconds that the earliest-initiated pending task has been waiting to be performed.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.task_max_waiting_in_queue_millis
            - type: MULTIPLIER
              parameters:
                - '0.001'
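                # The API reports milliseconds; multiplying by 0.001 converts the value
                # to seconds to match the item's "s" unit.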
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: tasks
        - uuid: 85e82cba1e5c479caede2a94c58239be
          name: 'ES: Number of unassigned shards'
          type: DEPENDENT
          key: es.cluster.unassigned_shards
          delay: '0'
          history: 7d
          description: 'The number of shards that are not allocated.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.unassigned_shards
          master_item:
            key: es.cluster.get_health
          tags:
            - tag: component
              value: shards
          triggers:
            - uuid: 35bdd54f27a64009b96a06a12508f99c
              expression: 'min(/Elasticsearch Cluster by HTTP/es.cluster.unassigned_shards,10m)>0'
              name: 'ES: Cluster has unassigned shards'
              priority: AVERAGE
              description: 'The cluster has had unassigned shards for longer than 10 minutes.'
              tags:
                - tag: scope
                  value: notice
        - uuid: db2f3161eaff4eb0b25d4191b423c733
          name: 'ES: Indices with shards assigned to nodes'
          type: DEPENDENT
          key: es.indices.count
          delay: '0'
          history: 7d
          description: 'The total number of indices with shards assigned to the selected nodes.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.indices.count
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: indices
        - uuid: 25cb1e0203334efd96d3e626f81b4670
          name: 'ES: Number of non-deleted documents'
          type: DEPENDENT
          key: es.indices.docs.count
          delay: '0'
          history: 7d
          description: |
            The total number of non-deleted documents across all primary shards assigned to the selected nodes.
            This number is based on the documents in Lucene segments and may include the documents from nested fields.
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.indices.docs.count
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: documents
        - uuid: 54f51a653e014290aa3f91deaca44e47
          name: 'ES: Nodes with the data role'
          type: DEPENDENT
          key: es.nodes.count.data
          delay: '0'
          history: 7d
          description: 'The number of selected nodes with the data role.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.nodes.count.data
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: nodes
        - uuid: 613ab3469f234e278af99d61e57b46bd
          name: 'ES: Nodes with the ingest role'
          type: DEPENDENT
          key: es.nodes.count.ingest
          delay: '0'
          history: 7d
          description: 'The number of selected nodes with the ingest role.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.nodes.count.ingest
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: nodes
        - uuid: e7b36f37b86845339a306dacf874164a
          name: 'ES: Nodes with the master role'
          type: DEPENDENT
          key: es.nodes.count.master
          delay: '0'
          history: 7d
          description: 'The number of selected nodes with the master role.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.nodes.count.master
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: nodes
          triggers:
            - uuid: 8011883baef04aa0bed343d1bb0df288
              expression: 'last(/Elasticsearch Cluster by HTTP/es.nodes.count.master)=2'
              name: 'ES: Cluster has only two master nodes'
              priority: DISASTER
              description: 'The cluster has only two nodes with a master role and will be unavailable if one of them breaks.'
              tags:
                - tag: scope
                  value: notice
        - uuid: c3ea99e5897b4663a3239d0edd66f1f0
          name: 'ES: Total available size to JVM in all file stores'
          type: DEPENDENT
          key: es.nodes.fs.available_in_bytes
          delay: '0'
          history: 7d
          units: B
          description: |
            The total number of bytes available to the JVM in the file stores across all selected nodes.
            Depending on OS or process-level restrictions, this number may be less than nodes.fs.free_in_bytes.
            This is the actual amount of free disk space the selected Elasticsearch nodes can use.
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.nodes.fs.available_in_bytes
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: storage
        - uuid: b8c3c5d8866d4a6b9c6847b5c8da0631
          name: 'ES: Total size of all file stores'
          type: DEPENDENT
          key: es.nodes.fs.total_in_bytes
          delay: '0'
          history: 7d
          units: B
          description: 'The total size in bytes of all file stores across all selected nodes.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.nodes.fs.total_in_bytes
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: storage
        - uuid: 66c22b8b2b8b40fda7ac6f0ae472befd
          name: 'ES: Get nodes stats'
          type: HTTP_AGENT
          key: es.nodes.get_stats
          history: '0'
          trends: '0'
          value_type: TEXT
          authtype: BASIC
          username: '{$ELASTICSEARCH.USERNAME}'
          password: '{$ELASTICSEARCH.PASSWORD}'
          description: 'Returns cluster nodes statistics.'
          timeout: 30s
          url: '{$ELASTICSEARCH.SCHEME}://{HOST.CONN}:{$ELASTICSEARCH.PORT}/_nodes/stats'
          tags:
            - tag: component
              value: raw
        - uuid: 2dcf54f21cbd4af9a7931e3a2522685c
          name: 'ES: Cluster uptime'
          type: DEPENDENT
          key: es.nodes.jvm.max_uptime
          delay: '0'
          history: 7d
          value_type: FLOAT
          units: s
          description: 'Uptime in seconds since the JVM last started.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.nodes.jvm.max_uptime_in_millis
            - type: MULTIPLIER
              parameters:
                - '0.001'
          master_item:
            key: es.cluster.get_stats
          tags:
            - tag: component
              value: system
          triggers:
            - uuid: d28aa18c85cb4f48a9e7b8ba55d65400
              expression: 'last(/Elasticsearch Cluster by HTTP/es.nodes.jvm.max_uptime)<10m'
              name: 'ES: Cluster has been restarted'
              event_name: 'ES: Cluster has been restarted (uptime < 10m)'
              priority: INFO
              description: 'Uptime is less than 10 minutes.'
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: notice
        - uuid: 671888d47c724e54aca78fbe1b3ecaed
          name: 'ES: Service response time'
          type: SIMPLE
          key: 'net.tcp.service.perf["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"]'
          history: 7d
          value_type: FLOAT
          units: s
          description: 'Checks performance of the TCP service.'
          tags:
            - tag: component
              value: network
          triggers:
            - uuid: 2a3f3b96e5dd47de998ccc17f109e149
              expression: 'min(/Elasticsearch Cluster by HTTP/net.tcp.service.perf["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"],5m)>{$ELASTICSEARCH.RESPONSE_TIME.MAX.WARN}'
              name: 'ES: Service response time is too high'
              event_name: 'ES: Service response time is too high (over {$ELASTICSEARCH.RESPONSE_TIME.MAX.WARN} for 5m)'
              priority: WARNING
              description: 'The performance of the TCP service is very low.'
              manual_close: 'YES'
              dependencies:
                - name: 'ES: Service is down'
                  expression: 'last(/Elasticsearch Cluster by HTTP/net.tcp.service["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"])=0'
              tags:
                - tag: scope
                  value: performance
        - uuid: d0d38ce55f844a51a0f2131c86bec1ae
          name: 'ES: Service status'
          type: SIMPLE
          key: 'net.tcp.service["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"]'
          history: 7d
          description: 'Checks if the service is running and accepting TCP connections.'
          valuemap:
            name: 'Service state'
          preprocessing:
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 10m
          tags:
            - tag: component
              value: health
            - tag: component
              value: network
          triggers:
            - uuid: b4d76f68ce94492f96bbfbb778d1f144
              expression: 'last(/Elasticsearch Cluster by HTTP/net.tcp.service["{$ELASTICSEARCH.SCHEME}","{HOST.CONN}","{$ELASTICSEARCH.PORT}"])=0'
              name: 'ES: Service is down'
              priority: AVERAGE
              description: 'The service is unavailable or does not accept TCP connections.'
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: availability
      discovery_rules:
        - uuid: 5105173f24d941b2969fe3d190d22e82
          name: 'Cluster nodes discovery'
          type: HTTP_AGENT
          key: es.nodes.discovery
          delay: 1h
          authtype: BASIC
          username: '{$ELASTICSEARCH.USERNAME}'
          password: '{$ELASTICSEARCH.PASSWORD}'
          description: 'Discovers ES cluster nodes.'
          item_prototypes:
            - uuid: b4e87d039e9d4feeb03e0e33f14b2c82
              name: 'ES {#ES.NODE}: Total available size'
              type: DEPENDENT
              key: 'es.node.fs.total.available_in_bytes[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: B
              description: |
                The total number of bytes available to this Java virtual machine on all file stores.
                Depending on OS or process-level restrictions, this might be less than fs.total.free_in_bytes.
                This is the actual amount of free disk space the Elasticsearch node can utilize.
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..fs.total.available_in_bytes.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: storage
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 4f315fdf62884b0284bf04f1a85aeb98
              name: 'ES {#ES.NODE}: Total size'
              type: DEPENDENT
              key: 'es.node.fs.total.total_in_bytes[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: B
              description: 'Total size (in bytes) of all file stores.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..fs.total.total_in_bytes.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1d
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: storage
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 979eddbb814f44ef9ce34d19cc79ecd0
              name: 'ES {#ES.NODE}: Get data'
              type: DEPENDENT
              key: 'es.node.get.data[{#ES.NODE}]'
              delay: '0'
              history: '0'
              trends: '0'
              value_type: TEXT
              description: 'Returns statistics for the {#ES.NODE} node.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - '$..[?(@.name==''{#ES.NODE}'')].first()'
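                    # Selects this node's statistics object from the _nodes/stats payload by
                    # matching the object's name against the {#ES.NODE} LLD macro.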
              master_item:
                key: es.nodes.get_stats
              tags:
                - tag: component
                  value: raw
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 2ba1fc7e2dad4d0ab1807221fb1e4fca
              name: 'ES {#ES.NODE}: Number of open HTTP connections'
              type: DEPENDENT
              key: 'es.node.http.current_open[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of currently open HTTP connections for the node.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..http.current_open.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: connections
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: fcf163d5db9b455fa38823e8ad16e578
              name: 'ES {#ES.NODE}: Rate of HTTP connections opened'
              type: DEPENDENT
              key: 'es.node.http.opened.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of HTTP connections opened for the node per second.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..http.total_opened.first()
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
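                    # http.total_opened is a monotonically growing counter; CHANGE_PER_SECOND
                    # converts it into the per-second rate this item reports.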
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: connections
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: c530311329e348cca74c38fa2260236b
              name: 'ES {#ES.NODE}: Flush latency'
              type: CALCULATED
              key: 'es.node.indices.flush.latency[{#ES.NODE}]'
              history: 7d
              value_type: FLOAT
              units: ms
              params: 'change(//es.node.indices.flush.total_time_in_millis[{#ES.NODE}]) / ( change(//es.node.indices.flush.total[{#ES.NODE}]) + (change(//es.node.indices.flush.total[{#ES.NODE}]) = 0) )'
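              # The "(change(...) = 0)" term evaluates to 1 when no flushes happened in the
              # interval, so the division never hits a zero denominator; the other latency
              # items below use the same guard.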
              description: 'The average flush latency calculated from the available flush.total and flush.total_time_in_millis metrics.'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: 6c2ea291374b4de1ab06ddeccba635b1
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.indices.flush.latency[{#ES.NODE}],5m)>{$ELASTICSEARCH.FLUSH_LATENCY.MAX.WARN}'
                  name: 'ES {#ES.NODE}: Flush latency is too high'
                  event_name: 'ES {#ES.NODE}: Flush latency is too high (over {$ELASTICSEARCH.FLUSH_LATENCY.MAX.WARN}ms for 5m)'
                  priority: WARNING
                  description: |
                    If you see this metric increasing steadily, it may indicate a problem with slow disks; this problem may escalate
                    and eventually prevent you from being able to add new information to your index.
                  tags:
                    - tag: scope
                      value: notice
            - uuid: 8e270dfff9c84d2a96a134dd6d86533b
              name: 'ES {#ES.NODE}: Total number of index flushes to disk'
              type: DEPENDENT
              key: 'es.node.indices.flush.total[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The total number of flush operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.flush.total.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 7e18149dcaee47748e4073f4ce814c03
              name: 'ES {#ES.NODE}: Total time spent on flushing indices to disk'
              type: DEPENDENT
              key: 'es.node.indices.flush.total_time_in_millis[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: ms
              description: 'Total time in milliseconds spent performing flush operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.flush.total_time_in_millis.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: e91cc33c088a4f56a9176fd6a09f2411
              name: 'ES {#ES.NODE}: Current indexing operations'
              type: DEPENDENT
              key: 'es.node.indices.indexing.index_current[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of indexing operations currently running.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.indexing.index_current.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 23f54c51c30a4dbdbef48611a7907db6
              name: 'ES {#ES.NODE}: Indexing latency'
              type: CALCULATED
              key: 'es.node.indices.indexing.index_latency[{#ES.NODE}]'
              history: 7d
              value_type: FLOAT
              units: ms
              params: 'change(//es.node.indices.indexing.index_time_in_millis[{#ES.NODE}]) / ( change(//es.node.indices.indexing.index_total[{#ES.NODE}]) + (change(//es.node.indices.indexing.index_total[{#ES.NODE}]) = 0) )'
              description: 'The average indexing latency calculated from the available index_total and index_time_in_millis metrics.'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: 2755b3f8811a45fba7c48347707ceaf8
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.indices.indexing.index_latency[{#ES.NODE}],5m)>{$ELASTICSEARCH.INDEXING_LATENCY.MAX.WARN}'
                  name: 'ES {#ES.NODE}: Indexing latency is too high'
                  event_name: 'ES {#ES.NODE}: Indexing latency is too high (over {$ELASTICSEARCH.INDEXING_LATENCY.MAX.WARN}ms for 5m)'
                  priority: WARNING
                  description: |
                    If the latency is increasing, it may indicate that you are indexing too many documents at the same time (Elasticsearch's documentation
                    recommends starting with a bulk indexing size of 5 to 15 megabytes and increasing slowly from there).
                  tags:
                    - tag: scope
                      value: notice
            - uuid: f471dad45ff149b09a479963cb616fc2
              name: 'ES {#ES.NODE}: Total time spent performing indexing'
              type: DEPENDENT
              key: 'es.node.indices.indexing.index_time_in_millis[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: ms
              description: 'Total time in milliseconds spent performing indexing operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.indexing.index_time_in_millis.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: ad36b8495eca49c48e7d8a7877a325c2
              name: 'ES {#ES.NODE}: Total number of indexing operations'
              type: DEPENDENT
              key: 'es.node.indices.indexing.index_total[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The total number of indexing operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.indexing.index_total.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: bb0cb2196b14483f8807a855f2f531a7
              name: 'ES {#ES.NODE}: Time spent throttling operations'
              type: DEPENDENT
              key: 'es.node.indices.indexing.throttle_time[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: s
              description: 'Time in seconds spent throttling operations during the last measurement interval.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.indexing.throttle_time_in_millis.first()
                - type: MULTIPLIER
                  parameters:
                    - '0.001'
                - type: SIMPLE_CHANGE
                  parameters:
                    - ''
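                    # SIMPLE_CHANGE stores the difference between consecutive values of this
                    # cumulative counter, i.e. the seconds spent throttling since the previous poll.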
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 5f3b7dca802343cd905d54e66ac0e113
              name: 'ES {#ES.NODE}: Time spent throttling merge operations'
              type: DEPENDENT
              key: 'es.node.indices.merges.total_throttled_time[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: s
              description: 'Time in seconds spent throttling merge operations during the last measurement interval.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.merges.total_throttled_time_in_millis.first()
                - type: MULTIPLIER
                  parameters:
                    - '0.001'
                - type: SIMPLE_CHANGE
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 9c82da45a63947dd91a4e19e6f2d121d
              name: 'ES {#ES.NODE}: Time spent throttling recovery operations'
              type: DEPENDENT
              key: 'es.node.indices.recovery.throttle_time[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: s
              description: 'Time in seconds spent throttling recovery operations during the last measurement interval.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.recovery.throttle_time_in_millis.first()
                - type: MULTIPLIER
                  parameters:
                    - '0.001'
                - type: SIMPLE_CHANGE
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: e27361fdce5a4635854960066ac050ca
              name: 'ES {#ES.NODE}: Rate of index refreshes'
              type: DEPENDENT
              key: 'es.node.indices.refresh.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of refresh operations per second.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.refresh.total.first()
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: fd20bbc5012d4c5693710b321e252193
              name: 'ES {#ES.NODE}: Time spent performing refresh'
              type: DEPENDENT
              key: 'es.node.indices.refresh.time[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: s
              description: 'Time in seconds spent performing refresh operations during the last measurement interval.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.refresh.total_time_in_millis.first()
                - type: MULTIPLIER
                  parameters:
                    - '0.001'
                - type: SIMPLE_CHANGE
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: operations
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 6d3b074aecb44a08a3573aba0ff006f9
              name: 'ES {#ES.NODE}: Rate of fetch'
              type: DEPENDENT
              key: 'es.node.indices.search.fetch.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of fetch operations per second.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.search.fetch_total.first()
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: fetches
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 36b2ffa3ed9f4c9781ccded273c395d7
              name: 'ES {#ES.NODE}: Current fetch operations'
              type: DEPENDENT
              key: 'es.node.indices.search.fetch_current[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of fetch operations currently running.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.search.fetch_current.first()
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: fetches
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 79b5fd5e04e249da9f73fbe68915fdb1
              name: 'ES {#ES.NODE}: Fetch latency'
              type: CALCULATED
              key: 'es.node.indices.search.fetch_latency[{#ES.NODE}]'
              history: 7d
              value_type: FLOAT
              units: ms
              params: 'change(//es.node.indices.search.fetch_time_in_millis[{#ES.NODE}]) / ( change(//es.node.indices.search.fetch_total[{#ES.NODE}]) + (change(//es.node.indices.search.fetch_total[{#ES.NODE}]) = 0) )'
              description: 'The average fetch latency calculated by sampling the total number of fetches and the total elapsed time at regular intervals.'
              tags:
                - tag: component
                  value: fetches
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: 6edd6851f31842659a57e29c444a9b32
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.indices.search.fetch_latency[{#ES.NODE}],5m)>{$ELASTICSEARCH.FETCH_LATENCY.MAX.WARN}'
                  name: 'ES {#ES.NODE}: Fetch latency is too high'
                  event_name: 'ES {#ES.NODE}: Fetch latency is too high (over {$ELASTICSEARCH.FETCH_LATENCY.MAX.WARN}ms for 5m)'
                  priority: WARNING
                  description: |
                    The fetch phase should typically take much less time than the query phase. If you notice this metric consistently increasing,
                    this could indicate a problem with slow disks, enriching of documents (highlighting the relevant text in search results, etc.),
                    or requesting too many results.
                  tags:
                    - tag: scope
                      value: notice
            - uuid: a10e7dca72c8411a9b7fdcbeb676017e
              name: 'ES {#ES.NODE}: Time spent performing fetch'
              type: DEPENDENT
              key: 'es.node.indices.search.fetch_time[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: s
              description: 'Time in seconds spent performing fetch operations during the last measurement interval.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.search.fetch_time_in_millis.first()
                - type: MULTIPLIER
                  parameters:
                    - '0.001'
                - type: SIMPLE_CHANGE
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: fetches
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: f35e3e9773394632b422dbc4e4442171
              name: 'ES {#ES.NODE}: Total time spent performing fetch'
              type: DEPENDENT
              key: 'es.node.indices.search.fetch_time_in_millis[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: ms
              description: 'Time in milliseconds spent performing fetch operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.search.fetch_time_in_millis.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: fetches
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: b3ac68f3531f478b9ad28fb1988df257
              name: 'ES {#ES.NODE}: Total number of fetch operations'
              type: DEPENDENT
              key: 'es.node.indices.search.fetch_total[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The total number of fetch operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.search.fetch_total.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: fetches
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: d347df9c9eee4aa89ccfb9147143b5d5
              name: 'ES {#ES.NODE}: Rate of queries'
              type: DEPENDENT
              key: 'es.node.indices.search.query.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of query operations per second.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.search.query_total.first()
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: queries
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 20fb738abf7a4aa1bf3ccb84790a26c9
              name: 'ES {#ES.NODE}: Current query operations'
              type: DEPENDENT
              key: 'es.node.indices.search.query_current[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of query operations currently running.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.search.query_current.first()
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: queries
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: fadb6fe530ab4b1296d4c17728667b12
              name: 'ES {#ES.NODE}: Query latency'
              type: CALCULATED
              key: 'es.node.indices.search.query_latency[{#ES.NODE}]'
              history: 7d
              value_type: FLOAT
              units: ms
              params: |
                change(//es.node.indices.search.query_time_in_millis[{#ES.NODE}]) /
                ( change(//es.node.indices.search.query_total[{#ES.NODE}]) + (change(//es.node.indices.search.query_total[{#ES.NODE}]) = 0) )
              description: 'The average query latency calculated by sampling the total number of queries and the total elapsed time at regular intervals.'
              tags:
                - tag: component
                  value: queries
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: 1892000290c843e887d128bf9f97c869
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.indices.search.query_latency[{#ES.NODE}],5m)>{$ELASTICSEARCH.QUERY_LATENCY.MAX.WARN}'
                  name: 'ES {#ES.NODE}: Query latency is too high'
                  event_name: 'ES {#ES.NODE}: Query latency is too high (over {$ELASTICSEARCH.QUERY_LATENCY.MAX.WARN}ms for 5m)'
                  priority: WARNING
                  description: 'If latency exceeds a threshold, look for potential resource bottlenecks, or investigate whether you need to optimize your queries.'
                  tags:
                    - tag: scope
                      value: notice
            - uuid: ce807b641b0b4501b6a8e253d8403ce4
              name: 'ES {#ES.NODE}: Time spent performing query'
              type: DEPENDENT
              key: 'es.node.indices.search.query_time[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: s
              description: 'Time in seconds spent performing query operations during the last measurement interval.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.search.query_time_in_millis.first()
                - type: MULTIPLIER
                  parameters:
                    - '0.001'
                - type: SIMPLE_CHANGE
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: queries
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 7afc767463c64bbb9290975a8cef3cec
              name: 'ES {#ES.NODE}: Total time spent performing query'
              type: DEPENDENT
              key: 'es.node.indices.search.query_time_in_millis[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: ms
              description: 'Time in milliseconds spent performing query operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.search.query_time_in_millis.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: queries
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 7279b682fa3e4661a1600d6da25e0fc7
              name: 'ES {#ES.NODE}: Total number of query operations'
              type: DEPENDENT
              key: 'es.node.indices.search.query_total[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The total number of query operations.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..indices.search.query_total.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: queries
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 269cc21be4c94ba58a52b9fca9590632
              name: 'ES {#ES.NODE}: Amount of JVM heap committed'
              type: DEPENDENT
              key: 'es.node.jvm.mem.heap_committed_in_bytes[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: B
              description: 'The amount of memory, in bytes, available for use by the heap.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..jvm.mem.heap_committed_in_bytes.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: memory
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 7f205e21644a4f629e4a419c42670158
              name: 'ES {#ES.NODE}: Maximum JVM memory available for use'
              type: DEPENDENT
              key: 'es.node.jvm.mem.heap_max_in_bytes[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: B
              description: 'The maximum amount of memory, in bytes, available for use by the heap.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..jvm.mem.heap_max_in_bytes.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1d
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: memory
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: bb55c78c2aca49e49f6200e14c25dee8
              name: 'ES {#ES.NODE}: Amount of JVM heap currently in use'
              type: DEPENDENT
              key: 'es.node.jvm.mem.heap_used_in_bytes[{#ES.NODE}]'
              delay: '0'
              history: 7d
              units: B
              description: 'The memory, in bytes, currently in use by the heap.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..jvm.mem.heap_used_in_bytes.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: memory
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 71fca039cad847da9623aaeb722168f1
              name: 'ES {#ES.NODE}: Percent of JVM heap currently in use'
              type: DEPENDENT
              key: 'es.node.jvm.mem.heap_used_percent[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: '%'
              description: 'The percentage of memory currently in use by the heap.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..jvm.mem.heap_used_percent.first()
                - type: DISCARD_UNCHANGED_HEARTBEAT
                  parameters:
                    - 1h
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: memory
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: 9d199607ddda48eebe9f93cae6d6575d
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.jvm.mem.heap_used_percent[{#ES.NODE}],1h)>{$ELASTICSEARCH.HEAP_USED.MAX.CRIT}'
                  name: 'ES {#ES.NODE}: Percent of JVM heap in use is critical'
                  event_name: 'ES {#ES.NODE}: Percent of JVM heap in use is critical (over {$ELASTICSEARCH.HEAP_USED.MAX.CRIT}% for 1h)'
                  priority: HIGH
                  description: |
                    This indicates that the rate of garbage collection isn't keeping up with the rate of garbage creation.
                    To address this problem, you can either increase your heap size (as long as it remains below the recommended
                    guidelines stated above), or scale out the cluster by adding more nodes.
                  tags:
                    - tag: scope
                      value: performance
                - uuid: bbba4a577a2c4328b2392fdeb1ff9bb4
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.jvm.mem.heap_used_percent[{#ES.NODE}],1h)>{$ELASTICSEARCH.HEAP_USED.MAX.WARN}'
                  name: 'ES {#ES.NODE}: Percent of JVM heap in use is high'
                  event_name: 'ES {#ES.NODE}: Percent of JVM heap in use is high (over {$ELASTICSEARCH.HEAP_USED.MAX.WARN}% for 1h)'
                  priority: WARNING
                  description: |
                    This indicates that the rate of garbage collection isn't keeping up with the rate of garbage creation.
                    To address this problem, you can either increase your heap size (as long as it remains below the recommended
                    guidelines stated above), or scale out the cluster by adding more nodes.
                  dependencies:
                    - name: 'ES {#ES.NODE}: Percent of JVM heap in use is critical'
                      expression: 'min(/Elasticsearch Cluster by HTTP/es.node.jvm.mem.heap_used_percent[{#ES.NODE}],1h)>{$ELASTICSEARCH.HEAP_USED.MAX.CRIT}'
                  tags:
                    - tag: scope
                      value: performance
            - uuid: 3c2f512ae6ff4221a7de4e5dbff2ed48
              name: 'ES {#ES.NODE}: Node uptime'
              type: DEPENDENT
              key: 'es.node.jvm.uptime[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: s
              description: 'JVM uptime in seconds.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..jvm.uptime_in_millis.first()
                - type: MULTIPLIER
                  parameters:
                    - '0.001'
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: system
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: 580ae8aadf994fbcb34c9c8ad7cd5fad
                  expression: 'last(/Elasticsearch Cluster by HTTP/es.node.jvm.uptime[{#ES.NODE}])<10m'
                  name: 'ES {#ES.NODE}: has been restarted'
                  event_name: 'ES {#ES.NODE}: has been restarted (uptime < 10m)'
                  priority: INFO
                  description: 'Uptime is less than 10 minutes.'
                  manual_close: 'YES'
                  tags:
                    - tag: scope
                      value: notice
            - uuid: 14ea2732fbfc40ceaafadfff1830ac4d
              name: 'ES {#ES.NODE}: Refresh thread pool active threads'
              type: DEPENDENT
              key: 'es.node.thread_pool.refresh.active[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of active threads in the refresh thread pool.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..thread_pool.refresh.active.first()
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: refresh-thread-pool
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: b6496d2878914b06bf6301630b4609a8
              name: 'ES {#ES.NODE}: Refresh thread pool executor tasks completed'
              type: DEPENDENT
              key: 'es.node.thread_pool.refresh.completed.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of tasks completed by the refresh thread pool executor.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..thread_pool.refresh.completed.first()
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: refresh-thread-pool
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 4b748eeb937e45308e58d699b713cf01
              name: 'ES {#ES.NODE}: Refresh thread pool tasks in queue'
              type: DEPENDENT
              key: 'es.node.thread_pool.refresh.queue[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of tasks in queue for the refresh thread pool.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..thread_pool.refresh.queue.first()
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: refresh-thread-pool
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 593514af005044ce8e6207a5616270f2
              name: 'ES {#ES.NODE}: Refresh thread pool executor tasks rejected'
              type: DEPENDENT
              key: 'es.node.thread_pool.refresh.rejected.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of tasks rejected by the refresh thread pool executor.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..thread_pool.refresh.rejected.first()
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: refresh-thread-pool
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: b416941e8e0141c6af27c62f052ac860
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.thread_pool.refresh.rejected.rate[{#ES.NODE}],5m)>0'
                  name: 'ES {#ES.NODE}: Refresh thread pool executor has rejected tasks'
                  event_name: 'ES {#ES.NODE}: Refresh thread pool executor has rejected tasks (for 5m)'
                  priority: WARNING
                  description: 'The number of tasks rejected by the refresh thread pool executor has been above 0 for 5 minutes.'
                  tags:
                    - tag: scope
                      value: notice
            - uuid: 63875f40cf7c4f8f842562be2adfb7c7
              name: 'ES {#ES.NODE}: Search thread pool active threads'
              type: DEPENDENT
              key: 'es.node.thread_pool.search.active[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of active threads in the search thread pool.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..thread_pool.search.active.first()
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: search-thread-pool
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: dc67ab311d4945aaae3347464785abb1
              name: 'ES {#ES.NODE}: Search thread pool executor tasks completed'
              type: DEPENDENT
              key: 'es.node.thread_pool.search.completed.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of tasks completed by the search thread pool executor.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..thread_pool.search.completed.first()
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: search-thread-pool
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: d11933b62131425d83ab09c6d5fd5e85
              name: 'ES {#ES.NODE}: Search thread pool tasks in queue'
              type: DEPENDENT
              key: 'es.node.thread_pool.search.queue[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of tasks in queue for the search thread pool.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..thread_pool.search.queue.first()
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: search-thread-pool
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 820ed330abc845919b1dada3cfa81387
              name: 'ES {#ES.NODE}: Search thread pool executor tasks rejected'
              type: DEPENDENT
              key: 'es.node.thread_pool.search.rejected.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of tasks rejected by the search thread pool executor.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..thread_pool.search.rejected.first()
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: search-thread-pool
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: 5cfd3f9924614a4283f1255752b672f0
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.thread_pool.search.rejected.rate[{#ES.NODE}],5m)>0'
                  name: 'ES {#ES.NODE}: Search thread pool executor has rejected tasks'
                  event_name: 'ES {#ES.NODE}: Search thread pool executor has rejected tasks (for 5m)'
                  priority: WARNING
                  description: 'The number of tasks rejected by the search thread pool executor has been above 0 for 5 minutes.'
                  tags:
                    - tag: scope
                      value: notice
            - uuid: 4b7dc34d78a64b24a8fd19af95e0f0bd
              name: 'ES {#ES.NODE}: Write thread pool active threads'
              type: DEPENDENT
              key: 'es.node.thread_pool.write.active[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of active threads in the write thread pool.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..thread_pool.write.active.first()
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: write-thread-pool
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: 5b5ac74702564bd490c7378adcf75c28
              name: 'ES {#ES.NODE}: Write thread pool executor tasks completed'
              type: DEPENDENT
              key: 'es.node.thread_pool.write.completed.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of tasks completed by the write thread pool executor.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..thread_pool.write.completed.first()
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: write-thread-pool
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: a0d8f8d896a546d1ade07c355992308d
              name: 'ES {#ES.NODE}: Write thread pool tasks in queue'
              type: DEPENDENT
              key: 'es.node.thread_pool.write.queue[{#ES.NODE}]'
              delay: '0'
              history: 7d
              description: 'The number of tasks in queue for the write thread pool.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..thread_pool.write.queue.first()
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: write-thread-pool
                - tag: node
                  value: '{#ES.NODE}'
            - uuid: a21213815a30485a88b183e7b40a4e7e
              name: 'ES {#ES.NODE}: Write thread pool executor tasks rejected'
              type: DEPENDENT
              key: 'es.node.thread_pool.write.rejected.rate[{#ES.NODE}]'
              delay: '0'
              history: 7d
              value_type: FLOAT
              units: rps
              description: 'The number of tasks rejected by the write thread pool executor.'
              preprocessing:
                - type: JSONPATH
                  parameters:
                    - $..thread_pool.write.rejected.first()
                - type: CHANGE_PER_SECOND
                  parameters:
                    - ''
              master_item:
                key: 'es.node.get.data[{#ES.NODE}]'
              tags:
                - tag: component
                  value: write-thread-pool
                - tag: node
                  value: '{#ES.NODE}'
              trigger_prototypes:
                - uuid: 1f5e30a34bb84cef831670974ec7c3e8
                  expression: 'min(/Elasticsearch Cluster by HTTP/es.node.thread_pool.write.rejected.rate[{#ES.NODE}],5m)>0'
                  name: 'ES {#ES.NODE}: Write thread pool executor has rejected tasks'
                  event_name: 'ES {#ES.NODE}: Write thread pool executor has rejected tasks (for 5m)'
                  priority: WARNING
                  description: 'The number of tasks rejected by the write thread pool executor has been above 0 for 5 minutes.'
                  tags:
                    - tag: scope
                      value: notice
          graph_prototypes:
            - uuid: 0725d91f63b64346bbef2c20d2901e9b
              name: 'ES {#ES.NODE}: Latency'
              graph_items:
                - color: 199C0D
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.indices.search.query_latency[{#ES.NODE}]'
                - sortorder: '1'
                  color: F63100
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.indices.indexing.index_latency[{#ES.NODE}]'
                - sortorder: '2'
                  color: 00611C
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.indices.search.fetch_latency[{#ES.NODE}]'
                - sortorder: '3'
                  color: F7941D
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.indices.flush.latency[{#ES.NODE}]'
            - uuid: 5196ed75f24d48f7ae55b564b8d925e5
              name: 'ES {#ES.NODE}: Query load'
              graph_items:
                - color: 199C0D
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.indices.search.fetch_current[{#ES.NODE}]'
                - sortorder: '1'
                  color: F63100
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.indices.search.query_current[{#ES.NODE}]'
            - uuid: 6d0e4fde0eeb47b6af4c9f40311b384d
              name: 'ES {#ES.NODE}: Refresh thread pool'
              graph_items:
                - color: 199C0D
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.thread_pool.refresh.active[{#ES.NODE}]'
                - sortorder: '1'
                  color: F63100
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.thread_pool.refresh.queue[{#ES.NODE}]'
                - sortorder: '2'
                  color: 00611C
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.thread_pool.refresh.completed.rate[{#ES.NODE}]'
                - sortorder: '3'
                  color: F7941D
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.thread_pool.refresh.rejected.rate[{#ES.NODE}]'
            - uuid: 9abe085b960d40bf8a99c88ec3564a8d
              name: 'ES {#ES.NODE}: Search thread pool'
              graph_items:
                - color: 199C0D
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.thread_pool.search.active[{#ES.NODE}]'
                - sortorder: '1'
                  color: F63100
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.thread_pool.search.queue[{#ES.NODE}]'
                - sortorder: '2'
                  color: 00611C
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.thread_pool.search.completed.rate[{#ES.NODE}]'
                - sortorder: '3'
                  color: F7941D
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.thread_pool.search.rejected.rate[{#ES.NODE}]'
            - uuid: 689492a2614642a1a5d3279c42b29e85
              name: 'ES {#ES.NODE}: Write thread pool'
              graph_items:
                - color: 199C0D
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.thread_pool.write.active[{#ES.NODE}]'
                - sortorder: '1'
                  color: F63100
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.thread_pool.write.queue[{#ES.NODE}]'
                - sortorder: '2'
                  color: 00611C
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.thread_pool.write.completed.rate[{#ES.NODE}]'
                - sortorder: '3'
                  color: F7941D
                  item:
                    host: 'Elasticsearch Cluster by HTTP'
                    key: 'es.node.thread_pool.write.rejected.rate[{#ES.NODE}]'
          timeout: 15s
          url: '{$ELASTICSEARCH.SCHEME}://{HOST.CONN}:{$ELASTICSEARCH.PORT}/_nodes/_all/nodes'
          lld_macro_paths:
            - lld_macro: '{#ES.NODE}'
              path: $..name.first()
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$.nodes.[*]'
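                # '$.nodes.[*]' flattens the per-node objects in the _nodes/_all/nodes response
                # into an array; each element's name becomes {#ES.NODE} via the LLD macro path above.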
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1d
      tags:
        - tag: class
          value: software
        - tag: target
          value: elasticsearch
      macros:
        - macro: '{$ELASTICSEARCH.FETCH_LATENCY.MAX.WARN}'
          value: '100'
          description: 'The maximum fetch latency in milliseconds for the trigger expression.'
        - macro: '{$ELASTICSEARCH.FLUSH_LATENCY.MAX.WARN}'
          value: '100'
          description: 'The maximum flush latency in milliseconds for the trigger expression.'
        - macro: '{$ELASTICSEARCH.HEAP_USED.MAX.CRIT}'
          value: '95'
          description: 'The maximum percentage of JVM heap in use for the critical trigger expression.'
        - macro: '{$ELASTICSEARCH.HEAP_USED.MAX.WARN}'
          value: '85'
          description: 'The maximum percentage of JVM heap in use for the warning trigger expression.'
        - macro: '{$ELASTICSEARCH.INDEXING_LATENCY.MAX.WARN}'
          value: '100'
          description: 'The maximum indexing latency in milliseconds for the trigger expression.'
        - macro: '{$ELASTICSEARCH.PASSWORD}'
          description: 'The Elasticsearch password.'
        - macro: '{$ELASTICSEARCH.PORT}'
          value: '9200'
          description: 'The port of the Elasticsearch host.'
        - macro: '{$ELASTICSEARCH.QUERY_LATENCY.MAX.WARN}'
          value: '100'
          description: 'The maximum query latency in milliseconds for the trigger expression.'
        - macro: '{$ELASTICSEARCH.RESPONSE_TIME.MAX.WARN}'
          value: 10s
          description: 'The maximum ES cluster response time in seconds for the trigger expression.'
        - macro: '{$ELASTICSEARCH.SCHEME}'
          value: http
          description: 'The scheme used to connect to Elasticsearch (http/https).'
        - macro: '{$ELASTICSEARCH.USERNAME}'
          description: 'The Elasticsearch username.'
      valuemaps:
        - uuid: d651bdf75d0849d5ab2b0802fab76e22
          name: 'ES cluster state'
          mappings:
            - value: '0'
              newvalue: green
            - value: '1'
              newvalue: yellow
            - value: '2'
              newvalue: red
            - value: '255'
              newvalue: unknown
        - uuid: 15d416d869894fdb959ca2cda2c5e37c
          name: 'Service state'
          mappings:
            - value: '0'
              newvalue: Down
            - value: '1'
              newvalue: Up
  triggers:
    - uuid: a2f33888d2774325b7955b633a7aae81
      expression: '(last(/Elasticsearch Cluster by HTTP/es.nodes.fs.total_in_bytes)-last(/Elasticsearch Cluster by HTTP/es.nodes.fs.available_in_bytes))/(last(/Elasticsearch Cluster by HTTP/es.cluster.number_of_data_nodes)-1)>last(/Elasticsearch Cluster by HTTP/es.nodes.fs.available_in_bytes)'
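      # A rough capacity check: the trigger fires when the currently used space
      # (total - available), divided across one fewer data node, would exceed the
      # space available now, i.e. the cluster likely cannot absorb the loss of a
      # data node during resharding.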
      name: 'ES: Cluster does not have enough space for resharding'
      priority: HIGH
      description: 'There is not enough disk space for index resharding.'
      tags:
        - tag: scope
          value: capacity
  graphs:
    - uuid: 50f90b092fa24658b83b131fa7a3f2ce
      name: 'ES: Cluster health'
      graph_items:
        - color: 199C0D
          item:
            host: 'Elasticsearch Cluster by HTTP'
            key: es.cluster.inactive_shards_percent_as_number
        - sortorder: '1'
          color: F63100
          item:
            host: 'Elasticsearch Cluster by HTTP'
            key: es.cluster.relocating_shards
        - sortorder: '2'
          color: 00611C
          item:
            host: 'Elasticsearch Cluster by HTTP'
            key: es.cluster.initializing_shards
        - sortorder: '3'
          color: F7941D
          item:
            host: 'Elasticsearch Cluster by HTTP'
            key: es.cluster.unassigned_shards
        - sortorder: '4'
          color: FC6EA3
          item:
            host: 'Elasticsearch Cluster by HTTP'
            key: es.cluster.delayed_unassigned_shards
        - sortorder: '5'
          color: 6C59DC
          item:
            host: 'Elasticsearch Cluster by HTTP'
            key: es.cluster.number_of_pending_tasks
        - sortorder: '6'
          color: C7A72D
          item:
            host: 'Elasticsearch Cluster by HTTP'
            key: es.cluster.task_max_waiting_in_queue