# Source path: zabbix/templates/app/nomad/template_app_nomad_http.yaml
# Size reported by the repository viewer: 6868 lines, 249 KiB

zabbix_export:
version: '7.0'
template_groups:
- uuid: a571c0d144b14fd4a87a9d9b2aa9fcd6
name: Templates/Applications
host_groups:
- uuid: a571c0d144b14fd4a87a9d9b2aa9fcd6
name: Applications
templates:
- uuid: f74adf26d9ab44ada318002d31fd2881
template: 'HashiCorp Nomad by HTTP'
name: 'HashiCorp Nomad by HTTP'
description: |
Discover HashiCorp Nomad servers and clients automatically.
Don't forget to change macro {$NOMAD.ENDPOINT.API.URL}, {$NOMAD.TOKEN} values.
You can discuss this template or leave feedback on our forum: https://www.zabbix.com/forum/zabbix-suggestions-and-feedback.
Generated by official Zabbix template tool "Templator" 2.0.0
vendor:
name: Zabbix
version: 7.0-0
groups:
- name: Templates/Applications
items:
# Dependent item: pulls the HTTP status line out of the `header` object of the
# raw `nomad.client.nodes.get` payload. Its trigger fires when the stored line
# does not contain {$NOMAD.API.RESPONSE.SUCCESS}.
- uuid: 50bf00cc5c9f41c887add07d0bba3cc1
  name: 'HashiCorp Nomad: Client nodes API response'
  type: DEPENDENT
  key: nomad.client.nodes.api.response
  delay: '0'
  history: 7d
  trends: '0'
  value_type: TEXT
  description: 'Client nodes API response message.'
  preprocessing:
    - type: JAVASCRIPT
      parameters:
        - |
          try {
              // Header keys that look like "HTTP/<ver> <3-digit code>"; the last match is returned.
              var response = Object.keys(JSON.parse(value).header).filter(function (f) {
                  return f.match(/HTTP\/[\d.]+\s+\d{3}/);
              });
              return response.pop();
          }
          catch (error) {
              // Any parsing failure is reported as a timeout status line.
              return "HTTP/1.1 408 Request timeout";
          }
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: nomad.client.nodes.get
  tags:
    - tag: component
      value: status
  triggers:
    - uuid: f2e1cbbf808946ca902c1f378747e936
      expression: 'find(/HashiCorp Nomad by HTTP/nomad.client.nodes.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
      name: 'HashiCorp Nomad: Client nodes API connection has failed'
      # The regsub output argument is the bare back-reference \1 (no unbalanced quote).
      event_name: 'HashiCorp Nomad: Client nodes API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", \1)}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", \1)}'
      priority: AVERAGE
      description: |
        Client nodes API connection has failed.
        Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
# HTTP agent master item: requests `/v1/nodes` from the configured API endpoint
# every hour and returns headers plus body as JSON for the dependent items.
- uuid: 99d8cd0768094bef93995e4baa475186
  name: 'HashiCorp Nomad: Nomad clients get'
  type: HTTP_AGENT
  key: nomad.client.nodes.get
  delay: 1h
  history: '0'
  trends: '0'
  value_type: TEXT
  description: 'Nomad clients data in raw format.'
  preprocessing:
    # A not-supported result is replaced by a synthetic 408 header object,
    # so dependent items still receive parsable JSON.
    - type: CHECK_NOT_SUPPORTED
      parameters:
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}'
  timeout: '{$NOMAD.DATA.TIMEOUT}'
  url: '{$NOMAD.ENDPOINT.API.URL}/v1/nodes'
  query_fields:
    - name: os
      value: '1'
  status_codes: ''
  http_proxy: '{$NOMAD.HTTP.PROXY}'
  headers:
    - name: X-Nomad-Token
      value: '{$NOMAD.TOKEN}'
  retrieve_mode: BOTH
  output_format: JSON
  tags:
    - tag: component
      value: raw
# Dependent item: counts the elements of `$.body` that carry a `Name` field
# in the payload of `nomad.client.nodes.get`.
- uuid: 7a46e18358e641cc941b306de6eb5dc4
  name: 'HashiCorp Nomad: Nomad clients count'
  type: DEPENDENT
  key: nomad.clients.count
  delay: '0'
  history: 7d
  description: 'Nomad clients count.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - '$.body[?(@.Name)].length()'
  master_item:
    key: nomad.client.nodes.get
# Dependent item: takes the first `region` value found anywhere in the
# payload of `nomad.server.nodes.get`.
- uuid: e848e135a5574931accf114fcafb89d7
  name: 'HashiCorp Nomad: Region'
  type: DEPENDENT
  key: nomad.region
  delay: '0'
  history: 7d
  trends: '0'
  value_type: CHAR
  description: 'Current cluster region.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $..region.first()
  master_item:
    key: nomad.server.nodes.get
  tags:
    - tag: component
      value: environment
# Dependent item: reads `$.error` from the payload of `nomad.server.nodes.get`.
# When that path cannot be extracted the value is replaced by
# 'HTTP/1.1 200 OK'. The trigger fires when the stored text does not contain
# {$NOMAD.API.RESPONSE.SUCCESS}.
- uuid: 53a0c689f27547ccbcb0a57c736a5027
  name: 'HashiCorp Nomad: Server-related APIs response'
  type: DEPENDENT
  key: nomad.server.api.response
  delay: '0'
  history: 7d
  trends: '0'
  value_type: TEXT
  description: 'Server-related (`operator/raft/configuration`, `agent/members`) APIs error response message.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.error
      error_handler: CUSTOM_VALUE
      error_handler_params: 'HTTP/1.1 200 OK'
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: nomad.server.nodes.get
  triggers:
    - uuid: 1560c32cb7de4fc8a58cf537c7958205
      expression: 'find(/HashiCorp Nomad by HTTP/nomad.server.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
      name: 'HashiCorp Nomad: Server-related API connection has failed'
      # The regsub output argument is the bare back-reference \1 (no unbalanced quote).
      event_name: 'HashiCorp Nomad: Server-related API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", \1)}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", \1)}'
      priority: AVERAGE
      description: |
        Server-related API connection has failed.
        Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
# Script master item: queries `v1/operator/raft/configuration` for server IDs,
# then `v1/agent/members`, and returns the members whose `Tags.id` is one of
# those IDs as a JSON array. On any failure it returns `{"error": "<message>"}`
# instead of throwing, so dependent items always receive JSON.
- uuid: 254354aa21764751bf7e1961e37de7ae
  name: 'HashiCorp Nomad: Nomad servers get'
  type: SCRIPT
  key: nomad.server.nodes.get
  delay: 1h
  history: '0'
  trends: '0'
  value_type: TEXT
  params: |
    var Nomad = {
        // Validates the script parameters and makes sure the endpoint ends with '/'.
        setParams: function (params) {
            ['api_endpoint'].forEach(function (field) {
                if (typeof params !== 'object' || typeof params[field] === 'undefined' || params[field] === '') {
                    throw 'Required param is not set: "' + field + '".';
                }
            });
            Nomad.params = params;
            if (typeof Nomad.params.api_endpoint === 'string' && !Nomad.params.api_endpoint.endsWith('/')) {
                Nomad.params.api_endpoint += '/';
            }
        },
        // Performs a GET against the endpoint; returns { status, response } with the parsed JSON body.
        request: function (query) {
            var response,
                request = new HttpRequest(),
                url = Nomad.params.api_endpoint + query;
            request.addHeader('Content-Type: application/json');
            request.addHeader('X-Nomad-Token: ' + Nomad.params.token);
            // set proxy if needed
            if (Nomad.params.http_proxy) {
                request.setProxy(Nomad.params.http_proxy);
                Zabbix.log(4, '[ Nomad ] Using http proxy: ' + Nomad.params.http_proxy);
            }
            Zabbix.log(4, '[ Nomad ] Sending request: ' + url);
            response = request.get(url);
            Zabbix.log(4, '[ Nomad ] Received response with status code ' + request.getStatus() + ': ' + response);
            if (response !== null) {
                try {
                    response = JSON.parse(response);
                }
                catch (error) {
                    throw 'Failed to parse response received from Nomad agent API.';
                }
            }
            return {
                status: request.getStatus(),
                response: response
            };
        },
        // Walks a dot-separated path through `data`; throws when any step is missing.
        getField: function (data, path) {
            var steps = path.split('.');
            for (var i = 0; i < steps.length; i++) {
                var step = steps[i];
                if (typeof data !== 'object' || typeof data[step] === 'undefined') {
                    throw 'Required field was not found: ' + path;
                }
                data = data[step];
            }
            return data;
        },
        // Returns the `ID` of every entry in the raft configuration's `Servers` list.
        getIds: function () {
            var result = this.request('v1/operator/raft/configuration');
            if (typeof result.response !== 'object' || result.status != 200) {
                throw 'Cannot get servers list from Nomad agent API.';
            }
            return this.getField(result, 'response.Servers')
                .map(function (srv) {
                    return srv['ID'];
                });
        },
        // Returns the agent members whose Tags.id appears in the raft server ID list.
        getServers: function () {
            var ids = this.getIds();
            // Declared locally so the script does not create an implicit global.
            var result = Nomad.request('v1/agent/members');
            if (typeof result.response !== 'object' || result.status != 200) {
                throw 'Cannot get servers list from Nomad agent API.';
            }
            return this.getField(result, 'response.Members').filter(function (s) {
                return ids.indexOf(s.Tags.id) >= 0;
            });
        }
    };
    try {
        Nomad.setParams(JSON.parse(value));
        var servers = Nomad.getServers();
        return JSON.stringify(servers);
    }
    catch (error) {
        // Normalise the message to end with a period before appending the hint.
        error += ((String(error).endsWith('.')) ? '' : '.');
        Zabbix.log(3, '[ Nomad ] ERROR: ' + error);
        return JSON.stringify({ error: error + ' Check debug log for more information.' });
    }
  description: 'Nomad servers data in raw format.'
  timeout: '{$NOMAD.DATA.TIMEOUT}'
  parameters:
    - name: api_endpoint
      value: '{$NOMAD.ENDPOINT.API.URL}'
    - name: token
      value: '{$NOMAD.TOKEN}'
    - name: http_proxy
      value: '{$NOMAD.HTTP.PROXY}'
  tags:
    - tag: component
      value: raw
# Dependent item: counts the top-level array elements that carry a `Name`
# field in the payload of `nomad.server.nodes.get`.
- uuid: 904e226008324cb8aa11f716e0420b51
  name: 'HashiCorp Nomad: Nomad servers count'
  type: DEPENDENT
  key: nomad.servers.count
  delay: '0'
  history: 7d
  description: 'Nomad servers count.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - '$[?(@.Name)].length()'
  master_item:
    key: nomad.server.nodes.get
discovery_rules:
# Discovery rule fed by `$.body` of `nomad.client.nodes.get`. Each discovered
# entry becomes a host prototype linked to 'HashiCorp Nomad Client by HTTP'.
# All six filter conditions (name, datacenter, scheduling eligibility; include
# and exclude for each) must hold.
- uuid: 0ba167e5638344ab842224df5b8909e1
  name: 'Clients discovery'
  type: DEPENDENT
  key: nomad.clients.discovery
  delay: '0'
  filter:
    evaltype: AND
    conditions:
      - macro: '{#CLIENT.NAME}'
        value: '{$NOMAD.CLIENT.NAME.MATCHES}'
        formulaid: C
      - macro: '{#CLIENT.NAME}'
        value: '{$NOMAD.CLIENT.NAME.NOT_MATCHES}'
        operator: NOT_MATCHES_REGEX
        formulaid: D
      - macro: '{#CLIENT.DC}'
        value: '{$NOMAD.CLIENT.DC.MATCHES}'
        formulaid: A
      - macro: '{#CLIENT.DC}'
        value: '{$NOMAD.CLIENT.DC.NOT_MATCHES}'
        operator: NOT_MATCHES_REGEX
        formulaid: B
      - macro: '{#CLIENT.SCHEDULE.ELIGIBILITY}'
        value: '{$NOMAD.CLIENT.SCHEDULE.ELIGIBILITY.MATCHES}'
        formulaid: E
      - macro: '{#CLIENT.SCHEDULE.ELIGIBILITY}'
        value: '{$NOMAD.CLIENT.SCHEDULE.ELIGIBILITY.NOT_MATCHES}'
        operator: NOT_MATCHES_REGEX
        formulaid: F
  lifetime: 7d
  description: 'Client nodes discovery.'
  host_prototypes:
    - uuid: 8284dcf3055749d3b27720526c665e34
      host: '{#CLIENT.ID}'
      name: '{#CLIENT.NAME}-client'
      group_links:
        - group:
            name: Applications
      templates:
        - name: 'HashiCorp Nomad Client by HTTP'
      tags:
        - tag: class
          value: '{#CLIENT.CLASS}'
        - tag: dc
          value: '{#CLIENT.DC}'
        - tag: drained
          value: '{#CLIENT.DRAIN.STATE}'
        - tag: os
          value: '{#CLIENT.OS}'
        - tag: scheduling
          value: '{#CLIENT.SCHEDULE.ELIGIBILITY}'
        - tag: service
          value: nomad-client
        - tag: version
          value: '{#CLIENT.VERSION}'
      custom_interfaces: 'YES'
      interfaces:
        - ip: '{#CLIENT.IP}'
  master_item:
    key: nomad.client.nodes.get
  # JSONPath of each LLD macro inside one discovered node object.
  lld_macro_paths:
    - lld_macro: '{#CLIENT.CLASS}'
      path: $.NodeClass
    - lld_macro: '{#CLIENT.DC}'
      path: $.Datacenter
    - lld_macro: '{#CLIENT.DRAIN.STATE}'
      path: $.Drain
    - lld_macro: '{#CLIENT.ID}'
      path: $.ID
    - lld_macro: '{#CLIENT.IP}'
      path: $.Address
    - lld_macro: '{#CLIENT.NAME}'
      path: $.Name
    - lld_macro: '{#CLIENT.OS}'
      path: '$.Attributes[''os.name'']'
    - lld_macro: '{#CLIENT.SCHEDULE.ELIGIBILITY}'
      path: $.SchedulingEligibility
    - lld_macro: '{#CLIENT.VERSION}'
      path: $.Version
  preprocessing:
    # A payload without `body` is dropped rather than turning the rule unsupported.
    - type: JSONPATH
      parameters:
        - $.body
      error_handler: DISCARD_VALUE
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
# Discovery rule fed by `nomad.server.nodes.get`. Each discovered member
# becomes a host prototype linked to 'HashiCorp Nomad Server by HTTP'.
# All four filter conditions (name and datacenter; include and exclude) must hold.
- uuid: 202ee1d0060d48b09a48fe2039d3b081
  name: 'Servers discovery'
  type: DEPENDENT
  key: nomad.servers.discovery
  delay: '0'
  filter:
    evaltype: AND
    conditions:
      - macro: '{#SERVER.NAME}'
        value: '{$NOMAD.SERVER.NAME.MATCHES}'
        formulaid: C
      - macro: '{#SERVER.NAME}'
        value: '{$NOMAD.SERVER.NAME.NOT_MATCHES}'
        operator: NOT_MATCHES_REGEX
        formulaid: D
      - macro: '{#SERVER.DC}'
        value: '{$NOMAD.SERVER.DC.MATCHES}'
        formulaid: A
      - macro: '{#SERVER.DC}'
        value: '{$NOMAD.SERVER.DC.NOT_MATCHES}'
        operator: NOT_MATCHES_REGEX
        formulaid: B
  lifetime: 7d
  description: 'Server nodes discovery.'
  host_prototypes:
    - uuid: addb1ce7995f44089a13128052171445
      host: '{#SERVER.ID}'
      name: '{#SERVER.NAME}'
      group_links:
        - group:
            name: Applications
      templates:
        - name: 'HashiCorp Nomad Server by HTTP'
      tags:
        - tag: dc
          value: '{#SERVER.DC}'
        - tag: region
          value: '{#SERVER.REGION}'
        - tag: role
          value: '{#SERVER.ROLE}'
        - tag: service
          value: nomad-server
        - tag: version
          value: '{#SERVER.VERSION}'
      custom_interfaces: 'YES'
      interfaces:
        - ip: '{#SERVER.IP}'
  master_item:
    key: nomad.server.nodes.get
  # JSONPath of each LLD macro inside one discovered member object.
  lld_macro_paths:
    - lld_macro: '{#SERVER.DC}'
      path: $.Tags.dc
    - lld_macro: '{#SERVER.ID}'
      path: $.Tags.id
    - lld_macro: '{#SERVER.IP}'
      path: $.Addr
    - lld_macro: '{#SERVER.NAME}'
      path: $.Name
    - lld_macro: '{#SERVER.REGION}'
      path: $.Tags.region
    - lld_macro: '{#SERVER.ROLE}'
      path: $.Tags.role
    - lld_macro: '{#SERVER.VERSION}'
      path: $.Tags.build
  preprocessing:
    # A payload carrying an `error` field is dropped instead of being processed.
    - type: CHECK_JSON_ERROR
      parameters:
        - $.error
      error_handler: DISCARD_VALUE
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
# Template-level tags.
tags:
  - tag: class
    value: software
  - tag: target
    value: nomad
# User macros of the 'HashiCorp Nomad by HTTP' template: the API endpoint,
# token, proxy and timeout used by the raw-data items, the success code used
# by the availability triggers, and the include/exclude discovery filters.
macros:
  - macro: '{$NOMAD.API.RESPONSE.SUCCESS}'
    value: '200'
    description: 'HTTP API successful response code. Availability triggers threshold. Change, if needed.'
  - macro: '{$NOMAD.CLIENT.DC.MATCHES}'
    value: '.*'
    description: 'The filter to include HashiCorp Nomad clients by datacenter belonging.'
  - macro: '{$NOMAD.CLIENT.DC.NOT_MATCHES}'
    value: CHANGE_IF_NEEDED
    description: 'The filter to exclude HashiCorp Nomad clients by datacenter belonging.'
  - macro: '{$NOMAD.CLIENT.NAME.MATCHES}'
    value: '.*'
    description: 'The filter to include HashiCorp Nomad clients by name.'
  - macro: '{$NOMAD.CLIENT.NAME.NOT_MATCHES}'
    value: CHANGE_IF_NEEDED
    description: 'The filter to exclude HashiCorp Nomad clients by name.'
  - macro: '{$NOMAD.CLIENT.SCHEDULE.ELIGIBILITY.MATCHES}'
    value: '.*'
    description: 'The filter to include HashiCorp Nomad clients by scheduling eligibility.'
  - macro: '{$NOMAD.CLIENT.SCHEDULE.ELIGIBILITY.NOT_MATCHES}'
    value: CHANGE_IF_NEEDED
    description: 'The filter to exclude HashiCorp Nomad clients by scheduling eligibility.'
  - macro: '{$NOMAD.DATA.TIMEOUT}'
    value: 15s
    description: 'Response timeout for an API.'
  - macro: '{$NOMAD.ENDPOINT.API.URL}'
    value: 'http://localhost:4646'
    description: 'API endpoint URL for one of the Nomad cluster members.'
  # No default value is defined for the proxy macro.
  - macro: '{$NOMAD.HTTP.PROXY}'
    description: 'Sets the HTTP proxy for script and HTTP agent items. If this parameter is empty, then no proxy is used.'
  - macro: '{$NOMAD.SERVER.DC.MATCHES}'
    value: '.*'
    description: 'The filter to include HashiCorp Nomad servers by datacenter belonging.'
  - macro: '{$NOMAD.SERVER.DC.NOT_MATCHES}'
    value: CHANGE_IF_NEEDED
    description: 'The filter to exclude HashiCorp Nomad servers by datacenter belonging.'
  - macro: '{$NOMAD.SERVER.NAME.MATCHES}'
    value: '.*'
    description: 'The filter to include HashiCorp Nomad servers by name.'
  - macro: '{$NOMAD.SERVER.NAME.NOT_MATCHES}'
    value: CHANGE_IF_NEEDED
    description: 'The filter to exclude HashiCorp Nomad servers by name.'
  - macro: '{$NOMAD.TOKEN}'
    value: '<PUT YOUR AUTH TOKEN>'
    description: 'Nomad authentication token.'
# One-page dashboard with three item widgets (Clients, Region, Servers), each
# 8 wide and 5 high, bound to the template's count and region items.
dashboards:
  - uuid: 0b1cb124081f4cf1af2e35245aeb415f
    name: 'Region resources'
    pages:
      - name: 'Region resources'
        widgets:
          - type: item
            name: Clients
            x: '16'
            width: '8'
            height: '5'
            fields:
              - type: INTEGER
                name: show
                value: '2'
              - type: INTEGER
                name: show
                value: '4'
              - type: INTEGER
                name: adv_conf
                value: '1'
              - type: INTEGER
                name: decimal_places
                value: '0'
              - type: ITEM
                name: itemid
                value:
                  host: 'HashiCorp Nomad by HTTP'
                  key: nomad.clients.count
          # No `x` is given for this widget.
          - type: item
            name: Region
            width: '8'
            height: '5'
            fields:
              - type: INTEGER
                name: show
                value: '2'
              - type: INTEGER
                name: show
                value: '4'
              - type: INTEGER
                name: adv_conf
                value: '1'
              - type: INTEGER
                name: decimal_places
                value: '0'
              - type: ITEM
                name: itemid
                value:
                  host: 'HashiCorp Nomad by HTTP'
                  key: nomad.region
          - type: item
            name: Servers
            x: '8'
            width: '8'
            height: '5'
            fields:
              - type: INTEGER
                name: show
                value: '2'
              - type: INTEGER
                name: show
                value: '4'
              - type: INTEGER
                name: adv_conf
                value: '1'
              - type: INTEGER
                name: decimal_places
                value: '0'
              - type: ITEM
                name: itemid
                value:
                  host: 'HashiCorp Nomad by HTTP'
                  key: nomad.servers.count
- uuid: 44eac6a1abe34999b85ad6d0e40073fd
template: 'HashiCorp Nomad Client by HTTP'
name: 'HashiCorp Nomad Client by HTTP'
description: |
Get HashiCorp Nomad client metrics by HTTP from metrics endpoint.
More information about metrics is available in the official documentation: https://developer.hashicorp.com/nomad/docs/operations/metrics-reference.
You can discuss this template or leave feedback on our forum: https://www.zabbix.com/forum/zabbix-suggestions-and-feedback.
Generated by official Zabbix template tool "Templator" 2.0.0
vendor:
name: Zabbix
version: 7.0-0
groups:
- name: Templates/Applications
items:
# Simple check: TCP probe of the port given by {$NOMAD.CLIENT.RPC.PORT}.
# The trigger fires when the last value is 0.
- uuid: f98bd789c9f7409aac2e1902776ddc21
  name: 'HashiCorp Nomad Client: Service [rpc] state'
  type: SIMPLE
  key: 'net.tcp.service[tcp,,{$NOMAD.CLIENT.RPC.PORT}]'
  history: 7d
  trends: 90d
  description: 'Current [rpc] service state.'
  valuemap:
    name: 'Service state'
  preprocessing:
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  tags:
    - tag: component
      value: network
  triggers:
    - uuid: f368ff4de45e4620963135836eb54092
      expression: 'last(/HashiCorp Nomad Client by HTTP/net.tcp.service[tcp,,{$NOMAD.CLIENT.RPC.PORT}]) = 0'
      name: 'HashiCorp Nomad Client: Service [rpc] is down'
      opdata: 'Service: rpc, Port: {$NOMAD.CLIENT.RPC.PORT}, State: {ITEM.LASTVALUE}'
      priority: AVERAGE
      description: |
        Cannot establish the connection to [rpc] service port {$NOMAD.CLIENT.RPC.PORT}.
        Check the Nomad state and network connectivity between Nomad and Zabbix.
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
# Simple check: TCP probe of the port given by {$NOMAD.CLIENT.SERF.PORT}.
# The trigger fires when the last value is 0.
- uuid: 00b39edd70cc42daa565879494f15f38
  name: 'HashiCorp Nomad Client: Service [serf] state'
  type: SIMPLE
  key: 'net.tcp.service[tcp,,{$NOMAD.CLIENT.SERF.PORT}]'
  history: 7d
  trends: 90d
  description: 'Current [serf] service state.'
  valuemap:
    name: 'Service state'
  preprocessing:
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  tags:
    - tag: component
      value: network
  triggers:
    - uuid: 0108c9f85b48433c8b9864e41458994f
      expression: 'last(/HashiCorp Nomad Client by HTTP/net.tcp.service[tcp,,{$NOMAD.CLIENT.SERF.PORT}]) = 0'
      name: 'HashiCorp Nomad Client: Service [serf] is down'
      opdata: 'Service: serf, Port: {$NOMAD.CLIENT.SERF.PORT}, State: {ITEM.LASTVALUE}'
      priority: AVERAGE
      description: |
        Cannot establish the connection to [serf] service port {$NOMAD.CLIENT.SERF.PORT}.
        Check the Nomad state and network connectivity between Nomad and Zabbix.
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
# Dependent item: value of the `nomad_client_allocated_cpu` metric taken from
# `nomad.client.metrics.get`; the value is discarded when extraction fails.
- uuid: 13aa2136b7634c32a56fe00b29056d2d
  name: 'HashiCorp Nomad Client: CPU allocated'
  type: DEPENDENT
  key: nomad.client.allocated.cpu
  delay: '0'
  history: 7d
  trends: 90d
  units: '!Mhz'
  description: 'Total amount of CPU shares the scheduler has allocated to tasks.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_allocated_cpu
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: cpu
# Dependent item: value of the `nomad_client_allocated_disk` metric taken from
# `nomad.client.metrics.get`, multiplied by 1000000 and stored with unit B.
- uuid: fb7211ae27c845bba8dca7e75e0083b8
  name: 'HashiCorp Nomad Client: Disk allocated'
  type: DEPENDENT
  key: nomad.client.allocated.disk
  delay: '0'
  history: 7d
  trends: 90d
  units: B
  description: 'Total amount of disk space the scheduler has allocated to tasks.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_allocated_disk
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters:
        - '1000000'
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: storage
# Dependent item: value of the `nomad_client_allocated_memory` metric taken from
# `nomad.client.metrics.get`, multiplied by 1000000 and stored with unit B.
- uuid: 27c71fbb37f14deba301c3b5c7daecb5
  name: 'HashiCorp Nomad Client: Memory allocated'
  type: DEPENDENT
  key: nomad.client.allocated.memory
  delay: '0'
  history: 7d
  trends: 90d
  units: B
  description: 'Total amount of memory the scheduler has allocated to tasks.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_allocated_memory
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters:
        - '1000000'
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: memory
# Dependent item: value of the `nomad_client_allocations_blocked` metric taken
# from `nomad.client.metrics.get`; falls back to 0 when extraction fails.
- uuid: 945d348f2f2d4c02b3a1ec6f32f1290d
  name: 'HashiCorp Nomad Client: Allocations blocked'
  type: DEPENDENT
  key: nomad.client.allocations.blocked
  delay: '0'
  history: 7d
  trends: 90d
  description: 'Number of allocations waiting for previous versions.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_allocations_blocked
        - value
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: allocations
# Dependent item: sum over `nomad_client_allocs_complete` (0 when extraction
# fails), converted to a per-second change.
- uuid: f556557bf5de438d9aaf941ab5c8d93b
  name: 'HashiCorp Nomad Client: Allocations completed, rate'
  type: DEPENDENT
  key: nomad.client.allocations.complete
  delay: '0'
  history: 7d
  trends: 90d
  description: 'Number of allocations completed.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_allocs_complete
        - function
        - sum
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: allocations
# Dependent item: sum over `nomad_client_allocs_failed` (0 when extraction
# fails), converted to a per-second change.
- uuid: 586bc8f63e7f46cf99337c7f70809c43
  name: 'HashiCorp Nomad Client: Allocations failed, rate'
  type: DEPENDENT
  key: nomad.client.allocations.failed
  delay: '0'
  history: 7d
  trends: 90d
  description: 'Number of allocations failed.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_allocs_failed
        - function
        - sum
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: allocations
# Dependent item: value of the `nomad_client_allocations_migrating` metric taken
# from `nomad.client.metrics.get`; falls back to 0 when extraction fails.
- uuid: 88b2eb8e9e8041df83674d3800b1f8c1
  name: 'HashiCorp Nomad Client: Allocations migrating'
  type: DEPENDENT
  key: nomad.client.allocations.migrating
  delay: '0'
  history: 7d
  trends: 90d
  description: 'Number of allocations migrating data from previous versions.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_allocations_migrating
        - value
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: allocations
# Dependent item: value of the `nomad_client_allocs_oom_killed` metric (0 when
# extraction fails). The trigger fires when the last value is above 0.
- uuid: 717172608fbb4d81bfb65e7f4aa4aaf3
  name: 'HashiCorp Nomad Client: Allocations OOM killed'
  type: DEPENDENT
  key: nomad.client.allocations.oom_killed
  delay: '0'
  history: 7d
  trends: 90d
  description: 'Number of allocations OOM killed.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_allocs_oom_killed
        - value
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: allocations
  triggers:
    - uuid: 747de32c46cb4d95851cee8a30c576b0
      expression: 'last(/HashiCorp Nomad Client by HTTP/nomad.client.allocations.oom_killed) > 0'
      name: 'HashiCorp Nomad Client: OOM killed allocations found'
      opdata: 'OOM killed: {ITEM.LASTVALUE}'
      priority: WARNING
      description: 'OOM killed allocations found.'
      manual_close: 'YES'
      tags:
        - tag: scope
          value: performance
# Dependent item: value of the `nomad_client_allocations_pending` metric taken
# from `nomad.client.metrics.get`; falls back to 0 when extraction fails.
- uuid: 9fc77ab432c04cf4b1c0f1941da641d3
  name: 'HashiCorp Nomad Client: Allocations pending'
  type: DEPENDENT
  key: nomad.client.allocations.pending
  delay: '0'
  history: 7d
  trends: 90d
  description: 'Number of allocations pending (received by the client but not yet running).'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_allocations_pending
        - value
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: allocations
# Dependent item: sum over `nomad_client_allocs_restart` (0 when extraction
# fails), converted to a per-second change.
- uuid: eae33da538c642868cf9afe9e6495c95
  name: 'HashiCorp Nomad Client: Allocations restarted, rate'
  type: DEPENDENT
  key: nomad.client.allocations.restart
  delay: '0'
  history: 7d
  trends: 90d
  description: 'Number of allocations restarted.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_allocs_restart
        - function
        - sum
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: allocations
# Dependent item: value of the `nomad_client_allocations_running` metric taken
# from `nomad.client.metrics.get`; falls back to 0 when extraction fails.
- uuid: 6a4903e8bfa04f18a44813349ea6b33d
  name: 'HashiCorp Nomad Client: Allocations running'
  type: DEPENDENT
  key: nomad.client.allocations.running
  delay: '0'
  history: 7d
  trends: 90d
  description: 'Number of allocations running.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_allocations_running
        - value
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: allocations
# Dependent item: value of the `nomad_client_allocations_start` metric taken
# from `nomad.client.metrics.get`; falls back to 0 when extraction fails.
- uuid: 3fcbebb8436d4dec8db6625e44520999
  name: 'HashiCorp Nomad Client: Allocations starting'
  type: DEPENDENT
  key: nomad.client.allocations.start
  delay: '0'
  history: 7d
  trends: 90d
  description: 'Number of allocations starting.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_allocations_start
        - value
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: allocations
# Dependent item: value of the `nomad_client_allocations_terminal` metric taken
# from `nomad.client.metrics.get`; falls back to 0 when extraction fails.
- uuid: 166aad9bafeb478abcd8c61ea1d0cc98
  name: 'HashiCorp Nomad Client: Allocations terminal'
  type: DEPENDENT
  key: nomad.client.allocations.terminal
  delay: '0'
  history: 7d
  trends: 90d
  description: 'Number of allocations terminal.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_allocations_terminal
        - value
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '0'
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: allocations
# Dependent item: average over `nomad_client_host_cpu_idle` taken from
# `nomad.client.metrics.get`; the value is discarded when extraction fails.
- uuid: 31faeb4af30f4f878050eca21e6f97df
  name: 'HashiCorp Nomad Client: CPU idle utilization'
  type: DEPENDENT
  key: nomad.client.cpu.idle
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: '%'
  description: 'CPU utilization in idle state.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_host_cpu_idle
        - function
        - avg
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: cpu
# Dependent item: average over `nomad_client_host_cpu_system` taken from
# `nomad.client.metrics.get`; the value is discarded when extraction fails.
- uuid: ad75d4b5e69c4e11bc886582791951be
  name: 'HashiCorp Nomad Client: CPU system utilization'
  type: DEPENDENT
  key: nomad.client.cpu.system
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: '%'
  description: 'CPU utilization in system space.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_host_cpu_system
        - function
        - avg
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: cpu
# Dependent item: average over `nomad_client_host_cpu_total` (discarded when
# extraction fails). The trigger fires when the 10-minute minimum is at or
# above {$NOMAD.CPU.UTIL.MIN}.
- uuid: 71eb3676049a4d60aa69ef3901005894
  name: 'HashiCorp Nomad Client: CPU total utilization'
  type: DEPENDENT
  key: nomad.client.cpu.total
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: '%'
  description: 'Total CPU utilization.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_host_cpu_total
        - function
        - avg
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: cpu
  triggers:
    - uuid: 59db4f1d22cb48fab0ced8ad9ec39a25
      expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.cpu.total, 10m) >= {$NOMAD.CPU.UTIL.MIN}'
      name: 'HashiCorp Nomad Client: High CPU utilization'
      event_name: 'HashiCorp Nomad Client: High CPU utilization: (over >= {$NOMAD.CPU.UTIL.MIN}% over last 10m)'
      opdata: 'Current utilization: {ITEM.LASTVALUE}'
      priority: AVERAGE
      description: 'CPU utilization is too high. The system might be slow to respond.'
      tags:
        - tag: scope
          value: performance
# Dependent item: average over `nomad_client_host_cpu_user` taken from
# `nomad.client.metrics.get`; the value is discarded when extraction fails.
- uuid: cc0671cc71fb4bdd8cdf7a959e1022fd
  name: 'HashiCorp Nomad Client: CPU user utilization'
  type: DEPENDENT
  key: nomad.client.cpu.user
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: '%'
  description: 'CPU utilization in user space.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_host_cpu_user
        - function
        - avg
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: cpu
# Dependent item: pulls the HTTP status line out of the `header` object of the
# raw `nomad.client.data.get` payload. Its trigger fires when the stored line
# does not contain {$NOMAD.API.RESPONSE.SUCCESS}.
- uuid: 71be24cf848a448dbb993a577015bd9e
  name: 'HashiCorp Nomad Client: Monitoring API response'
  type: DEPENDENT
  key: nomad.client.data.api.response
  delay: '0'
  history: 7d
  trends: '0'
  value_type: TEXT
  description: 'Monitoring API response message.'
  preprocessing:
    - type: JAVASCRIPT
      parameters:
        - |
          try {
              // Header keys that look like "HTTP/<ver> <3-digit code>"; the last match is returned.
              var response = Object.keys(JSON.parse(value).header).filter(function (f) {
                  return f.match(/HTTP\/[\d.]+\s+\d{3}/);
              });
              return response.pop();
          }
          catch (error) {
              // Any parsing failure is reported as a timeout status line.
              return "HTTP/1.1 408 Request timeout";
          }
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: nomad.client.data.get
  tags:
    - tag: component
      value: status
  triggers:
    - uuid: 676e1535cde2424cbda78f18cd9084bf
      expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
      name: 'HashiCorp Nomad Client: Monitoring API connection has failed'
      # The regsub output argument is the bare back-reference \1 (no unbalanced quote).
      event_name: 'HashiCorp Nomad Client: Monitoring API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", \1)}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", \1)}'
      priority: AVERAGE
      description: |
        Monitoring API connection has failed.
        Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
# HTTP agent master item: requests `/v1/metrics?format=prometheus` from the
# host's own IP and returns headers plus body as JSON for the dependent items.
# No `delay` is set on this item.
- uuid: fe07fd88f3e14273b3f0b135aab26236
  name: 'HashiCorp Nomad Client: Telemetry get'
  type: HTTP_AGENT
  key: nomad.client.data.get
  history: '0'
  trends: '0'
  value_type: TEXT
  description: 'Telemetry data in raw format.'
  preprocessing:
    # A not-supported result is replaced by a synthetic 408 header object,
    # so dependent items still receive parsable JSON.
    - type: CHECK_NOT_SUPPORTED
      parameters:
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}'
  timeout: '{$NOMAD.DATA.TIMEOUT}'
  url: '{$NOMAD.CLIENT.API.SCHEME}://{HOST.IP}:{$NOMAD.CLIENT.API.PORT}/v1/metrics'
  query_fields:
    - name: format
      value: prometheus
  status_codes: ''
  http_proxy: '{$NOMAD.HTTP.PROXY}'
  headers:
    - name: X-Nomad-Token
      value: '{$NOMAD.TOKEN}'
  retrieve_mode: BOTH
  output_format: JSON
  tags:
    - tag: component
      value: raw
# Dependent item: pulls the HTTP status line out of the `header` object of the
# raw `nomad.client.job.allocs.get` payload. Its trigger fires when the stored
# line does not contain {$NOMAD.API.RESPONSE.SUCCESS}, and depends on the
# 'Monitoring API connection has failed' trigger.
- uuid: c9bfa74720ea4e228e5d200ab1f38074
  name: 'HashiCorp Nomad Client: Allocations API response'
  type: DEPENDENT
  key: nomad.client.job.allocs.api.response
  delay: '0'
  history: 7d
  trends: '0'
  value_type: TEXT
  description: 'Allocations API response message.'
  preprocessing:
    - type: JAVASCRIPT
      parameters:
        - |
          try {
              // Header keys that look like "HTTP/<ver> <3-digit code>"; the last match is returned.
              var response = Object.keys(JSON.parse(value).header).filter(function (f) {
                  return f.match(/HTTP\/[\d.]+\s+\d{3}/);
              });
              return response.pop();
          }
          catch (error) {
              // Any parsing failure is reported as a timeout status line.
              return "HTTP/1.1 408 Request timeout";
          }
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: nomad.client.job.allocs.get
  tags:
    - tag: component
      value: status
  triggers:
    - uuid: 300f5fba0f45465aa6868a3f1d16633d
      expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.job.allocs.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
      name: 'HashiCorp Nomad Client: Allocations API connection has failed'
      # The regsub output argument is the bare back-reference \1 (no unbalanced quote).
      event_name: 'HashiCorp Nomad Client: Allocations API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", \1)}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", \1)}'
      priority: AVERAGE
      description: |
        Allocations API connection has failed.
        Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
      manual_close: 'YES'
      dependencies:
        - name: 'HashiCorp Nomad Client: Monitoring API connection has failed'
          expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
      tags:
        - tag: scope
          value: availability
# HTTP agent master item: requests `/v1/allocations` from the host's own IP
# every hour, filtered to allocations on this node ({HOST.HOST}) whose desired
# status is "run" and client status is "running". Returns headers plus body as JSON.
- uuid: 9948608c5b3a4dad9d1b433483296427
  name: 'HashiCorp Nomad Client: Allocated jobs get'
  type: HTTP_AGENT
  key: nomad.client.job.allocs.get
  delay: 1h
  history: '0'
  trends: '0'
  value_type: TEXT
  description: 'Allocated jobs data in raw format.'
  preprocessing:
    # A not-supported result is replaced by a synthetic 408 header object,
    # so dependent items still receive parsable JSON.
    - type: CHECK_NOT_SUPPORTED
      parameters:
        - ''
      error_handler: CUSTOM_VALUE
      error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}'
  timeout: '{$NOMAD.DATA.TIMEOUT}'
  url: '{$NOMAD.CLIENT.API.SCHEME}://{HOST.IP}:{$NOMAD.CLIENT.API.PORT}/v1/allocations'
  query_fields:
    - name: reverse
      value: '1'
    # This query field is sent without a value.
    - name: task_states
    - name: filter
      value: 'NodeID == "{HOST.HOST}" and DesiredStatus == "run" and ClientStatus == "running"'
  status_codes: ''
  http_proxy: '{$NOMAD.HTTP.PROXY}'
  headers:
    - name: X-Nomad-Token
      value: '{$NOMAD.TOKEN}'
  retrieve_mode: BOTH
  output_format: JSON
  tags:
    - tag: component
      value: raw
# Dependent item: value of the `nomad_client_host_memory_available` metric taken
# from `nomad.client.metrics.get`; the value is discarded when extraction fails.
- uuid: f312a5ba5758426d83c5c3c414d765fd
  name: 'HashiCorp Nomad Client: Memory available'
  type: DEPENDENT
  key: nomad.client.memory.available
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: B
  description: 'Total amount of memory available to processes which includes free and cached memory.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_host_memory_available
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: memory
# Dependent item: value of the `nomad_client_host_memory_free` metric taken from
# `nomad.client.metrics.get`. No error handler is configured on the extraction step.
- uuid: e379e923ed6b45a682f1da192b3a328d
  name: 'HashiCorp Nomad Client: Memory free'
  type: DEPENDENT
  key: nomad.client.memory.free
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: B
  description: 'Amount of memory which is free.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_host_memory_free
        - value
        - ''
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: memory
# Dependent item: value of the `nomad_client_host_memory_total` metric taken from
# `nomad.client.metrics.get`. No error handler is configured on the extraction step.
- uuid: 50d152cf3c654e818aa7303841d5444a
  name: 'HashiCorp Nomad Client: Memory size'
  type: DEPENDENT
  key: nomad.client.memory.total
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: B
  description: 'Total amount of physical memory on the node.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_host_memory_total
        - value
        - ''
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: memory
# Dependent item: value of the `nomad_client_host_memory_used` metric taken from
# `nomad.client.metrics.get`. No error handler is configured on the extraction step.
- uuid: b2638e0b928e46cd825304a3f1ca1682
  name: 'HashiCorp Nomad Client: Memory used'
  type: DEPENDENT
  key: nomad.client.memory.used
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: B
  description: 'Amount of memory used by processes.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_client_host_memory_used
        - value
        - ''
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: memory
# Intermediate dependent item: passes `$.body` of `nomad.client.data.get` on to
# the Prometheus-pattern items; a payload without `body` is discarded.
- uuid: 933621a61d2241d084b48ec6b3bd5b74
  name: 'HashiCorp Nomad Client: Metrics'
  type: DEPENDENT
  key: nomad.client.metrics.get
  delay: '0'
  history: '0'
  trends: '0'
  value_type: TEXT
  description: 'Nomad client metrics in raw format.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.body
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.client.data.get
  tags:
    - tag: component
      value: raw
# Dependent item: pulls the HTTP status line out of the `header` object of the
# raw `nomad.client.node.info.get` payload. Its trigger fires when the stored
# line does not contain {$NOMAD.API.RESPONSE.SUCCESS}, and depends on the
# 'Monitoring API connection has failed' trigger.
- uuid: 39430d6dceaa42a68c72ef68f67b1a12
  name: 'HashiCorp Nomad Client: Nodes API response'
  type: DEPENDENT
  key: nomad.client.node.info.api.response
  delay: '0'
  history: 7d
  trends: '0'
  value_type: TEXT
  description: 'Nodes API response message.'
  preprocessing:
    - type: JAVASCRIPT
      parameters:
        - |
          try {
              // Header keys that look like "HTTP/<ver> <3-digit code>"; the last match is returned.
              var response = Object.keys(JSON.parse(value).header).filter(function (f) {
                  return f.match(/HTTP\/[\d.]+\s+\d{3}/);
              });
              return response.pop();
          }
          catch (error) {
              // Any parsing failure is reported as a timeout status line.
              return "HTTP/1.1 408 Request timeout";
          }
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: nomad.client.node.info.get
  tags:
    - tag: component
      value: status
  triggers:
    - uuid: 3ea14e12d6154492a4505c4b51003ed3
      expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.node.info.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
      name: 'HashiCorp Nomad Client: Nodes API connection has failed'
      # The regsub output argument is the bare back-reference \1 (no unbalanced quote).
      event_name: 'HashiCorp Nomad Client: Nodes API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", \1)}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", \1)}'
      priority: AVERAGE
      description: |
        Nodes API connection has failed.
        Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
      manual_close: 'YES'
      dependencies:
        - name: 'HashiCorp Nomad Client: Monitoring API connection has failed'
          expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
      tags:
        - tag: scope
          value: availability
# Master HTTP agent item: requests /v1/nodes once per hour and keeps no history;
# dependent items in this template read its "header" and "body" fields.
- uuid: 36eb68c80b194a4591208cf210fb6d6e
name: 'HashiCorp Nomad Client: Node info get'
type: HTTP_AGENT
key: nomad.client.node.info.get
delay: 1h
history: '0'
trends: '0'
value_type: TEXT
description: 'Node info data in raw format.'
preprocessing:
# When the check fails, substitute a JSON object whose only header key is a
# 408 status line, so dependants still receive parseable JSON.
- type: CHECK_NOT_SUPPORTED
parameters:
- ''
error_handler: CUSTOM_VALUE
error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}'
timeout: '{$NOMAD.DATA.TIMEOUT}'
url: '{$NOMAD.CLIENT.API.SCHEME}://{HOST.IP}:{$NOMAD.CLIENT.API.PORT}/v1/nodes'
query_fields:
# NOTE(review): presumably the host's technical name is expected to equal the
# Nomad node ID - confirm against how hosts are created.
- name: filter
value: 'ID == "{HOST.HOST}"'
status_codes: ''
http_proxy: '{$NOMAD.HTTP.PROXY}'
headers:
- name: X-Nomad-Token
value: '{$NOMAD.TOKEN}'
# Headers and body are both retrieved and wrapped in JSON.
retrieve_mode: BOTH
output_format: JSON
tags:
- tag: component
value: raw
- uuid: 18e57950b0c041e0a2abc0dbd0644b33
name: 'HashiCorp Nomad Client: CPU unallocated'
type: DEPENDENT
key: nomad.client.unallocated.cpu
delay: '0'
history: 7d
trends: 90d
units: '!Mhz'
description: 'Total amount of CPU shares free for the scheduler to allocate to tasks.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_client_unallocated_cpu
- value
- ''
error_handler: DISCARD_VALUE
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: cpu
- uuid: 44619e3740a9403581d54c6555ecd967
name: 'HashiCorp Nomad Client: Disk unallocated'
type: DEPENDENT
key: nomad.client.unallocated.disk
delay: '0'
history: 7d
trends: 90d
units: B
description: 'Total amount of disk space free for the scheduler to allocate to tasks.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_client_unallocated_disk
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1000000'
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: storage
- uuid: bc0928fcff2f458f83874d7442274e61
name: 'HashiCorp Nomad Client: Memory unallocated'
type: DEPENDENT
key: nomad.client.unallocated.memory
delay: '0'
history: 7d
trends: 90d
units: B
description: 'Total amount of memory free for the scheduler to allocate to tasks.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_client_unallocated_memory
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1000000'
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: memory
- uuid: 3561fb69284f4c88a7f5d8c7f04c3b14
name: 'HashiCorp Nomad Client: Uptime'
type: DEPENDENT
key: nomad.client.uptime
delay: '0'
history: 7d
trends: 90d
units: uptime
description: 'Uptime of the host running the Nomad client.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_client_uptime
- value
- ''
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: system
triggers:
- uuid: 74fe77871e8c4161b5944f30dd4d1c84
expression: 'last(/HashiCorp Nomad Client by HTTP/nomad.client.uptime) < 10m'
name: 'HashiCorp Nomad Client: The host has been restarted'
event_name: 'HashiCorp Nomad Client: The host has been restarted: (uptime < 10m)'
priority: WARNING
description: 'The host uptime is less than 10 minutes.'
manual_close: 'YES'
tags:
- tag: scope
value: notice
- uuid: 872961bd507c4f6380168d2284ead1ae
name: 'HashiCorp Nomad Client: Nomad client version'
type: DEPENDENT
key: nomad.client.version
delay: '0'
history: 7d
trends: '0'
value_type: CHAR
description: 'Nomad client version.'
preprocessing:
- type: JSONPATH
parameters:
- $.body..Version.first()
master_item:
key: nomad.client.node.info.get
triggers:
- uuid: afac97bced6f49bc994ae633b662722c
expression: 'change(/HashiCorp Nomad Client by HTTP/nomad.client.version)<>0'
name: 'HashiCorp Nomad Client: Nomad client version has changed'
event_name: 'HashiCorp Nomad Client: Nomad client version has changed to {ITEM.LASTVALUE}'
priority: INFO
description: 'Nomad client version has changed.'
manual_close: 'YES'
tags:
- tag: scope
value: availability
discovery_rules:
- uuid: c41a6405aea7400fb90c207ce95c6594
name: 'Allocated jobs discovery'
type: DEPENDENT
key: nomad.client.alloc.discovery
delay: '0'
filter:
evaltype: AND
conditions:
- macro: '{#JOB.NAME}'
value: '{$NOMAD.JOB.NAME.MATCHES}'
formulaid: A
- macro: '{#JOB.NAME}'
value: '{$NOMAD.JOB.NAME.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: B
- macro: '{#JOB.TASK.GROUP}'
value: '{$NOMAD.JOB.TASK.GROUP.MATCHES}'
formulaid: E
- macro: '{#JOB.TASK.GROUP}'
value: '{$NOMAD.JOB.TASK.GROUP.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: F
- macro: '{#JOB.NAMESPACE}'
value: '{$NOMAD.JOB.NAMESPACE.MATCHES}'
formulaid: C
- macro: '{#JOB.NAMESPACE}'
value: '{$NOMAD.JOB.NAMESPACE.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: D
- macro: '{#JOB.TYPE}'
value: '{$NOMAD.JOB.TYPE.MATCHES}'
formulaid: G
- macro: '{#JOB.TYPE}'
value: '{$NOMAD.JOB.TYPE.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: H
lifetime: '0'
description: 'Allocated jobs discovery.'
item_prototypes:
- uuid: ba9e5a022dbc4f7aa28a1db2ef6d73bb
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU allocated'
type: DEPENDENT
key: 'nomad.client.allocs.cpu.allocated["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: '!Mhz'
description: 'Total CPU resources allocated by the ["{#JOB.NAME}"] job across all cores.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_allocs_cpu_allocated{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
- function
- avg
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: cpu
- tag: component
value: job
- tag: job
value: '{#JOB.NAME}'
- tag: namespace
value: '{#JOB.NAMESPACE}'
- tag: task-group
value: '{#JOB.TASK.GROUP}'
- uuid: 85daa0ce6d4f41ab83a3a65ceb8359a9
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU system utilization'
type: DEPENDENT
key: 'nomad.client.allocs.cpu.system["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: '%'
description: 'Total CPU resources consumed by the ["{#JOB.NAME}"] job in system space.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_allocs_cpu_system{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
- function
- avg
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: cpu
- tag: component
value: job
- tag: job
value: '{#JOB.NAME}'
- tag: namespace
value: '{#JOB.NAMESPACE}'
- tag: task-group
value: '{#JOB.TASK.GROUP}'
- uuid: d5ec55f8d9a74c99924a01acb206295e
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU throttled periods time'
type: DEPENDENT
key: 'nomad.client.allocs.cpu.throttled_periods["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Total number of CPU periods that the ["{#JOB.NAME}"] job was throttled.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_allocs_cpu_throttled_periods{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
- function
- avg
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: cpu
- tag: component
value: job
- tag: job
value: '{#JOB.NAME}'
- tag: namespace
value: '{#JOB.NAMESPACE}'
- tag: task-group
value: '{#JOB.TASK.GROUP}'
- uuid: 3122f6d0c1b24ea7bac7a3af4fc00fb2
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU throttled time'
type: DEPENDENT
key: 'nomad.client.allocs.cpu.throttled_time["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Total time that the ["{#JOB.NAME}"] job was throttled.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_allocs_cpu_throttled_time{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
- function
- avg
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: cpu
- tag: component
value: job
- tag: job
value: '{#JOB.NAME}'
- tag: namespace
value: '{#JOB.NAMESPACE}'
- tag: task-group
value: '{#JOB.TASK.GROUP}'
- uuid: 5ff1ea02768243f4971339bf70f89772
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU total utilization'
type: DEPENDENT
key: 'nomad.client.allocs.cpu.total_percent["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: '%'
description: 'Total CPU resources consumed by the ["{#JOB.NAME}"] job across all cores.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_allocs_cpu_total_percent{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
- function
- avg
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: cpu
- tag: component
value: job
- tag: job
value: '{#JOB.NAME}'
- tag: namespace
value: '{#JOB.NAMESPACE}'
- tag: task-group
value: '{#JOB.TASK.GROUP}'
- uuid: 14179529be9d47bc898a5330ff37a9c5
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU ticks'
type: DEPENDENT
key: 'nomad.client.allocs.cpu.total_ticks["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
description: 'CPU ticks consumed by the process for the ["{#JOB.NAME}"] job in the last collection interval.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_allocs_cpu_total_ticks{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
- function
- avg
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: cpu
- tag: component
value: job
- tag: job
value: '{#JOB.NAME}'
- tag: namespace
value: '{#JOB.NAMESPACE}'
- tag: task-group
value: '{#JOB.TASK.GROUP}'
- uuid: 9fb3e3b461874e86aee9483d5af65535
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU user utilization'
type: DEPENDENT
key: 'nomad.client.allocs.cpu.user["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: '%'
description: 'Total CPU resources consumed by the ["{#JOB.NAME}"] job in user space.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_allocs_cpu_user{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
- function
- avg
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: cpu
- tag: component
value: job
- tag: job
value: '{#JOB.NAME}'
- tag: namespace
value: '{#JOB.NAMESPACE}'
- tag: task-group
value: '{#JOB.TASK.GROUP}'
- uuid: c353af0be02b47faa27036dfdd173176
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] Memory allocated'
type: DEPENDENT
key: 'nomad.client.allocs.memory.allocated["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: B
description: 'Amount of memory allocated by the ["{#JOB.NAME}"] job.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_allocs_memory_allocated{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
- function
- avg
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: job
- tag: component
value: memory
- tag: job
value: '{#JOB.NAME}'
- tag: namespace
value: '{#JOB.NAMESPACE}'
- tag: task-group
value: '{#JOB.TASK.GROUP}'
- uuid: 7bd7a486864b4d0f82242676da2370ed
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] Memory cached'
type: DEPENDENT
key: 'nomad.client.allocs.memory.cache["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: B
description: 'Amount of memory cached by the ["{#JOB.NAME}"] job.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_allocs_memory_cache{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
- function
- avg
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: job
- tag: component
value: memory
- tag: job
value: '{#JOB.NAME}'
- tag: namespace
value: '{#JOB.NAMESPACE}'
- tag: task-group
value: '{#JOB.TASK.GROUP}'
- uuid: b1419d5017cf4f8ba23e8c774c451772
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] Memory swapped'
type: DEPENDENT
key: 'nomad.client.allocs.memory.swap["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: B
description: 'Amount of memory swapped by the ["{#JOB.NAME}"] job.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_allocs_memory_swap{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
- function
- avg
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: job
- tag: component
value: memory
- tag: job
value: '{#JOB.NAME}'
- tag: namespace
value: '{#JOB.NAMESPACE}'
- tag: task-group
value: '{#JOB.TASK.GROUP}'
- uuid: 09ba4987f69c496ebeab3fb08f4b499b
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] Memory used'
type: DEPENDENT
key: 'nomad.client.allocs.memory.usage["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: B
description: 'Total amount of memory used by the ["{#JOB.NAME}"] job.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_allocs_memory_usage{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
- function
- avg
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: job
- tag: component
value: memory
- tag: job
value: '{#JOB.NAME}'
- tag: namespace
value: '{#JOB.NAMESPACE}'
- tag: task-group
value: '{#JOB.TASK.GROUP}'
graph_prototypes:
- uuid: 2c82c2841f414986b26fc6890707d36d
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] memory utilization'
type: STACKED
graph_items:
- color: 199C0D
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.allocs.memory.allocated["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
- sortorder: '1'
color: F63100
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.allocs.memory.cache["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
- sortorder: '2'
color: 00611C
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.allocs.memory.swap["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
- sortorder: '3'
color: F7941D
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.allocs.memory.usage["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
- uuid: bb8bbdb0180f4b35800144deec4456bb
name: 'HashiCorp Nomad Client: Job [{#JOB.NAME}] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] CPU throttling'
graph_items:
- color: 199C0D
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.allocs.cpu.throttled_time["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
- sortorder: '1'
color: F63100
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.allocs.cpu.throttled_periods["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
- uuid: f25a5178570e44ecac6a48cb8351cdd3
name: 'HashiCorp Nomad Client: Job [{#JOB.NAME}] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] CPU utilization'
type: STACKED
ymax_type_1: FIXED
graph_items:
- color: 199C0D
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.allocs.cpu.system["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
- sortorder: '1'
color: F63100
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.allocs.cpu.total_percent["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
- sortorder: '2'
color: 00611C
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.allocs.cpu.user["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
master_item:
key: nomad.client.job.allocs.get
lld_macro_paths:
- lld_macro: '{#JOB.NAMESPACE}'
path: $.Namespace
- lld_macro: '{#JOB.NAME}'
path: $.JobID
- lld_macro: '{#JOB.TASK.GROUP}'
path: $.TaskGroup
- lld_macro: '{#JOB.TYPE}'
path: $.JobType
preprocessing:
  # Reduce the allocation list to one entry per (JobID, TaskGroup, Namespace)
  # combination; the first record seen for a combination is the one kept.
  - type: JAVASCRIPT
    parameters:
      - |
        var raw = JSON.parse(value),
          body = raw.body,
          seen = {},
          result = [];

        // Walk a dotted path and fail loudly when any step is missing.
        function getField(data, path) {
          var steps = path.split('.');

          for (var i = 0; i < steps.length; i++) {
            var step = steps[i];

            if (data === null || typeof data !== 'object' || typeof data[step] === 'undefined') {
              throw 'Required field "' + path + '" is not present in data received.';
            }

            data = data[step];
          }

          return data;
        }

        // for...in is kept on purpose: a reply without "body" yields an empty list.
        for (var idx in body) {
          // Every record is validated, including the first one.
          var id = JSON.stringify([
            getField(body[idx], 'JobID'),
            getField(body[idx], 'TaskGroup'),
            getField(body[idx], 'Namespace')
          ]);

          if (seen[id]) {
            continue;
          }

          seen[id] = true;
          result.push(body[idx]);
        }

        return JSON.stringify(result);
  - type: DISCARD_UNCHANGED_HEARTBEAT
    parameters:
      - 1h
- uuid: e2955bd147ed43cb894f0981c6eb3985
name: 'Physical disks discovery'
type: DEPENDENT
key: nomad.client.disk.discovery
delay: '0'
filter:
evaltype: AND
conditions:
- macro: '{#DEV.NAME}'
value: '{$NOMAD.DISK.NAME.MATCHES}'
formulaid: A
- macro: '{#DEV.NAME}'
value: '{$NOMAD.DISK.NAME.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: B
description: 'Physical disks discovery.'
item_prototypes:
- uuid: 4a79c99027494bb98b474e6f81db5e18
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] space available'
type: DEPENDENT
key: 'nomad.client.disk.available["{#DEV.NAME}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: B
description: 'Amount of space which is available on ["{#DEV.NAME}"] disk.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_host_disk_available{disk="{#DEV.NAME}"}'
- value
- ''
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: storage
- tag: disk
value: '{#DEV.NAME}'
- uuid: 21043b0ed4424e3086a00a567d134d45
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] inodes utilization'
type: DEPENDENT
key: 'nomad.client.disk.inodes_percent["{#DEV.NAME}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: '%'
description: 'Disk space consumed by the inodes on ["{#DEV.NAME}"] disk.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_host_disk_inodes_percent{disk="{#DEV.NAME}"}'
- value
- ''
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: inodes
- tag: component
value: storage
- tag: disk
value: '{#DEV.NAME}'
trigger_prototypes:
  # Two severities on the same item; the WARNING prototype depends on the
  # AVERAGE one so only the higher severity is shown when both conditions hold.
  # NOTE(review): both expressions fire when inodes_percent is >= the macro,
  # while the name, event_name and opdata speak of *free* inodes falling below
  # it. The item prototype describes the metric as space consumed by inodes -
  # confirm which reading is intended and align the wording or the expression.
  - uuid: 433c2227305a4aeb82eb1d390621a81e
    expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.inodes_percent["{#DEV.NAME}"],5m) >= {$NOMAD.INODES.FREE.MIN.CRIT:"{#DEV.NAME}"}'
    name: 'HashiCorp Nomad Client: Running out of free inodes on [{#DEV.NAME}] device'
    event_name: 'Running out of free inodes on [{#DEV.NAME}] (free < {$NOMAD.INODES.FREE.MIN.CRIT:"{#DEV.NAME}"}%)'
    opdata: 'Free inodes: {ITEM.VALUE}'
    priority: AVERAGE
    description: |
      It may become impossible to write to a disk if there are no index nodes left.
      The following error messages may be returned as symptoms, even though free space is available:
      - No space left on device;
      - Disk is full.
    manual_close: 'YES'
    tags:
      - tag: scope
        value: capacity
  - uuid: d0f2aacaada545c4a81d35f0eb454532
    expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.inodes_percent["{#DEV.NAME}"],5m) >= {$NOMAD.INODES.FREE.MIN.WARN:"{#DEV.NAME}"}'
    name: 'HashiCorp Nomad Client: Running out of free inodes on [{#DEV.NAME}] device'
    event_name: 'Running out of free inodes on [{#DEV.NAME}] (free < {$NOMAD.INODES.FREE.MIN.WARN:"{#DEV.NAME}"}%)'
    opdata: 'Free inodes: {ITEM.VALUE}'
    priority: WARNING
    description: |
      It may become impossible to write to a disk if there are no index nodes left.
      The following error messages may be returned as symptoms, even though free space is available:
      - No space left on device;
      - Disk is full.
    manual_close: 'YES'
    dependencies:
      - name: 'HashiCorp Nomad Client: Running out of free inodes on [{#DEV.NAME}] device'
        expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.inodes_percent["{#DEV.NAME}"],5m) >= {$NOMAD.INODES.FREE.MIN.CRIT:"{#DEV.NAME}"}'
    tags:
      - tag: scope
        value: capacity
- uuid: 4358e8accdd0400496b491cda7edc909
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] size'
type: DEPENDENT
key: 'nomad.client.disk.size["{#DEV.NAME}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: B
description: 'Total size of the ["{#DEV.NAME}"] device.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_host_disk_size{disk="{#DEV.NAME}"}'
- value
- ''
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: storage
- tag: disk
value: '{#DEV.NAME}'
- uuid: 778f4609e236434d8b216b62bfebf885
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] space used'
type: DEPENDENT
key: 'nomad.client.disk.used["{#DEV.NAME}"]'
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: B
description: 'Amount of disk ["{#DEV.NAME}"] space which has been used.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'nomad_client_host_disk_used{disk="{#DEV.NAME}"}'
- value
- ''
master_item:
key: nomad.client.metrics.get
tags:
- tag: component
value: storage
- tag: disk
value: '{#DEV.NAME}'
# Item prototype for per-disk space utilization, with two severity levels of
# "high utilization" trigger prototypes.
- uuid: aec5d7147bd64f508e793d7539a8524b
  name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] space utilization'
  type: DEPENDENT
  key: 'nomad.client.disk.used_percent["{#DEV.NAME}"]'
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: '%'
  description: 'Percentage of disk ["{#DEV.NAME}"] space used.'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        - 'nomad_client_host_disk_used_percent{disk="{#DEV.NAME}"}'
        - value
        - ''
  master_item:
    key: nomad.client.metrics.get
  tags:
    - tag: component
      value: storage
    - tag: disk
      value: '{#DEV.NAME}'
  trigger_prototypes:
    - uuid: b3e22e93d51a4a8d825b0bd2e45fc7a2
      expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.used_percent["{#DEV.NAME}"],5m) >= {$NOMAD.DISK.UTIL.MIN.CRIT:"{#DEV.NAME}"}'
      name: 'HashiCorp Nomad Client: High disk [{#DEV.NAME}] utilization'
      event_name: 'Disk [{#DEV.NAME}] space low (utilization: >= {$NOMAD.DISK.UTIL.MIN.CRIT:"{#DEV.NAME}"}%)'
      opdata: 'Current utilization: {ITEM.LASTVALUE}'
      priority: AVERAGE
      description: 'High disk [{#DEV.NAME}] utilization.'
      manual_close: 'YES'
      tags:
        - tag: scope
          value: capacity
    - uuid: 05c3e6cd711f4a98a21c581a4437ffb5
      expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.used_percent["{#DEV.NAME}"],5m) >= {$NOMAD.DISK.UTIL.MIN.WARN:"{#DEV.NAME}"}'
      name: 'HashiCorp Nomad Client: High disk [{#DEV.NAME}] utilization'
      event_name: 'Disk [{#DEV.NAME}] space low (utilization: >= {$NOMAD.DISK.UTIL.MIN.WARN:"{#DEV.NAME}"}%)'
      opdata: 'Current utilization: {ITEM.LASTVALUE}'
      priority: WARNING
      description: 'High disk [{#DEV.NAME}] utilization.'
      manual_close: 'YES'
      # Depend on the AVERAGE disk-utilization prototype above (identified by
      # name + expression) so the WARNING is suppressed while the higher
      # severity is active. It previously pointed at the inode trigger.
      dependencies:
        - name: 'HashiCorp Nomad Client: High disk [{#DEV.NAME}] utilization'
          expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.used_percent["{#DEV.NAME}"],5m) >= {$NOMAD.DISK.UTIL.MIN.CRIT:"{#DEV.NAME}"}'
      tags:
        - tag: scope
          value: capacity
graph_prototypes:
- uuid: e29168187396478ebec8c80825da070d
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] usage'
type: STACKED
graph_items:
- color: 199C0D
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.disk.size["{#DEV.NAME}"]'
- sortorder: '1'
color: F63100
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.disk.available["{#DEV.NAME}"]'
- sortorder: '2'
color: 00611C
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.disk.used["{#DEV.NAME}"]'
- uuid: 3b9290be87e94bd5987eb595debe5f26
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] utilization'
graph_items:
- color: 199C0D
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.disk.inodes_percent["{#DEV.NAME}"]'
- sortorder: '1'
color: F63100
item:
host: 'HashiCorp Nomad Client by HTTP'
key: 'nomad.client.disk.used_percent["{#DEV.NAME}"]'
master_item:
key: nomad.client.metrics.get
lld_macro_paths:
- lld_macro: '{#DEV.NAME}'
path: $.labels.disk
preprocessing:
- type: PROMETHEUS_TO_JSON
parameters:
- 'nomad_client_host_disk_available{disk=~".*"}'
- uuid: cfa76f8c4aaf49c5888ef82d6d7d484d
name: 'Drivers discovery'
type: DEPENDENT
key: nomad.client.drivers.discovery
delay: '0'
filter:
evaltype: AND
conditions:
- macro: '{#DRIVER.NAME}'
value: '{$NOMAD.DRIVER.NAME.MATCHES}'
formulaid: C
- macro: '{#DRIVER.NAME}'
value: '{$NOMAD.DRIVER.NAME.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: D
- macro: '{#DRIVER.DETECTED}'
value: '{$NOMAD.DRIVER.DETECT.MATCHES}'
formulaid: A
- macro: '{#DRIVER.DETECTED}'
value: '{$NOMAD.DRIVER.DETECT.NOT_MATCHES}'
operator: NOT_MATCHES_REGEX
formulaid: B
description: 'Client drivers discovery.'
item_prototypes:
- uuid: 6ffea3b88b29451ea6491cbd34a61148
name: 'HashiCorp Nomad Client: Driver [{#DRIVER.NAME}] detection state'
type: DEPENDENT
key: 'nomad.client.driver.detected["{#DRIVER.NAME}"]'
delay: '0'
history: 7d
trends: 90d
description: 'Driver [{#DRIVER.NAME}] detection state.'
valuemap:
name: 'Detection state'
preprocessing:
- type: JSONPATH
parameters:
- '$.body..Drivers.{#DRIVER.NAME}.Detected.first()'
- type: BOOL_TO_DECIMAL
parameters:
- ''
master_item:
key: nomad.client.node.info.get
tags:
- tag: component
value: driver
- tag: detected
value: '{#DRIVER.DETECTED}'
- tag: driver
value: '{#DRIVER.NAME}'
trigger_prototypes:
- uuid: 576192afab06466f80c95c1b17c1e6fa
expression: 'change(/HashiCorp Nomad Client by HTTP/nomad.client.driver.detected["{#DRIVER.NAME}"]) <> 0'
name: 'HashiCorp Nomad Client: Driver [{#DRIVER.NAME}] detection state has changed'
opdata: 'Current state: {ITEM.LASTVALUE}'
priority: INFO
description: 'The [{#DRIVER.NAME}] driver detection state has changed.'
manual_close: 'YES'
tags:
- tag: scope
value: availability
- uuid: fdff50758360474dafffee8cb7ba6289
name: 'HashiCorp Nomad Client: Driver [{#DRIVER.NAME}] state'
type: DEPENDENT
key: 'nomad.client.driver.state["{#DRIVER.NAME}"]'
delay: '0'
history: 7d
trends: 90d
description: 'Driver [{#DRIVER.NAME}] state.'
valuemap:
name: 'Driver state'
preprocessing:
- type: JSONPATH
parameters:
- '$.body..Drivers.{#DRIVER.NAME}.Healthy.first()'
- type: BOOL_TO_DECIMAL
parameters:
- ''
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 1h
master_item:
key: nomad.client.node.info.get
tags:
- tag: component
value: driver
- tag: detected
value: '{#DRIVER.DETECTED}'
- tag: driver
value: '{#DRIVER.NAME}'
trigger_prototypes:
- uuid: 5630f8b3585f4f5b8faf4a30d95755b8
expression: 'last(/HashiCorp Nomad Client by HTTP/nomad.client.driver.state["{#DRIVER.NAME}"]) = 0 and last(/HashiCorp Nomad Client by HTTP/nomad.client.driver.detected["{#DRIVER.NAME}"]) = 1'
name: 'HashiCorp Nomad Client: Driver [{#DRIVER.NAME}] is in unhealthy state'
priority: WARNING
description: 'The [{#DRIVER.NAME}] driver detected, but its state is unhealthy.'
manual_close: 'YES'
tags:
- tag: scope
value: availability
master_item:
key: nomad.client.node.info.get
preprocessing:
  # Build the LLD rows: one object per driver of the first node in "body",
  # carrying the driver name and its "Detected" flag.
  - type: JAVASCRIPT
    parameters:
      - |
        // Resolve a dotted path inside an object; throw if a step is absent.
        function getField(source, path) {
          var parts = path.split('.'),
            cursor = source;

          for (var n = 0; n < parts.length; n++) {
            if (typeof cursor !== 'object' || typeof cursor[parts[n]] === 'undefined') {
              throw 'Required field "' + path + '" is not present in data received.';
            }
            cursor = cursor[parts[n]];
          }

          return cursor;
        }

        var node = JSON.parse(value).body[0],
          drivers = getField(node, 'Drivers');

        return JSON.stringify(Object.keys(drivers).map(function (driverName) {
          return {
            '{#DRIVER.NAME}': driverName,
            '{#DRIVER.DETECTED}': getField(drivers[driverName], 'Detected')
          };
        }));
  - type: DISCARD_UNCHANGED_HEARTBEAT
    parameters:
      - 1h
# Template-level tags.
tags:
  - tag: class
    value: software
  - tag: target
    value: nomad-client
# User macros of the template: connection settings, trigger thresholds and
# low-level discovery filters. Filters come in MATCHES / NOT_MATCHES pairs.
macros:
  - macro: '{$NOMAD.API.RESPONSE.SUCCESS}'
    value: '200'
    description: 'HTTP API successful response code. Availability triggers threshold. Change, if needed.'
  - macro: '{$NOMAD.CLIENT.API.PORT}'
    value: '4646'
    description: 'Nomad client API port.'
  - macro: '{$NOMAD.CLIENT.API.SCHEME}'
    value: http
    description: 'Nomad client API scheme.'
  - macro: '{$NOMAD.CLIENT.OPEN.FDS.MAX.WARN}'
    value: '90'
    description: 'Maximum percentage of used file descriptors.'
  - macro: '{$NOMAD.CLIENT.RPC.PORT}'
    value: '4647'
    description: 'Nomad RPC service port.'
  - macro: '{$NOMAD.CLIENT.SERF.PORT}'
    value: '4648'
    description: 'Nomad serf service port.'
  - macro: '{$NOMAD.CPU.UTIL.MIN}'
    value: '90'
    description: 'CPU utilization threshold. Measured as a percentage.'
  - macro: '{$NOMAD.DATA.TIMEOUT}'
    value: 15s
    description: 'Response timeout for an API.'
  - macro: '{$NOMAD.DISK.NAME.MATCHES}'
    value: '.*'
    description: 'The filter to include HashiCorp Nomad client disks by name.'
  - macro: '{$NOMAD.DISK.NAME.NOT_MATCHES}'
    value: CHANGE_IF_NEEDED
    description: 'The filter to exclude HashiCorp Nomad client disks by name.'
  - macro: '{$NOMAD.DRIVER.DETECT.MATCHES}'
    value: '.*'
    description: 'The filter to include HashiCorp Nomad client drivers by detection state. Possible filtering values: `true`, `false`.'
  - macro: '{$NOMAD.DRIVER.DETECT.NOT_MATCHES}'
    value: CHANGE_IF_NEEDED
    description: 'The filter to exclude HashiCorp Nomad client drivers by detection state. Possible filtering values: `true`, `false`.'
  - macro: '{$NOMAD.DRIVER.NAME.MATCHES}'
    value: '.*'
    description: 'The filter to include HashiCorp Nomad client drivers by name.'
  - macro: '{$NOMAD.DRIVER.NAME.NOT_MATCHES}'
    value: CHANGE_IF_NEEDED
    description: 'The filter to exclude HashiCorp Nomad client drivers by name.'
  # No value key: the macro is exported empty, which per its description means "no proxy".
  - macro: '{$NOMAD.HTTP.PROXY}'
    description: 'Sets the HTTP proxy for HTTP agent item. If this parameter is empty, then no proxy is used.'
  - macro: '{$NOMAD.INODES.FREE.MIN.CRIT}'
    value: '10'
    description: 'Critical threshold of the filesystem metadata utilization. Measured as a percentage.'
  - macro: '{$NOMAD.INODES.FREE.MIN.WARN}'
    value: '20'
    description: 'Warning threshold of the filesystem metadata utilization. Measured as a percentage.'
  - macro: '{$NOMAD.JOB.NAME.MATCHES}'
    value: '.*'
    description: 'The filter to include HashiCorp Nomad client jobs by name.'
  - macro: '{$NOMAD.JOB.NAME.NOT_MATCHES}'
    value: CHANGE_IF_NEEDED
    description: 'The filter to exclude HashiCorp Nomad client jobs by name.'
  - macro: '{$NOMAD.JOB.NAMESPACE.MATCHES}'
    value: '.*'
    description: 'The filter to include HashiCorp Nomad client jobs by namespace.'
  - macro: '{$NOMAD.JOB.NAMESPACE.NOT_MATCHES}'
    value: CHANGE_IF_NEEDED
    description: 'The filter to exclude HashiCorp Nomad client jobs by namespace.'
  - macro: '{$NOMAD.JOB.TASK.GROUP.MATCHES}'
    value: '.*'
    description: 'The filter to include HashiCorp Nomad client jobs by task group belonging.'
  - macro: '{$NOMAD.JOB.TASK.GROUP.NOT_MATCHES}'
    value: CHANGE_IF_NEEDED
    description: 'The filter to exclude HashiCorp Nomad client jobs by task group belonging.'
  - macro: '{$NOMAD.JOB.TYPE.MATCHES}'
    value: '.*'
    description: 'The filter to include HashiCorp Nomad client jobs by type.'
  - macro: '{$NOMAD.JOB.TYPE.NOT_MATCHES}'
    value: CHANGE_IF_NEEDED
    description: 'The filter to exclude HashiCorp Nomad client jobs by type.'
  # Description previously repeated the CPU threshold text; the macro name refers
  # to available RAM. NOTE(review): confirm wording against the trigger that uses it.
  - macro: '{$NOMAD.RAM.AVAIL.MIN}'
    value: '5'
    description: 'Available memory threshold. Measured as a percentage.'
  # Placeholder value: must be replaced with a real token before use.
  - macro: '{$NOMAD.TOKEN}'
    value: '<PUT YOUR AUTH TOKEN>'
    description: 'Nomad authentication token.'
dashboards:
# Dashboard "Allocations": a single page with nine item widgets (8 wide, 5 high)
# laid out on x = 0/8/16 and y = 0/5/10, one per allocation counter item.
- uuid: 6cb91bf47abb4c29b5fdf6de15ee5f9e
  name: Allocations
  pages:
    - name: Allocations
      widgets:
        # nomad.client.allocations.running
        - type: item
          width: '8'
          height: '5'
          fields:
            - type: INTEGER
              name: show
              value: '2'
            - type: INTEGER
              name: show
              value: '4'
            - type: INTEGER
              name: adv_conf
              value: '1'
            - type: INTEGER
              name: decimal_places
              value: '0'
            - type: ITEM
              name: itemid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                key: nomad.client.allocations.running
        # nomad.client.allocations.start
        - type: item
          x: '8'
          width: '8'
          height: '5'
          fields:
            - type: INTEGER
              name: show
              value: '2'
            - type: INTEGER
              name: show
              value: '4'
            - type: INTEGER
              name: adv_conf
              value: '1'
            - type: INTEGER
              name: decimal_places
              value: '0'
            - type: ITEM
              name: itemid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                key: nomad.client.allocations.start
        # nomad.client.allocations.pending
        - type: item
          x: '16'
          width: '8'
          height: '5'
          fields:
            - type: INTEGER
              name: show
              value: '2'
            - type: INTEGER
              name: show
              value: '4'
            - type: INTEGER
              name: adv_conf
              value: '1'
            - type: INTEGER
              name: decimal_places
              value: '0'
            - type: ITEM
              name: itemid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                key: nomad.client.allocations.pending
        # nomad.client.allocations.complete
        - type: item
          'y': '5'
          width: '8'
          height: '5'
          fields:
            - type: INTEGER
              name: show
              value: '2'
            - type: INTEGER
              name: show
              value: '4'
            - type: INTEGER
              name: adv_conf
              value: '1'
            - type: INTEGER
              name: decimal_places
              value: '0'
            - type: ITEM
              name: itemid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                key: nomad.client.allocations.complete
        # nomad.client.allocations.blocked
        - type: item
          x: '8'
          'y': '5'
          width: '8'
          height: '5'
          fields:
            - type: INTEGER
              name: show
              value: '2'
            - type: INTEGER
              name: show
              value: '4'
            - type: INTEGER
              name: adv_conf
              value: '1'
            - type: INTEGER
              name: decimal_places
              value: '0'
            - type: ITEM
              name: itemid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                key: nomad.client.allocations.blocked
        # nomad.client.allocations.restart
        - type: item
          x: '16'
          'y': '5'
          width: '8'
          height: '5'
          fields:
            - type: INTEGER
              name: show
              value: '2'
            - type: INTEGER
              name: show
              value: '4'
            - type: INTEGER
              name: adv_conf
              value: '1'
            - type: INTEGER
              name: decimal_places
              value: '0'
            - type: ITEM
              name: itemid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                key: nomad.client.allocations.restart
        # nomad.client.allocations.migrating
        - type: item
          'y': '10'
          width: '8'
          height: '5'
          fields:
            - type: INTEGER
              name: show
              value: '2'
            - type: INTEGER
              name: show
              value: '4'
            - type: INTEGER
              name: adv_conf
              value: '1'
            - type: INTEGER
              name: decimal_places
              value: '0'
            - type: ITEM
              name: itemid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                key: nomad.client.allocations.migrating
        # nomad.client.allocations.terminal
        - type: item
          x: '8'
          'y': '10'
          width: '8'
          height: '5'
          fields:
            - type: INTEGER
              name: show
              value: '2'
            - type: INTEGER
              name: show
              value: '4'
            - type: INTEGER
              name: adv_conf
              value: '1'
            - type: INTEGER
              name: decimal_places
              value: '0'
            - type: ITEM
              name: itemid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                key: nomad.client.allocations.terminal
        # nomad.client.allocations.failed
        - type: item
          x: '16'
          'y': '10'
          width: '8'
          height: '5'
          fields:
            - type: INTEGER
              name: show
              value: '2'
            - type: INTEGER
              name: show
              value: '4'
            - type: INTEGER
              name: adv_conf
              value: '1'
            - type: INTEGER
              name: decimal_places
              value: '0'
            - type: ITEM
              name: itemid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                key: nomad.client.allocations.failed
# Dashboard "Job resources": five graph-prototype widgets for discovered jobs.
# Three reference graph prototypes by name; two are built directly from an item
# prototype (they carry source_type '3' and an ITEM_PROTOTYPE field).
- uuid: 9df94875bc944537860bad6bbe832ca8
  name: 'Job resources'
  pages:
    - name: 'Job resources'
      widgets:
        # Graph prototype: CPU utilization
        - type: graphprototype
          width: '12'
          height: '5'
          fields:
            - type: INTEGER
              name: columns
              value: '1'
            - type: INTEGER
              name: rows
              value: '1'
            - type: GRAPH_PROTOTYPE
              name: graphid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                name: 'HashiCorp Nomad Client: Job [{#JOB.NAME}] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] CPU utilization'
        # Graph prototype: CPU throttling
        - type: graphprototype
          x: '12'
          width: '12'
          height: '5'
          fields:
            - type: INTEGER
              name: columns
              value: '1'
            - type: INTEGER
              name: rows
              value: '1'
            - type: GRAPH_PROTOTYPE
              name: graphid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                name: 'HashiCorp Nomad Client: Job [{#JOB.NAME}] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] CPU throttling'
        # Item prototype: nomad.client.allocs.cpu.allocated
        - type: graphprototype
          'y': '5'
          width: '12'
          height: '5'
          fields:
            - type: INTEGER
              name: columns
              value: '1'
            - type: INTEGER
              name: rows
              value: '1'
            - type: INTEGER
              name: source_type
              value: '3'
            - type: ITEM_PROTOTYPE
              name: itemid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                key: 'nomad.client.allocs.cpu.allocated["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
        # Item prototype: nomad.client.allocs.cpu.total_ticks
        - type: graphprototype
          x: '12'
          'y': '5'
          width: '12'
          height: '5'
          fields:
            - type: INTEGER
              name: columns
              value: '1'
            - type: INTEGER
              name: rows
              value: '1'
            - type: INTEGER
              name: source_type
              value: '3'
            - type: ITEM_PROTOTYPE
              name: itemid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                key: 'nomad.client.allocs.cpu.total_ticks["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
        # Graph prototype: memory utilization (full width)
        - type: graphprototype
          'y': '10'
          width: '24'
          height: '5'
          fields:
            - type: INTEGER
              name: columns
              value: '1'
            - type: INTEGER
              name: rows
              value: '1'
            - type: GRAPH_PROTOTYPE
              name: graphid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] memory utilization'
# Dashboard "Node resources": three plain graphs (CPU, memory utilization,
# memory allocation) followed by two per-disk graph prototypes.
- uuid: 040a905f916a4311a52ecc2b084175a4
  name: 'Node resources'
  pages:
    - name: 'Node resources'
      widgets:
        # Graph: CPU utilization (full width)
        - type: graph
          width: '24'
          height: '5'
          fields:
            - type: GRAPH
              name: graphid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                name: 'HashiCorp Nomad Client: CPU utilization'
        # Graph: Memory utilization
        - type: graph
          'y': '5'
          width: '12'
          height: '5'
          fields:
            - type: GRAPH
              name: graphid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                name: 'HashiCorp Nomad Client: Memory utilization'
        # Graph: Memory allocation
        - type: graph
          x: '12'
          'y': '5'
          width: '12'
          height: '5'
          fields:
            - type: GRAPH
              name: graphid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                name: 'HashiCorp Nomad Client: Memory allocation'
        # Graph prototype: disk usage
        - type: graphprototype
          'y': '10'
          width: '12'
          height: '5'
          fields:
            - type: INTEGER
              name: columns
              value: '1'
            - type: INTEGER
              name: rows
              value: '1'
            - type: GRAPH_PROTOTYPE
              name: graphid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] usage'
        # Graph prototype: disk utilization
        - type: graphprototype
          x: '12'
          'y': '10'
          width: '12'
          height: '5'
          fields:
            - type: INTEGER
              name: columns
              value: '1'
            - type: INTEGER
              name: rows
              value: '1'
            - type: GRAPH_PROTOTYPE
              name: graphid
              value:
                host: 'HashiCorp Nomad Client by HTTP'
                name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] utilization'
# Value maps translating stored 0/1 values into display text.
valuemaps:
  - uuid: 766a3c43981147bf85e9e157eb69c510
    name: 'Detection state'
    mappings:
      - value: '0'
        newvalue: Undetected
      - value: '1'
        newvalue: Detected
  - uuid: c202a63acca7407bb6d61bf631696691
    name: 'Driver state'
    mappings:
      - value: '0'
        newvalue: Unhealthy
      - value: '1'
        newvalue: Healthy
  - uuid: 40e17de6519d44bb80b4566e0569c31b
    name: 'Service state'
    mappings:
      - value: '0'
        newvalue: Down
      - value: '1'
        newvalue: Up
# Template "HashiCorp Nomad Server by HTTP": identification, description,
# vendor information and group membership.
- uuid: 8598d0e2bd6f4903832ec91b7b300062
  template: 'HashiCorp Nomad Server by HTTP'
  name: 'HashiCorp Nomad Server by HTTP'
  description: |
    Get HashiCorp Nomad server metrics by HTTP from metrics endpoint.
    More information about metrics is available in the official documentation: https://developer.hashicorp.com/nomad/docs/operations/metrics-reference.
    You can discuss this template or leave feedback on our forum: https://www.zabbix.com/forum/zabbix-suggestions-and-feedback.
    Generated by official Zabbix template tool "Templator" 2.0.0
  vendor:
    name: Zabbix
    version: 7.0-0
  groups:
    - name: Templates/Applications
items:
# Simple check: TCP reachability of the port given by {$NOMAD.SERVER.RPC.PORT},
# displayed through the 'Service state' value map.
- uuid: c5903e91262b4877be8c7e8f304b2862
  name: 'HashiCorp Nomad Server: Service [rpc] state'
  type: SIMPLE
  key: 'net.tcp.service[tcp,,{$NOMAD.SERVER.RPC.PORT}]'
  history: 7d
  trends: 90d
  description: 'Current [rpc] service state.'
  valuemap:
    name: 'Service state'
  preprocessing:
    # Store a value only when it changes, or once per hour otherwise.
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  tags:
    - tag: component
      value: network
  triggers:
    # Fires when the last check result is 0.
    - uuid: 0b534fa4c6cd4e89bd0a44ca8e810fa2
      expression: 'last(/HashiCorp Nomad Server by HTTP/net.tcp.service[tcp,,{$NOMAD.SERVER.RPC.PORT}]) = 0'
      name: 'HashiCorp Nomad Server: Service [rpc] is down'
      opdata: 'Service: rpc, Port: {$NOMAD.SERVER.RPC.PORT}, State: {ITEM.LASTVALUE}'
      priority: AVERAGE
      description: |
        Cannot establish the connection to [rpc] service port {$NOMAD.SERVER.RPC.PORT}.
        Check the Nomad state and network connectivity between Nomad and Zabbix.
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
# Simple check: TCP reachability of the port given by {$NOMAD.SERVER.SERF.PORT},
# displayed through the 'Service state' value map.
- uuid: 9eecc3ee79f04448991c83ac93459597
  name: 'HashiCorp Nomad Server: Service [serf] state'
  type: SIMPLE
  key: 'net.tcp.service[tcp,,{$NOMAD.SERVER.SERF.PORT}]'
  history: 7d
  trends: 90d
  description: 'Current [serf] service state.'
  valuemap:
    name: 'Service state'
  preprocessing:
    # Store a value only when it changes, or once per hour otherwise.
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  tags:
    - tag: component
      value: network
  triggers:
    # Fires when the last check result is 0.
    - uuid: 00799e8927b547308fc9caab4ba2d24d
      expression: 'last(/HashiCorp Nomad Server by HTTP/net.tcp.service[tcp,,{$NOMAD.SERVER.SERF.PORT}]) = 0'
      name: 'HashiCorp Nomad Server: Service [serf] is down'
      opdata: 'Service: serf, Port: {$NOMAD.SERVER.SERF.PORT}, State: {ITEM.LASTVALUE}'
      priority: AVERAGE
      description: |
        Cannot establish the connection to [serf] service port {$NOMAD.SERVER.SERF.PORT}.
        Check the Nomad state and network connectivity between Nomad and Zabbix.
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
# Dependent item: raft protocol version read from the
# nomad.server.stats.get master item; stored as a character value, no trends.
- uuid: 04821dab074f43e785dec52fbf69240e
  name: 'HashiCorp Nomad Server: Nomad raft version'
  type: DEPENDENT
  key: nomad.raft.version
  delay: '0'
  history: 7d
  trends: '0'
  value_type: CHAR
  description: 'Nomad raft version.'
  preprocessing:
    # A failed JSONPath lookup drops the value.
    - type: JSONPATH
      parameters:
        - $.body.stats.raft.protocol_version
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.stats.get
# Dependent item: nomad_nomad_fsm_alloc_client_update_sum taken from
# nomad.server.metrics.get and multiplied by 1.0E-9 (presumably nanoseconds
# to seconds, given units 's').
- uuid: 6ad163ebdefa4f5aa1e3712b7ddac7a7
  name: 'HashiCorp Nomad Server: FSM allocation client update time'
  type: DEPENDENT
  key: nomad.server.alloc_client_update
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: 'Time elapsed to apply AllocClientUpdate raft entry.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_fsm_alloc_client_update_sum
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters:
        - '1.0E-9'
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: fsm
# Dependent item: nomad_nomad_autopilot_failure_tolerance taken from
# nomad.server.metrics.get.
- uuid: 22947f9cc2734f95997a55d9e6392b77
  name: 'HashiCorp Nomad Server: Autopilot failure tolerance'
  type: DEPENDENT
  key: nomad.server.autopilot.failure_tolerance
  delay: '0'
  history: 7d
  trends: 90d
  description: 'The number of redundant healthy servers that can fail without causing an outage.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_autopilot_failure_tolerance
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: autopilot
  triggers:
    # Fires when the last value is below {$NOMAD.REDUNDANCY.MIN} and data
    # has been received within the last 5 minutes (nodata(...) = 0).
    - uuid: fbb6d007481a4aab8d107a1d25a8c7ad
      expression: 'last(/HashiCorp Nomad Server by HTTP/nomad.server.autopilot.failure_tolerance) < {$NOMAD.REDUNDANCY.MIN} and nodata(/HashiCorp Nomad Server by HTTP/nomad.server.autopilot.failure_tolerance,5m) = 0'
      name: 'HashiCorp Nomad Server: Autopilot redundancy is low'
      event_name: 'HashiCorp Nomad Server: Autopilot redundancy is low (less than {$NOMAD.REDUNDANCY.MIN})'
      priority: WARNING
      description: |
        The autopilot redundancy is low.
        Cluster crash risk is high due to one more server failure.
      manual_close: 'YES'
      tags:
        - tag: scope
          value: capacity
# Dependent item: nomad_nomad_autopilot_healthy taken from
# nomad.server.metrics.get, displayed through the 'Autopilot state' value map.
- uuid: 05ce375320dd402ab6c75f8fffba74cb
  name: 'HashiCorp Nomad Server: Autopilot state'
  type: DEPENDENT
  key: nomad.server.autopilot.state
  delay: '0'
  history: 7d
  trends: 90d
  description: 'Current autopilot state.'
  valuemap:
    name: 'Autopilot state'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_autopilot_healthy
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: autopilot
  triggers:
    # Fires when the last value is 0 and data has been received within
    # the last 5 minutes (nodata(...) = 0).
    - uuid: 73c14eec055e47c6af6d8728ba0fb416
      expression: 'last(/HashiCorp Nomad Server by HTTP/nomad.server.autopilot.state) = 0 and nodata(/HashiCorp Nomad Server by HTTP/nomad.server.autopilot.state,5m) = 0'
      name: 'HashiCorp Nomad Server: Autopilot is unhealthy'
      priority: AVERAGE
      description: 'The autopilot is in unhealthy state. The successful failover probability is extremely low.'
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
# Dependent item: nomad_nomad_blocked_evals_cpu taken from
# nomad.server.metrics.get.
- uuid: ace31f3772fe4ffe84741554e3fba8bc
  name: 'HashiCorp Nomad Server: CPU shares for blocked evaluations'
  type: DEPENDENT
  key: nomad.server.blocked_evals.cpu
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Amount of CPU shares requested by blocked evals.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_blocked_evals_cpu
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: cpu
    - tag: component
      value: evaluations
# Dependent item: nomad_nomad_blocked_evals_job_cpu taken from
# nomad.server.metrics.get.
- uuid: d7fe536b4d0445328dfbf3b1188bea70
  name: 'HashiCorp Nomad Server: CPU shares for blocked job evaluations'
  type: DEPENDENT
  key: nomad.server.blocked_evals.job.cpu
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Amount of CPU shares requested by blocked evals of a job.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_blocked_evals_job_cpu
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: cpu
    - tag: component
      value: evaluations
    - tag: component
      value: jobs
# Dependent item: nomad_nomad_blocked_evals_job_memory taken from
# nomad.server.metrics.get.
- uuid: 2f07cbd6ec11449cae787a6e84fa5fbf
  name: 'HashiCorp Nomad Server: Memory shares for blocked job evaluations'
  type: DEPENDENT
  key: nomad.server.blocked_evals.job.memory
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Amount of memory requested by blocked evals of a job.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_blocked_evals_job_memory
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: evaluations
    - tag: component
      value: jobs
    - tag: component
      value: memory
# Dependent item: nomad_nomad_blocked_evals_memory taken from
# nomad.server.metrics.get.
- uuid: 5460a81df8174fb5a7596b4243162d05
  name: 'HashiCorp Nomad Server: Memory shares by blocked evaluations'
  type: DEPENDENT
  key: nomad.server.blocked_evals.memory
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Amount of memory requested by blocked evals.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_blocked_evals_memory
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: evaluations
    - tag: component
      value: memory
# Dependent item: nomad_nomad_blocked_evals_total_blocked taken from
# nomad.server.metrics.get.
- uuid: 474d05060b0d4a608f6a2d319235af91
  name: 'HashiCorp Nomad Server: Evaluations blocked'
  type: DEPENDENT
  key: nomad.server.blocked_evals.total_blocked
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Count of evals in the blocked state for any reason (cluster resource exhaustion or quota limits).'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_blocked_evals_total_blocked
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: evaluations
# Dependent item: nomad_nomad_blocked_evals_total_escaped taken from
# nomad.server.metrics.get.
- uuid: 80f5edf18daf407ba6b64374084a555b
  name: 'HashiCorp Nomad Server: Evaluations escaped'
  type: DEPENDENT
  key: nomad.server.blocked_evals.total_escaped
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: |
    Count of evals that have escaped computed node classes.
    This indicates a scheduler optimization was skipped and is not usually a source of concern.
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_blocked_evals_total_escaped
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: evaluations
# Dependent item: nomad_nomad_blocked_evals_total_quota_limit taken from
# nomad.server.metrics.get.
- uuid: 356dec0f5d5b45d981db4b590ef7dd73
  name: 'HashiCorp Nomad Server: Evaluations blocked due to quota limit'
  type: DEPENDENT
  key: nomad.server.blocked_evals.total_quota_limit
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Count of blocked evals due to quota limits (the resources for these jobs are not counted in other blocked_evals metrics, except for total_blocked).'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_blocked_evals_total_quota_limit
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: evaluations
# Dependent item: average of the nomad_nomad_broker_eval_waiting series taken
# from nomad.server.metrics.get, multiplied by 1.0E-9 (presumably nanoseconds
# to seconds, given units 's').
# The pattern previously pointed at nomad_nomad_eval_ack_sum, which is the
# Eval.Ack RPC timer and does not match this item's key or description.
- uuid: 54ddea6f2fa04e6c8ceb456d4d67c530
  name: 'HashiCorp Nomad Server: Evaluations enqueue time'
  type: DEPENDENT
  key: nomad.server.broker.eval_waiting
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: 'Average time elapsed with evaluations waiting to be enqueued.'
  preprocessing:
    # Aggregated with avg across the matching series; a failed extraction
    # drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_broker_eval_waiting
        - function
        - avg
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters:
        - '1.0E-9'
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: evaluations
# Dependent item: nomad_nomad_broker_service_ready taken from
# nomad.server.metrics.get.
- uuid: 7df3a4b80f6b44c6a03b672182d3f73f
  name: 'HashiCorp Nomad Server: Services ready to schedule'
  type: DEPENDENT
  key: nomad.server.broker.service_ready
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Count of service evals ready to be scheduled.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_broker_service_ready
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: services
# Dependent item: nomad_nomad_broker_service_unacked taken from
# nomad.server.metrics.get.
- uuid: 0da8881c8ddf4cd18e04b38201b83b7d
  name: 'HashiCorp Nomad Server: Services unacknowledged'
  type: DEPENDENT
  key: nomad.server.broker.service_unacked
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Count of unacknowledged service evals.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_broker_service_unacked
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: services
# Dependent item: nomad_nomad_broker_system_ready taken from
# nomad.server.metrics.get.
# The description previously said "service evals" (copied from the
# service_ready item); this item reads the system_ready metric.
- uuid: 2a38602920224c31a221696cf65c6abc
  name: 'HashiCorp Nomad Server: System evaluations ready to schedule'
  type: DEPENDENT
  key: nomad.server.broker.system_ready
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Count of system evals ready to be scheduled.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_broker_system_ready
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: evaluations
# Dependent item: nomad_nomad_broker_system_unacked taken from
# nomad.server.metrics.get.
- uuid: 3faaaa69672c4b999a12f054dbd8b980
  name: 'HashiCorp Nomad Server: System evaluations unacknowledged'
  type: DEPENDENT
  key: nomad.server.broker.system_unacked
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Count of unacknowledged system evals.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_broker_system_unacked
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: evaluations
# Dependent item: nomad_nomad_broker_total_pending taken from
# nomad.server.metrics.get.
- uuid: 2e556f5894c9492a80b34c84ceb69dba
  name: 'HashiCorp Nomad Server: Evaluations pending'
  type: DEPENDENT
  key: nomad.server.broker.total_pending
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Evaluations that are pending until an existing evaluation for the same job completes.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_broker_total_pending
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: evaluations
# Dependent item: nomad_nomad_broker_total_ready taken from
# nomad.server.metrics.get.
- uuid: 01e11850732a46709942c225d294a8ef
  name: 'HashiCorp Nomad Server: Evaluations ready'
  type: DEPENDENT
  key: nomad.server.broker.total_ready
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Number of evaluations ready to be processed.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_broker_total_ready
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: evaluations
# Dependent item: nomad_nomad_broker_total_unacked taken from
# nomad.server.metrics.get.
- uuid: d57805e0d8a24117bb64992a9aeb3dc3
  name: 'HashiCorp Nomad Server: Evaluations unacked'
  type: DEPENDENT
  key: nomad.server.broker.total_unacked
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Evaluations dispatched for processing but incomplete.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_broker_total_unacked
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: evaluations
# Dependent item: nomad_nomad_broker_total_waiting taken from
# nomad.server.metrics.get. No value_type is set on this item.
- uuid: ac1d1422adc248b48ba65268f9a43be8
  name: 'HashiCorp Nomad Server: Evaluations waiting'
  type: DEPENDENT
  key: nomad.server.broker.total_waiting
  delay: '0'
  history: 7d
  trends: 90d
  description: 'Count of evals waiting to be enqueued.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_broker_total_waiting
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: evaluations
# Dependent item: nomad_nomad_eval_dequeue_sum taken from
# nomad.server.metrics.get and multiplied by 1.0E-9 (presumably nanoseconds
# to seconds, given units 's').
- uuid: de3e13f7635e4058bd2ca12f6f454668
  name: 'HashiCorp Nomad Server: RPC eval dequeue time'
  type: DEPENDENT
  key: nomad.server.client.dequeue
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: 'Time elapsed for Eval.Dequeue RPC call.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_eval_dequeue_sum
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters:
        - '1.0E-9'
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: network
    - tag: component
      value: rpc
# Dependent item: nomad_nomad_client_get_client_allocs_sum taken from
# nomad.server.metrics.get and multiplied by 1.0E-9 (presumably nanoseconds
# to seconds, given units 's').
- uuid: ac2a5b69fef546d0924d631a9ed9cffd
  name: 'HashiCorp Nomad Server: RPC get client allocs time'
  type: DEPENDENT
  key: nomad.server.client.get_client_allocs
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: 'Time elapsed for Node.GetClientAllocs RPC call.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_client_get_client_allocs_sum
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters:
        - '1.0E-9'
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: network
    - tag: component
      value: rpc
# Dependent item: nomad_nomad_client_list_sum taken from
# nomad.server.metrics.get and multiplied by 1.0E-9 (presumably nanoseconds
# to seconds, given units 's').
- uuid: 758f3474636c42378fac78a2f5e3b63b
  name: 'HashiCorp Nomad Server: RPC list time'
  type: DEPENDENT
  key: nomad.server.client.list
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: 'Time elapsed for Node.List RPC call.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_client_list_sum
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters:
        - '1.0E-9'
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: network
    - tag: component
      value: rpc
# Dependent item: nomad_nomad_client_update_alloc_sum taken from
# nomad.server.metrics.get and multiplied by 1.0E-9 (presumably nanoseconds
# to seconds, given units 's').
- uuid: 588bb9b304634852af149d7903d42798
  name: 'HashiCorp Nomad Server: RPC update allocations time'
  type: DEPENDENT
  key: nomad.server.client.update_alloc
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: 'Time elapsed for Node.UpdateAlloc RPC call.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_client_update_alloc_sum
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters:
        - '1.0E-9'
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: network
    - tag: component
      value: rpc
# Dependent item: nomad_nomad_client_update_status_sum taken from
# nomad.server.metrics.get and multiplied by 1.0E-9 (presumably nanoseconds
# to seconds, given units 's').
- uuid: 1e1ce033c99b45a3892df8674deaf1f4
  name: 'HashiCorp Nomad Server: RPC update status time'
  type: DEPENDENT
  key: nomad.server.client.update_status
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: 'Time elapsed for Node.UpdateStatus RPC call.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_client_update_status_sum
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters:
        - '1.0E-9'
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: network
    - tag: component
      value: rpc
# Dependent item: process_cpu_seconds_total taken from
# nomad.server.metrics.get and converted to a per-second rate of change.
- uuid: c5bd6a645e9f49ba83fe065b2af6248e
  name: 'HashiCorp Nomad Server: CPU time, rate'
  type: DEPENDENT
  key: nomad.server.cpu.time
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: 'Total user and system CPU time spent in seconds.'
  preprocessing:
    # A failed metric extraction drops the value.
    - type: PROMETHEUS_PATTERN
      parameters:
        - process_cpu_seconds_total
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: cpu
# Dependent item: HTTP status line of the telemetry request, taken from the
# header keys of the nomad.server.data.get master item.
- uuid: dbbee3bb99d347f9a9a5325b4f64a894
  name: 'HashiCorp Nomad Server: Monitoring API response'
  type: DEPENDENT
  key: nomad.server.data.api.response
  delay: '0'
  history: 7d
  trends: '0'
  value_type: TEXT
  description: 'Monitoring API response message.'
  preprocessing:
    - type: JAVASCRIPT
      parameters:
        - |
          // Keep the header keys that look like "HTTP/<version> <3-digit code>"
          // and return the last one; any exception (unparsable input, missing
          // header object) is reported as a synthetic 408 status line.
          try {
              var response = Object.keys(JSON.parse(value).header).filter(function (f) {
                  return f.match(/HTTP\/[\d.]+\s+\d{3}/);
              });
              return response.pop();
          }
          catch (error) {
              return "HTTP/1.1 408 Request timeout";
          }
    # Store a value only when it changes, or once per hour otherwise.
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: nomad.server.data.get
  tags:
    - tag: component
      value: status
  triggers:
    # Fires when the stored status line does not contain
    # {$NOMAD.API.RESPONSE.SUCCESS}.
    # event_name: the regsub output argument is now a balanced quoted "\1";
    # it was previously written as \1" which left a stray quote character
    # in the unquoted parameter.
    - uuid: 9dc8cd0bcbad4ee7a2afe45e33db53a7
      expression: 'find(/HashiCorp Nomad Server by HTTP/nomad.server.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
      name: 'HashiCorp Nomad Server: Monitoring API connection has failed'
      event_name: 'HashiCorp Nomad Server: Monitoring API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", "\1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", "\1")}'
      priority: AVERAGE
      description: |
        Monitoring API connection has failed.
        Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
      manual_close: 'YES'
      tags:
        - tag: scope
          value: availability
# Telemetry get: HTTP agent master item that requests /v1/metrics with format=prometheus and the X-Nomad-Token header.
# Headers and body are both retrieved and returned as JSON; no history is kept for the raw value.
# If the check becomes not supported, the value is replaced by a JSON stub carrying a "408 Request timeout" header key.
- uuid: b7ba2f4a56b04de298fde77cbf5afb6a
name: 'HashiCorp Nomad Server: Telemetry get'
type: HTTP_AGENT
key: nomad.server.data.get
history: '0'
trends: '0'
value_type: TEXT
description: 'Telemetry data in raw format.'
preprocessing:
- type: CHECK_NOT_SUPPORTED
parameters:
- ''
error_handler: CUSTOM_VALUE
error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}'
timeout: '{$NOMAD.DATA.TIMEOUT}'
url: '{$NOMAD.SERVER.API.SCHEME}://{HOST.IP}:{$NOMAD.SERVER.API.PORT}/v1/metrics'
query_fields:
- name: format
value: prometheus
status_codes: ''
http_proxy: '{$NOMAD.HTTP.PROXY}'
headers:
- name: X-Nomad-Token
value: '{$NOMAD.TOKEN}'
retrieve_mode: BOTH
output_format: JSON
tags:
- tag: component
value: raw
# RPC evaluation acknowledgement time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_eval_ack_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: 7241c9923d714970ad950f9b7aa8ab52
name: 'HashiCorp Nomad Server: RPC evaluation acknowledgement time'
type: DEPENDENT
key: nomad.server.eval.ack
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed for Eval.Ack RPC call.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_eval_ack_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: network
- tag: component
value: rpc
# FSM apply plan results time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_fsm_apply_plan_results_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: b9085e173cec4ce2b2fa6fd68041695e
name: 'HashiCorp Nomad Server: FSM apply plan results time'
type: DEPENDENT
key: nomad.server.fsm.apply_plan_results
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed to apply ApplyPlanResults raft entry.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_fsm_apply_plan_results_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: fsm
# FSM job registration time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_fsm_register_job_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: 62b19ce1b2bb4fdaacff02d70101a10f
name: 'HashiCorp Nomad Server: FSM job registration time'
type: DEPENDENT
key: nomad.server.fsm.register_job
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed to apply RegisterJob raft entry.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_fsm_register_job_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: fsm
# FSM update evaluation time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_fsm_update_eval_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: 2d0b9f09bfb5480ebd6d6cf96c15278e
name: 'HashiCorp Nomad Server: FSM update evaluation time'
type: DEPENDENT
key: nomad.server.fsm.update_eval
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed to apply UpdateEval raft entry.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_fsm_update_eval_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: fsm
# Heartbeats active: dependent item of nomad.server.metrics.get.
# Stores nomad_nomad_heartbeat_active as extracted; the value is discarded when extraction fails.
- uuid: 0abce7a087874e58a7da20acb3599535
name: 'HashiCorp Nomad Server: Heartbeats active'
type: DEPENDENT
key: nomad.server.heartbeat.active
delay: '0'
history: 7d
trends: 90d
description: |
Number of active heartbeat timers.
Each timer represents a Nomad client connection.
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_heartbeat_active
- value
- ''
error_handler: DISCARD_VALUE
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: network
# RPC job allocations time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_job_allocations_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: dc95deb3590148a9bc50ea151a0d46b7
name: 'HashiCorp Nomad Server: RPC job allocations time'
type: DEPENDENT
key: nomad.server.job.allocations
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed for Job.Allocations RPC call.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_allocations_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: network
- tag: component
value: rpc
# RPC job evaluations time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_job_evaluations_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: 11e18e811fe7461181417ca1f5dad915
name: 'HashiCorp Nomad Server: RPC job evaluations time'
type: DEPENDENT
key: nomad.server.job.evaluations
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed for Job.Evaluations RPC call.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_evaluations_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: network
- tag: component
value: rpc
# RPC get job time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_job_get_job_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: 0efaac3a59d44ce09d31435ddfa6aada
name: 'HashiCorp Nomad Server: RPC get job time'
type: DEPENDENT
key: nomad.server.job.get_job
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed for Job.GetJob RPC call.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_get_job_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: network
- tag: component
value: rpc
# Jobs dead: dependent item of nomad.server.metrics.get.
# Stores nomad_nomad_job_status_dead; when extraction fails the value 0 is stored instead.
- uuid: 5ab3191f1f744682bab6d3142a1c39a4
name: 'HashiCorp Nomad Server: Jobs dead'
type: DEPENDENT
key: nomad.server.job_status.dead
delay: '0'
history: 7d
trends: 90d
description: 'Number of dead jobs.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_status_dead
- value
- ''
error_handler: CUSTOM_VALUE
error_handler_params: '0'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: jobs
triggers:
# Fires when the last value is above 0 and the item has received data within the last 5 minutes.
- uuid: 44dcd52ebf54404e871501a4c4825424
expression: 'last(/HashiCorp Nomad Server by HTTP/nomad.server.job_status.dead) > 0 and nodata(/HashiCorp Nomad Server by HTTP/nomad.server.job_status.dead,5m) = 0'
name: 'HashiCorp Nomad Server: Dead jobs found'
opdata: 'Current amount: {ITEM.LASTVALUE}'
priority: WARNING
description: |
Jobs with the `Dead` state discovered.
Check the {$NOMAD.SERVER.API.SCHEME}://{HOST.IP}:{$NOMAD.SERVER.API.PORT}/v1/jobs URL for the details.
manual_close: 'YES'
tags:
- tag: scope
value: availability
# Jobs pending: dependent item of nomad.server.metrics.get.
# Stores nomad_nomad_job_status_pending; when extraction fails the value 0 is stored instead.
- uuid: 39a27315e33549dc88c6cb426a5a05c9
name: 'HashiCorp Nomad Server: Jobs pending'
type: DEPENDENT
key: nomad.server.job_status.pending
delay: '0'
history: 7d
trends: 90d
description: 'Number of pending jobs.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_status_pending
- value
- ''
error_handler: CUSTOM_VALUE
error_handler_params: '0'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: jobs
# Jobs running: dependent item of nomad.server.metrics.get.
# Stores nomad_nomad_job_status_running; when extraction fails the value 0 is stored instead.
- uuid: 97907d8816b14fb69063a55bdea22701
name: 'HashiCorp Nomad Server: Jobs running'
type: DEPENDENT
key: nomad.server.job_status.running
delay: '0'
history: 7d
trends: 90d
description: 'Number of running jobs.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_status_running
- value
- ''
error_handler: CUSTOM_VALUE
error_handler_params: '0'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: jobs
# Job allocations completed: dependent item of nomad.server.metrics.get.
# Applies the Prometheus `sum` function to nomad_nomad_job_summary_complete; when the step fails the value 0 is stored instead.
- uuid: 4be8c941477046dd9dcc549020d687e7
name: 'HashiCorp Nomad Server: Job allocations completed'
type: DEPENDENT
key: nomad.server.job_summary.complete
delay: '0'
history: 7d
trends: 90d
description: 'Number of complete allocations for a job.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_summary_complete
- function
- sum
error_handler: CUSTOM_VALUE
error_handler_params: '0'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: jobs
# Job allocations failed: dependent item of nomad.server.metrics.get.
# Applies the Prometheus `sum` function to nomad_nomad_job_summary_failed; when the step fails the value 0 is stored instead.
- uuid: 6f6ad9a70b4e4759bc2e4cc213d697bb
name: 'HashiCorp Nomad Server: Job allocations failed'
type: DEPENDENT
key: nomad.server.job_summary.failed
delay: '0'
history: 7d
trends: 90d
description: 'Number of failed allocations for a job.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_summary_failed
- function
- sum
error_handler: CUSTOM_VALUE
error_handler_params: '0'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: jobs
# RPC job summary time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_job_summary_get_job_summary_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: 7a267a1b82b24157a19b52e6f285f0ce
name: 'HashiCorp Nomad Server: RPC job summary time'
type: DEPENDENT
key: nomad.server.job_summary.get_job_summary
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed for Job.Summary RPC call.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_summary_get_job_summary_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: network
- tag: component
value: rpc
# Job allocations lost: dependent item of nomad.server.metrics.get.
# Applies the Prometheus `sum` function to nomad_nomad_job_summary_lost; when the step fails the value 0 is stored instead.
- uuid: 0c408dff86604486b0f23efed00f4348
name: 'HashiCorp Nomad Server: Job allocations lost'
type: DEPENDENT
key: nomad.server.job_summary.lost
delay: '0'
history: 7d
trends: 90d
description: 'Number of lost allocations for a job.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_summary_lost
- function
- sum
error_handler: CUSTOM_VALUE
error_handler_params: '0'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: jobs
# Job allocations queued: dependent item of nomad.server.metrics.get.
# Applies the Prometheus `sum` function to nomad_nomad_job_summary_queued; when the step fails the value 0 is stored instead.
- uuid: d0de68553b484d2d9311b356a7b5a9e7
name: 'HashiCorp Nomad Server: Job allocations queued'
type: DEPENDENT
key: nomad.server.job_summary.queued
delay: '0'
history: 7d
trends: 90d
description: 'Number of queued allocations for a job.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_summary_queued
- function
- sum
error_handler: CUSTOM_VALUE
error_handler_params: '0'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: jobs
# Job allocations running: dependent item of nomad.server.metrics.get.
# Applies the Prometheus `sum` function to nomad_nomad_job_summary_running; when the step fails the value 0 is stored instead.
- uuid: 98ab06dce1f84ef5993e8cc793114008
name: 'HashiCorp Nomad Server: Job allocations running'
type: DEPENDENT
key: nomad.server.job_summary.running
delay: '0'
history: 7d
trends: 90d
description: 'Number of running allocations for a job.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_summary_running
- function
- sum
error_handler: CUSTOM_VALUE
error_handler_params: '0'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: jobs
# Job allocations starting: dependent item of nomad.server.metrics.get.
# Applies the Prometheus `sum` function to nomad_nomad_job_summary_starting; when the step fails the value 0 is stored instead.
- uuid: e3cc96bd72924bb5bd0334018a5a88be
name: 'HashiCorp Nomad Server: Job allocations starting'
type: DEPENDENT
key: nomad.server.job_summary.starting
delay: '0'
history: 7d
trends: 90d
description: 'Number of starting allocations for a job.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_summary_starting
- function
- sum
error_handler: CUSTOM_VALUE
error_handler_params: '0'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: jobs
# Job allocations unknown: dependent item of nomad.server.metrics.get.
# Applies the Prometheus `sum` function to nomad_nomad_job_summary_unknown; when the step fails the value 0 is stored instead.
- uuid: 244a84e136c94a47b1b64b92d08ac20f
name: 'HashiCorp Nomad Server: Job allocations unknown'
type: DEPENDENT
key: nomad.server.job_summary.unknown
delay: '0'
history: 7d
trends: 90d
description: 'Number of unknown allocations for a job.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_job_summary_unknown
- function
- sum
error_handler: CUSTOM_VALUE
error_handler_params: '0'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: jobs
# Leader barrier time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_leader_barrier_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: 478f45d1095646bca5ffd3a6d923dddf
name: 'HashiCorp Nomad Server: Leader barrier time'
type: DEPENDENT
key: nomad.server.leader.barrier
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed to establish a raft barrier during leader transition.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_leader_barrier_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: leader
- tag: component
value: raft
# Total reconcile time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_leader_reconcile_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: 8425336f75704a259bc066c795df1516
name: 'HashiCorp Nomad Server: Total reconcile time'
type: DEPENDENT
key: nomad.server.leader.reconcile
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed to reconcile all serf peers with state store.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_leader_reconcile_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: leader
- tag: component
value: raft
# Reconcile peer time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_leader_reconcileMember_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: a5b6d36142174dd683e2aec5dd4ee1e9
name: 'HashiCorp Nomad Server: Reconcile peer time'
type: DEPENDENT
key: nomad.server.leader.reconcile_member
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed to reconcile a serf peer with state store.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_leader_reconcileMember_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: leader
- tag: component
value: raft
# Gossip time: dependent item of nomad.server.metrics.get.
# Takes nomad_memberlist_gossip_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: af6eda309cc545b2aa65efc0af103a02
name: 'HashiCorp Nomad Server: Gossip time'
type: DEPENDENT
key: nomad.server.memberlist.gossip
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed to broadcast gossip messages.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_memberlist_gossip_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: gossip
# Metrics: intermediate master item that extracts $.body from nomad.server.data.get.
# No history is kept; the value is discarded when the JSONPath step fails.
# Other items in this template name this key as their master_item.
- uuid: 25372c89439641d09521348033b1e324
name: 'HashiCorp Nomad Server: Metrics'
type: DEPENDENT
key: nomad.server.metrics.get
delay: '0'
history: '0'
trends: '0'
value_type: TEXT
description: 'Nomad server metrics in raw format.'
preprocessing:
- type: JSONPATH
parameters:
- $.body
error_handler: DISCARD_VALUE
master_item:
key: nomad.server.data.get
tags:
- tag: component
value: raw
# Namespace list time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_namespace_list_namespace_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: 8a003df55bae4bf0a14fefc6f02f28bf
name: 'HashiCorp Nomad Server: Namespace list time'
type: DEPENDENT
key: nomad.server.namespace.list_namespace
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed for Namespace.ListNamespaces.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_namespace_list_namespace_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: namespaces
# Plan apply time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_plan_apply_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: dd8d8b5cdf914002ac58183b667ceb06
name: 'HashiCorp Nomad Server: Plan apply time'
type: DEPENDENT
key: nomad.server.plan.apply
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed to apply a plan.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_plan_apply_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: plan
# Plan evaluate time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_plan_evaluate_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: d8d69f231d23457e82c6d3941f09cf5d
name: 'HashiCorp Nomad Server: Plan evaluate time'
type: DEPENDENT
key: nomad.server.plan.evaluate
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed to evaluate a plan.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_plan_evaluate_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: plan
# Plan queue: dependent item of nomad.server.metrics.get.
# Stores nomad_nomad_plan_queue_depth as a FLOAT; the value is discarded when extraction fails.
- uuid: ee7b59fd19644bc4813696f8d806955f
name: 'HashiCorp Nomad Server: Plan queue'
type: DEPENDENT
key: nomad.server.plan.queue_depth
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
description: 'Count of evals in the plan queue.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_plan_queue_depth
- value
- ''
error_handler: DISCARD_VALUE
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: plan
# RPC plan submit time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_plan_submit_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: 9df85708b74d40509d9c9412a15e5ce5
name: 'HashiCorp Nomad Server: RPC plan submit time'
type: DEPENDENT
key: nomad.server.plan.submit
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed for Plan.Submit RPC call.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_plan_submit_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: network
- tag: component
value: plan
- tag: component
value: rpc
# Plan raft index processing time: dependent item of nomad.server.metrics.get.
# Takes nomad_nomad_plan_wait_for_index_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: 851eea2a548644ba884ca852c2759d90
name: 'HashiCorp Nomad Server: Plan raft index processing time'
type: DEPENDENT
key: nomad.server.plan.wait_for_index
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Time elapsed that planner waits for the raft index of the plan to be processed.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_nomad_plan_wait_for_index_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: plan
# Open file descriptors, max: dependent item of nomad.server.metrics.get.
# Stores process_max_fds as extracted; the value is discarded when extraction fails.
- uuid: 505ee2804b4e451892103b36cc38abde
name: 'HashiCorp Nomad Server: Open file descriptors, max'
type: DEPENDENT
key: nomad.server.process_max_fds
delay: '0'
history: 7d
trends: 90d
description: 'Maximum number of open file descriptors.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- process_max_fds
- value
- ''
error_handler: DISCARD_VALUE
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: fds
# Open file descriptors: dependent item of nomad.server.metrics.get.
# Stores process_open_fds as extracted; the value is discarded when extraction fails.
- uuid: cba898eca0c84a84a1ea0309e04b9f84
name: 'HashiCorp Nomad Server: Open file descriptors'
type: DEPENDENT
key: nomad.server.process_open_fds
delay: '0'
history: 7d
trends: 90d
description: 'Number of open file descriptors.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- process_open_fds
- value
- ''
error_handler: DISCARD_VALUE
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: fds
# FSM index: dependent item of nomad.server.metrics.get.
# Stores nomad_raft_appliedIndex as extracted; the value is discarded when extraction fails.
- uuid: 1dd455d2776f4378a39980cfa8eba33c
name: 'HashiCorp Nomad Server: FSM index'
type: DEPENDENT
key: nomad.server.raft.applied_index
delay: '0'
history: 7d
trends: 90d
description: 'Current index applied to FSM.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_appliedIndex
- value
- ''
error_handler: DISCARD_VALUE
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: raft
# Raft transactions, rate: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_apply, substituting 0 when extraction fails, and stores its change per second.
# NOTE(review): the other rate items visible nearby discard the value on extraction failure instead of substituting 0 — confirm the 0 fallback is intentional here.
- uuid: bad74d0dc4b2431fbb5a442bc2fc1236
name: 'HashiCorp Nomad Server: Raft transactions, rate'
type: DEPENDENT
key: nomad.server.raft.apply
delay: '0'
history: 7d
trends: 90d
description: 'Number of Raft transactions.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_apply
- value
- ''
error_handler: CUSTOM_VALUE
error_handler_params: '0'
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: raft
# Raft calls blocked, rate: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_barrier (discarded on extraction error) and stores its change per second.
- uuid: 804693ed989b44e3903bcd81159dd51d
name: 'HashiCorp Nomad Server: Raft calls blocked, rate'
type: DEPENDENT
key: nomad.server.raft.barrier
delay: '0'
history: 7d
trends: 90d
description: 'Count of blocking raft API calls.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_barrier
- value
- ''
error_handler: DISCARD_VALUE
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: raft
# BoltDB freelist bytes: dependent item of nomad.server.metrics.get.
# Stores nomad_raft_boltdb_freelistBytes with unit `B`; the value is discarded when extraction fails.
- uuid: f7d1bc6c6a82475dabcdaaec7472fe02
name: 'HashiCorp Nomad Server: BoltDB freelist bytes'
type: DEPENDENT
key: nomad.server.raft.boltdb.freelist_bytes
delay: '0'
history: 7d
trends: 90d
units: B
description: 'Number of freelist bytes.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_freelistBytes
- value
- ''
error_handler: DISCARD_VALUE
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB free page bytes: dependent item of nomad.server.metrics.get.
# Stores nomad_raft_boltdb_freePageBytes as a FLOAT with unit `B`; the value is discarded when extraction fails.
- uuid: b3f246d0991746589f4d0d68653eb4e9
name: 'HashiCorp Nomad Server: BoltDB free page bytes'
type: DEPENDENT
key: nomad.server.raft.boltdb.free_page_bytes
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: B
description: 'Number of free page bytes.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_freePageBytes
- value
- ''
error_handler: DISCARD_VALUE
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB free pages: dependent item of nomad.server.metrics.get.
# Stores nomad_raft_boltdb_numFreePages as extracted; the value is discarded when extraction fails.
- uuid: e3d0d6f656f5484f8e8a8ca1195aad59
name: 'HashiCorp Nomad Server: BoltDB free pages'
type: DEPENDENT
key: nomad.server.raft.boltdb.num_free_pages
delay: '0'
history: 7d
trends: 90d
description: 'Number of BoltDB free pages.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_numFreePages
- value
- ''
error_handler: DISCARD_VALUE
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB pending pages: dependent item of nomad.server.metrics.get.
# Stores nomad_raft_boltdb_numPendingPages as extracted; the value is discarded when extraction fails.
- uuid: 73f359bcb02d46a0adb6fdd51284afcf
name: 'HashiCorp Nomad Server: BoltDB pending pages'
type: DEPENDENT
key: nomad.server.raft.boltdb.num_pending_pages
delay: '0'
history: 7d
trends: 90d
description: 'Number of BoltDB pending pages.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_numPendingPages
- value
- ''
error_handler: DISCARD_VALUE
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB open read transactions: dependent item of nomad.server.metrics.get.
# Stores nomad_raft_boltdb_openReadTxn as extracted; the value is discarded when extraction fails.
- uuid: 08afe7430e3c49e388f0a87c8bbf2ff4
name: 'HashiCorp Nomad Server: BoltDB open read transactions'
type: DEPENDENT
key: nomad.server.raft.boltdb.open_read_txn
delay: '0'
history: 7d
trends: 90d
description: 'Number of current open read transactions.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_openReadTxn
- value
- ''
error_handler: DISCARD_VALUE
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB read transactions, rate: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_boltdb_totalReadTxn (discarded on extraction error) and stores its change per second.
- uuid: 36e89e86c9124e868670e9cb7b247899
name: 'HashiCorp Nomad Server: BoltDB read transactions, rate'
type: DEPENDENT
key: nomad.server.raft.boltdb.total_read_txn
delay: '0'
history: 7d
trends: 90d
description: 'Count of total read transactions.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_totalReadTxn
- value
- ''
error_handler: DISCARD_VALUE
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB cursors, rate: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_boltdb_txstats_cursorCount (discarded on extraction error) and stores its change per second.
# The name carries the ", rate" suffix because the stored value is a per-second change, not the raw count.
- uuid: 2733afd943b346e38c8605259bea038c
name: 'HashiCorp Nomad Server: BoltDB cursors, rate'
type: DEPENDENT
key: nomad.server.raft.boltdb.txstats.cursor_count
delay: '0'
history: 7d
trends: 90d
description: 'Count of total database cursors.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_txstats_cursorCount
- value
- ''
error_handler: DISCARD_VALUE
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB nodes, rate: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_boltdb_txstats_nodeCount (discarded on extraction error) and stores its change per second.
- uuid: 1641576e376c4c82a8b1882b4810db4c
name: 'HashiCorp Nomad Server: BoltDB nodes, rate'
type: DEPENDENT
key: nomad.server.raft.boltdb.txstats.node_count
delay: '0'
history: 7d
trends: 90d
description: 'Count of total database nodes.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_txstats_nodeCount
- value
- ''
error_handler: DISCARD_VALUE
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB node dereferences, rate: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_boltdb_txstats_nodeDeref (discarded on extraction error) and stores its change per second as a FLOAT.
- uuid: f0151f3431254030a3bcd538e8d2068b
name: 'HashiCorp Nomad Server: BoltDB node dereferences, rate'
type: DEPENDENT
key: nomad.server.raft.boltdb.txstats.node_deref
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
description: 'Count of total database node dereferences.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_txstats_nodeDeref
- value
- ''
error_handler: DISCARD_VALUE
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB page allocations, rate: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_boltdb_txstats_pageAlloc (discarded on extraction error) and stores its change per second as a FLOAT.
- uuid: 27a8a38acfc34e14b3fbb13aa58ad5d4
name: 'HashiCorp Nomad Server: BoltDB page allocations, rate'
type: DEPENDENT
key: nomad.server.raft.boltdb.txstats.page_alloc
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
description: 'Number of page allocations.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_txstats_pageAlloc
- value
- ''
error_handler: DISCARD_VALUE
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB pages in use: dependent item of nomad.server.metrics.get.
# Stores nomad_raft_boltdb_txstats_pageCount as extracted; the value is discarded when extraction fails.
- uuid: 14a5fc715dad4eacaddb2f277e1f85f6
name: 'HashiCorp Nomad Server: BoltDB pages in use'
type: DEPENDENT
key: nomad.server.raft.boltdb.txstats.page_count
delay: '0'
history: 7d
trends: 90d
description: 'Number of pages in use.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_txstats_pageCount
- value
- ''
error_handler: DISCARD_VALUE
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB rebalance operations, rate: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_boltdb_txstats_rebalance (discarded on extraction error) and stores its change per second as a FLOAT.
- uuid: dd9729e812614ab59c595b7183a883b0
name: 'HashiCorp Nomad Server: BoltDB rebalance operations, rate'
type: DEPENDENT
key: nomad.server.raft.boltdb.txstats.rebalance
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
description: 'Count of total rebalance operations.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_txstats_rebalance
- value
- ''
error_handler: DISCARD_VALUE
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB rebalance time: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_boltdb_txstats_rebalanceTime_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: d270aa05d1ca4f3d9ee606937f1703cc
name: 'HashiCorp Nomad Server: BoltDB rebalance time'
type: DEPENDENT
key: nomad.server.raft.boltdb.txstats.rebalance_time
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Sample of rebalance operation times.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_txstats_rebalanceTime_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB spill operations, rate: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_boltdb_txstats_spill (discarded on extraction error) and stores its change per second.
- uuid: 621478ee2bea482abffd6b8c80df5ee1
name: 'HashiCorp Nomad Server: BoltDB spill operations, rate'
type: DEPENDENT
key: nomad.server.raft.boltdb.txstats.spill
delay: '0'
history: 7d
trends: 90d
description: 'Count of total spill operations.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_txstats_spill
- value
- ''
error_handler: DISCARD_VALUE
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB spill time: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_boltdb_txstats_spillTime_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: 2ca3e7572bf04b04961107e7d9304887
name: 'HashiCorp Nomad Server: BoltDB spill time'
type: DEPENDENT
key: nomad.server.raft.boltdb.txstats.spill_time
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Sample of spill operation times.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_txstats_spillTime_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB split operations, rate: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_boltdb_txstats_split (discarded on extraction error) and stores its change per second.
- uuid: bd14547f789540618db85057eceb70db
name: 'HashiCorp Nomad Server: BoltDB split operations, rate'
type: DEPENDENT
key: nomad.server.raft.boltdb.txstats.split
delay: '0'
history: 7d
trends: 90d
description: 'Count of total split operations.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_txstats_split
- value
- ''
error_handler: DISCARD_VALUE
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB write operations, rate: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_boltdb_txstats_write (discarded on extraction error) and stores its change per second.
- uuid: b13b7b9eb66940c9bf4d14e711ee0279
name: 'HashiCorp Nomad Server: BoltDB write operations, rate'
type: DEPENDENT
key: nomad.server.raft.boltdb.txstats.write
delay: '0'
history: 7d
trends: 90d
description: 'Count of total write operations.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_txstats_write
- value
- ''
error_handler: DISCARD_VALUE
- type: CHANGE_PER_SECOND
parameters:
- ''
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# BoltDB write time: dependent item of nomad.server.metrics.get.
# Takes nomad_raft_boltdb_txstats_writeTime_sum (discarded on extraction error) and scales it by 1.0E-9 for the `s` unit.
- uuid: e9b31f3434aa4df0908a79ff5352a29e
name: 'HashiCorp Nomad Server: BoltDB write time'
type: DEPENDENT
key: nomad.server.raft.boltdb.txstats.write_time
delay: '0'
history: 7d
trends: 90d
value_type: FLOAT
units: s
description: 'Sample of write operation times.'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- nomad_raft_boltdb_txstats_writeTime_sum
- value
- ''
error_handler: DISCARD_VALUE
- type: MULTIPLIER
parameters:
- '1.0E-9'
master_item:
key: nomad.server.metrics.get
tags:
- tag: component
value: boltdb
# Cluster role: dependent item of nomad.server.stats.get (not the metrics master item).
# Reads $.body.stats.raft.state, then the JavaScript step maps 'Leader' -> 0, 'Follower' -> 1, 'Candidate' -> 2 and any other value -> 10.
# NOTE(review): the numeric codes are presumably labelled by the 'Cluster role' value map defined elsewhere — confirm the order matches.
- uuid: dce75575b01e4cf8bd84d6a40c74f22b
name: 'HashiCorp Nomad Server: Cluster role'
type: DEPENDENT
key: nomad.server.raft.cluster_role
delay: '0'
history: 7d
trends: 90d
description: 'Current role in the cluster.'
valuemap:
name: 'Cluster role'
preprocessing:
- type: JSONPATH
parameters:
- $.body.stats.raft.state
error_handler: DISCARD_VALUE
- type: JAVASCRIPT
parameters:
- |
const idx = [
'Leader',
'Follower',
'Candidate',
].indexOf(value);
return idx !== -1 ? idx : 10;
master_item:
key: nomad.server.stats.get
tags:
- tag: component
value: system
triggers:
# Fires whenever the stored role code differs from the previous value.
- uuid: 1bc6ee807acd4532a58b4527f8865c1c
expression: 'change(/HashiCorp Nomad Server by HTTP/nomad.server.raft.cluster_role) <> 0'
name: 'HashiCorp Nomad Server: Cluster role has changed'
event_name: 'HashiCorp Nomad Server: Cluster role has changed to {ITEM.LASTVALUE}'
priority: INFO
description: 'Cluster role has changed.'
manual_close: 'YES'
tags:
- tag: scope
value: notice
# Raft commit logs enqueued: raw value of nomad_raft_commitNumLogs taken from
# the metrics master item; a failed extraction discards the sample.
- uuid: a994ee525886407486adace82ea5ef90
  name: "HashiCorp Nomad Server: Raft commit logs enqueued"
  key: nomad.server.raft.commit_num_logs
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  description: "Count of logs enqueued."
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_raft_commitNumLogs, value, ""]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: raft}
# Dependent item: Raft commit time, taken from the metrics master item and
# scaled by 1.0E-9 before being stored with unit "s" (presumably a
# nanoseconds-to-seconds conversion, as in the sibling timer items).
- uuid: 3160d5b81e1540aab3244f1cf6bce95d
  name: 'HashiCorp Nomad Server: Raft commit time'
  type: DEPENDENT
  key: nomad.server.raft.commit_time
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: 'Time elapsed to commit writes.'
  preprocessing:
    # Read the Raft commit timer. The previous pattern pointed at
    # nomad_nomad_worker_dequeue_eval_sum, which belongs to the
    # "Worker evaluation dequeue time" item, so this item reported the wrong
    # metric.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_raft_commitTime_sum
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters:
        - '1.0E-9'
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: raft
# FSM apply time: nomad_raft_fsm_apply_sum from the metrics master item,
# multiplied by 1.0E-9 and stored as a float with unit "s".
- uuid: 09f1baf6fc2443f48e01814b939c0749
  name: "HashiCorp Nomad Server: FSM apply time"
  key: nomad.server.raft.fsm.apply
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time elapsed to apply write to FSM."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_raft_fsm_apply_sum, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["1.0E-9"]
  tags:
    - {tag: component, value: fsm}
# FSM autopilot time: nomad_nomad_fsm_autopilot_sum from the metrics master
# item, multiplied by 1.0E-9 and stored as a float with unit "s".
- uuid: 18689de9474f4812ba9ed34d311ad3a1
  name: "HashiCorp Nomad Server: FSM autopilot time"
  key: nomad.server.raft.fsm.autopilot
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time elapsed to apply Autopilot raft entry."
  preprocessing:
    # Unlike most sibling items, a failed extraction is replaced with the
    # custom value 0 instead of being discarded.
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_nomad_fsm_autopilot_sum, value, ""]
      error_handler: CUSTOM_VALUE
      error_handler_params: "0"
    - type: MULTIPLIER
      parameters: ["1.0E-9"]
  tags:
    - {tag: component, value: fsm}
# FSM enqueue time: nomad_raft_fsm_enqueue_sum from the metrics master item,
# multiplied by 1.0E-9 and stored as a float with unit "s".
- uuid: 6846f262148e49288ecb7aa0ad18f7d2
  name: "HashiCorp Nomad Server: FSM enqueue time"
  key: nomad.server.raft.fsm.enqueue
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time elapsed to enqueue write to FSM."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_raft_fsm_enqueue_sum, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["1.0E-9"]
  tags:
    - {tag: component, value: fsm}
# FSM register node time: nomad_nomad_fsm_register_node_sum from the metrics
# master item, multiplied by 1.0E-9 and stored as a float with unit "s".
- uuid: 4e2d0557fc984ed9858316c8409edd0e
  name: "HashiCorp Nomad Server: FSM register node time"
  key: nomad.server.raft.fsm.register_node
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time elapsed to apply RegisterNode raft entry."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_nomad_fsm_register_node_sum, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["1.0E-9"]
  tags:
    - {tag: component, value: fsm}
# Raft last index: raw value of nomad_raft_lastIndex taken from the metrics
# master item; a failed extraction discards the sample.
- uuid: bb65ca263f3244fc883d9102bbf9bef7
  name: "HashiCorp Nomad Server: Raft last index"
  key: nomad.server.raft.last_index
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  description: "Most recent index seen."
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_raft_lastIndex, value, ""]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: raft}
# Dispatch log time: nomad_raft_leader_dispatchLog_sum from the metrics master
# item, multiplied by 1.0E-9 and stored as a float with unit "s".
- uuid: bda052f13e3c4dacaafc4ddefbc98036
  name: "HashiCorp Nomad Server: Dispatch log time"
  key: nomad.server.raft.leader.dispatch_log
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time elapsed to write log, mark in flight, and start replication."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_raft_leader_dispatchLog_sum, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["1.0E-9"]
  # The item carries two values for the same "component" tag name.
  tags:
    - {tag: component, value: leader}
    - {tag: component, value: raft}
# Logs dispatched: raw value of nomad_raft_leader_dispatchNumLogs from the
# metrics master item.
- uuid: f915e5e162814786943cd4cc85bb7aff
  name: "HashiCorp Nomad Server: Logs dispatched"
  key: nomad.server.raft.leader.dispatch_num_logs
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  description: "Count of logs dispatched."
  preprocessing:
    # A failed extraction is replaced with the custom value 0 rather than
    # being discarded.
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_raft_leader_dispatchNumLogs, value, ""]
      error_handler: CUSTOM_VALUE
      error_handler_params: "0"
  # The item carries two values for the same "component" tag name.
  tags:
    - {tag: component, value: leader}
    - {tag: component, value: raft}
# Leader last contact: 0.99 quantile of nomad_raft_leader_lastContact from the
# metrics master item, with "NaN" replaced by 0, then multiplied by 0.001 and
# stored with unit "s" (presumably a milliseconds-to-seconds conversion).
- uuid: 7f9db272307c43e7b87b8a4a4a6eae16
  name: "HashiCorp Nomad Server: Leader last contact"
  key: nomad.server.raft.leader.lastContact
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: |
    Time since last contact to leader.
    General indicator of Raft latency.
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: ['nomad_raft_leader_lastContact{quantile="0.99"}', value, ""]
      error_handler: DISCARD_VALUE
    # Replace the literal text "NaN" with 0 before the numeric multiplier runs.
    - type: STR_REPLACE
      parameters: ["NaN", "0"]
    - type: MULTIPLIER
      parameters: ["0.001"]
  # The item carries two values for the same "component" tag name.
  tags:
    - {tag: component, value: leader}
    - {tag: component, value: raft}
  triggers:
    # Fires when the 5-minute minimum is at or above
    # {$NOMAD.SERVER.LEADER.LATENCY} and data was received in those 5 minutes.
    - uuid: 65c69469e9354e81a632485a74bc711d
      name: "HashiCorp Nomad Server: Leader last contact timeout exceeded"
      event_name: "Leader last contact timeout exceeded: over {$NOMAD.SERVER.LEADER.LATENCY} for the last 5 minutes"
      expression: "min(/HashiCorp Nomad Server by HTTP/nomad.server.raft.leader.lastContact,5m) >= {$NOMAD.SERVER.LEADER.LATENCY} and nodata(/HashiCorp Nomad Server by HTTP/nomad.server.raft.leader.lastContact,5m) = 0"
      priority: WARNING
      description: |
        The nomad.raft.leader.lastContact metric is a general indicator of Raft latency which can be used to observe how Raft timing is performing and guide infrastructure provisioning.
        If this number trends upwards, look at CPU, disk IOPs, and network latency. nomad.raft.leader.lastContact should not get too close to the leader lease timeout of 500ms.
      tags:
        - {tag: scope, value: performance}
# Raft peers: value at $.body.stats.raft.num_peers in the
# nomad.server.stats.get payload; discarded if the path cannot be resolved.
- uuid: bca59612c439447ebe78ae68e68fd6cb
  name: "HashiCorp Nomad Server: Raft peers"
  key: nomad.server.raft.peers
  type: DEPENDENT
  master_item: {key: nomad.server.stats.get}
  delay: "0"
  history: 7d
  trends: 90d
  description: "Current cluster raft peers amount."
  preprocessing:
    - type: JSONPATH
      parameters: ["$.body.stats.raft.num_peers"]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: system}
# Dependent item: average of all nomad_raft_replication_appendEntries_rpc
# series in the metrics master item, multiplied by 0.001 and stored with unit
# "s" (presumably a milliseconds-to-seconds conversion).
- uuid: 36ff560da62740a0b4657b4afabcf902
  name: 'HashiCorp Nomad Server: Raft transaction commit time'
  type: DEPENDENT
  key: nomad.server.raft.replication.appendEntries
  delay: '0'
  history: 7d
  trends: 90d
  # The 0.001 multiplier produces fractional seconds, so the item must be a
  # float. With the default unsigned-integer type the decimal part was lost,
  # unlike every other timer item in this template.
  value_type: FLOAT
  units: s
  description: 'Raft transaction commit time.'
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_raft_replication_appendEntries_rpc
        - function
        - avg
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters:
        - '0.001'
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: raft
# Heartbeat fails: raw value of nomad_raft_transition_heartbeat_timeout from
# the metrics master item.
- uuid: 662cd8712bbe4a4080a540f878b4ec44
  name: "HashiCorp Nomad Server: Heartbeat fails"
  key: nomad.server.raft.transition.heartbeat_timeout
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  description: "Count of failing to heartbeat and starting election."
  preprocessing:
    # A failed extraction is replaced with the custom value 0.
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_raft_transition_heartbeat_timeout, value, ""]
      error_handler: CUSTOM_VALUE
      error_handler_params: "0"
    # Unchanged values are stored at most once per hour (1h heartbeat).
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters: [1h]
  tags:
    - {tag: component, value: raft}
# Resident memory size: raw value of process_resident_memory_bytes from the
# metrics master item, stored as a float with unit "B".
- uuid: 3dae4fd461fa4a5abdba717e538b8405
  name: "HashiCorp Nomad Server: Resident memory size"
  key: nomad.server.resident_memory_bytes
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: B
  description: "Resident memory size in bytes."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [process_resident_memory_bytes, value, ""]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: memory}
# Dependent item: per-second rate of RPC queries, computed from the
# nomad_nomad_rpc_query metric of the nomad.server.metrics.get master item.
- uuid: ec2d018e121442be9d3fb350f6572ca6
  name: 'HashiCorp Nomad Server: RPC queries, rate'
  type: DEPENDENT
  key: nomad.server.rpc.query
  delay: '0'
  history: 7d
  trends: 90d
  # CHANGE_PER_SECOND yields fractional values; store them as floats so that
  # rates below 1/s are not trimmed by the default unsigned-integer type.
  value_type: FLOAT
  description: 'Number of RPC queries.'
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_rpc_query
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: network
    - tag: component
      value: rpc
# Dependent item: per-second rate of RPC requests, computed from the
# nomad_nomad_rpc_request metric of the nomad.server.metrics.get master item.
- uuid: 406d9bc9db1e44509a77acd11963f406
  name: 'HashiCorp Nomad Server: RPC requests, rate'
  type: DEPENDENT
  key: nomad.server.rpc.request
  delay: '0'
  history: 7d
  trends: 90d
  # CHANGE_PER_SECOND yields fractional values; store them as floats so that
  # rates below 1/s are not trimmed by the default unsigned-integer type.
  value_type: FLOAT
  description: 'Number of RPC requests being handled.'
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_rpc_request
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: network
    - tag: component
      value: rpc
# Dependent item: per-second rate of RPC requests that ended in an error,
# computed from the nomad.server.metrics.get master item.
- uuid: 0ef3cfaf0e694625bbc3f3a117ff9f06
  name: 'HashiCorp Nomad Server: RPC error requests, rate'
  type: DEPENDENT
  key: nomad.server.rpc.request_error
  delay: '0'
  history: 7d
  trends: 90d
  # CHANGE_PER_SECOND yields fractional values; store them as floats so that
  # rates below 1/s are not trimmed by the default unsigned-integer type.
  value_type: FLOAT
  description: 'Number of RPC requests being handled that result in an error.'
  preprocessing:
    # Read the error counter. The previous pattern was nomad_nomad_rpc_request,
    # which made this item a duplicate of "RPC requests, rate" instead of
    # reporting errors.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_rpc_request_error
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: CHANGE_PER_SECOND
      parameters:
        - ''
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: network
    - tag: component
      value: rpc
# Memory used: raw value of nomad_runtime_alloc_bytes from the metrics master
# item, stored as a float with unit "B".
- uuid: a0e7b48174de4e569762dc1654d63267
  name: "HashiCorp Nomad Server: Memory used"
  key: nomad.server.runtime.alloc_bytes
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: B
  description: "Memory utilization in bytes."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_runtime_alloc_bytes, value, ""]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: memory}
# Objects freed, rate: per-second change of nomad_runtime_free_count from the
# metrics master item, stored as a float.
- uuid: 7d8883065c2c494f9c4853afe50d157c
  name: "HashiCorp Nomad Server: Objects freed, rate"
  key: nomad.server.runtime.free_count
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: "Count of objects freed from heap by go runtime GC."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_runtime_free_count, value, ""]
      error_handler: DISCARD_VALUE
    - type: CHANGE_PER_SECOND
      parameters: [""]
  tags:
    - {tag: component, value: runtime}
# GC pause time: nomad_runtime_gc_pause_ns_sum from the metrics master item,
# multiplied by 1.0E-9 and stored as a float with unit "s".
- uuid: 02b0699a706742d1bb557e32ed1b52ea
  name: "HashiCorp Nomad Server: GC pause time"
  key: nomad.server.runtime.gc_pause_ns
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Go runtime GC pause times."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_runtime_gc_pause_ns_sum, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["1.0E-9"]
  tags:
    - {tag: component, value: runtime}
# Heap objects: raw value of nomad_runtime_heap_objects from the metrics
# master item; a failed extraction discards the sample.
- uuid: c3fc612a6bec45ee9ffd51d168801974
  name: "HashiCorp Nomad Server: Heap objects"
  key: nomad.server.runtime.heap_objects
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  description: |
    Number of objects on the heap.
    General memory pressure indicator.
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_runtime_heap_objects, value, ""]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: memory}
# Goroutines: raw value of nomad_runtime_num_goroutines from the metrics
# master item; a failed extraction discards the sample.
- uuid: 9b1bcd0020b5413e920ce12af6124687
  name: "HashiCorp Nomad Server: Goroutines"
  key: nomad.server.runtime.num_goroutines
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  description: "Number of goroutines and general load pressure indicator."
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_runtime_num_goroutines, value, ""]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: memory}
# GC metadata size: raw value of nomad_runtime_sys_bytes from the metrics
# master item, stored as a float with unit "B".
- uuid: 71256d39cf844c22be8d25a9170e61f2
  name: "HashiCorp Nomad Server: GC metadata size"
  key: nomad.server.runtime.sys_bytes
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: B
  description: "Go runtime GC metadata size in bytes."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_runtime_sys_bytes, value, ""]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: runtime}
# GC runs: raw value of nomad_runtime_total_gc_runs from the metrics master
# item; a failed extraction discards the sample.
- uuid: 733b18dd02d7404bb8cf4c7447a374ae
  name: "HashiCorp Nomad Server: GC runs"
  key: nomad.server.runtime.total_gc_runs
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  description: "Count of go runtime GC runs."
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_runtime_total_gc_runs, value, ""]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: runtime}
# Allocation reschedule attempts: sum over all series matching
# nomad_scheduler_allocs_reschedule_attempted in the metrics master item.
- uuid: c7f327a390884c928ebf228d8ebb3a4c
  name: "HashiCorp Nomad Server: Allocation reschedule attempts"
  key: nomad.server.scheduler.allocs.rescheduled.attempted
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  description: "Count of attempts to reschedule an allocation."
  preprocessing:
    # A failed extraction is replaced with the custom value 0.
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_scheduler_allocs_reschedule_attempted, function, sum]
      error_handler: CUSTOM_VALUE
      error_handler_params: "0"
  tags:
    - {tag: component, value: allocations}
# Memberlist events: raw value of nomad_serf_queue_Event_sum from the metrics
# master item, stored as a float.
- uuid: 8293b40a695b4b24ab9b6576a0f9a4d8
  name: "HashiCorp Nomad Server: Memberlist events"
  key: nomad.server.serf.queue.event
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: "Count of memberlist events received."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_serf_queue_Event_sum, value, ""]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: memberlist}
# Memberlist changes: raw value of nomad_serf_queue_Intent_sum from the
# metrics master item, stored as a float.
- uuid: 8aacc54e455b49b0ab0cfed4559829f7
  name: "HashiCorp Nomad Server: Memberlist changes"
  key: nomad.server.serf.queue.intent
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: "Count of memberlist changes."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_serf_queue_Intent_sum, value, ""]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: memberlist}
# Memberlist queries: raw value of nomad_serf_queue_Query_sum from the metrics
# master item, stored as a float.
- uuid: 1c5f383a89f74934a6cb211bfd586856
  name: "HashiCorp Nomad Server: Memberlist queries"
  key: nomad.server.serf.queue.queries
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: "Count of memberlist queries."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_serf_queue_Query_sum, value, ""]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: memberlist}
# Snapshot index: raw value of nomad_state_snapshotIndex from the metrics
# master item; a failed extraction discards the sample.
- uuid: e645f371b4ae48e2b2b782f4b0d0fae4
  name: "HashiCorp Nomad Server: Snapshot index"
  key: nomad.server.state.snapshot.index
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  description: "Current snapshot index."
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_state_snapshotIndex, value, ""]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: state}
# Dependent item: HTTP status line of the last nomad.server.stats.get request,
# stored as text and used by the availability trigger below.
- uuid: 02ba826868414a16909f13b98cc4c445
  name: 'HashiCorp Nomad Server: Internal stats API response'
  type: DEPENDENT
  key: nomad.server.stats.api.response
  delay: '0'
  history: 7d
  trends: '0'
  value_type: TEXT
  description: 'Internal stats API response message.'
  preprocessing:
    # Return the last header key that looks like an HTTP status line.
    # Any parsing error falls back to a synthetic "408 Request timeout" line.
    - type: JAVASCRIPT
      parameters:
        - |
          try {
              var response = Object.keys(JSON.parse(value).header).filter(function (f) {
                  return f.match(/HTTP\/[\d.]+\s+\d{3}/);
              });
              return response.pop();
          }
          catch (error) {
              return "HTTP/1.1 408 Request timeout";
          }
    # Unchanged values are stored at most once per hour (1h heartbeat).
    - type: DISCARD_UNCHANGED_HEARTBEAT
      parameters:
        - 1h
  master_item:
    key: nomad.server.stats.get
  tags:
    - tag: component
      value: status
  triggers:
    # Fires when the status line does not contain
    # {$NOMAD.API.RESPONSE.SUCCESS}.
    - uuid: b60dd78bd2d24c22afec63a8b420bcd6
      expression: 'find(/HashiCorp Nomad Server by HTTP/nomad.server.stats.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
      name: 'HashiCorp Nomad Server: Internal stats API connection has failed'
      # The regsub output argument is now a balanced quoted string ("\1").
      # The previous unbalanced form (\1") appended a stray double quote to
      # the extracted response code and message.
      event_name: 'HashiCorp Nomad Server: Internal stats API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", "\1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", "\1")}'
      priority: AVERAGE
      description: |
        Internal stats API connection has failed.
        Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
      manual_close: 'YES'
      # Depends on the "Monitoring API connection has failed" trigger.
      dependencies:
        - name: 'HashiCorp Nomad Server: Monitoring API connection has failed'
          expression: 'find(/HashiCorp Nomad Server by HTTP/nomad.server.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
      tags:
        - tag: scope
          value: availability
# Master item: polls /v1/agent/self on the host once per hour and returns
# headers and body as JSON text. History is not kept for the item itself
# (history "0").
- uuid: f8d3a680e6f94250ab0a1ef8169889e3
  name: "HashiCorp Nomad Server: Internal stats get"
  key: nomad.server.stats.get
  type: HTTP_AGENT
  url: '{$NOMAD.SERVER.API.SCHEME}://{HOST.IP}:{$NOMAD.SERVER.API.PORT}/v1/agent/self'
  query_fields:
    - {name: filter, value: 'ID == "{HOST.HOST}"'}
  # The token is sent in the X-Nomad-Token request header.
  headers:
    - {name: X-Nomad-Token, value: "{$NOMAD.TOKEN}"}
  http_proxy: "{$NOMAD.HTTP.PROXY}"
  timeout: "{$NOMAD.DATA.TIMEOUT}"
  # Empty list of required status codes.
  status_codes: ""
  retrieve_mode: BOTH
  output_format: JSON
  delay: 1h
  history: "0"
  trends: "0"
  value_type: TEXT
  description: "Internal stats data in raw format."
  preprocessing:
    # If the item becomes unsupported, substitute a synthetic payload whose
    # only header is a 408 status line.
    - type: CHECK_NOT_SUPPORTED
      parameters: [""]
      error_handler: CUSTOM_VALUE
      error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}'
  tags:
    - {tag: component, value: raw}
# Dependent item: number of revoked Vault tokens, taken from the
# nomad.server.metrics.get master item and stored as a float.
- uuid: 9593fe46e0d941f2b4ddef96baa39b0e
  name: 'HashiCorp Nomad Server: Vault tokens revoked'
  type: DEPENDENT
  key: nomad.server.vault.distributed_tokens_revoked
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  description: 'Count of revoked tokens.'
  preprocessing:
    # Metric name aligned with the item key, name and description ("revoked").
    # The previous pattern ended in "_revoking".
    # NOTE(review): confirm the exact metric name against the Nomad telemetry
    # reference for the monitored Nomad version.
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_vault_distributed_tokens_revoked
        - value
        - ''
      error_handler: DISCARD_VALUE
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: vault
# Vault token last renewal: nomad_nomad_vault_token_last_renewal from the
# metrics master item, multiplied by 0.001 and stored as a float with unit "s".
- uuid: 40c41426ab96476c84647a18511af6e8
  name: "HashiCorp Nomad Server: Vault token last renewal"
  key: nomad.server.vault.token_last_renewal
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time since last successful Vault token renewal."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_nomad_vault_token_last_renewal, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["0.001"]
  tags:
    - {tag: component, value: vault}
# Vault token next renewal: nomad_nomad_vault_token_next_renewal from the
# metrics master item, multiplied by 0.001 and stored as a float with unit "s".
- uuid: f62d8cf1e4b94c9b8796bd30fa799f7a
  name: "HashiCorp Nomad Server: Vault token next renewal"
  key: nomad.server.vault.token_next_renewal
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time until next Vault token renewal attempt."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_nomad_vault_token_next_renewal, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["0.001"]
  tags:
    - {tag: component, value: vault}
# Vault token TTL: nomad_nomad_vault_token_ttl from the metrics master item,
# multiplied by 0.001 and stored as a float with unit "s".
- uuid: c619c340887b43e1a4db750e74f1d4aa
  name: "HashiCorp Nomad Server: Vault token TTL"
  key: nomad.server.vault.token_ttl
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time to live for Vault token."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_nomad_vault_token_ttl, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["0.001"]
  tags:
    - {tag: component, value: vault}
# Nomad server version: string at $.body.config.Version.Version in the
# nomad.server.stats.get payload, stored as a character value without trends.
- uuid: 921fc50044f94aceaa9d0789be854496
  name: "HashiCorp Nomad Server: Nomad server version"
  key: nomad.server.version
  type: DEPENDENT
  master_item: {key: nomad.server.stats.get}
  delay: "0"
  history: 7d
  trends: "0"
  value_type: CHAR
  description: "Nomad server version."
  preprocessing:
    # No error handler is configured for this step.
    - type: JSONPATH
      parameters: ["$.body.config.Version.Version"]
  triggers:
    # Informational, manually closable trigger: fires when the stored version
    # differs from the previous value.
    - uuid: 3c7dfd5ec21144b6b58c603d60142200
      name: "HashiCorp Nomad Server: Nomad server version has changed"
      event_name: "HashiCorp Nomad Server: Nomad server version has changed to {ITEM.LASTVALUE}"
      expression: "change(/HashiCorp Nomad Server by HTTP/nomad.server.version)<>0"
      priority: INFO
      manual_close: "YES"
      description: "Nomad server version has changed."
      tags:
        - {tag: scope, value: availability}
# Virtual memory size: raw value of process_virtual_memory_bytes from the
# metrics master item, stored as a float with unit "B".
- uuid: d769576e16df4aed86004498055d5a94
  name: "HashiCorp Nomad Server: Virtual memory size"
  key: nomad.server.virtual_memory_bytes
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: B
  description: "Virtual memory size in bytes."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [process_virtual_memory_bytes, value, ""]
      error_handler: DISCARD_VALUE
  tags:
    - {tag: component, value: memory}
# Dependent item: time a worker spends creating an evaluation, taken from the
# metrics master item and scaled by 1.0E-9 before being stored with unit "s"
# (presumably a nanoseconds-to-seconds conversion, as in the sibling worker
# timers).
- uuid: d1addab9628140b09adb60c2820c1829
  name: 'HashiCorp Nomad Server: Worker evaluation create time'
  type: DEPENDENT
  key: nomad.server.worker.create_eval
  delay: '0'
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: 'Time elapsed for worker to create an eval.'
  preprocessing:
    # Read the create_eval timer. The previous pattern was
    # nomad_nomad_worker_dequeue_eval_sum, which made this item a duplicate of
    # "Worker evaluation dequeue time".
    - type: PROMETHEUS_PATTERN
      parameters:
        - nomad_nomad_worker_create_eval_sum
        - value
        - ''
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters:
        - '1.0E-9'
  master_item:
    key: nomad.server.metrics.get
  tags:
    - tag: component
      value: workers
# Worker evaluation dequeue time: nomad_nomad_worker_dequeue_eval_sum from the
# metrics master item, multiplied by 1.0E-9 and stored as a float, unit "s".
- uuid: 0875726948384b9696158d4db2b64691
  name: "HashiCorp Nomad Server: Worker evaluation dequeue time"
  key: nomad.server.worker.dequeue_eval
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time elapsed for worker to dequeue an eval."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_nomad_worker_dequeue_eval_sum, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["1.0E-9"]
  tags:
    - {tag: component, value: workers}
# Worker invoke scheduler time: nomad_nomad_worker_invoke_scheduler_service_sum
# from the metrics master item, multiplied by 1.0E-9, stored as float seconds.
- uuid: 5bc2cf7713274e1ca8997415f26f7087
  name: "HashiCorp Nomad Server: Worker invoke scheduler time"
  key: nomad.server.worker.invoke_scheduler_service
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time elapsed for worker to invoke the scheduler."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_nomad_worker_invoke_scheduler_service_sum, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["1.0E-9"]
  tags:
    - {tag: component, value: workers}
# Worker acknowledgement send time: nomad_nomad_worker_send_ack_sum from the
# metrics master item, multiplied by 1.0E-9 and stored as a float, unit "s".
- uuid: c21aca97ba274913860b28a1fd06abec
  name: "HashiCorp Nomad Server: Worker acknowledgement send time"
  key: nomad.server.worker.send_ack
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time elapsed for worker to send acknowledgement."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_nomad_worker_send_ack_sum, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["1.0E-9"]
  tags:
    - {tag: component, value: workers}
# Worker submit plan time: nomad_nomad_worker_submit_plan_sum from the metrics
# master item, multiplied by 1.0E-9 and stored as a float with unit "s".
- uuid: 3c7b49c3f60e4d69981f6dd3569e49e6
  name: "HashiCorp Nomad Server: Worker submit plan time"
  key: nomad.server.worker.submit_plan
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time elapsed for worker to submit plan."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_nomad_worker_submit_plan_sum, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["1.0E-9"]
  tags:
    - {tag: component, value: workers}
# Worker update evaluation time: nomad_nomad_worker_update_eval_sum from the
# metrics master item, multiplied by 1.0E-9 and stored as a float, unit "s".
- uuid: f5cd72ca250f402f9ddd112fb858d1f7
  name: "HashiCorp Nomad Server: Worker update evaluation time"
  key: nomad.server.worker.update_eval
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time elapsed for worker to submit updated eval."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_nomad_worker_update_eval_sum, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["1.0E-9"]
  tags:
    - {tag: component, value: workers}
# Worker log replication time: nomad_nomad_worker_wait_for_index_sum from the
# metrics master item, multiplied by 1.0E-9 and stored as a float, unit "s".
- uuid: ae2e69196ef7416481e9904d9ee8862b
  name: "HashiCorp Nomad Server: Worker log replication time"
  key: nomad.server.worker.wait_for_index
  type: DEPENDENT
  master_item: {key: nomad.server.metrics.get}
  delay: "0"
  history: 7d
  trends: 90d
  value_type: FLOAT
  units: s
  description: "Time elapsed that worker waits for the raft index of the eval to be processed."
  preprocessing:
    # A failed metric extraction discards the sample (error_handler).
    - type: PROMETHEUS_PATTERN
      parameters: [nomad_nomad_worker_wait_for_index_sum, value, ""]
      error_handler: DISCARD_VALUE
    - type: MULTIPLIER
      parameters: ["1.0E-9"]
  tags:
    - {tag: component, value: workers}
# Template-level tags (class and target).
tags:
  - {tag: class, value: software}
  - {tag: target, value: nomad-server}
# User macros referenced by the items and triggers of this template.
macros:
  - macro: "{$NOMAD.API.RESPONSE.SUCCESS}"
    description: "HTTP API successful response code. Availability triggers threshold. Change, if needed."
    value: "200"
  - macro: "{$NOMAD.DATA.TIMEOUT}"
    description: "Response timeout for an API."
    value: 15s
  # No value is defined for the proxy macro.
  - macro: "{$NOMAD.HTTP.PROXY}"
    description: "Sets the HTTP proxy for HTTP agent item. If this parameter is empty, then no proxy is used."
  - macro: "{$NOMAD.OPEN.FDS.MAX}"
    description: "Maximum percentage of used file descriptors."
    value: "90"
  - macro: "{$NOMAD.REDUNDANCY.MIN}"
    description: |
      Amount of redundant servers to keep the cluster safe.
      Default value - '1' for the 3-nodes cluster.
      Change if needed.
    value: "1"
  - macro: "{$NOMAD.SERVER.API.PORT}"
    description: "Nomad SERVER API port."
    value: "4646"
  - macro: "{$NOMAD.SERVER.API.SCHEME}"
    description: "Nomad SERVER API scheme."
    value: http
  - macro: "{$NOMAD.SERVER.LEADER.LATENCY}"
    description: "Leader last contact latency threshold."
    value: 0.3s
  - macro: "{$NOMAD.SERVER.RPC.PORT}"
    description: "Nomad RPC service port."
    value: "4647"
  - macro: "{$NOMAD.SERVER.SERF.PORT}"
    description: "Nomad serf service port."
    value: "4648"
  # Placeholder value; it has to be replaced with a real token.
  - macro: "{$NOMAD.TOKEN}"
    description: "Nomad authentication token."
    value: "<PUT YOUR AUTH TOKEN>"
dashboards:
# Dashboard "BoltDB": a single page with three item widgets in the top row
# (x = default, 8 and 16; each 8 wide, 5 high) followed by three full-width
# (24 wide) graph widgets at y = 5, 10 and 15.
- uuid: c721ccf33a6f412e994e0ef8c9dc81b5
name: BoltDB
pages:
- name: BoltDB
widgets:
# Item widget for nomad.server.raft.boltdb.txstats.cursor_count.
# The "show" field is listed twice (values 2 and 4); both entries are kept.
- type: item
width: '8'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.txstats.cursor_count
# Item widget for nomad.server.raft.boltdb.txstats.node_count.
- type: item
x: '8'
width: '8'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.txstats.node_count
# Item widget for nomad.server.raft.boltdb.txstats.node_deref.
- type: item
x: '16'
width: '8'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.txstats.node_deref
# Graph widget referencing the "Raft timers" graph by host and name.
- type: graph
'y': '5'
width: '24'
height: '5'
fields:
- type: GRAPH
name: graphid
value:
host: 'HashiCorp Nomad Server by HTTP'
name: 'HashiCorp Nomad Server: Raft timers'
# Graph widget referencing the "BoltDB operations" graph.
- type: graph
'y': '10'
width: '24'
height: '5'
fields:
- type: GRAPH
name: graphid
value:
host: 'HashiCorp Nomad Server by HTTP'
name: 'HashiCorp Nomad Server: BoltDB operations'
# Graph widget referencing the "BoltDB pages" graph.
- type: graph
'y': '15'
width: '24'
height: '5'
fields:
- type: GRAPH
name: graphid
value:
host: 'HashiCorp Nomad Server by HTTP'
name: 'HashiCorp Nomad Server: BoltDB pages'
# Dashboard "Cluster": a single page with four item widgets in the top row
# (each 6 wide), a full-width "Raft timers" graph at y = 5, and three item
# widgets (each 8 wide) at y = 10.
- uuid: 5b6b8811398f48ebb4abb73620320211
name: Cluster
pages:
- name: Cluster
widgets:
# Item widget for nomad.server.raft.cluster_role. Unlike the other item
# widgets it has a single "show" entry (2) and sets value_size to 30.
- type: item
width: '6'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: INTEGER
name: value_size
value: '30'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.cluster_role
# Item widget for nomad.server.autopilot.state (value_size 30).
- type: item
x: '6'
width: '6'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: INTEGER
name: value_size
value: '30'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.autopilot.state
# Item widget for nomad.server.autopilot.failure_tolerance.
- type: item
x: '12'
width: '6'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.autopilot.failure_tolerance
# Item widget for nomad.server.raft.fsm.autopilot.
- type: item
x: '18'
width: '6'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.fsm.autopilot
# Graph widget referencing the "Raft timers" graph by host and name.
- type: graph
'y': '5'
width: '24'
height: '5'
fields:
- type: GRAPH
name: graphid
value:
host: 'HashiCorp Nomad Server by HTTP'
name: 'HashiCorp Nomad Server: Raft timers'
# Item widget for nomad.server.raft.leader.dispatch_num_logs.
- type: item
'y': '10'
width: '8'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.leader.dispatch_num_logs
# Item widget for nomad.server.raft.apply.
- type: item
x: '8'
'y': '10'
width: '8'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.apply
# Item widget for nomad.server.raft.applied_index.
- type: item
x: '16'
'y': '10'
width: '8'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.applied_index
- uuid: c7f515dd33ee4045b96ffc6d83cec12a
name: Jobs
pages:
- name: Jobs
widgets:
- type: item
width: '8'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.job_status.running
- type: item
x: '8'
width: '8'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.job_status.pending
- type: item
x: '16'
width: '8'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.job_status.dead
- type: item
'y': '5'
width: '8'
height: '5'
fields:
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.job_summary.running
# Widget of type "item" bound to key nomad.server.job_summary.starting;
# placed at x=8, y=5, size 8x5.
- type: item
x: '8'
'y': '5'
width: '8'
height: '5'
fields:
# Two separate 'show' list elements (2 and 4): a multi-value field, not a duplicate key.
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
# Item reference is by template host name + item key.
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.job_summary.starting
# Widget of type "item" bound to key nomad.server.job_summary.complete;
# placed at x=16, y=5, size 8x5.
- type: item
x: '16'
'y': '5'
width: '8'
height: '5'
fields:
# Two separate 'show' list elements (2 and 4): a multi-value field, not a duplicate key.
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
# Item reference is by template host name + item key.
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.job_summary.complete
# Widget of type "item" bound to key nomad.server.job_summary.queued;
# y=10, size 6x5 (this row uses width 6 instead of 8).
# No 'x' key is given (presumably the default column — confirm).
- type: item
'y': '10'
width: '6'
height: '5'
fields:
# Two separate 'show' list elements (2 and 4): a multi-value field, not a duplicate key.
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
# Item reference is by template host name + item key.
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.job_summary.queued
# Widget of type "item" bound to key nomad.server.job_summary.lost;
# placed at x=6, y=10, size 6x5.
- type: item
x: '6'
'y': '10'
width: '6'
height: '5'
fields:
# Two separate 'show' list elements (2 and 4): a multi-value field, not a duplicate key.
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
# Item reference is by template host name + item key.
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.job_summary.lost
# Widget of type "item" bound to key nomad.server.job_summary.failed;
# placed at x=12, y=10, size 6x5.
- type: item
x: '12'
'y': '10'
width: '6'
height: '5'
fields:
# Two separate 'show' list elements (2 and 4): a multi-value field, not a duplicate key.
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
# Item reference is by template host name + item key.
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.job_summary.failed
# Widget of type "item" bound to key nomad.server.job_summary.unknown;
# placed at x=18, y=10, size 6x5.
- type: item
x: '18'
'y': '10'
width: '6'
height: '5'
fields:
# Two separate 'show' list elements (2 and 4): a multi-value field, not a duplicate key.
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
# Item reference is by template host name + item key.
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.job_summary.unknown
# Widget of type "item" bound to key nomad.server.broker.total_ready;
# y=15, size 8x5. No 'x' key is given (presumably the default column — confirm).
- type: item
'y': '15'
width: '8'
height: '5'
fields:
# Two separate 'show' list elements (2 and 4): a multi-value field, not a duplicate key.
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
# Item reference is by template host name + item key.
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.broker.total_ready
# Widget of type "item" bound to key nomad.server.broker.total_pending;
# placed at x=8, y=15, size 8x5.
- type: item
x: '8'
'y': '15'
width: '8'
height: '5'
fields:
# Two separate 'show' list elements (2 and 4): a multi-value field, not a duplicate key.
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
# Item reference is by template host name + item key.
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.broker.total_pending
# Widget of type "item" bound to key nomad.server.broker.total_waiting;
# placed at x=16, y=15, size 8x5.
- type: item
x: '16'
'y': '15'
width: '8'
height: '5'
fields:
# Two separate 'show' list elements (2 and 4): a multi-value field, not a duplicate key.
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
# Item reference is by template host name + item key.
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.broker.total_waiting
# Widget of type "item" bound to key nomad.server.blocked_evals.total_blocked;
# y=20, size 6x5. No 'x' key is given (presumably the default column — confirm).
- type: item
'y': '20'
width: '6'
height: '5'
fields:
# Two separate 'show' list elements (2 and 4): a multi-value field, not a duplicate key.
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
# Item reference is by template host name + item key.
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.blocked_evals.total_blocked
# Widget of type "item" bound to key nomad.server.blocked_evals.total_quota_limit;
# placed at x=6, y=20, size 6x5.
- type: item
x: '6'
'y': '20'
width: '6'
height: '5'
fields:
# Two separate 'show' list elements (2 and 4): a multi-value field, not a duplicate key.
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
# Item reference is by template host name + item key.
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.blocked_evals.total_quota_limit
# Widget of type "item" bound to key nomad.server.blocked_evals.total_escaped;
# placed at x=12, y=20, size 6x5.
- type: item
x: '12'
'y': '20'
width: '6'
height: '5'
fields:
# Two separate 'show' list elements (2 and 4): a multi-value field, not a duplicate key.
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
# Item reference is by template host name + item key.
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.blocked_evals.total_escaped
# Widget of type "item" bound to key nomad.server.broker.total_unacked;
# placed at x=18, y=20, size 6x5. This is the last widget of the "Jobs" page.
- type: item
x: '18'
'y': '20'
width: '6'
height: '5'
fields:
# Two separate 'show' list elements (2 and 4): a multi-value field, not a duplicate key.
- type: INTEGER
name: show
value: '2'
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: adv_conf
value: '1'
- type: INTEGER
name: decimal_places
value: '0'
# Item reference is by template host name + item key.
- type: ITEM
name: itemid
value:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.broker.total_unacked
# Value maps: each entry pairs a raw stored value ('value') with a display label ('newvalue').
valuemaps:
# 'Autopilot state': 0 -> Unhealthy, 1 -> Healthy.
# Raw values are quoted so they stay strings rather than YAML integers.
- uuid: 9898b464447240919cf8e25016be687f
name: 'Autopilot state'
mappings:
- value: '0'
newvalue: Unhealthy
- value: '1'
newvalue: Healthy
# Value map 'Cluster role': 0 -> Leader, 1 -> Follower, 2 -> Candidate.
# NOTE(review): the numeric codes are presumably produced by item preprocessing
# elsewhere in the template — keep both sides in sync when changing either.
- uuid: fe32599dc75c48f6ab0887652ceb728c
name: 'Cluster role'
mappings:
- value: '0'
newvalue: Leader
- value: '1'
newvalue: Follower
- value: '2'
newvalue: Candidate
# Value map 'Service state': 0 -> Down, 1 -> Up.
- uuid: b90612059a164f3fa0c7ab871afa0c59
name: 'Service state'
mappings:
- value: '0'
newvalue: Down
- value: '1'
newvalue: Up
# Triggers whose expressions combine more than one item.
triggers:
# Client memory trigger: available memory as a percentage of
# nomad.client.memory.total, compared against {$NOMAD.RAM.AVAIL.MIN}.
# max() over the 10m window is used deliberately: the problem fires only when
# even the highest "available" sample in the window is at or below the
# threshold, i.e. memory stayed low for the whole 10 minutes, as the event
# name states. With min() a single low sample would raise the problem and hold
# it until that sample aged out of the window.
# {ITEM.LASTVALUE1}/{ITEM.LASTVALUE2} in opdata refer to the first (available)
# and second (total) item of the expression, in that order.
- uuid: d41ae1c102d84eab9629565bcc9b51df
expression: '(max(/HashiCorp Nomad Client by HTTP/nomad.client.memory.available, 10m) / last(/HashiCorp Nomad Client by HTTP/nomad.client.memory.total))*100 <= {$NOMAD.RAM.AVAIL.MIN}'
name: 'HashiCorp Nomad Client: High memory utilization'
event_name: 'HashiCorp Nomad Client: High memory utilization: (available < {$NOMAD.RAM.AVAIL.MIN}% over last 10m)'
opdata: 'RAM available: {ITEM.LASTVALUE1}, RAM total: {ITEM.LASTVALUE2}'
priority: AVERAGE
description: 'RAM utilization is too high. The system might be slow to respond.'
tags:
- tag: scope
value: performance
# Server file-descriptor trigger: open descriptors as a percentage of the
# process limit (process_open_fds / process_max_fds * 100).
# min() over 5m with '>' means every sample in the window must exceed
# {$NOMAD.OPEN.FDS.MAX} before the problem is raised (sustained condition).
- uuid: 9b6d779864144e4e9e5e9924658d763e
expression: 'min(/HashiCorp Nomad Server by HTTP/nomad.server.process_open_fds,5m)/last(/HashiCorp Nomad Server by HTTP/nomad.server.process_max_fds)*100>{$NOMAD.OPEN.FDS.MAX}'
name: 'HashiCorp Nomad Server: Current number of open files is too high'
event_name: 'HashiCorp Nomad Server: Current number of open files is too high (over {$NOMAD.OPEN.FDS.MAX}% for 5m)'
priority: WARNING
description: 'Heavy file descriptor usage (i.e., near the process file descriptor limit) indicates a potential file descriptor exhaustion issue.'
tags:
- tag: scope
value: capacity
# Graphs: each graph lists its series under graph_items, referenced by template host + item key.
graphs:
# Stacked graph of four client CPU items: idle, system, total, user.
# NOTE(review): nomad.client.cpu.total is stacked together with idle/system/user;
# if "total" is an aggregate of the others, the stack double-counts — confirm intent.
- uuid: a221474b1f494f8e85edfefd3a86b2ad
name: 'HashiCorp Nomad Client: CPU utilization'
type: STACKED
graph_items:
# First series has no 'sortorder' key (presumably the exporter omits the default — confirm).
- color: 199C0D
item:
host: 'HashiCorp Nomad Client by HTTP'
key: nomad.client.cpu.idle
- sortorder: '1'
color: F63100
item:
host: 'HashiCorp Nomad Client by HTTP'
key: nomad.client.cpu.system
- sortorder: '2'
color: 00611C
item:
host: 'HashiCorp Nomad Client by HTTP'
key: nomad.client.cpu.total
- sortorder: '3'
color: F7941D
item:
host: 'HashiCorp Nomad Client by HTTP'
key: nomad.client.cpu.user
# Stacked graph of two client items: allocated and unallocated memory.
- uuid: b36e33cf629444d096a222d4b806f10d
name: 'HashiCorp Nomad Client: Memory allocation'
type: STACKED
graph_items:
# First series has no 'sortorder' key (presumably the exporter omits the default — confirm).
- color: 199C0D
item:
host: 'HashiCorp Nomad Client by HTTP'
key: nomad.client.allocated.memory
- sortorder: '1'
color: F63100
item:
host: 'HashiCorp Nomad Client by HTTP'
key: nomad.client.unallocated.memory
# Stacked graph of four client memory items: available, free, total, used.
# NOTE(review): nomad.client.memory.total is stacked together with
# available/free/used; if these overlap, the stacked height is not meaningful —
# confirm intent.
- uuid: a5658f2456f745ca9de427ce3bc4fc98
name: 'HashiCorp Nomad Client: Memory utilization'
type: STACKED
graph_items:
# First series has no 'sortorder' key (presumably the exporter omits the default — confirm).
- color: 199C0D
item:
host: 'HashiCorp Nomad Client by HTTP'
key: nomad.client.memory.available
- sortorder: '1'
color: F63100
item:
host: 'HashiCorp Nomad Client by HTTP'
key: nomad.client.memory.free
- sortorder: '2'
color: 00611C
item:
host: 'HashiCorp Nomad Client by HTTP'
key: nomad.client.memory.total
- sortorder: '3'
color: F7941D
item:
host: 'HashiCorp Nomad Client by HTTP'
key: nomad.client.memory.used
# Graph of four server BoltDB transaction-statistics items: rebalance, spill, split, write.
# No 'type' key is given (presumably the exporter omits the default graph type — confirm).
- uuid: 5028c6df64d3442f8fda2bb1fbc5f6e9
name: 'HashiCorp Nomad Server: BoltDB operations'
graph_items:
# First series has no 'sortorder' key (presumably the exporter omits the default — confirm).
- color: 199C0D
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.txstats.rebalance
- sortorder: '1'
color: F63100
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.txstats.spill
- sortorder: '2'
color: 00611C
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.txstats.split
- sortorder: '3'
color: F7941D
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.txstats.write
# Graph of four server BoltDB page items: num_free_pages, txstats.page_count,
# num_pending_pages, txstats.page_alloc.
# No 'type' key is given (presumably the exporter omits the default graph type — confirm).
- uuid: 71832cf8ab4c4607985719e19646fcc7
name: 'HashiCorp Nomad Server: BoltDB pages'
graph_items:
# First series has no 'sortorder' key (presumably the exporter omits the default — confirm).
- color: 199C0D
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.num_free_pages
- sortorder: '1'
color: F63100
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.txstats.page_count
- sortorder: '2'
color: 00611C
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.num_pending_pages
- sortorder: '3'
color: F7941D
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.txstats.page_alloc
# Graph of three server BoltDB timing items: rebalance_time, spill_time, write_time.
# No 'type' key is given (presumably the exporter omits the default graph type — confirm).
- uuid: cc4f2fccc25840f0ba2049554c360080
name: 'HashiCorp Nomad Server: BoltDB timers'
graph_items:
# First series has no 'sortorder' key (presumably the exporter omits the default — confirm).
- color: 199C0D
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.txstats.rebalance_time
- sortorder: '1'
color: F63100
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.txstats.spill_time
- sortorder: '2'
color: 00611C
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.boltdb.txstats.write_time
- uuid: fb8cbf7d131445f6ade454b7b7e2e748
name: 'HashiCorp Nomad Server: Raft timers'
graph_items:
- color: 199C0D
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.leader.dispatch_log
- sortorder: '1'
color: F63100
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.commit_time
- sortorder: '2'
color: 00611C
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.replication.appendEntries
- sortorder: '3'
color: F7941D
item:
host: 'HashiCorp Nomad Server by HTTP'
key: nomad.server.raft.fsm.apply