You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
6868 lines
249 KiB
6868 lines
249 KiB
zabbix_export:
|
|
version: '7.0'
|
|
template_groups:
|
|
- uuid: a571c0d144b14fd4a87a9d9b2aa9fcd6
|
|
name: Templates/Applications
|
|
host_groups:
|
|
- uuid: a571c0d144b14fd4a87a9d9b2aa9fcd6
|
|
name: Applications
|
|
templates:
|
|
- uuid: f74adf26d9ab44ada318002d31fd2881
|
|
template: 'HashiCorp Nomad by HTTP'
|
|
name: 'HashiCorp Nomad by HTTP'
|
|
description: |
|
|
Discover HashiCorp Nomad servers and clients automatically.
|
|
|
|
Don't forget to change macro {$NOMAD.ENDPOINT.API.URL}, {$NOMAD.TOKEN} values.
|
|
|
|
You can discuss this template or leave feedback on our forum: https://www.zabbix.com/forum/zabbix-suggestions-and-feedback.
|
|
|
|
Generated by official Zabbix template tool "Templator" 2.0.0
|
|
vendor:
|
|
name: Zabbix
|
|
version: 7.0-0
|
|
groups:
|
|
- name: Templates/Applications
|
|
items:
|
|
- uuid: 50bf00cc5c9f41c887add07d0bba3cc1
|
|
name: 'HashiCorp Nomad: Client nodes API response'
|
|
type: DEPENDENT
|
|
key: nomad.client.nodes.api.response
|
|
delay: '0'
|
|
history: 7d
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Client nodes API response message.'
|
|
preprocessing:
|
|
- type: JAVASCRIPT
|
|
parameters:
|
|
- |
|
|
try {
|
|
var response = Object.keys(JSON.parse(value).header).filter(function (f) {
|
|
return f.match(/HTTP\/[\d.]+\s+\d{3}/);
|
|
});
|
|
|
|
return response.pop();
|
|
}
|
|
catch (error) {
|
|
return "HTTP/1.1 408 Request timeout";
|
|
}
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
master_item:
|
|
key: nomad.client.nodes.get
|
|
tags:
|
|
- tag: component
|
|
value: status
|
|
triggers:
|
|
- uuid: f2e1cbbf808946ca902c1f378747e936
|
|
expression: 'find(/HashiCorp Nomad by HTTP/nomad.client.nodes.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
|
|
name: 'HashiCorp Nomad: Client nodes API connection has failed'
|
|
event_name: 'HashiCorp Nomad: Client nodes API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", "\1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", "\1")}'
|
|
priority: AVERAGE
|
|
description: |
|
|
Client nodes API connection has failed.
|
|
Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: 99d8cd0768094bef93995e4baa475186
|
|
name: 'HashiCorp Nomad: Nomad clients get'
|
|
type: HTTP_AGENT
|
|
key: nomad.client.nodes.get
|
|
delay: 1h
|
|
history: '0'
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Nomad clients data in raw format.'
|
|
preprocessing:
|
|
- type: CHECK_NOT_SUPPORTED
|
|
parameters:
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}'
|
|
timeout: '{$NOMAD.DATA.TIMEOUT}'
|
|
url: '{$NOMAD.ENDPOINT.API.URL}/v1/nodes'
|
|
query_fields:
|
|
- name: os
|
|
value: '1'
|
|
status_codes: ''
|
|
http_proxy: '{$NOMAD.HTTP.PROXY}'
|
|
headers:
|
|
- name: X-Nomad-Token
|
|
value: '{$NOMAD.TOKEN}'
|
|
retrieve_mode: BOTH
|
|
output_format: JSON
|
|
tags:
|
|
- tag: component
|
|
value: raw
|
|
- uuid: 7a46e18358e641cc941b306de6eb5dc4
|
|
name: 'HashiCorp Nomad: Nomad clients count'
|
|
type: DEPENDENT
|
|
key: nomad.clients.count
|
|
delay: '0'
|
|
history: 7d
|
|
description: 'Nomad clients count.'
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- '$.body[?(@.Name)].length()'
|
|
master_item:
|
|
key: nomad.client.nodes.get
|
|
- uuid: e848e135a5574931accf114fcafb89d7
|
|
name: 'HashiCorp Nomad: Region'
|
|
type: DEPENDENT
|
|
key: nomad.region
|
|
delay: '0'
|
|
history: 7d
|
|
trends: '0'
|
|
value_type: CHAR
|
|
description: 'Current cluster region.'
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- $..region.first()
|
|
master_item:
|
|
key: nomad.server.nodes.get
|
|
tags:
|
|
- tag: component
|
|
value: environment
|
|
- uuid: 53a0c689f27547ccbcb0a57c736a5027
|
|
name: 'HashiCorp Nomad: Server-related APIs response'
|
|
type: DEPENDENT
|
|
key: nomad.server.api.response
|
|
delay: '0'
|
|
history: 7d
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Server-related (`operator/raft/configuration`, `agent/members`) APIs error response message.'
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- $.error
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: 'HTTP/1.1 200 OK'
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
master_item:
|
|
key: nomad.server.nodes.get
|
|
triggers:
|
|
- uuid: 1560c32cb7de4fc8a58cf537c7958205
|
|
expression: 'find(/HashiCorp Nomad by HTTP/nomad.server.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
|
|
name: 'HashiCorp Nomad: Server-related API connection has failed'
|
|
event_name: 'HashiCorp Nomad: Server-related API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", "\1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", "\1")}'
|
|
priority: AVERAGE
|
|
description: |
|
|
Server-related API connection has failed.
|
|
Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: 254354aa21764751bf7e1961e37de7ae
|
|
name: 'HashiCorp Nomad: Nomad servers get'
|
|
type: SCRIPT
|
|
key: nomad.server.nodes.get
|
|
delay: 1h
|
|
history: '0'
|
|
trends: '0'
|
|
value_type: TEXT
|
|
params: |
|
|
/**
 * Helper object for querying the Nomad agent HTTP API.
 *
 * Relies on the `HttpRequest` and `Zabbix` objects being provided by the
 * script environment. All failures are signalled by throwing a string message.
 */
var Nomad = {
    /**
     * Validate and store the script parameters.
     *
     * A trailing '/' is appended to `api_endpoint` when it is missing, so that
     * request paths can be concatenated directly.
     *
     * @param {object} params  Parameters object; `api_endpoint` is required,
     *                         `token` and `http_proxy` are read by request().
     * @throws {string} When `params` is not an object or a required field is
     *                  undefined or empty.
     */
    setParams: function (params) {
        // typeof null is 'object', so null has to be rejected explicitly.
        var isObject = (typeof params === 'object' && params !== null);

        ['api_endpoint'].forEach(function (field) {
            if (!isObject || typeof params[field] === 'undefined' || params[field] === '') {
                throw 'Required param is not set: "' + field + '".';
            }
        });

        Nomad.params = params;
        if (typeof Nomad.params.api_endpoint === 'string' && !Nomad.params.api_endpoint.endsWith('/')) {
            Nomad.params.api_endpoint += '/';
        }
    },

    /**
     * Send a GET request to the API and decode the JSON response.
     *
     * @param {string} query  Path appended to `api_endpoint`.
     * @returns {{status: *, response: *}} Status reported by the HTTP client
     *          and the parsed response body.
     * @throws {string} When the response body is not valid JSON.
     */
    request: function (query) {
        var response,
            request = new HttpRequest(),
            url = Nomad.params.api_endpoint + query;

        request.addHeader('Content-Type: application/json');
        request.addHeader('X-Nomad-Token: ' + Nomad.params.token);

        // Set proxy if needed.
        if (Nomad.params.http_proxy) {
            request.setProxy(Nomad.params.http_proxy);
            Zabbix.log(4, '[ Nomad ] Using http proxy: ' + Nomad.params.http_proxy);
        }

        Zabbix.log(4, '[ Nomad ] Sending request: ' + url);
        response = request.get(url);

        Zabbix.log(4, '[ Nomad ] Received response with status code ' + request.getStatus() + ': ' + response);

        if (response !== null) {
            try {
                response = JSON.parse(response);
            }
            catch (error) {
                throw 'Failed to parse response received from Nomad agent API.';
            }
        }

        return {
            status: request.getStatus(),
            response: response
        };
    },

    /**
     * Resolve a dot-separated path inside a nested object.
     *
     * @param {object} data  Object to traverse.
     * @param {string} path  Dot-separated list of property names.
     * @returns {*} The value found at `path`.
     * @throws {string} When any step of the path is missing.
     */
    getField: function (data, path) {
        var steps = path.split('.'),
            step,
            i;

        for (i = 0; i < steps.length; i++) {
            step = steps[i];

            // null passes a bare typeof check; without the explicit test the
            // property access below would raise a TypeError instead of the
            // intended message.
            if (typeof data !== 'object' || data === null || typeof data[step] === 'undefined') {
                throw 'Required field was not found: ' + path;
            }

            data = data[step];
        }

        return data;
    },

    /**
     * Fetch the IDs of the servers listed by `v1/operator/raft/configuration`.
     *
     * @returns {Array} The `ID` value of every entry in `Servers`.
     * @throws {string} When the request fails or the expected field is absent.
     */
    getIds: function () {
        var result = Nomad.request('v1/operator/raft/configuration');

        if (typeof result.response !== 'object' || result.status != 200) {
            throw 'Cannot get servers list from Nomad agent API.';
        }

        return Nomad.getField(result, 'response.Servers')
            .map(function (srv) {
                return srv['ID'];
            });
    },

    /**
     * Fetch the entries of `v1/agent/members` whose `Tags.id` is one of the
     * IDs returned by getIds().
     *
     * @returns {Array} Matching member objects, unmodified.
     * @throws {string} When a request fails or an expected field is absent.
     */
    getServers: function () {
        var ids = Nomad.getIds(),
            // Declared locally: the bare assignment used previously created an
            // implicit global variable.
            result = Nomad.request('v1/agent/members');

        if (typeof result.response !== 'object' || result.status != 200) {
            throw 'Cannot get servers list from Nomad agent API.';
        }

        return Nomad.getField(result, 'response.Members').filter(function (member) {
            // A member without tags cannot be matched against the ID list, so it is skipped.
            return typeof member === 'object' && member !== null
                && typeof member.Tags === 'object' && member.Tags !== null
                && ids.indexOf(member.Tags.id) >= 0;
        });
    }
};
|
|
|
|
try {
|
|
Nomad.setParams(JSON.parse(value));
|
|
|
|
var servers = Nomad.getServers();
|
|
|
|
return JSON.stringify(servers);
|
|
|
|
}
|
|
catch (error) {
|
|
error += ((String(error).endsWith('.')) ? '' : '.');
|
|
Zabbix.log(3, '[ Nomad ] ERROR: ' + error);
|
|
|
|
return JSON.stringify({ error: error + ' Check debug log for more information.' });
|
|
}
|
|
description: 'Nomad servers data in raw format.'
|
|
timeout: '{$NOMAD.DATA.TIMEOUT}'
|
|
parameters:
|
|
- name: api_endpoint
|
|
value: '{$NOMAD.ENDPOINT.API.URL}'
|
|
- name: token
|
|
value: '{$NOMAD.TOKEN}'
|
|
- name: http_proxy
|
|
value: '{$NOMAD.HTTP.PROXY}'
|
|
tags:
|
|
- tag: component
|
|
value: raw
|
|
- uuid: 904e226008324cb8aa11f716e0420b51
|
|
name: 'HashiCorp Nomad: Nomad servers count'
|
|
type: DEPENDENT
|
|
key: nomad.servers.count
|
|
delay: '0'
|
|
history: 7d
|
|
description: 'Nomad servers count.'
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- '$[?(@.Name)].length()'
|
|
master_item:
|
|
key: nomad.server.nodes.get
|
|
discovery_rules:
|
|
- uuid: 0ba167e5638344ab842224df5b8909e1
|
|
name: 'Clients discovery'
|
|
type: DEPENDENT
|
|
key: nomad.clients.discovery
|
|
delay: '0'
|
|
filter:
|
|
evaltype: AND
|
|
conditions:
|
|
- macro: '{#CLIENT.NAME}'
|
|
value: '{$NOMAD.CLIENT.NAME.MATCHES}'
|
|
formulaid: C
|
|
- macro: '{#CLIENT.NAME}'
|
|
value: '{$NOMAD.CLIENT.NAME.NOT_MATCHES}'
|
|
operator: NOT_MATCHES_REGEX
|
|
formulaid: D
|
|
- macro: '{#CLIENT.DC}'
|
|
value: '{$NOMAD.CLIENT.DC.MATCHES}'
|
|
formulaid: A
|
|
- macro: '{#CLIENT.DC}'
|
|
value: '{$NOMAD.CLIENT.DC.NOT_MATCHES}'
|
|
operator: NOT_MATCHES_REGEX
|
|
formulaid: B
|
|
- macro: '{#CLIENT.SCHEDULE.ELIGIBILITY}'
|
|
value: '{$NOMAD.CLIENT.SCHEDULE.ELIGIBILITY.MATCHES}'
|
|
formulaid: E
|
|
- macro: '{#CLIENT.SCHEDULE.ELIGIBILITY}'
|
|
value: '{$NOMAD.CLIENT.SCHEDULE.ELIGIBILITY.NOT_MATCHES}'
|
|
operator: NOT_MATCHES_REGEX
|
|
formulaid: F
|
|
lifetime: 7d
|
|
description: 'Client nodes discovery.'
|
|
host_prototypes:
|
|
- uuid: 8284dcf3055749d3b27720526c665e34
|
|
host: '{#CLIENT.ID}'
|
|
name: '{#CLIENT.NAME}-client'
|
|
group_links:
|
|
- group:
|
|
name: Applications
|
|
templates:
|
|
- name: 'HashiCorp Nomad Client by HTTP'
|
|
tags:
|
|
- tag: class
|
|
value: '{#CLIENT.CLASS}'
|
|
- tag: dc
|
|
value: '{#CLIENT.DC}'
|
|
- tag: drained
|
|
value: '{#CLIENT.DRAIN.STATE}'
|
|
- tag: os
|
|
value: '{#CLIENT.OS}'
|
|
- tag: scheduling
|
|
value: '{#CLIENT.SCHEDULE.ELIGIBILITY}'
|
|
- tag: service
|
|
value: nomad-client
|
|
- tag: version
|
|
value: '{#CLIENT.VERSION}'
|
|
custom_interfaces: 'YES'
|
|
interfaces:
|
|
- ip: '{#CLIENT.IP}'
|
|
master_item:
|
|
key: nomad.client.nodes.get
|
|
lld_macro_paths:
|
|
- lld_macro: '{#CLIENT.CLASS}'
|
|
path: $.NodeClass
|
|
- lld_macro: '{#CLIENT.DC}'
|
|
path: $.Datacenter
|
|
- lld_macro: '{#CLIENT.DRAIN.STATE}'
|
|
path: $.Drain
|
|
- lld_macro: '{#CLIENT.ID}'
|
|
path: $.ID
|
|
- lld_macro: '{#CLIENT.IP}'
|
|
path: $.Address
|
|
- lld_macro: '{#CLIENT.NAME}'
|
|
path: $.Name
|
|
- lld_macro: '{#CLIENT.OS}'
|
|
path: '$.Attributes[''os.name'']'
|
|
- lld_macro: '{#CLIENT.SCHEDULE.ELIGIBILITY}'
|
|
path: $.SchedulingEligibility
|
|
- lld_macro: '{#CLIENT.VERSION}'
|
|
path: $.Version
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- $.body
|
|
error_handler: DISCARD_VALUE
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
- uuid: 202ee1d0060d48b09a48fe2039d3b081
|
|
name: 'Servers discovery'
|
|
type: DEPENDENT
|
|
key: nomad.servers.discovery
|
|
delay: '0'
|
|
filter:
|
|
evaltype: AND
|
|
conditions:
|
|
- macro: '{#SERVER.NAME}'
|
|
value: '{$NOMAD.SERVER.NAME.MATCHES}'
|
|
formulaid: C
|
|
- macro: '{#SERVER.NAME}'
|
|
value: '{$NOMAD.SERVER.NAME.NOT_MATCHES}'
|
|
operator: NOT_MATCHES_REGEX
|
|
formulaid: D
|
|
- macro: '{#SERVER.DC}'
|
|
value: '{$NOMAD.SERVER.DC.MATCHES}'
|
|
formulaid: A
|
|
- macro: '{#SERVER.DC}'
|
|
value: '{$NOMAD.SERVER.DC.NOT_MATCHES}'
|
|
operator: NOT_MATCHES_REGEX
|
|
formulaid: B
|
|
lifetime: 7d
|
|
description: 'Server nodes discovery.'
|
|
host_prototypes:
|
|
- uuid: addb1ce7995f44089a13128052171445
|
|
host: '{#SERVER.ID}'
|
|
name: '{#SERVER.NAME}'
|
|
group_links:
|
|
- group:
|
|
name: Applications
|
|
templates:
|
|
- name: 'HashiCorp Nomad Server by HTTP'
|
|
tags:
|
|
- tag: dc
|
|
value: '{#SERVER.DC}'
|
|
- tag: region
|
|
value: '{#SERVER.REGION}'
|
|
- tag: role
|
|
value: '{#SERVER.ROLE}'
|
|
- tag: service
|
|
value: nomad-server
|
|
- tag: version
|
|
value: '{#SERVER.VERSION}'
|
|
custom_interfaces: 'YES'
|
|
interfaces:
|
|
- ip: '{#SERVER.IP}'
|
|
master_item:
|
|
key: nomad.server.nodes.get
|
|
lld_macro_paths:
|
|
- lld_macro: '{#SERVER.DC}'
|
|
path: $.Tags.dc
|
|
- lld_macro: '{#SERVER.ID}'
|
|
path: $.Tags.id
|
|
- lld_macro: '{#SERVER.IP}'
|
|
path: $.Addr
|
|
- lld_macro: '{#SERVER.NAME}'
|
|
path: $.Name
|
|
- lld_macro: '{#SERVER.REGION}'
|
|
path: $.Tags.region
|
|
- lld_macro: '{#SERVER.ROLE}'
|
|
path: $.Tags.role
|
|
- lld_macro: '{#SERVER.VERSION}'
|
|
path: $.Tags.build
|
|
preprocessing:
|
|
- type: CHECK_JSON_ERROR
|
|
parameters:
|
|
- $.error
|
|
error_handler: DISCARD_VALUE
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
tags:
|
|
- tag: class
|
|
value: software
|
|
- tag: target
|
|
value: nomad
|
|
macros:
|
|
- macro: '{$NOMAD.API.RESPONSE.SUCCESS}'
|
|
value: '200'
|
|
description: 'HTTP API successful response code. Availability triggers threshold. Change, if needed.'
|
|
- macro: '{$NOMAD.CLIENT.DC.MATCHES}'
|
|
value: '.*'
|
|
description: 'The filter to include HashiCorp Nomad clients by datacenter belonging.'
|
|
- macro: '{$NOMAD.CLIENT.DC.NOT_MATCHES}'
|
|
value: CHANGE_IF_NEEDED
|
|
description: 'The filter to exclude HashiCorp Nomad clients by datacenter belonging.'
|
|
- macro: '{$NOMAD.CLIENT.NAME.MATCHES}'
|
|
value: '.*'
|
|
description: 'The filter to include HashiCorp Nomad clients by name.'
|
|
- macro: '{$NOMAD.CLIENT.NAME.NOT_MATCHES}'
|
|
value: CHANGE_IF_NEEDED
|
|
description: 'The filter to exclude HashiCorp Nomad clients by name.'
|
|
- macro: '{$NOMAD.CLIENT.SCHEDULE.ELIGIBILITY.MATCHES}'
|
|
value: '.*'
|
|
description: 'The filter to include HashiCorp Nomad clients by scheduling eligibility.'
|
|
- macro: '{$NOMAD.CLIENT.SCHEDULE.ELIGIBILITY.NOT_MATCHES}'
|
|
value: CHANGE_IF_NEEDED
|
|
description: 'The filter to exclude HashiCorp Nomad clients by scheduling eligibility.'
|
|
- macro: '{$NOMAD.DATA.TIMEOUT}'
|
|
value: 15s
|
|
description: 'Response timeout for an API.'
|
|
- macro: '{$NOMAD.ENDPOINT.API.URL}'
|
|
value: 'http://localhost:4646'
|
|
description: 'API endpoint URL for one of the Nomad cluster members.'
|
|
- macro: '{$NOMAD.HTTP.PROXY}'
|
|
description: 'Sets the HTTP proxy for script and HTTP agent items. If this parameter is empty, then no proxy is used.'
|
|
- macro: '{$NOMAD.SERVER.DC.MATCHES}'
|
|
value: '.*'
|
|
description: 'The filter to include HashiCorp Nomad servers by datacenter belonging.'
|
|
- macro: '{$NOMAD.SERVER.DC.NOT_MATCHES}'
|
|
value: CHANGE_IF_NEEDED
|
|
description: 'The filter to exclude HashiCorp Nomad servers by datacenter belonging.'
|
|
- macro: '{$NOMAD.SERVER.NAME.MATCHES}'
|
|
value: '.*'
|
|
description: 'The filter to include HashiCorp Nomad servers by name.'
|
|
- macro: '{$NOMAD.SERVER.NAME.NOT_MATCHES}'
|
|
value: CHANGE_IF_NEEDED
|
|
description: 'The filter to exclude HashiCorp Nomad servers by name.'
|
|
- macro: '{$NOMAD.TOKEN}'
|
|
value: '<PUT YOUR AUTH TOKEN>'
|
|
description: 'Nomad authentication token.'
|
|
dashboards:
|
|
- uuid: 0b1cb124081f4cf1af2e35245aeb415f
|
|
name: 'Region resources'
|
|
pages:
|
|
- name: 'Region resources'
|
|
widgets:
|
|
- type: item
|
|
name: Clients
|
|
x: '16'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad by HTTP'
|
|
key: nomad.clients.count
|
|
- type: item
|
|
name: Region
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad by HTTP'
|
|
key: nomad.region
|
|
- type: item
|
|
name: Servers
|
|
x: '8'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad by HTTP'
|
|
key: nomad.servers.count
|
|
- uuid: 44eac6a1abe34999b85ad6d0e40073fd
|
|
template: 'HashiCorp Nomad Client by HTTP'
|
|
name: 'HashiCorp Nomad Client by HTTP'
|
|
description: |
|
|
Get HashiCorp Nomad client metrics by HTTP from metrics endpoint.
|
|
|
|
More information about metrics is available in the official documentation: https://developer.hashicorp.com/nomad/docs/operations/metrics-reference.
|
|
|
|
You can discuss this template or leave feedback on our forum: https://www.zabbix.com/forum/zabbix-suggestions-and-feedback.
|
|
|
|
Generated by official Zabbix template tool "Templator" 2.0.0
|
|
vendor:
|
|
name: Zabbix
|
|
version: 7.0-0
|
|
groups:
|
|
- name: Templates/Applications
|
|
items:
|
|
- uuid: f98bd789c9f7409aac2e1902776ddc21
|
|
name: 'HashiCorp Nomad Client: Service [rpc] state'
|
|
type: SIMPLE
|
|
key: 'net.tcp.service[tcp,,{$NOMAD.CLIENT.RPC.PORT}]'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Current [rpc] service state.'
|
|
valuemap:
|
|
name: 'Service state'
|
|
preprocessing:
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
triggers:
|
|
- uuid: f368ff4de45e4620963135836eb54092
|
|
expression: 'last(/HashiCorp Nomad Client by HTTP/net.tcp.service[tcp,,{$NOMAD.CLIENT.RPC.PORT}]) = 0'
|
|
name: 'HashiCorp Nomad Client: Service [rpc] is down'
|
|
opdata: 'Service: rpc, Port: {$NOMAD.CLIENT.RPC.PORT}, State: {ITEM.LASTVALUE}'
|
|
priority: AVERAGE
|
|
description: |
|
|
Cannot establish the connection to [rpc] service port {$NOMAD.CLIENT.RPC.PORT}.
|
|
Check the Nomad state and network connectivity between Nomad and Zabbix.
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: 00b39edd70cc42daa565879494f15f38
|
|
name: 'HashiCorp Nomad Client: Service [serf] state'
|
|
type: SIMPLE
|
|
key: 'net.tcp.service[tcp,,{$NOMAD.CLIENT.SERF.PORT}]'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Current [serf] service state.'
|
|
valuemap:
|
|
name: 'Service state'
|
|
preprocessing:
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
triggers:
|
|
- uuid: 0108c9f85b48433c8b9864e41458994f
|
|
expression: 'last(/HashiCorp Nomad Client by HTTP/net.tcp.service[tcp,,{$NOMAD.CLIENT.SERF.PORT}]) = 0'
|
|
name: 'HashiCorp Nomad Client: Service [serf] is down'
|
|
opdata: 'Service: serf, Port: {$NOMAD.CLIENT.SERF.PORT}, State: {ITEM.LASTVALUE}'
|
|
priority: AVERAGE
|
|
description: |
|
|
Cannot establish the connection to [serf] service port {$NOMAD.CLIENT.SERF.PORT}.
|
|
Check the Nomad state and network connectivity between Nomad and Zabbix.
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: 13aa2136b7634c32a56fe00b29056d2d
|
|
name: 'HashiCorp Nomad Client: CPU allocated'
|
|
type: DEPENDENT
|
|
key: nomad.client.allocated.cpu
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
units: '!MHz'
|
|
description: 'Total amount of CPU shares the scheduler has allocated to tasks.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_allocated_cpu
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- uuid: fb7211ae27c845bba8dca7e75e0083b8
|
|
name: 'HashiCorp Nomad Client: Disk allocated'
|
|
type: DEPENDENT
|
|
key: nomad.client.allocated.disk
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
units: B
|
|
description: 'Total amount of disk space the scheduler has allocated to tasks.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_allocated_disk
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1000000'
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: storage
|
|
- uuid: 27c71fbb37f14deba301c3b5c7daecb5
|
|
name: 'HashiCorp Nomad Client: Memory allocated'
|
|
type: DEPENDENT
|
|
key: nomad.client.allocated.memory
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
units: B
|
|
description: 'Total amount of memory the scheduler has allocated to tasks.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_allocated_memory
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1000000'
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memory
|
|
- uuid: 945d348f2f2d4c02b3a1ec6f32f1290d
|
|
name: 'HashiCorp Nomad Client: Allocations blocked'
|
|
type: DEPENDENT
|
|
key: nomad.client.allocations.blocked
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of allocations waiting for previous versions.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_allocations_blocked
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: allocations
|
|
- uuid: f556557bf5de438d9aaf941ab5c8d93b
|
|
name: 'HashiCorp Nomad Client: Allocations completed, rate'
|
|
type: DEPENDENT
|
|
key: nomad.client.allocations.complete
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of allocations completed.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_allocs_complete
|
|
- function
|
|
- sum
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: allocations
|
|
- uuid: 586bc8f63e7f46cf99337c7f70809c43
|
|
name: 'HashiCorp Nomad Client: Allocations failed, rate'
|
|
type: DEPENDENT
|
|
key: nomad.client.allocations.failed
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of allocations failed.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_allocs_failed
|
|
- function
|
|
- sum
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: allocations
|
|
- uuid: 88b2eb8e9e8041df83674d3800b1f8c1
|
|
name: 'HashiCorp Nomad Client: Allocations migrating'
|
|
type: DEPENDENT
|
|
key: nomad.client.allocations.migrating
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of allocations migrating data from previous versions.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_allocations_migrating
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: allocations
|
|
- uuid: 717172608fbb4d81bfb65e7f4aa4aaf3
|
|
name: 'HashiCorp Nomad Client: Allocations OOM killed'
|
|
type: DEPENDENT
|
|
key: nomad.client.allocations.oom_killed
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of allocations OOM killed.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_allocs_oom_killed
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: allocations
|
|
triggers:
|
|
- uuid: 747de32c46cb4d95851cee8a30c576b0
|
|
expression: 'last(/HashiCorp Nomad Client by HTTP/nomad.client.allocations.oom_killed) > 0'
|
|
name: 'HashiCorp Nomad Client: OOM killed allocations found'
|
|
opdata: 'OOM killed: {ITEM.LASTVALUE}'
|
|
priority: WARNING
|
|
description: 'OOM killed allocations found.'
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: performance
|
|
- uuid: 9fc77ab432c04cf4b1c0f1941da641d3
|
|
name: 'HashiCorp Nomad Client: Allocations pending'
|
|
type: DEPENDENT
|
|
key: nomad.client.allocations.pending
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of allocations pending (received by the client but not yet running).'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_allocations_pending
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: allocations
|
|
- uuid: eae33da538c642868cf9afe9e6495c95
|
|
name: 'HashiCorp Nomad Client: Allocations restarted, rate'
|
|
type: DEPENDENT
|
|
key: nomad.client.allocations.restart
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of allocations restarted.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_allocs_restart
|
|
- function
|
|
- sum
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: allocations
|
|
- uuid: 6a4903e8bfa04f18a44813349ea6b33d
|
|
name: 'HashiCorp Nomad Client: Allocations running'
|
|
type: DEPENDENT
|
|
key: nomad.client.allocations.running
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of allocations running.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_allocations_running
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: allocations
|
|
- uuid: 3fcbebb8436d4dec8db6625e44520999
|
|
name: 'HashiCorp Nomad Client: Allocations starting'
|
|
type: DEPENDENT
|
|
key: nomad.client.allocations.start
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of allocations starting.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_allocations_start
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: allocations
|
|
- uuid: 166aad9bafeb478abcd8c61ea1d0cc98
|
|
name: 'HashiCorp Nomad Client: Allocations terminal'
|
|
type: DEPENDENT
|
|
key: nomad.client.allocations.terminal
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of allocations terminal.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_allocations_terminal
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: allocations
|
|
- uuid: 31faeb4af30f4f878050eca21e6f97df
|
|
name: 'HashiCorp Nomad Client: CPU idle utilization'
|
|
type: DEPENDENT
|
|
key: nomad.client.cpu.idle
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: '%'
|
|
description: 'CPU utilization in idle state.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_host_cpu_idle
|
|
- function
|
|
- avg
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- uuid: ad75d4b5e69c4e11bc886582791951be
|
|
name: 'HashiCorp Nomad Client: CPU system utilization'
|
|
type: DEPENDENT
|
|
key: nomad.client.cpu.system
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: '%'
|
|
description: 'CPU utilization in system space.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_host_cpu_system
|
|
- function
|
|
- avg
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- uuid: 71eb3676049a4d60aa69ef3901005894
|
|
name: 'HashiCorp Nomad Client: CPU total utilization'
|
|
type: DEPENDENT
|
|
key: nomad.client.cpu.total
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: '%'
|
|
description: 'Total CPU utilization.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_host_cpu_total
|
|
- function
|
|
- avg
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
triggers:
|
|
- uuid: 59db4f1d22cb48fab0ced8ad9ec39a25
|
|
expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.cpu.total, 10m) >= {$NOMAD.CPU.UTIL.MIN}'
|
|
name: 'HashiCorp Nomad Client: High CPU utilization'
|
|
event_name: 'HashiCorp Nomad Client: High CPU utilization (>= {$NOMAD.CPU.UTIL.MIN}% over the last 10m)'
|
|
opdata: 'Current utilization: {ITEM.LASTVALUE}'
|
|
priority: AVERAGE
|
|
description: 'CPU utilization is too high. The system might be slow to respond.'
|
|
tags:
|
|
- tag: scope
|
|
value: performance
|
|
- uuid: cc0671cc71fb4bdd8cdf7a959e1022fd
|
|
name: 'HashiCorp Nomad Client: CPU user utilization'
|
|
type: DEPENDENT
|
|
key: nomad.client.cpu.user
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: '%'
|
|
description: 'CPU utilization in user space.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_host_cpu_user
|
|
- function
|
|
- avg
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- uuid: 71be24cf848a448dbb993a577015bd9e
|
|
name: 'HashiCorp Nomad Client: Monitoring API response'
|
|
type: DEPENDENT
|
|
key: nomad.client.data.api.response
|
|
delay: '0'
|
|
history: 7d
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Monitoring API response message.'
|
|
preprocessing:
|
|
- type: JAVASCRIPT
|
|
parameters:
|
|
- |
|
|
// Extract the HTTP status line from the value, which is parsed as JSON and
// must carry a "header" object whose keys include the status line.
try {
|
|
// Keep only the header keys that look like "HTTP/<version> <3-digit code>".
var response = Object.keys(JSON.parse(value).header).filter(function (f) {
|
|
return f.match(/HTTP\/[\d.]+\s+\d{3}/);
|
|
});
|
|
|
|
// The last matching key is returned.
return response.pop();
|
|
}
|
|
catch (error) {
|
|
// Any exception above (e.g. the value is not valid JSON or has no "header")
// is reported as a fixed 408 status line.
return "HTTP/1.1 408 Request timeout";
|
|
}
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
master_item:
|
|
key: nomad.client.data.get
|
|
tags:
|
|
- tag: component
|
|
value: status
|
|
triggers:
|
|
- uuid: 676e1535cde2424cbda78f18cd9084bf
|
|
expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
|
|
name: 'HashiCorp Nomad Client: Monitoring API connection has failed'
|
|
event_name: 'HashiCorp Nomad Client: Monitoring API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", "\1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", "\1")}'
|
|
priority: AVERAGE
|
|
description: |
|
|
Monitoring API connection has failed.
|
|
Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: fe07fd88f3e14273b3f0b135aab26236
|
|
name: 'HashiCorp Nomad Client: Telemetry get'
|
|
type: HTTP_AGENT
|
|
key: nomad.client.data.get
|
|
history: '0'
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Telemetry data in raw format.'
|
|
preprocessing:
|
|
- type: CHECK_NOT_SUPPORTED
|
|
parameters:
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}'
|
|
timeout: '{$NOMAD.DATA.TIMEOUT}'
|
|
url: '{$NOMAD.CLIENT.API.SCHEME}://{HOST.IP}:{$NOMAD.CLIENT.API.PORT}/v1/metrics'
|
|
query_fields:
|
|
- name: format
|
|
value: prometheus
|
|
status_codes: ''
|
|
http_proxy: '{$NOMAD.HTTP.PROXY}'
|
|
headers:
|
|
- name: X-Nomad-Token
|
|
value: '{$NOMAD.TOKEN}'
|
|
retrieve_mode: BOTH
|
|
output_format: JSON
|
|
tags:
|
|
- tag: component
|
|
value: raw
|
|
- uuid: c9bfa74720ea4e228e5d200ab1f38074
|
|
name: 'HashiCorp Nomad Client: Allocations API response'
|
|
type: DEPENDENT
|
|
key: nomad.client.job.allocs.api.response
|
|
delay: '0'
|
|
history: 7d
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Allocations API response message.'
|
|
preprocessing:
|
|
- type: JAVASCRIPT
|
|
parameters:
|
|
- |
|
|
// Extract the HTTP status line from the value, which is parsed as JSON and
// must carry a "header" object whose keys include the status line.
try {
|
|
// Keep only the header keys that look like "HTTP/<version> <3-digit code>".
var response = Object.keys(JSON.parse(value).header).filter(function (f) {
|
|
return f.match(/HTTP\/[\d.]+\s+\d{3}/);
|
|
});
|
|
|
|
// The last matching key is returned.
return response.pop();
|
|
}
|
|
catch (error) {
|
|
// Any exception above (e.g. the value is not valid JSON or has no "header")
// is reported as a fixed 408 status line.
return "HTTP/1.1 408 Request timeout";
|
|
}
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
master_item:
|
|
key: nomad.client.job.allocs.get
|
|
tags:
|
|
- tag: component
|
|
value: status
|
|
triggers:
|
|
- uuid: 300f5fba0f45465aa6868a3f1d16633d
|
|
expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.job.allocs.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
|
|
name: 'HashiCorp Nomad Client: Allocations API connection has failed'
|
|
event_name: 'HashiCorp Nomad Client: Allocations API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", "\1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", "\1")}'
|
|
priority: AVERAGE
|
|
description: |
|
|
Allocations API connection has failed.
|
|
Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
|
|
manual_close: 'YES'
|
|
dependencies:
|
|
- name: 'HashiCorp Nomad Client: Monitoring API connection has failed'
|
|
expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: 9948608c5b3a4dad9d1b433483296427
|
|
name: 'HashiCorp Nomad Client: Allocated jobs get'
|
|
type: HTTP_AGENT
|
|
key: nomad.client.job.allocs.get
|
|
delay: 1h
|
|
history: '0'
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Allocated jobs data in raw format.'
|
|
preprocessing:
|
|
- type: CHECK_NOT_SUPPORTED
|
|
parameters:
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}'
|
|
timeout: '{$NOMAD.DATA.TIMEOUT}'
|
|
url: '{$NOMAD.CLIENT.API.SCHEME}://{HOST.IP}:{$NOMAD.CLIENT.API.PORT}/v1/allocations'
|
|
query_fields:
|
|
- name: reverse
|
|
value: '1'
|
|
- name: task_states
|
|
- name: filter
|
|
value: 'NodeID == "{HOST.HOST}" and DesiredStatus == "run" and ClientStatus == "running"'
|
|
status_codes: ''
|
|
http_proxy: '{$NOMAD.HTTP.PROXY}'
|
|
headers:
|
|
- name: X-Nomad-Token
|
|
value: '{$NOMAD.TOKEN}'
|
|
retrieve_mode: BOTH
|
|
output_format: JSON
|
|
tags:
|
|
- tag: component
|
|
value: raw
|
|
- uuid: f312a5ba5758426d83c5c3c414d765fd
|
|
name: 'HashiCorp Nomad Client: Memory available'
|
|
type: DEPENDENT
|
|
key: nomad.client.memory.available
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Total amount of memory available to processes which includes free and cached memory.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_host_memory_available
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memory
|
|
- uuid: e379e923ed6b45a682f1da192b3a328d
|
|
name: 'HashiCorp Nomad Client: Memory free'
|
|
type: DEPENDENT
|
|
key: nomad.client.memory.free
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Amount of memory which is free.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_host_memory_free
|
|
- value
|
|
- ''
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memory
|
|
- uuid: 50d152cf3c654e818aa7303841d5444a
|
|
name: 'HashiCorp Nomad Client: Memory size'
|
|
type: DEPENDENT
|
|
key: nomad.client.memory.total
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Total amount of physical memory on the node.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_host_memory_total
|
|
- value
|
|
- ''
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memory
|
|
- uuid: b2638e0b928e46cd825304a3f1ca1682
|
|
name: 'HashiCorp Nomad Client: Memory used'
|
|
type: DEPENDENT
|
|
key: nomad.client.memory.used
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Amount of memory used by processes.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_host_memory_used
|
|
- value
|
|
- ''
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memory
|
|
- uuid: 933621a61d2241d084b48ec6b3bd5b74
|
|
name: 'HashiCorp Nomad Client: Metrics'
|
|
type: DEPENDENT
|
|
key: nomad.client.metrics.get
|
|
delay: '0'
|
|
history: '0'
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Nomad client metrics in raw format.'
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- $.body
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.client.data.get
|
|
tags:
|
|
- tag: component
|
|
value: raw
|
|
- uuid: 39430d6dceaa42a68c72ef68f67b1a12
|
|
name: 'HashiCorp Nomad Client: Nodes API response'
|
|
type: DEPENDENT
|
|
key: nomad.client.node.info.api.response
|
|
delay: '0'
|
|
history: 7d
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Nodes API response message.'
|
|
preprocessing:
|
|
- type: JAVASCRIPT
|
|
parameters:
|
|
- |
|
|
// Extract the HTTP status line from the value, which is parsed as JSON and
// must carry a "header" object whose keys include the status line.
try {
|
|
// Keep only the header keys that look like "HTTP/<version> <3-digit code>".
var response = Object.keys(JSON.parse(value).header).filter(function (f) {
|
|
return f.match(/HTTP\/[\d.]+\s+\d{3}/);
|
|
});
|
|
|
|
// The last matching key is returned.
return response.pop();
|
|
}
|
|
catch (error) {
|
|
// Any exception above (e.g. the value is not valid JSON or has no "header")
// is reported as a fixed 408 status line.
return "HTTP/1.1 408 Request timeout";
|
|
}
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
master_item:
|
|
key: nomad.client.node.info.get
|
|
tags:
|
|
- tag: component
|
|
value: status
|
|
triggers:
|
|
- uuid: 3ea14e12d6154492a4505c4b51003ed3
|
|
expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.node.info.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
|
|
name: 'HashiCorp Nomad Client: Nodes API connection has failed'
|
|
event_name: 'HashiCorp Nomad Client: Nodes API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", "\1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", "\1")}'
|
|
priority: AVERAGE
|
|
description: |
|
|
Nodes API connection has failed.
|
|
Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
|
|
manual_close: 'YES'
|
|
dependencies:
|
|
- name: 'HashiCorp Nomad Client: Monitoring API connection has failed'
|
|
expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: 36eb68c80b194a4591208cf210fb6d6e
|
|
name: 'HashiCorp Nomad Client: Node info get'
|
|
type: HTTP_AGENT
|
|
key: nomad.client.node.info.get
|
|
delay: 1h
|
|
history: '0'
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Node info data in raw format.'
|
|
preprocessing:
|
|
- type: CHECK_NOT_SUPPORTED
|
|
parameters:
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}'
|
|
timeout: '{$NOMAD.DATA.TIMEOUT}'
|
|
url: '{$NOMAD.CLIENT.API.SCHEME}://{HOST.IP}:{$NOMAD.CLIENT.API.PORT}/v1/nodes'
|
|
query_fields:
|
|
- name: filter
|
|
value: 'ID == "{HOST.HOST}"'
|
|
status_codes: ''
|
|
http_proxy: '{$NOMAD.HTTP.PROXY}'
|
|
headers:
|
|
- name: X-Nomad-Token
|
|
value: '{$NOMAD.TOKEN}'
|
|
retrieve_mode: BOTH
|
|
output_format: JSON
|
|
tags:
|
|
- tag: component
|
|
value: raw
|
|
- uuid: 18e57950b0c041e0a2abc0dbd0644b33
|
|
name: 'HashiCorp Nomad Client: CPU unallocated'
|
|
type: DEPENDENT
|
|
key: nomad.client.unallocated.cpu
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
units: '!Mhz'
|
|
description: 'Total amount of CPU shares free for the scheduler to allocate to tasks.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_unallocated_cpu
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- uuid: 44619e3740a9403581d54c6555ecd967
|
|
name: 'HashiCorp Nomad Client: Disk unallocated'
|
|
type: DEPENDENT
|
|
key: nomad.client.unallocated.disk
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
units: B
|
|
description: 'Total amount of disk space free for the scheduler to allocate to tasks.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_unallocated_disk
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1000000'
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: storage
|
|
- uuid: bc0928fcff2f458f83874d7442274e61
|
|
name: 'HashiCorp Nomad Client: Memory unallocated'
|
|
type: DEPENDENT
|
|
key: nomad.client.unallocated.memory
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
units: B
|
|
description: 'Total amount of memory free for the scheduler to allocate to tasks.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_unallocated_memory
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1000000'
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memory
|
|
- uuid: 3561fb69284f4c88a7f5d8c7f04c3b14
|
|
name: 'HashiCorp Nomad Client: Uptime'
|
|
type: DEPENDENT
|
|
key: nomad.client.uptime
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
units: uptime
|
|
description: 'Uptime of the host running the Nomad client.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_client_uptime
|
|
- value
|
|
- ''
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: system
|
|
triggers:
|
|
- uuid: 74fe77871e8c4161b5944f30dd4d1c84
|
|
expression: 'last(/HashiCorp Nomad Client by HTTP/nomad.client.uptime) < 10m'
|
|
name: 'HashiCorp Nomad Client: The host has been restarted'
|
|
event_name: 'HashiCorp Nomad Client: The host has been restarted: (uptime < 10m)'
|
|
priority: WARNING
|
|
description: 'The host uptime is less than 10 minutes.'
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: notice
|
|
- uuid: 872961bd507c4f6380168d2284ead1ae
|
|
name: 'HashiCorp Nomad Client: Nomad client version'
|
|
type: DEPENDENT
|
|
key: nomad.client.version
|
|
delay: '0'
|
|
history: 7d
|
|
trends: '0'
|
|
value_type: CHAR
|
|
description: 'Nomad client version.'
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- $.body..Version.first()
|
|
master_item:
|
|
key: nomad.client.node.info.get
|
|
triggers:
|
|
- uuid: afac97bced6f49bc994ae633b662722c
|
|
expression: 'change(/HashiCorp Nomad Client by HTTP/nomad.client.version)<>0'
|
|
name: 'HashiCorp Nomad Client: Nomad client version has changed'
|
|
event_name: 'HashiCorp Nomad Client: Nomad client version has changed to {ITEM.LASTVALUE}'
|
|
priority: INFO
|
|
description: 'Nomad client version has changed.'
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
discovery_rules:
|
|
- uuid: c41a6405aea7400fb90c207ce95c6594
|
|
name: 'Allocated jobs discovery'
|
|
type: DEPENDENT
|
|
key: nomad.client.alloc.discovery
|
|
delay: '0'
|
|
filter:
|
|
evaltype: AND
|
|
conditions:
|
|
- macro: '{#JOB.NAME}'
|
|
value: '{$NOMAD.JOB.NAME.MATCHES}'
|
|
formulaid: A
|
|
- macro: '{#JOB.NAME}'
|
|
value: '{$NOMAD.JOB.NAME.NOT_MATCHES}'
|
|
operator: NOT_MATCHES_REGEX
|
|
formulaid: B
|
|
- macro: '{#JOB.TASK.GROUP}'
|
|
value: '{$NOMAD.JOB.TASK.GROUP.MATCHES}'
|
|
formulaid: E
|
|
- macro: '{#JOB.TASK.GROUP}'
|
|
value: '{$NOMAD.JOB.TASK.GROUP.NOT_MATCHES}'
|
|
operator: NOT_MATCHES_REGEX
|
|
formulaid: F
|
|
- macro: '{#JOB.NAMESPACE}'
|
|
value: '{$NOMAD.JOB.NAMESPACE.MATCHES}'
|
|
formulaid: C
|
|
- macro: '{#JOB.NAMESPACE}'
|
|
value: '{$NOMAD.JOB.NAMESPACE.NOT_MATCHES}'
|
|
operator: NOT_MATCHES_REGEX
|
|
formulaid: D
|
|
- macro: '{#JOB.TYPE}'
|
|
value: '{$NOMAD.JOB.TYPE.MATCHES}'
|
|
formulaid: G
|
|
- macro: '{#JOB.TYPE}'
|
|
value: '{$NOMAD.JOB.TYPE.NOT_MATCHES}'
|
|
operator: NOT_MATCHES_REGEX
|
|
formulaid: H
|
|
lifetime: '0'
|
|
description: 'Allocated jobs discovery.'
|
|
item_prototypes:
|
|
- uuid: ba9e5a022dbc4f7aa28a1db2ef6d73bb
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU allocated'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.allocs.cpu.allocated["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: '!Mhz'
|
|
description: 'Total CPU resources allocated by the ["{#JOB.NAME}"] job across all cores.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_allocs_cpu_allocated{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
|
|
- function
|
|
- avg
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- tag: component
|
|
value: job
|
|
- tag: job
|
|
value: '{#JOB.NAME}'
|
|
- tag: namespace
|
|
value: '{#JOB.NAMESPACE}'
|
|
- tag: task-group
|
|
value: '{#JOB.TASK.GROUP}'
|
|
- uuid: 85daa0ce6d4f41ab83a3a65ceb8359a9
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU system utilization'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.allocs.cpu.system["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: '%'
|
|
description: 'Total CPU resources consumed by the ["{#JOB.NAME}"] job in system space.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_allocs_cpu_system{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
|
|
- function
|
|
- avg
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- tag: component
|
|
value: job
|
|
- tag: job
|
|
value: '{#JOB.NAME}'
|
|
- tag: namespace
|
|
value: '{#JOB.NAMESPACE}'
|
|
- tag: task-group
|
|
value: '{#JOB.TASK.GROUP}'
|
|
- uuid: d5ec55f8d9a74c99924a01acb206295e
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU throttled periods time'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.allocs.cpu.throttled_periods["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Total number of CPU periods that the ["{#JOB.NAME}"] job was throttled.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_allocs_cpu_throttled_periods{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
|
|
- function
|
|
- avg
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- tag: component
|
|
value: job
|
|
- tag: job
|
|
value: '{#JOB.NAME}'
|
|
- tag: namespace
|
|
value: '{#JOB.NAMESPACE}'
|
|
- tag: task-group
|
|
value: '{#JOB.TASK.GROUP}'
|
|
- uuid: 3122f6d0c1b24ea7bac7a3af4fc00fb2
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU throttled time'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.allocs.cpu.throttled_time["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Total time that the ["{#JOB.NAME}"] job was throttled.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_allocs_cpu_throttled_time{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
|
|
- function
|
|
- avg
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- tag: component
|
|
value: job
|
|
- tag: job
|
|
value: '{#JOB.NAME}'
|
|
- tag: namespace
|
|
value: '{#JOB.NAMESPACE}'
|
|
- tag: task-group
|
|
value: '{#JOB.TASK.GROUP}'
|
|
- uuid: 5ff1ea02768243f4971339bf70f89772
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU total utilization'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.allocs.cpu.total_percent["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: '%'
|
|
description: 'Total CPU resources consumed by the ["{#JOB.NAME}"] job across all cores.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_allocs_cpu_total_percent{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
|
|
- function
|
|
- avg
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- tag: component
|
|
value: job
|
|
- tag: job
|
|
value: '{#JOB.NAME}'
|
|
- tag: namespace
|
|
value: '{#JOB.NAMESPACE}'
|
|
- tag: task-group
|
|
value: '{#JOB.TASK.GROUP}'
|
|
- uuid: 14179529be9d47bc898a5330ff37a9c5
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU ticks'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.allocs.cpu.total_ticks["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'CPU ticks consumed by the process for the ["{#JOB.NAME}"] job in the last collection interval.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_allocs_cpu_total_ticks{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
|
|
- function
|
|
- avg
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- tag: component
|
|
value: job
|
|
- tag: job
|
|
value: '{#JOB.NAME}'
|
|
- tag: namespace
|
|
value: '{#JOB.NAMESPACE}'
|
|
- tag: task-group
|
|
value: '{#JOB.TASK.GROUP}'
|
|
- uuid: 9fb3e3b461874e86aee9483d5af65535
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU user utilization'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.allocs.cpu.user["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: '%'
|
|
description: 'Total CPU resources consumed by the ["{#JOB.NAME}"] job in user space.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_allocs_cpu_user{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
|
|
- function
|
|
- avg
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- tag: component
|
|
value: job
|
|
- tag: job
|
|
value: '{#JOB.NAME}'
|
|
- tag: namespace
|
|
value: '{#JOB.NAMESPACE}'
|
|
- tag: task-group
|
|
value: '{#JOB.TASK.GROUP}'
|
|
- uuid: c353af0be02b47faa27036dfdd173176
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] Memory allocated'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.allocs.memory.allocated["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Amount of memory allocated by the ["{#JOB.NAME}"] job.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_allocs_memory_allocated{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
|
|
- function
|
|
- avg
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: job
|
|
- tag: component
|
|
value: memory
|
|
- tag: job
|
|
value: '{#JOB.NAME}'
|
|
- tag: namespace
|
|
value: '{#JOB.NAMESPACE}'
|
|
- tag: task-group
|
|
value: '{#JOB.TASK.GROUP}'
|
|
- uuid: 7bd7a486864b4d0f82242676da2370ed
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] Memory cached'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.allocs.memory.cache["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Amount of memory cached by the ["{#JOB.NAME}"] job.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_allocs_memory_cache{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
|
|
- function
|
|
- avg
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: job
|
|
- tag: component
|
|
value: memory
|
|
- tag: job
|
|
value: '{#JOB.NAME}'
|
|
- tag: namespace
|
|
value: '{#JOB.NAMESPACE}'
|
|
- tag: task-group
|
|
value: '{#JOB.TASK.GROUP}'
|
|
- uuid: b1419d5017cf4f8ba23e8c774c451772
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] Memory swapped'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.allocs.memory.swap["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Amount of memory swapped by the ["{#JOB.NAME}"] job.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_allocs_memory_swap{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
|
|
- function
|
|
- avg
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: job
|
|
- tag: component
|
|
value: memory
|
|
- tag: job
|
|
value: '{#JOB.NAME}'
|
|
- tag: namespace
|
|
value: '{#JOB.NAMESPACE}'
|
|
- tag: task-group
|
|
value: '{#JOB.TASK.GROUP}'
|
|
- uuid: 09ba4987f69c496ebeab3fb08f4b499b
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] Memory used'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.allocs.memory.usage["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Total amount of memory used by the ["{#JOB.NAME}"] job.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_allocs_memory_usage{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}'
|
|
- function
|
|
- avg
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: job
|
|
- tag: component
|
|
value: memory
|
|
- tag: job
|
|
value: '{#JOB.NAME}'
|
|
- tag: namespace
|
|
value: '{#JOB.NAMESPACE}'
|
|
- tag: task-group
|
|
value: '{#JOB.TASK.GROUP}'
|
|
graph_prototypes:
|
|
- uuid: 2c82c2841f414986b26fc6890707d36d
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] memory utilization'
|
|
type: STACKED
|
|
graph_items:
|
|
- color: 199C0D
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.allocs.memory.allocated["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
- sortorder: '1'
|
|
color: F63100
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.allocs.memory.cache["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
- sortorder: '2'
|
|
color: 00611C
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.allocs.memory.swap["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
- sortorder: '3'
|
|
color: F7941D
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.allocs.memory.usage["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
- uuid: bb8bbdb0180f4b35800144deec4456bb
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] CPU throttling'
|
|
graph_items:
|
|
- color: 199C0D
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.allocs.cpu.throttled_time["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
- sortorder: '1'
|
|
color: F63100
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.allocs.cpu.throttled_periods["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
- uuid: f25a5178570e44ecac6a48cb8351cdd3
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] CPU utilization'
|
|
type: STACKED
|
|
ymax_type_1: FIXED
|
|
graph_items:
|
|
- color: 199C0D
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.allocs.cpu.system["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
- sortorder: '1'
|
|
color: F63100
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.allocs.cpu.total_percent["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
- sortorder: '2'
|
|
color: 00611C
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.allocs.cpu.user["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
master_item:
|
|
key: nomad.client.job.allocs.get
|
|
lld_macro_paths:
|
|
- lld_macro: '{#JOB.NAMESPACE}'
|
|
path: $.Namespace
|
|
- lld_macro: '{#JOB.NAME}'
|
|
path: $.JobID
|
|
- lld_macro: '{#JOB.TASK.GROUP}'
|
|
path: $.TaskGroup
|
|
- lld_macro: '{#JOB.TYPE}'
|
|
path: $.JobType
|
|
preprocessing:
|
|
- type: JAVASCRIPT
|
|
parameters:
|
|
- |
|
|
var raw = JSON.parse(value),
|
|
body = raw.body,
|
|
exist_key = [],
|
|
result = [];
|
|
|
|
function getField(data, path) {
|
|
var steps = path.split('.');
|
|
for (var i = 0; i < steps.length; i++) {
|
|
var step = steps[i];
|
|
if (typeof data !== 'object' || typeof data[step] === 'undefined') {
|
|
throw 'Required field "' + path + '" is not present in data received.';
|
|
}
|
|
data = data[step];
|
|
}
|
|
return data;
|
|
}
|
|
|
|
for (i in body) {
|
|
var uniq_element = true;
|
|
|
|
for (key in exist_key) {
|
|
if ((exist_key[key].JobID == getField(body[i], 'JobID')) &&
|
|
(exist_key[key].TaskGroup == getField(body[i], 'TaskGroup')) &&
|
|
(exist_key[key].Namespace == getField(body[i], 'Namespace'))) {
|
|
uniq_element = false;
|
|
}
|
|
}
|
|
|
|
if (uniq_element) {
|
|
exist_key.push({ 'JobID': body[i].JobID, 'TaskGroup': body[i].TaskGroup, 'Namespace': body[i].Namespace });
|
|
result.push(body[i]);
|
|
}
|
|
}
|
|
|
|
return JSON.stringify(result);
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
- uuid: e2955bd147ed43cb894f0981c6eb3985
|
|
name: 'Physical disks discovery'
|
|
type: DEPENDENT
|
|
key: nomad.client.disk.discovery
|
|
delay: '0'
|
|
filter:
|
|
evaltype: AND
|
|
conditions:
|
|
- macro: '{#DEV.NAME}'
|
|
value: '{$NOMAD.DISK.NAME.MATCHES}'
|
|
formulaid: A
|
|
- macro: '{#DEV.NAME}'
|
|
value: '{$NOMAD.DISK.NAME.NOT_MATCHES}'
|
|
operator: NOT_MATCHES_REGEX
|
|
formulaid: B
|
|
description: 'Physical disks discovery.'
|
|
item_prototypes:
|
|
- uuid: 4a79c99027494bb98b474e6f81db5e18
|
|
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] space available'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.disk.available["{#DEV.NAME}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Amount of space which is available on ["{#DEV.NAME}"] disk.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_host_disk_available{disk="{#DEV.NAME}"}'
|
|
- value
|
|
- ''
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: storage
|
|
- tag: disk
|
|
value: '{#DEV.NAME}'
|
|
- uuid: 21043b0ed4424e3086a00a567d134d45
|
|
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] inodes utilization'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.disk.inodes_percent["{#DEV.NAME}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: '%'
|
|
description: 'Disk space consumed by the inodes on ["{#DEV.NAME}"] disk.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_host_disk_inodes_percent{disk="{#DEV.NAME}"}'
|
|
- value
|
|
- ''
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: inodes
|
|
- tag: component
|
|
value: storage
|
|
- tag: disk
|
|
value: '{#DEV.NAME}'
|
|
trigger_prototypes:
|
|
- uuid: 433c2227305a4aeb82eb1d390621a81e
|
|
expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.inodes_percent["{#DEV.NAME}"],5m) >= {$NOMAD.INODES.FREE.MIN.CRIT:"{#DEV.NAME}"}'
|
|
name: 'HashiCorp Nomad Client: Running out of free inodes on [{#DEV.NAME}] device'
|
|
event_name: 'Running out of free inodes on [{#DEV.NAME}] (free < {$NOMAD.INODES.FREE.MIN.CRIT:"{#DEV.NAME}"}%)'
|
|
opdata: 'Free inodes: {ITEM.VALUE}'
|
|
priority: AVERAGE
|
|
description: |
|
|
It may become impossible to write to a disk if there are no index nodes left.
|
|
The following error messages may be returned as symptoms, even though free space is available:
|
|
- No space left on device;
|
|
- Disk is full.
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: capacity
|
|
- uuid: d0f2aacaada545c4a81d35f0eb454532
|
|
expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.inodes_percent["{#DEV.NAME}"],5m) >= {$NOMAD.INODES.FREE.MIN.WARN:"{#DEV.NAME}"}'
|
|
name: 'HashiCorp Nomad Client: Running out of free inodes on [{#DEV.NAME}] device'
|
|
event_name: 'Running out of free inodes on [{#DEV.NAME}] (free < {$NOMAD.INODES.FREE.MIN.WARN:"{#DEV.NAME}"}%)'
|
|
opdata: 'Free inodes: {ITEM.VALUE}'
|
|
priority: WARNING
|
|
description: |
|
|
It may become impossible to write to a disk if there are no index nodes left.
|
|
The following error messages may be returned as symptoms, even though free space is available:
|
|
- No space left on device;
|
|
- Disk is full.
|
|
manual_close: 'YES'
|
|
dependencies:
|
|
- name: 'HashiCorp Nomad Client: Running out of free inodes on [{#DEV.NAME}] device'
|
|
expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.inodes_percent["{#DEV.NAME}"],5m) >= {$NOMAD.INODES.FREE.MIN.CRIT:"{#DEV.NAME}"}'
|
|
tags:
|
|
- tag: scope
|
|
value: capacity
|
|
- uuid: 4358e8accdd0400496b491cda7edc909
|
|
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] size'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.disk.size["{#DEV.NAME}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Total size of the ["{#DEV.NAME}"] device.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_host_disk_size{disk="{#DEV.NAME}"}'
|
|
- value
|
|
- ''
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: storage
|
|
- tag: disk
|
|
value: '{#DEV.NAME}'
|
|
- uuid: 778f4609e236434d8b216b62bfebf885
|
|
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] space used'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.disk.used["{#DEV.NAME}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Amount of disk ["{#DEV.NAME}"] space which has been used.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_host_disk_used{disk="{#DEV.NAME}"}'
|
|
- value
|
|
- ''
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: storage
|
|
- tag: disk
|
|
value: '{#DEV.NAME}'
|
|
- uuid: aec5d7147bd64f508e793d7539a8524b
|
|
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] space utilization'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.disk.used_percent["{#DEV.NAME}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: '%'
|
|
description: 'Percentage of disk ["{#DEV.NAME}"] space used.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_client_host_disk_used_percent{disk="{#DEV.NAME}"}'
|
|
- value
|
|
- ''
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: storage
|
|
- tag: disk
|
|
value: '{#DEV.NAME}'
|
|
trigger_prototypes:
|
|
- uuid: b3e22e93d51a4a8d825b0bd2e45fc7a2
|
|
expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.used_percent["{#DEV.NAME}"],5m) >= {$NOMAD.DISK.UTIL.MIN.CRIT:"{#DEV.NAME}"}'
|
|
name: 'HashiCorp Nomad Client: High disk [{#DEV.NAME}] utilization'
|
|
event_name: 'Disk [{#DEV.NAME}] space low (utilization: >= {$NOMAD.DISK.UTIL.MIN.CRIT:"{#DEV.NAME}"}%)'
|
|
opdata: 'Current utilization: {ITEM.LASTVALUE}'
|
|
priority: AVERAGE
|
|
description: 'High disk [{#DEV.NAME}] utilization.'
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: capacity
|
|
- uuid: 05c3e6cd711f4a98a21c581a4437ffb5
|
|
expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.used_percent["{#DEV.NAME}"],5m) >= {$NOMAD.DISK.UTIL.MIN.WARN:"{#DEV.NAME}"}'
|
|
name: 'HashiCorp Nomad Client: High disk [{#DEV.NAME}] utilization'
|
|
event_name: 'Disk [{#DEV.NAME}] space low (utilization: >= {$NOMAD.DISK.UTIL.MIN.WARN:"{#DEV.NAME}"}%)'
|
|
opdata: 'Current utilization: {ITEM.LASTVALUE}'
|
|
priority: WARNING
|
|
description: 'High disk [{#DEV.NAME}] utilization.'
|
|
manual_close: 'YES'
|
|
dependencies:
|
|
- name: 'HashiCorp Nomad Client: Running out of free inodes on [{#DEV.NAME}] device'
|
|
expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.inodes_percent["{#DEV.NAME}"],5m) >= {$NOMAD.INODES.FREE.MIN.CRIT:"{#DEV.NAME}"}'
|
|
tags:
|
|
- tag: scope
|
|
value: capacity
|
|
graph_prototypes:
|
|
- uuid: e29168187396478ebec8c80825da070d
|
|
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] usage'
|
|
type: STACKED
|
|
graph_items:
|
|
- color: 199C0D
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.disk.size["{#DEV.NAME}"]'
|
|
- sortorder: '1'
|
|
color: F63100
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.disk.available["{#DEV.NAME}"]'
|
|
- sortorder: '2'
|
|
color: 00611C
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.disk.used["{#DEV.NAME}"]'
|
|
- uuid: 3b9290be87e94bd5987eb595debe5f26
|
|
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] utilization'
|
|
graph_items:
|
|
- color: 199C0D
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.disk.inodes_percent["{#DEV.NAME}"]'
|
|
- sortorder: '1'
|
|
color: F63100
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.disk.used_percent["{#DEV.NAME}"]'
|
|
master_item:
|
|
key: nomad.client.metrics.get
|
|
lld_macro_paths:
|
|
- lld_macro: '{#DEV.NAME}'
|
|
path: $.labels.disk
|
|
preprocessing:
|
|
- type: PROMETHEUS_TO_JSON
|
|
parameters:
|
|
- 'nomad_client_host_disk_available{disk=~".*"}'
|
|
- uuid: cfa76f8c4aaf49c5888ef82d6d7d484d
|
|
name: 'Drivers discovery'
|
|
type: DEPENDENT
|
|
key: nomad.client.drivers.discovery
|
|
delay: '0'
|
|
filter:
|
|
evaltype: AND
|
|
conditions:
|
|
- macro: '{#DRIVER.NAME}'
|
|
value: '{$NOMAD.DRIVER.NAME.MATCHES}'
|
|
formulaid: C
|
|
- macro: '{#DRIVER.NAME}'
|
|
value: '{$NOMAD.DRIVER.NAME.NOT_MATCHES}'
|
|
operator: NOT_MATCHES_REGEX
|
|
formulaid: D
|
|
- macro: '{#DRIVER.DETECTED}'
|
|
value: '{$NOMAD.DRIVER.DETECT.MATCHES}'
|
|
formulaid: A
|
|
- macro: '{#DRIVER.DETECTED}'
|
|
value: '{$NOMAD.DRIVER.DETECT.NOT_MATCHES}'
|
|
operator: NOT_MATCHES_REGEX
|
|
formulaid: B
|
|
description: 'Client drivers discovery.'
|
|
item_prototypes:
|
|
- uuid: 6ffea3b88b29451ea6491cbd34a61148
|
|
name: 'HashiCorp Nomad Client: Driver [{#DRIVER.NAME}] detection state'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.driver.detected["{#DRIVER.NAME}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Driver [{#DRIVER.NAME}] detection state.'
|
|
valuemap:
|
|
name: 'Detection state'
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- '$.body..Drivers.{#DRIVER.NAME}.Detected.first()'
|
|
- type: BOOL_TO_DECIMAL
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.client.node.info.get
|
|
tags:
|
|
- tag: component
|
|
value: driver
|
|
- tag: detected
|
|
value: '{#DRIVER.DETECTED}'
|
|
- tag: driver
|
|
value: '{#DRIVER.NAME}'
|
|
trigger_prototypes:
|
|
- uuid: 576192afab06466f80c95c1b17c1e6fa
|
|
expression: 'change(/HashiCorp Nomad Client by HTTP/nomad.client.driver.detected["{#DRIVER.NAME}"]) <> 0'
|
|
name: 'HashiCorp Nomad Client: Driver [{#DRIVER.NAME}] detection state has changed'
|
|
opdata: 'Current state: {ITEM.LASTVALUE}'
|
|
priority: INFO
|
|
description: 'The [{#DRIVER.NAME}] driver detection state has changed.'
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: fdff50758360474dafffee8cb7ba6289
|
|
name: 'HashiCorp Nomad Client: Driver [{#DRIVER.NAME}] state'
|
|
type: DEPENDENT
|
|
key: 'nomad.client.driver.state["{#DRIVER.NAME}"]'
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Driver [{#DRIVER.NAME}] state.'
|
|
valuemap:
|
|
name: 'Driver state'
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- '$.body..Drivers.{#DRIVER.NAME}.Healthy.first()'
|
|
- type: BOOL_TO_DECIMAL
|
|
parameters:
|
|
- ''
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
master_item:
|
|
key: nomad.client.node.info.get
|
|
tags:
|
|
- tag: component
|
|
value: driver
|
|
- tag: detected
|
|
value: '{#DRIVER.DETECTED}'
|
|
- tag: driver
|
|
value: '{#DRIVER.NAME}'
|
|
trigger_prototypes:
|
|
- uuid: 5630f8b3585f4f5b8faf4a30d95755b8
|
|
expression: 'last(/HashiCorp Nomad Client by HTTP/nomad.client.driver.state["{#DRIVER.NAME}"]) = 0 and last(/HashiCorp Nomad Client by HTTP/nomad.client.driver.detected["{#DRIVER.NAME}"]) = 1'
|
|
name: 'HashiCorp Nomad Client: Driver [{#DRIVER.NAME}] is in unhealthy state'
|
|
priority: WARNING
|
|
description: 'The [{#DRIVER.NAME}] driver is detected, but its state is unhealthy.'
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
master_item:
|
|
key: nomad.client.node.info.get
|
|
preprocessing:
|
|
- type: JAVASCRIPT
|
|
parameters:
|
|
- |
|
|
// Decode the master item payload handed to this preprocessing step in `value`.
var raw = JSON.parse(value);

// Only the first element of the response body is inspected for drivers.
var body = raw.body[0];

// Discovery rows, one per driver, collected further below.
var arr = [];
|
|
|
|
/**
 * Resolves a dot-separated path inside an already parsed JSON value.
 *
 * @param {*} data - Value to descend into.
 * @param {string} path - Property names joined with '.', e.g. 'Drivers' or 'a.b'.
 * @returns {*} The value stored at the end of the path (may be null).
 * @throws {string} 'Required field "<path>" is not present in data received.'
 *         when any step cannot be resolved.
 */
function getField(data, path) {
    var steps = path.split('.');

    for (var i = 0; i < steps.length; i++) {
        var step = steps[i];

        // typeof null is 'object', so null has to be rejected explicitly;
        // otherwise indexing it raises a TypeError instead of the message below.
        if (data === null || typeof data !== 'object' || typeof data[step] === 'undefined') {
            throw 'Required field "' + path + '" is not present in data received.';
        }

        data = data[step];
    }

    return data;
}
|
|
|
|
var keys = Object.keys(getField(body,'Drivers'));
|
|
|
|
for (k in keys) {
|
|
arr.push({
|
|
'{#DRIVER.NAME}': keys[k],
|
|
'{#DRIVER.DETECTED}': getField(body.Drivers[keys[k]],'Detected'),
|
|
});
|
|
}
|
|
|
|
return JSON.stringify(arr);
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
tags:
|
|
- tag: class
|
|
value: software
|
|
- tag: target
|
|
value: nomad-client
|
|
macros:
|
|
- macro: '{$NOMAD.API.RESPONSE.SUCCESS}'
|
|
value: '200'
|
|
description: 'HTTP API successful response code. Availability triggers threshold. Change, if needed.'
|
|
- macro: '{$NOMAD.CLIENT.API.PORT}'
|
|
value: '4646'
|
|
description: 'Nomad client API port.'
|
|
- macro: '{$NOMAD.CLIENT.API.SCHEME}'
|
|
value: http
|
|
description: 'Nomad client API scheme.'
|
|
- macro: '{$NOMAD.CLIENT.OPEN.FDS.MAX.WARN}'
|
|
value: '90'
|
|
description: 'Maximum percentage of used file descriptors.'
|
|
- macro: '{$NOMAD.CLIENT.RPC.PORT}'
|
|
value: '4647'
|
|
description: 'Nomad RPC service port.'
|
|
- macro: '{$NOMAD.CLIENT.SERF.PORT}'
|
|
value: '4648'
|
|
description: 'Nomad serf service port.'
|
|
- macro: '{$NOMAD.CPU.UTIL.MIN}'
|
|
value: '90'
|
|
description: 'CPU utilization threshold. Measured as a percentage.'
|
|
- macro: '{$NOMAD.DATA.TIMEOUT}'
|
|
value: 15s
|
|
description: 'Response timeout for an API.'
|
|
- macro: '{$NOMAD.DISK.NAME.MATCHES}'
|
|
value: '.*'
|
|
description: 'The filter to include HashiCorp Nomad client disks by name.'
|
|
- macro: '{$NOMAD.DISK.NAME.NOT_MATCHES}'
|
|
value: CHANGE_IF_NEEDED
|
|
description: 'The filter to exclude HashiCorp Nomad client disks by name.'
|
|
- macro: '{$NOMAD.DRIVER.DETECT.MATCHES}'
|
|
value: '.*'
|
|
description: 'The filter to include HashiCorp Nomad client drivers by detection state. Possible filtering values: `true`, `false`.'
|
|
- macro: '{$NOMAD.DRIVER.DETECT.NOT_MATCHES}'
|
|
value: CHANGE_IF_NEEDED
|
|
description: 'The filter to exclude HashiCorp Nomad client drivers by detection state. Possible filtering values: `true`, `false`.'
|
|
- macro: '{$NOMAD.DRIVER.NAME.MATCHES}'
|
|
value: '.*'
|
|
description: 'The filter to include HashiCorp Nomad client drivers by name.'
|
|
- macro: '{$NOMAD.DRIVER.NAME.NOT_MATCHES}'
|
|
value: CHANGE_IF_NEEDED
|
|
description: 'The filter to exclude HashiCorp Nomad client drivers by name.'
|
|
- macro: '{$NOMAD.HTTP.PROXY}'
|
|
description: 'Sets the HTTP proxy for HTTP agent item. If this parameter is empty, then no proxy is used.'
|
|
- macro: '{$NOMAD.INODES.FREE.MIN.CRIT}'
|
|
value: '10'
|
|
description: 'Critical threshold of the filesystem metadata utilization. Measured as a percentage.'
|
|
- macro: '{$NOMAD.INODES.FREE.MIN.WARN}'
|
|
value: '20'
|
|
description: 'Warning threshold of the filesystem metadata utilization. Measured as a percentage.'
|
|
- macro: '{$NOMAD.JOB.NAME.MATCHES}'
|
|
value: '.*'
|
|
description: 'The filter to include HashiCorp Nomad client jobs by name.'
|
|
- macro: '{$NOMAD.JOB.NAME.NOT_MATCHES}'
|
|
value: CHANGE_IF_NEEDED
|
|
description: 'The filter to exclude HashiCorp Nomad client jobs by name.'
|
|
- macro: '{$NOMAD.JOB.NAMESPACE.MATCHES}'
|
|
value: '.*'
|
|
description: 'The filter to include HashiCorp Nomad client jobs by namespace.'
|
|
- macro: '{$NOMAD.JOB.NAMESPACE.NOT_MATCHES}'
|
|
value: CHANGE_IF_NEEDED
|
|
description: 'The filter to exclude HashiCorp Nomad client jobs by namespace.'
|
|
- macro: '{$NOMAD.JOB.TASK.GROUP.MATCHES}'
|
|
value: '.*'
|
|
description: 'The filter to include HashiCorp Nomad client jobs by task group belonging.'
|
|
- macro: '{$NOMAD.JOB.TASK.GROUP.NOT_MATCHES}'
|
|
value: CHANGE_IF_NEEDED
|
|
description: 'The filter to exclude HashiCorp Nomad client jobs by task group belonging.'
|
|
- macro: '{$NOMAD.JOB.TYPE.MATCHES}'
|
|
value: '.*'
|
|
description: 'The filter to include HashiCorp Nomad client jobs by type.'
|
|
- macro: '{$NOMAD.JOB.TYPE.NOT_MATCHES}'
|
|
value: CHANGE_IF_NEEDED
|
|
description: 'The filter to exclude HashiCorp Nomad client jobs by type.'
|
|
- macro: '{$NOMAD.RAM.AVAIL.MIN}'
|
|
value: '5'
|
|
description: 'Minimum available memory threshold. Measured as a percentage.'
|
|
- macro: '{$NOMAD.TOKEN}'
|
|
value: '<PUT YOUR AUTH TOKEN>'
|
|
description: 'Nomad authentication token.'
|
|
dashboards:
|
|
- uuid: 6cb91bf47abb4c29b5fdf6de15ee5f9e
|
|
name: Allocations
|
|
pages:
|
|
- name: Allocations
|
|
widgets:
|
|
- type: item
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.allocations.running
|
|
- type: item
|
|
x: '8'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.allocations.start
|
|
- type: item
|
|
x: '16'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.allocations.pending
|
|
- type: item
|
|
'y': '5'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.allocations.complete
|
|
- type: item
|
|
x: '8'
|
|
'y': '5'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.allocations.blocked
|
|
- type: item
|
|
x: '16'
|
|
'y': '5'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.allocations.restart
|
|
- type: item
|
|
'y': '10'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.allocations.migrating
|
|
- type: item
|
|
x: '8'
|
|
'y': '10'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.allocations.terminal
|
|
- type: item
|
|
x: '16'
|
|
'y': '10'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.allocations.failed
|
|
- uuid: 9df94875bc944537860bad6bbe832ca8
|
|
name: 'Job resources'
|
|
pages:
|
|
- name: 'Job resources'
|
|
widgets:
|
|
- type: graphprototype
|
|
width: '12'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: columns
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: rows
|
|
value: '1'
|
|
- type: GRAPH_PROTOTYPE
|
|
name: graphid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
name: 'HashiCorp Nomad Client: Job [{#JOB.NAME}] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] CPU utilization'
|
|
- type: graphprototype
|
|
x: '12'
|
|
width: '12'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: columns
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: rows
|
|
value: '1'
|
|
- type: GRAPH_PROTOTYPE
|
|
name: graphid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
name: 'HashiCorp Nomad Client: Job [{#JOB.NAME}] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] CPU throttling'
|
|
- type: graphprototype
|
|
'y': '5'
|
|
width: '12'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: columns
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: rows
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: source_type
|
|
value: '3'
|
|
- type: ITEM_PROTOTYPE
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.allocs.cpu.allocated["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
- type: graphprototype
|
|
x: '12'
|
|
'y': '5'
|
|
width: '12'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: columns
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: rows
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: source_type
|
|
value: '3'
|
|
- type: ITEM_PROTOTYPE
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: 'nomad.client.allocs.cpu.total_ticks["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]'
|
|
- type: graphprototype
|
|
'y': '10'
|
|
width: '24'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: columns
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: rows
|
|
value: '1'
|
|
- type: GRAPH_PROTOTYPE
|
|
name: graphid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] memory utilization'
|
|
- uuid: 040a905f916a4311a52ecc2b084175a4
|
|
name: 'Node resources'
|
|
pages:
|
|
- name: 'Node resources'
|
|
widgets:
|
|
- type: graph
|
|
width: '24'
|
|
height: '5'
|
|
fields:
|
|
- type: GRAPH
|
|
name: graphid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
name: 'HashiCorp Nomad Client: CPU utilization'
|
|
- type: graph
|
|
'y': '5'
|
|
width: '12'
|
|
height: '5'
|
|
fields:
|
|
- type: GRAPH
|
|
name: graphid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
name: 'HashiCorp Nomad Client: Memory utilization'
|
|
- type: graph
|
|
x: '12'
|
|
'y': '5'
|
|
width: '12'
|
|
height: '5'
|
|
fields:
|
|
- type: GRAPH
|
|
name: graphid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
name: 'HashiCorp Nomad Client: Memory allocation'
|
|
- type: graphprototype
|
|
'y': '10'
|
|
width: '12'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: columns
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: rows
|
|
value: '1'
|
|
- type: GRAPH_PROTOTYPE
|
|
name: graphid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] usage'
|
|
- type: graphprototype
|
|
x: '12'
|
|
'y': '10'
|
|
width: '12'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: columns
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: rows
|
|
value: '1'
|
|
- type: GRAPH_PROTOTYPE
|
|
name: graphid
|
|
value:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] utilization'
|
|
valuemaps:
|
|
- uuid: 766a3c43981147bf85e9e157eb69c510
|
|
name: 'Detection state'
|
|
mappings:
|
|
- value: '0'
|
|
newvalue: Undetected
|
|
- value: '1'
|
|
newvalue: Detected
|
|
- uuid: c202a63acca7407bb6d61bf631696691
|
|
name: 'Driver state'
|
|
mappings:
|
|
- value: '0'
|
|
newvalue: Unhealthy
|
|
- value: '1'
|
|
newvalue: Healthy
|
|
- uuid: 40e17de6519d44bb80b4566e0569c31b
|
|
name: 'Service state'
|
|
mappings:
|
|
- value: '0'
|
|
newvalue: Down
|
|
- value: '1'
|
|
newvalue: Up
|
|
- uuid: 8598d0e2bd6f4903832ec91b7b300062
|
|
template: 'HashiCorp Nomad Server by HTTP'
|
|
name: 'HashiCorp Nomad Server by HTTP'
|
|
description: |
|
|
Get HashiCorp Nomad server metrics by HTTP from metrics endpoint.
|
|
|
|
More information about metrics is available in the official documentation: https://developer.hashicorp.com/nomad/docs/operations/metrics-reference.
|
|
|
|
You can discuss this template or leave feedback on our forum: https://www.zabbix.com/forum/zabbix-suggestions-and-feedback.
|
|
|
|
Generated by official Zabbix template tool "Templator" 2.0.0
|
|
vendor:
|
|
name: Zabbix
|
|
version: 7.0-0
|
|
groups:
|
|
- name: Templates/Applications
|
|
items:
|
|
- uuid: c5903e91262b4877be8c7e8f304b2862
|
|
name: 'HashiCorp Nomad Server: Service [rpc] state'
|
|
type: SIMPLE
|
|
key: 'net.tcp.service[tcp,,{$NOMAD.SERVER.RPC.PORT}]'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Current [rpc] service state.'
|
|
valuemap:
|
|
name: 'Service state'
|
|
preprocessing:
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
triggers:
|
|
- uuid: 0b534fa4c6cd4e89bd0a44ca8e810fa2
|
|
expression: 'last(/HashiCorp Nomad Server by HTTP/net.tcp.service[tcp,,{$NOMAD.SERVER.RPC.PORT}]) = 0'
|
|
name: 'HashiCorp Nomad Server: Service [rpc] is down'
|
|
opdata: 'Service: rpc, Port: {$NOMAD.SERVER.RPC.PORT}, State: {ITEM.LASTVALUE}'
|
|
priority: AVERAGE
|
|
description: |
|
|
Cannot establish the connection to [rpc] service port {$NOMAD.SERVER.RPC.PORT}.
|
|
Check the Nomad state and network connectivity between Nomad and Zabbix.
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: 9eecc3ee79f04448991c83ac93459597
|
|
name: 'HashiCorp Nomad Server: Service [serf] state'
|
|
type: SIMPLE
|
|
key: 'net.tcp.service[tcp,,{$NOMAD.SERVER.SERF.PORT}]'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Current [serf] service state.'
|
|
valuemap:
|
|
name: 'Service state'
|
|
preprocessing:
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
triggers:
|
|
- uuid: 00799e8927b547308fc9caab4ba2d24d
|
|
expression: 'last(/HashiCorp Nomad Server by HTTP/net.tcp.service[tcp,,{$NOMAD.SERVER.SERF.PORT}]) = 0'
|
|
name: 'HashiCorp Nomad Server: Service [serf] is down'
|
|
opdata: 'Service: serf, Port: {$NOMAD.SERVER.SERF.PORT}, State: {ITEM.LASTVALUE}'
|
|
priority: AVERAGE
|
|
description: |
|
|
Cannot establish the connection to [serf] service port {$NOMAD.SERVER.SERF.PORT}.
|
|
Check the Nomad state and network connectivity between Nomad and Zabbix.
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: 04821dab074f43e785dec52fbf69240e
|
|
name: 'HashiCorp Nomad Server: Nomad raft version'
|
|
type: DEPENDENT
|
|
key: nomad.raft.version
|
|
delay: '0'
|
|
history: 7d
|
|
trends: '0'
|
|
value_type: CHAR
|
|
description: 'Nomad raft version.'
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- $.body.stats.raft.protocol_version
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.stats.get
|
|
- uuid: 6ad163ebdefa4f5aa1e3712b7ddac7a7
|
|
name: 'HashiCorp Nomad Server: FSM allocation client update time'
|
|
type: DEPENDENT
|
|
key: nomad.server.alloc_client_update
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to apply AllocClientUpdate raft entry.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_fsm_alloc_client_update_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: fsm
|
|
- uuid: 22947f9cc2734f95997a55d9e6392b77
|
|
name: 'HashiCorp Nomad Server: Autopilot failure tolerance'
|
|
type: DEPENDENT
|
|
key: nomad.server.autopilot.failure_tolerance
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'The number of redundant healthy servers that can fail without causing an outage.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_autopilot_failure_tolerance
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: autopilot
|
|
triggers:
|
|
- uuid: fbb6d007481a4aab8d107a1d25a8c7ad
|
|
expression: 'last(/HashiCorp Nomad Server by HTTP/nomad.server.autopilot.failure_tolerance) < {$NOMAD.REDUNDANCY.MIN} and nodata(/HashiCorp Nomad Server by HTTP/nomad.server.autopilot.failure_tolerance,5m) = 0'
|
|
name: 'HashiCorp Nomad Server: Autopilot redundancy is low'
|
|
event_name: 'HashiCorp Nomad Server: Autopilot redundancy is low (less than {$NOMAD.REDUNDANCY.MIN})'
|
|
priority: WARNING
|
|
description: |
|
|
The autopilot redundancy is low.
|
|
The risk of a cluster crash is high: one more server failure may cause an outage.
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: capacity
|
|
- uuid: 05ce375320dd402ab6c75f8fffba74cb
|
|
name: 'HashiCorp Nomad Server: Autopilot state'
|
|
type: DEPENDENT
|
|
key: nomad.server.autopilot.state
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Current autopilot state.'
|
|
valuemap:
|
|
name: 'Autopilot state'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_autopilot_healthy
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: autopilot
|
|
triggers:
|
|
- uuid: 73c14eec055e47c6af6d8728ba0fb416
|
|
expression: 'last(/HashiCorp Nomad Server by HTTP/nomad.server.autopilot.state) = 0 and nodata(/HashiCorp Nomad Server by HTTP/nomad.server.autopilot.state,5m) = 0'
|
|
name: 'HashiCorp Nomad Server: Autopilot is unhealthy'
|
|
priority: AVERAGE
|
|
description: 'The autopilot is in an unhealthy state. The probability of a successful failover is extremely low.'
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: ace31f3772fe4ffe84741554e3fba8bc
|
|
name: 'HashiCorp Nomad Server: CPU shares for blocked evaluations'
|
|
type: DEPENDENT
|
|
key: nomad.server.blocked_evals.cpu
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Amount of CPU shares requested by blocked evals.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_blocked_evals_cpu
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- tag: component
|
|
value: evaluations
|
|
- uuid: d7fe536b4d0445328dfbf3b1188bea70
|
|
name: 'HashiCorp Nomad Server: CPU shares for blocked job evaluations'
|
|
type: DEPENDENT
|
|
key: nomad.server.blocked_evals.job.cpu
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Amount of CPU shares requested by blocked evals of a job.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_blocked_evals_job_cpu
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- tag: component
|
|
value: evaluations
|
|
- tag: component
|
|
value: jobs
|
|
- uuid: 2f07cbd6ec11449cae787a6e84fa5fbf
|
|
name: 'HashiCorp Nomad Server: Memory shares for blocked job evaluations'
|
|
type: DEPENDENT
|
|
key: nomad.server.blocked_evals.job.memory
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Amount of memory requested by blocked evals of a job.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_blocked_evals_job_memory
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: evaluations
|
|
- tag: component
|
|
value: jobs
|
|
- tag: component
|
|
value: memory
|
|
- uuid: 5460a81df8174fb5a7596b4243162d05
|
|
name: 'HashiCorp Nomad Server: Memory shares by blocked evaluations'
|
|
type: DEPENDENT
|
|
key: nomad.server.blocked_evals.memory
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Amount of memory requested by blocked evals.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_blocked_evals_memory
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: evaluations
|
|
- tag: component
|
|
value: memory
|
|
- uuid: 474d05060b0d4a608f6a2d319235af91
|
|
name: 'HashiCorp Nomad Server: Evaluations blocked'
|
|
type: DEPENDENT
|
|
key: nomad.server.blocked_evals.total_blocked
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of evals in the blocked state for any reason (cluster resource exhaustion or quota limits).'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_blocked_evals_total_blocked
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: evaluations
|
|
- uuid: 80f5edf18daf407ba6b64374084a555b
|
|
name: 'HashiCorp Nomad Server: Evaluations escaped'
|
|
type: DEPENDENT
|
|
key: nomad.server.blocked_evals.total_escaped
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: |
|
|
Count of evals that have escaped computed node classes.
|
|
This indicates a scheduler optimization was skipped and is not usually a source of concern.
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_blocked_evals_total_escaped
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: evaluations
|
|
- uuid: 356dec0f5d5b45d981db4b590ef7dd73
|
|
name: 'HashiCorp Nomad Server: Evaluations blocked due to quota limit'
|
|
type: DEPENDENT
|
|
key: nomad.server.blocked_evals.total_quota_limit
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of blocked evals due to quota limits (the resources for these jobs are not counted in other blocked_evals metrics, except for total_blocked).'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_blocked_evals_total_quota_limit
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: evaluations
|
|
- uuid: 54ddea6f2fa04e6c8ceb456d4d67c530
|
|
name: 'HashiCorp Nomad Server: Evaluations enqueue time'
|
|
type: DEPENDENT
|
|
key: nomad.server.broker.eval_waiting
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Average time elapsed with evaluations waiting to be enqueued.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_eval_ack_sum
|
|
- function
|
|
- avg
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: evaluations
|
|
- uuid: 7df3a4b80f6b44c6a03b672182d3f73f
|
|
name: 'HashiCorp Nomad Server: Services ready to schedule'
|
|
type: DEPENDENT
|
|
key: nomad.server.broker.service_ready
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of service evals ready to be scheduled.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_broker_service_ready
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: services
|
|
- uuid: 0da8881c8ddf4cd18e04b38201b83b7d
|
|
name: 'HashiCorp Nomad Server: Services unacknowledged'
|
|
type: DEPENDENT
|
|
key: nomad.server.broker.service_unacked
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of unacknowledged service evals.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_broker_service_unacked
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: services
|
|
- uuid: 2a38602920224c31a221696cf65c6abc
|
|
name: 'HashiCorp Nomad Server: System evaluations ready to schedule'
|
|
type: DEPENDENT
|
|
key: nomad.server.broker.system_ready
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of system evals ready to be scheduled.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_broker_system_ready
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: evaluations
|
|
- uuid: 3faaaa69672c4b999a12f054dbd8b980
|
|
name: 'HashiCorp Nomad Server: System evaluations unacknowledged'
|
|
type: DEPENDENT
|
|
key: nomad.server.broker.system_unacked
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of unacknowledged system evals.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_broker_system_unacked
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: evaluations
|
|
- uuid: 2e556f5894c9492a80b34c84ceb69dba
|
|
name: 'HashiCorp Nomad Server: Evaluations pending'
|
|
type: DEPENDENT
|
|
key: nomad.server.broker.total_pending
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Evaluations that are pending until an existing evaluation for the same job completes.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_broker_total_pending
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: evaluations
|
|
- uuid: 01e11850732a46709942c225d294a8ef
|
|
name: 'HashiCorp Nomad Server: Evaluations ready'
|
|
type: DEPENDENT
|
|
key: nomad.server.broker.total_ready
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Number of evaluations ready to be processed.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_broker_total_ready
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: evaluations
|
|
- uuid: d57805e0d8a24117bb64992a9aeb3dc3
|
|
name: 'HashiCorp Nomad Server: Evaluations unacked'
|
|
type: DEPENDENT
|
|
key: nomad.server.broker.total_unacked
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Evaluations dispatched for processing but incomplete.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_broker_total_unacked
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: evaluations
|
|
- uuid: ac1d1422adc248b48ba65268f9a43be8
|
|
name: 'HashiCorp Nomad Server: Evaluations waiting'
|
|
type: DEPENDENT
|
|
key: nomad.server.broker.total_waiting
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Count of evals waiting to be enqueued.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_broker_total_waiting
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: evaluations
|
|
- uuid: de3e13f7635e4058bd2ca12f6f454668
|
|
name: 'HashiCorp Nomad Server: RPC eval dequeue time'
|
|
type: DEPENDENT
|
|
key: nomad.server.client.dequeue
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for Eval.Dequeue RPC call.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_eval_dequeue_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: ac2a5b69fef546d0924d631a9ed9cffd
|
|
name: 'HashiCorp Nomad Server: RPC get client allocs time'
|
|
type: DEPENDENT
|
|
key: nomad.server.client.get_client_allocs
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for Node.GetClientAllocs RPC call.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_client_get_client_allocs_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: 758f3474636c42378fac78a2f5e3b63b
|
|
name: 'HashiCorp Nomad Server: RPC list time'
|
|
type: DEPENDENT
|
|
key: nomad.server.client.list
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for Node.List RPC call.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_client_list_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: 588bb9b304634852af149d7903d42798
|
|
name: 'HashiCorp Nomad Server: RPC update allocations time'
|
|
type: DEPENDENT
|
|
key: nomad.server.client.update_alloc
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for Node.UpdateAlloc RPC call.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_client_update_alloc_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: 1e1ce033c99b45a3892df8674deaf1f4
|
|
name: 'HashiCorp Nomad Server: RPC update status time'
|
|
type: DEPENDENT
|
|
key: nomad.server.client.update_status
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for Node.UpdateStatus RPC call.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_client_update_status_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: c5bd6a645e9f49ba83fe065b2af6248e
|
|
name: 'HashiCorp Nomad Server: CPU time, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.cpu.time
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Total user and system CPU time spent in seconds.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- process_cpu_seconds_total
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: cpu
|
|
- uuid: dbbee3bb99d347f9a9a5325b4f64a894
|
|
name: 'HashiCorp Nomad Server: Monitoring API response'
|
|
type: DEPENDENT
|
|
key: nomad.server.data.api.response
|
|
delay: '0'
|
|
history: 7d
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Monitoring API response message.'
|
|
preprocessing:
|
|
- type: JAVASCRIPT
|
|
parameters:
|
|
- |
|
|
try {
|
|
var response = Object.keys(JSON.parse(value).header).filter(function (f) {
|
|
return f.match(/HTTP\/[\d.]+\s+\d{3}/);
|
|
});
|
|
|
|
return response.pop();
|
|
}
|
|
catch (error) {
|
|
return "HTTP/1.1 408 Request timeout";
|
|
}
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
master_item:
|
|
key: nomad.server.data.get
|
|
tags:
|
|
- tag: component
|
|
value: status
|
|
triggers:
|
|
- uuid: 9dc8cd0bcbad4ee7a2afe45e33db53a7
|
|
expression: 'find(/HashiCorp Nomad Server by HTTP/nomad.server.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
|
|
name: 'HashiCorp Nomad Server: Monitoring API connection has failed'
|
|
event_name: 'HashiCorp Nomad Server: Monitoring API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", "\1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", "\1")}'
|
|
priority: AVERAGE
|
|
description: |
|
|
Monitoring API connection has failed.
|
|
Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: b7ba2f4a56b04de298fde77cbf5afb6a
|
|
name: 'HashiCorp Nomad Server: Telemetry get'
|
|
type: HTTP_AGENT
|
|
key: nomad.server.data.get
|
|
history: '0'
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Telemetry data in raw format.'
|
|
preprocessing:
|
|
- type: CHECK_NOT_SUPPORTED
|
|
parameters:
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}'
|
|
timeout: '{$NOMAD.DATA.TIMEOUT}'
|
|
url: '{$NOMAD.SERVER.API.SCHEME}://{HOST.IP}:{$NOMAD.SERVER.API.PORT}/v1/metrics'
|
|
query_fields:
|
|
- name: format
|
|
value: prometheus
|
|
status_codes: ''
|
|
http_proxy: '{$NOMAD.HTTP.PROXY}'
|
|
headers:
|
|
- name: X-Nomad-Token
|
|
value: '{$NOMAD.TOKEN}'
|
|
retrieve_mode: BOTH
|
|
output_format: JSON
|
|
tags:
|
|
- tag: component
|
|
value: raw
|
|
- uuid: 7241c9923d714970ad950f9b7aa8ab52
|
|
name: 'HashiCorp Nomad Server: RPC evaluation acknowledgement time'
|
|
type: DEPENDENT
|
|
key: nomad.server.eval.ack
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for Eval.Ack RPC call.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_eval_ack_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: b9085e173cec4ce2b2fa6fd68041695e
|
|
name: 'HashiCorp Nomad Server: FSM apply plan results time'
|
|
type: DEPENDENT
|
|
key: nomad.server.fsm.apply_plan_results
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to apply ApplyPlanResults raft entry.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_fsm_apply_plan_results_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: fsm
|
|
- uuid: 62b19ce1b2bb4fdaacff02d70101a10f
|
|
name: 'HashiCorp Nomad Server: FSM job registration time'
|
|
type: DEPENDENT
|
|
key: nomad.server.fsm.register_job
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to apply RegisterJob raft entry.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_fsm_register_job_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: fsm
|
|
- uuid: 2d0b9f09bfb5480ebd6d6cf96c15278e
|
|
name: 'HashiCorp Nomad Server: FSM update evaluation time'
|
|
type: DEPENDENT
|
|
key: nomad.server.fsm.update_eval
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to apply UpdateEval raft entry.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_fsm_update_eval_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: fsm
|
|
- uuid: 0abce7a087874e58a7da20acb3599535
|
|
name: 'HashiCorp Nomad Server: Heartbeats active'
|
|
type: DEPENDENT
|
|
key: nomad.server.heartbeat.active
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: |
|
|
Number of active heartbeat timers.
|
|
Each timer represents a Nomad client connection.
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_heartbeat_active
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- uuid: dc95deb3590148a9bc50ea151a0d46b7
|
|
name: 'HashiCorp Nomad Server: RPC job allocations time'
|
|
type: DEPENDENT
|
|
key: nomad.server.job.allocations
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for Job.Allocations RPC call.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_allocations_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: 11e18e811fe7461181417ca1f5dad915
|
|
name: 'HashiCorp Nomad Server: RPC job evaluations time'
|
|
type: DEPENDENT
|
|
key: nomad.server.job.evaluations
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for Job.Evaluations RPC call.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_evaluations_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: 0efaac3a59d44ce09d31435ddfa6aada
|
|
name: 'HashiCorp Nomad Server: RPC get job time'
|
|
type: DEPENDENT
|
|
key: nomad.server.job.get_job
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for Job.GetJob RPC call.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_get_job_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: 5ab3191f1f744682bab6d3142a1c39a4
|
|
name: 'HashiCorp Nomad Server: Jobs dead'
|
|
type: DEPENDENT
|
|
key: nomad.server.job_status.dead
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of dead jobs.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_status_dead
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: jobs
|
|
triggers:
|
|
- uuid: 44dcd52ebf54404e871501a4c4825424
|
|
expression: 'last(/HashiCorp Nomad Server by HTTP/nomad.server.job_status.dead) > 0 and nodata(/HashiCorp Nomad Server by HTTP/nomad.server.job_status.dead,5m) = 0'
|
|
name: 'HashiCorp Nomad Server: Dead jobs found'
|
|
opdata: 'Current amount: {ITEM.LASTVALUE}'
|
|
priority: WARNING
|
|
description: |
|
|
Jobs with the `Dead` state discovered.
|
|
Check the {$NOMAD.SERVER.API.SCHEME}://{HOST.IP}:{$NOMAD.SERVER.API.PORT}/v1/jobs URL for the details.
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: 39a27315e33549dc88c6cb426a5a05c9
|
|
name: 'HashiCorp Nomad Server: Jobs pending'
|
|
type: DEPENDENT
|
|
key: nomad.server.job_status.pending
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of pending jobs.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_status_pending
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: jobs
|
|
- uuid: 97907d8816b14fb69063a55bdea22701
|
|
name: 'HashiCorp Nomad Server: Jobs running'
|
|
type: DEPENDENT
|
|
key: nomad.server.job_status.running
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of running jobs.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_status_running
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: jobs
|
|
- uuid: 4be8c941477046dd9dcc549020d687e7
|
|
name: 'HashiCorp Nomad Server: Job allocations completed'
|
|
type: DEPENDENT
|
|
key: nomad.server.job_summary.complete
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of complete allocations for a job.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_summary_complete
|
|
- function
|
|
- sum
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: jobs
|
|
- uuid: 6f6ad9a70b4e4759bc2e4cc213d697bb
|
|
name: 'HashiCorp Nomad Server: Job allocations failed'
|
|
type: DEPENDENT
|
|
key: nomad.server.job_summary.failed
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of failed allocations for a job.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_summary_failed
|
|
- function
|
|
- sum
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: jobs
|
|
- uuid: 7a267a1b82b24157a19b52e6f285f0ce
|
|
name: 'HashiCorp Nomad Server: RPC job summary time'
|
|
type: DEPENDENT
|
|
key: nomad.server.job_summary.get_job_summary
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for Job.Summary RPC call.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_summary_get_job_summary_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: 0c408dff86604486b0f23efed00f4348
|
|
name: 'HashiCorp Nomad Server: Job allocations lost'
|
|
type: DEPENDENT
|
|
key: nomad.server.job_summary.lost
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of lost allocations for a job.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_summary_lost
|
|
- function
|
|
- sum
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: jobs
|
|
- uuid: d0de68553b484d2d9311b356a7b5a9e7
|
|
name: 'HashiCorp Nomad Server: Job allocations queued'
|
|
type: DEPENDENT
|
|
key: nomad.server.job_summary.queued
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of queued allocations for a job.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_summary_queued
|
|
- function
|
|
- sum
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: jobs
|
|
- uuid: 98ab06dce1f84ef5993e8cc793114008
|
|
name: 'HashiCorp Nomad Server: Job allocations running'
|
|
type: DEPENDENT
|
|
key: nomad.server.job_summary.running
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of running allocations for a job.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_summary_running
|
|
- function
|
|
- sum
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: jobs
|
|
- uuid: e3cc96bd72924bb5bd0334018a5a88be
|
|
name: 'HashiCorp Nomad Server: Job allocations starting'
|
|
type: DEPENDENT
|
|
key: nomad.server.job_summary.starting
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of starting allocations for a job.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_summary_starting
|
|
- function
|
|
- sum
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: jobs
|
|
- uuid: 244a84e136c94a47b1b64b92d08ac20f
|
|
name: 'HashiCorp Nomad Server: Job allocations unknown'
|
|
type: DEPENDENT
|
|
key: nomad.server.job_summary.unknown
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of unknown allocations for a job.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_job_summary_unknown
|
|
- function
|
|
- sum
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: jobs
|
|
- uuid: 478f45d1095646bca5ffd3a6d923dddf
|
|
name: 'HashiCorp Nomad Server: Leader barrier time'
|
|
type: DEPENDENT
|
|
key: nomad.server.leader.barrier
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to establish a raft barrier during leader transition.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_leader_barrier_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: leader
|
|
- tag: component
|
|
value: raft
|
|
- uuid: 8425336f75704a259bc066c795df1516
|
|
name: 'HashiCorp Nomad Server: Total reconcile time'
|
|
type: DEPENDENT
|
|
key: nomad.server.leader.reconcile
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to reconcile all serf peers with state store.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_leader_reconcile_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: leader
|
|
- tag: component
|
|
value: raft
|
|
- uuid: a5b6d36142174dd683e2aec5dd4ee1e9
|
|
name: 'HashiCorp Nomad Server: Reconcile peer time'
|
|
type: DEPENDENT
|
|
key: nomad.server.leader.reconcile_member
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to reconcile a serf peer with state store.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_leader_reconcileMember_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: leader
|
|
- tag: component
|
|
value: raft
|
|
- uuid: af6eda309cc545b2aa65efc0af103a02
|
|
name: 'HashiCorp Nomad Server: Gossip time'
|
|
type: DEPENDENT
|
|
key: nomad.server.memberlist.gossip
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to broadcast gossip messages.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_memberlist_gossip_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: gossip
|
|
- uuid: 25372c89439641d09521348033b1e324
|
|
name: 'HashiCorp Nomad Server: Metrics'
|
|
type: DEPENDENT
|
|
key: nomad.server.metrics.get
|
|
delay: '0'
|
|
history: '0'
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Nomad server metrics in raw format.'
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- $.body
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.data.get
|
|
tags:
|
|
- tag: component
|
|
value: raw
|
|
- uuid: 8a003df55bae4bf0a14fefc6f02f28bf
|
|
name: 'HashiCorp Nomad Server: Namespace list time'
|
|
type: DEPENDENT
|
|
key: nomad.server.namespace.list_namespace
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for Namespace.ListNamespaces.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_namespace_list_namespace_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: namespaces
|
|
- uuid: dd8d8b5cdf914002ac58183b667ceb06
|
|
name: 'HashiCorp Nomad Server: Plan apply time'
|
|
type: DEPENDENT
|
|
key: nomad.server.plan.apply
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to apply a plan.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_plan_apply_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: plan
|
|
- uuid: d8d69f231d23457e82c6d3941f09cf5d
|
|
name: 'HashiCorp Nomad Server: Plan evaluate time'
|
|
type: DEPENDENT
|
|
key: nomad.server.plan.evaluate
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to evaluate a plan.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_plan_evaluate_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: plan
|
|
- uuid: ee7b59fd19644bc4813696f8d806955f
|
|
name: 'HashiCorp Nomad Server: Plan queue'
|
|
type: DEPENDENT
|
|
key: nomad.server.plan.queue_depth
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of evals in the plan queue.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_plan_queue_depth
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: plan
|
|
- uuid: 9df85708b74d40509d9c9412a15e5ce5
|
|
name: 'HashiCorp Nomad Server: RPC plan submit time'
|
|
type: DEPENDENT
|
|
key: nomad.server.plan.submit
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for Plan.Submit RPC call.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_plan_submit_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: plan
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: 851eea2a548644ba884ca852c2759d90
|
|
name: 'HashiCorp Nomad Server: Plan raft index processing time'
|
|
type: DEPENDENT
|
|
key: nomad.server.plan.wait_for_index
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed that planner waits for the raft index of the plan to be processed.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_plan_wait_for_index_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: plan
|
|
- uuid: 505ee2804b4e451892103b36cc38abde
|
|
name: 'HashiCorp Nomad Server: Open file descriptors, max'
|
|
type: DEPENDENT
|
|
key: nomad.server.process_max_fds
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Maximum number of open file descriptors.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- process_max_fds
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: fds
|
|
- uuid: cba898eca0c84a84a1ea0309e04b9f84
|
|
name: 'HashiCorp Nomad Server: Open file descriptors'
|
|
type: DEPENDENT
|
|
key: nomad.server.process_open_fds
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of open file descriptors.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- process_open_fds
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: fds
|
|
- uuid: 1dd455d2776f4378a39980cfa8eba33c
|
|
name: 'HashiCorp Nomad Server: FSM index'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.applied_index
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Current index applied to FSM.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_appliedIndex
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: fsm
|
|
- uuid: bad74d0dc4b2431fbb5a442bc2fc1236
|
|
name: 'HashiCorp Nomad Server: Raft transactions, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.apply
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of Raft transactions.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_apply
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: raft
|
|
- uuid: 804693ed989b44e3903bcd81159dd51d
|
|
name: 'HashiCorp Nomad Server: Raft calls blocked, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.barrier
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Count of blocking raft API calls.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_barrier
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: raft
|
|
- uuid: f7d1bc6c6a82475dabcdaaec7472fe02
|
|
name: 'HashiCorp Nomad Server: BoltDB freelist bytes'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.freelist_bytes
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
units: B
|
|
description: 'Number of freelist bytes.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_freelistBytes
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: b3f246d0991746589f4d0d68653eb4e9
|
|
name: 'HashiCorp Nomad Server: BoltDB free page bytes'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.free_page_bytes
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Number of free page bytes.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_freePageBytes
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: e3d0d6f656f5484f8e8a8ca1195aad59
|
|
name: 'HashiCorp Nomad Server: BoltDB free pages'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.num_free_pages
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of BoltDB free pages.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_numFreePages
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: 73f359bcb02d46a0adb6fdd51284afcf
|
|
name: 'HashiCorp Nomad Server: BoltDB pending pages'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.num_pending_pages
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of BoltDB pending pages.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_numPendingPages
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: 08afe7430e3c49e388f0a87c8bbf2ff4
|
|
name: 'HashiCorp Nomad Server: BoltDB open read transactions'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.open_read_txn
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of current open read transactions.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_openReadTxn
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: 36e89e86c9124e868670e9cb7b247899
|
|
name: 'HashiCorp Nomad Server: BoltDB read transactions, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.total_read_txn
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Count of total read transactions.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_totalReadTxn
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: 2733afd943b346e38c8605259bea038c
|
|
name: 'HashiCorp Nomad Server: BoltDB cursors'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.txstats.cursor_count
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Count of total database cursors.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_txstats_cursorCount
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: 1641576e376c4c82a8b1882b4810db4c
|
|
name: 'HashiCorp Nomad Server: BoltDB nodes, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.txstats.node_count
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Count of total database nodes.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_txstats_nodeCount
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: f0151f3431254030a3bcd538e8d2068b
|
|
name: 'HashiCorp Nomad Server: BoltDB node dereferences, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.txstats.node_deref
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of total database node dereferences.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_txstats_nodeDeref
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: 27a8a38acfc34e14b3fbb13aa58ad5d4
|
|
name: 'HashiCorp Nomad Server: BoltDB page allocations, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.txstats.page_alloc
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Number of page allocations.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_txstats_pageAlloc
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: 14a5fc715dad4eacaddb2f277e1f85f6
|
|
name: 'HashiCorp Nomad Server: BoltDB pages in use'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.txstats.page_count
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of pages in use.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_txstats_pageCount
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: dd9729e812614ab59c595b7183a883b0
|
|
name: 'HashiCorp Nomad Server: BoltDB rebalance operations, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.txstats.rebalance
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of total rebalance operations.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_txstats_rebalance
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: d270aa05d1ca4f3d9ee606937f1703cc
|
|
name: 'HashiCorp Nomad Server: BoltDB rebalance time'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.txstats.rebalance_time
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Sample of rebalance operation times.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_txstats_rebalanceTime_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: 621478ee2bea482abffd6b8c80df5ee1
|
|
name: 'HashiCorp Nomad Server: BoltDB spill operations, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.txstats.spill
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Count of total spill operations.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_txstats_spill
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: 2ca3e7572bf04b04961107e7d9304887
|
|
name: 'HashiCorp Nomad Server: BoltDB spill time'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.txstats.spill_time
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Sample of spill operation times.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_txstats_spillTime_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: bd14547f789540618db85057eceb70db
|
|
name: 'HashiCorp Nomad Server: BoltDB split operations, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.txstats.split
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Count of total split operations.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_txstats_split
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: b13b7b9eb66940c9bf4d14e711ee0279
|
|
name: 'HashiCorp Nomad Server: BoltDB write operations, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.txstats.write
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Count of total write operations.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_txstats_write
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: e9b31f3434aa4df0908a79ff5352a29e
|
|
name: 'HashiCorp Nomad Server: BoltDB write time'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.boltdb.txstats.write_time
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Sample of write operation times.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_boltdb_txstats_writeTime_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: boltdb
|
|
- uuid: dce75575b01e4cf8bd84d6a40c74f22b
|
|
name: 'HashiCorp Nomad Server: Cluster role'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.cluster_role
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Current role in the cluster.'
|
|
valuemap:
|
|
name: 'Cluster role'
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- $.body.stats.raft.state
|
|
error_handler: DISCARD_VALUE
|
|
- type: JAVASCRIPT
|
|
parameters:
|
|
- |
|
|
const idx = [
|
|
'Leader',
|
|
'Follower',
|
|
'Candidate',
|
|
].indexOf(value);
|
|
return idx !== -1 ? idx : 10;
|
|
master_item:
|
|
key: nomad.server.stats.get
|
|
tags:
|
|
- tag: component
|
|
value: system
|
|
triggers:
|
|
- uuid: 1bc6ee807acd4532a58b4527f8865c1c
|
|
expression: 'change(/HashiCorp Nomad Server by HTTP/nomad.server.raft.cluster_role) <> 0'
|
|
name: 'HashiCorp Nomad Server: Cluster role has changed'
|
|
event_name: 'HashiCorp Nomad Server: Cluster role has changed to {ITEM.LASTVALUE}'
|
|
priority: INFO
|
|
description: 'Cluster role has changed.'
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: notice
|
|
- uuid: a994ee525886407486adace82ea5ef90
|
|
name: 'HashiCorp Nomad Server: Raft commit logs enqueued'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.commit_num_logs
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Count of logs enqueued.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_commitNumLogs
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: raft
|
|
- uuid: 3160d5b81e1540aab3244f1cf6bce95d
|
|
name: 'HashiCorp Nomad Server: Raft commit time'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.commit_time
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to commit writes.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_commitTime_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: raft
|
|
- uuid: 09f1baf6fc2443f48e01814b939c0749
|
|
name: 'HashiCorp Nomad Server: FSM apply time'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.fsm.apply
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to apply write to FSM.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_fsm_apply_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: fsm
|
|
- uuid: 18689de9474f4812ba9ed34d311ad3a1
|
|
name: 'HashiCorp Nomad Server: FSM autopilot time'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.fsm.autopilot
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to apply Autopilot raft entry.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_fsm_autopilot_sum
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: fsm
|
|
- uuid: 6846f262148e49288ecb7aa0ad18f7d2
|
|
name: 'HashiCorp Nomad Server: FSM enqueue time'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.fsm.enqueue
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to enqueue write to FSM.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_fsm_enqueue_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: fsm
|
|
- uuid: 4e2d0557fc984ed9858316c8409edd0e
|
|
name: 'HashiCorp Nomad Server: FSM register node time'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.fsm.register_node
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to apply RegisterNode raft entry.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_fsm_register_node_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: fsm
|
|
- uuid: bb65ca263f3244fc883d9102bbf9bef7
|
|
name: 'HashiCorp Nomad Server: Raft last index'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.last_index
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Most recent index seen.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_lastIndex
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: raft
|
|
- uuid: bda052f13e3c4dacaafc4ddefbc98036
|
|
name: 'HashiCorp Nomad Server: Dispatch log time'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.leader.dispatch_log
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed to write log, mark in flight, and start replication.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_leader_dispatchLog_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: leader
|
|
- tag: component
|
|
value: raft
|
|
- uuid: f915e5e162814786943cd4cc85bb7aff
|
|
name: 'HashiCorp Nomad Server: Logs dispatched'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.leader.dispatch_num_logs
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Count of logs dispatched.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_leader_dispatchNumLogs
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: leader
|
|
- tag: component
|
|
value: raft
|
|
- uuid: 7f9db272307c43e7b87b8a4a4a6eae16
|
|
name: 'HashiCorp Nomad Server: Leader last contact'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.leader.lastContact
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: |
|
|
Time since last contact to leader.
|
|
General indicator of Raft latency.
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- 'nomad_raft_leader_lastContact{quantile="0.99"}'
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: STR_REPLACE
|
|
parameters:
|
|
- NaN
|
|
- '0'
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '0.001'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: leader
|
|
- tag: component
|
|
value: raft
|
|
triggers:
|
|
- uuid: 65c69469e9354e81a632485a74bc711d
|
|
expression: 'min(/HashiCorp Nomad Server by HTTP/nomad.server.raft.leader.lastContact,5m) >= {$NOMAD.SERVER.LEADER.LATENCY} and nodata(/HashiCorp Nomad Server by HTTP/nomad.server.raft.leader.lastContact,5m) = 0'
|
|
name: 'HashiCorp Nomad Server: Leader last contact timeout exceeded'
|
|
event_name: 'Leader last contact timeout exceeded: over {$NOMAD.SERVER.LEADER.LATENCY} for the last 5 minutes'
|
|
priority: WARNING
|
|
description: |
|
|
The nomad.raft.leader.lastContact metric is a general indicator of Raft latency which can be used to observe how Raft timing is performing and guide infrastructure provisioning.
|
|
If this number trends upwards, look at CPU, disk IOPs, and network latency. nomad.raft.leader.lastContact should not get too close to the leader lease timeout of 500ms.
|
|
tags:
|
|
- tag: scope
|
|
value: performance
|
|
- uuid: bca59612c439447ebe78ae68e68fd6cb
|
|
name: 'HashiCorp Nomad Server: Raft peers'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.peers
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Current cluster raft peers amount.'
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- $.body.stats.raft.num_peers
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.stats.get
|
|
tags:
|
|
- tag: component
|
|
value: system
|
|
- uuid: 36ff560da62740a0b4657b4afabcf902
|
|
name: 'HashiCorp Nomad Server: Raft transaction commit time'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.replication.appendEntries
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Raft transaction commit time.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_replication_appendEntries_rpc
|
|
- function
|
|
- avg
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '0.001'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: raft
|
|
- uuid: 662cd8712bbe4a4080a540f878b4ec44
|
|
name: 'HashiCorp Nomad Server: Heartbeat fails'
|
|
type: DEPENDENT
|
|
key: nomad.server.raft.transition.heartbeat_timeout
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Count of failing to heartbeat and starting election.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_raft_transition_heartbeat_timeout
|
|
- value
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: raft
|
|
- uuid: 3dae4fd461fa4a5abdba717e538b8405
|
|
name: 'HashiCorp Nomad Server: Resident memory size'
|
|
type: DEPENDENT
|
|
key: nomad.server.resident_memory_bytes
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Resident memory size in bytes.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- process_resident_memory_bytes
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memory
|
|
- uuid: ec2d018e121442be9d3fb350f6572ca6
|
|
name: 'HashiCorp Nomad Server: RPC queries, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.rpc.query
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of RPC queries.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_rpc_query
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: 406d9bc9db1e44509a77acd11963f406
|
|
name: 'HashiCorp Nomad Server: RPC requests, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.rpc.request
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of RPC requests being handled.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_rpc_request
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: 0ef3cfaf0e694625bbc3f3a117ff9f06
|
|
name: 'HashiCorp Nomad Server: RPC error requests, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.rpc.request_error
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of RPC requests being handled that result in an error.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_rpc_request_error
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: network
|
|
- tag: component
|
|
value: rpc
|
|
- uuid: a0e7b48174de4e569762dc1654d63267
|
|
name: 'HashiCorp Nomad Server: Memory used'
|
|
type: DEPENDENT
|
|
key: nomad.server.runtime.alloc_bytes
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Memory utilization in bytes.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_runtime_alloc_bytes
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memory
|
|
- uuid: 7d8883065c2c494f9c4853afe50d157c
|
|
name: 'HashiCorp Nomad Server: Objects freed, rate'
|
|
type: DEPENDENT
|
|
key: nomad.server.runtime.free_count
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of objects freed from heap by go runtime GC.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_runtime_free_count
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: CHANGE_PER_SECOND
|
|
parameters:
|
|
- ''
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: runtime
|
|
- uuid: 02b0699a706742d1bb557e32ed1b52ea
|
|
name: 'HashiCorp Nomad Server: GC pause time'
|
|
type: DEPENDENT
|
|
key: nomad.server.runtime.gc_pause_ns
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Go runtime GC pause times.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_runtime_gc_pause_ns_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: runtime
|
|
- uuid: c3fc612a6bec45ee9ffd51d168801974
|
|
name: 'HashiCorp Nomad Server: Heap objects'
|
|
type: DEPENDENT
|
|
key: nomad.server.runtime.heap_objects
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: |
|
|
Number of objects on the heap.
|
|
General memory pressure indicator.
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_runtime_heap_objects
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memory
|
|
- uuid: 9b1bcd0020b5413e920ce12af6124687
|
|
name: 'HashiCorp Nomad Server: Goroutines'
|
|
type: DEPENDENT
|
|
key: nomad.server.runtime.num_goroutines
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Number of goroutines and general load pressure indicator.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_runtime_num_goroutines
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memory
|
|
- uuid: 71256d39cf844c22be8d25a9170e61f2
|
|
name: 'HashiCorp Nomad Server: GC metadata size'
|
|
type: DEPENDENT
|
|
key: nomad.server.runtime.sys_bytes
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Go runtime GC metadata size in bytes.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_runtime_sys_bytes
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: runtime
|
|
- uuid: 733b18dd02d7404bb8cf4c7447a374ae
|
|
name: 'HashiCorp Nomad Server: GC runs'
|
|
type: DEPENDENT
|
|
key: nomad.server.runtime.total_gc_runs
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Count of go runtime GC runs.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_runtime_total_gc_runs
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: runtime
|
|
- uuid: c7f327a390884c928ebf228d8ebb3a4c
|
|
name: 'HashiCorp Nomad Server: Allocation reschedule attempts'
|
|
type: DEPENDENT
|
|
key: nomad.server.scheduler.allocs.rescheduled.attempted
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Count of attempts to reschedule an allocation.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_scheduler_allocs_reschedule_attempted
|
|
- function
|
|
- sum
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '0'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: allocations
|
|
- uuid: 8293b40a695b4b24ab9b6576a0f9a4d8
|
|
name: 'HashiCorp Nomad Server: Memberlist events'
|
|
type: DEPENDENT
|
|
key: nomad.server.serf.queue.event
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of memberlist events received.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_serf_queue_Event_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memberlist
|
|
- uuid: 8aacc54e455b49b0ab0cfed4559829f7
|
|
name: 'HashiCorp Nomad Server: Memberlist changes'
|
|
type: DEPENDENT
|
|
key: nomad.server.serf.queue.intent
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of memberlist changes.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_serf_queue_Intent_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memberlist
|
|
- uuid: 1c5f383a89f74934a6cb211bfd586856
|
|
name: 'HashiCorp Nomad Server: Memberlist queries'
|
|
type: DEPENDENT
|
|
key: nomad.server.serf.queue.queries
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of memberlist queries.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_serf_queue_Query_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memberlist
|
|
- uuid: e645f371b4ae48e2b2b782f4b0d0fae4
|
|
name: 'HashiCorp Nomad Server: Snapshot index'
|
|
type: DEPENDENT
|
|
key: nomad.server.state.snapshot.index
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
description: 'Current snapshot index.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_state_snapshotIndex
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: state
|
|
- uuid: 02ba826868414a16909f13b98cc4c445
|
|
name: 'HashiCorp Nomad Server: Internal stats API response'
|
|
type: DEPENDENT
|
|
key: nomad.server.stats.api.response
|
|
delay: '0'
|
|
history: 7d
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Internal stats API response message.'
|
|
preprocessing:
|
|
- type: JAVASCRIPT
|
|
parameters:
|
|
- |
|
|
try {
|
|
var response = Object.keys(JSON.parse(value).header).filter(function (f) {
|
|
return f.match(/HTTP\/[\d.]+\s+\d{3}/);
|
|
});
|
|
|
|
return response.pop();
|
|
}
|
|
catch (error) {
|
|
return "HTTP/1.1 408 Request timeout";
|
|
}
|
|
- type: DISCARD_UNCHANGED_HEARTBEAT
|
|
parameters:
|
|
- 1h
|
|
master_item:
|
|
key: nomad.server.stats.get
|
|
tags:
|
|
- tag: component
|
|
value: status
|
|
triggers:
|
|
- uuid: b60dd78bd2d24c22afec63a8b420bcd6
|
|
expression: 'find(/HashiCorp Nomad Server by HTTP/nomad.server.stats.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
|
|
name: 'HashiCorp Nomad Server: Internal stats API connection has failed'
|
|
event_name: 'HashiCorp Nomad Server: Internal stats API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", "\1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", "\1")}'
|
|
priority: AVERAGE
|
|
description: |
|
|
Internal stats API connection has failed.
|
|
Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
|
|
manual_close: 'YES'
|
|
dependencies:
|
|
- name: 'HashiCorp Nomad Server: Monitoring API connection has failed'
|
|
expression: 'find(/HashiCorp Nomad Server by HTTP/nomad.server.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: f8d3a680e6f94250ab0a1ef8169889e3
|
|
name: 'HashiCorp Nomad Server: Internal stats get'
|
|
type: HTTP_AGENT
|
|
key: nomad.server.stats.get
|
|
delay: 1h
|
|
history: '0'
|
|
trends: '0'
|
|
value_type: TEXT
|
|
description: 'Internal stats data in raw format.'
|
|
preprocessing:
|
|
- type: CHECK_NOT_SUPPORTED
|
|
parameters:
|
|
- ''
|
|
error_handler: CUSTOM_VALUE
|
|
error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}'
|
|
timeout: '{$NOMAD.DATA.TIMEOUT}'
|
|
url: '{$NOMAD.SERVER.API.SCHEME}://{HOST.IP}:{$NOMAD.SERVER.API.PORT}/v1/agent/self'
|
|
query_fields:
|
|
- name: filter
|
|
value: 'ID == "{HOST.HOST}"'
|
|
status_codes: ''
|
|
http_proxy: '{$NOMAD.HTTP.PROXY}'
|
|
headers:
|
|
- name: X-Nomad-Token
|
|
value: '{$NOMAD.TOKEN}'
|
|
retrieve_mode: BOTH
|
|
output_format: JSON
|
|
tags:
|
|
- tag: component
|
|
value: raw
|
|
- uuid: 9593fe46e0d941f2b4ddef96baa39b0e
|
|
name: 'HashiCorp Nomad Server: Vault tokens revoked'
|
|
type: DEPENDENT
|
|
key: nomad.server.vault.distributed_tokens_revoked
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
description: 'Count of revoked tokens.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_vault_distributed_tokens_revoking
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: vault
|
|
- uuid: 40c41426ab96476c84647a18511af6e8
|
|
name: 'HashiCorp Nomad Server: Vault token last renewal'
|
|
type: DEPENDENT
|
|
key: nomad.server.vault.token_last_renewal
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time since last successful Vault token renewal.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_vault_token_last_renewal
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '0.001'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: vault
|
|
- uuid: f62d8cf1e4b94c9b8796bd30fa799f7a
|
|
name: 'HashiCorp Nomad Server: Vault token next renewal'
|
|
type: DEPENDENT
|
|
key: nomad.server.vault.token_next_renewal
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time until next Vault token renewal attempt.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_vault_token_next_renewal
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '0.001'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: vault
|
|
- uuid: c619c340887b43e1a4db750e74f1d4aa
|
|
name: 'HashiCorp Nomad Server: Vault token TTL'
|
|
type: DEPENDENT
|
|
key: nomad.server.vault.token_ttl
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time to live for Vault token.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_vault_token_ttl
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '0.001'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: vault
|
|
- uuid: 921fc50044f94aceaa9d0789be854496
|
|
name: 'HashiCorp Nomad Server: Nomad server version'
|
|
type: DEPENDENT
|
|
key: nomad.server.version
|
|
delay: '0'
|
|
history: 7d
|
|
trends: '0'
|
|
value_type: CHAR
|
|
description: 'Nomad server version.'
|
|
preprocessing:
|
|
- type: JSONPATH
|
|
parameters:
|
|
- $.body.config.Version.Version
|
|
master_item:
|
|
key: nomad.server.stats.get
|
|
triggers:
|
|
- uuid: 3c7dfd5ec21144b6b58c603d60142200
|
|
expression: 'change(/HashiCorp Nomad Server by HTTP/nomad.server.version)<>0'
|
|
name: 'HashiCorp Nomad Server: Nomad server version has changed'
|
|
event_name: 'HashiCorp Nomad Server: Nomad server version has changed to {ITEM.LASTVALUE}'
|
|
priority: INFO
|
|
description: 'Nomad server version has changed.'
|
|
manual_close: 'YES'
|
|
tags:
|
|
- tag: scope
|
|
value: availability
|
|
- uuid: d769576e16df4aed86004498055d5a94
|
|
name: 'HashiCorp Nomad Server: Virtual memory size'
|
|
type: DEPENDENT
|
|
key: nomad.server.virtual_memory_bytes
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: B
|
|
description: 'Virtual memory size in bytes.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- process_virtual_memory_bytes
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: memory
|
|
- uuid: d1addab9628140b09adb60c2820c1829
|
|
name: 'HashiCorp Nomad Server: Worker evaluation create time'
|
|
type: DEPENDENT
|
|
key: nomad.server.worker.create_eval
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for worker to create an eval.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_worker_create_eval_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: workers
|
|
- uuid: 0875726948384b9696158d4db2b64691
|
|
name: 'HashiCorp Nomad Server: Worker evaluation dequeue time'
|
|
type: DEPENDENT
|
|
key: nomad.server.worker.dequeue_eval
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for worker to dequeue an eval.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_worker_dequeue_eval_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: workers
|
|
- uuid: 5bc2cf7713274e1ca8997415f26f7087
|
|
name: 'HashiCorp Nomad Server: Worker invoke scheduler time'
|
|
type: DEPENDENT
|
|
key: nomad.server.worker.invoke_scheduler_service
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for worker to invoke the scheduler.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_worker_invoke_scheduler_service_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: workers
|
|
- uuid: c21aca97ba274913860b28a1fd06abec
|
|
name: 'HashiCorp Nomad Server: Worker acknowledgement send time'
|
|
type: DEPENDENT
|
|
key: nomad.server.worker.send_ack
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for worker to send acknowledgement.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_worker_send_ack_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: workers
|
|
- uuid: 3c7b49c3f60e4d69981f6dd3569e49e6
|
|
name: 'HashiCorp Nomad Server: Worker submit plan time'
|
|
type: DEPENDENT
|
|
key: nomad.server.worker.submit_plan
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for worker to submit plan.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_worker_submit_plan_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: workers
|
|
- uuid: f5cd72ca250f402f9ddd112fb858d1f7
|
|
name: 'HashiCorp Nomad Server: Worker update evaluation time'
|
|
type: DEPENDENT
|
|
key: nomad.server.worker.update_eval
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed for worker to submit updated eval.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_worker_update_eval_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: workers
|
|
- uuid: ae2e69196ef7416481e9904d9ee8862b
|
|
name: 'HashiCorp Nomad Server: Worker log replication time'
|
|
type: DEPENDENT
|
|
key: nomad.server.worker.wait_for_index
|
|
delay: '0'
|
|
history: 7d
|
|
trends: 90d
|
|
value_type: FLOAT
|
|
units: s
|
|
description: 'Time elapsed that worker waits for the raft index of the eval to be processed.'
|
|
preprocessing:
|
|
- type: PROMETHEUS_PATTERN
|
|
parameters:
|
|
- nomad_nomad_worker_wait_for_index_sum
|
|
- value
|
|
- ''
|
|
error_handler: DISCARD_VALUE
|
|
- type: MULTIPLIER
|
|
parameters:
|
|
- '1.0E-9'
|
|
master_item:
|
|
key: nomad.server.metrics.get
|
|
tags:
|
|
- tag: component
|
|
value: workers
|
|
tags:
|
|
- tag: class
|
|
value: software
|
|
- tag: target
|
|
value: nomad-server
|
|
macros:
|
|
- macro: '{$NOMAD.API.RESPONSE.SUCCESS}'
|
|
value: '200'
|
|
description: 'HTTP API successful response code. Availability triggers threshold. Change, if needed.'
|
|
- macro: '{$NOMAD.DATA.TIMEOUT}'
|
|
value: 15s
|
|
description: 'Response timeout for an API.'
|
|
- macro: '{$NOMAD.HTTP.PROXY}'
|
|
description: 'Sets the HTTP proxy for HTTP agent item. If this parameter is empty, then no proxy is used.'
|
|
- macro: '{$NOMAD.OPEN.FDS.MAX}'
|
|
value: '90'
|
|
description: 'Maximum percentage of used file descriptors.'
|
|
- macro: '{$NOMAD.REDUNDANCY.MIN}'
|
|
value: '1'
|
|
description: |
|
|
Amount of redundant servers to keep the cluster safe.
|
|
Default value is '1' for a 3-node cluster.
|
|
Change if needed.
|
|
- macro: '{$NOMAD.SERVER.API.PORT}'
|
|
value: '4646'
|
|
description: 'Nomad SERVER API port.'
|
|
- macro: '{$NOMAD.SERVER.API.SCHEME}'
|
|
value: http
|
|
description: 'Nomad SERVER API scheme.'
|
|
- macro: '{$NOMAD.SERVER.LEADER.LATENCY}'
|
|
value: 0.3s
|
|
description: 'Leader last contact latency threshold.'
|
|
- macro: '{$NOMAD.SERVER.RPC.PORT}'
|
|
value: '4647'
|
|
description: 'Nomad RPC service port.'
|
|
- macro: '{$NOMAD.SERVER.SERF.PORT}'
|
|
value: '4648'
|
|
description: 'Nomad serf service port.'
|
|
- macro: '{$NOMAD.TOKEN}'
|
|
value: '<PUT YOUR AUTH TOKEN>'
|
|
description: 'Nomad authentication token.'
|
|
dashboards:
|
|
- uuid: c721ccf33a6f412e994e0ef8c9dc81b5
|
|
name: BoltDB
|
|
pages:
|
|
- name: BoltDB
|
|
widgets:
|
|
- type: item
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.txstats.cursor_count
|
|
- type: item
|
|
x: '8'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.txstats.node_count
|
|
- type: item
|
|
x: '16'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.txstats.node_deref
|
|
- type: graph
|
|
'y': '5'
|
|
width: '24'
|
|
height: '5'
|
|
fields:
|
|
- type: GRAPH
|
|
name: graphid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
name: 'HashiCorp Nomad Server: Raft timers'
|
|
- type: graph
|
|
'y': '10'
|
|
width: '24'
|
|
height: '5'
|
|
fields:
|
|
- type: GRAPH
|
|
name: graphid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
name: 'HashiCorp Nomad Server: BoltDB operations'
|
|
- type: graph
|
|
'y': '15'
|
|
width: '24'
|
|
height: '5'
|
|
fields:
|
|
- type: GRAPH
|
|
name: graphid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
name: 'HashiCorp Nomad Server: BoltDB pages'
|
|
- uuid: 5b6b8811398f48ebb4abb73620320211
|
|
name: Cluster
|
|
pages:
|
|
- name: Cluster
|
|
widgets:
|
|
- type: item
|
|
width: '6'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: INTEGER
|
|
name: value_size
|
|
value: '30'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.cluster_role
|
|
- type: item
|
|
x: '6'
|
|
width: '6'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: INTEGER
|
|
name: value_size
|
|
value: '30'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.autopilot.state
|
|
- type: item
|
|
x: '12'
|
|
width: '6'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.autopilot.failure_tolerance
|
|
- type: item
|
|
x: '18'
|
|
width: '6'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.fsm.autopilot
|
|
- type: graph
|
|
'y': '5'
|
|
width: '24'
|
|
height: '5'
|
|
fields:
|
|
- type: GRAPH
|
|
name: graphid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
name: 'HashiCorp Nomad Server: Raft timers'
|
|
- type: item
|
|
'y': '10'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.leader.dispatch_num_logs
|
|
- type: item
|
|
x: '8'
|
|
'y': '10'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.apply
|
|
- type: item
|
|
x: '16'
|
|
'y': '10'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.applied_index
|
|
- uuid: c7f515dd33ee4045b96ffc6d83cec12a
|
|
name: Jobs
|
|
pages:
|
|
- name: Jobs
|
|
widgets:
|
|
- type: item
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.job_status.running
|
|
- type: item
|
|
x: '8'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.job_status.pending
|
|
- type: item
|
|
x: '16'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.job_status.dead
|
|
- type: item
|
|
'y': '5'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.job_summary.running
|
|
- type: item
|
|
x: '8'
|
|
'y': '5'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.job_summary.starting
|
|
- type: item
|
|
x: '16'
|
|
'y': '5'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.job_summary.complete
|
|
- type: item
|
|
'y': '10'
|
|
width: '6'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.job_summary.queued
|
|
- type: item
|
|
x: '6'
|
|
'y': '10'
|
|
width: '6'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.job_summary.lost
|
|
- type: item
|
|
x: '12'
|
|
'y': '10'
|
|
width: '6'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.job_summary.failed
|
|
- type: item
|
|
x: '18'
|
|
'y': '10'
|
|
width: '6'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.job_summary.unknown
|
|
- type: item
|
|
'y': '15'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.broker.total_ready
|
|
- type: item
|
|
x: '8'
|
|
'y': '15'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.broker.total_pending
|
|
- type: item
|
|
x: '16'
|
|
'y': '15'
|
|
width: '8'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.broker.total_waiting
|
|
- type: item
|
|
'y': '20'
|
|
width: '6'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.blocked_evals.total_blocked
|
|
- type: item
|
|
x: '6'
|
|
'y': '20'
|
|
width: '6'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.blocked_evals.total_quota_limit
|
|
- type: item
|
|
x: '12'
|
|
'y': '20'
|
|
width: '6'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.blocked_evals.total_escaped
|
|
- type: item
|
|
x: '18'
|
|
'y': '20'
|
|
width: '6'
|
|
height: '5'
|
|
fields:
|
|
- type: INTEGER
|
|
name: show
|
|
value: '2'
|
|
- type: INTEGER
|
|
name: show
|
|
value: '4'
|
|
- type: INTEGER
|
|
name: adv_conf
|
|
value: '1'
|
|
- type: INTEGER
|
|
name: decimal_places
|
|
value: '0'
|
|
- type: ITEM
|
|
name: itemid
|
|
value:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.broker.total_unacked
|
|
valuemaps:
|
|
- uuid: 9898b464447240919cf8e25016be687f
|
|
name: 'Autopilot state'
|
|
mappings:
|
|
- value: '0'
|
|
newvalue: Unhealthy
|
|
- value: '1'
|
|
newvalue: Healthy
|
|
- uuid: fe32599dc75c48f6ab0887652ceb728c
|
|
name: 'Cluster role'
|
|
mappings:
|
|
- value: '0'
|
|
newvalue: Leader
|
|
- value: '1'
|
|
newvalue: Follower
|
|
- value: '2'
|
|
newvalue: Candidate
|
|
- uuid: b90612059a164f3fa0c7ab871afa0c59
|
|
name: 'Service state'
|
|
mappings:
|
|
- value: '0'
|
|
newvalue: Down
|
|
- value: '1'
|
|
newvalue: Up
|
|
triggers:
|
|
- uuid: d41ae1c102d84eab9629565bcc9b51df
|
|
expression: '(min(/HashiCorp Nomad Client by HTTP/nomad.client.memory.available, 10m) / last(/HashiCorp Nomad Client by HTTP/nomad.client.memory.total))*100 <= {$NOMAD.RAM.AVAIL.MIN}'
|
|
name: 'HashiCorp Nomad Client: High memory utilization'
|
|
event_name: 'HashiCorp Nomad Client: High memory utilization: (available < {$NOMAD.RAM.AVAIL.MIN}% over last 10m)'
|
|
opdata: 'RAM available: {ITEM.LASTVALUE1}, RAM total: {ITEM.LASTVALUE2}'
|
|
priority: AVERAGE
|
|
description: 'RAM utilization is too high. The system might be slow to respond.'
|
|
tags:
|
|
- tag: scope
|
|
value: performance
|
|
- uuid: 9b6d779864144e4e9e5e9924658d763e
|
|
expression: 'min(/HashiCorp Nomad Server by HTTP/nomad.server.process_open_fds,5m)/last(/HashiCorp Nomad Server by HTTP/nomad.server.process_max_fds)*100>{$NOMAD.OPEN.FDS.MAX}'
|
|
name: 'HashiCorp Nomad Server: Current number of open files is too high'
|
|
event_name: 'HashiCorp Nomad Server: Current number of open files is too high (over {$NOMAD.OPEN.FDS.MAX}% for 5m)'
|
|
priority: WARNING
|
|
description: 'Heavy file descriptor usage (i.e., near the process file descriptor limit) indicates a potential file descriptor exhaustion issue.'
|
|
tags:
|
|
- tag: scope
|
|
value: capacity
|
|
graphs:
|
|
- uuid: a221474b1f494f8e85edfefd3a86b2ad
|
|
name: 'HashiCorp Nomad Client: CPU utilization'
|
|
type: STACKED
|
|
graph_items:
|
|
- color: 199C0D
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.cpu.idle
|
|
- sortorder: '1'
|
|
color: F63100
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.cpu.system
|
|
- sortorder: '2'
|
|
color: 00611C
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.cpu.total
|
|
- sortorder: '3'
|
|
color: F7941D
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.cpu.user
|
|
- uuid: b36e33cf629444d096a222d4b806f10d
|
|
name: 'HashiCorp Nomad Client: Memory allocation'
|
|
type: STACKED
|
|
graph_items:
|
|
- color: 199C0D
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.allocated.memory
|
|
- sortorder: '1'
|
|
color: F63100
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.unallocated.memory
|
|
- uuid: a5658f2456f745ca9de427ce3bc4fc98
|
|
name: 'HashiCorp Nomad Client: Memory utilization'
|
|
type: STACKED
|
|
graph_items:
|
|
- color: 199C0D
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.memory.available
|
|
- sortorder: '1'
|
|
color: F63100
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.memory.free
|
|
- sortorder: '2'
|
|
color: 00611C
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.memory.total
|
|
- sortorder: '3'
|
|
color: F7941D
|
|
item:
|
|
host: 'HashiCorp Nomad Client by HTTP'
|
|
key: nomad.client.memory.used
|
|
- uuid: 5028c6df64d3442f8fda2bb1fbc5f6e9
|
|
name: 'HashiCorp Nomad Server: BoltDB operations'
|
|
graph_items:
|
|
- color: 199C0D
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.txstats.rebalance
|
|
- sortorder: '1'
|
|
color: F63100
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.txstats.spill
|
|
- sortorder: '2'
|
|
color: 00611C
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.txstats.split
|
|
- sortorder: '3'
|
|
color: F7941D
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.txstats.write
|
|
- uuid: 71832cf8ab4c4607985719e19646fcc7
|
|
name: 'HashiCorp Nomad Server: BoltDB pages'
|
|
graph_items:
|
|
- color: 199C0D
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.num_free_pages
|
|
- sortorder: '1'
|
|
color: F63100
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.txstats.page_count
|
|
- sortorder: '2'
|
|
color: 00611C
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.num_pending_pages
|
|
- sortorder: '3'
|
|
color: F7941D
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.txstats.page_alloc
|
|
- uuid: cc4f2fccc25840f0ba2049554c360080
|
|
name: 'HashiCorp Nomad Server: BoltDB timers'
|
|
graph_items:
|
|
- color: 199C0D
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.txstats.rebalance_time
|
|
- sortorder: '1'
|
|
color: F63100
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.txstats.spill_time
|
|
- sortorder: '2'
|
|
color: 00611C
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.boltdb.txstats.write_time
|
|
- uuid: fb8cbf7d131445f6ade454b7b7e2e748
|
|
name: 'HashiCorp Nomad Server: Raft timers'
|
|
graph_items:
|
|
- color: 199C0D
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.leader.dispatch_log
|
|
- sortorder: '1'
|
|
color: F63100
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.commit_time
|
|
- sortorder: '2'
|
|
color: 00611C
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.replication.appendEntries
|
|
- sortorder: '3'
|
|
color: F7941D
|
|
item:
|
|
host: 'HashiCorp Nomad Server by HTTP'
|
|
key: nomad.server.raft.fsm.apply
|