# Zabbix 7.0 template export.
# Template 'HashiCorp Nomad by HTTP': polls one Nomad cluster member over HTTP,
# derives availability/count/region items from the raw responses and creates
# hosts for the discovered client and server nodes via host prototypes.
zabbix_export:
  version: '7.0'
  template_groups:
    - uuid: a571c0d144b14fd4a87a9d9b2aa9fcd6
      name: Templates/Applications
  host_groups:
    - uuid: a571c0d144b14fd4a87a9d9b2aa9fcd6
      name: Applications
  templates:
    - uuid: f74adf26d9ab44ada318002d31fd2881
      template: 'HashiCorp Nomad by HTTP'
      name: 'HashiCorp Nomad by HTTP'
      description: |
        Discover HashiCorp Nomad servers and clients automatically.
        Don't forget to change macro {$NOMAD.ENDPOINT.API.URL}, {$NOMAD.TOKEN} values.
        You can discuss this template or leave feedback on our forum: https://www.zabbix.com/forum/zabbix-suggestions-and-feedback.
        Generated by official Zabbix template tool "Templator" 2.0.0
      vendor:
        name: Zabbix
        version: 7.0-0
      groups:
        - name: Templates/Applications
      items:
        # Extracts the HTTP status line from the headers of the raw
        # 'Nomad clients get' response; falls back to a synthetic 408 line
        # when the master value cannot be parsed.
        - uuid: 50bf00cc5c9f41c887add07d0bba3cc1
          name: 'HashiCorp Nomad: Client nodes API response'
          type: DEPENDENT
          key: nomad.client.nodes.api.response
          delay: '0'
          history: 7d
          trends: '0'
          value_type: TEXT
          description: 'Client nodes API response message.'
          preprocessing:
            - type: JAVASCRIPT
              parameters:
                - |
                  try {
                      var response = Object.keys(JSON.parse(value).header).filter(function (f) {
                          return f.match(/HTTP\/[\d.]+\s+\d{3}/);
                      });
                      return response.pop();
                  } catch (error) {
                      return "HTTP/1.1 408 Request timeout";
                  }
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: nomad.client.nodes.get
          tags:
            - tag: component
              value: status
          triggers:
            # Fires when the stored status line does not contain
            # {$NOMAD.API.RESPONSE.SUCCESS} (substring match via "like").
            # The regsub output argument is the bare back-reference \1.
            - uuid: f2e1cbbf808946ca902c1f378747e936
              expression: 'find(/HashiCorp Nomad by HTTP/nomad.client.nodes.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
              name: 'HashiCorp Nomad: Client nodes API connection has failed'
              event_name: 'HashiCorp Nomad: Client nodes API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", \1)}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", \1)}'
              priority: AVERAGE
              description: |
                Client nodes API connection has failed.
                Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: availability
        # Master item: raw /v1/nodes response (headers and body, as JSON).
        # A "not supported" state is replaced with a synthetic 408 header so
        # that the dependent response item still receives a value.
        - uuid: 99d8cd0768094bef93995e4baa475186
          name: 'HashiCorp Nomad: Nomad clients get'
          type: HTTP_AGENT
          key: nomad.client.nodes.get
          delay: 1h
          history: '0'
          trends: '0'
          value_type: TEXT
          description: 'Nomad clients data in raw format.'
          preprocessing:
            - type: CHECK_NOT_SUPPORTED
              parameters:
                - ''
              error_handler: CUSTOM_VALUE
              error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}'
          timeout: '{$NOMAD.DATA.TIMEOUT}'
          url: '{$NOMAD.ENDPOINT.API.URL}/v1/nodes'
          query_fields:
            - name: os
              value: '1'
          status_codes: ''
          http_proxy: '{$NOMAD.HTTP.PROXY}'
          headers:
            - name: X-Nomad-Token
              value: '{$NOMAD.TOKEN}'
          retrieve_mode: BOTH
          output_format: JSON
          tags:
            - tag: component
              value: raw
        # Counts the body entries that have a Name field.
        - uuid: 7a46e18358e641cc941b306de6eb5dc4
          name: 'HashiCorp Nomad: Nomad clients count'
          type: DEPENDENT
          key: nomad.clients.count
          delay: '0'
          history: 7d
          description: 'Nomad clients count.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$.body[?(@.Name)].length()'
          master_item:
            key: nomad.client.nodes.get
        # First "region" value found anywhere in the servers data.
        - uuid: e848e135a5574931accf114fcafb89d7
          name: 'HashiCorp Nomad: Region'
          type: DEPENDENT
          key: nomad.region
          delay: '0'
          history: 7d
          trends: '0'
          value_type: CHAR
          description: 'Current cluster region.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $..region.first()
          master_item:
            key: nomad.server.nodes.get
          tags:
            - tag: component
              value: environment
        # Holds the "error" field of the script output; when that path is
        # absent the value is set to 'HTTP/1.1 200 OK'.
        - uuid: 53a0c689f27547ccbcb0a57c736a5027
          name: 'HashiCorp Nomad: Server-related APIs response'
          type: DEPENDENT
          key: nomad.server.api.response
          delay: '0'
          history: 7d
          trends: '0'
          value_type: TEXT
          description: 'Server-related (`operator/raft/configuration`, `agent/members`) APIs error response message.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - $.error
              error_handler: CUSTOM_VALUE
              error_handler_params: 'HTTP/1.1 200 OK'
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
          master_item:
            key: nomad.server.nodes.get
          triggers:
            # Same substring check as the client nodes availability trigger.
            - uuid: 1560c32cb7de4fc8a58cf537c7958205
              expression: 'find(/HashiCorp Nomad by HTTP/nomad.server.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0'
              name: 'HashiCorp Nomad: Server-related API connection has failed'
              event_name: 'HashiCorp Nomad: Server-related API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", \1)}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", \1)}'
              priority: AVERAGE
              description: |
                Server-related API connection has failed.
                Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix.
              manual_close: 'YES'
              tags:
                - tag: scope
                  value: availability
        # Master item: script that joins the raft configuration (server IDs)
        # with the agent members list and returns the matching members as a
        # JSON array, or {"error": "..."} on any failure.
        - uuid: 254354aa21764751bf7e1961e37de7ae
          name: 'HashiCorp Nomad: Nomad servers get'
          type: SCRIPT
          key: nomad.server.nodes.get
          delay: 1h
          history: '0'
          trends: '0'
          value_type: TEXT
          params: |
            var Nomad = {
                // Validates the script parameters and stores them; the API
                // endpoint is normalized to end with "/".
                setParams: function (params) {
                    ['api_endpoint'].forEach(function (field) {
                        // typeof null is 'object', so null is rejected explicitly.
                        if (typeof params !== 'object' || params === null
                                || typeof params[field] === 'undefined' || params[field] === '') {
                            throw 'Required param is not set: "' + field + '".';
                        }
                    });

                    Nomad.params = params;
                    if (typeof Nomad.params.api_endpoint === 'string' && !Nomad.params.api_endpoint.endsWith('/')) {
                        Nomad.params.api_endpoint += '/';
                    }
                },

                // Sends a GET request to api_endpoint + query and returns
                // { status: <HTTP status>, response: <parsed JSON body or null> }.
                request: function (query) {
                    var response,
                        request = new HttpRequest(),
                        url = Nomad.params.api_endpoint + query;

                    request.addHeader('Content-Type: application/json');

                    // Add the token header only when a token is set, so a
                    // missing parameter is never sent as "undefined".
                    if (Nomad.params.token) {
                        request.addHeader('X-Nomad-Token: ' + Nomad.params.token);
                    }

                    // set proxy if needed
                    if (Nomad.params.http_proxy) {
                        request.setProxy(Nomad.params.http_proxy);
                        Zabbix.log(4, '[ Nomad ] Using http proxy: ' + Nomad.params.http_proxy);
                    }

                    Zabbix.log(4, '[ Nomad ] Sending request: ' + url);

                    response = request.get(url);

                    Zabbix.log(4, '[ Nomad ] Received response with status code ' + request.getStatus() + ': ' + response);

                    if (response !== null) {
                        try {
                            response = JSON.parse(response);
                        } catch (error) {
                            throw 'Failed to parse response received from Nomad agent API.';
                        }
                    }

                    return {
                        status: request.getStatus(),
                        response: response
                    };
                },

                // Walks a dot-separated path through data and returns the value
                // found there; throws when any step is missing.
                getField: function (data, path) {
                    var steps = path.split('.');

                    for (var i = 0; i < steps.length; i++) {
                        var step = steps[i];

                        // null passes a bare typeof check, hence the explicit test.
                        if (typeof data !== 'object' || data === null || typeof data[step] === 'undefined') {
                            throw 'Required field was not found: ' + path;
                        }

                        data = data[step];
                    }

                    return data;
                },

                // Returns the IDs of the servers listed in the raft configuration.
                getIds: function () {
                    var result = this.request('v1/operator/raft/configuration');

                    if (typeof result.response !== 'object' || result.response === null || result.status != 200) {
                        throw 'Cannot get servers list from Nomad agent API.';
                    }

                    return this.getField(result, 'response.Servers')
                        .map(function (srv) {
                            return srv['ID'];
                        });
                },

                // Returns the agent members whose Tags.id is one of the raft
                // server IDs.
                getServers: function () {
                    var ids = this.getIds(),
                        result = Nomad.request('v1/agent/members');

                    if (typeof result.response !== 'object' || result.response === null || result.status != 200) {
                        throw 'Cannot get servers list from Nomad agent API.';
                    }

                    return this.getField(result, 'response.Members').filter(function (s) {
                        // Members without Tags cannot match any server ID.
                        return Boolean(s && s.Tags) && ids.indexOf(s.Tags.id) >= 0;
                    });
                }
            };

            try {
                Nomad.setParams(JSON.parse(value));

                var servers = Nomad.getServers();

                return JSON.stringify(servers);
            } catch (error) {
                // Report the failure as data so dependent items can process it.
                error += ((String(error).endsWith('.')) ? '' : '.');
                Zabbix.log(3, '[ Nomad ] ERROR: ' + error);

                return JSON.stringify({ error: error + ' Check debug log for more information.' });
            }
          description: 'Nomad servers data in raw format.'
          timeout: '{$NOMAD.DATA.TIMEOUT}'
          parameters:
            - name: api_endpoint
              value: '{$NOMAD.ENDPOINT.API.URL}'
            - name: token
              value: '{$NOMAD.TOKEN}'
            - name: http_proxy
              value: '{$NOMAD.HTTP.PROXY}'
          tags:
            - tag: component
              value: raw
        # Counts the array entries that have a Name field.
        - uuid: 904e226008324cb8aa11f716e0420b51
          name: 'HashiCorp Nomad: Nomad servers count'
          type: DEPENDENT
          key: nomad.servers.count
          delay: '0'
          history: 7d
          description: 'Nomad servers count.'
          preprocessing:
            - type: JSONPATH
              parameters:
                - '$[?(@.Name)].length()'
          master_item:
            key: nomad.server.nodes.get
      discovery_rules:
        # Creates one host per client node from the body of 'Nomad clients get';
        # all six include/exclude conditions must hold (evaltype AND).
        - uuid: 0ba167e5638344ab842224df5b8909e1
          name: 'Clients discovery'
          type: DEPENDENT
          key: nomad.clients.discovery
          delay: '0'
          filter:
            evaltype: AND
            conditions:
              - macro: '{#CLIENT.NAME}'
                value: '{$NOMAD.CLIENT.NAME.MATCHES}'
                formulaid: C
              - macro: '{#CLIENT.NAME}'
                value: '{$NOMAD.CLIENT.NAME.NOT_MATCHES}'
                operator: NOT_MATCHES_REGEX
                formulaid: D
              - macro: '{#CLIENT.DC}'
                value: '{$NOMAD.CLIENT.DC.MATCHES}'
                formulaid: A
              - macro: '{#CLIENT.DC}'
                value: '{$NOMAD.CLIENT.DC.NOT_MATCHES}'
                operator: NOT_MATCHES_REGEX
                formulaid: B
              - macro: '{#CLIENT.SCHEDULE.ELIGIBILITY}'
                value: '{$NOMAD.CLIENT.SCHEDULE.ELIGIBILITY.MATCHES}'
                formulaid: E
              - macro: '{#CLIENT.SCHEDULE.ELIGIBILITY}'
                value: '{$NOMAD.CLIENT.SCHEDULE.ELIGIBILITY.NOT_MATCHES}'
                operator: NOT_MATCHES_REGEX
                formulaid: F
          lifetime: 7d
          description: 'Client nodes discovery.'
          host_prototypes:
            - uuid: 8284dcf3055749d3b27720526c665e34
              host: '{#CLIENT.ID}'
              name: '{#CLIENT.NAME}-client'
              group_links:
                - group:
                    name: Applications
              templates:
                - name: 'HashiCorp Nomad Client by HTTP'
              tags:
                - tag: class
                  value: '{#CLIENT.CLASS}'
                - tag: dc
                  value: '{#CLIENT.DC}'
                - tag: drained
                  value: '{#CLIENT.DRAIN.STATE}'
                - tag: os
                  value: '{#CLIENT.OS}'
                - tag: scheduling
                  value: '{#CLIENT.SCHEDULE.ELIGIBILITY}'
                - tag: service
                  value: nomad-client
                - tag: version
                  value: '{#CLIENT.VERSION}'
              custom_interfaces: 'YES'
              interfaces:
                - ip: '{#CLIENT.IP}'
          master_item:
            key: nomad.client.nodes.get
          lld_macro_paths:
            - lld_macro: '{#CLIENT.CLASS}'
              path: $.NodeClass
            - lld_macro: '{#CLIENT.DC}'
              path: $.Datacenter
            - lld_macro: '{#CLIENT.DRAIN.STATE}'
              path: $.Drain
            - lld_macro: '{#CLIENT.ID}'
              path: $.ID
            - lld_macro: '{#CLIENT.IP}'
              path: $.Address
            - lld_macro: '{#CLIENT.NAME}'
              path: $.Name
            - lld_macro: '{#CLIENT.OS}'
              path: '$.Attributes[''os.name'']'
            - lld_macro: '{#CLIENT.SCHEDULE.ELIGIBILITY}'
              path: $.SchedulingEligibility
            - lld_macro: '{#CLIENT.VERSION}'
              path: $.Version
          preprocessing:
            # A value without a body is discarded rather than reported.
            - type: JSONPATH
              parameters:
                - $.body
              error_handler: DISCARD_VALUE
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
        # Creates one host per server node from the 'Nomad servers get' output.
        - uuid: 202ee1d0060d48b09a48fe2039d3b081
          name: 'Servers discovery'
          type: DEPENDENT
          key: nomad.servers.discovery
          delay: '0'
          filter:
            evaltype: AND
            conditions:
              - macro: '{#SERVER.NAME}'
                value: '{$NOMAD.SERVER.NAME.MATCHES}'
                formulaid: C
              - macro: '{#SERVER.NAME}'
                value: '{$NOMAD.SERVER.NAME.NOT_MATCHES}'
                operator: NOT_MATCHES_REGEX
                formulaid: D
              - macro: '{#SERVER.DC}'
                value: '{$NOMAD.SERVER.DC.MATCHES}'
                formulaid: A
              - macro: '{#SERVER.DC}'
                value: '{$NOMAD.SERVER.DC.NOT_MATCHES}'
                operator: NOT_MATCHES_REGEX
                formulaid: B
          lifetime: 7d
          description: 'Server nodes discovery.'
          host_prototypes:
            - uuid: addb1ce7995f44089a13128052171445
              host: '{#SERVER.ID}'
              name: '{#SERVER.NAME}'
              group_links:
                - group:
                    name: Applications
              templates:
                - name: 'HashiCorp Nomad Server by HTTP'
              tags:
                - tag: dc
                  value: '{#SERVER.DC}'
                - tag: region
                  value: '{#SERVER.REGION}'
                - tag: role
                  value: '{#SERVER.ROLE}'
                - tag: service
                  value: nomad-server
                - tag: version
                  value: '{#SERVER.VERSION}'
              custom_interfaces: 'YES'
              interfaces:
                - ip: '{#SERVER.IP}'
          master_item:
            key: nomad.server.nodes.get
          lld_macro_paths:
            - lld_macro: '{#SERVER.DC}'
              path: $.Tags.dc
            - lld_macro: '{#SERVER.ID}'
              path: $.Tags.id
            - lld_macro: '{#SERVER.IP}'
              path: $.Addr
            - lld_macro: '{#SERVER.NAME}'
              path: $.Name
            - lld_macro: '{#SERVER.REGION}'
              path: $.Tags.region
            - lld_macro: '{#SERVER.ROLE}'
              path: $.Tags.role
            - lld_macro: '{#SERVER.VERSION}'
              path: $.Tags.build
          preprocessing:
            # Script output carrying an "error" field is discarded.
            - type: CHECK_JSON_ERROR
              parameters:
                - $.error
              error_handler: DISCARD_VALUE
            - type: DISCARD_UNCHANGED_HEARTBEAT
              parameters:
                - 1h
      tags:
        - tag: class
          value: software
        - tag: target
          value: nomad
      # User macros referenced by the items, triggers and discovery filters above.
      macros:
        - macro: '{$NOMAD.API.RESPONSE.SUCCESS}'
          value: '200'
          description: 'HTTP API successful response code. Availability triggers threshold. Change, if needed.'
        - macro: '{$NOMAD.CLIENT.DC.MATCHES}'
          value: '.*'
          description: 'The filter to include HashiCorp Nomad clients by datacenter belonging.'
        - macro: '{$NOMAD.CLIENT.DC.NOT_MATCHES}'
          value: CHANGE_IF_NEEDED
          description: 'The filter to exclude HashiCorp Nomad clients by datacenter belonging.'
        - macro: '{$NOMAD.CLIENT.NAME.MATCHES}'
          value: '.*'
          description: 'The filter to include HashiCorp Nomad clients by name.'
        - macro: '{$NOMAD.CLIENT.NAME.NOT_MATCHES}'
          value: CHANGE_IF_NEEDED
          description: 'The filter to exclude HashiCorp Nomad clients by name.'
        - macro: '{$NOMAD.CLIENT.SCHEDULE.ELIGIBILITY.MATCHES}'
          value: '.*'
          description: 'The filter to include HashiCorp Nomad clients by scheduling eligibility.'
        - macro: '{$NOMAD.CLIENT.SCHEDULE.ELIGIBILITY.NOT_MATCHES}'
          value: CHANGE_IF_NEEDED
          description: 'The filter to exclude HashiCorp Nomad clients by scheduling eligibility.'
        - macro: '{$NOMAD.DATA.TIMEOUT}'
          value: 15s
          description: 'Response timeout for an API.'
        - macro: '{$NOMAD.ENDPOINT.API.URL}'
          value: 'http://localhost:4646'
          description: 'API endpoint URL for one of the Nomad cluster members.'
        - macro: '{$NOMAD.HTTP.PROXY}'
          description: 'Sets the HTTP proxy for script and HTTP agent items. If this parameter is empty, then no proxy is used.'
        - macro: '{$NOMAD.SERVER.DC.MATCHES}'
          value: '.*'
          description: 'The filter to include HashiCorp Nomad servers by datacenter belonging.'
        - macro: '{$NOMAD.SERVER.DC.NOT_MATCHES}'
          value: CHANGE_IF_NEEDED
          description: 'The filter to exclude HashiCorp Nomad servers by datacenter belonging.'
        - macro: '{$NOMAD.SERVER.NAME.MATCHES}'
          value: '.*'
          description: 'The filter to include HashiCorp Nomad servers by name.'
        - macro: '{$NOMAD.SERVER.NAME.NOT_MATCHES}'
          value: CHANGE_IF_NEEDED
          description: 'The filter to exclude HashiCorp Nomad servers by name.'
        - macro: '{$NOMAD.TOKEN}'
          value: ''
          description: 'Nomad authentication token.'
dashboards: - uuid: 0b1cb124081f4cf1af2e35245aeb415f name: 'Region resources' pages: - name: 'Region resources' widgets: - type: item name: Clients x: '16' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad by HTTP' key: nomad.clients.count - type: item name: Region width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad by HTTP' key: nomad.region - type: item name: Servers x: '8' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad by HTTP' key: nomad.servers.count - uuid: 44eac6a1abe34999b85ad6d0e40073fd template: 'HashiCorp Nomad Client by HTTP' name: 'HashiCorp Nomad Client by HTTP' description: | Get HashiCorp Nomad client metrics by HTTP from metrics endpoint. More information about metrics is available in the official documentation: https://developer.hashicorp.com/nomad/docs/operations/metrics-reference. You can discuss this template or leave feedback on our forum: https://www.zabbix.com/forum/zabbix-suggestions-and-feedback. Generated by official Zabbix template tool "Templator" 2.0.0 vendor: name: Zabbix version: 7.0-0 groups: - name: Templates/Applications items: - uuid: f98bd789c9f7409aac2e1902776ddc21 name: 'HashiCorp Nomad Client: Service [rpc] state' type: SIMPLE key: 'net.tcp.service[tcp,,{$NOMAD.CLIENT.RPC.PORT}]' history: 7d trends: 90d description: 'Current [rpc] service state.' 
valuemap: name: 'Service state' preprocessing: - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h tags: - tag: component value: network triggers: - uuid: f368ff4de45e4620963135836eb54092 expression: 'last(/HashiCorp Nomad Client by HTTP/net.tcp.service[tcp,,{$NOMAD.CLIENT.RPC.PORT}]) = 0' name: 'HashiCorp Nomad Client: Service [rpc] is down' opdata: 'Service: rpc, Port: {$NOMAD.CLIENT.RPC.PORT}, State: {ITEM.LASTVALUE}' priority: AVERAGE description: | Cannot establish the connection to [rpc] service port {$NOMAD.CLIENT.RPC.PORT}. Check the Nomad state and network connectivity between Nomad and Zabbix. manual_close: 'YES' tags: - tag: scope value: availability - uuid: 00b39edd70cc42daa565879494f15f38 name: 'HashiCorp Nomad Client: Service [serf] state' type: SIMPLE key: 'net.tcp.service[tcp,,{$NOMAD.CLIENT.SERF.PORT}]' history: 7d trends: 90d description: 'Current [serf] service state.' valuemap: name: 'Service state' preprocessing: - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h tags: - tag: component value: network triggers: - uuid: 0108c9f85b48433c8b9864e41458994f expression: 'last(/HashiCorp Nomad Client by HTTP/net.tcp.service[tcp,,{$NOMAD.CLIENT.SERF.PORT}]) = 0' name: 'HashiCorp Nomad Client: Service [serf] is down' opdata: 'Service: serf, Port: {$NOMAD.CLIENT.SERF.PORT}, State: {ITEM.LASTVALUE}' priority: AVERAGE description: | Cannot establish the connection to [serf] service port {$NOMAD.CLIENT.SERF.PORT}. Check the Nomad state and network connectivity between Nomad and Zabbix. manual_close: 'YES' tags: - tag: scope value: availability - uuid: 13aa2136b7634c32a56fe00b29056d2d name: 'HashiCorp Nomad Client: CPU allocated' type: DEPENDENT key: nomad.client.allocated.cpu delay: '0' history: 7d trends: 90d units: '!Mhz' description: 'Total amount of CPU shares the scheduler has allocated to tasks.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_allocated_cpu - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.client.metrics.get tags: - tag: component value: cpu - uuid: fb7211ae27c845bba8dca7e75e0083b8 name: 'HashiCorp Nomad Client: Disk allocated' type: DEPENDENT key: nomad.client.allocated.disk delay: '0' history: 7d trends: 90d units: B description: 'Total amount of disk space the scheduler has allocated to tasks.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_allocated_disk - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1000000' master_item: key: nomad.client.metrics.get tags: - tag: component value: storage - uuid: 27c71fbb37f14deba301c3b5c7daecb5 name: 'HashiCorp Nomad Client: Memory allocated' type: DEPENDENT key: nomad.client.allocated.memory delay: '0' history: 7d trends: 90d units: B description: 'Total amount of memory the scheduler has allocated to tasks.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_allocated_memory - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1000000' master_item: key: nomad.client.metrics.get tags: - tag: component value: memory - uuid: 945d348f2f2d4c02b3a1ec6f32f1290d name: 'HashiCorp Nomad Client: Allocations blocked' type: DEPENDENT key: nomad.client.allocations.blocked delay: '0' history: 7d trends: 90d description: 'Number of allocations waiting for previous versions.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_allocations_blocked - value - '' error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.client.metrics.get tags: - tag: component value: allocations - uuid: f556557bf5de438d9aaf941ab5c8d93b name: 'HashiCorp Nomad Client: Allocations completed, rate' type: DEPENDENT key: nomad.client.allocations.complete delay: '0' history: 7d trends: 90d description: 'Number of allocations completed.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_allocs_complete - function - sum error_handler: CUSTOM_VALUE error_handler_params: '0' - type: CHANGE_PER_SECOND parameters: - '' - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h master_item: key: nomad.client.metrics.get tags: - tag: component value: allocations - uuid: 586bc8f63e7f46cf99337c7f70809c43 name: 'HashiCorp Nomad Client: Allocations failed, rate' type: DEPENDENT key: nomad.client.allocations.failed delay: '0' history: 7d trends: 90d description: 'Number of allocations failed.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_allocs_failed - function - sum error_handler: CUSTOM_VALUE error_handler_params: '0' - type: CHANGE_PER_SECOND parameters: - '' - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h master_item: key: nomad.client.metrics.get tags: - tag: component value: allocations - uuid: 88b2eb8e9e8041df83674d3800b1f8c1 name: 'HashiCorp Nomad Client: Allocations migrating' type: DEPENDENT key: nomad.client.allocations.migrating delay: '0' history: 7d trends: 90d description: 'Number of allocations migrating data from previous versions.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_allocations_migrating - value - '' error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.client.metrics.get tags: - tag: component value: allocations - uuid: 717172608fbb4d81bfb65e7f4aa4aaf3 name: 'HashiCorp Nomad Client: Allocations OOM killed' type: DEPENDENT key: nomad.client.allocations.oom_killed delay: '0' history: 7d trends: 90d description: 'Number of allocations OOM killed.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_allocs_oom_killed - value - '' error_handler: CUSTOM_VALUE error_handler_params: '0' - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h master_item: key: nomad.client.metrics.get tags: - tag: component value: allocations triggers: - uuid: 747de32c46cb4d95851cee8a30c576b0 expression: 'last(/HashiCorp Nomad Client by HTTP/nomad.client.allocations.oom_killed) > 0' name: 'HashiCorp Nomad Client: OOM killed allocations found' opdata: 'OOM killed: {ITEM.LASTVALUE}' priority: WARNING description: 'OOM killed allocations found.' manual_close: 'YES' tags: - tag: scope value: performance - uuid: 9fc77ab432c04cf4b1c0f1941da641d3 name: 'HashiCorp Nomad Client: Allocations pending' type: DEPENDENT key: nomad.client.allocations.pending delay: '0' history: 7d trends: 90d description: 'Number of allocations pending (received by the client but not yet running).' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_allocations_pending - value - '' error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.client.metrics.get tags: - tag: component value: allocations - uuid: eae33da538c642868cf9afe9e6495c95 name: 'HashiCorp Nomad Client: Allocations restarted, rate' type: DEPENDENT key: nomad.client.allocations.restart delay: '0' history: 7d trends: 90d description: 'Number of allocations restarted.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_allocs_restart - function - sum error_handler: CUSTOM_VALUE error_handler_params: '0' - type: CHANGE_PER_SECOND parameters: - '' - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h master_item: key: nomad.client.metrics.get tags: - tag: component value: allocations - uuid: 6a4903e8bfa04f18a44813349ea6b33d name: 'HashiCorp Nomad Client: Allocations running' type: DEPENDENT key: nomad.client.allocations.running delay: '0' history: 7d trends: 90d description: 'Number of allocations running.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_allocations_running - value - '' error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.client.metrics.get tags: - tag: component value: allocations - uuid: 3fcbebb8436d4dec8db6625e44520999 name: 'HashiCorp Nomad Client: Allocations starting' type: DEPENDENT key: nomad.client.allocations.start delay: '0' history: 7d trends: 90d description: 'Number of allocations starting.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_allocations_start - value - '' error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.client.metrics.get tags: - tag: component value: allocations - uuid: 166aad9bafeb478abcd8c61ea1d0cc98 name: 'HashiCorp Nomad Client: Allocations terminal' type: DEPENDENT key: nomad.client.allocations.terminal delay: '0' history: 7d trends: 90d description: 'Number of allocations terminal.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_allocations_terminal - value - '' error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.client.metrics.get tags: - tag: component value: allocations - uuid: 31faeb4af30f4f878050eca21e6f97df name: 'HashiCorp Nomad Client: CPU idle utilization' type: DEPENDENT key: nomad.client.cpu.idle delay: '0' history: 7d trends: 90d value_type: FLOAT units: '%' description: 'CPU utilization in idle state.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_host_cpu_idle - function - avg error_handler: DISCARD_VALUE master_item: key: nomad.client.metrics.get tags: - tag: component value: cpu - uuid: ad75d4b5e69c4e11bc886582791951be name: 'HashiCorp Nomad Client: CPU system utilization' type: DEPENDENT key: nomad.client.cpu.system delay: '0' history: 7d trends: 90d value_type: FLOAT units: '%' description: 'CPU utilization in system space.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_host_cpu_system - function - avg error_handler: DISCARD_VALUE master_item: key: nomad.client.metrics.get tags: - tag: component value: cpu - uuid: 71eb3676049a4d60aa69ef3901005894 name: 'HashiCorp Nomad Client: CPU total utilization' type: DEPENDENT key: nomad.client.cpu.total delay: '0' history: 7d trends: 90d value_type: FLOAT units: '%' description: 'Total CPU utilization.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_host_cpu_total - function - avg error_handler: DISCARD_VALUE master_item: key: nomad.client.metrics.get tags: - tag: component value: cpu triggers: - uuid: 59db4f1d22cb48fab0ced8ad9ec39a25 expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.cpu.total, 10m) >= {$NOMAD.CPU.UTIL.MIN}' name: 'HashiCorp Nomad Client: High CPU utilization' event_name: 'HashiCorp Nomad Client: High CPU utilization: (over >= {$NOMAD.CPU.UTIL.MIN}% over last 10m)' opdata: 'Current utilization: {ITEM.LASTVALUE}' priority: AVERAGE description: 'CPU utilization is too high. The system might be slow to respond.' tags: - tag: scope value: performance - uuid: cc0671cc71fb4bdd8cdf7a959e1022fd name: 'HashiCorp Nomad Client: CPU user utilization' type: DEPENDENT key: nomad.client.cpu.user delay: '0' history: 7d trends: 90d value_type: FLOAT units: '%' description: 'CPU utilization in user space.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_host_cpu_user - function - avg error_handler: DISCARD_VALUE master_item: key: nomad.client.metrics.get tags: - tag: component value: cpu - uuid: 71be24cf848a448dbb993a577015bd9e name: 'HashiCorp Nomad Client: Monitoring API response' type: DEPENDENT key: nomad.client.data.api.response delay: '0' history: 7d trends: '0' value_type: TEXT description: 'Monitoring API response message.' 
preprocessing: - type: JAVASCRIPT parameters: - | try { var response = Object.keys(JSON.parse(value).header).filter(function (f) { return f.match(/HTTP\/[\d.]+\s+\d{3}/); }); return response.pop(); } catch (error) { return "HTTP/1.1 408 Request timeout"; } - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h master_item: key: nomad.client.data.get tags: - tag: component value: status triggers: - uuid: 676e1535cde2424cbda78f18cd9084bf expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0' name: 'HashiCorp Nomad Client: Monitoring API connection has failed' event_name: 'HashiCorp Nomad Client: Monitoring API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", \1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", \1")}' priority: AVERAGE description: | Monitoring API connection has failed. Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix. manual_close: 'YES' tags: - tag: scope value: availability - uuid: fe07fd88f3e14273b3f0b135aab26236 name: 'HashiCorp Nomad Client: Telemetry get' type: HTTP_AGENT key: nomad.client.data.get history: '0' trends: '0' value_type: TEXT description: 'Telemetry data in raw format.' 
preprocessing: - type: CHECK_NOT_SUPPORTED parameters: - '' error_handler: CUSTOM_VALUE error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}' timeout: '{$NOMAD.DATA.TIMEOUT}' url: '{$NOMAD.CLIENT.API.SCHEME}://{HOST.IP}:{$NOMAD.CLIENT.API.PORT}/v1/metrics' query_fields: - name: format value: prometheus status_codes: '' http_proxy: '{$NOMAD.HTTP.PROXY}' headers: - name: X-Nomad-Token value: '{$NOMAD.TOKEN}' retrieve_mode: BOTH output_format: JSON tags: - tag: component value: raw - uuid: c9bfa74720ea4e228e5d200ab1f38074 name: 'HashiCorp Nomad Client: Allocations API response' type: DEPENDENT key: nomad.client.job.allocs.api.response delay: '0' history: 7d trends: '0' value_type: TEXT description: 'Allocations API response message.' preprocessing: - type: JAVASCRIPT parameters: - | try { var response = Object.keys(JSON.parse(value).header).filter(function (f) { return f.match(/HTTP\/[\d.]+\s+\d{3}/); }); return response.pop(); } catch (error) { return "HTTP/1.1 408 Request timeout"; } - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h master_item: key: nomad.client.job.allocs.get tags: - tag: component value: status triggers: - uuid: 300f5fba0f45465aa6868a3f1d16633d expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.job.allocs.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0' name: 'HashiCorp Nomad Client: Allocations API connection has failed' event_name: 'HashiCorp Nomad Client: Allocations API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", \1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", \1")}' priority: AVERAGE description: | Allocations API connection has failed. Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix. 
manual_close: 'YES' dependencies: - name: 'HashiCorp Nomad Client: Monitoring API connection has failed' expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0' tags: - tag: scope value: availability - uuid: 9948608c5b3a4dad9d1b433483296427 name: 'HashiCorp Nomad Client: Allocated jobs get' type: HTTP_AGENT key: nomad.client.job.allocs.get delay: 1h history: '0' trends: '0' value_type: TEXT description: 'Allocated jobs data in raw format.' preprocessing: - type: CHECK_NOT_SUPPORTED parameters: - '' error_handler: CUSTOM_VALUE error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}' timeout: '{$NOMAD.DATA.TIMEOUT}' url: '{$NOMAD.CLIENT.API.SCHEME}://{HOST.IP}:{$NOMAD.CLIENT.API.PORT}/v1/allocations' query_fields: - name: reverse value: '1' - name: task_states - name: filter value: 'NodeID == "{HOST.HOST}" and DesiredStatus == "run" and ClientStatus == "running"' status_codes: '' http_proxy: '{$NOMAD.HTTP.PROXY}' headers: - name: X-Nomad-Token value: '{$NOMAD.TOKEN}' retrieve_mode: BOTH output_format: JSON tags: - tag: component value: raw - uuid: f312a5ba5758426d83c5c3c414d765fd name: 'HashiCorp Nomad Client: Memory available' type: DEPENDENT key: nomad.client.memory.available delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Total amount of memory available to processes which includes free and cached memory.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_host_memory_available - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.client.metrics.get tags: - tag: component value: memory - uuid: e379e923ed6b45a682f1da192b3a328d name: 'HashiCorp Nomad Client: Memory free' type: DEPENDENT key: nomad.client.memory.free delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Amount of memory which is free.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_host_memory_free - value - '' master_item: key: nomad.client.metrics.get tags: - tag: component value: memory - uuid: 50d152cf3c654e818aa7303841d5444a name: 'HashiCorp Nomad Client: Memory size' type: DEPENDENT key: nomad.client.memory.total delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Total amount of physical memory on the node.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_host_memory_total - value - '' master_item: key: nomad.client.metrics.get tags: - tag: component value: memory - uuid: b2638e0b928e46cd825304a3f1ca1682 name: 'HashiCorp Nomad Client: Memory used' type: DEPENDENT key: nomad.client.memory.used delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Amount of memory used by processes.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_host_memory_used - value - '' master_item: key: nomad.client.metrics.get tags: - tag: component value: memory - uuid: 933621a61d2241d084b48ec6b3bd5b74 name: 'HashiCorp Nomad Client: Metrics' type: DEPENDENT key: nomad.client.metrics.get delay: '0' history: '0' trends: '0' value_type: TEXT description: 'Nomad client metrics in raw format.' preprocessing: - type: JSONPATH parameters: - $.body error_handler: DISCARD_VALUE master_item: key: nomad.client.data.get tags: - tag: component value: raw - uuid: 39430d6dceaa42a68c72ef68f67b1a12 name: 'HashiCorp Nomad Client: Nodes API response' type: DEPENDENT key: nomad.client.node.info.api.response delay: '0' history: 7d trends: '0' value_type: TEXT description: 'Nodes API response message.' 
preprocessing: - type: JAVASCRIPT parameters: - | try { var response = Object.keys(JSON.parse(value).header).filter(function (f) { return f.match(/HTTP\/[\d.]+\s+\d{3}/); }); return response.pop(); } catch (error) { return "HTTP/1.1 408 Request timeout"; } - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h master_item: key: nomad.client.node.info.get tags: - tag: component value: status triggers: - uuid: 3ea14e12d6154492a4505c4b51003ed3 expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.node.info.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0' name: 'HashiCorp Nomad Client: Nodes API connection has failed' event_name: 'HashiCorp Nomad Client: Nodes API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", "\1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", "\1")}' priority: AVERAGE description: | Nodes API connection has failed. Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix. manual_close: 'YES' dependencies: - name: 'HashiCorp Nomad Client: Monitoring API connection has failed' expression: 'find(/HashiCorp Nomad Client by HTTP/nomad.client.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0' tags: - tag: scope value: availability - uuid: 36eb68c80b194a4591208cf210fb6d6e name: 'HashiCorp Nomad Client: Node info get' type: HTTP_AGENT key: nomad.client.node.info.get delay: 1h history: '0' trends: '0' value_type: TEXT description: 'Node info data in raw format.' 
preprocessing: - type: CHECK_NOT_SUPPORTED parameters: - '' error_handler: CUSTOM_VALUE error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}' timeout: '{$NOMAD.DATA.TIMEOUT}' url: '{$NOMAD.CLIENT.API.SCHEME}://{HOST.IP}:{$NOMAD.CLIENT.API.PORT}/v1/nodes' query_fields: - name: filter value: 'ID == "{HOST.HOST}"' status_codes: '' http_proxy: '{$NOMAD.HTTP.PROXY}' headers: - name: X-Nomad-Token value: '{$NOMAD.TOKEN}' retrieve_mode: BOTH output_format: JSON tags: - tag: component value: raw - uuid: 18e57950b0c041e0a2abc0dbd0644b33 name: 'HashiCorp Nomad Client: CPU unallocated' type: DEPENDENT key: nomad.client.unallocated.cpu delay: '0' history: 7d trends: 90d units: '!Mhz' description: 'Total amount of CPU shares free for the scheduler to allocate to tasks.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_unallocated_cpu - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.client.metrics.get tags: - tag: component value: cpu - uuid: 44619e3740a9403581d54c6555ecd967 name: 'HashiCorp Nomad Client: Disk unallocated' type: DEPENDENT key: nomad.client.unallocated.disk delay: '0' history: 7d trends: 90d units: B description: 'Total amount of disk space free for the scheduler to allocate to tasks.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_unallocated_disk - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1000000' master_item: key: nomad.client.metrics.get tags: - tag: component value: storage - uuid: bc0928fcff2f458f83874d7442274e61 name: 'HashiCorp Nomad Client: Memory unallocated' type: DEPENDENT key: nomad.client.unallocated.memory delay: '0' history: 7d trends: 90d units: B description: 'Total amount of memory free for the scheduler to allocate to tasks.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_unallocated_memory - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1000000' master_item: key: nomad.client.metrics.get tags: - tag: component value: memory - uuid: 3561fb69284f4c88a7f5d8c7f04c3b14 name: 'HashiCorp Nomad Client: Uptime' type: DEPENDENT key: nomad.client.uptime delay: '0' history: 7d trends: 90d units: uptime description: 'Uptime of the host running the Nomad client.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_client_uptime - value - '' master_item: key: nomad.client.metrics.get tags: - tag: component value: system triggers: - uuid: 74fe77871e8c4161b5944f30dd4d1c84 expression: 'last(/HashiCorp Nomad Client by HTTP/nomad.client.uptime) < 10m' name: 'HashiCorp Nomad Client: The host has been restarted' event_name: 'HashiCorp Nomad Client: The host has been restarted: (uptime < 10m)' priority: WARNING description: 'The host uptime is less than 10 minutes.' manual_close: 'YES' tags: - tag: scope value: notice - uuid: 872961bd507c4f6380168d2284ead1ae name: 'HashiCorp Nomad Client: Nomad client version' type: DEPENDENT key: nomad.client.version delay: '0' history: 7d trends: '0' value_type: CHAR description: 'Nomad client version.' preprocessing: - type: JSONPATH parameters: - $.body..Version.first() master_item: key: nomad.client.node.info.get triggers: - uuid: afac97bced6f49bc994ae633b662722c expression: 'change(/HashiCorp Nomad Client by HTTP/nomad.client.version)<>0' name: 'HashiCorp Nomad Client: Nomad client version has changed' event_name: 'HashiCorp Nomad Client: Nomad client version has changed to {ITEM.LASTVALUE}' priority: INFO description: 'Nomad client version has changed.' 
manual_close: 'YES' tags: - tag: scope value: availability discovery_rules: - uuid: c41a6405aea7400fb90c207ce95c6594 name: 'Allocated jobs discovery' type: DEPENDENT key: nomad.client.alloc.discovery delay: '0' filter: evaltype: AND conditions: - macro: '{#JOB.NAME}' value: '{$NOMAD.JOB.NAME.MATCHES}' formulaid: A - macro: '{#JOB.NAME}' value: '{$NOMAD.JOB.NAME.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: B - macro: '{#JOB.TASK.GROUP}' value: '{$NOMAD.JOB.TASK.GROUP.MATCHES}' formulaid: E - macro: '{#JOB.TASK.GROUP}' value: '{$NOMAD.JOB.TASK.GROUP.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: F - macro: '{#JOB.NAMESPACE}' value: '{$NOMAD.JOB.NAMESPACE.MATCHES}' formulaid: C - macro: '{#JOB.NAMESPACE}' value: '{$NOMAD.JOB.NAMESPACE.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: D - macro: '{#JOB.TYPE}' value: '{$NOMAD.JOB.TYPE.MATCHES}' formulaid: G - macro: '{#JOB.TYPE}' value: '{$NOMAD.JOB.TYPE.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: H lifetime: '0' description: 'Allocated jobs discovery.' item_prototypes: - uuid: ba9e5a022dbc4f7aa28a1db2ef6d73bb name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU allocated' type: DEPENDENT key: 'nomad.client.allocs.cpu.allocated["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: '!Mhz' description: 'Total CPU resources allocated by the ["{#JOB.NAME}"] job across all cores.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_allocs_cpu_allocated{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}' - function - avg master_item: key: nomad.client.metrics.get tags: - tag: component value: cpu - tag: component value: job - tag: job value: '{#JOB.NAME}' - tag: namespace value: '{#JOB.NAMESPACE}' - tag: task-group value: '{#JOB.TASK.GROUP}' - uuid: 85daa0ce6d4f41ab83a3a65ceb8359a9 name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU system utilization' type: DEPENDENT key: 'nomad.client.allocs.cpu.system["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: '%' description: 'Total CPU resources consumed by the ["{#JOB.NAME}"] job in system space.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_allocs_cpu_system{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}' - function - avg master_item: key: nomad.client.metrics.get tags: - tag: component value: cpu - tag: component value: job - tag: job value: '{#JOB.NAME}' - tag: namespace value: '{#JOB.NAMESPACE}' - tag: task-group value: '{#JOB.TASK.GROUP}' - uuid: d5ec55f8d9a74c99924a01acb206295e name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU throttled periods time' type: DEPENDENT key: 'nomad.client.allocs.cpu.throttled_periods["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Total number of CPU periods that the ["{#JOB.NAME}"] job was throttled.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_allocs_cpu_throttled_periods{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}' - function - avg - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.client.metrics.get tags: - tag: component value: cpu - tag: component value: job - tag: job value: '{#JOB.NAME}' - tag: namespace value: '{#JOB.NAMESPACE}' - tag: task-group value: '{#JOB.TASK.GROUP}' - uuid: 3122f6d0c1b24ea7bac7a3af4fc00fb2 name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU throttled time' type: DEPENDENT key: 'nomad.client.allocs.cpu.throttled_time["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Total time that the ["{#JOB.NAME}"] job was throttled.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_allocs_cpu_throttled_time{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}' - function - avg error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.client.metrics.get tags: - tag: component value: cpu - tag: component value: job - tag: job value: '{#JOB.NAME}' - tag: namespace value: '{#JOB.NAMESPACE}' - tag: task-group value: '{#JOB.TASK.GROUP}' - uuid: 5ff1ea02768243f4971339bf70f89772 name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU total utilization' type: DEPENDENT key: 'nomad.client.allocs.cpu.total_percent["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: '%' description: 'Total CPU resources consumed by the ["{#JOB.NAME}"] job across all cores.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_allocs_cpu_total_percent{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}' - function - avg master_item: key: nomad.client.metrics.get tags: - tag: component value: cpu - tag: component value: job - tag: job value: '{#JOB.NAME}' - tag: namespace value: '{#JOB.NAMESPACE}' - tag: task-group value: '{#JOB.TASK.GROUP}' - uuid: 14179529be9d47bc898a5330ff37a9c5 name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU ticks' type: DEPENDENT key: 'nomad.client.allocs.cpu.total_ticks["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'CPU ticks consumed by the process for the ["{#JOB.NAME}"] job in the last collection interval.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_allocs_cpu_total_ticks{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}' - function - avg master_item: key: nomad.client.metrics.get tags: - tag: component value: cpu - tag: component value: job - tag: job value: '{#JOB.NAME}' - tag: namespace value: '{#JOB.NAMESPACE}' - tag: task-group value: '{#JOB.TASK.GROUP}' - uuid: 9fb3e3b461874e86aee9483d5af65535 name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] CPU user utilization' type: DEPENDENT key: 'nomad.client.allocs.cpu.user["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: '%' description: 'Total CPU resources consumed by the ["{#JOB.NAME}"] job in user space.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_allocs_cpu_user{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}' - function - avg master_item: key: nomad.client.metrics.get tags: - tag: component value: cpu - tag: component value: job - tag: job value: '{#JOB.NAME}' - tag: namespace value: '{#JOB.NAMESPACE}' - tag: task-group value: '{#JOB.TASK.GROUP}' - uuid: c353af0be02b47faa27036dfdd173176 name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] Memory allocated' type: DEPENDENT key: 'nomad.client.allocs.memory.allocated["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Amount of memory allocated by the ["{#JOB.NAME}"] job.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_allocs_memory_allocated{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}' - function - avg master_item: key: nomad.client.metrics.get tags: - tag: component value: job - tag: component value: memory - tag: job value: '{#JOB.NAME}' - tag: namespace value: '{#JOB.NAMESPACE}' - tag: task-group value: '{#JOB.TASK.GROUP}' - uuid: 7bd7a486864b4d0f82242676da2370ed name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] Memory cached' type: DEPENDENT key: 'nomad.client.allocs.memory.cache["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Amount of memory cached by the ["{#JOB.NAME}"] job.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_allocs_memory_cache{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}' - function - avg master_item: key: nomad.client.metrics.get tags: - tag: component value: job - tag: component value: memory - tag: job value: '{#JOB.NAME}' - tag: namespace value: '{#JOB.NAMESPACE}' - tag: task-group value: '{#JOB.TASK.GROUP}' - uuid: b1419d5017cf4f8ba23e8c774c451772 name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] Memory swapped' type: DEPENDENT key: 'nomad.client.allocs.memory.swap["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Amount of memory swapped by the ["{#JOB.NAME}"] job.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_allocs_memory_swap{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}' - function - avg master_item: key: nomad.client.metrics.get tags: - tag: component value: job - tag: component value: memory - tag: job value: '{#JOB.NAME}' - tag: namespace value: '{#JOB.NAMESPACE}' - tag: task-group value: '{#JOB.TASK.GROUP}' - uuid: 09ba4987f69c496ebeab3fb08f4b499b name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] Memory used' type: DEPENDENT key: 'nomad.client.allocs.memory.usage["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Total amount of memory used by the ["{#JOB.NAME}"] job.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_allocs_memory_usage{job="{#JOB.NAME}",task_group="{#JOB.TASK.GROUP}",namespace="{#JOB.NAMESPACE}"}' - function - avg master_item: key: nomad.client.metrics.get tags: - tag: component value: job - tag: component value: memory - tag: job value: '{#JOB.NAME}' - tag: namespace value: '{#JOB.NAMESPACE}' - tag: task-group value: '{#JOB.TASK.GROUP}' graph_prototypes: - uuid: 2c82c2841f414986b26fc6890707d36d name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] memory utilization' type: STACKED graph_items: - color: 199C0D item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.allocs.memory.allocated["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' - sortorder: '1' color: F63100 item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.allocs.memory.cache["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' - sortorder: '2' color: 00611C item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.allocs.memory.swap["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' - sortorder: '3' color: F7941D item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.allocs.memory.usage["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' - uuid: bb8bbdb0180f4b35800144deec4456bb name: 'HashiCorp Nomad Client: Job [{#JOB.NAME}] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] CPU throttling' graph_items: - color: 199C0D item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.allocs.cpu.throttled_time["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' - sortorder: '1' color: F63100 item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.allocs.cpu.throttled_periods["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' - uuid: f25a5178570e44ecac6a48cb8351cdd3 name: 'HashiCorp Nomad Client: Job [{#JOB.NAME}] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] CPU utilization' type: STACKED 
ymax_type_1: FIXED graph_items: - color: 199C0D item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.allocs.cpu.system["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' - sortorder: '1' color: F63100 item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.allocs.cpu.total_percent["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' - sortorder: '2' color: 00611C item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.allocs.cpu.user["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' master_item: key: nomad.client.job.allocs.get lld_macro_paths: - lld_macro: '{#JOB.NAMESPACE}' path: $.Namespace - lld_macro: '{#JOB.NAME}' path: $.JobID - lld_macro: '{#JOB.TASK.GROUP}' path: $.TaskGroup - lld_macro: '{#JOB.TYPE}' path: $.JobType preprocessing: - type: JAVASCRIPT parameters: - | var raw = JSON.parse(value), body = raw.body, exist_key = [], result = []; function getField(data, path) { var steps = path.split('.'); for (var i = 0; i < steps.length; i++) { var step = steps[i]; if (typeof data !== 'object' || typeof data[step] === 'undefined') { throw 'Required field "' + path + '" is not present in data received.'; } data = data[step]; } return data; } for (i in body) { var uniq_element = true; for (key in exist_key) { if ((exist_key[key].JobID == getField(body[i], 'JobID')) && (exist_key[key].TaskGroup == getField(body[i], 'TaskGroup')) && (exist_key[key].Namespace == getField(body[i], 'Namespace'))) { uniq_element = false; } } if (uniq_element) { exist_key.push({ 'JobID': body[i].JobID, 'TaskGroup': body[i].TaskGroup, 'Namespace': body[i].Namespace }); result.push(body[i]); } } return JSON.stringify(result); - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h - uuid: e2955bd147ed43cb894f0981c6eb3985 name: 'Physical disks discovery' type: DEPENDENT key: nomad.client.disk.discovery delay: '0' filter: evaltype: AND conditions: - macro: '{#DEV.NAME}' value: '{$NOMAD.DISK.NAME.MATCHES}' formulaid: A - macro: '{#DEV.NAME}' value: 
'{$NOMAD.DISK.NAME.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: B description: 'Physical disks discovery.' item_prototypes: - uuid: 4a79c99027494bb98b474e6f81db5e18 name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] space available' type: DEPENDENT key: 'nomad.client.disk.available["{#DEV.NAME}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Amount of space which is available on ["{#DEV.NAME}"] disk.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_host_disk_available{disk="{#DEV.NAME}"}' - value - '' master_item: key: nomad.client.metrics.get tags: - tag: component value: storage - tag: disk value: '{#DEV.NAME}' - uuid: 21043b0ed4424e3086a00a567d134d45 name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] inodes utilization' type: DEPENDENT key: 'nomad.client.disk.inodes_percent["{#DEV.NAME}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: '%' description: 'Disk space consumed by the inodes on ["{#DEV.NAME}"] disk.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_host_disk_inodes_percent{disk="{#DEV.NAME}"}' - value - '' master_item: key: nomad.client.metrics.get tags: - tag: component value: inodes - tag: component value: storage - tag: disk value: '{#DEV.NAME}' trigger_prototypes: - uuid: 433c2227305a4aeb82eb1d390621a81e expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.inodes_percent["{#DEV.NAME}"],5m) >= {$NOMAD.INODES.FREE.MIN.CRIT:"{#DEV.NAME}"}' name: 'HashiCorp Nomad Client: Running out of free inodes on [{#DEV.NAME}] device' event_name: 'Running out of free inodes on [{#DEV.NAME}] (free < {$NOMAD.INODES.FREE.MIN.CRIT:"{#DEV.NAME}"}%)' opdata: 'Free inodes: {ITEM.VALUE}' priority: AVERAGE description: | It may become impossible to write to a disk if there are no index nodes left. The following error messages may be returned as symptoms, even though the free space: - No space left on device; - Disk is full. 
manual_close: 'YES' tags: - tag: scope value: capacity - uuid: d0f2aacaada545c4a81d35f0eb454532 expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.inodes_percent["{#DEV.NAME}"],5m) >= {$NOMAD.INODES.FREE.MIN.WARN:"{#DEV.NAME}"}' name: 'HashiCorp Nomad Client: Running out of free inodes on [{#DEV.NAME}] device' event_name: 'Running out of free inodes on [{#DEV.NAME}] (free < {$NOMAD.INODES.FREE.MIN.WARN:"{#DEV.NAME}"}%)' opdata: 'Free inodes: {ITEM.VALUE}' priority: WARNING description: | It may become impossible to write to a disk if there are no index nodes left. The following error messages may be returned as symptoms, even though the free space: - No space left on device; - Disk is full. manual_close: 'YES' dependencies: - name: 'HashiCorp Nomad Client: Running out of free inodes on [{#DEV.NAME}] device' expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.inodes_percent["{#DEV.NAME}"],5m) >= {$NOMAD.INODES.FREE.MIN.CRIT:"{#DEV.NAME}"}' tags: - tag: scope value: capacity - uuid: 4358e8accdd0400496b491cda7edc909 name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] size' type: DEPENDENT key: 'nomad.client.disk.size["{#DEV.NAME}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Total size of the ["{#DEV.NAME}"] device.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_host_disk_size{disk="{#DEV.NAME}"}' - value - '' master_item: key: nomad.client.metrics.get tags: - tag: component value: storage - tag: disk value: '{#DEV.NAME}' - uuid: 778f4609e236434d8b216b62bfebf885 name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] space used' type: DEPENDENT key: 'nomad.client.disk.used["{#DEV.NAME}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Amount of disk ["{#DEV.NAME}"] space which has been used.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_host_disk_used{disk="{#DEV.NAME}"}' - value - '' master_item: key: nomad.client.metrics.get tags: - tag: component value: storage - tag: disk value: '{#DEV.NAME}' - uuid: aec5d7147bd64f508e793d7539a8524b name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] space utilization' type: DEPENDENT key: 'nomad.client.disk.used_percent["{#DEV.NAME}"]' delay: '0' history: 7d trends: 90d value_type: FLOAT units: '%' description: 'Percentage of disk ["{#DEV.NAME}"] space used.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_client_host_disk_used_percent{disk="{#DEV.NAME}"}' - value - '' master_item: key: nomad.client.metrics.get tags: - tag: component value: storage - tag: disk value: '{#DEV.NAME}' trigger_prototypes: - uuid: b3e22e93d51a4a8d825b0bd2e45fc7a2 expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.used_percent["{#DEV.NAME}"],5m) >= {$NOMAD.DISK.UTIL.MIN.CRIT:"{#DEV.NAME}"}' name: 'HashiCorp Nomad Client: High disk [{#DEV.NAME}] utilization' event_name: 'Disk [{#DEV.NAME}] space low (utilization: >= {$NOMAD.DISK.UTIL.MIN.CRIT:"{#DEV.NAME}"}%)' opdata: 'Current utilization: {ITEM.LASTVALUE}' priority: AVERAGE description: 'High disk [{#DEV.NAME}] utilization.' manual_close: 'YES' tags: - tag: scope value: capacity - uuid: 05c3e6cd711f4a98a21c581a4437ffb5 expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.used_percent["{#DEV.NAME}"],5m) >= {$NOMAD.DISK.UTIL.MIN.WARN:"{#DEV.NAME}"}' name: 'HashiCorp Nomad Client: High disk [{#DEV.NAME}] utilization' event_name: 'Disk [{#DEV.NAME}] space low (utilization: >= {$NOMAD.DISK.UTIL.MIN.WARN:"{#DEV.NAME}"}%)' opdata: 'Current utilization: {ITEM.LASTVALUE}' priority: WARNING description: 'High disk [{#DEV.NAME}] utilization.' 
manual_close: 'YES' dependencies: - name: 'HashiCorp Nomad Client: High disk [{#DEV.NAME}] utilization' expression: 'min(/HashiCorp Nomad Client by HTTP/nomad.client.disk.used_percent["{#DEV.NAME}"],5m) >= {$NOMAD.DISK.UTIL.MIN.CRIT:"{#DEV.NAME}"}' tags: - tag: scope value: capacity graph_prototypes: - uuid: e29168187396478ebec8c80825da070d name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] usage' type: STACKED graph_items: - color: 199C0D item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.disk.size["{#DEV.NAME}"]' - sortorder: '1' color: F63100 item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.disk.available["{#DEV.NAME}"]' - sortorder: '2' color: 00611C item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.disk.used["{#DEV.NAME}"]' - uuid: 3b9290be87e94bd5987eb595debe5f26 name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] utilization' graph_items: - color: 199C0D item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.disk.inodes_percent["{#DEV.NAME}"]' - sortorder: '1' color: F63100 item: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.disk.used_percent["{#DEV.NAME}"]' master_item: key: nomad.client.metrics.get lld_macro_paths: - lld_macro: '{#DEV.NAME}' path: $.labels.disk preprocessing: - type: PROMETHEUS_TO_JSON parameters: - 'nomad_client_host_disk_available{disk=~".*"}' - uuid: cfa76f8c4aaf49c5888ef82d6d7d484d name: 'Drivers discovery' type: DEPENDENT key: nomad.client.drivers.discovery delay: '0' filter: evaltype: AND conditions: - macro: '{#DRIVER.NAME}' value: '{$NOMAD.DRIVER.NAME.MATCHES}' formulaid: C - macro: '{#DRIVER.NAME}' value: '{$NOMAD.DRIVER.NAME.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: D - macro: '{#DRIVER.DETECTED}' value: '{$NOMAD.DRIVER.DETECT.MATCHES}' formulaid: A - macro: '{#DRIVER.DETECTED}' value: '{$NOMAD.DRIVER.DETECT.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: B description: 'Client drivers discovery.' 
item_prototypes: - uuid: 6ffea3b88b29451ea6491cbd34a61148 name: 'HashiCorp Nomad Client: Driver [{#DRIVER.NAME}] detection state' type: DEPENDENT key: 'nomad.client.driver.detected["{#DRIVER.NAME}"]' delay: '0' history: 7d trends: 90d description: 'Driver [{#DRIVER.NAME}] detection state.' valuemap: name: 'Detection state' preprocessing: - type: JSONPATH parameters: - '$.body..Drivers.{#DRIVER.NAME}.Detected.first()' - type: BOOL_TO_DECIMAL parameters: - '' master_item: key: nomad.client.node.info.get tags: - tag: component value: driver - tag: detected value: '{#DRIVER.DETECTED}' - tag: driver value: '{#DRIVER.NAME}' trigger_prototypes: - uuid: 576192afab06466f80c95c1b17c1e6fa expression: 'change(/HashiCorp Nomad Client by HTTP/nomad.client.driver.detected["{#DRIVER.NAME}"]) <> 0' name: 'HashiCorp Nomad Client: Driver [{#DRIVER.NAME}] detection state has changed' opdata: 'Current state: {ITEM.LASTVALUE}' priority: INFO description: 'The [{#DRIVER.NAME}] driver detection state has changed.' manual_close: 'YES' tags: - tag: scope value: availability - uuid: fdff50758360474dafffee8cb7ba6289 name: 'HashiCorp Nomad Client: Driver [{#DRIVER.NAME}] state' type: DEPENDENT key: 'nomad.client.driver.state["{#DRIVER.NAME}"]' delay: '0' history: 7d trends: 90d description: 'Driver [{#DRIVER.NAME}] state.' 
valuemap: name: 'Driver state' preprocessing: - type: JSONPATH parameters: - '$.body..Drivers.{#DRIVER.NAME}.Healthy.first()' - type: BOOL_TO_DECIMAL parameters: - '' - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h master_item: key: nomad.client.node.info.get tags: - tag: component value: driver - tag: detected value: '{#DRIVER.DETECTED}' - tag: driver value: '{#DRIVER.NAME}' trigger_prototypes: - uuid: 5630f8b3585f4f5b8faf4a30d95755b8 expression: 'last(/HashiCorp Nomad Client by HTTP/nomad.client.driver.state["{#DRIVER.NAME}"]) = 0 and last(/HashiCorp Nomad Client by HTTP/nomad.client.driver.detected["{#DRIVER.NAME}"]) = 1' name: 'HashiCorp Nomad Client: Driver [{#DRIVER.NAME}] is in unhealthy state' priority: WARNING description: 'The [{#DRIVER.NAME}] driver detected, but its state is unhealthy.' manual_close: 'YES' tags: - tag: scope value: availability master_item: key: nomad.client.node.info.get preprocessing: - type: JAVASCRIPT parameters: - | var raw = JSON.parse(value), body = raw.body[0], arr = []; function getField(data, path) { var steps = path.split('.'); for (var i = 0; i < steps.length; i++) { var step = steps[i]; if (typeof data !== 'object' || typeof data[step] === 'undefined') { throw 'Required field "' + path + '" is not present in data received.'; } data = data[step]; } return data; } var keys = Object.keys(getField(body,'Drivers')); for (k in keys) { arr.push({ '{#DRIVER.NAME}': keys[k], '{#DRIVER.DETECTED}': getField(body.Drivers[keys[k]],'Detected'), }); } return JSON.stringify(arr); - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h tags: - tag: class value: software - tag: target value: nomad-client macros: - macro: '{$NOMAD.API.RESPONSE.SUCCESS}' value: '200' description: 'HTTP API successful response code. Availability triggers threshold. Change, if needed.' - macro: '{$NOMAD.CLIENT.API.PORT}' value: '4646' description: 'Nomad client API port.' - macro: '{$NOMAD.CLIENT.API.SCHEME}' value: http description: 'Nomad client API scheme.' 
- macro: '{$NOMAD.CLIENT.OPEN.FDS.MAX.WARN}' value: '90' description: 'Maximum percentage of used file descriptors.' - macro: '{$NOMAD.CLIENT.RPC.PORT}' value: '4647' description: 'Nomad RPC service port.' - macro: '{$NOMAD.CLIENT.SERF.PORT}' value: '4648' description: 'Nomad serf service port.' - macro: '{$NOMAD.CPU.UTIL.MIN}' value: '90' description: 'CPU utilization threshold. Measured as a percentage.' - macro: '{$NOMAD.DATA.TIMEOUT}' value: 15s description: 'Response timeout for an API.' - macro: '{$NOMAD.DISK.NAME.MATCHES}' value: '.*' description: 'The filter to include HashiCorp Nomad client disks by name.' - macro: '{$NOMAD.DISK.NAME.NOT_MATCHES}' value: CHANGE_IF_NEEDED description: 'The filter to exclude HashiCorp Nomad client disks by name.' - macro: '{$NOMAD.DISK.UTIL.MIN.CRIT}' value: '90' description: 'Critical threshold of the disk space utilization. Measured as a percentage.' - macro: '{$NOMAD.DISK.UTIL.MIN.WARN}' value: '80' description: 'Warning threshold of the disk space utilization. Measured as a percentage.' - macro: '{$NOMAD.DRIVER.DETECT.MATCHES}' value: '.*' description: 'The filter to include HashiCorp Nomad client drivers by detection state. Possible filtering values: `true`, `false`.' - macro: '{$NOMAD.DRIVER.DETECT.NOT_MATCHES}' value: CHANGE_IF_NEEDED description: 'The filter to exclude HashiCorp Nomad client drivers by detection state. Possible filtering values: `true`, `false`.' - macro: '{$NOMAD.DRIVER.NAME.MATCHES}' value: '.*' description: 'The filter to include HashiCorp Nomad client drivers by name.' - macro: '{$NOMAD.DRIVER.NAME.NOT_MATCHES}' value: CHANGE_IF_NEEDED description: 'The filter to exclude HashiCorp Nomad client drivers by name.' - macro: '{$NOMAD.HTTP.PROXY}' description: 'Sets the HTTP proxy for HTTP agent item. If this parameter is empty, then no proxy is used.' - macro: '{$NOMAD.INODES.FREE.MIN.CRIT}' value: '10' description: 'Critical threshold of the filesystem metadata utilization. Measured as a percentage.' - macro: '{$NOMAD.INODES.FREE.MIN.WARN}' value: '20' description: 'Warning threshold of the filesystem metadata utilization. Measured as a percentage.' - macro: '{$NOMAD.JOB.NAME.MATCHES}' value: '.*' description: 'The filter to include HashiCorp Nomad client jobs by name.' 
- macro: '{$NOMAD.JOB.NAME.NOT_MATCHES}' value: CHANGE_IF_NEEDED description: 'The filter to exclude HashiCorp Nomad client jobs by name.' - macro: '{$NOMAD.JOB.NAMESPACE.MATCHES}' value: '.*' description: 'The filter to include HashiCorp Nomad client jobs by namespace.' - macro: '{$NOMAD.JOB.NAMESPACE.NOT_MATCHES}' value: CHANGE_IF_NEEDED description: 'The filter to exclude HashiCorp Nomad client jobs by namespace.' - macro: '{$NOMAD.JOB.TASK.GROUP.MATCHES}' value: '.*' description: 'The filter to include HashiCorp Nomad client jobs by task group belonging.' - macro: '{$NOMAD.JOB.TASK.GROUP.NOT_MATCHES}' value: CHANGE_IF_NEEDED description: 'The filter to exclude HashiCorp Nomad client jobs by task group belonging.' - macro: '{$NOMAD.JOB.TYPE.MATCHES}' value: '.*' description: 'The filter to include HashiCorp Nomad client jobs by type.' - macro: '{$NOMAD.JOB.TYPE.NOT_MATCHES}' value: CHANGE_IF_NEEDED description: 'The filter to exclude HashiCorp Nomad client jobs by type.' - macro: '{$NOMAD.RAM.AVAIL.MIN}' value: '5' description: 'Available memory threshold. Measured as a percentage.' - macro: '{$NOMAD.TOKEN}' value: '' description: 'Nomad authentication token.' 
dashboards: - uuid: 6cb91bf47abb4c29b5fdf6de15ee5f9e name: Allocations pages: - name: Allocations widgets: - type: item width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.allocations.running - type: item x: '8' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.allocations.start - type: item x: '16' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.allocations.pending - type: item 'y': '5' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.allocations.complete - type: item x: '8' 'y': '5' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.allocations.blocked - type: item x: '16' 'y': '5' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid 
value: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.allocations.restart - type: item 'y': '10' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.allocations.migrating - type: item x: '8' 'y': '10' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.allocations.terminal - type: item x: '16' 'y': '10' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.allocations.failed - uuid: 9df94875bc944537860bad6bbe832ca8 name: 'Job resources' pages: - name: 'Job resources' widgets: - type: graphprototype width: '12' height: '5' fields: - type: INTEGER name: columns value: '1' - type: INTEGER name: rows value: '1' - type: GRAPH_PROTOTYPE name: graphid value: host: 'HashiCorp Nomad Client by HTTP' name: 'HashiCorp Nomad Client: Job [{#JOB.NAME}] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] CPU utilization' - type: graphprototype x: '12' width: '12' height: '5' fields: - type: INTEGER name: columns value: '1' - type: INTEGER name: rows value: '1' - type: GRAPH_PROTOTYPE name: graphid value: host: 'HashiCorp Nomad Client by HTTP' name: 'HashiCorp Nomad Client: Job [{#JOB.NAME}] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] CPU throttling' - type: graphprototype 'y': '5' width: '12' height: '5' fields: - type: INTEGER name: columns value: '1' - type: 
INTEGER name: rows value: '1' - type: INTEGER name: source_type value: '3' - type: ITEM_PROTOTYPE name: itemid value: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.allocs.cpu.allocated["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' - type: graphprototype x: '12' 'y': '5' width: '12' height: '5' fields: - type: INTEGER name: columns value: '1' - type: INTEGER name: rows value: '1' - type: INTEGER name: source_type value: '3' - type: ITEM_PROTOTYPE name: itemid value: host: 'HashiCorp Nomad Client by HTTP' key: 'nomad.client.allocs.cpu.total_ticks["{#JOB.NAME}","{#JOB.TASK.GROUP}","{#JOB.NAMESPACE}"]' - type: graphprototype 'y': '10' width: '24' height: '5' fields: - type: INTEGER name: columns value: '1' - type: INTEGER name: rows value: '1' - type: GRAPH_PROTOTYPE name: graphid value: host: 'HashiCorp Nomad Client by HTTP' name: 'HashiCorp Nomad Client: Job ["{#JOB.NAME}"] task ["{#JOB.TASK.GROUP}"] namespace ["{#JOB.NAMESPACE}"] memory utilization' - uuid: 040a905f916a4311a52ecc2b084175a4 name: 'Node resources' pages: - name: 'Node resources' widgets: - type: graph width: '24' height: '5' fields: - type: GRAPH name: graphid value: host: 'HashiCorp Nomad Client by HTTP' name: 'HashiCorp Nomad Client: CPU utilization' - type: graph 'y': '5' width: '12' height: '5' fields: - type: GRAPH name: graphid value: host: 'HashiCorp Nomad Client by HTTP' name: 'HashiCorp Nomad Client: Memory utilization' - type: graph x: '12' 'y': '5' width: '12' height: '5' fields: - type: GRAPH name: graphid value: host: 'HashiCorp Nomad Client by HTTP' name: 'HashiCorp Nomad Client: Memory allocation' - type: graphprototype 'y': '10' width: '12' height: '5' fields: - type: INTEGER name: columns value: '1' - type: INTEGER name: rows value: '1' - type: GRAPH_PROTOTYPE name: graphid value: host: 'HashiCorp Nomad Client by HTTP' name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] usage' - type: graphprototype x: '12' 'y': '10' width: '12' height: '5' fields: - type: INTEGER 
name: columns value: '1' - type: INTEGER name: rows value: '1' - type: GRAPH_PROTOTYPE name: graphid value: host: 'HashiCorp Nomad Client by HTTP' name: 'HashiCorp Nomad Client: Disk ["{#DEV.NAME}"] utilization' valuemaps: - uuid: 766a3c43981147bf85e9e157eb69c510 name: 'Detection state' mappings: - value: '0' newvalue: Undetected - value: '1' newvalue: Detected - uuid: c202a63acca7407bb6d61bf631696691 name: 'Driver state' mappings: - value: '0' newvalue: Unhealthy - value: '1' newvalue: Healthy - uuid: 40e17de6519d44bb80b4566e0569c31b name: 'Service state' mappings: - value: '0' newvalue: Down - value: '1' newvalue: Up - uuid: 8598d0e2bd6f4903832ec91b7b300062 template: 'HashiCorp Nomad Server by HTTP' name: 'HashiCorp Nomad Server by HTTP' description: | Get HashiCorp Nomad server metrics by HTTP from metrics endpoint. More information about metrics is available in the official documentation: https://developer.hashicorp.com/nomad/docs/operations/metrics-reference. You can discuss this template or leave feedback on our forum: https://www.zabbix.com/forum/zabbix-suggestions-and-feedback. Generated by official Zabbix template tool "Templator" 2.0.0 vendor: name: Zabbix version: 7.0-0 groups: - name: Templates/Applications items: - uuid: c5903e91262b4877be8c7e8f304b2862 name: 'HashiCorp Nomad Server: Service [rpc] state' type: SIMPLE key: 'net.tcp.service[tcp,,{$NOMAD.SERVER.RPC.PORT}]' history: 7d trends: 90d description: 'Current [rpc] service state.' 
valuemap: name: 'Service state' preprocessing: - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h tags: - tag: component value: network triggers: - uuid: 0b534fa4c6cd4e89bd0a44ca8e810fa2 expression: 'last(/HashiCorp Nomad Server by HTTP/net.tcp.service[tcp,,{$NOMAD.SERVER.RPC.PORT}]) = 0' name: 'HashiCorp Nomad Server: Service [rpc] is down' opdata: 'Service: rpc, Port: {$NOMAD.SERVER.RPC.PORT}, State: {ITEM.LASTVALUE}' priority: AVERAGE description: | Cannot establish the connection to [rpc] service port {$NOMAD.SERVER.RPC.PORT}. Check the Nomad state and network connectivity between Nomad and Zabbix. manual_close: 'YES' tags: - tag: scope value: availability - uuid: 9eecc3ee79f04448991c83ac93459597 name: 'HashiCorp Nomad Server: Service [serf] state' type: SIMPLE key: 'net.tcp.service[tcp,,{$NOMAD.SERVER.SERF.PORT}]' history: 7d trends: 90d description: 'Current [serf] service state.' valuemap: name: 'Service state' preprocessing: - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h tags: - tag: component value: network triggers: - uuid: 00799e8927b547308fc9caab4ba2d24d expression: 'last(/HashiCorp Nomad Server by HTTP/net.tcp.service[tcp,,{$NOMAD.SERVER.SERF.PORT}]) = 0' name: 'HashiCorp Nomad Server: Service [serf] is down' opdata: 'Service: serf, Port: {$NOMAD.SERVER.SERF.PORT}, State: {ITEM.LASTVALUE}' priority: AVERAGE description: | Cannot establish the connection to [serf] service port {$NOMAD.SERVER.SERF.PORT}. Check the Nomad state and network connectivity between Nomad and Zabbix. manual_close: 'YES' tags: - tag: scope value: availability - uuid: 04821dab074f43e785dec52fbf69240e name: 'HashiCorp Nomad Server: Nomad raft version' type: DEPENDENT key: nomad.raft.version delay: '0' history: 7d trends: '0' value_type: CHAR description: 'Nomad raft version.' 
preprocessing: - type: JSONPATH parameters: - $.body.stats.raft.protocol_version error_handler: DISCARD_VALUE master_item: key: nomad.server.stats.get - uuid: 6ad163ebdefa4f5aa1e3712b7ddac7a7 name: 'HashiCorp Nomad Server: FSM allocation client update time' type: DEPENDENT key: nomad.server.alloc_client_update delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to apply AllocClientUpdate raft entry.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_fsm_alloc_client_update_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: fsm - uuid: 22947f9cc2734f95997a55d9e6392b77 name: 'HashiCorp Nomad Server: Autopilot failure tolerance' type: DEPENDENT key: nomad.server.autopilot.failure_tolerance delay: '0' history: 7d trends: 90d description: 'The number of redundant healthy servers that can fail without causing an outage.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_autopilot_failure_tolerance - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: autopilot triggers: - uuid: fbb6d007481a4aab8d107a1d25a8c7ad expression: 'last(/HashiCorp Nomad Server by HTTP/nomad.server.autopilot.failure_tolerance) < {$NOMAD.REDUNDANCY.MIN} and nodata(/HashiCorp Nomad Server by HTTP/nomad.server.autopilot.failure_tolerance,5m) = 0' name: 'HashiCorp Nomad Server: Autopilot redundancy is low' event_name: 'HashiCorp Nomad Server: Autopilot redundancy is low (less than {$NOMAD.REDUNDANCY.MIN})' priority: WARNING description: | The autopilot redundancy is low. Cluster crash risk is high due to one more server failure. 
manual_close: 'YES' tags: - tag: scope value: capacity - uuid: 05ce375320dd402ab6c75f8fffba74cb name: 'HashiCorp Nomad Server: Autopilot state' type: DEPENDENT key: nomad.server.autopilot.state delay: '0' history: 7d trends: 90d description: 'Current autopilot state.' valuemap: name: 'Autopilot state' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_autopilot_healthy - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: autopilot triggers: - uuid: 73c14eec055e47c6af6d8728ba0fb416 expression: 'last(/HashiCorp Nomad Server by HTTP/nomad.server.autopilot.state) = 0 and nodata(/HashiCorp Nomad Server by HTTP/nomad.server.autopilot.state,5m) = 0' name: 'HashiCorp Nomad Server: Autopilot is unhealthy' priority: AVERAGE description: 'The autopilot is in unhealthy state. The successful failover probability is extremely low.' manual_close: 'YES' tags: - tag: scope value: availability - uuid: ace31f3772fe4ffe84741554e3fba8bc name: 'HashiCorp Nomad Server: CPU shares for blocked evaluations' type: DEPENDENT key: nomad.server.blocked_evals.cpu delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Amount of CPU shares requested by blocked evals.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_blocked_evals_cpu - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: cpu - tag: component value: evaluations - uuid: d7fe536b4d0445328dfbf3b1188bea70 name: 'HashiCorp Nomad Server: CPU shares for blocked job evaluations' type: DEPENDENT key: nomad.server.blocked_evals.job.cpu delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Amount of CPU shares requested by blocked evals of a job.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_blocked_evals_job_cpu - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: cpu - tag: component value: evaluations - tag: component value: jobs - uuid: 2f07cbd6ec11449cae787a6e84fa5fbf name: 'HashiCorp Nomad Server: Memory shares for blocked job evaluations' type: DEPENDENT key: nomad.server.blocked_evals.job.memory delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Amount of memory requested by blocked evals of a job.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_blocked_evals_job_memory - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: evaluations - tag: component value: jobs - tag: component value: memory - uuid: 5460a81df8174fb5a7596b4243162d05 name: 'HashiCorp Nomad Server: Memory shares by blocked evaluations' type: DEPENDENT key: nomad.server.blocked_evals.memory delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Amount of memory requested by blocked evals.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_blocked_evals_memory - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: evaluations - tag: component value: memory - uuid: 474d05060b0d4a608f6a2d319235af91 name: 'HashiCorp Nomad Server: Evaluations blocked' type: DEPENDENT key: nomad.server.blocked_evals.total_blocked delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of evals in the blocked state for any reason (cluster resource exhaustion or quota limits).' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_blocked_evals_total_blocked - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: evaluations - uuid: 80f5edf18daf407ba6b64374084a555b name: 'HashiCorp Nomad Server: Evaluations escaped' type: DEPENDENT key: nomad.server.blocked_evals.total_escaped delay: '0' history: 7d trends: 90d value_type: FLOAT description: | Count of evals that have escaped computed node classes. This indicates a scheduler optimization was skipped and is not usually a source of concern. preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_blocked_evals_total_escaped - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: evaluations - uuid: 356dec0f5d5b45d981db4b590ef7dd73 name: 'HashiCorp Nomad Server: Evaluations blocked due to quota limit' type: DEPENDENT key: nomad.server.blocked_evals.total_quota_limit delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of blocked evals due to quota limits (the resources for these jobs are not counted in other blocked_evals metrics, except for total_blocked).' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_blocked_evals_total_quota_limit - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: evaluations - uuid: 54ddea6f2fa04e6c8ceb456d4d67c530 name: 'HashiCorp Nomad Server: Evaluations enqueue time' type: DEPENDENT key: nomad.server.broker.eval_waiting delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Average time elapsed with evaluations waiting to be enqueued.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_broker_eval_waiting - function - avg error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: evaluations - uuid: 7df3a4b80f6b44c6a03b672182d3f73f name: 'HashiCorp Nomad Server: Services ready to schedule' type: DEPENDENT key: nomad.server.broker.service_ready delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of service evals ready to be scheduled.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_broker_service_ready - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: services - uuid: 0da8881c8ddf4cd18e04b38201b83b7d name: 'HashiCorp Nomad Server: Services unacknowledged' type: DEPENDENT key: nomad.server.broker.service_unacked delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of unacknowledged service evals.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_broker_service_unacked - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: services - uuid: 2a38602920224c31a221696cf65c6abc name: 'HashiCorp Nomad Server: System evaluations ready to schedule' type: DEPENDENT key: nomad.server.broker.system_ready delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of system evals ready to be scheduled.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_broker_system_ready - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: evaluations - uuid: 3faaaa69672c4b999a12f054dbd8b980 name: 'HashiCorp Nomad Server: System evaluations unacknowledged' type: DEPENDENT key: nomad.server.broker.system_unacked delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of unacknowledged system evals.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_broker_system_unacked - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: evaluations - uuid: 2e556f5894c9492a80b34c84ceb69dba name: 'HashiCorp Nomad Server: Evaluations pending' type: DEPENDENT key: nomad.server.broker.total_pending delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Evaluations that are pending until an existing evaluation for the same job completes.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_broker_total_pending - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: evaluations - uuid: 01e11850732a46709942c225d294a8ef name: 'HashiCorp Nomad Server: Evaluations ready' type: DEPENDENT key: nomad.server.broker.total_ready delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Number of evaluations ready to be processed.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_broker_total_ready - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: evaluations - uuid: d57805e0d8a24117bb64992a9aeb3dc3 name: 'HashiCorp Nomad Server: Evaluations unacked' type: DEPENDENT key: nomad.server.broker.total_unacked delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Evaluations dispatched for processing but incomplete.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_broker_total_unacked - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: evaluations - uuid: ac1d1422adc248b48ba65268f9a43be8 name: 'HashiCorp Nomad Server: Evaluations waiting' type: DEPENDENT key: nomad.server.broker.total_waiting delay: '0' history: 7d trends: 90d description: 'Count of evals waiting to be enqueued.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_broker_total_waiting - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: evaluations - uuid: de3e13f7635e4058bd2ca12f6f454668 name: 'HashiCorp Nomad Server: RPC eval dequeue time' type: DEPENDENT key: nomad.server.client.dequeue delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for Eval.Dequeue RPC call.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_eval_dequeue_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: rpc - uuid: ac2a5b69fef546d0924d631a9ed9cffd name: 'HashiCorp Nomad Server: RPC get client allocs time' type: DEPENDENT key: nomad.server.client.get_client_allocs delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for Node.GetClientAllocs RPC call.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_client_get_client_allocs_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: rpc - uuid: 758f3474636c42378fac78a2f5e3b63b name: 'HashiCorp Nomad Server: RPC list time' type: DEPENDENT key: nomad.server.client.list delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for Node.List RPC call.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_client_list_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: rpc - uuid: 588bb9b304634852af149d7903d42798 name: 'HashiCorp Nomad Server: RPC update allocations time' type: DEPENDENT key: nomad.server.client.update_alloc delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for Node.UpdateAlloc RPC call.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_client_update_alloc_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: rpc - uuid: 1e1ce033c99b45a3892df8674deaf1f4 name: 'HashiCorp Nomad Server: RPC update status time' type: DEPENDENT key: nomad.server.client.update_status delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for Node.UpdateStatus RPC call.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_client_update_status_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: rpc - uuid: c5bd6a645e9f49ba83fe065b2af6248e name: 'HashiCorp Nomad Server: CPU time, rate' type: DEPENDENT key: nomad.server.cpu.time delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Total user and system CPU time spent in seconds.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - process_cpu_seconds_total - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: cpu - uuid: dbbee3bb99d347f9a9a5325b4f64a894 name: 'HashiCorp Nomad Server: Monitoring API response' type: DEPENDENT key: nomad.server.data.api.response delay: '0' history: 7d trends: '0' value_type: TEXT description: 'Monitoring API response message.' preprocessing: - type: JAVASCRIPT parameters: - | try { var response = Object.keys(JSON.parse(value).header).filter(function (f) { return f.match(/HTTP\/[\d.]+\s+\d{3}/); }); return response.pop(); } catch (error) { return "HTTP/1.1 408 Request timeout"; } - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h master_item: key: nomad.server.data.get tags: - tag: component value: status triggers: - uuid: 9dc8cd0bcbad4ee7a2afe45e33db53a7 expression: 'find(/HashiCorp Nomad Server by HTTP/nomad.server.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0' name: 'HashiCorp Nomad Server: Monitoring API connection has failed' event_name: 'HashiCorp Nomad Server: Monitoring API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", "\1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", "\1")}' priority: AVERAGE description: | Monitoring API connection has failed. Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix. manual_close: 'YES' tags: - tag: scope value: availability - uuid: b7ba2f4a56b04de298fde77cbf5afb6a name: 'HashiCorp Nomad Server: Telemetry get' type: HTTP_AGENT key: nomad.server.data.get history: '0' trends: '0' value_type: TEXT description: 'Telemetry data in raw format.' 
preprocessing: - type: CHECK_NOT_SUPPORTED parameters: - '' error_handler: CUSTOM_VALUE error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}' timeout: '{$NOMAD.DATA.TIMEOUT}' url: '{$NOMAD.SERVER.API.SCHEME}://{HOST.IP}:{$NOMAD.SERVER.API.PORT}/v1/metrics' query_fields: - name: format value: prometheus status_codes: '' http_proxy: '{$NOMAD.HTTP.PROXY}' headers: - name: X-Nomad-Token value: '{$NOMAD.TOKEN}' retrieve_mode: BOTH output_format: JSON tags: - tag: component value: raw - uuid: 7241c9923d714970ad950f9b7aa8ab52 name: 'HashiCorp Nomad Server: RPC evaluation acknowledgement time' type: DEPENDENT key: nomad.server.eval.ack delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for Eval.Ack RPC call.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_eval_ack_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: rpc - uuid: b9085e173cec4ce2b2fa6fd68041695e name: 'HashiCorp Nomad Server: FSM apply plan results time' type: DEPENDENT key: nomad.server.fsm.apply_plan_results delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to apply ApplyPlanResults raft entry.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_fsm_apply_plan_results_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: fsm - uuid: 62b19ce1b2bb4fdaacff02d70101a10f name: 'HashiCorp Nomad Server: FSM job registration time' type: DEPENDENT key: nomad.server.fsm.register_job delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to apply RegisterJob raft entry.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_fsm_register_job_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: fsm - uuid: 2d0b9f09bfb5480ebd6d6cf96c15278e name: 'HashiCorp Nomad Server: FSM update evaluation time' type: DEPENDENT key: nomad.server.fsm.update_eval delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to apply UpdateEval raft entry.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_fsm_update_eval_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: fsm - uuid: 0abce7a087874e58a7da20acb3599535 name: 'HashiCorp Nomad Server: Heartbeats active' type: DEPENDENT key: nomad.server.heartbeat.active delay: '0' history: 7d trends: 90d description: | Number of active heartbeat timers. Each timer represents a Nomad client connection. preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_heartbeat_active - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: network - uuid: dc95deb3590148a9bc50ea151a0d46b7 name: 'HashiCorp Nomad Server: RPC job allocations time' type: DEPENDENT key: nomad.server.job.allocations delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for Job.Allocations RPC call.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_allocations_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: rpc - uuid: 11e18e811fe7461181417ca1f5dad915 name: 'HashiCorp Nomad Server: RPC job evaluations time' type: DEPENDENT key: nomad.server.job.evaluations delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for Job.Evaluations RPC call.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_evaluations_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: rpc - uuid: 0efaac3a59d44ce09d31435ddfa6aada name: 'HashiCorp Nomad Server: RPC get job time' type: DEPENDENT key: nomad.server.job.get_job delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for Job.GetJob RPC call.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_get_job_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: rpc - uuid: 5ab3191f1f744682bab6d3142a1c39a4 name: 'HashiCorp Nomad Server: Jobs dead' type: DEPENDENT key: nomad.server.job_status.dead delay: '0' history: 7d trends: 90d description: 'Number of dead jobs.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_status_dead - value - '' error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.server.metrics.get tags: - tag: component value: jobs triggers: - uuid: 44dcd52ebf54404e871501a4c4825424 expression: 'last(/HashiCorp Nomad Server by HTTP/nomad.server.job_status.dead) > 0 and nodata(/HashiCorp Nomad Server by HTTP/nomad.server.job_status.dead,5m) = 0' name: 'HashiCorp Nomad Server: Dead jobs found' opdata: 'Current amount: {ITEM.LASTVALUE}' priority: WARNING description: | Jobs with the `Dead` state discovered. Check the {$NOMAD.SERVER.API.SCHEME}://{HOST.IP}:{$NOMAD.SERVER.API.PORT}/v1/jobs URL for the details. manual_close: 'YES' tags: - tag: scope value: availability - uuid: 39a27315e33549dc88c6cb426a5a05c9 name: 'HashiCorp Nomad Server: Jobs pending' type: DEPENDENT key: nomad.server.job_status.pending delay: '0' history: 7d trends: 90d description: 'Number of pending jobs.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_status_pending - value - '' error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.server.metrics.get tags: - tag: component value: jobs - uuid: 97907d8816b14fb69063a55bdea22701 name: 'HashiCorp Nomad Server: Jobs running' type: DEPENDENT key: nomad.server.job_status.running delay: '0' history: 7d trends: 90d description: 'Number of running jobs.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_status_running - value - '' error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.server.metrics.get tags: - tag: component value: jobs - uuid: 4be8c941477046dd9dcc549020d687e7 name: 'HashiCorp Nomad Server: Job allocations completed' type: DEPENDENT key: nomad.server.job_summary.complete delay: '0' history: 7d trends: 90d description: 'Number of complete allocations for a job.' 
# Preprocessing of 'Job allocations completed' (header on the previous line), then 'Job allocations failed',
# 'RPC job summary time', 'Job allocations lost', and the header of 'Job allocations queued'.
# The allocation items aggregate all matching samples ('function' / 'sum') and fall back to '0' on error;
# the RPC timing item takes a single *_sum value, discards it on error, and multiplies by 1.0E-9.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_summary_complete - function - sum error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.server.metrics.get tags: - tag: component value: jobs - uuid: 6f6ad9a70b4e4759bc2e4cc213d697bb name: 'HashiCorp Nomad Server: Job allocations failed' type: DEPENDENT key: nomad.server.job_summary.failed delay: '0' history: 7d trends: 90d description: 'Number of failed allocations for a job.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_summary_failed - function - sum error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.server.metrics.get tags: - tag: component value: jobs - uuid: 7a267a1b82b24157a19b52e6f285f0ce name: 'HashiCorp Nomad Server: RPC job summary time' type: DEPENDENT key: nomad.server.job_summary.get_job_summary delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for Job.Summary RPC call.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_summary_get_job_summary_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: rpc - uuid: 0c408dff86604486b0f23efed00f4348 name: 'HashiCorp Nomad Server: Job allocations lost' type: DEPENDENT key: nomad.server.job_summary.lost delay: '0' history: 7d trends: 90d description: 'Number of lost allocations for a job.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_summary_lost - function - sum error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.server.metrics.get tags: - tag: component value: jobs - uuid: d0de68553b484d2d9311b356a7b5a9e7 name: 'HashiCorp Nomad Server: Job allocations queued' type: DEPENDENT key: nomad.server.job_summary.queued delay: '0' history: 7d trends: 90d description: 'Number of queued allocations for a job.' 
# Preprocessing of 'Job allocations queued' (header on the previous line), then 'Job allocations running',
# 'Job allocations starting', 'Job allocations unknown', and the header of 'Leader barrier time'.
# Each allocation item sums all matching nomad_nomad_job_summary_* samples and substitutes '0' on error.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_summary_queued - function - sum error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.server.metrics.get tags: - tag: component value: jobs - uuid: 98ab06dce1f84ef5993e8cc793114008 name: 'HashiCorp Nomad Server: Job allocations running' type: DEPENDENT key: nomad.server.job_summary.running delay: '0' history: 7d trends: 90d description: 'Number of running allocations for a job.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_summary_running - function - sum error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.server.metrics.get tags: - tag: component value: jobs - uuid: e3cc96bd72924bb5bd0334018a5a88be name: 'HashiCorp Nomad Server: Job allocations starting' type: DEPENDENT key: nomad.server.job_summary.starting delay: '0' history: 7d trends: 90d description: 'Number of starting allocations for a job.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_summary_starting - function - sum error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.server.metrics.get tags: - tag: component value: jobs - uuid: 244a84e136c94a47b1b64b92d08ac20f name: 'HashiCorp Nomad Server: Job allocations unknown' type: DEPENDENT key: nomad.server.job_summary.unknown delay: '0' history: 7d trends: 90d description: 'Number of unknown allocations for a job.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_job_summary_unknown - function - sum error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.server.metrics.get tags: - tag: component value: jobs - uuid: 478f45d1095646bca5ffd3a6d923dddf name: 'HashiCorp Nomad Server: Leader barrier time' type: DEPENDENT key: nomad.server.leader.barrier delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to establish a raft barrier during leader transition.' 
# Preprocessing of 'Leader barrier time' (header on the previous line), then 'Total reconcile time',
# 'Reconcile peer time', and the header of 'Gossip time'.
# All are FLOAT timing items: one *_sum sample, discarded on error, multiplied by 1.0E-9, units 's'.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_leader_barrier_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: leader - tag: component value: raft - uuid: 8425336f75704a259bc066c795df1516 name: 'HashiCorp Nomad Server: Total reconcile time' type: DEPENDENT key: nomad.server.leader.reconcile delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to reconcile all serf peers with state store.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_leader_reconcile_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: leader - tag: component value: raft - uuid: a5b6d36142174dd683e2aec5dd4ee1e9 name: 'HashiCorp Nomad Server: Reconcile peer time' type: DEPENDENT key: nomad.server.leader.reconcile_member delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to reconcile a serf peer with state store.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_leader_reconcileMember_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: leader - tag: component value: raft - uuid: af6eda309cc545b2aa65efc0af103a02 name: 'HashiCorp Nomad Server: Gossip time' type: DEPENDENT key: nomad.server.memberlist.gossip delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to broadcast gossip messages.' 
# Preprocessing of 'Gossip time' (header on the previous line), then the raw 'Metrics' item, 'Namespace list time',
# 'Plan apply time', and the header of 'Plan evaluate time'.
# 'Metrics' (nomad.server.metrics.get) extracts $.body from nomad.server.data.get with history/trends '0';
# it is the master item that the PROMETHEUS_PATTERN items in this template depend on.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_memberlist_gossip_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: gossip - uuid: 25372c89439641d09521348033b1e324 name: 'HashiCorp Nomad Server: Metrics' type: DEPENDENT key: nomad.server.metrics.get delay: '0' history: '0' trends: '0' value_type: TEXT description: 'Nomad server metrics in raw format.' preprocessing: - type: JSONPATH parameters: - $.body error_handler: DISCARD_VALUE master_item: key: nomad.server.data.get tags: - tag: component value: raw - uuid: 8a003df55bae4bf0a14fefc6f02f28bf name: 'HashiCorp Nomad Server: Namespace list time' type: DEPENDENT key: nomad.server.namespace.list_namespace delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for Namespace.ListNamespaces.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_namespace_list_namespace_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: namespaces - uuid: dd8d8b5cdf914002ac58183b667ceb06 name: 'HashiCorp Nomad Server: Plan apply time' type: DEPENDENT key: nomad.server.plan.apply delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to apply a plan.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_plan_apply_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: plan - uuid: d8d69f231d23457e82c6d3941f09cf5d name: 'HashiCorp Nomad Server: Plan evaluate time' type: DEPENDENT key: nomad.server.plan.evaluate delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to evaluate a plan.' 
# Preprocessing of 'Plan evaluate time' (header on the previous line), then 'Plan queue', 'RPC plan submit time',
# and the header of 'Plan raft index processing time'.
# 'Plan queue' stores nomad_nomad_plan_queue_depth as-is (no multiplier); the timing items scale *_sum by 1.0E-9.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_plan_evaluate_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: plan - uuid: ee7b59fd19644bc4813696f8d806955f name: 'HashiCorp Nomad Server: Plan queue' type: DEPENDENT key: nomad.server.plan.queue_depth delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of evals in the plan queue.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_plan_queue_depth - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: plan - uuid: 9df85708b74d40509d9c9412a15e5ce5 name: 'HashiCorp Nomad Server: RPC plan submit time' type: DEPENDENT key: nomad.server.plan.submit delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for Plan.Submit RPC call.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_plan_submit_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: plan - tag: component value: rpc - uuid: 851eea2a548644ba884ca852c2759d90 name: 'HashiCorp Nomad Server: Plan raft index processing time' type: DEPENDENT key: nomad.server.plan.wait_for_index delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed that planner waits for the raft index of the plan to be processed.' 
# Preprocessing of 'Plan raft index processing time' (header on the previous line), then
# 'Open file descriptors, max', 'Open file descriptors', 'FSM index', and the header of 'Raft transactions, rate'.
# The descriptor and index items store the matched sample unchanged and discard the value on a failed lookup.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_plan_wait_for_index_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: plan - uuid: 505ee2804b4e451892103b36cc38abde name: 'HashiCorp Nomad Server: Open file descriptors, max' type: DEPENDENT key: nomad.server.process_max_fds delay: '0' history: 7d trends: 90d description: 'Maximum number of open file descriptors.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - process_max_fds - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: fds - uuid: cba898eca0c84a84a1ea0309e04b9f84 name: 'HashiCorp Nomad Server: Open file descriptors' type: DEPENDENT key: nomad.server.process_open_fds delay: '0' history: 7d trends: 90d description: 'Number of open file descriptors.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - process_open_fds - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: fds - uuid: 1dd455d2776f4378a39980cfa8eba33c name: 'HashiCorp Nomad Server: FSM index' type: DEPENDENT key: nomad.server.raft.applied_index delay: '0' history: 7d trends: 90d description: 'Current index applied to FSM.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_appliedIndex - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: fsm - uuid: bad74d0dc4b2431fbb5a442bc2fc1236 name: 'HashiCorp Nomad Server: Raft transactions, rate' type: DEPENDENT key: nomad.server.raft.apply delay: '0' history: 7d trends: 90d description: 'Number of Raft transactions.' 
# Preprocessing of 'Raft transactions, rate' (header on the previous line): a failed nomad_raft_apply lookup
# is discarded, matching the sibling rate items, instead of being replaced with 0. A substituted 0 fed into
# CHANGE_PER_SECOND looks like a counter reset and yields a bogus spike when the real value returns.
# Followed by 'Raft calls blocked, rate', 'BoltDB freelist bytes', 'BoltDB free page bytes',
# and the header of 'BoltDB free pages'.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_apply - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: raft - uuid: 804693ed989b44e3903bcd81159dd51d name: 'HashiCorp Nomad Server: Raft calls blocked, rate' type: DEPENDENT key: nomad.server.raft.barrier delay: '0' history: 7d trends: 90d description: 'Count of blocking raft API calls.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_barrier - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: raft - uuid: f7d1bc6c6a82475dabcdaaec7472fe02 name: 'HashiCorp Nomad Server: BoltDB freelist bytes' type: DEPENDENT key: nomad.server.raft.boltdb.freelist_bytes delay: '0' history: 7d trends: 90d units: B description: 'Number of freelist bytes.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_freelistBytes - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: b3f246d0991746589f4d0d68653eb4e9 name: 'HashiCorp Nomad Server: BoltDB free page bytes' type: DEPENDENT key: nomad.server.raft.boltdb.free_page_bytes delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Number of free page bytes.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_freePageBytes - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: e3d0d6f656f5484f8e8a8ca1195aad59 name: 'HashiCorp Nomad Server: BoltDB free pages' type: DEPENDENT key: nomad.server.raft.boltdb.num_free_pages delay: '0' history: 7d trends: 90d description: 'Number of BoltDB free pages.' 
# Preprocessing of 'BoltDB free pages' (header on the previous line), then 'BoltDB pending pages',
# 'BoltDB open read transactions', 'BoltDB read transactions, rate', and the header of 'BoltDB cursors'.
# Only the read-transactions item applies CHANGE_PER_SECOND; the others store the sample as-is.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_numFreePages - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: 73f359bcb02d46a0adb6fdd51284afcf name: 'HashiCorp Nomad Server: BoltDB pending pages' type: DEPENDENT key: nomad.server.raft.boltdb.num_pending_pages delay: '0' history: 7d trends: 90d description: 'Number of BoltDB pending pages.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_numPendingPages - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: 08afe7430e3c49e388f0a87c8bbf2ff4 name: 'HashiCorp Nomad Server: BoltDB open read transactions' type: DEPENDENT key: nomad.server.raft.boltdb.open_read_txn delay: '0' history: 7d trends: 90d description: 'Number of current open read transactions.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_openReadTxn - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: 36e89e86c9124e868670e9cb7b247899 name: 'HashiCorp Nomad Server: BoltDB read transactions, rate' type: DEPENDENT key: nomad.server.raft.boltdb.total_read_txn delay: '0' history: 7d trends: 90d description: 'Count of total read transactions.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_totalReadTxn - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: 2733afd943b346e38c8605259bea038c name: 'HashiCorp Nomad Server: BoltDB cursors' type: DEPENDENT key: nomad.server.raft.boltdb.txstats.cursor_count delay: '0' history: 7d trends: 90d description: 'Count of total database cursors.' 
# Preprocessing of 'BoltDB cursors' (header on the previous line), then 'BoltDB nodes, rate',
# 'BoltDB node dereferences, rate', and the header of 'BoltDB page allocations, rate'.
# Every item here applies CHANGE_PER_SECOND after the Prometheus lookup.
# NOTE(review): 'BoltDB cursors' is also stored as a per-second rate although its name lacks ', rate' — confirm intent.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_txstats_cursorCount - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: 1641576e376c4c82a8b1882b4810db4c name: 'HashiCorp Nomad Server: BoltDB nodes, rate' type: DEPENDENT key: nomad.server.raft.boltdb.txstats.node_count delay: '0' history: 7d trends: 90d description: 'Count of total database nodes.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_txstats_nodeCount - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: f0151f3431254030a3bcd538e8d2068b name: 'HashiCorp Nomad Server: BoltDB node dereferences, rate' type: DEPENDENT key: nomad.server.raft.boltdb.txstats.node_deref delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of total database node dereferences.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_txstats_nodeDeref - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: 27a8a38acfc34e14b3fbb13aa58ad5d4 name: 'HashiCorp Nomad Server: BoltDB page allocations, rate' type: DEPENDENT key: nomad.server.raft.boltdb.txstats.page_alloc delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Number of page allocations.' 
# Preprocessing of 'BoltDB page allocations, rate' (header on the previous line), then 'BoltDB pages in use',
# 'BoltDB rebalance operations, rate', and the header of 'BoltDB rebalance time'.
# 'Pages in use' stores the sample unchanged; the two rate items apply CHANGE_PER_SECOND.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_txstats_pageAlloc - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: 14a5fc715dad4eacaddb2f277e1f85f6 name: 'HashiCorp Nomad Server: BoltDB pages in use' type: DEPENDENT key: nomad.server.raft.boltdb.txstats.page_count delay: '0' history: 7d trends: 90d description: 'Number of pages in use.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_txstats_pageCount - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: dd9729e812614ab59c595b7183a883b0 name: 'HashiCorp Nomad Server: BoltDB rebalance operations, rate' type: DEPENDENT key: nomad.server.raft.boltdb.txstats.rebalance delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of total rebalance operations.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_txstats_rebalance - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: d270aa05d1ca4f3d9ee606937f1703cc name: 'HashiCorp Nomad Server: BoltDB rebalance time' type: DEPENDENT key: nomad.server.raft.boltdb.txstats.rebalance_time delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Sample of rebalance operation times.' 
# Preprocessing of 'BoltDB rebalance time' (header on the previous line), then 'BoltDB spill operations, rate',
# 'BoltDB spill time', and the header of 'BoltDB split operations, rate'.
# The *Time_sum items are multiplied by 1.0E-9 and stored with units 's'; the operation counter uses CHANGE_PER_SECOND.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_txstats_rebalanceTime_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: 621478ee2bea482abffd6b8c80df5ee1 name: 'HashiCorp Nomad Server: BoltDB spill operations, rate' type: DEPENDENT key: nomad.server.raft.boltdb.txstats.spill delay: '0' history: 7d trends: 90d description: 'Count of total spill operations.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_txstats_spill - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: 2ca3e7572bf04b04961107e7d9304887 name: 'HashiCorp Nomad Server: BoltDB spill time' type: DEPENDENT key: nomad.server.raft.boltdb.txstats.spill_time delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Sample of spill operation times.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_txstats_spillTime_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: bd14547f789540618db85057eceb70db name: 'HashiCorp Nomad Server: BoltDB split operations, rate' type: DEPENDENT key: nomad.server.raft.boltdb.txstats.split delay: '0' history: 7d trends: 90d description: 'Count of total split operations.' 
# Preprocessing of 'BoltDB split operations, rate' (header on the previous line), then 'BoltDB write operations, rate',
# 'BoltDB write time', and the start of 'Cluster role'.
# 'Cluster role' extracts $.body.stats.raft.state and a JavaScript step looks the string up in the
# ['Leader', 'Follower', 'Candidate'] array; the script's return expression continues on the next line.
# NOTE(review): the array order presumably has to match the 'Cluster role' value map defined elsewhere — verify.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_txstats_split - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: b13b7b9eb66940c9bf4d14e711ee0279 name: 'HashiCorp Nomad Server: BoltDB write operations, rate' type: DEPENDENT key: nomad.server.raft.boltdb.txstats.write delay: '0' history: 7d trends: 90d description: 'Count of total write operations.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_txstats_write - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: e9b31f3434aa4df0908a79ff5352a29e name: 'HashiCorp Nomad Server: BoltDB write time' type: DEPENDENT key: nomad.server.raft.boltdb.txstats.write_time delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Sample of write operation times.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_boltdb_txstats_writeTime_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: boltdb - uuid: dce75575b01e4cf8bd84d6a40c74f22b name: 'HashiCorp Nomad Server: Cluster role' type: DEPENDENT key: nomad.server.raft.cluster_role delay: '0' history: 7d trends: 90d description: 'Current role in the cluster.' valuemap: name: 'Cluster role' preprocessing: - type: JSONPATH parameters: - $.body.stats.raft.state error_handler: DISCARD_VALUE - type: JAVASCRIPT parameters: - | const idx = [ 'Leader', 'Follower', 'Candidate', ].indexOf(value); return idx !== -1 ? 
idx : 10; master_item: key: nomad.server.stats.get tags: - tag: component value: system triggers: - uuid: 1bc6ee807acd4532a58b4527f8865c1c expression: 'change(/HashiCorp Nomad Server by HTTP/nomad.server.raft.cluster_role) <> 0' name: 'HashiCorp Nomad Server: Cluster role has changed' event_name: 'HashiCorp Nomad Server: Cluster role has changed to {ITEM.LASTVALUE}' priority: INFO description: 'Cluster role has changed.' manual_close: 'YES' tags: - tag: scope value: notice
# 'Raft commit logs enqueued': stores nomad_raft_commitNumLogs unchanged; the value is discarded on a failed lookup.
- uuid: a994ee525886407486adace82ea5ef90 name: 'HashiCorp Nomad Server: Raft commit logs enqueued' type: DEPENDENT key: nomad.server.raft.commit_num_logs delay: '0' history: 7d trends: 90d description: 'Count of logs enqueued.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_commitNumLogs - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: raft
# 'Raft commit time': reads the Raft commit-time summary (nomad_raft_commitTime_sum) so the item matches its
# key and description; it previously extracted the unrelated worker dequeue metric.
# The value is multiplied by 1.0E-9 and stored with units 's'.
- uuid: 3160d5b81e1540aab3244f1cf6bce95d name: 'HashiCorp Nomad Server: Raft commit time' type: DEPENDENT key: nomad.server.raft.commit_time delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to commit writes.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_commitTime_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: raft
# Header of 'FSM apply time'; its preprocessing follows on the next line.
- uuid: 09f1baf6fc2443f48e01814b939c0749 name: 'HashiCorp Nomad Server: FSM apply time' type: DEPENDENT key: nomad.server.raft.fsm.apply delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to apply write to FSM.' 
# Preprocessing of 'FSM apply time' (header on the previous line), then 'FSM autopilot time', 'FSM enqueue time',
# and the header of 'FSM register node time'.
# All scale a *_sum sample by 1.0E-9 into units 's'; 'FSM autopilot time' substitutes '0' on a failed lookup
# while the others discard the value.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_fsm_apply_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: fsm - uuid: 18689de9474f4812ba9ed34d311ad3a1 name: 'HashiCorp Nomad Server: FSM autopilot time' type: DEPENDENT key: nomad.server.raft.fsm.autopilot delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to apply Autopilot raft entry.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_fsm_autopilot_sum - value - '' error_handler: CUSTOM_VALUE error_handler_params: '0' - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: fsm - uuid: 6846f262148e49288ecb7aa0ad18f7d2 name: 'HashiCorp Nomad Server: FSM enqueue time' type: DEPENDENT key: nomad.server.raft.fsm.enqueue delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to enqueue write to FSM.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_fsm_enqueue_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: fsm - uuid: 4e2d0557fc984ed9858316c8409edd0e name: 'HashiCorp Nomad Server: FSM register node time' type: DEPENDENT key: nomad.server.raft.fsm.register_node delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to apply RegisterNode raft entry.' 
# Preprocessing of 'FSM register node time' (header on the previous line), then 'Raft last index',
# 'Dispatch log time', 'Logs dispatched', and the start of 'Leader last contact'.
# 'Logs dispatched' substitutes '0' on a failed lookup; the other items discard the value.
# The 'Leader last contact' description is a literal block scalar that continues on the next line.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_fsm_register_node_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: fsm - uuid: bb65ca263f3244fc883d9102bbf9bef7 name: 'HashiCorp Nomad Server: Raft last index' type: DEPENDENT key: nomad.server.raft.last_index delay: '0' history: 7d trends: 90d description: 'Most recent index seen.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_lastIndex - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: raft - uuid: bda052f13e3c4dacaafc4ddefbc98036 name: 'HashiCorp Nomad Server: Dispatch log time' type: DEPENDENT key: nomad.server.raft.leader.dispatch_log delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed to write log, mark in flight, and start replication.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_leader_dispatchLog_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: leader - tag: component value: raft - uuid: f915e5e162814786943cd4cc85bb7aff name: 'HashiCorp Nomad Server: Logs dispatched' type: DEPENDENT key: nomad.server.raft.leader.dispatch_num_logs delay: '0' history: 7d trends: 90d description: 'Count of logs dispatched.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_leader_dispatchNumLogs - value - '' error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.server.metrics.get tags: - tag: component value: leader - tag: component value: raft - uuid: 7f9db272307c43e7b87b8a4a4a6eae16 name: 'HashiCorp Nomad Server: Leader last contact' type: DEPENDENT key: nomad.server.raft.leader.lastContact delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: | Time since last contact to leader. 
General indicator of Raft latency. preprocessing: - type: PROMETHEUS_PATTERN parameters: - 'nomad_raft_leader_lastContact{quantile="0.99"}' - value - '' error_handler: DISCARD_VALUE - type: STR_REPLACE parameters: - NaN - '0' - type: MULTIPLIER parameters: - '0.001' master_item: key: nomad.server.metrics.get tags: - tag: component value: leader - tag: component value: raft triggers: - uuid: 65c69469e9354e81a632485a74bc711d expression: 'min(/HashiCorp Nomad Server by HTTP/nomad.server.raft.leader.lastContact,5m) >= {$NOMAD.SERVER.LEADER.LATENCY} and nodata(/HashiCorp Nomad Server by HTTP/nomad.server.raft.leader.lastContact,5m) = 0' name: 'HashiCorp Nomad Server: Leader last contact timeout exceeded' event_name: 'Leader last contact timeout exceeded: over {$NOMAD.SERVER.LEADER.LATENCY} for the last 5 minutes' priority: WARNING description: | The nomad.raft.leader.lastContact metric is a general indicator of Raft latency which can be used to observe how Raft timing is performing and guide infrastructure provisioning. If this number trends upwards, look at CPU, disk IOPs, and network latency. nomad.raft.leader.lastContact should not get too close to the leader lease timeout of 500ms. tags: - tag: scope value: performance
# 'Raft peers': extracts $.body.stats.raft.num_peers from nomad.server.stats.get; discarded on a failed lookup.
- uuid: bca59612c439447ebe78ae68e68fd6cb name: 'HashiCorp Nomad Server: Raft peers' type: DEPENDENT key: nomad.server.raft.peers delay: '0' history: 7d trends: 90d description: 'Current cluster raft peers amount.' preprocessing: - type: JSONPATH parameters: - $.body.stats.raft.num_peers error_handler: DISCARD_VALUE master_item: key: nomad.server.stats.get tags: - tag: component value: system
# Header of 'Raft transaction commit time'. value_type is FLOAT because its preprocessing (next line) averages
# the matching samples and multiplies by 0.001, producing fractional seconds that the default integer
# value type cannot hold; every other 's'-unit item in this template is FLOAT as well.
- uuid: 36ff560da62740a0b4657b4afabcf902 name: 'HashiCorp Nomad Server: Raft transaction commit time' type: DEPENDENT key: nomad.server.raft.replication.appendEntries delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Raft transaction commit time.' 
# Preprocessing of 'Raft transaction commit time' (header on the previous line): averages all matching
# nomad_raft_replication_appendEntries_rpc samples and multiplies by 0.001 (presumably ms -> s; confirm).
# Then 'Heartbeat fails' ('0' on a failed lookup, unchanged values discarded with a 1h heartbeat),
# 'Resident memory size', 'RPC queries, rate', and the header of 'RPC requests, rate'.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_replication_appendEntries_rpc - function - avg error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '0.001' master_item: key: nomad.server.metrics.get tags: - tag: component value: raft - uuid: 662cd8712bbe4a4080a540f878b4ec44 name: 'HashiCorp Nomad Server: Heartbeat fails' type: DEPENDENT key: nomad.server.raft.transition.heartbeat_timeout delay: '0' history: 7d trends: 90d description: 'Count of failing to heartbeat and starting election.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_raft_transition_heartbeat_timeout - value - '' error_handler: CUSTOM_VALUE error_handler_params: '0' - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h master_item: key: nomad.server.metrics.get tags: - tag: component value: raft - uuid: 3dae4fd461fa4a5abdba717e538b8405 name: 'HashiCorp Nomad Server: Resident memory size' type: DEPENDENT key: nomad.server.resident_memory_bytes delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Resident memory size in bytes.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - process_resident_memory_bytes - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: memory - uuid: ec2d018e121442be9d3fb350f6572ca6 name: 'HashiCorp Nomad Server: RPC queries, rate' type: DEPENDENT key: nomad.server.rpc.query delay: '0' history: 7d trends: 90d description: 'Number of RPC queries.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_rpc_query - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: rpc - uuid: 406d9bc9db1e44509a77acd11963f406 name: 'HashiCorp Nomad Server: RPC requests, rate' type: DEPENDENT key: nomad.server.rpc.request delay: '0' history: 7d trends: 90d description: 'Number of RPC requests being handled.' 
# Preprocessing of 'RPC requests, rate' (header on the previous line), then 'RPC error requests, rate',
# 'Memory used', and the header of 'Objects freed, rate'.
# 'RPC error requests, rate' reads nomad_nomad_rpc_request_error; it previously reused nomad_nomad_rpc_request
# and therefore duplicated the total request rate instead of reporting errors.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_rpc_request - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: rpc - uuid: 0ef3cfaf0e694625bbc3f3a117ff9f06 name: 'HashiCorp Nomad Server: RPC error requests, rate' type: DEPENDENT key: nomad.server.rpc.request_error delay: '0' history: 7d trends: 90d description: 'Number of RPC requests being handled that result in an error.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_rpc_request_error - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: network - tag: component value: rpc - uuid: a0e7b48174de4e569762dc1654d63267 name: 'HashiCorp Nomad Server: Memory used' type: DEPENDENT key: nomad.server.runtime.alloc_bytes delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Memory utilization in bytes.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_runtime_alloc_bytes - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: memory - uuid: 7d8883065c2c494f9c4853afe50d157c name: 'HashiCorp Nomad Server: Objects freed, rate' type: DEPENDENT key: nomad.server.runtime.free_count delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of objects freed from heap by go runtime GC.' 
# Preprocessing of 'Objects freed, rate' (header on the previous line), then 'GC pause time', 'Heap objects',
# 'Goroutines', and the header of 'GC metadata size'.
# 'GC pause time' multiplies nomad_runtime_gc_pause_ns_sum by 1.0E-9 and stores it with units 's';
# the heap-object and goroutine items store the sample unchanged.
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_runtime_free_count - value - '' error_handler: DISCARD_VALUE - type: CHANGE_PER_SECOND parameters: - '' master_item: key: nomad.server.metrics.get tags: - tag: component value: runtime - uuid: 02b0699a706742d1bb557e32ed1b52ea name: 'HashiCorp Nomad Server: GC pause time' type: DEPENDENT key: nomad.server.runtime.gc_pause_ns delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Go runtime GC pause times.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_runtime_gc_pause_ns_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: runtime - uuid: c3fc612a6bec45ee9ffd51d168801974 name: 'HashiCorp Nomad Server: Heap objects' type: DEPENDENT key: nomad.server.runtime.heap_objects delay: '0' history: 7d trends: 90d description: | Number of objects on the heap. General memory pressure indicator. preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_runtime_heap_objects - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: memory - uuid: 9b1bcd0020b5413e920ce12af6124687 name: 'HashiCorp Nomad Server: Goroutines' type: DEPENDENT key: nomad.server.runtime.num_goroutines delay: '0' history: 7d trends: 90d description: 'Number of goroutines and general load pressure indicator.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_runtime_num_goroutines - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: memory - uuid: 71256d39cf844c22be8d25a9170e61f2 name: 'HashiCorp Nomad Server: GC metadata size' type: DEPENDENT key: nomad.server.runtime.sys_bytes delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Go runtime GC metadata size in bytes.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_runtime_sys_bytes - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: runtime - uuid: 733b18dd02d7404bb8cf4c7447a374ae name: 'HashiCorp Nomad Server: GC runs' type: DEPENDENT key: nomad.server.runtime.total_gc_runs delay: '0' history: 7d trends: 90d description: 'Count of go runtime GC runs.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_runtime_total_gc_runs - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: runtime - uuid: c7f327a390884c928ebf228d8ebb3a4c name: 'HashiCorp Nomad Server: Allocation reschedule attempts' type: DEPENDENT key: nomad.server.scheduler.allocs.rescheduled.attempted delay: '0' history: 7d trends: 90d description: 'Count of attempts to reschedule an allocation.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_scheduler_allocs_reschedule_attempted - function - sum error_handler: CUSTOM_VALUE error_handler_params: '0' master_item: key: nomad.server.metrics.get tags: - tag: component value: allocations - uuid: 8293b40a695b4b24ab9b6576a0f9a4d8 name: 'HashiCorp Nomad Server: Memberlist events' type: DEPENDENT key: nomad.server.serf.queue.event delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of memberlist events received.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_serf_queue_Event_sum - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: memberlist - uuid: 8aacc54e455b49b0ab0cfed4559829f7 name: 'HashiCorp Nomad Server: Memberlist changes' type: DEPENDENT key: nomad.server.serf.queue.intent delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of memberlist changes.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_serf_queue_Intent_sum - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: memberlist - uuid: 1c5f383a89f74934a6cb211bfd586856 name: 'HashiCorp Nomad Server: Memberlist queries' type: DEPENDENT key: nomad.server.serf.queue.queries delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of memberlist queries.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_serf_queue_Query_sum - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: memberlist - uuid: e645f371b4ae48e2b2b782f4b0d0fae4 name: 'HashiCorp Nomad Server: Snapshot index' type: DEPENDENT key: nomad.server.state.snapshot.index delay: '0' history: 7d trends: 90d description: 'Current snapshot index.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_state_snapshotIndex - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: state - uuid: 02ba826868414a16909f13b98cc4c445 name: 'HashiCorp Nomad Server: Internal stats API response' type: DEPENDENT key: nomad.server.stats.api.response delay: '0' history: 7d trends: '0' value_type: TEXT description: 'Internal stats API response message.' 
preprocessing: - type: JAVASCRIPT parameters: - | try { var response = Object.keys(JSON.parse(value).header).filter(function (f) { return f.match(/HTTP\/[\d.]+\s+\d{3}/); }); return response.pop(); } catch (error) { return "HTTP/1.1 408 Request timeout"; } - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1h master_item: key: nomad.server.stats.get tags: - tag: component value: status triggers: - uuid: b60dd78bd2d24c22afec63a8b420bcd6 expression: 'find(/HashiCorp Nomad Server by HTTP/nomad.server.stats.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0' name: 'HashiCorp Nomad Server: Internal stats API connection has failed' event_name: 'HashiCorp Nomad Server: Internal stats API connection has failed. HTTP response code: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+(\d{3})", \1")}. Response message: {{ITEM.VALUE}.regsub("HTTP\/[\d.]+\s+\d{3}\s+(.*)", \1")}' priority: AVERAGE description: | Internal stats API connection has failed. Ensure that Nomad API URL and the necessary permissions have been defined correctly, check the service state and network connectivity between Nomad and Zabbix. manual_close: 'YES' dependencies: - name: 'HashiCorp Nomad Server: Monitoring API connection has failed' expression: 'find(/HashiCorp Nomad Server by HTTP/nomad.server.data.api.response,,"like","{$NOMAD.API.RESPONSE.SUCCESS}")=0' tags: - tag: scope value: availability - uuid: f8d3a680e6f94250ab0a1ef8169889e3 name: 'HashiCorp Nomad Server: Internal stats get' type: HTTP_AGENT key: nomad.server.stats.get delay: 1h history: '0' trends: '0' value_type: TEXT description: 'Internal stats data in raw format.' 
preprocessing: - type: CHECK_NOT_SUPPORTED parameters: - '' error_handler: CUSTOM_VALUE error_handler_params: '{"header":{"HTTP/1.1 408 Request timeout":""}}' timeout: '{$NOMAD.DATA.TIMEOUT}' url: '{$NOMAD.SERVER.API.SCHEME}://{HOST.IP}:{$NOMAD.SERVER.API.PORT}/v1/agent/self' query_fields: - name: filter value: 'ID == "{HOST.HOST}"' status_codes: '' http_proxy: '{$NOMAD.HTTP.PROXY}' headers: - name: X-Nomad-Token value: '{$NOMAD.TOKEN}' retrieve_mode: BOTH output_format: JSON tags: - tag: component value: raw
# Vault tokens revoked: the item key and description both refer to tokens that
# HAVE been revoked, so the pattern reads ..._distributed_tokens_revoked.
# It previously read ..._distributed_tokens_revoking, which does not match
# this item's key or description.
# NOTE(review): confirm the exact metric name against the Nomad metrics reference.
- uuid: 9593fe46e0d941f2b4ddef96baa39b0e name: 'HashiCorp Nomad Server: Vault tokens revoked' type: DEPENDENT key: nomad.server.vault.distributed_tokens_revoked delay: '0' history: 7d trends: 90d value_type: FLOAT description: 'Count of revoked tokens.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_vault_distributed_tokens_revoked - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: vault
# Vault token last renewal: value scaled by 0.001 and stored with units "s"
# (presumably a millisecond source value - confirm).
- uuid: 40c41426ab96476c84647a18511af6e8 name: 'HashiCorp Nomad Server: Vault token last renewal' type: DEPENDENT key: nomad.server.vault.token_last_renewal delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time since last successful Vault token renewal.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_vault_token_last_renewal - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '0.001' master_item: key: nomad.server.metrics.get tags: - tag: component value: vault
- uuid: f62d8cf1e4b94c9b8796bd30fa799f7a name: 'HashiCorp Nomad Server: Vault token next renewal' type: DEPENDENT key: nomad.server.vault.token_next_renewal delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time until next Vault token renewal attempt.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_vault_token_next_renewal - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '0.001' master_item: key: nomad.server.metrics.get tags: - tag: component value: vault
# Vault token TTL: nomad_nomad_vault_token_ttl scaled by 0.001 and stored with
# units "s" (presumably a millisecond source value - confirm).
- uuid: c619c340887b43e1a4db750e74f1d4aa name: 'HashiCorp Nomad Server: Vault token TTL' type: DEPENDENT key: nomad.server.vault.token_ttl delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time to live for Vault token.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_vault_token_ttl - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '0.001' master_item: key: nomad.server.metrics.get tags: - tag: component value: vault
# Nomad server version: string extracted with JSONPath
# $.body.config.Version.Version from the nomad.server.stats.get master item.
# Its INFO trigger fires when change() on this item is non-zero and can be
# closed manually.
- uuid: 921fc50044f94aceaa9d0789be854496 name: 'HashiCorp Nomad Server: Nomad server version' type: DEPENDENT key: nomad.server.version delay: '0' history: 7d trends: '0' value_type: CHAR description: 'Nomad server version.' preprocessing: - type: JSONPATH parameters: - $.body.config.Version.Version master_item: key: nomad.server.stats.get triggers: - uuid: 3c7dfd5ec21144b6b58c603d60142200 expression: 'change(/HashiCorp Nomad Server by HTTP/nomad.server.version)<>0' name: 'HashiCorp Nomad Server: Nomad server version has changed' event_name: 'HashiCorp Nomad Server: Nomad server version has changed to {ITEM.LASTVALUE}' priority: INFO description: 'Nomad server version has changed.' manual_close: 'YES' tags: - tag: scope value: availability
- uuid: d769576e16df4aed86004498055d5a94 name: 'HashiCorp Nomad Server: Virtual memory size' type: DEPENDENT key: nomad.server.virtual_memory_bytes delay: '0' history: 7d trends: 90d value_type: FLOAT units: B description: 'Virtual memory size in bytes.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - process_virtual_memory_bytes - value - '' error_handler: DISCARD_VALUE master_item: key: nomad.server.metrics.get tags: - tag: component value: memory
# Worker evaluation create time: reads nomad_nomad_worker_create_eval_sum.
# The pattern previously pointed at nomad_nomad_worker_dequeue_eval_sum (a
# copy of the dequeue item below), so both items stored the same dequeue
# timing. The value is multiplied by 1.0E-9 and stored with units "s".
- uuid: d1addab9628140b09adb60c2820c1829 name: 'HashiCorp Nomad Server: Worker evaluation create time' type: DEPENDENT key: nomad.server.worker.create_eval delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for worker to create an eval.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_worker_create_eval_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: workers
# Worker evaluation dequeue time: nomad_nomad_worker_dequeue_eval_sum, scaled
# by 1.0E-9 and stored with units "s".
- uuid: 0875726948384b9696158d4db2b64691 name: 'HashiCorp Nomad Server: Worker evaluation dequeue time' type: DEPENDENT key: nomad.server.worker.dequeue_eval delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for worker to dequeue an eval.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_worker_dequeue_eval_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: workers
- uuid: 5bc2cf7713274e1ca8997415f26f7087 name: 'HashiCorp Nomad Server: Worker invoke scheduler time' type: DEPENDENT key: nomad.server.worker.invoke_scheduler_service delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for worker to invoke the scheduler.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_worker_invoke_scheduler_service_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: workers - uuid: c21aca97ba274913860b28a1fd06abec name: 'HashiCorp Nomad Server: Worker acknowledgement send time' type: DEPENDENT key: nomad.server.worker.send_ack delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for worker to send acknowledgement.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_worker_send_ack_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: workers - uuid: 3c7b49c3f60e4d69981f6dd3569e49e6 name: 'HashiCorp Nomad Server: Worker submit plan time' type: DEPENDENT key: nomad.server.worker.submit_plan delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for worker to submit plan.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_worker_submit_plan_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: workers - uuid: f5cd72ca250f402f9ddd112fb858d1f7 name: 'HashiCorp Nomad Server: Worker update evaluation time' type: DEPENDENT key: nomad.server.worker.update_eval delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed for worker to submit updated eval.' 
preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_worker_update_eval_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: workers - uuid: ae2e69196ef7416481e9904d9ee8862b name: 'HashiCorp Nomad Server: Worker log replication time' type: DEPENDENT key: nomad.server.worker.wait_for_index delay: '0' history: 7d trends: 90d value_type: FLOAT units: s description: 'Time elapsed that worker waits for the raft index of the eval to be processed.' preprocessing: - type: PROMETHEUS_PATTERN parameters: - nomad_nomad_worker_wait_for_index_sum - value - '' error_handler: DISCARD_VALUE - type: MULTIPLIER parameters: - '1.0E-9' master_item: key: nomad.server.metrics.get tags: - tag: component value: workers tags: - tag: class value: software - tag: target value: nomad-server macros: - macro: '{$NOMAD.API.RESPONSE.SUCCESS}' value: '200' description: 'HTTP API successful response code. Availability triggers threshold. Change, if needed.' - macro: '{$NOMAD.DATA.TIMEOUT}' value: 15s description: 'Response timeout for an API.' - macro: '{$NOMAD.HTTP.PROXY}' description: 'Sets the HTTP proxy for HTTP agent item. If this parameter is empty, then no proxy is used.' - macro: '{$NOMAD.OPEN.FDS.MAX}' value: '90' description: 'Maximum percentage of used file descriptors.' - macro: '{$NOMAD.REDUNDANCY.MIN}' value: '1' description: | Amount of redundant servers to keep the cluster safe. Default value - '1' for the 3-nodes cluster. Change if needed. - macro: '{$NOMAD.SERVER.API.PORT}' value: '4646' description: 'Nomad SERVER API port.' - macro: '{$NOMAD.SERVER.API.SCHEME}' value: http description: 'Nomad SERVER API scheme.' - macro: '{$NOMAD.SERVER.LEADER.LATENCY}' value: 0.3s description: 'Leader last contact latency threshold.' - macro: '{$NOMAD.SERVER.RPC.PORT}' value: '4647' description: 'Nomad RPC service port.' 
- macro: '{$NOMAD.SERVER.SERF.PORT}' value: '4648' description: 'Nomad serf service port.'
- macro: '{$NOMAD.TOKEN}' value: '' description: 'Nomad authentication token.'
# BoltDB dashboard: three single-item widgets on the top row, followed by
# three full-width graph widgets stacked below them.
dashboards: - uuid: c721ccf33a6f412e994e0ef8c9dc81b5 name: BoltDB pages: - name: BoltDB widgets:
- type: item width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.txstats.cursor_count
- type: item x: '8' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.txstats.node_count
- type: item x: '16' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.txstats.node_deref
# First graph row shows the BoltDB timers graph. It previously referenced
# 'Raft timers', which the Cluster dashboard already displays, while the
# 'BoltDB timers' graph defined in this template was not used by any dashboard.
- type: graph 'y': '5' width: '24' height: '5' fields: - type: GRAPH name: graphid value: host: 'HashiCorp Nomad Server by HTTP' name: 'HashiCorp Nomad Server: BoltDB timers'
- type: graph 'y': '10' width: '24' height: '5' fields: - type: GRAPH name: graphid value: host: 'HashiCorp Nomad Server by HTTP' name: 'HashiCorp Nomad Server: BoltDB operations'
- type: graph 'y': '15' width: '24' height: '5' fields: - type: GRAPH name: graphid value: host: 'HashiCorp Nomad Server by HTTP' name: 'HashiCorp Nomad Server: BoltDB pages'
- uuid: 5b6b8811398f48ebb4abb73620320211 name: Cluster pages: - name: Cluster widgets:
- type: item width: '6' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: 
adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: INTEGER name: value_size value: '30' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.cluster_role - type: item x: '6' width: '6' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: INTEGER name: value_size value: '30' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.autopilot.state - type: item x: '12' width: '6' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.autopilot.failure_tolerance - type: item x: '18' width: '6' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.fsm.autopilot - type: graph 'y': '5' width: '24' height: '5' fields: - type: GRAPH name: graphid value: host: 'HashiCorp Nomad Server by HTTP' name: 'HashiCorp Nomad Server: Raft timers' - type: item 'y': '10' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.leader.dispatch_num_logs - type: item x: '8' 'y': '10' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: 
ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.apply - type: item x: '16' 'y': '10' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.applied_index - uuid: c7f515dd33ee4045b96ffc6d83cec12a name: Jobs pages: - name: Jobs widgets: - type: item width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.job_status.running - type: item x: '8' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.job_status.pending - type: item x: '16' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.job_status.dead - type: item 'y': '5' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.job_summary.running - type: item x: '8' 'y': '5' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: 
INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.job_summary.starting - type: item x: '16' 'y': '5' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.job_summary.complete - type: item 'y': '10' width: '6' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.job_summary.queued - type: item x: '6' 'y': '10' width: '6' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.job_summary.lost - type: item x: '12' 'y': '10' width: '6' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.job_summary.failed - type: item x: '18' 'y': '10' width: '6' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.job_summary.unknown - type: item 'y': '15' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' 
- type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.broker.total_ready - type: item x: '8' 'y': '15' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.broker.total_pending - type: item x: '16' 'y': '15' width: '8' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.broker.total_waiting - type: item 'y': '20' width: '6' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.blocked_evals.total_blocked - type: item x: '6' 'y': '20' width: '6' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.blocked_evals.total_quota_limit - type: item x: '12' 'y': '20' width: '6' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.blocked_evals.total_escaped - type: item x: '18' 'y': '20' width: '6' height: '5' fields: - type: INTEGER name: show value: '2' - type: INTEGER name: show 
value: '4' - type: INTEGER name: adv_conf value: '1' - type: INTEGER name: decimal_places value: '0' - type: ITEM name: itemid value: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.broker.total_unacked valuemaps: - uuid: 9898b464447240919cf8e25016be687f name: 'Autopilot state' mappings: - value: '0' newvalue: Unhealthy - value: '1' newvalue: Healthy - uuid: fe32599dc75c48f6ab0887652ceb728c name: 'Cluster role' mappings: - value: '0' newvalue: Leader - value: '1' newvalue: Follower - value: '2' newvalue: Candidate - uuid: b90612059a164f3fa0c7ab871afa0c59 name: 'Service state' mappings: - value: '0' newvalue: Down - value: '1' newvalue: Up triggers: - uuid: d41ae1c102d84eab9629565bcc9b51df expression: '(min(/HashiCorp Nomad Client by HTTP/nomad.client.memory.available, 10m) / last(/HashiCorp Nomad Client by HTTP/nomad.client.memory.total))*100 <= {$NOMAD.RAM.AVAIL.MIN}' name: 'HashiCorp Nomad Client: High memory utilization' event_name: 'HashiCorp Nomad Client: High memory utilization: (available < {$NOMAD.RAM.AVAIL.MIN}% over last 10m)' opdata: 'RAM available: {ITEM.LASTVALUE1}, RAM total: {ITEM.LASTVALUE2}' priority: AVERAGE description: 'RAM utilization is too high. The system might be slow to respond.' tags: - tag: scope value: performance - uuid: 9b6d779864144e4e9e5e9924658d763e expression: 'min(/HashiCorp Nomad Server by HTTP/nomad.server.process_open_fds,5m)/last(/HashiCorp Nomad Server by HTTP/nomad.server.process_max_fds)*100>{$NOMAD.OPEN.FDS.MAX}' name: 'HashiCorp Nomad Server: Current number of open files is too high' event_name: 'HashiCorp Nomad Server: Current number of open files is too high (over {$NOMAD.OPEN.FDS.MAX}% for 5m)' priority: WARNING description: 'Heavy file descriptor usage (i.e., near the process file descriptor limit) indicates a potential file descriptor exhaustion issue.' 
tags: - tag: scope value: capacity graphs: - uuid: a221474b1f494f8e85edfefd3a86b2ad name: 'HashiCorp Nomad Client: CPU utilization' type: STACKED graph_items: - color: 199C0D item: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.cpu.idle - sortorder: '1' color: F63100 item: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.cpu.system - sortorder: '2' color: 00611C item: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.cpu.total - sortorder: '3' color: F7941D item: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.cpu.user - uuid: b36e33cf629444d096a222d4b806f10d name: 'HashiCorp Nomad Client: Memory allocation' type: STACKED graph_items: - color: 199C0D item: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.allocated.memory - sortorder: '1' color: F63100 item: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.unallocated.memory - uuid: a5658f2456f745ca9de427ce3bc4fc98 name: 'HashiCorp Nomad Client: Memory utilization' type: STACKED graph_items: - color: 199C0D item: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.memory.available - sortorder: '1' color: F63100 item: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.memory.free - sortorder: '2' color: 00611C item: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.memory.total - sortorder: '3' color: F7941D item: host: 'HashiCorp Nomad Client by HTTP' key: nomad.client.memory.used - uuid: 5028c6df64d3442f8fda2bb1fbc5f6e9 name: 'HashiCorp Nomad Server: BoltDB operations' graph_items: - color: 199C0D item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.txstats.rebalance - sortorder: '1' color: F63100 item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.txstats.spill - sortorder: '2' color: 00611C item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.txstats.split - sortorder: '3' color: F7941D item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.txstats.write - 
uuid: 71832cf8ab4c4607985719e19646fcc7 name: 'HashiCorp Nomad Server: BoltDB pages' graph_items: - color: 199C0D item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.num_free_pages - sortorder: '1' color: F63100 item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.txstats.page_count - sortorder: '2' color: 00611C item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.num_pending_pages - sortorder: '3' color: F7941D item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.txstats.page_alloc - uuid: cc4f2fccc25840f0ba2049554c360080 name: 'HashiCorp Nomad Server: BoltDB timers' graph_items: - color: 199C0D item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.txstats.rebalance_time - sortorder: '1' color: F63100 item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.txstats.spill_time - sortorder: '2' color: 00611C item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.boltdb.txstats.write_time - uuid: fb8cbf7d131445f6ade454b7b7e2e748 name: 'HashiCorp Nomad Server: Raft timers' graph_items: - color: 199C0D item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.leader.dispatch_log - sortorder: '1' color: F63100 item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.commit_time - sortorder: '2' color: 00611C item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.replication.appendEntries - sortorder: '3' color: F7941D item: host: 'HashiCorp Nomad Server by HTTP' key: nomad.server.raft.fsm.apply