You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1607 lines
64 KiB

1 year ago
# Zabbix export (format version '7.0') that defines the 'Hadoop by HTTP' template.
zabbix_export:
version: '7.0'
# Template group declared by this export.
template_groups:
- uuid: a571c0d144b14fd4a87a9d9b2aa9fcd6
name: Templates/Applications
# Template definition: technical name and visible name are both 'Hadoop by HTTP'.
templates:
- uuid: e129aeba7c814bf189772cf5919b4bbb
template: 'Hadoop by HTTP'
name: 'Hadoop by HTTP'
description: |
The template gets the Hadoop metrics from cluster's hosts (ResourceManager, NodeManagers, NameNode, DataNodes) by HTTP agent. You should define the IP address (or FQDN) and Web-UI port for the ResourceManager in {$HADOOP.RESOURCEMANAGER.HOST} and {$HADOOP.RESOURCEMANAGER.PORT} macros and for the NameNode in {$HADOOP.NAMENODE.HOST} and {$HADOOP.NAMENODE.PORT} macros respectively. Macros can be set in the template or overridden at the host level.
You can discuss this template or leave feedback on our forum https://www.zabbix.com/forum/zabbix-suggestions-and-feedback/413459-discussion-thread-for-official-zabbix-template-hadoop
Generated by official Zabbix template tool "Templator" 2.0.0
vendor:
name: Zabbix
version: 7.0-0
# Group the template is placed in (same name as the template_groups entry).
groups:
- name: Templates/Applications
# Template items: HTTP agent collectors, dependent items derived from them, and simple TCP checks.
items:
# HTTP agent item: queries the NameNodeInfo bean on the NameNode JMX endpoint and
# flattens its per-state DataNode maps into one JSON array of
# {HostName, adminState, operState, version} records. The result is the master
# item for the per-DataNode "raw info" item prototypes.
- uuid: d2d19ac9d1eb434c98a55cbf76c27850
  name: 'Hadoop: Get DataNodes states'
  type: HTTP_AGENT
  key: hadoop.datanodes.get
  history: '0'
  trends: '0'
  value_type: TEXT
  preprocessing:
    - type: JAVASCRIPT
      parameters:
        - |
          var result = [];

          // Append one record per node of the JSON-encoded map `raw`, labelled
          // with the operational `state`. A bean attribute that is absent or
          // empty contributes no nodes instead of failing the whole item.
          function getNodes(raw, state) {
              var nodes = raw ? JSON.parse(raw) : {};
              Object.keys(nodes).forEach(function (field) {
                  var Node = {};
                  Node['HostName'] = field || '';
                  Node['adminState'] = nodes[field].adminState || '';
                  Node['operState'] = state || '';
                  Node['version'] = nodes[field].version || '';
                  result.push(Node);
              });
          }

          try {
              // `var` keeps the parsed document local to this script.
              var info = JSON.parse(value).beans[0];
              getNodes(info.LiveNodes, 'Live');
              getNodes(info.DeadNodes, 'Dead');
              getNodes(info.DecomNodes, 'Decommission');
              getNodes(info.EnteringMaintenanceNodes, 'Maintenance');
              return JSON.stringify(result);
          }
          catch (error) {
              throw 'Failed to process response received from Hadoop';
          }
  url: '{$HADOOP.NAMENODE.HOST}:{$HADOOP.NAMENODE.PORT}/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo'
  tags:
    - tag: component
      value: raw
# Dependent item: BlocksTotal of the FSNamesystem bean, taken from master item hadoop.namenode.info.
- uuid: 2cb55b7ed9cd41878dc985497f45e084
name: 'NameNode: Total blocks'
type: DEPENDENT
key: hadoop.namenode.blocks_total
delay: '0'
history: 7d
description: 'Count of blocks tracked by NameNode.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=FSNamesystem'')].BlocksTotal.first()'
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
# Dependent item: BlockCapacity of the FSNamesystem bean, taken from master item hadoop.namenode.info.
- uuid: 1d098dc6fa134053b6c6be0e7618092e
name: 'NameNode: Blocks allocable'
type: DEPENDENT
key: hadoop.namenode.block_capacity
delay: '0'
history: 7d
description: 'Maximum number of blocks allocable.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=FSNamesystem'')].BlockCapacity.first()'
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
# Dependent item: CapacityRemaining of the FSNamesystem bean (units B), from master item hadoop.namenode.info.
- uuid: 26ca0bbd18e04b49b9eb8d2a74f4fd15
name: 'NameNode: Capacity remaining'
type: DEPENDENT
key: hadoop.namenode.capacity_remaining
delay: '0'
history: 7d
units: B
description: 'Available capacity.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=FSNamesystem'')].CapacityRemaining.first()'
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
# Dependent item: CorruptBlocks of the FSNamesystem bean, from master item hadoop.namenode.info.
- uuid: c73c2b6c24b846e49bdb68c3f5a01419
name: 'NameNode: Corrupt blocks'
type: DEPENDENT
key: hadoop.namenode.corrupt_blocks
delay: '0'
history: 7d
description: 'Number of corrupt blocks.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=FSNamesystem'')].CorruptBlocks.first()'
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
# Dependent item: FilesTotal of the FSNamesystem bean, from master item hadoop.namenode.info.
- uuid: 82198b21427a4e39a173369db42d9de3
name: 'NameNode: Total files'
type: DEPENDENT
key: hadoop.namenode.files_total
delay: '0'
history: 7d
description: 'Total count of files tracked by the NameNode.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=FSNamesystem'')].FilesTotal.first()'
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
# HTTP agent item: fetches the whole /jmx document from the NameNode host/port macros.
# Stored as TEXT with history and trends set to '0'; it is the master item of
# 'NameNode: Get info' and 'NameNode: Uptime'.
- uuid: 687406d06ce94a8291b2e72bb2f8bec4
name: 'Hadoop: Get NameNode stats'
type: HTTP_AGENT
key: hadoop.namenode.get
history: '0'
trends: '0'
value_type: TEXT
url: '{$HADOOP.NAMENODE.HOST}:{$HADOOP.NAMENODE.PORT}/jmx'
tags:
- tag: component
value: raw
# Dependent raw item: keeps only the NameNode service beans from the full JMX
# document (master item hadoop.namenode.get). The NameNode metric items filter
# this array by exact bean name.
- uuid: ea72dc1574f348d19432a1a922b4ed35
  name: 'NameNode: Get info'
  type: DEPENDENT
  key: hadoop.namenode.info
  delay: '0'
  history: '0'
  trends: '0'
  value_type: TEXT
  preprocessing:
    - type: JSONPATH
      parameters:
        # '=~' takes a regular expression, not a glob: the former 'name=*'
        # meant "zero or more '=' characters". 'name=.*' states the intent
        # (any bean name under service=NameNode) explicitly.
        - '$.beans[?(@.name=~''Hadoop:service=NameNode,name=.*'')]'
      # If the step fails, pass an empty array downstream instead of an error.
      error_handler: CUSTOM_VALUE
      error_handler_params: '[]'
  master_item:
    key: hadoop.namenode.get
  tags:
    - tag: component
      value: raw
# Dependent item: MissingBlocks of the FSNamesystem bean, from master item hadoop.namenode.info.
- uuid: 30ee7e09067e4f00a4f26ad6c00454b2
name: 'NameNode: Missing blocks'
type: DEPENDENT
key: hadoop.namenode.missing_blocks
delay: '0'
history: 7d
description: 'Number of missing blocks.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=FSNamesystem'')].MissingBlocks.first()'
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
triggers:
# AVERAGE trigger: the minimum value over 15m is above 0.
- uuid: 3b92daaaddb74105a5e57c4b381e3060
expression: 'min(/Hadoop by HTTP/hadoop.namenode.missing_blocks,15m)>0'
name: 'NameNode: Cluster has missing blocks'
priority: AVERAGE
description: 'A missing block is far worse than a corrupt block, because a missing block cannot be recovered by copying a replica.'
tags:
- tag: scope
value: notice
# Dependent item: NumDeadDataNodes of the FSNamesystem bean, from master item hadoop.namenode.info.
# A second step discards unchanged values with a 6h heartbeat.
- uuid: 3473bad0a7c94c8b9fd35cd4398e6215
name: 'NameNode: Dead DataNodes'
type: DEPENDENT
key: hadoop.namenode.num_dead_data_nodes
delay: '0'
history: 7d
description: 'Count of dead DataNodes.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=FSNamesystem'')].NumDeadDataNodes.first()'
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 6h
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
triggers:
# AVERAGE trigger: the minimum value over 5m is above 0.
- uuid: b2d1a26791aa4b16865b4410c50c7ceb
expression: 'min(/Hadoop by HTTP/hadoop.namenode.num_dead_data_nodes,5m)>0'
name: 'NameNode: Cluster has DataNodes in Dead state'
priority: AVERAGE
description: 'The death of a DataNode causes a flurry of network activity, as the NameNode initiates replication of blocks lost on the dead nodes.'
tags:
- tag: scope
value: notice
# Dependent item: NumLiveDataNodes of the FSNamesystem bean, from master item hadoop.namenode.info.
# Unchanged values are discarded with a 6h heartbeat.
- uuid: 398a8c95db3248b684f222fe7b912fe3
name: 'NameNode: Alive DataNodes'
type: DEPENDENT
key: hadoop.namenode.num_live_data_nodes
delay: '0'
history: 7d
description: 'Count of alive DataNodes.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=FSNamesystem'')].NumLiveDataNodes.first()'
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 6h
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
# Dependent item: StaleDataNodes of the FSNamesystem bean, from master item hadoop.namenode.info.
# Unchanged values are discarded with a 6h heartbeat.
- uuid: 15bcb22fdc7f4e2c8f24560ef641d63d
name: 'NameNode: Stale DataNodes'
type: DEPENDENT
key: hadoop.namenode.num_stale_data_nodes
delay: '0'
history: 7d
description: 'DataNodes that do not send a heartbeat within 30 seconds are marked as "stale".'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=FSNamesystem'')].StaleDataNodes.first()'
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 6h
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
# Dependent item: PercentBlockPoolUsed of the NameNodeInfo bean, from master item
# hadoop.namenode.info. The visible name previously read "Block Pool Renaming",
# which matched neither the key nor the extracted attribute; uuid and key are
# unchanged, so triggers, graphs and history keep resolving to this item.
- uuid: b72d54b849fc48fd8e7cdacd75943c23
  name: 'NameNode: Block Pool Used'
  type: DEPENDENT
  key: hadoop.namenode.percent_block_pool_used
  delay: '0'
  history: 7d
  value_type: FLOAT
  # Same unit as the sibling 'Percent capacity remaining' item.
  units: '%'
  description: 'Block pool used, in percent.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - '$.[?(@.name==''Hadoop:service=NameNode,name=NameNodeInfo'')].PercentBlockPoolUsed.first()'
  master_item:
    key: hadoop.namenode.info
  tags:
    - tag: component
      value: namenode
# Dependent item: PercentRemaining of the NameNodeInfo bean (FLOAT, units %), from master item
# hadoop.namenode.info. Unchanged values are discarded with a 6h heartbeat.
- uuid: 3cfbf084a31b479c91be356556d43c0d
name: 'NameNode: Percent capacity remaining'
type: DEPENDENT
key: hadoop.namenode.percent_remaining
delay: '0'
history: 7d
value_type: FLOAT
units: '%'
description: 'Available capacity in percent.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=NameNodeInfo'')].PercentRemaining.first()'
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 6h
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
triggers:
# WARNING trigger: the maximum over 15m is below the {$HADOOP.CAPACITY_REMAINING.MIN.WARN} macro.
- uuid: 3104295848c5497085f397b8f3e06ef6
expression: 'max(/Hadoop by HTTP/hadoop.namenode.percent_remaining,15m)<{$HADOOP.CAPACITY_REMAINING.MIN.WARN}'
name: 'NameNode: Cluster capacity remaining is low'
event_name: 'NameNode: Cluster capacity remaining is low (below {$HADOOP.CAPACITY_REMAINING.MIN.WARN}% for 15m)'
priority: WARNING
description: 'A good practice is to ensure that disk use never exceeds 80 percent capacity.'
tags:
- tag: scope
value: capacity
# Dependent item: RpcProcessingTimeAvgTime of the NameNode RPC bean, from master
# item hadoop.namenode.info. Only the processing time is extracted, despite
# "queue" in the visible name.
# NOTE(review): the bean name embeds RPC port 9000; a NameNode whose RPC server
# listens on another port exposes a differently named bean and this item will
# then receive no value.
- uuid: a9e6c1e2f9544c71844785b4baa9c017
  name: 'NameNode: RPC queue & processing time'
  type: DEPENDENT
  key: hadoop.namenode.rpc_processing_time_avg
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: s
  description: 'Average time spent on processing RPC requests.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - '$.[?(@.name==''Hadoop:service=NameNode,name=RpcActivityForPort9000'')].RpcProcessingTimeAvgTime.first()'
    # Hadoop reports RpcProcessingTimeAvgTime in milliseconds; convert to the
    # seconds declared in `units` (same scaling as the Uptime items).
    - type: MULTIPLIER
      parameters:
        - '0.001'
  master_item:
    key: hadoop.namenode.info
  tags:
    - tag: component
      value: namenode
# Dependent item: TotalLoad of the FSNamesystem bean, from master item hadoop.namenode.info.
- uuid: 9f00149ef0c2444ebbc9327b24acd7b9
name: 'NameNode: Total load'
type: DEPENDENT
key: hadoop.namenode.total_load
delay: '0'
history: 7d
description: 'The current number of concurrent file accesses (read/write) across all DataNodes.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=FSNamesystem'')].TotalLoad.first()'
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
# Dependent item: TransactionsSinceLastCheckpoint of the FSNamesystem bean, from master item hadoop.namenode.info.
- uuid: 6abfe537a36646a0b10fe2c72586d249
name: 'NameNode: Transactions since last checkpoint'
type: DEPENDENT
key: hadoop.namenode.transactions_since_last_checkpoint
delay: '0'
history: 7d
description: 'Total number of transactions since last checkpoint.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=FSNamesystem'')].TransactionsSinceLastCheckpoint.first()'
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
# Dependent item: UnderReplicatedBlocks of the FSNamesystem bean, from master item hadoop.namenode.info.
- uuid: 249098bbeb7a43cdac59f1297ca95104
name: 'NameNode: Under-replicated blocks'
type: DEPENDENT
key: hadoop.namenode.under_replicated_blocks
delay: '0'
history: 7d
description: 'The number of blocks with insufficient replication.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=FSNamesystem'')].UnderReplicatedBlocks.first()'
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
# Dependent item on the full JMX document (hadoop.namenode.get, not the filtered info item):
# Uptime of the java.lang:type=Runtime bean, multiplied by 0.001 and stored with units 's'
# (presumably milliseconds -> seconds; confirm against the JMX attribute).
- uuid: 7e8769eb77304b6f9c6e1d5bbd420fd0
name: 'NameNode: Uptime'
type: DEPENDENT
key: hadoop.namenode.uptime
delay: '0'
history: 7d
value_type: FLOAT
units: s
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''java.lang:type=Runtime'')].Uptime.first()'
- type: MULTIPLIER
parameters:
- '0.001'
master_item:
key: hadoop.namenode.get
tags:
- tag: component
value: system
triggers:
# WARNING trigger: no data received for 30m; depends on 'NameNode: Service is unavailable'.
- uuid: 9fac0ae651ab40a08551945eb0a93b68
expression: 'nodata(/Hadoop by HTTP/hadoop.namenode.uptime,30m)=1'
name: 'NameNode: Failed to fetch NameNode API page'
event_name: 'NameNode: Failed to fetch NameNode API page (or no data for 30m)'
priority: WARNING
description: 'Zabbix has not received any data for items for the last 30 minutes.'
manual_close: 'YES'
dependencies:
- name: 'NameNode: Service is unavailable'
expression: 'last(/Hadoop by HTTP/net.tcp.service["tcp","{$HADOOP.NAMENODE.HOST}","{$HADOOP.NAMENODE.PORT}"])=0'
tags:
- tag: scope
value: availability
# INFO trigger: the last uptime value is below 10m.
- uuid: 84d866bc0dc3486d9c5dc9beefec8d31
expression: 'last(/Hadoop by HTTP/hadoop.namenode.uptime)<10m'
name: 'NameNode: Service has been restarted'
event_name: 'NameNode: Service has been restarted (uptime < 10m)'
priority: INFO
description: 'Uptime is less than 10 minutes.'
manual_close: 'YES'
tags:
- tag: scope
value: notice
# Dependent item: VolumeFailuresTotal of the FSNamesystem bean, from master item hadoop.namenode.info.
- uuid: 396eb8f791d54254b08ddee553d3d944
name: 'NameNode: Failed volumes'
type: DEPENDENT
key: hadoop.namenode.volume_failures_total
delay: '0'
history: 7d
description: 'Number of failed volumes.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=NameNode,name=FSNamesystem'')].VolumeFailuresTotal.first()'
master_item:
key: hadoop.namenode.info
tags:
- tag: component
value: namenode
triggers:
# AVERAGE trigger: the minimum value over 15m is above 0.
- uuid: fcf791b6d0594dbb9ddfc3f93bc94825
expression: 'min(/Hadoop by HTTP/hadoop.namenode.volume_failures_total,15m)>0'
name: 'NameNode: Cluster has volume failures'
priority: AVERAGE
description: 'HDFS now allows for disks to fail in place, without affecting DataNode operations, until a threshold value is reached. This is set on each DataNode via the dfs.datanode.failed.volumes.tolerated property; it defaults to 0, meaning that any volume failure will shut down the DataNode; on a production cluster where DataNodes typically have 6, 8, or 12 disks, setting this parameter to 1 or 2 is typically the best practice.'
tags:
- tag: scope
value: notice
# HTTP agent item: queries the RMNMInfo bean on the ResourceManager JMX endpoint.
# The JavaScript step parses the response, parses the string held in
# beans[0].LiveNodeManagers as JSON, and re-serialises it as the item value.
# NOTE(review): unlike the DataNodes collector there is no try/catch here, so a
# parse failure surfaces as the raw script error.
- uuid: 6d7546c5d15d4e478b2e87e35d5306b0
name: 'Hadoop: Get NodeManagers states'
type: HTTP_AGENT
key: hadoop.nodemanagers.get
history: '0'
trends: '0'
value_type: TEXT
preprocessing:
- type: JAVASCRIPT
parameters:
- 'return JSON.stringify(JSON.parse(JSON.parse(value).beans[0].LiveNodeManagers))'
url: '{$HADOOP.RESOURCEMANAGER.HOST}:{$HADOOP.RESOURCEMANAGER.PORT}/jmx?qry=Hadoop:service=ResourceManager,name=RMNMInfo'
tags:
- tag: component
value: raw
# HTTP agent item: fetches the whole /jmx document from the ResourceManager host/port macros.
# Stored as TEXT with history and trends set to '0'; it is the master item of
# 'ResourceManager: Get info' and 'ResourceManager: Uptime'.
- uuid: e693cff98ec74cc198ec6b5e973f116c
name: 'Hadoop: Get ResourceManager stats'
type: HTTP_AGENT
key: hadoop.resourcemanager.get
history: '0'
trends: '0'
value_type: TEXT
url: '{$HADOOP.RESOURCEMANAGER.HOST}:{$HADOOP.RESOURCEMANAGER.PORT}/jmx'
tags:
- tag: component
value: raw
# Dependent raw item: keeps only the ResourceManager service beans from the full
# JMX document (master item hadoop.resourcemanager.get). The ResourceManager
# metric items filter this array by exact bean name.
- uuid: 5b9200a5a39c41c2b4b88e7d41d90e7b
  name: 'ResourceManager: Get info'
  type: DEPENDENT
  key: hadoop.resourcemanager.info
  delay: '0'
  history: '0'
  trends: '0'
  value_type: TEXT
  preprocessing:
    - type: JSONPATH
      parameters:
        # '=~' takes a regular expression, not a glob: the former 'name=*'
        # meant "zero or more '=' characters". 'name=.*' states the intent
        # (any bean name under service=ResourceManager) explicitly.
        - '$.beans[?(@.name=~''Hadoop:service=ResourceManager,name=.*'')]'
      # If the step fails, pass an empty array downstream instead of an error.
      error_handler: CUSTOM_VALUE
      error_handler_params: '[]'
  master_item:
    key: hadoop.resourcemanager.get
  tags:
    - tag: component
      value: raw
# Dependent item: NumActiveNMs of the ClusterMetrics bean, from master item hadoop.resourcemanager.info.
# Unchanged values are discarded with a 6h heartbeat.
- uuid: 63d4fe7384044027b08b99698355fd8b
name: 'ResourceManager: Active NMs'
type: DEPENDENT
key: hadoop.resourcemanager.num_active_nm
delay: '0'
history: 7d
description: 'Number of Active NodeManagers.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=ResourceManager,name=ClusterMetrics'')].NumActiveNMs.first()'
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 6h
master_item:
key: hadoop.resourcemanager.info
tags:
- tag: component
value: resourcemanager
triggers:
# HIGH trigger: the maximum value over 5m equals 0.
- uuid: eb02a30f45394e4d84d9d7239002ed40
expression: 'max(/Hadoop by HTTP/hadoop.resourcemanager.num_active_nm,5m)=0'
name: 'ResourceManager: Cluster has no active NodeManagers'
priority: HIGH
description: 'Cluster is unable to execute any jobs without at least one NodeManager.'
tags:
- tag: scope
value: notice
# Dependent item: NumDecommissionedNMs of the ClusterMetrics bean, from master item hadoop.resourcemanager.info.
- uuid: 3fccfdd8738544ca8969ade842430fc8
name: 'ResourceManager: Decommissioned NMs'
type: DEPENDENT
key: hadoop.resourcemanager.num_decommissioned_nm
delay: '0'
history: 7d
description: 'Number of Decommissioned NodeManagers.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=ResourceManager,name=ClusterMetrics'')].NumDecommissionedNMs.first()'
master_item:
key: hadoop.resourcemanager.info
tags:
- tag: component
value: resourcemanager
# Dependent item: NumDecommissioningNMs of the ClusterMetrics bean, from master item hadoop.resourcemanager.info.
# Unchanged values are discarded with a 6h heartbeat.
- uuid: 9aad193a9e074575878e44aa96ff4237
name: 'ResourceManager: Decommissioning NMs'
type: DEPENDENT
key: hadoop.resourcemanager.num_decommissioning_nm
delay: '0'
history: 7d
description: 'Number of Decommissioning NodeManagers.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=ResourceManager,name=ClusterMetrics'')].NumDecommissioningNMs.first()'
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 6h
master_item:
key: hadoop.resourcemanager.info
tags:
- tag: component
value: resourcemanager
# Dependent item: NumLostNMs of the ClusterMetrics bean, from master item hadoop.resourcemanager.info.
# Unchanged values are discarded with a 6h heartbeat.
- uuid: c4bbf5295b2a44619e2b641468071f9b
name: 'ResourceManager: Lost NMs'
type: DEPENDENT
key: hadoop.resourcemanager.num_lost_nm
delay: '0'
history: 7d
description: 'Number of Lost NodeManagers.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=ResourceManager,name=ClusterMetrics'')].NumLostNMs.first()'
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 6h
master_item:
key: hadoop.resourcemanager.info
tags:
- tag: component
value: resourcemanager
# Dependent item: NumRebootedNMs of the ClusterMetrics bean, from master item hadoop.resourcemanager.info.
- uuid: b7791ce30e8f4aa7b5eea2ee7ca7eef9
name: 'ResourceManager: Rebooted NMs'
type: DEPENDENT
key: hadoop.resourcemanager.num_rebooted_nm
delay: '0'
history: 7d
description: 'Number of Rebooted NodeManagers.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=ResourceManager,name=ClusterMetrics'')].NumRebootedNMs.first()'
master_item:
key: hadoop.resourcemanager.info
tags:
- tag: component
value: resourcemanager
# Dependent item: NumShutdownNMs of the ClusterMetrics bean, from master item hadoop.resourcemanager.info.
- uuid: 666152b3bf544a29b9e58a9f417c0ab8
name: 'ResourceManager: Shutdown NMs'
type: DEPENDENT
key: hadoop.resourcemanager.num_shutdown_nm
delay: '0'
history: 7d
description: 'Number of Shutdown NodeManagers.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=ResourceManager,name=ClusterMetrics'')].NumShutdownNMs.first()'
master_item:
key: hadoop.resourcemanager.info
tags:
- tag: component
value: resourcemanager
# Dependent item: NumUnhealthyNMs of the ClusterMetrics bean, from master item hadoop.resourcemanager.info.
- uuid: e6aa4b4b29414f2fb1f06bd536552c1c
name: 'ResourceManager: Unhealthy NMs'
type: DEPENDENT
key: hadoop.resourcemanager.num_unhealthy_nm
delay: '0'
history: 7d
description: 'Number of Unhealthy NodeManagers.'
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.name==''Hadoop:service=ResourceManager,name=ClusterMetrics'')].NumUnhealthyNMs.first()'
master_item:
key: hadoop.resourcemanager.info
tags:
- tag: component
value: resourcemanager
triggers:
# AVERAGE trigger: the minimum value over 15m is above 0.
- uuid: 0f35a0fa7a404559a3df225b906f0653
expression: 'min(/Hadoop by HTTP/hadoop.resourcemanager.num_unhealthy_nm,15m)>0'
name: 'ResourceManager: Cluster has unhealthy NodeManagers'
priority: AVERAGE
description: 'YARN considers any node with disk utilization exceeding the value specified under the property yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage (in yarn-site.xml) to be unhealthy. Ample disk space is critical to ensure uninterrupted operation of a Hadoop cluster, and large numbers of unhealthyNodes (the number to alert on depends on the size of your cluster) should be quickly investigated and resolved.'
tags:
- tag: scope
value: notice
# Dependent item: RpcProcessingTimeAvgTime of the ResourceManager RPC bean, from
# master item hadoop.resourcemanager.info. Only the processing time is
# extracted, despite "queue" in the visible name.
# NOTE(review): the bean name embeds RPC port 8031; a ResourceManager whose RPC
# server listens on another port exposes a differently named bean and this item
# will then receive no value.
- uuid: c4c3195326e34ebcb57e5039beffce7c
  name: 'ResourceManager: RPC queue & processing time'
  type: DEPENDENT
  key: hadoop.resourcemanager.rpc_processing_time_avg
  delay: '0'
  history: 7d
  value_type: FLOAT
  units: s
  description: 'Average time spent on processing RPC requests.'
  preprocessing:
    - type: JSONPATH
      parameters:
        - '$.[?(@.name==''Hadoop:service=ResourceManager,name=RpcActivityForPort8031'')].RpcProcessingTimeAvgTime.first()'
    # Hadoop reports RpcProcessingTimeAvgTime in milliseconds; convert to the
    # seconds declared in `units` (same scaling as the Uptime items).
    - type: MULTIPLIER
      parameters:
        - '0.001'
  master_item:
    key: hadoop.resourcemanager.info
  tags:
    - tag: component
      value: resourcemanager
# Dependent item on the full JMX document (hadoop.resourcemanager.get):
# Uptime of the java.lang:type=Runtime bean, multiplied by 0.001 and stored with units 's'
# (presumably milliseconds -> seconds; confirm against the JMX attribute).
- uuid: 4e74ca69a84d441e95e2c20afd25fada
name: 'ResourceManager: Uptime'
type: DEPENDENT
key: hadoop.resourcemanager.uptime
delay: '0'
history: 7d
value_type: FLOAT
units: s
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''java.lang:type=Runtime'')].Uptime.first()'
- type: MULTIPLIER
parameters:
- '0.001'
master_item:
key: hadoop.resourcemanager.get
tags:
- tag: component
value: system
triggers:
# WARNING trigger: no data received for 30m; depends on 'ResourceManager: Service is unavailable'.
- uuid: 7d4d026992344602a199966a8308a571
expression: 'nodata(/Hadoop by HTTP/hadoop.resourcemanager.uptime,30m)=1'
name: 'ResourceManager: Failed to fetch ResourceManager API page'
event_name: 'ResourceManager: Failed to fetch ResourceManager API page (or no data for 30m)'
priority: WARNING
description: 'Zabbix has not received any data for items for the last 30 minutes.'
manual_close: 'YES'
dependencies:
- name: 'ResourceManager: Service is unavailable'
expression: 'last(/Hadoop by HTTP/net.tcp.service["tcp","{$HADOOP.RESOURCEMANAGER.HOST}","{$HADOOP.RESOURCEMANAGER.PORT}"])=0'
tags:
- tag: scope
value: availability
# INFO trigger: the last uptime value is below 10m.
- uuid: ade7cc30a4184ef89ed896bae56e0b18
expression: 'last(/Hadoop by HTTP/hadoop.resourcemanager.uptime)<10m'
name: 'ResourceManager: Service has been restarted'
event_name: 'ResourceManager: Service has been restarted (uptime < 10m)'
priority: INFO
description: 'Uptime is less than 10 minutes.'
manual_close: 'YES'
tags:
- tag: scope
value: notice
# Simple check: net.tcp.service.perf against the NameNode host/port macros (FLOAT, units s).
- uuid: 66a87b21d32c436bb2d2eb23ec328f91
name: 'NameNode: Service response time'
type: SIMPLE
key: 'net.tcp.service.perf["tcp","{$HADOOP.NAMENODE.HOST}","{$HADOOP.NAMENODE.PORT}"]'
history: 7d
value_type: FLOAT
units: s
description: 'Hadoop NameNode API performance.'
tags:
- tag: component
value: network
triggers:
# WARNING trigger: the minimum over 5m exceeds {$HADOOP.NAMENODE.RESPONSE_TIME.MAX.WARN};
# depends on 'NameNode: Service is unavailable'.
- uuid: 4e4a6ab28fe5492d8fe4e291b8a586dc
expression: 'min(/Hadoop by HTTP/net.tcp.service.perf["tcp","{$HADOOP.NAMENODE.HOST}","{$HADOOP.NAMENODE.PORT}"],5m)>{$HADOOP.NAMENODE.RESPONSE_TIME.MAX.WARN}'
name: 'NameNode: Service response time is too high'
event_name: 'NameNode: Service response time is too high (over {$HADOOP.NAMENODE.RESPONSE_TIME.MAX.WARN} for 5m)'
priority: WARNING
manual_close: 'YES'
dependencies:
- name: 'NameNode: Service is unavailable'
expression: 'last(/Hadoop by HTTP/net.tcp.service["tcp","{$HADOOP.NAMENODE.HOST}","{$HADOOP.NAMENODE.PORT}"])=0'
tags:
- tag: scope
value: performance
# Simple check: net.tcp.service.perf against the ResourceManager host/port macros (FLOAT, units s).
- uuid: 98b11f1156dc472fbce27ca053e01d4e
name: 'ResourceManager: Service response time'
type: SIMPLE
key: 'net.tcp.service.perf["tcp","{$HADOOP.RESOURCEMANAGER.HOST}","{$HADOOP.RESOURCEMANAGER.PORT}"]'
history: 7d
value_type: FLOAT
units: s
description: 'Hadoop ResourceManager API performance.'
tags:
- tag: component
value: network
triggers:
# WARNING trigger: the minimum over 5m exceeds {$HADOOP.RESOURCEMANAGER.RESPONSE_TIME.MAX.WARN};
# depends on 'ResourceManager: Service is unavailable'.
- uuid: e8e55f4c7e9e4823927a8c1345d3b941
expression: 'min(/Hadoop by HTTP/net.tcp.service.perf["tcp","{$HADOOP.RESOURCEMANAGER.HOST}","{$HADOOP.RESOURCEMANAGER.PORT}"],5m)>{$HADOOP.RESOURCEMANAGER.RESPONSE_TIME.MAX.WARN}'
name: 'ResourceManager: Service response time is too high'
event_name: 'ResourceManager: Service response time is too high (over {$HADOOP.RESOURCEMANAGER.RESPONSE_TIME.MAX.WARN} for 5m)'
priority: WARNING
manual_close: 'YES'
dependencies:
- name: 'ResourceManager: Service is unavailable'
expression: 'last(/Hadoop by HTTP/net.tcp.service["tcp","{$HADOOP.RESOURCEMANAGER.HOST}","{$HADOOP.RESOURCEMANAGER.PORT}"])=0'
tags:
- tag: scope
value: performance
# Simple check: net.tcp.service against the NameNode host/port macros, displayed through the
# 'Service state' value map. Unchanged values are discarded with a 10m heartbeat.
- uuid: 2c52d856e07e4524abf3c2ae4b47c6b6
name: 'NameNode: Service status'
type: SIMPLE
key: 'net.tcp.service["tcp","{$HADOOP.NAMENODE.HOST}","{$HADOOP.NAMENODE.PORT}"]'
history: 7d
description: 'Hadoop NameNode API port availability.'
valuemap:
name: 'Service state'
preprocessing:
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 10m
tags:
- tag: component
value: health
- tag: component
value: network
triggers:
# AVERAGE trigger: the last value equals 0. Other NameNode triggers list this one as a dependency.
- uuid: f7e16c4ec91e4c04b13b73ee817c71d7
expression: 'last(/Hadoop by HTTP/net.tcp.service["tcp","{$HADOOP.NAMENODE.HOST}","{$HADOOP.NAMENODE.PORT}"])=0'
name: 'NameNode: Service is unavailable'
priority: AVERAGE
manual_close: 'YES'
tags:
- tag: scope
value: availability
# Simple check: net.tcp.service against the ResourceManager host/port macros, displayed through
# the 'Service state' value map. Unchanged values are discarded with a 10m heartbeat.
- uuid: 615b75c42ebe471da798a0613667d499
name: 'ResourceManager: Service status'
type: SIMPLE
key: 'net.tcp.service["tcp","{$HADOOP.RESOURCEMANAGER.HOST}","{$HADOOP.RESOURCEMANAGER.PORT}"]'
history: 7d
description: 'Hadoop ResourceManager API port availability.'
valuemap:
name: 'Service state'
preprocessing:
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 10m
tags:
- tag: component
value: health
- tag: component
value: network
triggers:
# AVERAGE trigger: the last value equals 0. Other ResourceManager triggers list this one as a dependency.
- uuid: a9ac7ede0c004fe18ab9f1fee36ad2b2
expression: 'last(/Hadoop by HTTP/net.tcp.service["tcp","{$HADOOP.RESOURCEMANAGER.HOST}","{$HADOOP.RESOURCEMANAGER.PORT}"])=0'
name: 'ResourceManager: Service is unavailable'
priority: AVERAGE
manual_close: 'YES'
tags:
- tag: scope
value: availability
# Low-level discovery rules of the template.
discovery_rules:
# DataNode discovery: HTTP agent rule with a 1h delay. Its prototypes are keyed by {#HOSTNAME}.
- uuid: 0f05e90a6fc547d18f291ae2264db9d1
name: 'Data node discovery'
type: HTTP_AGENT
key: hadoop.datanode.discovery
delay: 1h
item_prototypes:
# Dependent prototype: $.adminState from the per-node record in hadoop.datanode.raw_info[{#HOSTNAME}]
# (CHAR). Unchanged values are discarded with a 6h heartbeat.
- uuid: ef570f8b37c545bd880b7df20bd19f06
name: '{#HOSTNAME}: Admin state'
type: DEPENDENT
key: 'hadoop.datanode.admin_state[{#HOSTNAME}]'
delay: '0'
history: 7d
trends: '0'
value_type: CHAR
description: 'Administrative state.'
preprocessing:
- type: JSONPATH
parameters:
- $.adminState
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 6h
master_item:
key: 'hadoop.datanode.raw_info[{#HOSTNAME}]'
tags:
- tag: component
value: datanode
# Dependent prototype: DfsUsed of the DataNode FSDatasetState bean (units B), from the node's
# own JMX document in hadoop.datanode.get[{#HOSTNAME}].
- uuid: 14904ca75991456784d2082c14b7ec88
name: '{#HOSTNAME}: Used'
type: DEPENDENT
key: 'hadoop.datanode.dfs_used[{#HOSTNAME}]'
delay: '0'
history: 7d
units: B
description: 'Used disk space.'
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''Hadoop:service=DataNode,name=FSDatasetState'')].DfsUsed.first()'
master_item:
key: 'hadoop.datanode.get[{#HOSTNAME}]'
tags:
- tag: component
value: capacity
# HTTP agent prototype: fetches {#INFOADDR}/jmx for each discovered DataNode. Stored as TEXT
# with history and trends set to '0'; master item for the per-node JMX metrics.
- uuid: 6d2d030b3ddb41a394faede737329bbb
name: 'Hadoop DataNode {#HOSTNAME}: Get stats'
type: HTTP_AGENT
key: 'hadoop.datanode.get[{#HOSTNAME}]'
history: '0'
trends: '0'
value_type: TEXT
url: '{#INFOADDR}/jmx'
tags:
- tag: component
value: raw
# Dependent prototype: GcTimeMillis of the DataNode JvmMetrics bean, from hadoop.datanode.get[{#HOSTNAME}].
- uuid: 01bc20e53e314089a55b270961062c00
name: '{#HOSTNAME}: JVM Garbage collection time'
type: DEPENDENT
key: 'hadoop.datanode.jvm.gc_time[{#HOSTNAME}]'
delay: '0'
history: 7d
units: '!ms'
description: 'The JVM garbage collection time in milliseconds.'
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''Hadoop:service=DataNode,name=JvmMetrics'')].GcTimeMillis.first()'
master_item:
key: 'hadoop.datanode.get[{#HOSTNAME}]'
tags:
- tag: component
value: datanode
# Dependent prototype: MemHeapUsedM of the DataNode JvmMetrics bean (FLOAT), from hadoop.datanode.get[{#HOSTNAME}].
- uuid: 4cae9eef95f24810a6607de5348b7b54
name: '{#HOSTNAME}: JVM Heap usage'
type: DEPENDENT
key: 'hadoop.datanode.jvm.mem_heap_used[{#HOSTNAME}]'
delay: '0'
history: 7d
value_type: FLOAT
units: '!MB'
description: 'The JVM heap usage in MBytes.'
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''Hadoop:service=DataNode,name=JvmMetrics'')].MemHeapUsedM.first()'
master_item:
key: 'hadoop.datanode.get[{#HOSTNAME}]'
tags:
- tag: component
value: capacity
# Dependent prototype: ThreadCount of the java.lang:type=Threading bean, from hadoop.datanode.get[{#HOSTNAME}].
- uuid: dc30742dba2e4e5d99ca237615ffaef3
name: '{#HOSTNAME}: JVM Threads'
type: DEPENDENT
key: 'hadoop.datanode.jvm.threads[{#HOSTNAME}]'
delay: '0'
history: 7d
description: 'The number of JVM threads.'
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''java.lang:type=Threading'')].ThreadCount.first()'
master_item:
key: 'hadoop.datanode.get[{#HOSTNAME}]'
tags:
- tag: component
value: datanode
# Dependent prototype: NumFailedVolumes of the DataNode FSDatasetState bean, from hadoop.datanode.get[{#HOSTNAME}].
- uuid: 57c00b46aef94c018806cdae43adfab5
name: '{#HOSTNAME}: Number of failed volumes'
type: DEPENDENT
key: 'hadoop.datanode.numfailedvolumes[{#HOSTNAME}]'
delay: '0'
history: 7d
description: 'Number of failed storage volumes.'
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''Hadoop:service=DataNode,name=FSDatasetState'')].NumFailedVolumes.first()'
master_item:
key: 'hadoop.datanode.get[{#HOSTNAME}]'
tags:
- tag: component
value: datanode
# Dependent prototype: $.operState from the per-node record in hadoop.datanode.raw_info[{#HOSTNAME}]
# (CHAR). Unchanged values are discarded with a 6h heartbeat.
- uuid: a6541492d4f7426b8016d1a8932b87ce
name: '{#HOSTNAME}: Oper state'
type: DEPENDENT
key: 'hadoop.datanode.oper_state[{#HOSTNAME}]'
delay: '0'
history: 7d
trends: '0'
value_type: CHAR
description: 'Operational state.'
preprocessing:
- type: JSONPATH
parameters:
- $.operState
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 6h
master_item:
key: 'hadoop.datanode.raw_info[{#HOSTNAME}]'
tags:
- tag: component
value: datanode
trigger_prototypes:
# AVERAGE trigger prototype: the last value differs from "Live".
- uuid: 9f657289a04041e5bcaa1947f62f607d
expression: 'last(/Hadoop by HTTP/hadoop.datanode.oper_state[{#HOSTNAME}])<>"Live"'
name: '{#HOSTNAME}: DataNode has state {ITEM.VALUE}.'
priority: AVERAGE
description: 'The state is different from normal.'
tags:
- tag: scope
value: notice
# Dependent prototype: selects from the template-level hadoop.datanodes.get array the record whose
# HostName equals {#HOSTNAME}. The value is discarded when the JSONPath step fails.
# Master item of the Admin state, Oper state and Version prototypes.
- uuid: e1f9badba66147e6aaa2f895e6638fb9
name: 'Hadoop DataNode {#HOSTNAME}: Get raw info'
type: DEPENDENT
key: 'hadoop.datanode.raw_info[{#HOSTNAME}]'
delay: '0'
history: '0'
trends: '0'
value_type: TEXT
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.HostName==''{#HOSTNAME}'')].first()'
error_handler: DISCARD_VALUE
master_item:
key: hadoop.datanodes.get
tags:
- tag: component
value: raw
# Dependent prototype: Remaining of the DataNode FSDatasetState bean (units B), from hadoop.datanode.get[{#HOSTNAME}].
- uuid: 5a46ec3c89eb40d4ad57cec2080c66f8
name: '{#HOSTNAME}: Remaining'
type: DEPENDENT
key: 'hadoop.datanode.remaining[{#HOSTNAME}]'
delay: '0'
history: 7d
units: B
description: 'Remaining disk space.'
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''Hadoop:service=DataNode,name=FSDatasetState'')].Remaining.first()'
master_item:
key: 'hadoop.datanode.get[{#HOSTNAME}]'
tags:
- tag: component
value: capacity
# Dependent prototype: Uptime of the java.lang:type=Runtime bean from hadoop.datanode.get[{#HOSTNAME}],
# multiplied by 0.001 and stored with units 's' (presumably milliseconds -> seconds; confirm).
- uuid: 2ac19ff8ee7f480f9974be56ab06eaaf
name: '{#HOSTNAME}: Uptime'
type: DEPENDENT
key: 'hadoop.datanode.uptime[{#HOSTNAME}]'
delay: '0'
history: 7d
value_type: FLOAT
units: s
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''java.lang:type=Runtime'')].Uptime.first()'
- type: MULTIPLIER
parameters:
- '0.001'
master_item:
key: 'hadoop.datanode.get[{#HOSTNAME}]'
tags:
- tag: component
value: system
trigger_prototypes:
# WARNING trigger prototype: no data for 30m; depends on the node's 'DataNode has state' trigger.
- uuid: 3eccb9daf76f4bde88b424cf6f2d21f6
expression: 'nodata(/Hadoop by HTTP/hadoop.datanode.uptime[{#HOSTNAME}],30m)=1'
name: '{#HOSTNAME}: Failed to fetch DataNode API page'
event_name: '{#HOSTNAME}: Failed to fetch DataNode API page (or no data for 30m)'
priority: WARNING
description: 'Zabbix has not received any data for items for the last 30 minutes.'
manual_close: 'YES'
dependencies:
- name: '{#HOSTNAME}: DataNode has state {ITEM.VALUE}.'
expression: 'last(/Hadoop by HTTP/hadoop.datanode.oper_state[{#HOSTNAME}])<>"Live"'
tags:
- tag: scope
value: availability
# INFO trigger prototype: the last uptime value is below 10m.
- uuid: e40298d300764251abcf93d5df3d9a67
expression: 'last(/Hadoop by HTTP/hadoop.datanode.uptime[{#HOSTNAME}])<10m'
name: '{#HOSTNAME}: Service has been restarted'
event_name: '{#HOSTNAME}: Service has been restarted (uptime < 10m)'
priority: INFO
description: 'Uptime is less than 10 minutes.'
manual_close: 'YES'
tags:
- tag: scope
value: notice
# Dependent prototype: $.version from the per-node record in hadoop.datanode.raw_info[{#HOSTNAME}]
# (CHAR). Unchanged values are discarded with a 6h heartbeat.
- uuid: 62b4ca9b1e8a43aa89fbeb78ac16c8cf
name: '{#HOSTNAME}: Version'
type: DEPENDENT
key: 'hadoop.datanode.version[{#HOSTNAME}]'
delay: '0'
history: 7d
trends: '0'
value_type: CHAR
description: 'DataNode software version.'
preprocessing:
- type: JSONPATH
parameters:
- $.version
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 6h
master_item:
key: 'hadoop.datanode.raw_info[{#HOSTNAME}]'
tags:
- tag: component
value: system
# Graph prototypes of the DataNode discovery rule.
graph_prototypes:
# Stacked graph per DataNode with two filled regions: dfs_used and remaining.
- uuid: c497416bcce1416ebcede7fc491ccdba
name: '{#HOSTNAME}: DataNode {#HOSTNAME} DFS size'
type: STACKED
graph_items:
- drawtype: FILLED_REGION
color: 199C0D
item:
host: 'Hadoop by HTTP'
key: 'hadoop.datanode.dfs_used[{#HOSTNAME}]'
- sortorder: '1'
drawtype: FILLED_REGION
color: F63100
item:
host: 'Hadoop by HTTP'
key: 'hadoop.datanode.remaining[{#HOSTNAME}]'
# Request and LLD payload of the DataNode discovery rule: queries the NameNodeInfo
# bean and emits one {#HOSTNAME}/{#INFOADDR} row per DataNode found in any of the
# per-state node maps.
url: '{$HADOOP.NAMENODE.HOST}:{$HADOOP.NAMENODE.PORT}/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo'
preprocessing:
  - type: JAVASCRIPT
    parameters:
      - |
        var result = [];

        // Append one LLD row per node of the JSON-encoded map `raw`. A bean
        // attribute that is absent or empty contributes no rows instead of
        // failing the whole discovery.
        function getNodes(raw) {
            var nodes = raw ? JSON.parse(raw) : {};
            Object.keys(nodes).forEach(function (field) {
                var Node = {};
                Node['{#HOSTNAME}'] = field || '';
                Node['{#INFOADDR}'] = nodes[field].infoAddr || '';
                result.push(Node);
            });
        }

        try {
            // `var` keeps the parsed document local to this script.
            var info = JSON.parse(value).beans[0];
            getNodes(info.LiveNodes);
            getNodes(info.DeadNodes);
            getNodes(info.DecomNodes);
            getNodes(info.EnteringMaintenanceNodes);
            return JSON.stringify(result);
        }
        catch (error) {
            throw 'Failed to process response received from Hadoop.';
        }
- uuid: de2d5f97843345668bc0b8c8336b9c14
# Discovery rule: HTTP agent item polled every hour; its item prototypes follow.
name: 'Node manager discovery'
type: HTTP_AGENT
key: hadoop.nodemanager.discovery
delay: 1h
item_prototypes:
# Item prototype: available memory of the NodeManager, displayed with the fixed unit "MB".
- uuid: ffa4704e099a4f1a8b49add245938501
name: '{#HOSTNAME}: Available memory'
type: DEPENDENT
key: 'hadoop.nodemanager.availablememory[{#HOSTNAME}]'
delay: '0'
history: 7d
units: '!MB'
# Reads the AvailableMemoryMB field of the per-node raw info record.
preprocessing:
- type: JSONPATH
parameters:
- $.AvailableMemoryMB
master_item:
key: 'hadoop.nodemanager.raw_info[{#HOSTNAME}]'
tags:
- tag: component
value: memory
# Item prototype: average container launch duration, stored as a float.
- uuid: e8d0ea2c96b643f899e370ab73c5c262
name: '{#HOSTNAME}: Container launch avg duration'
type: DEPENDENT
key: 'hadoop.nodemanager.container_launch_duration_avg[{#HOSTNAME}]'
delay: '0'
history: 7d
value_type: FLOAT
# Takes ContainerLaunchDurationAvgTime from the NodeManagerMetrics bean (first match).
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''Hadoop:service=NodeManager,name=NodeManagerMetrics'')].ContainerLaunchDurationAvgTime.first()'
# Derived from the NodeManager's own /jmx response.
master_item:
key: 'hadoop.nodemanager.get[{#HOSTNAME}]'
tags:
- tag: component
value: nodemanager
# Item prototype: raw /jmx response of the discovered NodeManager.
# It is the master item for the dependent items keyed hadoop.nodemanager.get[{#HOSTNAME}].
- uuid: 23c89dfb26a34b77bf34fcf543f719f2
name: 'Hadoop NodeManager {#HOSTNAME}: Get stats'
type: HTTP_AGENT
key: 'hadoop.nodemanager.get[{#HOSTNAME}]'
# Neither history nor trends are kept for the raw text.
history: '0'
trends: '0'
value_type: TEXT
# The address comes from the {#NODEHTTPADDRESS} discovery macro.
url: '{#NODEHTTPADDRESS}/jmx'
tags:
- tag: component
value: raw
# Item prototype: JVM garbage collection time, displayed with the fixed unit "ms".
- uuid: 82e289c999a246a6bd1feb85349d0348
name: '{#HOSTNAME}: JVM Garbage collection time'
type: DEPENDENT
key: 'hadoop.nodemanager.jvm.gc_time[{#HOSTNAME}]'
delay: '0'
history: 7d
units: '!ms'
description: 'The JVM garbage collection time in milliseconds.'
# Takes GcTimeMillis from the NodeManager JvmMetrics bean (first match).
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''Hadoop:service=NodeManager,name=JvmMetrics'')].GcTimeMillis.first()'
master_item:
key: 'hadoop.nodemanager.get[{#HOSTNAME}]'
tags:
- tag: component
value: nodemanager
# Item prototype: JVM heap usage, stored as a float and displayed with the fixed unit "MB".
- uuid: 4032f0a266c44b34896e8179bbed2419
name: '{#HOSTNAME}: JVM Heap usage'
type: DEPENDENT
key: 'hadoop.nodemanager.jvm.mem_heap_used[{#HOSTNAME}]'
delay: '0'
history: 7d
value_type: FLOAT
units: '!MB'
description: 'The JVM heap usage in MBytes.'
# Takes MemHeapUsedM from the NodeManager JvmMetrics bean (first match).
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''Hadoop:service=NodeManager,name=JvmMetrics'')].MemHeapUsedM.first()'
master_item:
key: 'hadoop.nodemanager.get[{#HOSTNAME}]'
tags:
- tag: component
value: nodemanager
# Item prototype: number of JVM threads of the NodeManager process.
- uuid: d7485913b2db4e31a8f02f63f8c18913
name: '{#HOSTNAME}: JVM Threads'
type: DEPENDENT
key: 'hadoop.nodemanager.jvm.threads[{#HOSTNAME}]'
delay: '0'
history: 7d
description: 'The number of JVM threads.'
# Takes ThreadCount from the java.lang:type=Threading bean (first match).
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''java.lang:type=Threading'')].ThreadCount.first()'
master_item:
key: 'hadoop.nodemanager.get[{#HOSTNAME}]'
tags:
- tag: component
value: nodemanager
# Item prototype: number of containers reported for the NodeManager.
# Kept at the default numeric value type with trends enabled, like the other
# numeric items fed by the same raw_info master item, so the count can be
# graphed and aggregated.
- uuid: 662cafd31e194db8808c75789bf712eb
  name: '{#HOSTNAME}: Number of containers'
  type: DEPENDENT
  key: 'hadoop.nodemanager.numcontainers[{#HOSTNAME}]'
  delay: '0'
  history: 7d
  # Reads the NumContainers field of the per-node raw info record.
  preprocessing:
    - type: JSONPATH
      parameters:
        - $.NumContainers
  master_item:
    key: 'hadoop.nodemanager.raw_info[{#HOSTNAME}]'
  tags:
    - tag: component
      value: nodemanager
# Item prototype: per-node record cut out of the cluster-wide hadoop.nodemanagers.get item.
# It is the master item for the dependent items keyed hadoop.nodemanager.raw_info[{#HOSTNAME}].
- uuid: a8b11a7fb3244792abf4ffa9461e4712
name: 'Hadoop NodeManager {#HOSTNAME}: Get raw info'
type: DEPENDENT
key: 'hadoop.nodemanager.raw_info[{#HOSTNAME}]'
delay: '0'
# Neither history nor trends are kept for the raw text.
history: '0'
trends: '0'
value_type: TEXT
# Selects the first element whose HostName equals {#HOSTNAME}; if the step fails, the value is discarded.
preprocessing:
- type: JSONPATH
parameters:
- '$.[?(@.HostName==''{#HOSTNAME}'')].first()'
error_handler: DISCARD_VALUE
master_item:
key: hadoop.nodemanagers.get
tags:
- tag: component
value: raw
# Item prototype: average RPC processing time of the NodeManager, stored as a float.
- uuid: 01a5bcdbfc1c4a84a471738998aed372
name: '{#HOSTNAME}: RPC queue & processing time'
type: DEPENDENT
key: 'hadoop.nodemanager.rpc_processing_time_avg[{#HOSTNAME}]'
delay: '0'
history: 7d
value_type: FLOAT
description: 'Average time spent on processing RPC requests.'
# The bean name is fixed to RpcActivityForPort8040, so only that port's RPC bean is matched.
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''Hadoop:service=NodeManager,name=RpcActivityForPort8040'')].RpcProcessingTimeAvgTime.first()'
master_item:
key: 'hadoop.nodemanager.get[{#HOSTNAME}]'
tags:
- tag: component
value: nodemanager
# Item prototype: NodeManager state, stored as a character value without trends.
- uuid: bab9c705d31e42ce9af65b396e18504b
name: '{#HOSTNAME}: State'
type: DEPENDENT
key: 'hadoop.nodemanager.state[{#HOSTNAME}]'
delay: '0'
history: 7d
trends: '0'
value_type: CHAR
description: 'State of the node - valid values are: NEW, RUNNING, UNHEALTHY, DECOMMISSIONING, DECOMMISSIONED, LOST, REBOOTED, SHUTDOWN.'
# Step 1 extracts $.State; step 2 discards unchanged values with a 6h heartbeat.
preprocessing:
- type: JSONPATH
parameters:
- $.State
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 6h
master_item:
key: 'hadoop.nodemanager.raw_info[{#HOSTNAME}]'
tags:
- tag: component
value: nodemanager
# Trigger prototype: AVERAGE problem whenever the last state is anything other than "RUNNING".
trigger_prototypes:
- uuid: 8752a292093347fcb16d3f06dd97c5c3
expression: 'last(/Hadoop by HTTP/hadoop.nodemanager.state[{#HOSTNAME}])<>"RUNNING"'
name: '{#HOSTNAME}: NodeManager has state {ITEM.VALUE}.'
priority: AVERAGE
description: 'The state is different from normal.'
tags:
- tag: scope
value: notice
# Item prototype: NodeManager uptime in seconds, stored as a float.
- uuid: f8f6799130d34848a7dfb65815939c48
name: '{#HOSTNAME}: Uptime'
type: DEPENDENT
key: 'hadoop.nodemanager.uptime[{#HOSTNAME}]'
delay: '0'
history: 7d
value_type: FLOAT
units: s
# Step 1 takes Uptime from the java.lang:type=Runtime bean; step 2 multiplies it by 0.001.
# NOTE(review): the multiplier suggests the source value is in milliseconds - confirm.
preprocessing:
- type: JSONPATH
parameters:
- '$.beans[?(@.name==''java.lang:type=Runtime'')].Uptime.first()'
- type: MULTIPLIER
parameters:
- '0.001'
master_item:
key: 'hadoop.nodemanager.get[{#HOSTNAME}]'
tags:
- tag: component
value: system
trigger_prototypes:
# WARNING when the uptime item has delivered no data for 30 minutes.
- uuid: 6f8a6308d4334dd9bebe7af2fa3fb831
expression: 'nodata(/Hadoop by HTTP/hadoop.nodemanager.uptime[{#HOSTNAME}],30m)=1'
name: '{#HOSTNAME}: Failed to fetch NodeManager API page'
event_name: '{#HOSTNAME}: Failed to fetch NodeManager API page (or no data for 30m)'
priority: WARNING
description: 'Zabbix has not received any data for items for the last 30 minutes.'
manual_close: 'YES'
# Declares a dependency on the NodeManager state trigger prototype (state differs from "RUNNING").
dependencies:
- name: '{#HOSTNAME}: NodeManager has state {ITEM.VALUE}.'
expression: 'last(/Hadoop by HTTP/hadoop.nodemanager.state[{#HOSTNAME}])<>"RUNNING"'
tags:
- tag: scope
value: availability
# INFO event when the last uptime value is below 10 minutes.
- uuid: 05f3cf8ed34f4a708df508f0e50e119d
expression: 'last(/Hadoop by HTTP/hadoop.nodemanager.uptime[{#HOSTNAME}])<10m'
name: '{#HOSTNAME}: Service has been restarted'
event_name: '{#HOSTNAME}: Service has been restarted (uptime < 10m)'
priority: INFO
description: 'Uptime is less than 10 minutes.'
manual_close: 'YES'
tags:
- tag: scope
value: notice
# Item prototype: used memory of the NodeManager, displayed with the fixed unit "MB".
- uuid: d92b66e61a5244a995693ab8aedee96e
name: '{#HOSTNAME}: Used memory'
type: DEPENDENT
key: 'hadoop.nodemanager.usedmemory[{#HOSTNAME}]'
delay: '0'
history: 7d
units: '!MB'
# Reads the UsedMemoryMB field of the per-node raw info record.
preprocessing:
- type: JSONPATH
parameters:
- $.UsedMemoryMB
master_item:
key: 'hadoop.nodemanager.raw_info[{#HOSTNAME}]'
tags:
- tag: component
value: memory
# Item prototype: NodeManager software version, stored as a character value without trends.
- uuid: c4d46de2d6d341f5a2c1826236f94e5e
name: '{#HOSTNAME}: Version'
type: DEPENDENT
key: 'hadoop.nodemanager.version[{#HOSTNAME}]'
delay: '0'
history: 7d
trends: '0'
value_type: CHAR
# Step 1 extracts $.NodeManagerVersion; step 2 discards unchanged values with a 6h heartbeat.
preprocessing:
- type: JSONPATH
parameters:
- $.NodeManagerVersion
- type: DISCARD_UNCHANGED_HEARTBEAT
parameters:
- 6h
master_item:
key: 'hadoop.nodemanager.raw_info[{#HOSTNAME}]'
tags:
- tag: component
value: system
# Source of the NodeManager discovery: the RMNMInfo JMX bean of the ResourceManager.
url: '{$HADOOP.RESOURCEMANAGER.HOST}:{$HADOOP.RESOURCEMANAGER.PORT}/jmx?qry=Hadoop:service=ResourceManager,name=RMNMInfo'
# The script parses the response, decodes the JSON string in beans[0].LiveNodeManagers and
# emits one row per entry with {#HOSTNAME} (from HostName) and {#NODEHTTPADDRESS} (from NodeHTTPAddress).
# Missing fields become empty strings; any processing error is rethrown as one generic message.
preprocessing:
- type: JAVASCRIPT
parameters:
- |
try {
parsed = JSON.parse(value);
var result = [];
function getNodes(nodes) {
Object.keys(nodes).forEach(function (field) {
var Node = {};
Node['{#HOSTNAME}'] = nodes[field].HostName || '';
Node['{#NODEHTTPADDRESS}'] = nodes[field].NodeHTTPAddress || '';
result.push(Node);
});
}
getNodes(JSON.parse(parsed.beans[0].LiveNodeManagers));
return JSON.stringify(result);
}
catch (error) {
throw 'Failed to process response received from Hadoop.';
}
tags:
# Two tags are listed: class=application and target=hadoop.
- tag: class
value: application
- tag: target
value: hadoop
# User macros with their default values.
macros:
# Threshold (percent) for the remaining cluster capacity trigger.
- macro: '{$HADOOP.CAPACITY_REMAINING.MIN.WARN}'
value: '20'
description: 'The Hadoop cluster capacity remaining percent for trigger expression.'
# NameNode endpoint: the default host value "NameNode" is a placeholder name, not an address.
- macro: '{$HADOOP.NAMENODE.HOST}'
value: NameNode
description: 'The Hadoop NameNode host IP address or FQDN.'
- macro: '{$HADOOP.NAMENODE.PORT}'
value: '9870'
description: 'The Hadoop NameNode Web-UI port.'
- macro: '{$HADOOP.NAMENODE.RESPONSE_TIME.MAX.WARN}'
value: 10s
description: 'The Hadoop NameNode API page maximum response time in seconds for trigger expression.'
# ResourceManager endpoint: the default host value "ResourceManager" is likewise a placeholder name.
- macro: '{$HADOOP.RESOURCEMANAGER.HOST}'
value: ResourceManager
description: 'The Hadoop ResourceManager host IP address or FQDN.'
- macro: '{$HADOOP.RESOURCEMANAGER.PORT}'
value: '8088'
description: 'The Hadoop ResourceManager Web-UI port.'
- macro: '{$HADOOP.RESOURCEMANAGER.RESPONSE_TIME.MAX.WARN}'
value: 10s
description: 'The Hadoop ResourceManager API page maximum response time in seconds for trigger expression.'
# Template dashboard with a single page "Main": four graph widgets and six item widgets.
dashboards:
- uuid: 474829439a064a6ba1c33d8d81e6e832
name: 'Hadoop: Overview'
pages:
- name: Main
widgets:
# Graph widget (y=2, 12x5, no explicit x) bound to the item hadoop.namenode.rpc_processing_time_avg.
- type: graph
'y': '2'
width: '12'
height: '5'
fields:
- type: INTEGER
name: source_type
value: '1'
- type: ITEM
name: itemid
value:
host: 'Hadoop by HTTP'
key: hadoop.namenode.rpc_processing_time_avg
# Graph widget (x=12, y=2, 12x5) bound to the item hadoop.resourcemanager.rpc_processing_time_avg.
- type: graph
x: '12'
'y': '2'
width: '12'
height: '5'
fields:
- type: INTEGER
name: source_type
value: '1'
- type: ITEM
name: itemid
value:
host: 'Hadoop by HTTP'
key: hadoop.resourcemanager.rpc_processing_time_avg
# Graph widget (y=7, 12x5, no explicit x) showing the graph named 'NameNode: DataNodes'.
- type: graph
'y': '7'
width: '12'
height: '5'
fields:
- type: GRAPH
name: graphid
value:
host: 'Hadoop by HTTP'
name: 'NameNode: DataNodes'
# Graph widget (x=12, y=7, 12x5) showing the graph named 'NameNode: NMs'.
- type: graph
x: '12'
'y': '7'
width: '12'
height: '5'
fields:
- type: GRAPH
name: graphid
value:
host: 'Hadoop by HTTP'
name: 'NameNode: NMs'
# Item widget (x=8, width 4): NameNode TCP service response time.
# Two INTEGER fields share the name "show" (values 4 and 2).
- type: item
name: 'NameNode response time'
x: '8'
width: '4'
fields:
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: show
value: '2'
- type: ITEM
name: itemid
value:
host: 'Hadoop by HTTP'
key: 'net.tcp.service.perf["tcp","{$HADOOP.NAMENODE.HOST}","{$HADOOP.NAMENODE.PORT}"]'
# Item widget (width 4, no explicit x): NameNode TCP service status.
- type: item
name: 'NameNode status'
width: '4'
fields:
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: show
value: '2'
- type: ITEM
name: itemid
value:
host: 'Hadoop by HTTP'
key: 'net.tcp.service["tcp","{$HADOOP.NAMENODE.HOST}","{$HADOOP.NAMENODE.PORT}"]'
# Item widget (x=4, width 4): NameNode uptime; only one "show" field (value 2).
- type: item
name: 'NameNode uptime'
x: '4'
width: '4'
fields:
- type: INTEGER
name: show
value: '2'
- type: ITEM
name: itemid
value:
host: 'Hadoop by HTTP'
key: hadoop.namenode.uptime
# Item widget (x=20, width 4): ResourceManager TCP service response time.
- type: item
name: 'ResourceManager response time'
x: '20'
width: '4'
fields:
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: show
value: '2'
- type: ITEM
name: itemid
value:
host: 'Hadoop by HTTP'
key: 'net.tcp.service.perf["tcp","{$HADOOP.RESOURCEMANAGER.HOST}","{$HADOOP.RESOURCEMANAGER.PORT}"]'
# Item widget (x=12, width 4): ResourceManager TCP service status.
- type: item
name: 'ResourceManager status'
x: '12'
width: '4'
fields:
- type: INTEGER
name: show
value: '4'
- type: INTEGER
name: show
value: '2'
- type: ITEM
name: itemid
value:
host: 'Hadoop by HTTP'
key: 'net.tcp.service["tcp","{$HADOOP.RESOURCEMANAGER.HOST}","{$HADOOP.RESOURCEMANAGER.PORT}"]'
# Item widget (x=16, width 4): ResourceManager uptime; only one "show" field (value 2).
- type: item
name: 'ResourceManager uptime'
x: '16'
width: '4'
fields:
- type: INTEGER
name: show
value: '2'
- type: ITEM
name: itemid
value:
host: 'Hadoop by HTTP'
key: hadoop.resourcemanager.uptime
# Value map "Service state": 0 is shown as Down, 1 as Up.
valuemaps:
- uuid: 6c967c4df18d4c7ebb0fd4be17df292a
name: 'Service state'
mappings:
- value: '0'
newvalue: Down
- value: '1'
newvalue: Up
# Template-level graphs; the dashboard's graph widgets reference them by name.
graphs:
# Three series from NameNode items: live, dead and stale DataNode counts.
- uuid: 632a641116194105983cc581b1bd890d
name: 'NameNode: DataNodes'
graph_items:
- color: 199C0D
calc_fnc: ALL
item:
host: 'Hadoop by HTTP'
key: hadoop.namenode.num_live_data_nodes
- sortorder: '1'
color: F63100
calc_fnc: ALL
item:
host: 'Hadoop by HTTP'
key: hadoop.namenode.num_dead_data_nodes
- sortorder: '2'
color: 00611C
calc_fnc: ALL
item:
host: 'Hadoop by HTTP'
key: hadoop.namenode.num_stale_data_nodes
# Five series of NodeManager counts: active, lost, rebooted, shutdown and unhealthy.
# NOTE(review): the graph is named 'NameNode: NMs' although every item key is hadoop.resourcemanager.*.
# The dashboard references this graph by name, so a rename must be applied in both places.
- uuid: 96ef586f2f554d028efcbb75b4c7024b
name: 'NameNode: NMs'
graph_items:
- color: 199C0D
calc_fnc: ALL
item:
host: 'Hadoop by HTTP'
key: hadoop.resourcemanager.num_active_nm
- sortorder: '1'
color: F63100
calc_fnc: ALL
item:
host: 'Hadoop by HTTP'
key: hadoop.resourcemanager.num_lost_nm
- sortorder: '2'
color: 00611C
calc_fnc: ALL
item:
host: 'Hadoop by HTTP'
key: hadoop.resourcemanager.num_rebooted_nm
- sortorder: '3'
color: F7941D
calc_fnc: ALL
item:
host: 'Hadoop by HTTP'
key: hadoop.resourcemanager.num_shutdown_nm
- sortorder: '4'
color: FC6EA3
calc_fnc: ALL
item:
host: 'Hadoop by HTTP'
key: hadoop.resourcemanager.num_unhealthy_nm