1
0
Fork 0
mirror of https://github.com/shouptech/nagios-plugin-check-rabbitmq.git synced 2026-02-03 14:49:41 +00:00
nagios-plugin-check-rabbitmq/check_rabbitmq.py
Mike Shoup 0ba59c23bb Fix exit call on python 2.4.3
PEP 8 style fixes (pylint)
2015-04-29 08:58:14 -06:00

204 lines
7.1 KiB
Python
Executable file

#!/usr/bin/env python
#
# check_rabbitmq.py
"""A program for remotely checking the health of RabbitMQ instance. Requires
the management API available."""
from optparse import OptionParser
import sys
import urllib2
import base64
import math
try:
import json
except ImportError:
# simplejson can be used with Python 2.4
import simplejson as json
# Plugin version; main() appends it to "%prog " to build the parser's
# version string.
PLUGIN_VERSION = "0.1"
class RabbitAPIChecker(object):
    """Performs checks against the RabbitMQ API and returns the results.

    Every ``check_*`` method returns a ``(message, state_code)`` tuple, where
    ``message`` is the status line to print and ``state_code`` is one of the
    ``STATE_*`` constants below.

    The ``args`` list handed to each check is the positional command line:
    ``args[0]`` is the action (or alarm) name and ``args[1]``, when the check
    needs it, is the node name.
    """

    # Nagios status codes (Nagios expects one of these to be returned)
    STATE_OK = 0
    STATE_WARNING = 1
    STATE_CRITICAL = 2
    STATE_UNKNOWN = 3

    def __init__(self, hostname, username, password, port=15672):
        """Remember where the API lives and which credentials to send."""
        self.hostname = hostname
        self.username = username
        self.password = password
        self.port = port

    def _node_url(self, node):
        """Builds the API URL describing a single node."""
        return "http://%s:%s/api/nodes/%s" % (self.hostname, self.port, node)

    def _check_percentage_used(self, node, used_key, total_key, label,
                               critical, warning):
        """Compares used/total (as a rounded-up percentage) to the thresholds.

        ``used_key`` and ``total_key`` name the counters to read from the
        node's API document; ``label`` is the resource name used in the
        message. Returns UNKNOWN instead of raising when the counters are
        missing, not numeric, or the total is not positive.
        """
        result = self.fetch_from_api(self._node_url(node))
        try:
            used = float(result[used_key])
            total = float(result[total_key])
        except (KeyError, TypeError, ValueError):
            return ("UNKNOWN - %s statistics are not available for %s"
                    % (label, node), self.STATE_UNKNOWN)
        if total <= 0:
            # Avoids a ZeroDivisionError; a percentage is meaningless here.
            return ("UNKNOWN - total number of %s reported for %s is zero"
                    % (label, node), self.STATE_UNKNOWN)

        percent_used = math.ceil(100 * used / total)
        if percent_used >= critical:
            status, state_code = "CRITICAL", self.STATE_CRITICAL
        elif percent_used >= warning:
            status, state_code = "WARNING", self.STATE_WARNING
        else:
            status, state_code = "OK", self.STATE_OK
        message = "%s - %d%% of %s in use" % (status, percent_used, label)
        return (message, state_code)

    def check_triggered_alarm(self, args):
        """Checks the node for a triggered alarm.

        ``args[0]`` is the alarm field to read and ``args[1]`` the node name;
        an IndexError is raised when either is missing. Returns CRITICAL when
        the field is truthy, OK when it is falsy and UNKNOWN when the node's
        API document has no such field.
        """
        alarm = args[0]
        node = args[1]
        result = self.fetch_from_api(self._node_url(node))
        try:
            triggered = result[alarm]
        except KeyError:
            message = "UNKNOWN - %s is not a valid alarm for %s" % (alarm, node)
            return (message, self.STATE_UNKNOWN)
        if triggered:
            message = "CRITICAL - %s triggered for %s" % (alarm, node)
            return (message, self.STATE_CRITICAL)
        message = "OK - %s is not triggered for %s" % (alarm, node)
        return (message, self.STATE_OK)

    def check_sockets(self, args, critical=90, warning=80):
        """Checks the percentage of sockets used.

        ``args[1]`` is the node name (IndexError when missing). ``critical``
        and ``warning`` are percentages compared with ``>=``.
        """
        node = args[1]
        return self._check_percentage_used(
            node, 'sockets_used', 'sockets_total', "sockets",
            critical, warning)

    def check_fd(self, args, critical=90, warning=80):
        """Checks the percentage of file descriptors used.

        ``args[1]`` is the node name (IndexError when missing). ``critical``
        and ``warning`` are percentages compared with ``>=``.
        """
        node = args[1]
        return self._check_percentage_used(
            node, 'fd_used', 'fd_total', "file descriptors",
            critical, warning)

    def check_nodes(self, args=None, critical=2, warning=1):
        """Checks if all nodes on the cluster are running.

        ``critical`` and ``warning`` are counts of non-running nodes compared
        with ``>=``. ``args`` may be empty or hold only the action name;
        anything beyond that is reported as UNKNOWN.
        """
        # args[0] is the action name itself (main() passes the whole
        # positional list), so only entries after it are unexpected.
        if args and len(args) > 1:
            return ("UNKNOWN - Unexpected arguments found", self.STATE_UNKNOWN)

        url = "http://%s:%s/api/nodes" % (self.hostname, self.port)
        results = self.fetch_from_api(url)
        nodes_not_running = [node['name'] for node in results
                             if not node['running']]

        message = "OK - All nodes are running"
        state_code = self.STATE_OK
        if len(nodes_not_running) >= critical:
            message = "CRITICAL - Found nodes not running (%s)" % (
                ", ".join(nodes_not_running))
            state_code = self.STATE_CRITICAL
        elif len(nodes_not_running) >= warning:
            message = "WARNING - Found nodes not running (%s)" % (
                ", ".join(nodes_not_running))
            state_code = self.STATE_WARNING
        return (message, state_code)

    def fetch_from_api(self, url):
        """Calls the API and processes the JSON result.

        Sends an HTTP Basic ``Authorization`` header built from the stored
        username and password and returns the decoded JSON document. Errors
        from ``urllib2.urlopen`` or ``json.load`` are not handled here and
        propagate to the caller.
        """
        request = urllib2.Request(url)
        # b64encode never inserts line breaks, so no newline stripping needed.
        credentials = base64.b64encode(
            '%s:%s' % (self.username, self.password))
        request.add_header("Authorization", "Basic %s" % credentials)
        http_result = urllib2.urlopen(request)
        try:
            return json.load(http_result)
        finally:
            # Release the connection even when the body is not valid JSON.
            http_result.close()
def main():
    """Main entry point for program.

    Parses the command line, runs the requested check against the RabbitMQ
    API and prints a single status line.

    Returns the Nagios state code for the caller to use as the process exit
    status. Usage errors, unknown actions, API/connection failures and
    unexpected API responses are all reported as STATE_UNKNOWN.
    """
    usage = "%prog [options] -H|--hostname HOST ACTION"
    parser = OptionParser(usage=usage,
                          version="%prog " + PLUGIN_VERSION)
    parser.add_option("-u", "--username", default="guest",
                      help="Username with monitoring access")
    parser.add_option("-p", "--password", default="guest",
                      help="Password for user with monitoring access")
    parser.add_option("-P", "--port", default=15672,
                      help="Port to run the API checks against")
    parser.add_option("-H", "--hostname",
                      help="Host to check")
    parser.add_option("-c", "--critical", type="int",
                      help="Critical level")
    parser.add_option("-w", "--warning", type="int",
                      help="Warning level")
    (options, args) = parser.parse_args()

    # Check for required arguments
    if len(args) < 1 or options.hostname is None:
        parser.print_usage()
        return RabbitAPIChecker.STATE_UNKNOWN

    checker = RabbitAPIChecker(options.hostname, options.username,
                               options.password, options.port)

    # Define actions available, will be found in args[0]
    actions = {'mem_alarm': checker.check_triggered_alarm,
               'disk_free_alarm': checker.check_triggered_alarm,
               'check_sockets': checker.check_sockets,
               'check_fd': checker.check_fd,
               'check_nodes': checker.check_nodes}
    # The alarm checks are plain on/off tests and accept no thresholds.
    actions_without_thresholds = ('mem_alarm', 'disk_free_alarm')

    # Resolve the action on its own so that a KeyError raised while the
    # check runs is not mistaken for an unknown action name.
    action_name = args[0]
    try:
        action = actions[action_name]
    except KeyError:
        print("UNKNOWN - %s is not a valid action" % action_name)
        return RabbitAPIChecker.STATE_UNKNOWN

    # Compare against None: 0 is a legitimate threshold but is falsy.
    thresholds = {}
    if options.critical is not None:
        thresholds['critical'] = options.critical
    if options.warning is not None:
        thresholds['warning'] = options.warning

    if thresholds and action_name in actions_without_thresholds:
        print("UNKNOWN - %s does not accept warning or critical levels"
              % action_name)
        return RabbitAPIChecker.STATE_UNKNOWN

    # sys.exc_info() is used instead of binding the exception in the except
    # clause so the syntax is accepted by Python 2.4 as well as newer
    # interpreters.
    try:
        (message, state_code) = action(args[0:], **thresholds)
    except urllib2.URLError:
        # URLError is the base class of HTTPError, so this covers both HTTP
        # error responses and failures to reach the host at all.
        print("UNKNOWN - %s" % sys.exc_info()[1])
        return RabbitAPIChecker.STATE_UNKNOWN
    except IndexError:
        print("UNKNOWN - %s requires one or more options" % action_name)
        return RabbitAPIChecker.STATE_UNKNOWN
    except KeyError:
        print("UNKNOWN - API response is missing field %s"
              % sys.exc_info()[1])
        return RabbitAPIChecker.STATE_UNKNOWN

    print(message)
    return state_code
# Script entry point: the state code returned by main() becomes the
# process exit status.
if "__main__" == __name__:
    EXIT_STATUS = main()
    sys.exit(EXIT_STATUS)