From 1e2387474a449452b78520b9ad96a8b4b5e99722 Mon Sep 17 00:00:00 2001 From: Harald Pfeiffer Date: Wed, 17 Apr 2019 19:07:19 +0200 Subject: initial commit of source fetch --- .../nagios/bin/pmp-check-aws-rds.py | 562 +++++++++++++++++++ .../nagios/bin/pmp-check-lvm-snapshots | 187 +++++++ .../nagios/bin/pmp-check-mongo.py | 594 +++++++++++++++++++++ .../nagios/bin/pmp-check-mysql-deadlocks | 189 +++++++ .../nagios/bin/pmp-check-mysql-deleted-files | 286 ++++++++++ .../nagios/bin/pmp-check-mysql-file-privs | 289 ++++++++++ .../nagios/bin/pmp-check-mysql-innodb | 368 +++++++++++++ .../nagios/bin/pmp-check-mysql-pidfile | 291 ++++++++++ .../nagios/bin/pmp-check-mysql-processlist | 323 +++++++++++ .../nagios/bin/pmp-check-mysql-replication-delay | 280 ++++++++++ .../nagios/bin/pmp-check-mysql-replication-running | 242 +++++++++ .../nagios/bin/pmp-check-mysql-status | 482 +++++++++++++++++ .../nagios/bin/pmp-check-mysql-ts-count | 210 ++++++++ .../nagios/bin/pmp-check-pt-table-checksum | 239 +++++++++ .../nagios/bin/pmp-check-unix-memory | 207 +++++++ 15 files changed, 4749 insertions(+) create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-aws-rds.py create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-lvm-snapshots create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mongo.py create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-deadlocks create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-deleted-files create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-file-privs create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-innodb create mode 100755 
nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-pidfile create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-processlist create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-replication-delay create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-replication-running create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-status create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-ts-count create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-pt-table-checksum create mode 100755 nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-unix-memory (limited to 'nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin') diff --git a/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-aws-rds.py b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-aws-rds.py new file mode 100755 index 0000000..73b7660 --- /dev/null +++ b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-aws-rds.py @@ -0,0 +1,562 @@ +#!/usr/bin/env python +"""Nagios plugin for Amazon RDS monitoring. 
+ +This program is part of $PROJECT_NAME$ +License: GPL License (see COPYING) + +Author Roman Vynar +Copyright 2014-2015 Percona LLC and/or its affiliates +""" + +import datetime +import optparse +import pprint +import sys + +import boto +import boto.rds +import boto.ec2.cloudwatch + +# Nagios status codes +OK = 0 +WARNING = 1 +CRITICAL = 2 +UNKNOWN = 3 + + +class RDS(object): + + """RDS connection class""" + + def __init__(self, region, profile=None, identifier=None): + """Get RDS instance details""" + self.region = region + self.profile = profile + self.identifier = identifier + + if self.region == 'all': + self.regions_list = [reg.name for reg in boto.rds.regions()] + else: + self.regions_list = [self.region] + + self.info = None + if self.identifier: + for reg in self.regions_list: + try: + rds = boto.rds.connect_to_region(reg, profile_name=self.profile) + self.info = rds.get_all_dbinstances(self.identifier) + except (boto.provider.ProfileNotFoundError, boto.exception.BotoServerError) as msg: + debug(msg) + else: + # Exit on the first region and identifier match + self.region = reg + break + + def get_info(self): + """Get RDS instance info""" + if self.info: + return self.info[0] + else: + return None + + def get_list(self): + """Get list of available instances by region(s)""" + result = dict() + for reg in self.regions_list: + try: + rds = boto.rds.connect_to_region(reg, profile_name=self.profile) + result[reg] = rds.get_all_dbinstances() + except (boto.provider.ProfileNotFoundError, boto.exception.BotoServerError) as msg: + debug(msg) + + return result + + def get_metric(self, metric, start_time, end_time, step): + """Get RDS metric from CloudWatch""" + cw_conn = boto.ec2.cloudwatch.connect_to_region(self.region, profile_name=self.profile) + result = cw_conn.get_metric_statistics( + step, + start_time, + end_time, + metric, + 'AWS/RDS', + 'Average', + dimensions={'DBInstanceIdentifier': [self.identifier]} + ) + if result: + if len(result) > 1: + # Get the 
last point + result = sorted(result, key=lambda k: k['Timestamp']) + result.reverse() + + result = float('%.2f' % result[0]['Average']) + + return result + + +def debug(val): + """Debugging output""" + global options + if options.debug: + print 'DEBUG: %s' % val + + +def main(): + """Main function""" + global options + + short_status = { + OK: 'OK', + WARNING: 'WARN', + CRITICAL: 'CRIT', + UNKNOWN: 'UNK' + } + + # DB instance classes as listed on + # http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.DBInstanceClass.html + db_classes = { + 'db.t1.micro': 0.615, + 'db.m1.small': 1.7, + 'db.m1.medium': 3.75, + 'db.m1.large': 7.5, + 'db.m1.xlarge': 15, + 'db.m4.large': 8, + 'db.m4.xlarge': 16, + 'db.m4.2xlarge': 32, + 'db.m4.4xlarge': 64, + 'db.m4.10xlarge': 160, + 'db.r3.large': 15, + 'db.r3.xlarge': 30.5, + 'db.r3.2xlarge': 61, + 'db.r3.4xlarge': 122, + 'db.r3.8xlarge': 244, + 'db.t2.micro': 1, + 'db.t2.small': 2, + 'db.t2.medium': 4, + 'db.t2.large': 8, + 'db.m3.medium': 3.75, + 'db.m3.large': 7.5, + 'db.m3.xlarge': 15, + 'db.m3.2xlarge': 30, + 'db.m2.xlarge': 17.1, + 'db.m2.2xlarge': 34.2, + 'db.m2.4xlarge': 68.4, + 'db.cr1.8xlarge': 244, + } + + # RDS metrics http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/rds-metricscollected.html + metrics = { + 'status': 'RDS availability', + 'load': 'CPUUtilization', + 'memory': 'FreeableMemory', + 'storage': 'FreeStorageSpace' + } + + units = ('percent', 'GB') + + # Parse options + parser = optparse.OptionParser() + parser.add_option('-l', '--list', help='list of all DB instances', + action='store_true', default=False, dest='db_list') + parser.add_option('-n', '--profile', default=None, + help='AWS profile from ~/.boto or /etc/boto.cfg. Default: None, fallbacks to "[Credentials]".') + parser.add_option('-r', '--region', default='us-east-1', + help='AWS region. Default: us-east-1. 
If set to "all", we try to detect the instance region ' + 'across all of them, note this will be slower than if you specify the region explicitly.') + parser.add_option('-i', '--ident', help='DB instance identifier') + parser.add_option('-p', '--print', help='print status and other details for a given DB instance', + action='store_true', default=False, dest='printinfo') + parser.add_option('-m', '--metric', help='metric to check: [%s]' % ', '.join(metrics.keys())) + parser.add_option('-w', '--warn', help='warning threshold') + parser.add_option('-c', '--crit', help='critical threshold') + parser.add_option('-u', '--unit', help='unit of thresholds for "storage" and "memory" metrics: [%s].' + 'Default: percent' % ', '.join(units), default='percent') + parser.add_option('-t', '--time', help='time period in minutes to query. Default: 5', + type='int', default=5) + parser.add_option('-a', '--avg', help='time average in minutes to request. Default: 1', + type='int', default=1) + parser.add_option('-f', '--forceunknown', help='force alerts on unknown status. This prevents issues related to ' + 'AWS Cloudwatch throttling limits Default: False', + action='store_true', default=False) + parser.add_option('-d', '--debug', help='enable debug output', + action='store_true', default=False) + options, _ = parser.parse_args() + + if options.debug: + boto.set_stream_logger('boto') + + rds = RDS(region=options.region, profile=options.profile, identifier=options.ident) + + # Check args + if len(sys.argv) == 1: + parser.print_help() + sys.exit() + elif options.db_list: + info = rds.get_list() + print 'List of all DB instances in %s region(s):' % (options.region,) + pprint.pprint(info) + sys.exit() + elif not options.ident: + parser.print_help() + parser.error('DB identifier is not set.') + elif options.printinfo: + info = rds.get_info() + if info: + pprint.pprint(vars(info)) + else: + print 'No DB instance "%s" found on your AWS account and %s region(s).' 
% (options.ident, options.region) + + sys.exit() + elif not options.metric or options.metric not in metrics.keys(): + parser.print_help() + parser.error('Metric is not set or not valid.') + elif not options.warn and options.metric != 'status': + parser.print_help() + parser.error('Warning threshold is not set.') + elif not options.crit and options.metric != 'status': + parser.print_help() + parser.error('Critical threshold is not set.') + elif options.avg <= 0 and options.metric != 'status': + parser.print_help() + parser.error('Average must be greater than zero.') + elif options.time <= 0 and options.metric != 'status': + parser.print_help() + parser.error('Time must be greater than zero.') + + now = datetime.datetime.utcnow() + status = None + note = '' + perf_data = None + + # RDS Status + if options.metric == 'status': + info = rds.get_info() + if not info: + status = UNKNOWN + note = 'Unable to get RDS instance' + else: + status = OK + try: + version = info.EngineVersion + except: + version = info.engine_version + + note = '%s %s. Status: %s' % (info.engine, version, info.status) + + # RDS Load Average + elif options.metric == 'load': + # Check thresholds + try: + warns = [float(x) for x in options.warn.split(',')] + crits = [float(x) for x in options.crit.split(',')] + fail = len(warns) + len(crits) + except: + fail = 0 + + if fail != 6: + parser.error('Warning and critical thresholds should be 3 comma separated numbers, e.g. 20,15,10') + + loads = [] + fail = False + j = 0 + perf_data = [] + for i in [1, 5, 15]: + if i == 1: + # Some stats are delaying to update on CloudWatch. + # Let's pick a few points for 1-min load avg and get the last point. 
+ points = 5 + else: + points = i + + load = rds.get_metric(metrics[options.metric], now - datetime.timedelta(seconds=points * 60), now, i * 60) + if not load: + status = UNKNOWN + note = 'Unable to get RDS statistics' + perf_data = None + break + + loads.append(str(load)) + perf_data.append('load%s=%s;%s;%s;0;100' % (i, load, warns[j], crits[j])) + + # Compare thresholds + if not fail: + if warns[j] > crits[j]: + parser.error('Parameter inconsistency: warning threshold is greater than critical.') + elif load >= crits[j]: + status = CRITICAL + fail = True + elif load >= warns[j]: + status = WARNING + + j = j + 1 + + if status != UNKNOWN: + if status is None: + status = OK + + note = 'Load average: %s%%' % '%, '.join(loads) + perf_data = ' '.join(perf_data) + + # RDS Free Storage + # RDS Free Memory + elif options.metric in ['storage', 'memory']: + # Check thresholds + try: + warn = float(options.warn) + crit = float(options.crit) + except: + parser.error('Warning and critical thresholds should be integers.') + + if crit > warn: + parser.error('Parameter inconsistency: critical threshold is greater than warning.') + + if options.unit not in units: + parser.print_help() + parser.error('Unit is not valid.') + + info = rds.get_info() + free = rds.get_metric(metrics[options.metric], now - datetime.timedelta(seconds=options.time * 60), + now, options.avg * 60) + if not info or not free: + status = UNKNOWN + note = 'Unable to get RDS details and statistics' + else: + if options.metric == 'storage': + storage = float(info.allocated_storage) + elif options.metric == 'memory': + try: + storage = db_classes[info.instance_class] + except: + print 'Unknown DB instance class "%s"' % info.instance_class + sys.exit(CRITICAL) + + free = '%.2f' % (free / 1024 ** 3) + free_pct = '%.2f' % (float(free) / storage * 100) + if options.unit == 'percent': + val = float(free_pct) + val_max = 100 + elif options.unit == 'GB': + val = float(free) + val_max = storage + + # Compare thresholds + 
if val <= crit: + status = CRITICAL + elif val <= warn: + status = WARNING + + if status is None: + status = OK + + note = 'Free %s: %s GB (%.0f%%) of %s GB' % (options.metric, free, float(free_pct), storage) + perf_data = 'free_%s=%s;%s;%s;0;%s' % (options.metric, val, warn, crit, val_max) + + # Final output + if status != UNKNOWN and perf_data: + print '%s %s | %s' % (short_status[status], note, perf_data) + elif status == UNKNOWN and not options.forceunknown: + print '%s %s | null' % ('OK', note) + sys.exit(0) + else: + print '%s %s' % (short_status[status], note) + + sys.exit(status) + + +if __name__ == '__main__': + main() + +# ############################################################################ +# Documentation +# ############################################################################ +""" +=pod + +=head1 NAME + +pmp-check-aws-rds.py - Check Amazon RDS metrics. + +=head1 SYNOPSIS + + Usage: pmp-check-aws-rds.py [options] + + Options: + -h, --help show this help message and exit + -l, --list list of all DB instances + -n PROFILE, --profile-name=PROFILE + AWS profile from ~/.boto or /etc/boto.cfg. Default: + None, fallbacks to "[Credentials]". + -r REGION, --region=REGION + AWS region. Default: us-east-1. If set to "all", we + try to detect the instance region across all of them, + note this will be slower than you specify the region. + -i IDENT, --ident=IDENT + DB instance identifier + -p, --print print status and other details for a given DB instance + -m METRIC, --metric=METRIC + metric to check: [status, load, storage, memory] + -w WARN, --warn=WARN warning threshold + -c CRIT, --crit=CRIT critical threshold + -u UNIT, --unit=UNIT unit of thresholds for "storage" and "memory" metrics: + [percent, GB]. Default: percent + -t TIME, --time=TIME time period in minutes to query. Default: 5 + -a AVG, --avg=AVG time average in minutes to request. Default: 1 + -f, --forceunknown force alerts on unknown status. 
This prevents issues + related to AWS Cloudwatch throttling limits Default: + False + -d, --debug enable debug output + +=head1 REQUIREMENTS + +This plugin is written in Python and utilizes the module C<boto> (Python interface +to Amazon Web Services) to get various RDS metrics from CloudWatch and compare +them against the thresholds. + +* Install the package: C<yum install python-boto> or C<apt-get install python-boto> +* Create a config /etc/boto.cfg or ~nagios/.boto with your AWS API credentials. + See http://code.google.com/p/boto/wiki/BotoConfig + +This plugin, which is supposed to be run by Nagios, i.e. under the ``nagios`` user, +should have permissions to read the config /etc/boto.cfg or ~nagios/.boto. + +Example: + + [root@centos6 ~]# cat /etc/boto.cfg + [Credentials] + aws_access_key_id = THISISATESTKEY + aws_secret_access_key = thisisatestawssecretaccesskey + +If you do not use this config with other tools such as our Cacti script, +you can secure this file the following way: + + [root@centos6 ~]# chown nagios /etc/boto.cfg + [root@centos6 ~]# chmod 600 /etc/boto.cfg + +=head1 DESCRIPTION + +The plugin provides 4 checks and some options to list and print RDS details: + +* RDS Status +* RDS Load Average +* RDS Free Storage +* RDS Free Memory + +To get the list of all RDS instances under AWS account: + + # ./pmp-check-aws-rds.py -l + +To get the detailed status of RDS instance identified as C<blackbox>: + + # ./pmp-check-aws-rds.py -i blackbox -p + +Nagios check for the overall status. Useful if you want to set the rest +of the checks dependent on this one: + + # ./pmp-check-aws-rds.py -i blackbox -m status + OK mysql 5.1.63. Status: available + +Nagios check for CPU utilization, specify thresholds as percentage of +1-min., 5-min., 15-min. 
average accordingly: + + # ./pmp-check-aws-rds.py -i blackbox -m load -w 90,85,80 -c 98,95,90 + OK Load average: 18.36%, 18.51%, 15.95% | load1=18.36;90.0;98.0;0;100 load5=18.51;85.0;95.0;0;100 load15=15.95;80.0;90.0;0;100 + +Nagios check for the free memory, specify thresholds as percentage or GB: + + # ./pmp-check-aws-rds.py -i blackbox -m memory -w 5 -c 2 + OK Free memory: 5.90 GB (9%) of 68 GB | free_memory=8.68;5.0;2.0;0;100 + # ./pmp-check-aws-rds.py -i blackbox -m memory -u GB -w 4 -c 2 + OK Free memory: 5.90 GB (9%) of 68 GB | free_memory=5.9;4.0;2.0;0;68 + +Nagios check for the free storage space, specify thresholds as percentage or GB: + + # ./pmp-check-aws-rds.py -i blackbox -m storage -w 10 -c 5 + OK Free storage: 162.55 GB (33%) of 500.0 GB | free_storage=32.51;10.0;5.0;0;100 + # ./pmp-check-aws-rds.py -i blackbox -m storage -u GB -w 10 -c 5 + OK Free storage: 162.55 GB (33%) of 500.0 GB | free_storage=162.55;10.0;5.0;0;500.0 + +By default, the region is set to ``us-east-1``. You can re-define it globally in boto config or +specify it with the -r option. The following command will list all instances across all regions under your AWS account: + + # ./pmp-check-aws-rds.py -r all -l + +The following command will show the status for the first instance identified as ``blackbox`` in all regions: + + # ./pmp-check-aws-rds.py -r all -i blackbox -p + +Remember, scanning all regions is a slower operation than specifying the region explicitly. 
+ +=head1 CONFIGURATION + +Here is the excerpt of potential Nagios config: + + define host{ + use mysql-host + host_name blackbox + alias blackbox + address blackbox.abcdefgh.us-east-1.rds.amazonaws.com + } + + define servicedependency{ + host_name blackbox + service_description RDS Status + dependent_service_description RDS Load Average, RDS Free Storage, RDS Free Memory + execution_failure_criteria w,c,u,p + notification_failure_criteria w,c,u,p + } + + define service{ + use active-service + host_name blackbox + service_description RDS Status + check_command check_rds!status!0!0 + } + + define service{ + use active-service + host_name blackbox + service_description RDS Load Average + check_command check_rds!load!90,85,80!98,95,90 + } + + define service{ + use active-service + host_name blackbox + service_description RDS Free Storage + check_command check_rds!storage!10!5 + } + + define service{ + use active-service + host_name blackbox + service_description RDS Free Memory + check_command check_rds!memory!5!2 + } + + define command{ + command_name check_rds + command_line $USER1$/pmp-check-aws-rds.py -i $HOSTALIAS$ -m $ARG1$ -w $ARG2$ -c $ARG3$ + } + +=head1 COPYRIGHT, LICENSE, AND WARRANTY + +This program is copyright 2014 Percona LLC and/or its affiliates. +Feedback and improvements are welcome. + +THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation, version 2. You should have received a copy of the GNU General +Public License along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+ +=head1 VERSION + +$PROJECT_NAME$ pmp-check-aws-rds.py $VERSION$ + +=cut + +""" diff --git a/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-lvm-snapshots b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-lvm-snapshots new file mode 100755 index 0000000..6aa4d11 --- /dev/null +++ b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-lvm-snapshots @@ -0,0 +1,187 @@ +#!/bin/sh + +# ######################################################################## +# This program is part of $PROJECT_NAME$ +# License: GPL License (see COPYING) +# Authors: +# Baron Schwartz +# ######################################################################## + +# ######################################################################## +# Redirect STDERR to STDOUT; Nagios doesn't handle STDERR. +# ######################################################################## +exec 2>&1 + +# ######################################################################## +# Set up constants, etc. +# ######################################################################## +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 +STATE_DEPENDENT=4 + +# ######################################################################## +# Print the name and fullness of every LVM snapshot that is open and +# nearly full. The input is the file with 'lvs', and the allowable fullness. +# In many cases lvs will report "File descriptor %d (...) leaked" and we ignore +# this, as it's only a warning that usually happens from a shell. 
+# ######################################################################## +check_lvm_snapshot_fullness() { + local FILE="$1" + local FULL="$2" + awk -v full="$FULL" ' + $1 != "LV" && $1 != "File" && $6 !~ /[^0-9.]/ && $6 > full { + print $2 "/" $1 "[" $5 "]=" $6 "%" + }' "${FILE}" +} + +# ######################################################################## +# Run the program. +# ######################################################################## +main() { + + # Get options + for o; do + case "${o}" in + -w) shift; OPT_WARN="${1}"; shift; ;; + -c) shift; OPT_CRIT="${1}"; shift; ;; + --version) grep -A2 '^=head1 VERSION' "$0" | tail -n1; exit 0 ;; + --help) perl -00 -ne 'm/^ Usage:/ && print' "$0"; exit 0 ;; + -*) echo "Unknown option ${o}. Try --help."; exit 1; ;; + esac + done + OPT_WARN=${OPT_WARN:-90} + OPT_CRIT=${OPT_CRIT:-95} + if is_not_sourced; then + if [ -n "$1" ]; then + echo "WARN spurious command-line options: $@" + exit 1 + fi + fi + + local NOTE="OK no full LVM snapshot volumes" + local TEMP=$(mktemp -t "${0##*/}.XXXXXX") || exit $? + trap "rm -f '${TEMP}' >/dev/null 2>&1" EXIT + + # The lvs command is usually in /usr/sbin. But if it's run as a non-root + # user, it will print out "WARNING: Running as a non-root user. Functionality + # may be unavailable." and exit with success anyway. So we have to detect + # this and make the plugin exit UNKNOWN in that case. If there is a $1 it's + # the output of lvs. 
+ PATH="$PATH:/usr/sbin:/sbin" + if [ -z "$1" ]; then + lvs > "${TEMP}" 2>&1 + else + cat "$1" > "${TEMP}" 2>/dev/null # For testing only + fi + + if grep 'command not found' "${TEMP}" > /dev/null 2>&1; then + NOTE="OK $(cat "${TEMP}")" + elif grep 'WARNING: Running as a non-root user' "${TEMP}" >/dev/null 2>&1; then + NOTE="UNK You must execute lvs with root privileges" + else + local VOLS=$(check_lvm_snapshot_fullness "${TEMP}" "${OPT_CRIT}") + if [ "${VOLS}" ]; then + NOTE="CRIT LVM snapshot volumes over ${OPT_CRIT}% full: ${VOLS}" + else + VOLS=$(check_lvm_snapshot_fullness "${TEMP}" "${OPT_WARN}") + if [ "${VOLS}" ]; then + NOTE="WARN LVM snapshot volumes over ${OPT_WARN}% full: ${VOLS}" + fi + fi + fi + + echo $NOTE +} + +# ######################################################################## +# Determine whether this program is being executed directly, or sourced/included +# from another file. +# ######################################################################## +is_not_sourced() { + [ "${0##*/}" = "pmp-check-lvm-snapshots" ] || [ "${0##*/}" = "bash" -a "$_" = "$0" ] +} + +# ######################################################################## +# Execute the program if it was not included from another file. +# This makes it possible to include without executing, and thus test. 
+# ######################################################################## +if is_not_sourced; then + OUTPUT=$(main "$@") + EXITSTATUS=$STATE_UNKNOWN + case "${OUTPUT}" in + UNK*) EXITSTATUS=$STATE_UNKNOWN; ;; + OK*) EXITSTATUS=$STATE_OK; ;; + WARN*) EXITSTATUS=$STATE_WARNING; ;; + CRIT*) EXITSTATUS=$STATE_CRITICAL; ;; + esac + echo "${OUTPUT}" + exit $EXITSTATUS +fi + +# ############################################################################ +# Documentation +# ############################################################################ +: <<'DOCUMENTATION' +=pod + +=head1 NAME + +pmp-check-lvm-snapshots - Alert when LVM snapshots are running out of copy-on-write space. + +=head1 SYNOPSIS + + Usage: pmp-check-lvm-snapshots [OPTIONS] + Options: + -c CRIT Critical threshold; default 95%. + -w WARN Warning threshold; default 90%. + --help Print help and exit. + --version Print version and exit. + Options must be given as --option value, not --option=value or -Ovalue. + Use perldoc to read embedded documentation with more details. + +=head1 DESCRIPTION + +This Nagios plugin looks at the output of the 'lvs' command to find LVM snapshot volumes +that are beginning to run out of copy-on-write space. If a snapshot fills up its +copy-on-write space, it will fail. This is also a useful way to detect whether +some process, such as a backup, failed to release a snapshot volume after +finishing with it. + +=head1 PRIVILEGES + +This plugin does not access MySQL. + +This plugin executes the following UNIX commands that may need special privileges: + +=over + +=item * + +lvs + +=back + +=head1 COPYRIGHT, LICENSE, AND WARRANTY + +This program is copyright 2012-$CURRENT_YEAR$ Baron Schwartz, 2012-$CURRENT_YEAR$ Percona Inc. +Feedback and improvements are welcome. + +THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation, version 2. You should have received a copy of the GNU General +Public License along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + +=head1 VERSION + +$PROJECT_NAME$ pmp-check-lvm-snapshots $VERSION$ + +=cut + +DOCUMENTATION diff --git a/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mongo.py b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mongo.py new file mode 100755 index 0000000..0de0987 --- /dev/null +++ b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mongo.py @@ -0,0 +1,594 @@ +#!/usr/bin/env python2.7 +"""MongoDB Nagios check script + +This program is part of $PROJECT_NAME$ +License: GPL License (see COPYING) + +Author David Murphy +Copyright 2014-2015 Percona LLC and/or its affiliates +""" + +import sys +import time +import optparse +import os +import stat +import pickle +import traceback +import pprint + +from types import FunctionType +# Not yet implemented +# import DeepDiff + +try: + import pymongo +except ImportError, e: + print e + sys.exit(2) + +# As of pymongo v 1.9 the SON API is part of the BSON package, therefore attempt +# to import from there and fall back to pymongo in cases of older pymongo +if pymongo.version >= "1.9": + import bson.son as son +else: + import pymongo.son as son + + +# Adding special behavior for optparse +class OptionParsingError(RuntimeError): + def __init__(self, msg): + self.msg = msg + + +class ModifiedOptionParser(optparse.OptionParser): + def error(self, msg): + raise OptionParsingError(msg) + +def unicode_truncate(s, length, encoding='utf-8'): + encoded = s.encode(encoding)[:length] + return encoded.decode(encoding, 'ignore') + +def 
parse_options(args): + funcList = [] + for item_name, item_type in NagiosMongoChecks.__dict__.items(): + if type(item_type) is FunctionType and item_name.startswith("check_") and item_name is not 'check_levels': + funcList.append(item_name) + p = ModifiedOptionParser() + + p.add_option('-H', '--host', action='store', type='string', dest='host', default='127.0.0.1', help='The hostname you want to connect to') + p.add_option('-P', '--port', action='store', type='int', dest='port', default=27017, help='The port mongodb is running on') + p.add_option('-u', '--user', action='store', type='string', dest='user', default=None, help='The username you want to login as') + p.add_option('-p', '--password', action='store', type='string', dest='passwd', default=None, help='The password you want to use for that user') + p.add_option('-W', '--warning', action='store', dest='warning', default=None, help='The warning threshold you want to set') + p.add_option('-C', '--critical', action='store', dest='critical', default=None, help='The critical threshold you want to set') + p.add_option('-A', '--action', action='store', type='choice', dest='action', default='check_connect', + choices=funcList, help="The action you want to take. 
Valid choices are (%s) Default: %s" % (", ".join(funcList), 'check_connect')) + p.add_option('-s', '--ssl', dest='ssl', default=False, help='Connect using SSL') + p.add_option('-r', '--replicaset', dest='replicaset', default=None, help='Connect to replicaset') + p.add_option('-c', '--collection', action='store', dest='collection', default='foo', help='Specify the collection in check_cannary_test') + p.add_option('-d', '--database', action='store', dest='database', default='tmp', help='Specify the database in check_cannary_test') + p.add_option('-q', '--query', action='store', dest='query', default='{"_id":1}', help='Specify the query in check_cannary_test') + p.add_option('--statusfile', action='store', dest='status_filename', default='status.dat', help='File to current store state data in for delta checks') + p.add_option('--backup-statusfile', action='store', dest='status_filename_backup', default='status_backup.dat', + help='File to previous store state data in for delta checks') + p.add_option('--max-stale', action='store', dest='max_stale', type='int', default=60, help='Age of status file to make new checks (seconds)') + # Add options for output stat file + try: + result = p.parse_args() + except OptionParsingError, e: + if 'no such option' in e.msg: + sys.exit("UNKNOWN - No such options of %s" % e.msg.split(":")[1]) + if 'invalid choice' in e.msg: + error_item = e.msg.split(":")[2].split("'")[1] + sys.exit('UNKNOWN - No such action of %s found!' 
% error_item) + return result + + +def return_result(result_type, message): + if result_type == "ok": + print "OK - " + message + sys.exit(0) + elif result_type == "critical": + print "CRITICAL - " + message + sys.exit(2) + elif result_type == "warning": + print "WARNING - " + message + sys.exit(1) + else: + print "UNKNOWN - " + message + sys.exit(2) + + +def main(argv): + options, arguments = parse_options(argv) + check(options, options.action) + + +def check(args, check_name): + try: + checksObj = globals()['NagiosMongoChecks'](args) + run_check = getattr(checksObj, check_name) + result_type, message = run_check(args, args.warning, args.critical) + except Exception, e: + raise + print(traceback.extract_tb(sys.exc_info()[-1], 1)) + return_result("critical", str(e)) + return_result(result_type, message) + + +class NagiosMongoChecks: + # need to initialize variables and such still + def __init__(self, args): + # setup inital values from optParse + self.host = '127.0.0.1' + self.port = 27017 + self.user = None + self.password = None + self.warning = None + self.critical = None + self.action = 'check_connect' + self.ssl = False + self.replicaset = None + self.collection = 'foo' + self.database = 'tmp' + self.query = '{"_id":1}' + self.status_filename = 'status.dat' + self.status_filename_backup = 'status_backup.dat' + self.max_stale = 60 + + for option in vars(args): + setattr(self, option, getattr(args, option)) + + # Fix filepaths to be relative + if not self.status_filename.startswith("/") or not self.status_filename.startswith(".."): + self.status_filename_backup = "%s/%s" % (os.curdir, self.status_filename_backup) + self.status_filename = "%s/%s" % (os.curdir, self.status_filename) + + # ammend known intenal values we will need + self.current_status = {} + self.last_status = {} + self.connection = None + self.connection_time = None + self.pyMongoError = None + + self.connect() + + if self.file_age(self.status_filename) <= self.max_stale: + # Save status_file 
contents status as current_status + self.get_last_status(True) + # Save status_filename_backup contents as last_status + self.get_last_status(False, self.status_filename_backup) + else: + if self.connection is None: + raise pymongo.errors.ConnectionFailure(self.pyMongoError or "No connection Found, did connect fail?") + # Get fresh current_status from server + self.current_status = self.sanatize(self.get_server_status()) + # user last status_filename contents as last_status + self.get_last_status(False, self.status_filename) + # Not yet implemented + # self.compute_deltas() + + # get last status + # check if needs refresh, refresh if needed + # set last/current to self.current_status + pass + + def get_last_status(self, returnAsCurrent, forceFile=None): + # Open file using self.file + try: + file_name = forceFile if forceFile is not None else self.status_filename + fileObject = open(file_name, 'r') + if returnAsCurrent is None or returnAsCurrent is False: + self.last_status = pickle.load(fileObject) + else: + self.current_status = pickle.load(fileObject) + except Exception: + return False + return True + + def get_server_status(self): + try: + data = self.connection['admin'].command(pymongo.son_manipulator.SON([('serverStatus', 1)])) + except: + try: + data = self.connection['admin'].command(son.SON([('serverStatus', 1)])) + except Exception, e: + if type(e).__name__ == "OperationFailure": + sys.exit("UNKNOWN - Not authorized!") + else: + sys.exit("UNKNOWN - Unable to run serverStatus: %s::%s" % (type(e).__name__, unicode_truncate(e.message, 45))) + + if self.current_status is None: + self.current_status = data + + return data + + # figure out how to use this one later + def rotate_files(self): + # 1)this needs to rename self.status_filename to status_filename_backup + # 2) Save current_status to self.status_filename ( new file ) + if self.last_status == {}: + # Build the last status file for future deltas from current data + 
self.save_file(self.status_filename_backup, self.current_status) + # Set the current status file to empty to set the aging clock + self.save_file(self.status_filename, {}) + sys.exit("UNKNOWN - No status data present, please try again in %s seconds" % self.max_stale) + else: + self.save_file(self.status_filename_backup, self.last_status) + self.save_file(self.status_filename, self.current_status) + + + def save_file(self, filename, contents): + try: + pickle.dump(contents, open(filename, "wb")) + except Exception, e: + sys.exit("UNKNOWN - Error saving stat file %s: %s" % (filename, e.message)) + + # TODO - Fill in all check defaults + def get_default(self, key, level): + + defaults = { + 'check_connections': {'warning': 15000, 'critical': 19000}, + 'check_connect': {'warning': 50, 'critical': 100}, + 'check_queues': {'warning': 30, 'critical': 100}, + 'check_lock_pct': {'warning': 30, 'critical': 50}, + 'check_repl_lag': {'warning': 200, 'critical': 500}, + # 'check_flushing': {'warning':XX, 'critical': XX}, + 'check_total_indexes': {'warning': 100, 'critical': 300}, + 'check_cannary_test': {'warning': 30, 'critical': 50}, + 'check_oplog': {'warning': 36, 'critical': 24}, + 'check_index_ratio': {'warning': .9, 'critical': .8}, + } + try: + return defaults[key][level] + except KeyError: + sys.exit("UNKNOWN - Missing defaults found for %s please use -w and -c" % key) + + # Not yet implemented + # def compute_deltas(self): + # deltas = [] + # for item in DeepDiff(self.last_status, self.current_status)['values_changed']: + # name = item.split(":")[0].split("root")[1].replace("['", "").replace("']", ".")[:-1] + # if 'time' not in item.lower(): + # values = item.split(":")[1] + # print(values) + # old, new = values.split("===>") + # print("%s: %s - %s = %s" % (name, new, old, float(new)-float(old))) + # deltas[name] = float(new) - float(old) + # self.delta_data = deltas + # return True + + def file_age(self, filename): + try: + age = time.time() - 
os.stat(filename)[stat.ST_CTIME] + except OSError: + age = 999999 + return age + + # TODO - Add meat to this if needed, here for future planning + def sanatize(self, status_output): + return status_output + + def connect(self): + start_time = time.time() + try: + # ssl connection for pymongo > 2.3 + if self.replicaset is None: + con = pymongo.MongoClient(self.host, self.port, ssl=self.ssl, serverSelectionTimeoutMS=2500) + else: + con = pymongo.MongoClient(self.host, self.port, ssl=self.ssl, replicaSet=self.replicaset, serverSelectionTimeoutMS=2500) + if (self.user and self.passwd) and not con['admin'].authenticate(self.user, self.passwd): + sys.exit("CRITICAL - Username and password incorrect") + except Exception, e: + raise + if isinstance(e, pymongo.errors.AutoReconnect) and str(e).find(" is an arbiter") != -1: + # We got a pymongo AutoReconnect exception that tells us we connected to an Arbiter Server + # This means: Arbiter is reachable and can answer requests/votes - this is all we need to know from an arbiter + print "OK - State: 7 (Arbiter)" + sys.exit(0) + con = None + self.pyMongoError = str(e) + if con is not None: + try: + con['admin'].command(pymongo.son_manipulator.SON([('ping', 1)])) + except Exception, e: + sys.exit("UNKNOWN - Unable to run commands, possible auth issue: %s" % e.message) + self.connection_time = round(time.time() - start_time, 2) + version = con.server_info()['version'].split('.') + self.mongo_version = (version[0], version[1], version[2]) + self.connection = con + + def check_levels(self, check_result, warning_level, critical_level, message): + if check_result < warning_level: + return "ok", message + elif check_result > critical_level: + return "critical", message + elif check_result > warning_level and check_result < critical_level: + return "warning", message + else: + return "unknown", "Unable to parse %s into a result" % check_result + + def check_connect(self, args, warning_level, critical_level): + warning_level = 
warning_level or self.get_default('check_connect', 'warning') + critical_level = critical_level or self.get_default('check_connect', 'critical') + con_time = self.connection_time + message = "Connection time %.2f ms" % con_time + return self.check_levels(float(con_time), float(warning_level), float(critical_level), message) + + def check_connections(self, args, warning_level, critical_level): + warning_level = warning_level or self.get_default('check_connections', 'warning') + critical_level = critical_level or self.get_default('check_connections', 'critical') + connections = self.current_status['connections'] + connections['total'] = connections['available'] + connections['current'] + used_percent = int((connections['current'] / connections['total']) * 100) + message = "%i%% connections used ( %d of %d )" % (used_percent, connections['current'], connections['total']) + return self.check_levels(float(used_percent), int(warning_level), int(critical_level), message) + + def check_lock_pct(self, args, warning_level, critical_level): + warning_level = warning_level or self.get_default('check_lock_pct', 'warning') + critical_level = critical_level or self.get_default('check_lock_pct', 'critical') + if self.mongo_version >= ('2', '7', '0'): + return "ok", "Mongo 3.0 and above do not have lock %" + lockTime = self.current_status['globalLock']['lockTime'] - self.last_status['globalLock']['lockTime'] + totalTime = self.current_status['globalLock']['totalTime'] - self.last_status['globalLock']['totalTime'] + lock_percent = int((lockTime / totalTime) * 100) + message = "%i%% locking found (over 100%% is possible)" % (lock_percent) + return self.check_levels(lock_percent, warning_level, critical_level, message) + + def check_flushing(self, args, warning_level, critical_level): + warning_level = warning_level or self.get_default('check_flushing', 'warning') + critical_level = critical_level or self.get_default('check_flushing', 'critical') + flushData = 
self.current_status['backgroundFlushing'] + if args.average: + flush_time = flushData['average_ms'] + stat_type = "Average" + else: + flush_time = flushData['last_ms'] + stat_type = "Last" + + message = "%s Flush Time: %.2fms" % (stat_type, flush_time) + return self.check_levels(flush_time, warning_level, critical_level, message) + + def check_index_ratio(self, args, warning_level, critical_level): + warning_level = warning_level or self.get_default('check_index_ratio', 'warning') + critical_level = critical_level or self.get_default('check_index_ratio', 'critical') + message = None + + indexCounters = self.current_status['indexCounters'] + if 'note' in indexCounters: + ratio = 1.0 + message = "not supported defaulting to 1.0 ratio" + elif self.mongo_version >= ('2', '4', '0'): + ratio = indexCounters['missRatio'] + else: + ratio = indexCounters['btree']['missRatio'] + if message is None: + message = "Miss Ratio: %.2f" % ratio + return self.check_levels(ratio, warning_level, critical_level, message) + + def check_have_primary(self, args, warning_level, critical_level): + replset_status = self.connection['admin'].command("replSetGetStatus") + for member in replset_status['members']: + if member['state'] == 1: + return "ok", "Cluster has primary" + return "critical", "Cluster has no primary!" 
+ + def check_total_indexes(self, args, warning_level, critical_level): + warning_level = warning_level or self.get_default('check_total_indexes', 'warning') + critical_level = critical_level or self.get_default('check_total_indexes', 'critical') + index_count = 0 + database_count = 0 + for database in self.connection.database_names(): + if database not in ["admin", "local"]: + database_count += 1 + self.connection[database]['system.indexes'].count() + index_count += self.connection[database]['system.indexes'].count() + message = "Found %d indexes in %d databases" % (index_count, database_count) + return self.check_levels(index_count, warning_level, critical_level, message) + + def check_queues(self, args, warning_level, critical_level): + warning_level = warning_level or self.get_default('check_queues', 'warning') + critical_level = critical_level or self.get_default('check_queues', 'critical') + currentQueue = self.current_status['globalLock']['currentQueue'] + currentQueue['total'] = currentQueue['readers'] + currentQueue['writers'] + message = "Queue Sizes: read (%d) write(%d) total (%d)" % (currentQueue['readers'], currentQueue['writers'], currentQueue['total']) + return self.check_levels(currentQueue['total'], warning_level, critical_level, message) + + def check_oplog(self, args, warning_level, critical_level): + warning_level = warning_level or self.get_default('check_oplog', 'warning') + critical_level = critical_level or self.get_default('check_oplog', 'critical') + if 'local' not in self.connection.database_names() or 'oplog.rs' not in self.connection['local'].collection_names(): + return "critical", "We do not seem to be in a replset!" 
+ oplog = self.connection['local']['oplog.rs'] + first_ts = oplog.find().sort("$natural", pymongo.ASCENDING).limit(1)[0]['ts'] + last_ts = oplog.find().sort("$natural", pymongo.DESCENDING).limit(1)[0]['ts'] + oplog_range = (last_ts.as_datetime() - first_ts.as_datetime()) + oplog_range_hours = oplog_range.total_seconds() / 60 / 60 + message = "Oplog Time is %d hours" % (oplog_range_hours) + return self.check_levels(int(oplog_range_hours), warning_level, critical_level, message) + + def check_election(self, args, warning_level, critical_level): + replset_status = self.connection['admin'].command("replSetGetStatus") + for member in replset_status['members']: + if member['stateStr'] == "PRIMARY": + #last_primary = member.name + last_primary = member['name'] + for member in replset_status['members']: + if member['stateStr'] == "PRIMARY": + current_primary = member['name'] + message = "Old PRI: %s New PRI: %s" % (last_primary, current_primary) + if current_primary == last_primary: + return "ok", message + else: + return "critical", message + + def is_balanced(self): + chunks = {} + + # Loop through each of the chunks, tallying things up + for chunk in self.connection["config"]["chunks"].find(): + namespace = chunk['ns'] + shard = chunk['shard'] + if namespace not in chunks: + chunks[namespace] = {'shards': {}, 'total': 0} + if shard not in chunks[namespace]['shards']: + chunks[namespace]['shards'][shard] = 0 + chunks[namespace]['shards'][shard] += 1 + chunks[namespace]['total'] += 1 + + shardsCount = self.connection["config"]["shards"].count() + chunksCount = self.connection["config"]["chunks"].count() + + # Different migration thresholds depending on cluster size + # http://docs.mongodb.org/manual/core/sharding-internals/#sharding-migration-thresholds + if chunksCount < 20: + threshold = 2 + elif chunksCount < 80 and chunksCount > 21: + threshold = 4 + else: + threshold = 8 + + # Default to balanced state, any failure will then mark it as False forevermore + isBalanced 
= True + # Loop through each ns and determine if it's balanced or not + for ns in chunks: + balanced = chunks[ns]['total'] / shardsCount + for shard in chunks[ns]['shards']: + if shard > balanced - threshold and shard < balanced + threshold: + pass + else: + isBalanced = False + + return isBalanced + + def check_balance(self, args, warning_level, critical_level): + if self.is_balanced() is True: + return "ok", "Shards are balanced by chunk counts" + else: + return "critcal", "Shards are not balanced by chunk and need review" + + def check_cannary_test(self, args, warning_level, critical_level): + warning_level = warning_level or self.get_default('check_cannary_test', 'warning') + critical_level = critical_level or self.get_default('check_cannary_test', 'critical') + # this does not check for a timeout, we assume NRPE or Nagios will alert on that timeout. + try: + start = time.time() + self.connection[self.database][self.collection].find_one(self.query) + time_range = (time.time() - start).total_seconds + message = "Collection %s.%s query took: %d s" % (self.database, self.collection, time_range) + return self.check_levels(time_range, warning_level, critical_level, message) + except Exception, e: + message = "Collection %s.%s query FAILED: %s" % (self.database, self.collection, e) + return "critical", message + + def check_repl_lag(self, args, warning_level, critical_level): + warning_level = warning_level or self.get_default('check_repl_lag', 'warning') + critical_level = critical_level or self.get_default('check_repl_lag', 'critical') + + # make a write incase the client is not writing, but us an update to avoid wasting space + self.connection['test']['lag_check'].update({"_id":1}, {"_id": 1, "x": 1}) + # get a fresh status for the replset + try: + replset_status = self.connection['admin'].command("replSetGetStatus") + except Exception, e: + return "critical", "Are your running with --replset? 
- %s" % (e) + + for member in replset_status['members']: + if member['stateStr'] == "PRIMARY": + primary = member + if 'self' in member and member['self'] is True: + hostOptimeDate = member['optimeDate'] + + if primary is not None: + highest_optimeDate = primary['optimeDate'] + highest_name = primary['name'] + else: + # find the most current secondary as there is not primary + highest_optimeDate = time.gmtime(0) + for member in replset_status['members']: + if member['optimeDate'] > highest_optimeDate: + highest_optimeDate = member['optimeDate'] + highest_name = member['name'] + + rep_lag_seconds = (highest_optimeDate - hostOptimeDate).seconds + rep_lag_hours = round(rep_lag_seconds/60/60, 4) + message = "Lagging %s by %.4f hours" % (highest_name, rep_lag_hours) + return self.check_levels(rep_lag_hours, warning_level, critical_level, message) + +# +# main app +# +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) + +# ############################################################################ +# Documentation +# ############################################################################ +""" +=pod + +=head1 NAME + +pmp-check-mongo.py - MongoDB Nagios check script. + +=head1 SYNOPSIS + + Usage: pmp-check-mongo.py [options] + + Options: + -h, --help show this help message and exit + -H HOST, --host=HOST The hostname you want to connect to + -P PORT, --port=PORT The port mongodb is running on + -u USER, --user=USER The username you want to login as + -p PASSWD, --password=PASSWD + The password you want to use for that user + -W WARNING, --warning=WARNING + The warning threshold you want to set + -C CRITICAL, --critical=CRITICAL + The critical threshold you want to set + -A ACTION, --action=ACTION + The action you want to take. 
Valid choices are + (check_connections, check_election, check_lock_pct, + check_repl_lag, check_flushing, check_total_indexes, + check_balance, check_queues, check_cannary_test, + check_have_primary, check_oplog, check_index_ratio, + check_connect) Default: check_connect + -s SSL, --ssl=SSL Connect using SSL + -r REPLICASET, --replicaset=REPLICASET + Connect to replicaset + -c COLLECTION, --collection=COLLECTION + Specify the collection in check_cannary_test + -d DATABASE, --database=DATABASE + Specify the database in check_cannary_test + -q QUERY, --query=QUERY + Specify the query in check_cannary_test + --statusfile=STATUS_FILENAME + File to current store state data in for delta checks + --backup-statusfile=STATUS_FILENAME_BACKUP + File to previous store state data in for delta checks + --max-stale=MAX_STALE + Age of status file to make new checks (seconds) + +=head1 COPYRIGHT, LICENSE, AND WARRANTY + +This program is copyright 2014 Percona LLC and/or its affiliates. +Feedback and improvements are welcome. + +THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation, version 2. You should have received a copy of the GNU General +Public License along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+ +=head1 VERSION + +$PROJECT_NAME$ pmp-check-mongo.py $VERSION$ + +=cut + +""" diff --git a/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-deadlocks b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-deadlocks new file mode 100755 index 0000000..0545f06 --- /dev/null +++ b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-deadlocks @@ -0,0 +1,189 @@ +#!/bin/sh + +# ######################################################################## +# This program is part of $PROJECT_NAME$ +# License: GPL License (see COPYING) +# Authors: +# Baron Schwartz +# Depends-on: pt-deadlock-logger +# ######################################################################## + +# ######################################################################## +# Redirect STDERR to STDOUT; Nagios doesn't handle STDERR. +# ######################################################################## +exec 2>&1 + +# ######################################################################## +# Set up constants, etc. +# ######################################################################## +STATE_OK=0 +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 +STATE_DEPENDENT=4 + +# ######################################################################## +# Run the program. 
+# ######################################################################## +main() { + # Get options + for o; do + case "${o}" in + -c) shift; OPT_CRIT="${1}"; shift; ;; + --defaults-file) shift; OPT_DEFT="${1}"; shift; ;; + -H) shift; OPT_HOST="${1}"; shift; ;; + -i) shift; OPT_INTERVAL="${1}"; shift; ;; + -l) shift; OPT_USER="${1}"; shift; ;; + -L) shift; OPT_LOPA="${1}"; shift; ;; + -p) shift; OPT_PASS="${1}"; shift; ;; + -P) shift; OPT_PORT="${1}"; shift; ;; + -S) shift; OPT_SOCK="${1}"; shift; ;; + -T) shift; OPT_TABLE="${1}"; shift; ;; + -w) shift; OPT_WARN="${1}"; shift; ;; + --version) grep -A2 '^=head1 VERSION' "$0" | tail -n1; exit 0 ;; + --help) perl -00 -ne 'm/^ Usage:/ && print' "$0"; exit 0 ;; + -*) echo "Unknown option ${o}. Try --help."; exit 1; ;; + esac + done + OPT_WARN=${OPT_WARN:-12} + OPT_CRIT=${OPT_CRIT:-60} + OPT_INTERVAL=${OPT_INTERVAL:-1} + OPT_TABLE="${OPT_TABLE:-percona.deadlocks}" + if [ -e '/etc/nagios/mysql.cnf' ]; then + OPT_DEFT="${OPT_DEFT:-/etc/nagios/mysql.cnf}" + fi + if is_not_sourced; then + if [ -n "$1" ]; then + echo "WARN spurious command-line options: $@" + exit 1 + fi + fi + + LEVEL=$(mysql_exec "SELECT COUNT(*) FROM ${OPT_TABLE} WHERE server IN ('${OPT_HOST}', @@hostname) AND ts >= NOW() - INTERVAL ${OPT_INTERVAL}*60 SECOND") + if [ $? = 0 ]; then + NOTE="${LEVEL:-UNKNOWN} deadlocks in last ${OPT_INTERVAL} minutes" + if [ "${LEVEL:-0}" -gt "${OPT_CRIT}" ]; then + NOTE="CRIT $NOTE" + elif [ "${LEVEL:-0}" -gt "${OPT_WARN}" ]; then + NOTE="WARN $NOTE" + else + NOTE="OK $NOTE" + fi + + # Build the common perf data output for graph trending + PERFDATA="deadlocks=${LEVEL:-0};${OPT_WARN};${OPT_CRIT};0;" + NOTE="$NOTE | $PERFDATA" + else + NOTE="UNK could not count deadlocks" + fi + echo $NOTE +} + +# ######################################################################## +# Execute a MySQL command. 
# ########################################################################
# Runs the statement given as $1 through the mysql client in silent mode
# (-ss) and writes the result to STDOUT.  Only the connection options that
# were actually set are passed.
#
# --defaults-file and --login-path are option-file options, which the mysql
# client only recognises at the very start of its argument list, so they
# are emitted before any -h/-P/-u/-p/-S option.  (--login-path used to be
# passed last and was rejected whenever another connection option was also
# given.)
mysql_exec() {
   mysql ${OPT_DEFT:+--defaults-file="${OPT_DEFT}"} \
      ${OPT_LOPA:+--login-path="${OPT_LOPA}"} \
      ${OPT_HOST:+-h"${OPT_HOST}"} ${OPT_PORT:+-P"${OPT_PORT}"} \
      ${OPT_USER:+-u"${OPT_USER}"} ${OPT_PASS:+-p"${OPT_PASS}"} \
      ${OPT_SOCK:+-S"${OPT_SOCK}"} -ss -e "$1"
}

# ########################################################################
# Determine whether this program is being executed directly, or sourced/included
# from another file.
# ########################################################################
is_not_sourced() {
   [ "${0##*/}" = "pmp-check-mysql-deadlocks" ] || [ "${0##*/}" = "bash" -a "$_" = "$0" ]
}

# ########################################################################
# Execute the program if it was not included from another file.
# This makes it possible to include without executing, and thus test.
# ########################################################################
if is_not_sourced; then
   OUTPUT=$(main "$@")
   # The first word of the output selects the exit status; anything
   # unrecognised stays UNKNOWN.
   EXITSTATUS=$STATE_UNKNOWN
   case "${OUTPUT}" in
      UNK*)  EXITSTATUS=$STATE_UNKNOWN;  ;;
      OK*)   EXITSTATUS=$STATE_OK;       ;;
      WARN*) EXITSTATUS=$STATE_WARNING;  ;;
      CRIT*) EXITSTATUS=$STATE_CRITICAL; ;;
   esac
   echo "${OUTPUT}"
   exit $EXITSTATUS
fi

# ############################################################################
# Documentation
# ############################################################################
: <<'DOCUMENTATION'
=pod

=head1 NAME

pmp-check-mysql-deadlocks - Alert when pt-deadlock-logger has recorded too many recent deadlocks.

=head1 SYNOPSIS

 Usage: pmp-check-mysql-deadlocks [OPTIONS]
 Options:
 -c CRIT Critical threshold; default 60.
 --defaults-file FILE Only read mysql options from the given file.
 Defaults to /etc/nagios/mysql.cnf if it exists.
 -H HOST MySQL hostname.
 -i INTERVAL Interval over which to count, in minutes; default 1.
 -l USER MySQL username.
+ -L LOGIN-PATH Use login-path to access MySQL (with MySQL client 5.6). + -p PASS MySQL password. + -P PORT MySQL port. + -S SOCKET MySQL socket file. + -T TABLE The database.table that pt-deadlock-logger uses; default percona.deadlocks. + -w WARN Warning threshold; default 12. + --help Print help and exit. + --version Print version and exit. + Options must be given as --option value, not --option=value or -Ovalue. + Use perldoc to read embedded documentation with more details. + +=head1 DESCRIPTION + +This Nagios plugin looks at the table that pt-deadlock-logger (part of Percona +Toolkit) maintains, and when there have been too many recent deadlocks, it +alerts. + +=head1 PRIVILEGES + +This plugin executes the following commands against MySQL: + +=over + +=item * + +C the system variable C<@@tmpdir>. + +=back + +This plugin executes the following UNIX commands that may need special privileges: + +=over + +=item * + +ps + +=item * + +C or C (Linux), C (BSD) + +=back + +The plugin should be able to find mysqld PID using C command. + +On BSD, if C option C is set to 0, C +will not return mysqld PID if the plugin run from non-root user. + +=head1 COPYRIGHT, LICENSE, AND WARRANTY + +This program is copyright 2012-$CURRENT_YEAR$ Baron Schwartz, 2012-$CURRENT_YEAR$ Percona Inc. +Feedback and improvements are welcome. + +THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation, version 2. You should have received a copy of the GNU General +Public License along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+ +=head1 VERSION + +$PROJECT_NAME$ pmp-check-mysql-deleted-files $VERSION$ + +=cut + +DOCUMENTATION diff --git a/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-file-privs b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-file-privs new file mode 100755 index 0000000..fa4df1c --- /dev/null +++ b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-file-privs @@ -0,0 +1,289 @@ +#!/bin/sh + +# ######################################################################## +# This program is part of $PROJECT_NAME$ +# License: GPL License (see COPYING) +# Authors: +# Baron Schwartz +# ######################################################################## + +# ######################################################################## +# Redirect STDERR to STDOUT; Nagios doesn't handle STDERR. +# ######################################################################## +exec 2>&1 + +# ######################################################################## +# Set up constants, etc. +# ######################################################################## +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 +STATE_DEPENDENT=4 + +# ######################################################################## +# Run the program. 
# ########################################################################
# main: parse the command-line options, locate the MySQL data directories
# (by asking the server when connection options were given, otherwise from
# the process table), and print a single status line whose first word is
# OK, WARN, CRIT or UNK depending on the ownership of the files found there.
main() {
   # Get options
   # "for o" walks the arguments as they were when the loop started, while
   # the two shifts per option keep ${1} pointing at the option's value and
   # then drop it.
   for o; do
      case "${o}" in
         -c)              shift; OPT_CRIT="${1}"; shift; ;;
         --defaults-file) shift; OPT_DEFT="${1}"; shift; ;;
         -g)              shift; OPT_UNIX_GROUP="${1}"; shift; ;;
         -H)              shift; OPT_HOST="${1}"; shift; ;;
         -l)              shift; OPT_USER="${1}"; shift; ;;
         -L)              shift; OPT_LOPA="${1}"; shift; ;;
         -p)              shift; OPT_PASS="${1}"; shift; ;;
         -P)              shift; OPT_PORT="${1}"; shift; ;;
         -S)              shift; OPT_SOCK="${1}"; shift; ;;
         -u)              shift; OPT_UNIX_USER="${1}"; shift; ;;
         -w)              shift; OPT_WARN="${1}"; shift; ;;
         --version)       grep -A2 '^=head1 VERSION' "$0" | tail -n1; exit 0 ;;
         --help)          perl -00 -ne 'm/^ Usage:/ && print' "$0"; exit 0 ;;
         -*)              echo "Unknown option ${o}. Try --help."; exit 1; ;;
      esac
   done
   OPT_UNIX_GROUP="${OPT_UNIX_GROUP:-mysql}"
   OPT_UNIX_USER="${OPT_UNIX_USER:-mysql}"
   if [ -e '/etc/nagios/mysql.cnf' ]; then
      OPT_DEFT="${OPT_DEFT:-/etc/nagios/mysql.cnf}"
   fi
   if is_not_sourced; then
      if [ -n "$1" ]; then
         echo "WARN spurious command-line options: $@"
         exit 1
      fi
   fi

   # Set the exit status in case there are any problems.
   NOTE="UNK could not determine the datadir location."

   # Set up files to hold one or more data directory locations.
   # Declaration and assignment are separate because "local VAR=$(cmd)"
   # returns the status of "local", which would hide a mktemp failure.
   local TEMP DATADIRS
   TEMP=$(mktemp -t "${0##*/}.XXXXXX") || exit $?
   DATADIRS=$(mktemp -t "${0##*/}.XXXXXX") || exit $?
   trap "rm -f '${TEMP}' '${DATADIRS}' >/dev/null 2>&1" EXIT

   # If any connection option was given, then try to log in to find the datadir.
   # (A login path given with -L counts as a connection option as well; it
   # used to be left out of this test.)
   if [ "${OPT_DEFT}${OPT_LOPA}${OPT_HOST}${OPT_USER}${OPT_PASS}${OPT_PORT}${OPT_SOCK}" ]; then
      # If this fails (e.g. we can't log in), then there will be no line in the
      # file, and later we won't change the exit code / note away from "UNK".
      mysql_exec "SELECT IF(@@datadir LIKE '/%', @@datadir, CONCAT(@@basedir, @@datadir))" >> "${DATADIRS}"
   else
      # Find all MySQL server instances.
      for pid in $(_pidof mysqld); do
         ps -p ${pid} -o pid,command | grep "${pid}" >> "${TEMP}"
      done
      # The ${TEMP} file may now contain lines like the following sample:
      # 13822 /usr/sbin/mysqld --defaults-file=/var/lib/mysql/my.cnf \
      #       --basedir=/usr --datadir=/var/lib/mysql/data/ \
      #       --pid-file=/var/run/mysqld/mysqld.pid \
      #       --socket=/var/run/mysqld/mysqld.sock
      # Now the task is to read find any reference to a --datadir option.
      # We store these into the $DATADIRS temp file.
      # TODO: maybe in the future we can detect the user/group under which the
      # process runs, and assume that is the right value, rather than defaulting
      # to 'mysql'.
      while read pid command; do
         if echo "${command}" | grep datadir >/dev/null 2>&1; then
            # Strip off everything up to and including --datadir=
            command="${command##*--datadir=}"
            # Strip off any options that follow this, assuming that there's not
            # a space followed by a dash in the datadir's path.
            echo "${command%% -*}" >> "${DATADIRS}"
         fi
      done < "${TEMP}"
   fi

   WRONG=""
   NOTE2=""
   # Reuse ${TEMP} from here on to collect error messages printed by find.
   > "${TEMP}"
   while read datadir; do
      FILES="$(find "${datadir}" \! -group "${OPT_UNIX_GROUP}" -o \! -user "${OPT_UNIX_USER}" 2>>"${TEMP}")"
      if [ "${FILES}" ]; then
         WRONG=1
         NOTE2="${NOTE2:+${NOTE2} }${FILES}"
      fi
      # At least one data directory was examined, so the default "UNK" note
      # no longer applies.
      NOTE="OK all files/directories have correct ownership."
   done < "${DATADIRS}"

   if [ -s "${TEMP}" ]; then
      # find reported errors (e.g. permission denied): the result is unknown.
      NOTE="UNK `cat "${TEMP}"`"
   elif [ "${WRONG}" ]; then
      if [ "${OPT_CRIT}" ]; then
         NOTE="CRIT files with wrong ownership: ${NOTE2}"
      else
         NOTE="WARN files with wrong ownership: ${NOTE2}"
      fi
   fi

   # Left unquoted so that a multi-line file list is folded onto one line.
   echo $NOTE
}

# ########################################################################
# Execute a MySQL command.
+# ######################################################################## +mysql_exec() { + mysql ${OPT_DEFT:+--defaults-file="${OPT_DEFT}"} \ + ${OPT_LOPA:+--login-path="${OPT_LOPA}"} \ + ${OPT_HOST:+-h"${OPT_HOST}"} ${OPT_PORT:+-P"${OPT_PORT}"} \ + ${OPT_USER:+-u"${OPT_USER}"} ${OPT_PASS:+-p"${OPT_PASS}"} \ + ${OPT_SOCK:+-S"${OPT_SOCK}"} -ss -e "$1" +} + +# ######################################################################## +# A wrapper around pidof, which might not exist. The first argument is the +# command name to match. +# ######################################################################## +_pidof() { + if ! pidof "${1}" 2>/dev/null; then + ps axo pid,ucomm | awk -v comm="${1}" '$2 == comm { print $1 }' + fi +} + +# ######################################################################## +# Determine whether this program is being executed directly, or sourced/included +# from another file. +# ######################################################################## +is_not_sourced() { + [ "${0##*/}" = "pmp-check-mysql-file-privs" ] || [ "${0##*/}" = "bash" -a "$_" = "$0" ] +} + +# ######################################################################## +# Execute the program if it was not included from another file. +# This makes it possible to include without executing, and thus test. +# ######################################################################## +if is_not_sourced; then + OUTPUT=$(main "$@") + EXITSTATUS=$STATE_UNKNOWN + case "${OUTPUT}" in + UNK*) EXITSTATUS=$STATE_UNKNOWN; ;; + OK*) EXITSTATUS=$STATE_OK; ;; + WARN*) EXITSTATUS=$STATE_WARNING; ;; + CRIT*) EXITSTATUS=$STATE_CRITICAL; ;; + esac + echo "${OUTPUT}" + exit $EXITSTATUS +fi + +# ############################################################################ +# Documentation +# ############################################################################ +: <<'DOCUMENTATION' +=pod + +=head1 NAME + +pmp-check-mysql-file-privs - Alert if MySQL data directory privileges are wrong. 
+ +=head1 SYNOPSIS + + Usage: pmp-check-mysql-file-privs [OPTIONS] + Options: + -c CRIT Critical threshold; makes a privilege issue critical. + --defaults-file FILE Only read mysql options from the given file. + Defaults to /etc/nagios/mysql.cnf if it exists. + -g GROUP The Unix group who should own the files; default mysql. + -H HOST MySQL hostname. + -l USER MySQL username. + -L LOGIN-PATH Use login-path to access MySQL (with MySQL client 5.6). + -p PASS MySQL password. + -P PORT MySQL port. + -S SOCKET MySQL socket file. + -u USER The Unix user who should own the files; default mysql. + -w WARN Warning threshold; ignored. + --help Print help and exit. + --version Print version and exit. + Options must be given as --option value, not --option=value or -Ovalue. + Use perldoc to read embedded documentation with more details. + +=head1 DESCRIPTION + +This Nagios plugin checks to make sure that the MySQL data directory, and its +contents, is owned by the correct Unix user and group. If the ownership is +incorrect, then the server might fail due to lack of permission to modify its +data. For example, suppose a system administrator enters a database directory +and creates a file that is owned by root. Now a database administrator issues a +DROP TABLE command, which fails because it is unable to remove the file and thus +the non-empty directory cannot be removed either. + +The plugin accepts the -g and -u options to specify which Unix user and group +should own the data directory and its contents. This is usually the user account +under which MySQL runs, which is mysql by default on most systems. The plugin +assumes that user and group by default, too. + +The plugin accepts the -w and -c options for compatibility with standard Nagios +plugin conventions, but they are not based on a threshold. Instead, the plugin +raises a warning by default, and if the -c option is given, it raises an error +instead, regardless of the option's value. 
+ +By default, this plugin will attempt to detect all running instances of MySQL, +and verify the data directory ownership for each one. It does this purely by +examining the Unix process table with the C<ps> tool. However, in some cases +the process's command line does not list the path to the data directory. If the +tool fails to detect the MySQL server process, or if you wish to limit the check +to a single instance in the event that there are multiple instances on a single +server, then you can specify MySQL authentication options. This will cause the +plugin to skip examining the Unix processlist, log into MySQL, and examine the +datadir variable from SHOW VARIABLES to find the location of the data directory. + +If the user running this plugin does not have permission to examine the +datadir, the plugin raises an unknown status with an explanation. + +=head1 PRIVILEGES + +This plugin executes the following commands against MySQL: + +=over + +=item * + +C<SELECT> against the C<INFORMATION_SCHEMA> InnoDB transaction and lock tables. + +=back + +This plugin executes no UNIX commands that may need special privileges. + +=head1 COPYRIGHT, LICENSE, AND WARRANTY + +This program is copyright 2012-$CURRENT_YEAR$ Baron Schwartz, 2012-$CURRENT_YEAR$ Percona Inc. +Feedback and improvements are welcome. + +THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation, version 2. You should have received a copy of the GNU General +Public License along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ +=head1 VERSION + +$PROJECT_NAME$ pmp-check-mysql-innodb $VERSION$ + +=cut + +DOCUMENTATION diff --git a/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-pidfile b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-pidfile new file mode 100755 index 0000000..bd6c0e5 --- /dev/null +++ b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-pidfile @@ -0,0 +1,291 @@ +#!/bin/sh + +# ######################################################################## +# This program is part of $PROJECT_NAME$ +# License: GPL License (see COPYING) +# Authors: +# Baron Schwartz +# ######################################################################## + +# ######################################################################## +# Redirect STDERR to STDOUT; Nagios doesn't handle STDERR. +# ######################################################################## +exec 2>&1 + +# ######################################################################## +# Set up constants, etc. +# ######################################################################## +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 +STATE_DEPENDENT=4 + +# ######################################################################## +# Run the program. 
# ########################################################################
main() {
   # Get options. Each value-taking option shifts twice: once for the flag
   # and once for its value.
   for o; do
      case "${o}" in
         -c)              shift; OPT_CRIT="${1}"; shift; ;;
         --defaults-file) shift; OPT_DEFT="${1}"; shift; ;;
         -H)              shift; OPT_HOST="${1}"; shift; ;;
         -l)              shift; OPT_USER="${1}"; shift; ;;
         -L)              shift; OPT_LOPA="${1}"; shift; ;;
         -p)              shift; OPT_PASS="${1}"; shift; ;;
         -P)              shift; OPT_PORT="${1}"; shift; ;;
         -S)              shift; OPT_SOCK="${1}"; shift; ;;
         -w)              shift; OPT_WARN="${1}"; shift; ;;
         --version)       grep -A2 '^=head1 VERSION' "$0" | tail -n1; exit 0 ;;
         --help)          perl -00 -ne 'm/^ Usage:/ && print' "$0"; exit 0 ;;
         -*)              echo "Unknown option ${o}. Try --help."; exit 1; ;;
      esac
   done
   if [ -e '/etc/nagios/mysql.cnf' ]; then
      OPT_DEFT="${OPT_DEFT:-/etc/nagios/mysql.cnf}"
   fi
   if is_not_sourced; then
      if [ -n "$1" ]; then
         echo "WARN spurious command-line options: $@"
         exit 1
      fi
   fi

   # Set the exit status in case there are any problems.
   NOTE="UNK could not determine the PID file location."

   # Set up files to hold one or more PID file locations. Declaration and
   # assignment are separate because "local VAR=$(cmd)" returns the status
   # of "local", which would hide a mktemp failure from "|| exit".
   local TEMP FILES
   TEMP=$(mktemp -t "${0##*/}.XXXXXX") || exit $?
   FILES=$(mktemp -t "${0##*/}.XXXXXX") || { rm -f "${TEMP}"; exit 1; }
   trap "rm -f '${TEMP}' '${FILES}' >/dev/null 2>&1" EXIT

   # If any connection option was given (including a login path), then try
   # to log in to find the PID file.
   if [ "${OPT_DEFT}${OPT_LOPA}${OPT_HOST}${OPT_USER}${OPT_PASS}${OPT_PORT}${OPT_SOCK}" ]; then
      # If this fails (e.g. we can't log in), then there will be no line in the
      # file, and later we won't change the exit code / note away from "UNK".
      if mysql_exec "SHOW GLOBAL VARIABLES" > "${TEMP}"; then
         get_pidfile "${TEMP}" >> "${FILES}"
      fi
   else
      # Find all MySQL server instances.
      for pid in $(_pidof mysqld); do
         ps -p ${pid} -o pid,command | grep "${pid}" >> "${TEMP}"
      done
      # The ${TEMP} file may now contain lines like the following sample:
      # 13822 /usr/sbin/mysqld --defaults-file=/var/lib/mysql/my.cnf \
      #       --basedir=/usr --datadir=/var/lib/mysql/data/ \
      #       --pid-file=/var/run/mysqld/mysqld.pid \
      #       --socket=/var/run/mysqld/mysqld.sock
      # Now the task is to find any reference to a --pid-file or --pid_file
      # option. We store these into the $FILES temp file.
      # read -r keeps backslashes in the command line intact.
      while read -r pid command; do
         if echo "${command}" | grep pid.file >/dev/null 2>&1; then
            # Strip off everything up to and including --pid-file=
            command="${command##*--pid?file=}"
            # Strip off any options that follow this, assuming that there's not
            # a space followed by a dash in the pidfile's path.
            echo "${command%% -*}" >> "${FILES}"
         fi
      done < "${TEMP}"
   fi

   # TODO: maybe in the future we can also check whether the PID in the file is
   # correct. TODO: maybe we should also alert on which PID is missing its
   # pidfile.
   MISSING=""
   NOTE2=""
   while read pidfile; do
      if [ ! -e "${pidfile}" ]; then
         MISSING=1
         NOTE2="${NOTE2:+${NOTE2}; }missing ${pidfile}"
      fi
      # Reaching this line means at least one PID file location was found;
      # the note is overridden below when any of them is missing.
      NOTE="OK all PID files exist."
   done < "${FILES}"

   if [ "${MISSING}" ]; then
      # -c (with any value) escalates the alert from WARN to CRIT.
      if [ "${OPT_CRIT}" ]; then
         NOTE="CRIT ${NOTE2}"
      else
         NOTE="WARN ${NOTE2}"
      fi
   fi

   echo $NOTE
}

# ########################################################################
# Execute a MySQL command.
# ########################################################################
mysql_exec() {
   # Each option is passed only when its OPT_* variable is non-empty.
   mysql ${OPT_DEFT:+--defaults-file="${OPT_DEFT}"} \
      ${OPT_LOPA:+--login-path="${OPT_LOPA}"} \
      ${OPT_HOST:+-h"${OPT_HOST}"} ${OPT_PORT:+-P"${OPT_PORT}"} \
      ${OPT_USER:+-u"${OPT_USER}"} ${OPT_PASS:+-p"${OPT_PASS}"} \
      ${OPT_SOCK:+-S"${OPT_SOCK}"} -ss -e "$1"
}

# ########################################################################
# A wrapper around pidof, which might not exist.
# The first argument is the
# command name to match.
# ########################################################################
_pidof() {
   # Fall back to scanning the process table when pidof fails (missing
   # binary or no matching process).
   if ! pidof "${1}" 2>/dev/null; then
      ps axo pid,ucomm | awk -v comm="${1}" '$2 == comm { print $1 }'
   fi
}

# ########################################################################
# Unfortunately, MySQL 5.0 doesn't have a system variable @@pid_file, so
# we have to use SHOW VARIABLES and a temp file.
# The first argument is the file that contains SHOW VARIABLES.
# Prints the PID file path. A relative pid_file is resolved against datadir,
# because the server creates the file in the data directory unless an
# absolute path is given; basedir is used only when datadir is not present
# in the input.
# ########################################################################
get_pidfile() {
   awk '
      /^pid_file/ { pid_file = $2 }
      /^datadir/  { datadir  = $2 }
      /^basedir/  { basedir  = $2 }
      END {
         if ( substr(pid_file, 1, 1) != "/" ) {
            dir = (datadir != "") ? datadir : basedir;
            # Join with exactly one slash when the directory lacks a
            # trailing one.
            if ( dir != "" && substr(dir, length(dir), 1) != "/" ) {
               dir = dir "/";
            }
            pid_file = dir pid_file;
         }
         print pid_file;
      }
   ' "$1"
}

# ########################################################################
# Determine whether this program is being executed directly, or sourced/included
# from another file.
# ########################################################################
is_not_sourced() {
   # True when the basename of $0 is the plugin name, or when it is "bash"
   # and $_ equals $0. The line is order-sensitive because it reads $_.
   [ "${0##*/}" = "pmp-check-mysql-pidfile" ] || [ "${0##*/}" = "bash" -a "$_" = "$0" ]
}

# ########################################################################
# Execute the program if it was not included from another file.
# This makes it possible to include without executing, and thus test.
# ########################################################################
if is_not_sourced; then
   OUTPUT=$(main "$@")
   # The leading word of the output selects the exit status; anything
   # unrecognized (including UNK*) maps to UNKNOWN.
   case "${OUTPUT}" in
      OK*)   EXITSTATUS=$STATE_OK ;;
      WARN*) EXITSTATUS=$STATE_WARNING ;;
      CRIT*) EXITSTATUS=$STATE_CRITICAL ;;
      *)     EXITSTATUS=$STATE_UNKNOWN ;;
   esac
   echo "${OUTPUT}"
   exit $EXITSTATUS
fi

# ############################################################################
# Documentation
# ############################################################################
: <<'DOCUMENTATION'
=pod

=head1 NAME

pmp-check-mysql-pidfile - Alert when the mysqld PID file is missing.

=head1 SYNOPSIS

 Usage: pmp-check-mysql-pidfile [OPTIONS]
 Options:
   -c CRIT         Critical threshold; makes a missing PID file critical.
   --defaults-file FILE Only read mysql options from the given file.
                   Defaults to /etc/nagios/mysql.cnf if it exists.
   -H HOST         MySQL hostname.
   -l USER         MySQL username.
   -L LOGIN-PATH   Use login-path to access MySQL (with MySQL client 5.6).
   -p PASS         MySQL password.
   -P PORT         MySQL port.
   -S SOCKET       MySQL socket file.
   -w WARN         Warning threshold; ignored.
   --help          Print help and exit.
   --version       Print version and exit.
 Options must be given as --option value, not --option=value or -Ovalue.
 Use perldoc to read embedded documentation with more details.

=head1 DESCRIPTION

This Nagios plugin checks to make sure that the MySQL PID file is not missing.
The PID file contains the process ID of the MySQL server process, and is used by
init scripts to start and stop the server. If it is deleted for some reason,
then it is likely that the init script will not work correctly. The file can be
deleted by poorly written scripts, an accident, or a mistaken attempt to restart
MySQL while it is already running, especially if mysqld is executed directly
instead of using the init script.
+ +The plugin accepts the -w and -c options for compatibility with standard Nagios +plugin conventions, but they are not based on a threshold. Instead, the plugin +raises a warning by default, and if the -c option is given, it raises an error +instead, regardless of the option's value. + +By default, this plugin will attempt to detect all running instances of MySQL, +and verify the PID file's existence for each one. It does this purely by +examining the Unix process table with the C<ps> tool. However, in some cases +the process's command line does not list the path to the PID file. If the tool +fails to detect the MySQL server process, or if you wish to limit the check to a +single instance in the event that there are multiple instances on a single +server, then you can specify MySQL authentication options. This will cause the +plugin to skip examining the Unix processlist, log into MySQL, and examine the +pid_file variable from SHOW VARIABLES to find the location of the PID file. + +=head1 PRIVILEGES + +This plugin executes the following commands against MySQL: + +=over + +=item * + +C<SHOW GLOBAL VARIABLES> + +=back + +This plugin executes no UNIX commands that may need special privileges. + +=head1 COPYRIGHT, LICENSE, AND WARRANTY + +This program is copyright 2012-$CURRENT_YEAR$ Baron Schwartz, 2012-$CURRENT_YEAR$ Percona Inc. +Feedback and improvements are welcome. + +THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation, version 2. You should have received a copy of the GNU General +Public License along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ +=head1 VERSION + +$PROJECT_NAME$ pmp-check-mysql-processlist $VERSION$ + +=cut + +DOCUMENTATION diff --git a/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-replication-delay b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-replication-delay new file mode 100755 index 0000000..1f02585 --- /dev/null +++ b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-mysql-replication-delay @@ -0,0 +1,280 @@ +#!/bin/sh + +# ######################################################################## +# This program is part of $PROJECT_NAME$ +# License: GPL License (see COPYING) +# Authors: +# Baron Schwartz, Roman Vynar +# ######################################################################## + +# ######################################################################## +# Redirect STDERR to STDOUT; Nagios doesn't handle STDERR. +# ######################################################################## +exec 2>&1 + +# ######################################################################## +# Set up constants, etc. +# ######################################################################## +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 +STATE_DEPENDENT=4 + +# ######################################################################## +# Run the program. 
# ########################################################################
main() {
   # Get options. Value-taking options shift twice (flag and value); the
   # pure flags (-u, --unconfigured, --ensure-sbm) shift once.
   OPT_ENSURE_SBM=0
   MIN_DELAY_SET=0
   for o; do
      case "${o}" in
         -c)              shift; OPT_CRIT="${1}"; shift; ;;
         --defaults-file) shift; OPT_DEFT="${1}"; shift; ;;
         -H)              shift; OPT_HOST="${1}"; shift; ;;
         -l)              shift; OPT_USER="${1}"; shift; ;;
         -L)              shift; OPT_LOPA="${1}"; shift; ;;
         -m)              shift; OPT_MIN="${1}"; MIN_DELAY_SET=1; shift; ;;
         -p)              shift; OPT_PASS="${1}"; shift; ;;
         -P)              shift; OPT_PORT="${1}"; shift; ;;
         -S)              shift; OPT_SOCK="${1}"; shift; ;;
         -s)              shift; OPT_SRVID="${1}"; shift; ;;
         -T)              shift; OPT_TABLE="${1}"; shift; ;;
         -u)              shift; OPT_UTC=1; ;;
         -w)              shift; OPT_WARN="${1}"; shift; ;;
         --master-conn)   shift; OPT_MASTERCONN="${1}"; shift; ;;
         --channel)       shift; OPT_CHANNEL="${1}"; shift; ;;
         --unconfigured)  shift; OPT_REPLNOTSET=1; ;;
         --ensure-sbm)    shift; OPT_ENSURE_SBM=1; ;;
         --version)       grep -A2 '^=head1 VERSION' "$0" | tail -n1; exit 0 ;;
         --help)          perl -00 -ne 'm/^ Usage:/ && print' "$0"; exit 0 ;;
         -*)              echo "Unknown option ${o}. Try --help."; exit 1; ;;
      esac
   done
   OPT_WARN=${OPT_WARN:-300}
   OPT_CRIT=${OPT_CRIT:-600}
   OPT_MIN=${OPT_MIN:-0}
   if [ -e '/etc/nagios/mysql.cnf' ]; then
      OPT_DEFT="${OPT_DEFT:-/etc/nagios/mysql.cnf}"
   fi
   if is_not_sourced; then
      if [ -n "$1" ]; then
         echo "WARN spurious command-line options: $@"
         exit 1
      fi
   fi

   # Get replication delay from a heartbeat table or from SHOW SLAVE STATUS.
   # A non-empty first argument is a file of canned slave-status output.
   get_slave_status "$1"
   if [ "${OPT_TABLE}" ]; then
      if [ -z "${OPT_UTC}" ]; then
         NOW_FUNC='UNIX_TIMESTAMP()'
      else
         NOW_FUNC='UNIX_TIMESTAMP(UTC_TIMESTAMP)'
      fi
      # "=" rather than "==": this script runs under /bin/sh, and "==" is
      # not a valid test operator in POSIX shells such as dash.
      if [ "${OPT_SRVID}" = "MASTER" ]; then
         if [ "${MYSQL_CONN}" = 0 ]; then
            OPT_SRVID=$(awk '/Master_Server_Id/{print $2}' "${TEMP_SLAVEDATA}")
         fi
      fi
      SQL="SELECT MAX(${NOW_FUNC} - ROUND(UNIX_TIMESTAMP(ts))) AS delay
         FROM ${OPT_TABLE} WHERE (${OPT_SRVID:-0} = 0 OR server_id = ${OPT_SRVID:-0})"
      LEVEL=$(mysql_exec "${SQL}")
      MYSQL_CONN=$?
   else
      if [ "${MYSQL_CONN}" = 0 ]; then
         LEVEL=$(awk '/Seconds_Behind_Master/{print $2}' "${TEMP_SLAVEDATA}")
      fi
   fi

   # Check for SQL thread errors
   LAST_SLAVE_ERRNO=$(awk '/Last_SQL_Errno/{print $2}' "${TEMP_SLAVEDATA}")

   # Build the common perf data output for graph trending
   PERFDATA="replication_delay=${LEVEL:-0};${OPT_WARN};${OPT_CRIT};0;"

   # Test whether the delay is too long.
   if [ "$MYSQL_CONN" = 0 ]; then
      NOTE="${LEVEL:-0} seconds of replication delay"
      if [ "${LEVEL:-""}" = "NULL" ]; then
         # A NULL delay is acceptable only for a delayed slave (-m given)
         # with no SQL thread error and without --ensure-sbm. An empty errno
         # counts as an error (default 1), which still yields UNK but avoids
         # a "test" syntax error leaking into the plugin output.
         if [ "${MIN_DELAY_SET}" -eq 1 ] && \
            [ "${LAST_SLAVE_ERRNO:-1}" -eq 0 ] && \
            [ "${OPT_ENSURE_SBM}" -eq 0 ]; then
            NOTE="OK NULL seconds of replication delay"
         else
            NOTE="UNK replica is stopped"
         fi
      elif [ -z "${LEVEL}" ] && [ "${OPT_REPLNOTSET}" ]; then
         NOTE="UNK This server is not configured as a replica."
      # pt-slave-delayed slave
      elif [ ${MIN_DELAY_SET} -eq 1 ] && [ "${LEVEL:-0}" -lt "${OPT_MIN}" ]; then
         NOTE="CRIT (delayed slave) $NOTE | $PERFDATA"
      elif [ "${LEVEL:-0}" -gt "${OPT_CRIT}" ]; then
         NOTE="CRIT $NOTE | $PERFDATA"
      elif [ "${LEVEL:-0}" -gt "${OPT_WARN}" ]; then
         NOTE="WARN $NOTE | $PERFDATA"
      else
         NOTE="OK $NOTE | $PERFDATA"
      fi
   else
      NOTE="UNK could not determine replication delay"
   fi
   echo $NOTE
}

# ########################################################################
# Execute a MySQL command.
# ########################################################################
mysql_exec() {
   # Each option is passed only when its OPT_* variable is non-empty.
   mysql ${OPT_DEFT:+--defaults-file="${OPT_DEFT}"} \
      ${OPT_LOPA:+--login-path="${OPT_LOPA}"} \
      ${OPT_HOST:+-h"${OPT_HOST}"} ${OPT_PORT:+-P"${OPT_PORT}"} \
      ${OPT_USER:+-u"${OPT_USER}"} ${OPT_PASS:+-p"${OPT_PASS}"} \
      ${OPT_SOCK:+-S"${OPT_SOCK}"} -ss -e "$1"
}

# ########################################################################
# Determine whether this program is being executed directly, or sourced/included
# from another file.
# ########################################################################
is_not_sourced() {
   # True when the basename of $0 is the plugin name, or when it is "bash"
   # and $_ equals $0. The line is order-sensitive because it reads $_.
   [ "${0##*/}" = "pmp-check-mysql-replication-delay" ] || [ "${0##*/}" = "bash" -a "$_" = "$0" ]
}

# ########################################################################
# Captures the "SHOW SLAVE STATUS" output into a temp file.
# Sets TEMP_SLAVEDATA (the temp file path) and MYSQL_CONN (0 on success).
# ########################################################################
get_slave_status() {
   TEMP_SLAVEDATA=$(mktemp -t "${0##*/}.XXXXXX") || exit $?
   trap "rm -f '${TEMP_SLAVEDATA}' >/dev/null 2>&1" EXIT

   # This is for testing only: a non-empty first argument names a file whose
   # contents stand in for the server's answer.
   if [ -n "$1" ]; then
      cat "$1" > "${TEMP_SLAVEDATA}" 2>/dev/null
      MYSQL_CONN=0
      return
   fi

   if [ -n "${OPT_MASTERCONN}" ]; then
      # MariaDB multi-source replication
      mysql_exec "SHOW SLAVE '${OPT_MASTERCONN}' STATUS\G" > "${TEMP_SLAVEDATA}"
   elif [ -n "${OPT_CHANNEL}" ]; then
      mysql_exec "SHOW SLAVE STATUS FOR CHANNEL '${OPT_CHANNEL}'\G" > "${TEMP_SLAVEDATA}"
   else
      # Leverage lock-free SHOW SLAVE STATUS if available, trying each
      # variant in turn until one succeeds.
      mysql_exec "SHOW SLAVE STATUS NONBLOCKING\G" > "${TEMP_SLAVEDATA}" 2>/dev/null ||
         mysql_exec "SHOW SLAVE STATUS NOLOCK\G" > "${TEMP_SLAVEDATA}" 2>/dev/null ||
         mysql_exec "SHOW SLAVE STATUS\G" > "${TEMP_SLAVEDATA}"
   fi
   # Status of whichever query ran last in the branch above.
   MYSQL_CONN=$?
}

# ########################################################################
# Execute the program if it was not included from another file.
# This makes it possible to include without executing, and thus test.
# ########################################################################
if is_not_sourced; then
   OUTPUT=$(main "$@")
   # The leading word of the output selects the exit status; anything
   # unrecognized (including UNK*) maps to UNKNOWN.
   case "${OUTPUT}" in
      OK*)   EXITSTATUS=$STATE_OK ;;
      WARN*) EXITSTATUS=$STATE_WARNING ;;
      CRIT*) EXITSTATUS=$STATE_CRITICAL ;;
      *)     EXITSTATUS=$STATE_UNKNOWN ;;
   esac
   echo "${OUTPUT}"
   exit $EXITSTATUS
fi

# ############################################################################
# Documentation
# ############################################################################
: <<'DOCUMENTATION'
=pod

=head1 NAME

pmp-check-mysql-replication-delay - Alert when MySQL replication becomes delayed.

=head1 SYNOPSIS

 Usage: pmp-check-mysql-replication-delay [OPTIONS]
 Options:
   -c CRIT         Critical threshold; default 600.
   --defaults-file FILE Only read mysql options from the given file.
                   Defaults to /etc/nagios/mysql.cnf if it exists.
   -H HOST         MySQL hostname.
   -l USER         MySQL username.
   -L LOGIN-PATH   Use login-path to access MySQL (with MySQL client 5.6).
   -m CRIT         Minimal threshold to ensure for delayed slaves; default 0.
   -p PASS         MySQL password.
   -P PORT         MySQL port.
   -S SOCKET       MySQL socket file.
   -s SERVERID     MySQL server ID of master, if using pt-heartbeat table. If
                   the parameter is set to "MASTER" the plugin will lookup the
                   server_id of the master
   -T TABLE        Heartbeat table used by pt-heartbeat.
   -u              Use UTC time to count the delay in case pt-heartbeat is run
                   with --utc option.
   -w WARN         Warning threshold; default 300.
   --master-conn NAME  Master connection name for MariaDB multi-source replication.
   --channel NAME  Master channel name for multi-source replication (MySQL 5.7.6+).
   --unconfigured  Alert when replica is not configured at all; default no.
   --ensure-sbm    Disallow Seconds_Behind_Master to be NULL for delayed slaves when -m is used
   --help          Print help and exit.
   --version       Print version and exit.
+ Options must be given as --option value, not --option=value or -Ovalue. + Use perldoc to read embedded documentation with more details. + +=head1 DESCRIPTION + +This Nagios plugin examines whether MySQL replication is delayed too much. By +default it uses SHOW SLAVE STATUS, but the output of the Seconds_Behind_Master +column from this command is unreliable, so it is better to use pt-heartbeat from +Percona Toolkit instead. Use the -T option to specify which table pt-heartbeat +updates. Use the -s option to specify the master's server_id to compare +against; otherwise the plugin reports the maximum delay from any server. Use +the -s option with the value "MASTER" to have the plugin look up the master's server_id. + +If you want to run this check against delayed slaves, e.g. those running +with the pt-slave-delay tool, you may want to use the -m option to specify the minimal +delay that should be ongoing; otherwise the plugin will raise a critical alert. + +=head1 PRIVILEGES + +This plugin executes the following commands against MySQL: + +=over + +=item * + +C<SHOW SLAVE STATUS> + +or + +=item * + +C<SELECT> from the supplied table. + +=back + +This plugin executes no UNIX commands that may need special privileges. + +=head1 COPYRIGHT, LICENSE, AND WARRANTY + +This program is copyright 2012-$CURRENT_YEAR$ Baron Schwartz, 2012-$CURRENT_YEAR$ Percona Inc. +Feedback and improvements are welcome. + +THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation, version 2. You should have received a copy of the GNU General +Public License along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ +=head1 VERSION + +$PROJECT_NAME$ pmp-check-mysql-ts-count $VERSION$ + +=cut + +DOCUMENTATION diff --git a/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-pt-table-checksum b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-pt-table-checksum new file mode 100755 index 0000000..f8df9b8 --- /dev/null +++ b/nagios-plugins-contrib-24.20190301~bpo9+1/percona-nagios-plugins/nagios/bin/pmp-check-pt-table-checksum @@ -0,0 +1,239 @@ +#!/bin/bash + +# ######################################################################## +# This program is part of $PROJECT_NAME$ +# License: GPL License (see COPYING) +# Authors: +# Baron Schwartz, Roman Vynar +# ######################################################################## + +# ######################################################################## +# Redirect STDERR to STDOUT; Nagios doesn't handle STDERR. +# ######################################################################## +exec 2>&1 + +# ######################################################################## +# Set up constants, etc. +# ######################################################################## +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 +STATE_DEPENDENT=4 + +# ######################################################################## +# Run the program. 
# ########################################################################
main() {
   # Get options. Each value-taking option shifts twice: once for the flag
   # and once for its value.
   for o; do
      case "${o}" in
         -c)              shift; OPT_CRIT="${1}"; shift; ;;
         --defaults-file) shift; OPT_DEFT="${1}"; shift; ;;
         -H)              shift; OPT_HOST="${1}"; shift; ;;
         -i)              shift; OPT_INTERVAL="${1}"; shift; ;;
         -l)              shift; OPT_USER="${1}"; shift; ;;
         -L)              shift; OPT_LOPA="${1}"; shift; ;;
         -p)              shift; OPT_PASS="${1}"; shift; ;;
         -P)              shift; OPT_PORT="${1}"; shift; ;;
         -S)              shift; OPT_SOCK="${1}"; shift; ;;
         -T)              shift; OPT_TABLE="${1}"; shift; ;;
         -w)              shift; OPT_WARN="${1}"; shift; ;;
         --version)       grep -A2 '^=head1 VERSION' "$0" | tail -n1; exit 0 ;;
         --help)          perl -00 -ne 'm/^ Usage:/ && print' "$0"; exit 0 ;;
         -*)              echo "Unknown option ${o}. Try --help."; exit 1; ;;
      esac
   done
   OPT_INTERVAL=${OPT_INTERVAL:-0}
   OPT_TABLE="${OPT_TABLE:-percona.checksums}"
   if [ -e '/etc/nagios/mysql.cnf' ]; then
      OPT_DEFT="${OPT_DEFT:-/etc/nagios/mysql.cnf}"
   fi
   if is_not_sourced; then
      if [ -n "$1" ]; then
         echo "WARN spurious command-line options: $@"
         exit 1
      fi
   fi

   NOTE="UNK couldn't query the checksum table"

   # -c (with any value) escalates every alert from WARN to CRIT.
   if [ "${OPT_CRIT}" ]; then
      ALERT_LEVEL="CRIT"
   else
      ALERT_LEVEL="WARN"
   fi

   # Set up a temp file to hold error messages from MySQL.
   TEMP=$(mktemp -t "${0##*/}.XXXXXX") || exit $?
   trap "rm -f '${TEMP}' >/dev/null 2>&1" EXIT

   # Get the query from the documentation and execute it.
   SQL=$(get_magic_query "${0}" checksum_diff_query)
   PROBLEMS=$(mysql_exec "${SQL/CHECKSUM_TABLE/${OPT_TABLE}}" 2>"${TEMP}")
   if [ $? = 0 ]; then
      if [ "${PROBLEMS}" ]; then
         NOTE="${ALERT_LEVEL} pt-table-checksum found ${PROBLEMS}"
      else
         NOTE="OK pt-table-checksum found no out-of-sync tables"
         # Optionally make sure the checksum table was refreshed recently.
         if [ "${OPT_INTERVAL}" -gt 0 ]; then
            RECENT_CHUNKS=$(mysql_exec "SELECT IF(COALESCE(MAX(ts), NOW()) > NOW() - INTERVAL ${OPT_INTERVAL} DAY, 1, 0) FROM ${OPT_TABLE}")
            if [ "${RECENT_CHUNKS}" = 0 ]; then
               NOTE="${ALERT_LEVEL} pt-table-checksum was not run over last ${OPT_INTERVAL} days"
            fi
         fi
      fi
   # -F matches the message literally; the table name contains ".", which
   # would otherwise act as a regular-expression wildcard.
   elif grep -F "Table '${OPT_TABLE}' doesn't exist" "${TEMP}" >/dev/null 2>&1; then
      NOTE="UNK table '${OPT_TABLE}' doesn't exist"
   fi
   echo $NOTE
}

# ########################################################################
# Execute a MySQL command.
# ########################################################################
mysql_exec() {
   # Each option is passed only when its OPT_* variable is non-empty.
   mysql ${OPT_DEFT:+--defaults-file="${OPT_DEFT}"} \
      ${OPT_LOPA:+--login-path="${OPT_LOPA}"} \
      ${OPT_HOST:+-h"${OPT_HOST}"} ${OPT_PORT:+-P"${OPT_PORT}"} \
      ${OPT_USER:+-u"${OPT_USER}"} ${OPT_PASS:+-p"${OPT_PASS}"} \
      ${OPT_SOCK:+-S"${OPT_SOCK}"} -ss -e "$1"
}

# ########################################################################
# Retrieve a paragraph from the given file, which includes MAGIC_$2 as a
# pattern.
# ########################################################################
get_magic_query() {
   # -00 makes perl read the file one blank-line-separated paragraph at a time.
   perl -00 -ne"m/MAGIC_$2/ && print" "$1"
}

# ########################################################################
# Determine whether this program is being executed directly, or sourced/included
# from another file.
# ########################################################################
is_not_sourced() {
   # True when the basename of $0 is the plugin name, or when it is "bash"
   # and $_ equals $0. The line is order-sensitive because it reads $_.
   [ "${0##*/}" = "pmp-check-pt-table-checksum" ] || [ "${0##*/}" = "bash" -a "$_" = "$0" ]
}

# ########################################################################
# Execute the program if it was not included from another file.
# This makes it possible to include without executing, and thus test.
# ########################################################################
if is_not_sourced; then
   OUTPUT=$(main "$@")
   # The leading word of the output selects the exit status; anything
   # unrecognized (including UNK*) maps to UNKNOWN.
   case "${OUTPUT}" in
      OK*)   EXITSTATUS=$STATE_OK ;;
      WARN*) EXITSTATUS=$STATE_WARNING ;;
      CRIT*) EXITSTATUS=$STATE_CRITICAL ;;
      *)     EXITSTATUS=$STATE_UNKNOWN ;;
   esac
   echo "${OUTPUT}"
   exit $EXITSTATUS
fi

# ############################################################################
# Documentation
# ############################################################################
: <<'DOCUMENTATION'
=pod

=head1 NAME

pmp-check-pt-table-checksum - Alert when pt-table-checksum finds data differences on a replica.

=head1 SYNOPSIS

 Usage: pmp-check-pt-table-checksum [OPTIONS]
 Options:
   -c CRIT         Raise a critical error instead of a warning.
   --defaults-file FILE Only read mysql options from the given file.
                   Defaults to /etc/nagios/mysql.cnf if it exists.
   -H HOST         MySQL hostname.
   -l USER         MySQL username.
   -L LOGIN-PATH   Use login-path to access MySQL (with MySQL client 5.6).
   -p PASS         MySQL password.
   -P PORT         MySQL port.
   -S SOCKET       MySQL socket file.
   -i INTERVAL     Interval over which to ensure pt-table-checksum was run,
                   in days; default - not to check.
   -T TABLE        The checksum table; default percona.checksums
   -w WARN         Warning threshold; ignored.
   --help          Print help and exit.
   --version       Print version and exit.
 Options must be given as --option value, not --option=value or -Ovalue.
 Use perldoc to read embedded documentation with more details.
+ +=head1 DESCRIPTION + +This Nagios plugin examines whether MySQL replication has drifted out of sync +with the master's data, according to checks performed by the pt-table-checksum +tool in Percona Toolkit. It uses the following query to determine whether the +server's data matches its master's: + + SELECT /* MAGIC_checksum_diff_query */ + CONCAT( + COUNT(*), + ' chunks differ in ', + COUNT(DISTINCT CONCAT(db, tbl)), + ' tables, including ', + MIN(CONCAT(db, '.', tbl))) + FROM CHECKSUM_TABLE + WHERE master_cnt <> this_cnt OR master_crc <> this_crc + OR ISNULL(master_crc) <> ISNULL(this_crc) + HAVING COUNT(*) > 0 + +The word CHECKSUM_TABLE is replaced by the value of the -T option. If the table +specified by -T does not exist, unknown is raised. + +Optionally, you can specify an interval in days over which to ensure pt-table-checksum +was run. It is useful in cases when the cron job doing the checksumming suddenly +stopped working. This option will have an effect when no diffs are found and the +checksum table is not empty. + +Alerts are raised at a WARNING level by default, but specifying the -c option +with any value will change this to CRITICAL instead. + +=head1 PRIVILEGES + +This plugin executes the following commands against MySQL: + +=over + +=item * + +C