#!/bin/sh

# ########################################################################
# This program is part of $PROJECT_NAME$
# License: GPL License (see COPYING)
# Authors:
#  Baron Schwartz, Roman Vynar
# ########################################################################

# ########################################################################
# Redirect STDERR to STDOUT; Nagios doesn't handle STDERR.
# ########################################################################
exec 2>&1

# ########################################################################
# Set up constants, etc.
# ########################################################################
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4

# ########################################################################
# Run the program.
#
# Parses the command-line options into OPT_* globals, captures the replica
# status through get_slave_status, derives the delay (LEVEL) either from
# the heartbeat table (-T) or from Seconds_Behind_Master, and prints one
# status line beginning with OK, WARN, CRIT or UNK.  The caller at the
# bottom of the file maps that prefix to the exit status.
#
# Arguments: the plugin options, optionally followed by one file name
#            holding canned SHOW SLAVE STATUS output (testing only).
# ########################################################################
main() {
  # Get options
  OPT_ENSURE_SBM=0
  MIN_DELAY_SET=0
  # Walk the arguments with "$1" rather than "for o", so that an option's
  # value is consumed together with the option and is never re-examined
  # as if it were an option itself (e.g. a password starting with "-").
  while [ $# -gt 0 ]; do
    case "$1" in
      -c)              shift; OPT_CRIT="${1}" ;;
      --defaults-file) shift; OPT_DEFT="${1}" ;;
      -H)              shift; OPT_HOST="${1}" ;;
      -l)              shift; OPT_USER="${1}" ;;
      -L)              shift; OPT_LOPA="${1}" ;;
      -m)              shift; OPT_MIN="${1}"; MIN_DELAY_SET=1 ;;
      -p)              shift; OPT_PASS="${1}" ;;
      -P)              shift; OPT_PORT="${1}" ;;
      -S)              shift; OPT_SOCK="${1}" ;;
      -s)              shift; OPT_SRVID="${1}" ;;
      -T)              shift; OPT_TABLE="${1}" ;;
      -u)              OPT_UTC=1 ;;
      -w)              shift; OPT_WARN="${1}" ;;
      --master-conn)   shift; OPT_MASTERCONN="${1}" ;;
      --channel)       shift; OPT_CHANNEL="${1}" ;;
      --unconfigured)  OPT_REPLNOTSET=1 ;;
      --ensure-sbm)    OPT_ENSURE_SBM=1 ;;
      --version)       grep -A2 '^=head1 VERSION' "$0" | tail -n1; exit 0 ;;
      # Paragraph mode (-00): print the paragraph that starts with "Usage:",
      # whatever its leading indentation is.
      --help)          perl -00 -ne 'm/^\s*Usage:/ && print' "$0"; exit 0 ;;
      -*)              echo "Unknown option ${1}. Try --help."; exit 1 ;;
      # First non-option argument: leave it (and anything after it) in "$@".
      *)               break ;;
    esac
    # Drop the flag itself, or the value of a value-taking option.  The
    # guard keeps a trailing option with no value from shifting past the
    # end; its variable is then empty and the defaults below apply.
    [ $# -eq 0 ] || shift
  done
  OPT_WARN=${OPT_WARN:-300}
  OPT_CRIT=${OPT_CRIT:-600}
  OPT_MIN=${OPT_MIN:-0}
  if [ -e '/etc/nagios/mysql.cnf' ]; then
    OPT_DEFT="${OPT_DEFT:-/etc/nagios/mysql.cnf}"
  fi
  if is_not_sourced; then
    if [ -n "$1" ]; then
      echo "WARN spurious command-line options: $*"
      exit 1
    fi
  fi

  # Get replication delay from a heartbeat table or from SHOW SLAVE STATUS.
  get_slave_status "$1"
  if [ "${OPT_TABLE}" ]; then
    if [ -z "${OPT_UTC}" ]; then
      NOW_FUNC='UNIX_TIMESTAMP()'
    else
      NOW_FUNC='UNIX_TIMESTAMP(UTC_TIMESTAMP)'
    fi
    # "-s MASTER": take the server id from the captured replica status.
    # POSIX test only defines "=", so "==" must not be used under /bin/sh.
    if [ "${OPT_SRVID}" = "MASTER" ]; then
      if [ "${MYSQL_CONN}" = 0 ]; then
        OPT_SRVID=$(awk '/Master_Server_Id/{print $2}' "${TEMP_SLAVEDATA}")
      fi
    fi
    SQL="SELECT MAX(${NOW_FUNC} - ROUND(UNIX_TIMESTAMP(ts))) AS delay"
    SQL="${SQL} FROM ${OPT_TABLE} WHERE (${OPT_SRVID:-0} = 0 OR server_id = ${OPT_SRVID:-0})"
    LEVEL=$(mysql_exec "${SQL}")
    MYSQL_CONN=$?
  else
    if [ "${MYSQL_CONN}" = 0 ]; then
      LEVEL=$(awk '/Seconds_Behind_Master/{print $2}' "${TEMP_SLAVEDATA}")
    fi
  fi

  # Check for SQL thread errors
  LAST_SLAVE_ERRNO=$(awk '/Last_SQL_Errno/{print $2}' "${TEMP_SLAVEDATA}")

  # Build the common perf data output for graph trending
  PERFDATA="replication_delay=${LEVEL:-0};${OPT_WARN};${OPT_CRIT};0;"

  # Test whether the delay is too long.
  if [ "$MYSQL_CONN" = 0 ]; then
    NOTE="${LEVEL:-0} seconds of replication delay"
    if [ "${LEVEL:-""}" = "NULL" ]; then
      # A NULL delay is accepted only for a delayed slave (-m given) with
      # no SQL thread error and without --ensure-sbm.  LAST_SLAVE_ERRNO is
      # compared as a string: it is empty when the captured status has no
      # Last_SQL_Errno line, and a numeric test on an empty operand would
      # print an error message into the plugin output.
      if [ "${MIN_DELAY_SET}" -eq 1 ] &&
         [ "${LAST_SLAVE_ERRNO}" = 0 ] &&
         [ "${OPT_ENSURE_SBM}" -eq 0 ]; then
        NOTE="OK NULL seconds of replication delay"
      else
        NOTE="UNK replica is stopped"
      fi
    elif [ -z "${LEVEL}" ] && [ -n "${OPT_REPLNOTSET}" ]; then
      NOTE="UNK This server is not configured as a replica."
    # pt-slave-delayed slave
    elif [ "${MIN_DELAY_SET}" -eq 1 ] && [ "${LEVEL:-0}" -lt "${OPT_MIN}" ]; then
      NOTE="CRIT (delayed slave) $NOTE | $PERFDATA"
    elif [ "${LEVEL:-0}" -gt "${OPT_CRIT}" ]; then
      NOTE="CRIT $NOTE | $PERFDATA"
    elif [ "${LEVEL:-0}" -gt "${OPT_WARN}" ]; then
      NOTE="WARN $NOTE | $PERFDATA"
    else
      NOTE="OK $NOTE | $PERFDATA"
    fi
  else
    NOTE="UNK could not determine replication delay"
  fi
  # Left unquoted: word splitting collapses any embedded newlines or runs
  # of blanks, so the result is always printed as a single line.
  echo ${NOTE}
}

# ########################################################################
# Execute a MySQL command.
#
# Runs the mysql client in silent mode (-ss) with the statement given in
# $1.  Each connection option is passed only when the matching OPT_*
# variable is non-empty.
# ########################################################################
mysql_exec() {
  mysql ${OPT_DEFT:+--defaults-file="${OPT_DEFT}"} \
    ${OPT_LOPA:+--login-path="${OPT_LOPA}"} \
    ${OPT_HOST:+-h"${OPT_HOST}"} ${OPT_PORT:+-P"${OPT_PORT}"} \
    ${OPT_USER:+-u"${OPT_USER}"} ${OPT_PASS:+-p"${OPT_PASS}"} \
    ${OPT_SOCK:+-S"${OPT_SOCK}"} -ss -e "$1"
}

# ########################################################################
# Determine whether this program is being executed directly, or sourced/included
# from another file.
#
# Succeeds when the basename of $0 is this plugin's name, or when it is
# "bash" and $_ equals $0.
# NOTE(review): the value of $_ is shell-specific and depends on the
# previously executed command, so the tests are kept exactly as they were.
# ########################################################################
is_not_sourced() {
  [ "${0##*/}" = "pmp-check-mysql-replication-delay" ] || [ "${0##*/}" = "bash" -a "$_" = "$0" ]
}

# ########################################################################
# Captures the "SHOW SLAVE STATUS" output into a temp file.
#
# Sets TEMP_SLAVEDATA to the temp file (removed by an EXIT trap) and
# MYSQL_CONN to the exit status of the statement that was run.  When $1 is
# given, that file is copied instead of querying MySQL and MYSQL_CONN is
# set to 0.
# ########################################################################
get_slave_status() {
  TEMP_SLAVEDATA=$(mktemp -t "${0##*/}.XXXXXX") || exit $?
  trap "rm -f '${TEMP_SLAVEDATA}' >/dev/null 2>&1" EXIT
  if [ -z "$1" ]; then
    if [ "${OPT_MASTERCONN}" ]; then
      # MariaDB multi-source replication
      mysql_exec "SHOW SLAVE '${OPT_MASTERCONN}' STATUS\G" > "${TEMP_SLAVEDATA}"
    elif [ "${OPT_CHANNEL}" ]; then
      mysql_exec "SHOW SLAVE STATUS FOR CHANNEL '${OPT_CHANNEL}'\G" > "${TEMP_SLAVEDATA}"
    else
      # Leverage lock-free SHOW SLAVE STATUS if available
      mysql_exec "SHOW SLAVE STATUS NONBLOCKING\G" > "${TEMP_SLAVEDATA}" 2>/dev/null ||
      mysql_exec "SHOW SLAVE STATUS NOLOCK\G" > "${TEMP_SLAVEDATA}" 2>/dev/null ||
      mysql_exec "SHOW SLAVE STATUS\G" > "${TEMP_SLAVEDATA}"
    fi
    # Status of whichever statement in the branch above ran last.
    MYSQL_CONN=$?
  else
    # This is for testing only.
    cat "$1" > "${TEMP_SLAVEDATA}" 2>/dev/null
    MYSQL_CONN=0
  fi
}

# ########################################################################
# Execute the program if it was not included from another file.
# This makes it possible to include without executing, and thus test.
# ########################################################################
if is_not_sourced; then
  OUTPUT=$(main "$@")
  # The prefix of the status line selects the exit status; anything
  # unrecognized is reported as UNKNOWN.
  EXITSTATUS=$STATE_UNKNOWN
  case "${OUTPUT}" in
    UNK*)  EXITSTATUS=$STATE_UNKNOWN ;;
    OK*)   EXITSTATUS=$STATE_OK ;;
    WARN*) EXITSTATUS=$STATE_WARNING ;;
    CRIT*) EXITSTATUS=$STATE_CRITICAL ;;
  esac
  echo "${OUTPUT}"
  exit $EXITSTATUS
fi

# ############################################################################
# Documentation
# ############################################################################
: <<'DOCUMENTATION'
=pod

=head1 NAME

pmp-check-mysql-replication-delay - Alert when MySQL replication becomes delayed.

=head1 SYNOPSIS

  Usage: pmp-check-mysql-replication-delay [OPTIONS]
  Options:
    -c CRIT         Critical threshold; default 600.
    --defaults-file FILE Only read mysql options from the given file.
                    Defaults to /etc/nagios/mysql.cnf if it exists.
    -H HOST         MySQL hostname.
    -l USER         MySQL username.
    -L LOGIN-PATH   Use login-path to access MySQL (with MySQL client 5.6).
    -m CRIT         Minimal threshold to ensure for delayed slaves; default 0.
    -p PASS         MySQL password.
    -P PORT         MySQL port.
    -S SOCKET       MySQL socket file.
    -s SERVERID     MySQL server ID of master, if using pt-heartbeat table.
                    If the parameter is set to "MASTER", the plugin will look
                    up the server_id of the master.
    -T TABLE        Heartbeat table used by pt-heartbeat.
    -u              Use UTC time to count the delay in case pt-heartbeat is
                    run with --utc option.
    -w WARN         Warning threshold; default 300.
    --master-conn NAME  Master connection name for MariaDB multi-source
                    replication.
    --channel NAME  Master channel name for multi-source replication
                    (MySQL 5.7.6+).
    --unconfigured  Alert when replica is not configured at all; default no.
    --ensure-sbm    Disallow Seconds_Behind_Master to be NULL for delayed
                    slaves when -m is used.
    --help          Print help and exit.
    --version       Print version and exit.
  Options must be given as --option value, not --option=value or -Ovalue.
  Use perldoc to read embedded documentation with more details.

=head1 DESCRIPTION

This Nagios plugin examines whether MySQL replication is delayed too much. By
default it uses SHOW SLAVE STATUS, but the output of the Seconds_behind_master
column from this command is unreliable, so it is better to use pt-heartbeat
from Percona Toolkit instead. Use the -T option to specify which table
pt-heartbeat updates. Use the -s option to specify the master's server_id to
compare against; otherwise the plugin reports the maximum delay from any
server. Use the -s option with the value "MASTER" to have the plugin look up
the master's server_id.

If you want to run this check against the delayed slaves, e.g. those running
with pt-slave-delay tool, you may want to use -m option specifying the minimal
delay that should be ongoing, otherwise the plugin will alert critical.

=head1 PRIVILEGES

This plugin executes the following commands against MySQL:

=over

=item *

C<SHOW SLAVE STATUS [NONBLOCKING|NOLOCK]>

or

=item *

C