Add a fixed nagios memory check on Ubuntu 16.04

This commit is contained in:
Andrea Dell'Amico 2018-05-03 12:23:34 +02:00
parent ffc72e6f4f
commit 5a0e5d1ea0
4 changed files with 180 additions and 5 deletions

View File

@ -0,0 +1,164 @@
#!/usr/bin/perl
#
# check_memory - Check free(1) data against given thresholds
#
# Copyright (C) 2007 Thomas Guyot-Sionnest <tguyot@gmail.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
use strict;
use warnings;
use Nagios::Plugin;

# Script-wide settings, kept as package variables under their original names.
our ($PROGNAME, $VERSION, $FREECMD, $UNIT);
$PROGNAME = "check_memory";
$VERSION = '1.0.1';
$FREECMD = '/usr/bin/free';
$UNIT = 'M';

# Return 1 when the given threshold is written as a whole-number percentage
# ("5%"), 0 otherwise (including when it is undefined).
sub _is_percent {
    my ($threshold) = @_;
    return (defined($threshold) && $threshold =~ /^\d+%$/) ? 1 : 0;
}

# Turn "N%" into the range string "<N percent of $total_bytes>:", i.e. the
# minimum number of free bytes below which the threshold is breached.
sub _percent_to_min_free {
    my ($percent, $total_bytes) = @_;
    (my $number = $percent) =~ s/%//;
    return ($total_bytes / 100 * $number) . ':';
}

my $np = Nagios::Plugin->new(
    usage => "Usage: %s [ -w <warning_threshold> ] [ -c <critical_threshold> ]\n"
        . ' [ -u <unit> ]',
    version => $VERSION,
    plugin => $PROGNAME,
    blurb => 'Check free(1) data against given thresholds',
    timeout => 30,
);
$np->add_arg(
    spec => 'warning|w=s',
    help => "-w, --warning=THRESHOLD[%]\n"
        . " Warning threshold (in bytes or percent) for free memory. See\n"
        . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n"
        . " for the threshold format. Alternatively this can be defined as a percentage\n"
        . ' of minimum free memory (warning and critical must be in the same format).',
    required => 0,
);
$np->add_arg(
    spec => 'critical|c=s',
    help => "-c, --critical=THRESHOLD[%]\n"
        . " Critical threshold (in bytes or percent) for free memory. See\n"
        . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n"
        . " for the threshold format. Alternatively this can be defined as a percentage\n"
        . ' of minimum free memory (warning and critical must be in the same format).',
    required => 0,
);
$np->add_arg(
    spec => 'unit|u=s',
    help => "-u, --unit=UNIT\n"
        . " Unit to use for human-readable output. Can be 'b', 'K', 'M' or 'G' for\n"
        . " bytes, KiB, MiB or GiB respectively (default: '$UNIT').",
    default => $UNIT,
    required => 0,
);
$np->getopts;

# Assign, then check args.
# Number of bytes represented by each accepted (case-sensitive) unit letter.
my %bytes_per_unit = (
    b => 1,
    K => 1024,
    M => 1024 * 1024,
    G => 1024 * 1024 * 1024,
);
my $unit = $np->opts->unit;
$np->nagios_exit('UNKNOWN', "Unit must be one of 'b', 'K', 'M' or 'G', case-sensitive.")
    unless exists $bytes_per_unit{$unit};
my $multiple = $bytes_per_unit{$unit};

my $verbose = $np->opts->verbose;

# Would better fit later but doing it here validates thresholds.
# Percentage thresholds cannot be resolved until the total is known, so they
# are withheld (passed as undef) from this first call.
my $warning = $np->opts->warning;
my $critical = $np->opts->critical;
$np->set_thresholds(
    warning  => (_is_percent($warning)  ? undef : $warning),
    critical => (_is_percent($critical) ? undef : $critical),
);

# Better safe than sorry: arm the plugin timeout. This script installs no
# SIGALRM handler of its own.
alarm $np->opts->timeout;

# The header and the "Mem:" label are matched literally below; free(1) output
# may be localized, so pin the C locale for the child process.
$ENV{LC_ALL} = 'C';

# We always get bytes, then calculate units ourselves.
warn("Running: '$FREECMD -b'\n") if ($verbose);
# List-form pipe open: the command is executed directly, without a shell.
open(my $free_out, '-|', $FREECMD, '-b')
    or $np->nagios_exit('CRITICAL', "Could not run $FREECMD");
warn("Output from $FREECMD:\n") if ($verbose > 1);

my $new_format = 0;
my ($total, $free);
while (my $line = <$free_out>) {
    warn(" $line") if ($verbose > 1);
    # New `free` output from procps doesn't provide "buffers/cache" anymore, but
    # provides a better estimate of available memory ("available" column).
    $new_format = 1
        if $line =~ m{^\s+total\s+used\s+free\s+shared\s+buff/cache\s+available$};
    if ($new_format and $line =~ /^Mem:\s+(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)$/) {
        # First column is the total, last column is "available".
        ($total, $free) = ($1, $2);
    } elsif ($line =~ m#^\-/\+\ buffers/cache:\s*(\d+)\s+(\d+)#) {
        # Old format: the "-/+ buffers/cache" row gives used and free; their
        # sum is taken as the total.
        $total = $1 + $2;
        $free = $2;
    }
}
close($free_out);
alarm(0);

$np->nagios_exit('CRITICAL', "Unable to interpret $FREECMD output") if (!defined($free));

# Resolve percentage thresholds into "minimum free bytes" ranges now that the
# total is known.
if (_is_percent($warning)) {
    $warning = _percent_to_min_free($warning, $total);
    warn("Calculated threshold (from percentage): warn=>$warning\n") if ($verbose);
}
if (_is_percent($critical)) {
    $critical = _percent_to_min_free($critical, $total);
    warn("Calculated threshold (from percentage): crit=>$critical\n") if ($verbose);
}
$np->set_thresholds(
    warning => $warning,
    critical => $critical,
);

# NOTE(review): the plugin guidelines spell the bytes unit 'B'; 'b' is kept
# unchanged here so existing perfdata consumers see the same unit - confirm
# before changing.
$np->add_perfdata(
    label => "free",
    value => $free,
    uom => 'b',
    threshold => $np->threshold,
);

# The status line reports free memory truncated to whole units of $unit.
my $freeprint = int($free/$multiple);
$np->nagios_exit($np->check_threshold($free), "$freeprint$unit free");

View File

@ -1,9 +1,9 @@
---
# NOTE(review): this hunk is headed "-1,9 +1,9" and shows every entry twice,
# once with `include:` and once with `import_tasks:`, with no +/- markers.
# Presumably the `include:` lines are the removed side and the
# `import_tasks:` lines the added side - confirm against the real file.
- include: nagios.yml
- include: dell-omsa.yml
- import_tasks: nagios.yml
- import_tasks: dell-omsa.yml
when: dell_system is defined
- include: postgresql-nagios.yml
- import_tasks: postgresql-nagios.yml
when: nagios_postgresql_check is defined and nagios_postgresql_check
#- include: nsca.yml
- include: hardware-checks.yml
#- import_tasks: nsca.yml
- import_tasks: hardware-checks.yml
when: nagios_hw is defined and nagios_hw

View File

@ -25,6 +25,13 @@
- check_netint.pl
tags: nagios
# Copies check_memory.xenial into the plugin directory, only on hosts whose
# distribution release is 'xenial'.
- name: Install a fixed check_memory on Ubuntu 16.04
  copy:
    src: "{{ item }}"
    dest: "{{ nagios_plugins_dir }}/{{ item }}"
    owner: root
    group: nagios
    mode: "0755"
  with_items:
    - check_memory.xenial
  when: ansible_distribution_release == 'xenial'
  tags: nagios
- name: Install the global nrpe commands file
template: src=common-nrpe.cfg.j2 dest=/etc/nagios/nrpe.d/common.cfg owner=root group=root mode=444
notify: Restart NRPE server

View File

@ -2,7 +2,11 @@
command[global_check_disk]={{ nagios_plugins_dir }}/check_disk -w {{ nagios_check_disk_w }}% -c {{ nagios_check_disk_c }}% -X tmpfs -X proc -X sysfs -X devpts -X dev -A -i /mnt/.*
command[global_check_load]={{ nagios_plugins_dir }}/check_load {% if nagios_load_per_cpu %}-r{% endif %} -w {{ nagios_load_w }} -c {{ nagios_load_c }}
command[global_check_memory]={{ nagios_plugins_dir }}/check_memory{% if ansible_distribution_release == 'xenial' %}.xenial{% endif %} -u M -w 5% -c 1%
command[global_check_swap]={{ nagios_plugins_dir }}/check_swap -w 20% -c 5%
command[global_check_zombie_procs]={{ nagios_plugins_dir }}/check_procs -w 5 -c 10 -s Z
command[global_check_total_procs]={{ nagios_plugins_dir }}/check_procs -w 800 -c 1000