diff --git a/nagios/files/check_memory.xenial b/nagios/files/check_memory.xenial
new file mode 100644
index 0000000..2042dfa
--- /dev/null
+++ b/nagios/files/check_memory.xenial
@@ -0,0 +1,167 @@
+#!/usr/bin/perl
+#
+# check_memory - Check free(1) data against given thresholds
+#
+# Copyright (C) 2007 Thomas Guyot-Sionnest
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+
+
+use strict;
+use warnings;
+use vars qw($PROGNAME $VERSION $FREECMD $UNIT);
+use Nagios::Plugin;
+
+$PROGNAME = "check_memory";
+$VERSION = '1.0.1';
+$FREECMD = '/usr/bin/free';
+$UNIT = 'M';
+
+my $np = Nagios::Plugin->new(
+  usage => "Usage: %s [ -w ] [ -c ]\n"
+    . ' [ -u ]',
+  version => $VERSION,
+  plugin => $PROGNAME,
+  blurb => 'Check free(1) data against given tresholds',
+  timeout => 30,
+);
+
+$np->add_arg(
+  spec => 'warning|w=s',
+  help => "-w, --warning=THRESHOLD[%]\n"
+    . " Warning threshold (in bytes or percent) for free memory. See\n"
+    . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n"
+    . " for the threshold format. Alternatively this can be defined as a percentage\n"
+    . ' of minimum free memory (warning and critical must be in the same format).',
+  required => 0,
+);
+
+$np->add_arg(
+  spec => 'critical|c=s',
+  help => "-c, --critical=THRESHOLD[%]\n"
+    . " Critical threshold (in bytes or percent) for free memory. See\n"
+    . " http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n"
+    . " for the threshold format. Alternatively this can be defined as a percentage\n"
+    . ' of minimum free memory (warning and critical must be in the same format).',
+  required => 0,
+);
+
+$np->add_arg(
+  spec => 'unit|u=s',
+  help => "-u, --unit=UNIT\n"
+    . " Unit to use for human-redeable output. Can be 'b', 'K' 'M' or 'G' for\n"
+    . " bytes, KiB, MiB or GiB respectively (default: '$UNIT').",
+  default => $UNIT,
+  required => 0,
+);
+
+$np->getopts;
+
+# Assign, then check args
+
+my $multiple;
+my $unit = $np->opts->unit;
+if ($unit eq 'M') {
+  $multiple = 1024 * 1024;
+} elsif ( $unit eq 'K') {
+  $multiple = 1024;
+} elsif ( $unit eq 'b') {
+  $multiple = 1;
+} elsif ( $unit eq 'G') {
+  $multiple = 1024 * 1024 * 1024;
+} else {
+  $np->nagios_exit('UNKNOWN', "Unit must be one of 'b', 'K', 'M' or 'G', case-sensitive.");
+}
+my $verbose = $np->opts->verbose;
+
+# Would better fit later but doing it here validates thresholds
+my $warning = $np->opts->warning;
+my $critical = $np->opts->critical;
+$np->set_thresholds(
+  warning => ((defined($warning) && $warning !~ /^\d+%$/) ? $warning : undef),
+  critical => ((defined($critical) && $critical !~ /^\d+%$/) ? $critical : undef),
+);
+
+# Better safe than sorry
+alarm $np->opts->timeout;
+
+# We always get bytes, then calculate units ourselves
+warn("Running: '$FREECMD -b'\n") if ($verbose);
+open(RESULT, "$FREECMD -b |")
+  or $np->nagios_exit('CRITICAL', "Could not run $FREECMD");
+
+warn("Output from $FREECMD:\n") if ($verbose > 1);
+my $new_format = 0;
+my ($total, $used, $free);
+# Read the command output line by line from the RESULT pipe opened above.
+while (<RESULT>) {
+  warn(" $_") if ($verbose > 1);
+  # New `free` output from procps doesn't provide "buffers/cache" anymore, but
+  # provides a better estimate of available memory ("available" column).
+  $new_format = 1 if m{^\s+total\s+used\s+free\s+shared\s+buff/cache\s+available$};
+
+  if ($new_format and /^Mem:\s+(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)$/) {
+    $total = $1;
+    $free = $2; # available column
+    $used = $total - $free; # used is everything which is not available
+  } elsif (m#^\-/\+\ buffers/cache:\s*(\d+)\s+(\d+)#) {
+    $used = $1;
+    $free = $2;
+    $total = $used + $free;
+  }
+}
+
+close(RESULT);
+alarm(0);
+
+$np->nagios_exit('CRITICAL', "Unable to interpret $FREECMD output") if (!defined($free));
+
+# Percentage thresholds become absolute byte counts relative to total memory;
+# the trailing ':' turns the value into a minimum ("alert when below") range.
+if (defined($warning) && $warning =~ /^\d+%$/) {
+  if ($warning) {
+    $warning =~ s/%//;
+    $warning = $total / 100 * $warning;
+    $warning .= ':';
+  }
+  warn("Calculated threshold (from percentage): warn=>$warning\n") if ($verbose);
+}
+
+if (defined($critical) && $critical =~ /^\d+%$/) {
+  if ($critical) {
+    $critical =~ s/%//;
+    $critical = $total / 100 * $critical;
+    $critical .= ':';
+  }
+  warn("Calculated threshold (from percentage): crit=>$critical\n") if ($verbose);
+}
+
+$np->set_thresholds(
+  warning => $warning,
+  critical => $critical,
+);
+
+$np->add_perfdata(
+  label => "free",
+  value => $free,
+  uom => 'b',
+  threshold => $np->threshold,
+);
+
+my $freeprint = int($free/$multiple);
+
+$np->nagios_exit($np->check_threshold($free), "$freeprint$unit free");
+
diff --git a/nagios/tasks/main.yml b/nagios/tasks/main.yml
index 7b22eff..45417fd 100644
--- a/nagios/tasks/main.yml
+++ b/nagios/tasks/main.yml
@@ -1,9 +1,9 @@
 ---
-- include: nagios.yml
-- include: dell-omsa.yml
+- import_tasks: nagios.yml
+- import_tasks: dell-omsa.yml
   when: dell_system is defined
-- include: postgresql-nagios.yml
+- import_tasks: postgresql-nagios.yml
   when: nagios_postgresql_check is defined and nagios_postgresql_check
-#- include: nsca.yml
-- include: hardware-checks.yml
+#- import_tasks: nsca.yml
+- import_tasks: hardware-checks.yml
   when: nagios_hw is defined and nagios_hw
diff --git a/nagios/tasks/nagios.yml b/nagios/tasks/nagios.yml
index b2d3fb2..c8ae102 100644
--- a/nagios/tasks/nagios.yml
+++ b/nagios/tasks/nagios.yml
@@ -25,6 +25,13 @@
     - check_netint.pl
   tags: nagios
 
+- name: Install a fixed check_memory on Ubuntu 16.04
+  copy: src={{ item }} dest={{ nagios_plugins_dir }}/{{ item }} owner=root group=nagios mode=0755
+  with_items:
+    - check_memory.xenial
+  when: ansible_distribution_release == 'xenial'
+  tags: nagios
+
 - name: Install the global nrpe commands file
   template: src=common-nrpe.cfg.j2 dest=/etc/nagios/nrpe.d/common.cfg owner=root group=root mode=444
   notify: Restart NRPE server
diff --git a/nagios/templates/common-nrpe.cfg.j2 b/nagios/templates/common-nrpe.cfg.j2
index 338da03..9e2d76c 100644
--- a/nagios/templates/common-nrpe.cfg.j2
+++ b/nagios/templates/common-nrpe.cfg.j2
@@ -2,7 +2,11 @@
 
 command[global_check_disk]={{ nagios_plugins_dir }}/check_disk -w {{ nagios_check_disk_w }}% -c {{ nagios_check_disk_c }}% -X tmpfs -X proc -X sysfs -X devpts -X dev -A -i /mnt/.*
 command[global_check_load]={{ nagios_plugins_dir }}/check_load {% if nagios_load_per_cpu %}-r{% endif %} -w {{ nagios_load_w }} -c {{ nagios_load_c }}
+{% if ansible_distribution_release == 'xenial' %}
+command[global_check_memory]={{ nagios_plugins_dir }}/check_memory.xenial -u M -w 5% -c 1%
+{% else %}
 command[global_check_memory]={{ nagios_plugins_dir }}/check_memory -u M -w 5% -c 1%
+{% endif %}
 command[global_check_swap]={{ nagios_plugins_dir }}/check_swap -w 20% -c 5%
 command[global_check_zombie_procs]={{ nagios_plugins_dir }}/check_procs -w 5 -c 10 -s Z
 command[global_check_total_procs]={{ nagios_plugins_dir }}/check_procs -w 800 -c 1000