From 5a0e5d1ea028004f867b5117e243bbc037bf992c Mon Sep 17 00:00:00 2001
From: Andrea Dell'Amico <adellam@isti.cnr.it>
Date: Thu, 3 May 2018 12:23:34 +0200
Subject: [PATCH] Add a fixed nagios memory check on Ubuntu 16.04

---
 nagios/files/check_memory.xenial    | 164 ++++++++++++++++++++++++++++
 nagios/tasks/main.yml               |  10 +-
 nagios/tasks/nagios.yml             |   7 ++
 nagios/templates/common-nrpe.cfg.j2 |   4 +
 4 files changed, 180 insertions(+), 5 deletions(-)
 create mode 100644 nagios/files/check_memory.xenial

diff --git a/nagios/files/check_memory.xenial b/nagios/files/check_memory.xenial
new file mode 100644
index 0000000..2042dfa
--- /dev/null
+++ b/nagios/files/check_memory.xenial
@@ -0,0 +1,164 @@
+#!/usr/bin/perl
+#
+# check_memory  -  Check free(1) data against given thresholds
+#
+# Copyright (C) 2007 Thomas Guyot-Sionnest <tguyot@gmail.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+
+
+use strict;
+use warnings;
+use vars qw($PROGNAME $VERSION $FREECMD $UNIT);
+use Nagios::Plugin;
+
+# Plugin identity, the wrapped command, and the default display unit.
+($PROGNAME, $VERSION) = ('check_memory', '1.0.1');
+($FREECMD, $UNIT) = ('/usr/bin/free', 'M');
+
+my $usage = "Usage: %s [ -w <warning_threshold> ] [ -c <critical_threshold> ]\n"
+  . '   [ -u <unit> ]';
+my $np = Nagios::Plugin->new(
+  plugin  => $PROGNAME,
+  version => $VERSION,
+  usage   => $usage,
+  blurb   => 'Check free(1) data against given tresholds',
+  timeout => 30,
+);
+
+# The warning and critical options are identical apart from their names, so
+# both are registered from a single template (warning first, then critical).
+for my $level (qw(warning critical)) {
+  my $short = substr($level, 0, 1);
+  my $title = ucfirst($level);
+
+  $np->add_arg(
+    spec => "$level|$short=s",
+    help => "-$short, --$level=THRESHOLD[%]\n"
+      . "   $title threshold (in bytes or percent) for free memory. See\n"
+      . "   http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT\n"
+      . "   for the threshold format. Alternatively this can be defined as a percentage\n"
+      . '   of minimum free memory (warning and critical must be in the same format).',
+    required => 0,
+  );
+}
+
+# Display unit for the human-readable part of the output; the default comes
+# from $UNIT.
+my $unit_help = "-u, --unit=UNIT\n"
+  . "   Unit to use for human-redeable output. Can be 'b', 'K' 'M' or 'G' for\n"
+  . "   bytes, KiB, MiB or GiB respectively (default: '$UNIT').";
+$np->add_arg(
+  spec     => 'unit|u=s',
+  help     => $unit_help,
+  default  => $UNIT,
+  required => 0,
+);
+
+# Parse the command line.
+$np->getopts;
+
+my $verbose = $np->opts->verbose;
+my $unit = $np->opts->unit;
+
+# Bytes per display unit; unit names are matched case-sensitively.
+my %bytes_per_unit = (
+  'b' => 1,
+  'K' => 1024,
+  'M' => 1024 * 1024,
+  'G' => 1024 * 1024 * 1024,
+);
+
+my $multiple = $bytes_per_unit{$unit};
+if (!defined($multiple)) {
+  $np->nagios_exit('UNKNOWN', "Unit must be one of 'b', 'K', 'M' or 'G', case-sensitive.");
+}
+
+# Validate plain thresholds right away. Percentage thresholds cannot be
+# resolved until total memory is known, so they are passed as undef here.
+my $warning = $np->opts->warning;
+my $critical = $np->opts->critical;
+my $early_warn = (defined($warning) && $warning !~ /^\d+%$/) ? $warning : undef;
+my $early_crit = (defined($critical) && $critical !~ /^\d+%$/) ? $critical : undef;
+$np->set_thresholds(warning => $early_warn, critical => $early_crit);
+
+# Arm the timeout before running the external command.
+alarm($np->opts->timeout);
+
+# Run free(1) in bytes; display units are applied later. procps can localize
+# its header and row labels, so pin the C locale to keep the patterns matching.
+local $ENV{LC_ALL} = 'C';
+warn("Running: '$FREECMD -b'\n") if ($verbose);
+# List-form pipe open with a lexical handle: no shell, no bareword global.
+open(my $result_fh, '-|', $FREECMD, '-b')
+  or $np->nagios_exit('CRITICAL', "Could not run $FREECMD");
+
+warn("Output from $FREECMD:\n") if ($verbose > 1);
+my $new_format = 0;
+my ($total, $used, $free);
+while (my $line = <$result_fh>) {
+  warn("  $line") if ($verbose > 1);
+  # Newer procps drops the "-/+ buffers/cache" row and adds an "available"
+  # column, which is a better estimate of usable memory.
+  $new_format = 1 if $line =~ m{^\s+total\s+used\s+free\s+shared\s+buff/cache\s+available$};
+  if ($new_format and $line =~ /^Mem:\s+(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)$/) {
+    ($total, $free) = ($1, $2);  # $2 is the "available" column
+    $used = $total - $free;      # used is everything which is not available
+  } elsif ($line =~ m#^\-/\+\ buffers/cache:\s*(\d+)\s+(\d+)#) {
+    ($used, $free) = ($1, $2);
+    $total = $used + $free;
+  }
+}
+
+close($result_fh);
+alarm(0);
+
+$np->nagios_exit('CRITICAL', "Unable to interpret $FREECMD output") if (!defined($free));
+
+# Convert "N%" thresholds into absolute byte counts relative to total
+# memory. The trailing ':' turns the value into a minimum in the Nagios
+# range syntax, so the check alerts when free memory drops below it.
+for my $entry ([ \$warning, 'warn' ], [ \$critical, 'crit' ]) {
+  my ($threshold_ref, $label) = @{$entry};
+
+  # Absolute or missing thresholds are left untouched.
+  next if !defined(${$threshold_ref});
+  next if ${$threshold_ref} !~ /^\d+%$/;
+
+  (my $percent = ${$threshold_ref}) =~ s/%//;
+  ${$threshold_ref} = ($total / 100 * $percent) . ':';
+
+  if ($verbose) {
+    warn("Calculated threshold (from percentage): $label=>${$threshold_ref}\n");
+  }
+}
+
+# Install the final thresholds; percentages have been converted to bytes.
+$np->set_thresholds(warning => $warning, critical => $critical);
+
+# Publish the raw byte count as performance data.
+$np->add_perfdata(
+  label     => "free",
+  value     => $free,
+  uom       => 'b',
+  threshold => $np->threshold,
+);
+
+# Report free memory in the requested unit, truncated to a whole number.
+my $freeprint = int($free / $multiple);
+my $message = "$freeprint$unit free";
+$np->nagios_exit($np->check_threshold($free), $message);
+
diff --git a/nagios/tasks/main.yml b/nagios/tasks/main.yml
index 7b22eff..45417fd 100644
--- a/nagios/tasks/main.yml
+++ b/nagios/tasks/main.yml
@@ -1,9 +1,9 @@
 ---
-- include: nagios.yml
-- include: dell-omsa.yml
+- import_tasks: nagios.yml
+- import_tasks: dell-omsa.yml
   when: dell_system is defined
-- include: postgresql-nagios.yml
+- import_tasks: postgresql-nagios.yml
   when: nagios_postgresql_check is defined and nagios_postgresql_check
-#- include: nsca.yml
-- include: hardware-checks.yml
+#- import_tasks: nsca.yml
+- import_tasks: hardware-checks.yml
   when: nagios_hw is defined and nagios_hw
diff --git a/nagios/tasks/nagios.yml b/nagios/tasks/nagios.yml
index b2d3fb2..c8ae102 100644
--- a/nagios/tasks/nagios.yml
+++ b/nagios/tasks/nagios.yml
@@ -25,6 +25,13 @@
     - check_netint.pl
   tags: nagios
 
+- name: Install a fixed check_memory on Ubuntu 16.04
+  copy: src={{ item }} dest={{ nagios_plugins_dir }}/{{ item }} owner=root group=nagios mode=0755
+  with_items:
+    - check_memory.xenial
+  when: ansible_distribution_release == 'xenial'
+  tags: nagios
+
 - name: Install the global nrpe commands file
   template: src=common-nrpe.cfg.j2 dest=/etc/nagios/nrpe.d/common.cfg owner=root group=root mode=444
   notify: Restart NRPE server
diff --git a/nagios/templates/common-nrpe.cfg.j2 b/nagios/templates/common-nrpe.cfg.j2
index 338da03..9e2d76c 100644
--- a/nagios/templates/common-nrpe.cfg.j2
+++ b/nagios/templates/common-nrpe.cfg.j2
@@ -2,7 +2,11 @@
 command[global_check_disk]={{ nagios_plugins_dir }}/check_disk -w {{ nagios_check_disk_w }}% -c {{ nagios_check_disk_c }}% -X tmpfs -X proc -X sysfs -X devpts -X dev -A -i /mnt/.*
 
 command[global_check_load]={{ nagios_plugins_dir }}/check_load {% if nagios_load_per_cpu %}-r{% endif %} -w {{ nagios_load_w }} -c {{ nagios_load_c }}
+{% if ansible_distribution_release == 'xenial' %}
+command[global_check_memory]={{ nagios_plugins_dir }}/check_memory.xenial -u M -w 5% -c 1%
+{% else %}
 command[global_check_memory]={{ nagios_plugins_dir }}/check_memory -u M -w 5% -c 1%
+{% endif %}
 command[global_check_swap]={{ nagios_plugins_dir }}/check_swap -w 20% -c 5%
 command[global_check_zombie_procs]={{ nagios_plugins_dir }}/check_procs -w 5 -c 10 -s Z
 command[global_check_total_procs]={{ nagios_plugins_dir }}/check_procs -w 800 -c 1000