d4science-ghn-cluster/group_vars/mongo_cluster_prod/ganglia.yml: ganglia cluster for the mongodb prod.

library/roles/mongodb-org: Install the specific ganglia plugin if ganglia monitoring is enabled.
library/roles/couchdb: Install the specific ganglia plugin if ganglia monitoring is enabled.
This commit is contained in:
Andrea Dell'Amico 2015-11-23 20:16:04 +01:00
parent fcbeed87c8
commit 1f77909502
12 changed files with 1231 additions and 60 deletions

View File

@ -67,3 +67,6 @@ couchdb_replicator_options:
# - { section: 'httpd', option: 'bind_address', value: '{{ couchdb_bind_address }}', state: 'present' } # - { section: 'httpd', option: 'bind_address', value: '{{ couchdb_bind_address }}', state: 'present' }
# - { section: 'httpd', option: 'config_whitelist', value: '[{httpd,config_whitelist}, {log,level}]', state: 'present' } # - { section: 'httpd', option: 'config_whitelist', value: '[{httpd,config_whitelist}, {log,level}]', state: 'present' }
ganglia_enabled: False
couchdb_ganglia_url: http://localhost:5984/_stats
couchdb_ganglia_refresh_rate: 60

322
couchdb/files/couchdb.py Normal file
View File

@ -0,0 +1,322 @@
### This script reports couchdb metrics to ganglia.
### License to use, modify, and distribute under the GPL
### http://www.gnu.org/licenses/gpl.txt
import logging
import os
import subprocess
import sys
import threading
import time
import traceback
import urllib2
import json
# Root logger threshold: with level ERROR the logging.debug()/logging.warning()
# calls made throughout this module are suppressed unless the level is lowered.
logging.basicConfig(level=logging.ERROR)
# Module-wide handle to the single UpdateCouchdbThread instance. It is created
# by metric_init() and read by metric_of(), setting_of() and metric_cleanup().
_Worker_Thread = None
class UpdateCouchdbThread(threading.Thread):
    """Background thread that polls a CouchDB stats URL and caches the result.

    ``params`` must contain ``stats_url`` (the URL to poll) and
    ``refresh_rate`` (polling interval in seconds; anything ``int()``
    accepts). Cached values are read with :meth:`metric_of`.
    """

    # Refresh rates that are forwarded unchanged as the ``?range=`` query
    # parameter; every other value is replaced by DEFAULT_RANGE.
    VALID_RANGES = (60, 300, 900)
    DEFAULT_RANGE = 60
    # Stats reported as plain integers instead of being divided by the range.
    GAUGE_KEYS = ('open_databases', 'open_os_files',
                  'clients_requesting_changes')

    def __init__(self, params):
        threading.Thread.__init__(self)
        self.running = False
        self.shuttingdown = False
        self.refresh_rate = int(params['refresh_rate'])
        self.metrics = {}
        self.settings = {}
        self.stats_url = params['stats_url']
        self._metrics_lock = threading.Lock()
        self._settings_lock = threading.Lock()

    def shutdown(self):
        """Ask the polling loop to stop and, if it is running, wait for it."""
        self.shuttingdown = True
        if not self.running:
            return
        self.join()

    def run(self):
        """Sleep ``refresh_rate`` seconds, refresh, repeat until shut down."""
        self.running = True
        try:
            while not self.shuttingdown:
                time.sleep(self.refresh_rate)
                if self.shuttingdown:
                    # Shutdown was requested while sleeping: skip the
                    # pointless extra HTTP request.
                    break
                self.refresh_metrics()
        finally:
            # Always clear the flag so shutdown() never joins on stale state.
            self.running = False

    @staticmethod
    def _effective_range(refresh_rate):
        """Return ``refresh_rate`` if it is an accepted range, else 60."""
        if refresh_rate in UpdateCouchdbThread.VALID_RANGES:
            return refresh_rate
        logging.warning('The specified refresh_rate of %d is invalid and has been substituted with 60!' % refresh_rate)
        return UpdateCouchdbThread.DEFAULT_RANGE

    @staticmethod
    def _flatten_stats(data, sample_range):
        """Turn the two-level stats document into ``{metric_name: value}``.

        Metric names are ``couchdb_<section>_<stat>``. A ``current`` value of
        ``None`` becomes 0, gauge stats (GAUGE_KEYS) become ints and every
        other stat is divided by ``sample_range`` to obtain a per-second rate.
        """
        result = {}
        for section, stats in data.items():
            for stat_name, stat in stats.items():
                value = stat['current']
                if value is None:
                    value = 0
                elif stat_name in UpdateCouchdbThread.GAUGE_KEYS:
                    value = int(value)
                else:
                    # We need to divide by the range as couchdb provides no
                    # per second values.
                    value = float(value) / sample_range
                result['couchdb_' + section + '_' + stat_name] = value
        return result

    @staticmethod
    def _get_couchdb_stats(url, refresh_rate):
        """Fetch ``url`` and return the flattened metrics dictionary.

        Raises whatever ``urllib2.urlopen``, ``json.loads`` or the flattening
        step raise; :meth:`refresh_metrics` is responsible for handling that.
        """
        # The divisor must be the range that was actually requested; dividing
        # by an invalid refresh_rate while querying range=60 skews every rate.
        sample_range = UpdateCouchdbThread._effective_range(refresh_rate)
        url += '?range=' + str(sample_range)
        # Set time out for urlopen to 2 seconds otherwise we run into the
        # possibility of hosing gmond.
        c = urllib2.urlopen(url, None, 2)
        try:
            json_data = c.read()
        finally:
            c.close()
        data = json.loads(json_data)
        return UpdateCouchdbThread._flatten_stats(data, sample_range)

    def refresh_metrics(self):
        """Replace the cached metrics with a fresh sample.

        Returns True when a non-empty sample was stored. On any failure the
        cache is emptied (so metric_of() reports 0) and False is returned.
        """
        logging.debug('refresh metrics')
        try:
            logging.debug(' opening URL: ' + str(self.stats_url))
            data = UpdateCouchdbThread._get_couchdb_stats(self.stats_url, self.refresh_rate)
        except Exception:
            logging.warning('error refreshing metrics')
            logging.warning(traceback.format_exc())
            with self._metrics_lock:
                self.metrics = {}
            return False
        with self._metrics_lock:
            # Swap in a complete dictionary so readers never see a partial one.
            self.metrics = dict(data)
        if not data:
            logging.warning('error refreshing metrics')
            return False
        logging.debug('success refreshing metrics')
        logging.debug('metrics: ' + str(data))
        return True

    def metric_of(self, name):
        """Return the cached value of metric ``name``, or 0 if unavailable."""
        logging.debug('getting metric: ' + name)
        try:
            # Look up and read under the same lock acquisition so the entry
            # cannot disappear between the test and the read.
            with self._metrics_lock:
                if name in self.metrics:
                    value = self.metrics[name]
                    logging.debug('metric: %s = %s' % (name, value))
                    return value
        except Exception:
            logging.warning('failed to fetch ' + name)
        return 0

    def setting_of(self, name):
        """Return the stored setting ``name``, or 0 if it is not present."""
        logging.debug('getting setting: ' + name)
        try:
            with self._settings_lock:
                if name in self.settings:
                    value = self.settings[name]
                    logging.debug('setting: %s = %s' % (name, value))
                    return value
        except Exception:
            logging.warning('failed to fetch ' + name)
        return 0
def metric_init(params):
    """Create the polling thread and return the metric descriptors.

    ``params`` is handed unchanged to UpdateCouchdbThread. One initial
    refresh is performed before the thread is started. Raises Exception if a
    worker thread already exists.
    """
    logging.debug('init: ' + str(params))
    global _Worker_Thread
    if _Worker_Thread is not None:
        raise Exception('Worker thread already exists')

    # Values applied to every descriptor that does not override them.
    defaults = {
        'units': 'requests/s',
        'groups': 'couchdb',
        'slope': 'both',
        'value_type': 'float',
        'format': '%.3f',
        'description': '',
        'call_back': metric_of
    }
    # Per-metric overrides, keyed by metric name.
    descriptions = dict(
        couchdb_couchdb_auth_cache_hits={
            'units': 'hits/s',
            'description': 'Number of authentication cache hits'},
        couchdb_couchdb_auth_cache_misses={
            'units': 'misses/s',
            'description': 'Number of authentication cache misses'},
        couchdb_couchdb_database_reads={
            'units': 'reads/s',
            'description': 'Number of times a document was read from a database'},
        couchdb_couchdb_database_writes={
            'units': 'writes/s',
            'description': 'Number of times a document was changed'},
        couchdb_couchdb_open_databases={
            'value_type': 'uint',
            'format': '%d',
            'units': 'databases',
            'description': 'Number of open databases'},
        couchdb_couchdb_open_os_files={
            'value_type': 'uint',
            'format': '%d',
            'units': 'files',
            'description': 'Number of file descriptors CouchDB has open'},
        couchdb_couchdb_request_time={
            'units': 'ms',
            'description': 'Request time'},
        couchdb_httpd_bulk_requests={
            'description': 'Number of bulk requests'},
        couchdb_httpd_clients_requesting_changes={
            'value_type': 'uint',
            'format': '%d',
            'units': 'clients',
            'description': 'Number of clients for continuous _changes'},
        couchdb_httpd_requests={
            'description': 'Number of HTTP requests'},
        couchdb_httpd_temporary_view_reads={
            'units': 'reads',
            'description': 'Number of temporary view reads'},
        couchdb_httpd_view_reads={
            'description': 'Number of view reads'},
        couchdb_httpd_request_methods_COPY={
            'description': 'Number of HTTP COPY requests'},
        couchdb_httpd_request_methods_DELETE={
            'description': 'Number of HTTP DELETE requests'},
        couchdb_httpd_request_methods_GET={
            'description': 'Number of HTTP GET requests'},
        couchdb_httpd_request_methods_HEAD={
            'description': 'Number of HTTP HEAD requests'},
        couchdb_httpd_request_methods_POST={
            'description': 'Number of HTTP POST requests'},
        couchdb_httpd_request_methods_PUT={
            'description': 'Number of HTTP PUT requests'},
        couchdb_httpd_status_codes_200={
            'units': 'responses/s',
            'description': 'Number of HTTP 200 OK responses'},
        couchdb_httpd_status_codes_201={
            'units': 'responses/s',
            'description': 'Number of HTTP 201 Created responses'},
        couchdb_httpd_status_codes_202={
            'units': 'responses/s',
            'description': 'Number of HTTP 202 Accepted responses'},
        couchdb_httpd_status_codes_301={
            'units': 'responses/s',
            'description': 'Number of HTTP 301 Moved Permanently responses'},
        couchdb_httpd_status_codes_304={
            'units': 'responses/s',
            'description': 'Number of HTTP 304 Not Modified responses'},
        couchdb_httpd_status_codes_400={
            'units': 'responses/s',
            'description': 'Number of HTTP 400 Bad Request responses'},
        couchdb_httpd_status_codes_401={
            'units': 'responses/s',
            'description': 'Number of HTTP 401 Unauthorized responses'},
        couchdb_httpd_status_codes_403={
            'units': 'responses/s',
            'description': 'Number of HTTP 403 Forbidden responses'},
        couchdb_httpd_status_codes_404={
            'units': 'responses/s',
            'description': 'Number of HTTP 404 Not Found responses'},
        couchdb_httpd_status_codes_405={
            'units': 'responses/s',
            'description': 'Number of HTTP 405 Method Not Allowed responses'},
        couchdb_httpd_status_codes_409={
            'units': 'responses/s',
            'description': 'Number of HTTP 409 Conflict responses'},
        couchdb_httpd_status_codes_412={
            'units': 'responses/s',
            'description': 'Number of HTTP 412 Precondition Failed responses'},
        couchdb_httpd_status_codes_500={
            'units': 'responses/s',
            'description': 'Number of HTTP 500 Internal Server Error responses'})

    _Worker_Thread = UpdateCouchdbThread(params)
    _Worker_Thread.refresh_metrics()
    _Worker_Thread.start()

    descriptors = []
    for metric_name, overrides in descriptions.items():
        # Start from the defaults, then let the per-metric values win.
        descriptor = dict(defaults)
        descriptor.update(overrides)
        descriptor['name'] = str(metric_name)
        descriptors.append(descriptor)
    return descriptors
def metric_of(name):
    """Callback used by the descriptors: delegate to the worker thread."""
    worker = _Worker_Thread
    return worker.metric_of(name)
def setting_of(name):
    """Return setting ``name`` as reported by the worker thread."""
    worker = _Worker_Thread
    return worker.setting_of(name)
def metric_cleanup():
    """Stop the worker thread, if one was created, then shut logging down."""
    worker = _Worker_Thread
    if worker is not None:
        worker.shutdown()
    logging.shutdown()
if __name__ == '__main__':
    # Command-line smoke test: initialise the module, print every metric
    # once, then exit.
    from optparse import OptionParser
    try:
        logging.debug('running from the cmd line')
        parser = OptionParser()
        parser.add_option('-u', '--URL', dest='stats_url', default='http://127.0.0.1:5984/_stats', help='URL for couchdb stats page')
        parser.add_option('-q', '--quiet', dest='quiet', action='store_true', default=False)
        parser.add_option('-r', '--refresh-rate', dest='refresh_rate', default=60)
        parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False)
        (options, args) = parser.parse_args()
        descriptors = metric_init({
            'stats_url': options.stats_url,
            'refresh_rate': options.refresh_rate
        })
        if options.debug:
            from pprint import pprint
            pprint(descriptors)
        for d in descriptors:
            v = d['call_back'](d['name'])
            if not options.quiet:
                print(' {0}: {1} {2} [{3}]' . format(d['name'], v, d['units'], d['description']))
        # os._exit() terminates immediately, so the worker thread (asleep for
        # refresh_rate seconds) is not waited for. It also skips the stdio
        # flush, hence the explicit flush. A successful run reports status 0.
        sys.stdout.flush()
        os._exit(0)
    except KeyboardInterrupt:
        time.sleep(0.2)
        os._exit(1)
    except Exception:
        traceback.print_exc()
        os._exit(1)
    finally:
        # Only reached when the try block is left without os._exit(), e.g.
        # the SystemExit raised by optparse for --help or a bad option.
        metric_cleanup()

View File

@ -0,0 +1,13 @@
---
#
# The ganglia plugin comes from https://github.com/ganglia/gmond_python_modules
#
# Both tasks notify the "Restart ganglia monitor" handler, so gmond is
# restarted whenever the module or its configuration changes.
#
- name: Install the ganglia plugin for Couchdb
  copy: src=couchdb.py dest=/usr/lib/ganglia/python_modules/couchdb.py owner=root group=root mode=0644
  notify: Restart ganglia monitor
  tags: ganglia

# mode is written with a leading zero like every other task in these roles.
- name: Distribute the ganglia (gmond) configuration for the Couchdb plugin
  template: src=couchdb.pyconf.j2 dest=/etc/ganglia/conf.d/couchdb.pyconf owner=root group=root mode=0444
  notify: Restart ganglia monitor
  tags: ganglia

View File

@ -1,5 +1,7 @@
--- ---
- include: couchdb-old-package.yml - include: couchdb-1.yml
when: couchdb_use_old_package when: couchdb_use_old_package
- include: couchdb-2.yml - include: couchdb-2.yml
when: not couchdb_use_old_package when: not couchdb_use_old_package
- include: ganglia-plugin.yml
when: ganglia_enabled

View File

@ -0,0 +1,207 @@
#
modules {
module {
name = 'couchdb'
language = 'python'
param stats_url {
value = '{{ couchdb_ganglia_url }}'
}
param refresh_rate {
value = '{{ couchdb_ganglia_refresh_rate }}'
}
}
}
collection_group {
collect_every = 10
time_threshold = 20
metric {
name = 'couchdb_couchdb_auth_cache_hits'
title = 'Number of authentication cache hits'
value_threshold = 1.0
}
metric {
name = 'couchdb_couchdb_auth_cache_misses'
title = 'Number of authentication cache misses'
value_threshold = 1.0
}
metric {
name = 'couchdb_couchdb_database_reads'
title = 'Number of times a document was read from a database'
value_threshold = 1.0
}
metric {
name = 'couchdb_couchdb_database_writes'
title = 'Number of times a document was changed'
value_threshold = 1.0
}
metric {
name = 'couchdb_couchdb_open_databases'
title = 'Number of open databases'
value_threshold = 1.0
}
metric {
name = 'couchdb_couchdb_open_os_files'
title = 'Number of file descriptors CouchDB has open'
value_threshold = 1.0
}
metric {
name = 'couchdb_couchdb_request_time'
title = 'Request Time'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_bulk_requests'
title = 'Number of bulk requests'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_clients_requesting_changes'
title = 'Number of clients for continuous _changes'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_requests'
title = 'Number of HTTP requests'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_temporary_view_reads'
title = 'Number of temporary view reads'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_view_reads'
title = 'Number of view reads'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_request_methods_COPY'
title = 'Number of HTTP COPY requests'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_request_methods_DELETE'
title = 'Number of HTTP DELETE requests'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_request_methods_GET'
title = 'Number of HTTP GET requests'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_request_methods_HEAD'
title = 'Number of HTTP HEAD requests'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_request_methods_POST'
title = 'Number of HTTP POST requests'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_request_methods_PUT'
title = 'Number of HTTP PUT requests'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_status_codes_200'
title = 'Number of HTTP 200 OK responses'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_status_codes_201'
title = 'Number of HTTP 201 Created responses'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_status_codes_202'
title = 'Number of HTTP 202 Accepted responses'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_status_codes_301'
title = 'Number of HTTP 301 Moved Permanently responses'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_status_codes_304'
title = 'Number of HTTP 304 Not Modified responses'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_status_codes_400'
title = 'Number of HTTP 400 Bad Request responses'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_status_codes_401'
title = 'Number of HTTP 401 Unauthorized responses'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_status_codes_403'
title = 'Number of HTTP 403 Forbidden responses'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_status_codes_404'
title = 'Number of HTTP 404 Not Found responses'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_status_codes_405'
title = 'Number of HTTP 405 Method Not Allowed responses'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_status_codes_409'
title = 'Number of HTTP 409 Conflict responses'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_status_codes_412'
title = 'Number of HTTP 412 Precondition Failed responses'
value_threshold = 1.0
}
metric {
name = 'couchdb_httpd_status_codes_500'
title = 'Number of HTTP 500 Internal Server Error responses'
value_threshold = 1.0
}
}

View File

@ -1,10 +1,12 @@
# These are for reference only. # These are for reference only.
# Define your own set of variables # Define your own set of variables
# #
ganglia_gmond_cluster: "CNR-ISTI NeMIS Cluster" #ganglia_gmond_cluster: "Ganglia Cluster"
ganglia_gmond_cluster_port: 8649 #ganglia_gmond_cluster_port: 8649
ganglia_gmond_mcast_addr: 239.2.11.71 #ganglia_gmond_mcast_addr: 239.2.11.71
ganglia_gmetad_host: monitoring.research-infrastructures.eu #ganglia_gmetad_host: ganglia-gmetad
ganglia_gmond_send_metadata_interval: 60 ganglia_gmond_send_metadata_interval: 60
# Needed to build the correct firewall rules when jmxtrans is in use # Needed to build the correct firewall rules when jmxtrans is in use
ganglia_gmond_use_jmxtrans: False ganglia_gmond_use_jmxtrans: False
# Used by other roles to install specific ganglia iptables rules or some specific ganglia plugins. Or not.
ganglia_enabled: False

496
mongodb-org/files/mongodb.py Executable file
View File

@ -0,0 +1,496 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# MongoDB gmond module for Ganglia
#
# Copyright (C) 2011 by Michael T. Conigliaro <mike [at] conigliaro [dot] org>.
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
import json
import os
import re
import socket
import string
import time
import copy
# Prefix of every metric name exposed by this module; get_value() and
# get_rate() strip it before looking a metric up in the cache.
NAME_PREFIX = 'mongodb_'
# Shell commands whose output is parsed by get_metrics(), keyed by status
# type. The values below are examples only: metric_init() overwrites them
# with the parameters it receives.
PARAMS = {
'server_status' : '~/mongodb-osx-x86_64-1.8.1/bin/mongo --host mongodb04.example.com --port 27018 --quiet --eval "printjson(db.serverStatus())"',
'rs_status' : '~/mongodb-osx-x86_64-1.8.1/bin/mongo --host mongodb04.example.com --port 27018 --quiet --eval "printjson(rs.status())"'
}
# Most recent sample: 'time' is the time.time() value of the collection and
# 'data' the flattened metrics dictionary.
METRICS = {
'time' : 0,
'data' : {}
}
# Sample that preceded METRICS; get_rate() computes deltas between the two.
LAST_METRICS = copy.deepcopy(METRICS)
# Minimum age of METRICS, in seconds, before get_metrics() collects again.
METRICS_CACHE_TTL = 3
def flatten(d, pre = '', sep = '_'):
    """Flatten a dict (i.e. dict['a']['b']['c'] => dict['a_b_c']).

    ``pre`` is prepended to every generated key and ``sep`` joins the key of
    a nested dict with the keys below it, at every nesting level. Values
    that are not dicts (lists included) are copied unchanged.
    """
    new_d = {}
    for k, v in d.items():
        key = '%s%s' % (pre, k)
        if isinstance(v, dict):
            # Forward sep, otherwise deeper levels fall back to the default.
            new_d.update(flatten(v, '%s%s' % (key, sep), sep))
        else:
            new_d[key] = v
    return new_d
def get_metrics():
    """Return ``[METRICS, LAST_METRICS]``, collecting a new sample if stale.

    A new sample is collected when the cached one is older than
    METRICS_CACHE_TTL seconds: every command in PARAMS is run, its output is
    parsed as JSON and flattened. Keys from 'server_status' are stored
    unprefixed, keys from any other status type are prefixed with
    '<status_type>_'.
    """
    global METRICS, LAST_METRICS
    if (time.time() - METRICS['time']) > METRICS_CACHE_TTL:
        metrics = {}
        for status_type, command in PARAMS.items():
            # get raw metric data
            # NOTE(review): the command is run through a shell; it comes
            # from PARAMS, i.e. from the module configuration.
            io = os.popen(command)
            try:
                metrics_str = ''.join(io.readlines()).strip() # convert to string
            finally:
                io.close()
            # clean up
            metrics_str = re.sub(r'\w+\((.*)\)', r"\1", metrics_str) # remove functions
            # convert to flattened dict
            try:
                parsed = json.loads(metrics_str)
            except ValueError:
                # Skip only the unparsable source. Resetting everything here
                # would discard metrics already collected from another
                # command, depending on the iteration order of PARAMS.
                continue
            if status_type == 'server_status':
                metrics.update(flatten(parsed))
            else:
                metrics.update(flatten(parsed, pre='%s_' % status_type))
        # update cache
        LAST_METRICS = copy.deepcopy(METRICS)
        METRICS = {
            'time': time.time(),
            'data': metrics
        }
    return [METRICS, LAST_METRICS]
def get_value(name):
    """Return the current value of metric ``name``, or 0 if it is unknown."""
    current = get_metrics()[0]
    # Cached keys do not carry the module prefix.
    key = name[len(NAME_PREFIX):]
    try:
        return current['data'][key]
    except StandardError:
        return 0
def get_rate(name):
    """Return the per-second change of metric ``name`` between two samples.

    Negative rates and any lookup or arithmetic error yield 0.0.
    """
    samples = get_metrics()
    current, previous = samples[0], samples[1]
    # Cached keys do not carry the module prefix.
    key = name[len(NAME_PREFIX):]
    try:
        delta = float(current['data'][key] - previous['data'][key])
        elapsed = float(current['time'] - previous['time'])
        rate = delta / elapsed
        if rate < 0:
            rate = float(0)
    except StandardError:
        rate = float(0)
    return rate
def get_opcounter_rate(name):
    """Return the rate of an opcounter plus its 'opcountersRepl_' twin."""
    return get_rate(name) + get_rate(name.replace('opcounters_', 'opcountersRepl_'))
def get_globalLock_ratio(name):
    """Return lockTime rate over totalTime rate as a percentage (0 if idle)."""
    try:
        lock_rate = get_rate(NAME_PREFIX + 'globalLock_lockTime')
        total_rate = get_rate(NAME_PREFIX + 'globalLock_totalTime')
        return lock_rate / total_rate * 100
    except ZeroDivisionError:
        return 0
def get_indexCounters_btree_miss_ratio(name):
    """Return btree misses rate over accesses rate as a percentage."""
    try:
        miss_rate = get_rate(NAME_PREFIX + 'indexCounters_btree_misses')
        access_rate = get_rate(NAME_PREFIX + 'indexCounters_btree_accesses')
        return miss_rate / access_rate * 100
    except ZeroDivisionError:
        return 0
def get_connections_current_ratio(name):
    """Return current connections over available connections, in percent."""
    try:
        in_use = float(get_value(NAME_PREFIX + 'connections_current'))
        available = float(get_value(NAME_PREFIX + 'connections_available'))
        return in_use / available * 100
    except ZeroDivisionError:
        return 0
def get_slave_delay(name):
    """Return how far this member's optime lags behind the state-1 member.

    Returns 0 unless 'rs_status_myState' is 2, and 0 whenever a required key
    is missing from the replica set status.
    """
    data = get_metrics()[0]['data']
    # no point checking my optime if i'm not replicating
    if data.get('rs_status_myState') != 2:
        return 0
    # compare my optime with the master's
    primary = {}
    myself = {}
    try:
        for member in data['rs_status_members']:
            if member['state'] == 1:
                primary = member
            # Members are named 'host:port'; match on the host part.
            if member['name'].split(':')[0] == socket.getfqdn():
                myself = member
        lag = max(0, primary['optime']['t'] - myself['optime']['t']) / 1000
    except KeyError:
        lag = 0
    return lag
def get_asserts_total_rate(name):
    """Return the summed per-second rate of all assert counters."""
    total = 0
    for kind in ['regular', 'warning', 'msg', 'user', 'rollovers']:
        total = total + get_rate('%sasserts_%s' % (NAME_PREFIX, kind))
    return float(total)
def metric_init(lparams):
    """Store the module parameters and return the metric descriptors.

    Every entry of ``lparams`` is copied into PARAMS, replacing the default
    commands of the same name.
    """
    global PARAMS
    # set parameters
    PARAMS.update(lparams)

    # Fields shared by every descriptor.
    shared = {
        'time_max': 60,
        'slope': 'both',
        'groups': 'mongodb'
    }
    # (name suffix, call_back, value_type, units, format, description)
    rows = [
        ('opcounters_insert', get_opcounter_rate, 'float', 'Inserts/Sec', '%f', 'Inserts'),
        ('opcounters_query', get_opcounter_rate, 'float', 'Queries/Sec', '%f', 'Queries'),
        ('opcounters_update', get_opcounter_rate, 'float', 'Updates/Sec', '%f', 'Updates'),
        ('opcounters_delete', get_opcounter_rate, 'float', 'Deletes/Sec', '%f', 'Deletes'),
        ('opcounters_getmore', get_opcounter_rate, 'float', 'Getmores/Sec', '%f', 'Getmores'),
        ('opcounters_command', get_opcounter_rate, 'float', 'Commands/Sec', '%f', 'Commands'),
        ('backgroundFlushing_flushes', get_rate, 'float', 'Flushes/Sec', '%f', 'Flushes'),
        ('mem_mapped', get_value, 'uint', 'MB', '%u', 'Memory-mapped Data'),
        ('mem_virtual', get_value, 'uint', 'MB', '%u', 'Process Virtual Size'),
        ('mem_resident', get_value, 'uint', 'MB', '%u', 'Process Resident Size'),
        ('extra_info_page_faults', get_rate, 'float', 'Faults/Sec', '%f', 'Page Faults'),
        ('globalLock_ratio', get_globalLock_ratio, 'float', '%', '%f', 'Global Write Lock Ratio'),
        ('indexCounters_btree_miss_ratio', get_indexCounters_btree_miss_ratio, 'float', '%', '%f', 'BTree Page Miss Ratio'),
        ('globalLock_currentQueue_total', get_value, 'uint', 'Operations', '%u', 'Total Operations Waiting for Lock'),
        ('globalLock_currentQueue_readers', get_value, 'uint', 'Operations', '%u', 'Readers Waiting for Lock'),
        ('globalLock_currentQueue_writers', get_value, 'uint', 'Operations', '%u', 'Writers Waiting for Lock'),
        ('globalLock_activeClients_total', get_value, 'uint', 'Clients', '%u', 'Total Active Clients'),
        ('globalLock_activeClients_readers', get_value, 'uint', 'Clients', '%u', 'Active Readers'),
        ('globalLock_activeClients_writers', get_value, 'uint', 'Clients', '%u', 'Active Writers'),
        ('connections_current', get_value, 'uint', 'Connections', '%u', 'Open Connections'),
        ('connections_current_ratio', get_connections_current_ratio, 'float', '%', '%f', 'Percentage of Connections Used'),
        ('slave_delay', get_slave_delay, 'uint', 'Seconds', '%u', 'Replica Set Slave Delay'),
        ('asserts_total', get_asserts_total_rate, 'float', 'Asserts/Sec', '%f', 'Asserts'),
    ]

    # define descriptors
    descriptors = []
    for suffix, call_back, value_type, units, fmt, description in rows:
        descriptor = dict(shared)
        descriptor.update({
            'name': NAME_PREFIX + suffix,
            'call_back': call_back,
            'value_type': value_type,
            'units': units,
            'format': fmt,
            'description': description
        })
        descriptors.append(descriptor)
    return descriptors
def metric_cleanup():
    """Cleanup hook; this module holds nothing that needs releasing."""
    return None
# Debugging/testing entry point: print every metric, pause for the cache TTL,
# and repeat until interrupted.
if __name__ == '__main__':
    descriptors = metric_init(PARAMS)
    while True:
        for d in descriptors:
            # First build '<name> = <format>', then fill in the value.
            line_format = '%s = %s' % (d['name'], d['format'])
            print(line_format % (d['call_back'](d['name'])))
        print('')
        time.sleep(METRICS_CACHE_TTL)

View File

@ -0,0 +1,14 @@
---
#
# The ganglia plugin comes from https://github.com/ganglia/gmond_python_modules
#
# Copies the gmond Python module for MongoDB into gmond's python_modules
# directory; a change notifies the "Restart ganglia monitor" handler.
- name: Install the ganglia plugin for MongoDB
copy: src=mongodb.py dest=/usr/lib/ganglia/python_modules/mongodb.py owner=root group=root mode=0444
notify: Restart ganglia monitor
tags: [ 'ganglia', 'mongodb' ]
# Renders the gmond configuration of the plugin from its template into
# /etc/ganglia/conf.d; a change notifies the same handler.
- name: Distribute the ganglia (gmond) configuration for the MongoDB plugin
template: src=mongodb.pyconf.j2 dest=/etc/ganglia/conf.d/mongodb.pyconf owner=root group=root mode=0444
notify: Restart ganglia monitor
tags: [ 'ganglia', 'mongodb' ]

View File

@ -1,56 +1,4 @@
--- ---
- name: Install the mongodb apt key - include: mongodb.yml
#apt_key: id=7F0CEB10 state=present - include: ganglia-plugin.yml
raw: apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10 when: ganglia_enabled
when: mongodb_install_from_external_repo
tags: mongodb
- name: Install the mongodb repository
copy: content="deb http://downloads-distro.mongodb.org/repo/ubuntu-upstart dist 10gen" dest=/etc/apt/sources.list.d/mongodb.list owner=root group=root mode=044
when: mongodb_install_from_external_repo
register: external_repo
tags: mongodb
- name: Install the latest version of mongodb server
apt: pkg={{ item }} state={{ mongodb_pkg_state }} update_cache=yes
with_items:
- mongodb-org
when:
- mongodb_install_from_external_repo
- mongodb_install_packages
tags: mongodb
- name: Install the mongodb defaults file
copy: content="ENABLE_MONGODB={{ mongodb_start_server }}" dest=/etc/default/mongodb owner=root group=root mode=0444
when: mongodb_install_conf
tags: mongodb
- name: Create the mongodb db directory
file: dest={{ mongodb_dbpath }} state=directory owner={{ mongodb_user }} group={{ mongodb_group }} mode=0755
when: mongodb_install_conf
tags: mongodb
- name: Create the mongodb log directory
file: dest={{ mongodb_logdir }} state=directory owner={{ mongodb_user }} group={{ mongodb_group }} mode=0755
when: mongodb_install_conf
tags: mongodb
- name: Install the mongodb 2.6 configuration
template: src=mongod-2.6.conf.j2 dest=/etc/mongod.conf owner=root group=root mode=0444
when: mongodb_install_conf
tags: mongodb
- name: Install the cron job that manages log files rotation
template: src=mongo_log_rotate.sh.j2 dest=/etc/cron.daily/mongo_log_rotate owner=root group=root mode=0555
tags: [ 'mongodb', 'mongo_logrotate' ]
- name: Ensure mongodb is started
service: name=mongod state=started enabled=yes
when: ( mongodb_start_server is defined ) and ( mongodb_start_server == 'yes' ) and ( mongodb_install_conf )
tags: mongodb
- name: Ensure mongod is stopped and disabled
service: name=mongod state=stopped enabled=no
when: ( mongodb_start_server is defined ) and ( mongodb_start_server == 'no' ) and ( mongodb_install_conf )
tags: mongodb

View File

@ -0,0 +1,55 @@
---
# Installs mongodb-org from the external apt repository (when enabled),
# writes its configuration and makes sure the service state matches
# mongodb_start_server.
- name: Install the mongodb apt key
  #apt_key: id=7F0CEB10 state=present
  raw: apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10
  when: mongodb_install_from_external_repo
  tags: mongodb

# mode was 044, which drops the owner read bit; 0444 matches the other
# read-only files installed by this role.
- name: Install the mongodb repository
  copy: content="deb http://downloads-distro.mongodb.org/repo/ubuntu-upstart dist 10gen" dest=/etc/apt/sources.list.d/mongodb.list owner=root group=root mode=0444
  when: mongodb_install_from_external_repo
  register: external_repo
  tags: mongodb

- name: Install the latest version of mongodb server
  apt: pkg={{ item }} state={{ mongodb_pkg_state }} update_cache=yes
  with_items:
    - mongodb-org
  when:
    - mongodb_install_from_external_repo
    - mongodb_install_packages
  tags: mongodb

- name: Install the mongodb defaults file
  copy: content="ENABLE_MONGODB={{ mongodb_start_server }}" dest=/etc/default/mongodb owner=root group=root mode=0444
  when: mongodb_install_conf
  tags: mongodb

- name: Create the mongodb db directory
  file: dest={{ mongodb_dbpath }} state=directory owner={{ mongodb_user }} group={{ mongodb_group }} mode=0755
  when: mongodb_install_conf
  tags: mongodb

- name: Create the mongodb log directory
  file: dest={{ mongodb_logdir }} state=directory owner={{ mongodb_user }} group={{ mongodb_group }} mode=0755
  when: mongodb_install_conf
  tags: mongodb

- name: Install the mongodb 2.6 configuration
  template: src=mongod-2.6.conf.j2 dest=/etc/mongod.conf owner=root group=root mode=0444
  when: mongodb_install_conf
  tags: mongodb

- name: Install the cron job that manages log files rotation
  template: src=mongo_log_rotate.sh.j2 dest=/etc/cron.daily/mongo_log_rotate owner=root group=root mode=0555
  tags: [ 'mongodb', 'mongo_logrotate' ]

- name: Ensure mongodb is started
  service: name=mongod state=started enabled=yes
  when: ( mongodb_start_server is defined ) and ( mongodb_start_server == 'yes' ) and ( mongodb_install_conf )
  tags: mongodb

- name: Ensure mongod is stopped and disabled
  service: name=mongod state=stopped enabled=no
  when: ( mongodb_start_server is defined ) and ( mongodb_start_server == 'no' ) and ( mongodb_install_conf )
  tags: mongodb

View File

@ -0,0 +1,109 @@
modules {
module {
name = "mongodb"
language = "python"
param server_status {
value = "mongo --quiet --eval 'printjson(db.serverStatus())'"
}
param rs_status {
value = "mongo --quiet --eval 'printjson(rs.status())'"
}
}
}
collection_group {
collect_every = 30
time_threshold = 90
metric {
name = "mongodb_opcounters_insert"
title = "Inserts"
}
metric {
name = "mongodb_opcounters_query"
title = "Queries"
}
metric {
name = "mongodb_opcounters_update"
title = "Updates"
}
metric {
name = "mongodb_opcounters_delete"
title = "Deletes"
}
metric {
name = "mongodb_opcounters_getmore"
title = "Getmores"
}
metric {
name = "mongodb_opcounters_command"
title = "Commands"
}
metric {
name = "mongodb_backgroundFlushing_flushes"
title = "Flushes"
}
metric {
name = "mongodb_mem_mapped"
title = "Memory-mapped Data"
}
metric {
name = "mongodb_mem_virtual"
title = "Process Virtual Size"
}
metric {
name = "mongodb_mem_resident"
title = "Process Resident Size"
}
metric {
name = "mongodb_extra_info_page_faults"
title = "Page Faults"
}
metric {
name = "mongodb_globalLock_ratio"
title = "Global Write Lock Ratio"
}
metric {
name = "mongodb_indexCounters_btree_miss_ratio"
title = "BTree Page Miss Ratio"
}
metric {
name = "mongodb_globalLock_currentQueue_total"
title = "Total Operations Waiting for Lock"
}
metric {
name = "mongodb_globalLock_currentQueue_readers"
title = "Readers Waiting for Lock"
}
metric {
name = "mongodb_globalLock_currentQueue_writers"
title = "Writers Waiting for Lock"
}
metric {
name = "mongodb_globalLock_activeClients_total"
title = "Total Active Clients"
}
metric {
name = "mongodb_globalLock_activeClients_readers"
title = "Active Readers"
}
metric {
name = "mongodb_globalLock_activeClients_writers"
title = "Active Writers"
}
metric {
name = "mongodb_connections_current"
title = "Open Connections"
}
metric {
  # Title matches the description the mongodb module gives this metric; it
  # previously duplicated the title of mongodb_connections_current.
  name = "mongodb_connections_current_ratio"
  title = "Percentage of Connections Used"
}
metric {
name = "mongodb_slave_delay"
title = "Replica Set Slave Delay"
}
metric {
name = "mongodb_asserts_total"
title = "Asserts per Second"
}
}