#!/usr/bin/env python # -*- coding: utf-8 -*- # # MongoDB gmond module for Ganglia # # Copyright (C) 2011 by Michael T. Conigliaro . # All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # import json import os import re import socket import string import time import copy NAME_PREFIX = 'mongodb_' PARAMS = { 'server_status' : '~/mongodb-osx-x86_64-1.8.1/bin/mongo --host mongodb04.example.com --port 27018 --quiet --eval "printjson(db.serverStatus())"', 'rs_status' : '~/mongodb-osx-x86_64-1.8.1/bin/mongo --host mongodb04.example.com --port 27018 --quiet --eval "printjson(rs.status())"' } METRICS = { 'time' : 0, 'data' : {} } LAST_METRICS = copy.deepcopy(METRICS) METRICS_CACHE_TTL = 3 def flatten(d, pre = '', sep = '_'): """Flatten a dict (i.e. dict['a']['b']['c'] => dict['a_b_c'])""" new_d = {} for k,v in d.items(): if type(v) == dict: new_d.update(flatten(d[k], '%s%s%s' % (pre, k, sep))) else: new_d['%s%s' % (pre, k)] = v return new_d def get_metrics(): """Return all metrics""" global METRICS, LAST_METRICS if (time.time() - METRICS['time']) > METRICS_CACHE_TTL: metrics = {} for status_type in PARAMS.keys(): # get raw metric data io = os.popen(PARAMS[status_type]) # clean up metrics_str = ''.join(io.readlines()).strip() # convert to string metrics_str = re.sub('\w+\((.*)\)', r"\1", metrics_str) # remove functions # convert to flattened dict try: if status_type == 'server_status': metrics.update(flatten(json.loads(metrics_str))) else: metrics.update(flatten(json.loads(metrics_str), pre='%s_' % status_type)) except ValueError: metrics = {} # update cache LAST_METRICS = copy.deepcopy(METRICS) METRICS = { 'time': time.time(), 'data': metrics } return [METRICS, LAST_METRICS] def get_value(name): """Return a value for the requested metric""" # get metrics metrics = get_metrics()[0] # get value name = name[len(NAME_PREFIX):] # remove prefix from name try: result = metrics['data'][name] except StandardError: result = 0 return result def get_rate(name): """Return change over time for the requested metric""" # get metrics [curr_metrics, last_metrics] = get_metrics() # get rate name = name[len(NAME_PREFIX):] # remove prefix from name try: rate = float(curr_metrics['data'][name] - last_metrics['data'][name]) / \ float(curr_metrics['time'] - last_metrics['time']) if rate < 0: rate = float(0) except StandardError: rate = float(0) return rate def get_opcounter_rate(name): """Return change over time for an opcounter metric""" master_rate = get_rate(name) repl_rate = get_rate(name.replace('opcounters_', 'opcountersRepl_')) return master_rate + repl_rate def get_globalLock_ratio(name): """Return the global lock ratio""" try: result = get_rate(NAME_PREFIX + 'globalLock_lockTime') / \ get_rate(NAME_PREFIX + 'globalLock_totalTime') * 100 except ZeroDivisionError: result = 0 return result def get_indexCounters_btree_miss_ratio(name): """Return the btree miss ratio""" try: result = get_rate(NAME_PREFIX + 'indexCounters_btree_misses') / \ get_rate(NAME_PREFIX + 'indexCounters_btree_accesses') * 100 except ZeroDivisionError: result = 0 return result def get_connections_current_ratio(name): """Return the percentage of connections used""" try: result = float(get_value(NAME_PREFIX + 'connections_current')) / \ float(get_value(NAME_PREFIX + 'connections_available')) * 100 except ZeroDivisionError: result = 0 return result def get_slave_delay(name): """Return the replica set slave delay""" # get metrics metrics = get_metrics()[0] # no point checking my optime if i'm not replicating if 'rs_status_myState' not in metrics['data'] or metrics['data']['rs_status_myState'] != 2: result = 0 # compare my optime with the master's else: master = {} slave = {} try: for member in metrics['data']['rs_status_members']: if member['state'] == 1: master = member if member['name'].split(':')[0] == socket.getfqdn(): slave = member result = max(0, master['optime']['t'] - slave['optime']['t']) / 1000 except KeyError: result = 0 return result def get_asserts_total_rate(name): """Return the total number of asserts per second""" return float(reduce(lambda memo,obj: memo + get_rate('%sasserts_%s' % (NAME_PREFIX, obj)), ['regular', 'warning', 'msg', 'user', 'rollovers'], 0)) def metric_init(lparams): """Initialize metric descriptors""" global PARAMS # set parameters for key in lparams: PARAMS[key] = lparams[key] # define descriptors time_max = 60 groups = 'mongodb' descriptors = [ { 'name': NAME_PREFIX + 'opcounters_insert', 'call_back': get_opcounter_rate, 'time_max': time_max, 'value_type': 'float', 'units': 'Inserts/Sec', 'slope': 'both', 'format': '%f', 'description': 'Inserts', 'groups': groups }, { 'name': NAME_PREFIX + 'opcounters_query', 'call_back': get_opcounter_rate, 'time_max': time_max, 'value_type': 'float', 'units': 'Queries/Sec', 'slope': 'both', 'format': '%f', 'description': 'Queries', 'groups': groups }, { 'name': NAME_PREFIX + 'opcounters_update', 'call_back': get_opcounter_rate, 'time_max': time_max, 'value_type': 'float', 'units': 'Updates/Sec', 'slope': 'both', 'format': '%f', 'description': 'Updates', 'groups': groups }, { 'name': NAME_PREFIX + 'opcounters_delete', 'call_back': get_opcounter_rate, 'time_max': time_max, 'value_type': 'float', 'units': 'Deletes/Sec', 'slope': 'both', 'format': '%f', 'description': 'Deletes', 'groups': groups }, { 'name': NAME_PREFIX + 'opcounters_getmore', 'call_back': get_opcounter_rate, 'time_max': time_max, 'value_type': 'float', 'units': 'Getmores/Sec', 'slope': 'both', 'format': '%f', 'description': 'Getmores', 'groups': groups }, { 'name': NAME_PREFIX + 'opcounters_command', 'call_back': get_opcounter_rate, 'time_max': time_max, 'value_type': 'float', 'units': 'Commands/Sec', 'slope': 'both', 'format': '%f', 'description': 'Commands', 'groups': groups }, { 'name': NAME_PREFIX + 'backgroundFlushing_flushes', 'call_back': get_rate, 'time_max': time_max, 'value_type': 'float', 'units': 'Flushes/Sec', 'slope': 'both', 'format': '%f', 'description': 'Flushes', 'groups': groups }, { 'name': NAME_PREFIX + 'mem_mapped', 'call_back': get_value, 'time_max': time_max, 'value_type': 'uint', 'units': 'MB', 'slope': 'both', 'format': '%u', 'description': 'Memory-mapped Data', 'groups': groups }, { 'name': NAME_PREFIX + 'mem_virtual', 'call_back': get_value, 'time_max': time_max, 'value_type': 'uint', 'units': 'MB', 'slope': 'both', 'format': '%u', 'description': 'Process Virtual Size', 'groups': groups }, { 'name': NAME_PREFIX + 'mem_resident', 'call_back': get_value, 'time_max': time_max, 'value_type': 'uint', 'units': 'MB', 'slope': 'both', 'format': '%u', 'description': 'Process Resident Size', 'groups': groups }, { 'name': NAME_PREFIX + 'extra_info_page_faults', 'call_back': get_rate, 'time_max': time_max, 'value_type': 'float', 'units': 'Faults/Sec', 'slope': 'both', 'format': '%f', 'description': 'Page Faults', 'groups': groups }, { 'name': NAME_PREFIX + 'globalLock_ratio', 'call_back': get_globalLock_ratio, 'time_max': time_max, 'value_type': 'float', 'units': '%', 'slope': 'both', 'format': '%f', 'description': 'Global Write Lock Ratio', 'groups': groups }, { 'name': NAME_PREFIX + 'indexCounters_btree_miss_ratio', 'call_back': get_indexCounters_btree_miss_ratio, 'time_max': time_max, 'value_type': 'float', 'units': '%', 'slope': 'both', 'format': '%f', 'description': 'BTree Page Miss Ratio', 'groups': groups }, { 'name': NAME_PREFIX + 'globalLock_currentQueue_total', 'call_back': get_value, 'time_max': time_max, 'value_type': 'uint', 'units': 'Operations', 'slope': 'both', 'format': '%u', 'description': 'Total Operations Waiting for Lock', 'groups': groups }, { 'name': NAME_PREFIX + 'globalLock_currentQueue_readers', 'call_back': get_value, 'time_max': time_max, 'value_type': 'uint', 'units': 'Operations', 'slope': 'both', 'format': '%u', 'description': 'Readers Waiting for Lock', 'groups': groups }, { 'name': NAME_PREFIX + 'globalLock_currentQueue_writers', 'call_back': get_value, 'time_max': time_max, 'value_type': 'uint', 'units': 'Operations', 'slope': 'both', 'format': '%u', 'description': 'Writers Waiting for Lock', 'groups': groups }, { 'name': NAME_PREFIX + 'globalLock_activeClients_total', 'call_back': get_value, 'time_max': time_max, 'value_type': 'uint', 'units': 'Clients', 'slope': 'both', 'format': '%u', 'description': 'Total Active Clients', 'groups': groups }, { 'name': NAME_PREFIX + 'globalLock_activeClients_readers', 'call_back': get_value, 'time_max': time_max, 'value_type': 'uint', 'units': 'Clients', 'slope': 'both', 'format': '%u', 'description': 'Active Readers', 'groups': groups }, { 'name': NAME_PREFIX + 'globalLock_activeClients_writers', 'call_back': get_value, 'time_max': time_max, 'value_type': 'uint', 'units': 'Clients', 'slope': 'both', 'format': '%u', 'description': 'Active Writers', 'groups': groups }, { 'name': NAME_PREFIX + 'connections_current', 'call_back': get_value, 'time_max': time_max, 'value_type': 'uint', 'units': 'Connections', 'slope': 'both', 'format': '%u', 'description': 'Open Connections', 'groups': groups }, { 'name': NAME_PREFIX + 'connections_current_ratio', 'call_back': get_connections_current_ratio, 'time_max': time_max, 'value_type': 'float', 'units': '%', 'slope': 'both', 'format': '%f', 'description': 'Percentage of Connections Used', 'groups': groups }, { 'name': NAME_PREFIX + 'slave_delay', 'call_back': get_slave_delay, 'time_max': time_max, 'value_type': 'uint', 'units': 'Seconds', 'slope': 'both', 'format': '%u', 'description': 'Replica Set Slave Delay', 'groups': groups }, { 'name': NAME_PREFIX + 'asserts_total', 'call_back': get_asserts_total_rate, 'time_max': time_max, 'value_type': 'float', 'units': 'Asserts/Sec', 'slope': 'both', 'format': '%f', 'description': 'Asserts', 'groups': groups } ] return descriptors def metric_cleanup(): """Cleanup""" pass # the following code is for debugging and testing if __name__ == '__main__': descriptors = metric_init(PARAMS) while True: for d in descriptors: print (('%s = %s') % (d['name'], d['format'])) % (d['call_back'](d['name'])) print '' time.sleep(METRICS_CACHE_TTL)