ansible-roles/ganglia/templates/gmond.j2

/* This configuration is as close to 2.5.x default behavior as possible
   The values closely match ./gmond/metric.h definitions in 2.5.x */
globals {
  daemonize = yes
  setuid = yes
  user = ganglia
  debug_level = 0
  # max_udp_msg_len = 1472
  mute = no
  deaf = no
  host_dmax = 3600 /* secs */
  cleanup_threshold = 300 /* secs */
  gexec = no
  allow_extra_data = yes
  send_metadata_interval = {{ ganglia_gmond_send_metadata_interval }}
}
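/* Note: send_metadata_interval controls how often gmond re-announces metric
   metadata. A non-zero value matters mainly in unicast mode, where an
   aggregator restarted mid-run would otherwise never re-learn the metadata
   it missed. */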
/* If a cluster attribute is specified, then all gmond hosts are wrapped inside
 * of a <CLUSTER> tag. If you do not specify a cluster tag, then all <HOSTS> will
 * NOT be wrapped inside of a <CLUSTER> tag. */
cluster {
  name = "{{ ganglia_gmond_cluster }}"
  owner = "{{ ganglia_gmond_cluster_owner }}"
  latlong = "unspecified"
  url = "unspecified"
}
/* The host section describes attributes of the host, like the location */
host {
  location = "{{ ganglia_gmond_location }}"
}
/* Feel free to specify as many udp_send_channels as you like. Gmond
   used to only support having a single channel */
{% if not ganglia_unicast_mode %}
udp_send_channel {
  #bind_hostname = yes
  mcast_join = {{ ganglia_gmond_mcast_addr }}
  port = {{ ganglia_gmond_cluster_port }}
  ttl = 1
}
/* You can specify as many udp_recv_channels as you like as well. */
udp_recv_channel {
  mcast_join = {{ ganglia_gmond_mcast_addr }}
  port = {{ ganglia_gmond_cluster_port }}
  buffer = {{ ganglia_udp_recv_buffer }}
}
{% else %}
{% for host in ganglia_gmetad_sources %}
udp_send_channel {
  host = {{ host }}
  port = {{ ganglia_gmond_cluster_port }}
  ttl = 1
}
{% endfor %}
{% endif %}
udp_recv_channel {
  port = {{ ganglia_gmond_cluster_port }}
}
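{# Illustrative role variables for the branches above (values are hypothetical):
     ganglia_unicast_mode: true
     ganglia_gmetad_sources: ["gmetad1.example.com", "gmetad2.example.com"]
     ganglia_gmond_cluster_port: 8649
   In unicast mode the loop above renders one udp_send_channel per listed
   gmetad host; 8649 is the conventional gmond port. #}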
/* You can specify as many tcp_accept_channels as you like to share
   an XML description of the state of the cluster */
tcp_accept_channel {
  port = {{ ganglia_gmond_cluster_port }}
}
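/* For a quick check, the XML dump can be read by hand, e.g.:
     nc <gmond-host> {{ ganglia_gmond_cluster_port }}
   gmond writes the cluster state as XML and closes the connection. */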
/* Each metrics module that is referenced by gmond must be specified and
   loaded. If the module has been statically linked with gmond, it does not
   require a load path. However, all dynamically loadable modules must include
   a load path. */
modules {
  module {
    name = "core_metrics"
  }
  module {
    name = "cpu_module"
    path = "/usr/lib/ganglia/modcpu.so"
  }
  module {
    name = "disk_module"
    path = "/usr/lib/ganglia/moddisk.so"
  }
  module {
    name = "load_module"
    path = "/usr/lib/ganglia/modload.so"
  }
  module {
    name = "mem_module"
    path = "/usr/lib/ganglia/modmem.so"
  }
  module {
    name = "net_module"
    path = "/usr/lib/ganglia/modnet.so"
  }
  module {
    name = "proc_module"
    path = "/usr/lib/ganglia/modproc.so"
  }
  module {
    name = "sys_module"
    path = "/usr/lib/ganglia/modsys.so"
  }
}
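/* Note: the module paths above are distro-specific; on 64-bit RHEL-family
   systems, for example, the shared objects typically live under
   /usr/lib64/ganglia instead. */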
include ('/etc/ganglia/conf.d/*.conf')
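/* Additional per-module or per-metric configuration can be dropped into
   /etc/ganglia/conf.d/ and picked up by the include above without editing
   this template. */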
/* The old internal 2.5.x metric array has been replaced by the following
   collection_group directives. What follows is the default behavior for
   collecting and sending metrics that is as close to 2.5.x behavior as
   possible. */
/* This collection group will cause a heartbeat (or beacon) to be sent every
   20 seconds. The heartbeat carries the GMOND_STARTED data, which expresses
   the age of the running gmond. */
collection_group {
  collect_once = yes
  time_threshold = 20
  metric {
    name = "heartbeat"
  }
}
/* This collection group will send general info about this host every 1200 secs.
   This information doesn't change between reboots and is only collected once. */
collection_group {
  collect_once = yes
  time_threshold = 1200
  metric {
    name = "cpu_num"
    title = "CPU Count"
  }
  metric {
    name = "cpu_speed"
    title = "CPU Speed"
  }
  metric {
    name = "mem_total"
    title = "Memory Total"
  }
  /* Should this be here? Swap can be added/removed between reboots. */
  metric {
    name = "swap_total"
    title = "Swap Space Total"
  }
  metric {
    name = "boottime"
    title = "Last Boot Time"
  }
  metric {
    name = "machine_type"
    title = "Machine Type"
  }
  metric {
    name = "os_name"
    title = "Operating System"
  }
  metric {
    name = "os_release"
    title = "Operating System Release"
  }
  metric {
    name = "location"
    title = "Location"
  }
}
/* This collection group will send the status of gexecd for this host every 300 secs */
/* Unlike 2.5.x the default behavior is to report gexecd OFF. */
collection_group {
  collect_once = yes
  time_threshold = 300
  metric {
    name = "gexec"
    title = "Gexec Status"
  }
}
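/* In the groups below, collect_every sets how often a metric is sampled;
   a sampled value is sent when it differs from the last sent value by more
   than value_threshold, and unconditionally once time_threshold seconds
   have passed since the last send. */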
/* This collection group will collect the CPU status info every 20 secs.
   The time threshold is set to 360 seconds. In truth, this time_threshold
   could be set significantly higher to reduce unnecessary network chatter. */
collection_group {
  collect_every = 20
  time_threshold = 360
  /* CPU status */
  metric {
    name = "cpu_user"
    value_threshold = "1.0"
    title = "CPU User"
  }
  metric {
    name = "cpu_system"
    value_threshold = "1.0"
    title = "CPU System"
  }
  metric {
    name = "cpu_idle"
    value_threshold = "5.0"
    title = "CPU Idle"
  }
  metric {
    name = "cpu_nice"
    value_threshold = "1.0"
    title = "CPU Nice"
  }
  metric {
    name = "cpu_aidle"
    value_threshold = "5.0"
    title = "CPU aidle"
  }
  metric {
    name = "cpu_wio"
    value_threshold = "1.0"
    title = "CPU wio"
  }
  /* The next two metrics are optional if you want more detail...
     ... since they are accounted for in cpu_system.
  metric {
    name = "cpu_intr"
    value_threshold = "1.0"
    title = "CPU intr"
  }
  metric {
    name = "cpu_sintr"
    value_threshold = "1.0"
    title = "CPU sintr"
  }
  */
}
collection_group {
  collect_every = 20
  time_threshold = 90
  /* Load Averages */
  metric {
    name = "load_one"
    value_threshold = "1.0"
    title = "One Minute Load Average"
  }
  metric {
    name = "load_five"
    value_threshold = "1.0"
    title = "Five Minute Load Average"
  }
  metric {
    name = "load_fifteen"
    value_threshold = "1.0"
    title = "Fifteen Minute Load Average"
  }
}
/* This group collects the number of running and total processes */
collection_group {
  collect_every = 80
  time_threshold = 950
  metric {
    name = "proc_run"
    value_threshold = "1.0"
    title = "Total Running Processes"
  }
  metric {
    name = "proc_total"
    value_threshold = "1.0"
    title = "Total Processes"
  }
}
/* This collection group grabs the volatile memory metrics every 40 secs and
   sends them at least every 180 secs. This time_threshold can be increased
   significantly to reduce unneeded network traffic. */
collection_group {
  collect_every = 40
  time_threshold = 180
  metric {
    name = "mem_free"
    value_threshold = "1024.0"
    title = "Free Memory"
  }
  metric {
    name = "mem_shared"
    value_threshold = "1024.0"
    title = "Shared Memory"
  }
  metric {
    name = "mem_buffers"
    value_threshold = "1024.0"
    title = "Memory Buffers"
  }
  metric {
    name = "mem_cached"
    value_threshold = "1024.0"
    title = "Cached Memory"
  }
  metric {
    name = "swap_free"
    value_threshold = "1024.0"
    title = "Free Swap Space"
  }
}
collection_group {
  collect_every = 40
  time_threshold = 300
  metric {
    name = "bytes_out"
    value_threshold = 4096
    title = "Bytes Sent"
  }
  metric {
    name = "bytes_in"
    value_threshold = 4096
    title = "Bytes Received"
  }
  metric {
    name = "pkts_in"
    value_threshold = 256
    title = "Packets Received"
  }
  metric {
    name = "pkts_out"
    value_threshold = 256
    title = "Packets Sent"
  }
}
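/* bytes_in/bytes_out and pkts_in/pkts_out are reported as per-second rates,
   so the value_thresholds above are effectively bytes/sec and packets/sec. */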
/* Different from the 2.5.x default, since the old config made no sense */
collection_group {
  collect_every = 1800
  time_threshold = 3600
  metric {
    name = "disk_total"
    value_threshold = 1.0
    title = "Total Disk Space"
  }
}
collection_group {
  collect_every = 40
  time_threshold = 180
  metric {
    name = "disk_free"
    value_threshold = 1.0
    title = "Disk Space Available"
  }
  metric {
    name = "part_max_used"
    value_threshold = 1.0
    title = "Maximum Disk Space Used"
  }
}