Ganglia role: make the gmond UDP receive buffer configurable.

  * ganglia/defaults/main.yml: changes ganglia_gmond_send_metadata_interval
    from 30 to 600 and adds ganglia_set_rmem (default False) and
    ganglia_udp_recv_buffer (default 131070).
  * ganglia/tasks/main.yml: when ganglia_set_rmem is true, sets
    net.core.rmem_max to twice ganglia_udp_recv_buffer through the sysctl
    module. The variable is cast with `| int` before multiplying, because a
    string-typed override would otherwise be repeated ("131070131070")
    rather than doubled.
  * ganglia/templates/gmond.j2: emits `buffer = ...` in the udp_recv_channel
    that uses mcast_join, and changes time_threshold from 180 to 360 in the
    collection_group that collects cpu_user.
  * couchbase/tasks/ganglia-plugin.yml: appends one blank line; no
    functional change.

NOTE(review): line breaks and leading indentation inside the hunks were
reconstructed from the hunk line counts -- confirm against the repository
before applying.
NOTE(review): the gmond.j2 hunk removes and re-adds the `port` line with the
same visible text; presumably a whitespace-only change -- confirm.
NOTE(review): the post-image blob id on the ganglia/tasks/main.yml index line
predates the `| int` change to the sysctl line.

diff --git a/couchbase/tasks/ganglia-plugin.yml b/couchbase/tasks/ganglia-plugin.yml
index 695450fc..d1b57123 100644
--- a/couchbase/tasks/ganglia-plugin.yml
+++ b/couchbase/tasks/ganglia-plugin.yml
@@ -29,3 +29,4 @@
   when: not couchbase_ganglia_plugin_enabled
   notify: Restart ganglia monitor
   tags: [ 'ganglia', 'couchbase' ]
+
diff --git a/ganglia/defaults/main.yml b/ganglia/defaults/main.yml
index aedcbd7a..433831ad 100644
--- a/ganglia/defaults/main.yml
+++ b/ganglia/defaults/main.yml
@@ -5,7 +5,10 @@
 #ganglia_gmond_cluster_port: 8649
 #ganglia_gmond_mcast_addr: 239.2.11.71
 #ganglia_gmetad_host: ganglia-gmetad
-ganglia_gmond_send_metadata_interval: 30
+ganglia_gmond_send_metadata_interval: 600
+# Set it to yes if you need a buffer bigger than the default
+ganglia_set_rmem: False
+ganglia_udp_recv_buffer: 131070
 # Needed to build the correct firewall rules when jmxtrans is in use
 ganglia_gmond_use_jmxtrans: False
 # Used by other roles to install specific ganglia iptables rules or some specific ganglia plugins. Or not.
diff --git a/ganglia/tasks/main.yml b/ganglia/tasks/main.yml
index c0165f17..b81384c4 100644
--- a/ganglia/tasks/main.yml
+++ b/ganglia/tasks/main.yml
@@ -79,3 +79,9 @@
   notify: Restart ganglia monitor
   when: is_precise
   tags: [ 'monitoring', 'ganglia' ]
+
+- name: Set the kernel UDP buffer limits (net.core.rmem_max) to a value that is double of the ganglia udp buffer
+  sysctl: name=net.core.rmem_max value={{ (ganglia_udp_recv_buffer | int) * 2 }} reload=yes state=present
+  when: ganglia_set_rmem
+  tags: [ 'monitoring', 'ganglia' ]
+
diff --git a/ganglia/templates/gmond.j2 b/ganglia/templates/gmond.j2
index d33a2a07..e972175d 100644
--- a/ganglia/templates/gmond.j2
+++ b/ganglia/templates/gmond.j2
@@ -43,7 +43,8 @@ udp_send_channel {
 /* You can specify as many udp_recv_channels as you like as well.
 */
 udp_recv_channel {
   mcast_join = {{ ganglia_gmond_mcast_addr }}
-  port = {{ ganglia_gmond_cluster_port }}
+  port = {{ ganglia_gmond_cluster_port }}
+  buffer = {{ ganglia_udp_recv_buffer }}
 }
 {% else %}
@@ -183,7 +184,7 @@ collection_group {
    set significantly higher to reduce unneccessary network chatter. */
 collection_group {
   collect_every = 20
-  time_threshold = 180
+  time_threshold = 360
   /* CPU status */
   metric {
     name = "cpu_user"