diff --git a/hadoop/CDH/defaults/main.yml b/hadoop/CDH/defaults/main.yml
index c3d6b8e3..47b09700 100644
--- a/hadoop/CDH/defaults/main.yml
+++ b/hadoop/CDH/defaults/main.yml
@@ -48,3 +48,5 @@ cdh_zeppelin_work_dirs:
 cdh_zeppelin_ldap_auth: True
 cdh_zeppelin_dedicated_node: False
 
+# When True, tasks/main.yml imports services-haproxy.yml on the host.
+cdh_impala_load_balancer: False
diff --git a/hadoop/CDH/tasks/main.yml b/hadoop/CDH/tasks/main.yml
index edfdd39d..ec27c482 100644
--- a/hadoop/CDH/tasks/main.yml
+++ b/hadoop/CDH/tasks/main.yml
@@ -7,4 +7,6 @@
   when: cdh_oozie_server
 - import_tasks: zeppelin.yml
   when: cdh_zeppelin_node
+- import_tasks: services-haproxy.yml
+  when: cdh_impala_load_balancer
 
diff --git a/hadoop/CDH/tasks/services-haproxy.yml b/hadoop/CDH/tasks/services-haproxy.yml
new file mode 100644
index 00000000..3a9fbfe2
--- /dev/null
+++ b/hadoop/CDH/tasks/services-haproxy.yml
@@ -0,0 +1,13 @@
+---
+# Renders the haproxy configuration from the role template. The
+# 'Reload haproxy' handler is expected to be defined outside this file.
+- block:
+    - name: Install the haproxy configuration
+      template:
+        src: hue-hive-impala_haproxy.cfg
+        dest: /etc/haproxy/haproxy.cfg
+        owner: root
+        group: root
+        mode: '0644'
+      notify: Reload haproxy
+  tags: ['cdh', 'haproxy', 'impala', 'hue', 'hive']
diff --git a/hadoop/CDH/templates/hue-hive-impala_haproxy.cfg b/hadoop/CDH/templates/hue-hive-impala_haproxy.cfg
new file mode 100644
index 00000000..bdf72ea0
--- /dev/null
+++ b/hadoop/CDH/templates/hue-hive-impala_haproxy.cfg
@@ -0,0 +1,88 @@
+global
+    # References:
+    # https://www.cloudera.com/documentation/enterprise/5-13-x/topics/impala_proxy.html
+    # https://www.cloudera.com/documentation/enterprise/5-13-x/topics/hue_sec_ha.html#concept_ef1_1pf_51b
+    #
+    # To have these messages end up in /var/log/haproxy.log you will
+    # need to:
+    #
+    # 1) configure syslog to accept network log events. This is done
+    #    by adding the '-r' option to the SYSLOGD_OPTIONS in
+    #    /etc/sysconfig/syslog
+    #
+    # 2) configure local0 events (the facility used by the first 'log'
+    #    line below) to go to the /var/log/haproxy.log file. A line like
+    #    the following can be added to /etc/sysconfig/syslog
+    #
+    #    local0.*                       /var/log/haproxy.log
+    #
+    log         127.0.0.1 local0
+    log         127.0.0.1 local1 notice
+    chroot      /var/lib/haproxy
+    pidfile     /run/haproxy.pid
+    maxconn     4000
+    user        haproxy
+    group       haproxy
+    daemon
+
+    # turn on stats unix socket
+    stats socket /var/lib/haproxy/stats
+
+#---------------------------------------------------------------------
+# common defaults that all the 'listen' and 'backend' sections will
+# use if not designated in their block
+#
+# You might need to adjust timing values to prevent timeouts.
+#---------------------------------------------------------------------
+defaults
+    mode                    http
+    log                     global
+    option                  httplog
+    option                  dontlognull
+    option                  http-server-close
+    option                  forwardfor except 127.0.0.0/8
+    option                  redispatch
+    retries                 3
+    maxconn                 3000
+    timeout connect         5000
+    timeout client          50000
+    timeout server          50000
+
+#
+# This sets up the admin page for HA Proxy at port 25002.
+#
+# listen stats
+#    bind :25002
+#    mode http
+#    stats enable
+#    stats auth username:password
+
+# This is the setup for Impala. Impala clients connect to load_balancer_host:25003.
+# HAProxy will balance connections among the list of servers listed below.
+# The list of Impalad is listening at port 21000 for beeswax (impala-shell) or original ODBC driver.
+# For JDBC or ODBC version 2.x driver, use port 21050 instead of 21000.
+# One 'server' line is rendered for each host in the 'workers' inventory group.
+listen impala
+    bind :25003
+    mode tcp
+    option tcplog
+    balance leastconn
+{% for host in groups['workers'] %}
+    server {{ host }} {{ host }}:21000 check
+{% endfor %}
+
+# Setup for Hue or other JDBC-enabled applications.
+# In particular, Hue requires sticky sessions.
+# The application connects to load_balancer_host:21051, and HAProxy balances
+# connections to the associated hosts, where Impala listens for JDBC
+# requests on port 21050.
+# One 'server' line is rendered for each host in the 'workers' inventory group.
+listen impalajdbc
+    bind :21051
+    mode tcp
+    option tcplog
+    balance source
+{% for host in groups['workers'] %}
+    server {{ host }} {{ host }}:21050 check
+{% endfor %}
+