diff --git a/hadoop/CDH/defaults/main.yml b/hadoop/CDH/defaults/main.yml
index 6a153589..c3d6b8e3 100644
--- a/hadoop/CDH/defaults/main.yml
+++ b/hadoop/CDH/defaults/main.yml
@@ -30,3 +30,29 @@ postgresql_jdbc_driver_version: 42.1.4
 cdh_spark2_enabled: True
 cdh_spark2_jar: SPARK2_ON_YARN-2.2.0.cloudera2.jar
 cdh_spark2_csd_url: 'http://archive.cloudera.com/spark2/csd/{{ cdh_spark2_jar }}'
+
+# Zeppelin: tasks/zeppelin.yml is only imported when cdh_zeppelin_node is true.
+cdh_zeppelin_node: False
+cdh_zeppelin_version: 0.7.3
+cdh_zeppelin_dir: 'zeppelin-{{ cdh_zeppelin_version }}-bin-all'
+# NOTE: the 'chd_' spelling is kept on purpose; tasks/zeppelin.yml refers to
+# the variable under this exact name.
+chd_zeppelin_archive: '{{ cdh_zeppelin_dir }}.tgz'
+# Superseded releases are dropped from download mirrors, so fetch from the
+# permanent Apache release archive, over HTTPS.
+cdh_zeppelin_download_url: 'https://archive.apache.org/dist/zeppelin/zeppelin-{{ cdh_zeppelin_version }}/{{ chd_zeppelin_archive }}'
+cdh_zeppelin_user: zeppelin
+cdh_zeppelin_group: '{{ cdh_zeppelin_user }}'
+# Exported as ZEPPELIN_PORT by templates/zeppelin-env.sh.
+cdh_zeppelin_http_port: 8080
+cdh_zeppelin_home: /srv/zeppelin
+# Created by tasks/zeppelin.yml and owned by cdh_zeppelin_user.
+cdh_zeppelin_work_dirs:
+  - '{{ cdh_zeppelin_home }}/notebook'
+  - '{{ cdh_zeppelin_home }}/log'
+  - '{{ cdh_zeppelin_home }}/run'
+  - '{{ cdh_zeppelin_home }}/base_tmp/tmp'
+cdh_zeppelin_ldap_auth: True
+# Gates the nginx role dependency (meta/main.yml) and the apache2 shutdown task.
+cdh_zeppelin_dedicated_node: False
+
diff --git a/hadoop/CDH/handlers/main.yml b/hadoop/CDH/handlers/main.yml
index d7d88002..7a0f164d 100644
--- a/hadoop/CDH/handlers/main.yml
+++ b/hadoop/CDH/handlers/main.yml
@@ -1,3 +1,6 @@
 ---
 - name: Restart cloudera-scm-server
   service: name=cloudera-scm-server state=restarted
+
+- name: Restart zeppelin
+  service: name=zeppelin state=restarted
diff --git a/hadoop/CDH/meta/main.yml b/hadoop/CDH/meta/main.yml
new file mode 100644
index 00000000..9de6b423
--- /dev/null
+++ b/hadoop/CDH/meta/main.yml
@@ -0,0 +1,5 @@
+---
+dependencies:
+  - { role: '../../library/roles/oracle-jdk', when: openjdk_install is not defined or not openjdk_install }
+  - { role: '../../library/roles/openjdk', when: openjdk_install | default(False) }
+  - { role: '../../library/roles/nginx', when: cdh_zeppelin_dedicated_node }
diff --git a/hadoop/CDH/tasks/main.yml b/hadoop/CDH/tasks/main.yml
index 28b7138e..edfdd39d 100644
--- a/hadoop/CDH/tasks/main.yml
+++ b/hadoop/CDH/tasks/main.yml
@@ -5,3 +5,6 @@
   when: cdh_manager_install
 - import_tasks: oozie.yml
   when: cdh_oozie_server
+- import_tasks: zeppelin.yml
+  when: cdh_zeppelin_node
+
diff --git a/hadoop/CDH/tasks/zeppelin.yml b/hadoop/CDH/tasks/zeppelin.yml
new file mode 100644
index 00000000..6aeec9de
--- /dev/null
+++ b/hadoop/CDH/tasks/zeppelin.yml
@@ -0,0 +1,43 @@
+---
+# Install Apache Zeppelin from the binary tarball and run it as a service
+# under a dedicated system account.
+- block:
+    - name: Create a user that will run the zeppelin service
+      user: name={{ cdh_zeppelin_user }} home={{ cdh_zeppelin_home }} createhome=no shell=/usr/sbin/nologin system=yes
+
+    - name: Create the zeppelin home directory
+      file: dest={{ cdh_zeppelin_home }} state=directory mode=0755
+
+    - name: Create the zeppelin data directories
+      file: dest={{ item }} state=directory owner={{ cdh_zeppelin_user }} group={{ cdh_zeppelin_group }}
+      with_items: '{{ cdh_zeppelin_work_dirs }}'
+
+    - name: Download zeppelin
+      get_url: url={{ cdh_zeppelin_download_url }} dest=/srv/{{ chd_zeppelin_archive }}
+
+    - name: Unpack the zeppelin distribution
+      unarchive: remote_src=yes src=/srv/{{ chd_zeppelin_archive }} dest={{ cdh_zeppelin_home }} owner=root group=root
+      args:
+        creates: '{{ cdh_zeppelin_home }}/{{ cdh_zeppelin_dir }}'
+
+    - name: Install the zeppelin env file
+      template: src=zeppelin-env.sh dest={{ cdh_zeppelin_home }}/{{ cdh_zeppelin_dir }}/conf/zeppelin-env.sh owner=root group=root mode=0444
+      notify: Restart zeppelin
+
+    # NOTE(review): only an upstart job is shipped; on systemd hosts nothing
+    # here creates a zeppelin unit for the service task below - confirm.
+    - name: Install the zeppelin upstart startup script
+      template: src=zeppelin_upstart.conf dest=/etc/init/zeppelin.conf
+      when: ansible_service_mgr != 'systemd'
+
+    - name: Ensure that the zeppelin service is enabled and running
+      service: name=zeppelin state=started enabled=yes
+
+    # Only touch apache2 where meta/main.yml also pulls in the nginx role,
+    # which presumably provides the 'Restart nginx' handler notified here.
+    - name: Ensure that apache2 installed by CDM is stopped and disabled
+      service: name=apache2 state=stopped enabled=no
+      when: cdh_zeppelin_dedicated_node
+      notify: Restart nginx
+
+  tags: [ 'cdh', 'zeppelin' ]
diff --git a/hadoop/CDH/templates/shiro.ini b/hadoop/CDH/templates/shiro.ini
new file mode 100644
index 00000000..0a3187a6
--- /dev/null
+++ b/hadoop/CDH/templates/shiro.ini
@@ -0,0 +1,88 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+[users]
+# List of users with their password allowed to access Zeppelin.
+# To use a different strategy (LDAP / Database / ...) check the shiro doc at http://shiro.apache.org/configuration.html#Configuration-INISections
+# NOTE(review): the accounts below are placeholders with plaintext passwords;
+# replace them (or configure a realm in [main]) before deploying this file.
+admin = password1, admin
+user1 = password2, role1, role2
+user2 = password3, role3
+user3 = password4, role2
+
+# Sample LDAP configuration, for user Authentication, currently tested for single Realm
+[main]
+### A sample for configuring Active Directory Realm
+#activeDirectoryRealm = org.apache.zeppelin.realm.ActiveDirectoryGroupRealm
+#activeDirectoryRealm.systemUsername = userNameA
+
+#use either systemPassword or hadoopSecurityCredentialPath, more details in http://zeppelin.apache.org/docs/latest/security/shiroauthentication.html
+#activeDirectoryRealm.systemPassword = passwordA
+#activeDirectoryRealm.hadoopSecurityCredentialPath = jceks://file/user/zeppelin/zeppelin.jceks
+#activeDirectoryRealm.searchBase = CN=Users,DC=SOME_GROUP,DC=COMPANY,DC=COM
+#activeDirectoryRealm.url = ldap://ldap.test.com:389
+#activeDirectoryRealm.groupRolesMap = "CN=admin,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"admin","CN=finance,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"finance","CN=hr,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"hr"
+#activeDirectoryRealm.authorizationCachingEnabled = false
+
+### A sample for configuring LDAP Directory Realm
+#ldapRealm = org.apache.zeppelin.realm.LdapGroupRealm
+## search base for ldap groups (only relevant for LdapGroupRealm):
+#ldapRealm.contextFactory.environment[ldap.searchBase] = dc=COMPANY,dc=COM
+#ldapRealm.contextFactory.url = ldap://ldap.test.com:389
+#ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM
+#ldapRealm.contextFactory.authenticationMechanism = simple
+
+### A sample PAM configuration
+#pamRealm=org.apache.zeppelin.realm.PamRealm
+#pamRealm.service=sshd
+
+### A sample for configuring ZeppelinHub Realm
+#zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm
+## Url of ZeppelinHub
+#zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com
+#securityManager.realms = $zeppelinHubRealm
+
+sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager
+
+### If caching of user is required then uncomment below lines
+#cacheManager = org.apache.shiro.cache.MemoryConstrainedCacheManager
+#securityManager.cacheManager = $cacheManager
+
+securityManager.sessionManager = $sessionManager
+# 86,400,000 milliseconds = 24 hour
+securityManager.sessionManager.globalSessionTimeout = 86400000
+shiro.loginUrl = /api/login
+
+[roles]
+role1 = *
+role2 = *
+role3 = *
+admin = *
+
+[urls]
+# This section is used for url-based security.
+# You can secure interpreter, configuration and credential information by urls. Comment or uncomment the below urls that you want to hide.
+# anon means the access is anonymous.
+# authc means Form based Auth Security
+# To enforce security, comment the line below and uncomment the next one
+/api/version = anon
+#/api/interpreter/** = authc, roles[admin]
+#/api/configurations/** = authc, roles[admin]
+#/api/credential/** = authc, roles[admin]
+#/** = anon
+/** = authc
diff --git a/hadoop/CDH/templates/zeppelin-env.sh b/hadoop/CDH/templates/zeppelin-env.sh
new file mode 100644
index 00000000..d179a6a6
--- /dev/null
+++ b/hadoop/CDH/templates/zeppelin-env.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Environment for the Zeppelin daemon; rendered from an Ansible template.
+
+export ZEPPELIN_LOG_DIR="{{ cdh_zeppelin_home }}/log"
+export ZEPPELIN_PID_DIR="{{ cdh_zeppelin_home }}/run"
+export ZEPPELIN_WAR_TEMPDIR="{{ cdh_zeppelin_home }}/base_tmp/tmp"
+export ZEPPELIN_NOTEBOOK_DIR="{{ cdh_zeppelin_home }}/notebook"
+export ZEPPELIN_PORT="{{ cdh_zeppelin_http_port }}"
+
+export ZEPPELIN_MEM="-Xms4096m -Xmx4096m"
+export ZEPPELIN_INTP_MEM="-Xms4096m -Xmx4096m"
+
+# export MASTER= # Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode.
+export SPARK_HOME=/opt/cloudera/parcels/CDH-5.9.3-1.cdh5.9.3.p0.4/lib/spark
+export DEFAULT_HADOOP_HOME=/opt/cloudera/parcels/CDH-5.9.3-1.cdh5.9.3.p0.4/lib/hadoop
+export SPARK_JAR_HDFS_PATH=${SPARK_JAR_HDFS_PATH:-''}
+export SPARK_LAUNCH_WITH_SCALA=0
+export SPARK_LIBRARY_PATH=${SPARK_HOME}/lib
+export SCALA_LIBRARY_PATH=${SPARK_HOME}/lib
+
+SPARK_PYTHON_PATH=""
+if [ -n "$SPARK_PYTHON_PATH" ]; then
+  export PYTHONPATH="$PYTHONPATH:$SPARK_PYTHON_PATH"
+fi
+
+export HADOOP_HOME=${HADOOP_HOME:-$DEFAULT_HADOOP_HOME}
+
+if [ -n "$HADOOP_HOME" ]; then
+  LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${HADOOP_HOME}/lib/native
+fi
+
+SPARK_EXTRA_LIB_PATH=""
+if [ -n "$SPARK_EXTRA_LIB_PATH" ]; then
+  LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$SPARK_EXTRA_LIB_PATH
+fi
+
+export LD_LIBRARY_PATH
+
+# NOTE(review): SPARK_CONF_DIR is never set in this file, so unless the
+# caller's environment provides it the fallback below is "/yarn-conf".
+# Neither of the next two variables is exported - confirm both are intended.
+HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-$SPARK_CONF_DIR/yarn-conf}
+HIVE_CONF_DIR=${HIVE_CONF_DIR:-/etc/hive/conf}
+export MASTER=yarn-client
diff --git a/hadoop/CDH/templates/zeppelin_upstart.conf b/hadoop/CDH/templates/zeppelin_upstart.conf
new file mode 100644
index 00000000..39976e0f
--- /dev/null
+++ b/hadoop/CDH/templates/zeppelin_upstart.conf
@@ -0,0 +1,19 @@
+description "Zeppelin"
+
+start on (local-filesystems and net-device-up IFACE!=lo)
+stop on shutdown
+
+# Respawn the process on unexpected termination
+respawn
+
+# respawn the job up to 7 times within a 5 second period.
+# If the job exceeds these values, it will be stopped and marked as failed.
+respawn limit 7 5
+
+# Run unprivileged
+setuid {{ cdh_zeppelin_user }}
+setgid {{ cdh_zeppelin_group }}
+
+# Run from the unpacked distribution; the exec path below is relative to it
+chdir {{ cdh_zeppelin_home }}/{{ cdh_zeppelin_dir }}
+exec bin/zeppelin-daemon.sh upstart