Add the Zeppelin installation and configuration (with nginx in front of it). No LDAP support yet.

This commit is contained in:
Andrea Dell'Amico 2018-04-12 21:22:54 +02:00
parent 10cdbfd5b5
commit b318a4f16d
8 changed files with 209 additions and 0 deletions

View File

@ -30,3 +30,21 @@ postgresql_jdbc_driver_version: 42.1.4
# Spark 2 CSD jar and the URL it is fetched from.
cdh_spark2_enabled: True
cdh_spark2_jar: SPARK2_ON_YARN-2.2.0.cloudera2.jar
cdh_spark2_csd_url: 'http://archive.cloudera.com/spark2/csd/{{ cdh_spark2_jar }}'

# Zeppelin. The zeppelin tasks are only imported when cdh_zeppelin_node is true.
cdh_zeppelin_node: False
# Quoted so the version can never be re-typed as a number by a YAML parser.
cdh_zeppelin_version: '0.7.3'
# Name of the directory contained in the binary distribution tarball.
cdh_zeppelin_dir: 'zeppelin-{{ cdh_zeppelin_version }}-bin-all'
cdh_zeppelin_archive: '{{ cdh_zeppelin_dir }}.tgz'
# Misspelled name ("chd" instead of "cdh"), kept as an alias so that tasks and
# inventories that still reference it keep working.
chd_zeppelin_archive: '{{ cdh_zeppelin_archive }}'
cdh_zeppelin_download_url: 'http://mirror.nohup.it/apache/zeppelin/zeppelin-{{ cdh_zeppelin_version }}/{{ cdh_zeppelin_archive }}'
# System account that runs the service; the group defaults to the user name.
cdh_zeppelin_user: zeppelin
cdh_zeppelin_group: '{{ cdh_zeppelin_user }}'
cdh_zeppelin_http_port: 8080
cdh_zeppelin_home: /srv/zeppelin
# Directories created with the zeppelin user as owner (notebooks, logs, pid
# files, war temp dir). They match the paths exported by zeppelin-env.sh.
cdh_zeppelin_work_dirs:
  - '{{ cdh_zeppelin_home }}/notebook'
  - '{{ cdh_zeppelin_home }}/log'
  - '{{ cdh_zeppelin_home }}/run'
  - '{{ cdh_zeppelin_home }}/base_tmp/tmp'
# NOTE(review): not referenced by any task or template visible in this change;
# confirm where it is consumed before relying on it.
cdh_zeppelin_ldap_auth: True
# Gates the nginx role dependency declared in meta/main.yml.
cdh_zeppelin_dedicated_node: False

View File

@ -1,3 +1,6 @@
---
# Handlers: each one restarts a service and runs only when a task notifies it
# by name.
- name: Restart cloudera-scm-server
  service:
    name: cloudera-scm-server
    state: restarted

- name: Restart zeppelin
  service:
    name: zeppelin
    state: restarted

5
hadoop/CDH/meta/main.yml Normal file
View File

@ -0,0 +1,5 @@
---
# Role dependencies.
# The two JDK conditions are complementary: oracle-jdk applies when
# openjdk_install is undefined or false, openjdk applies when it is true.
# nginx is only pulled in when cdh_zeppelin_dedicated_node is true.
dependencies:
  - role: '../../library/roles/oracle-jdk'
    when: openjdk_install is not defined or not openjdk_install
  - role: '../../library/roles/openjdk'
    when: openjdk_install | default(False)
  - role: '../../library/roles/nginx'
    when: cdh_zeppelin_dedicated_node

View File

@ -5,3 +5,6 @@
when: cdh_manager_install when: cdh_manager_install
# Oozie tasks, imported only when cdh_oozie_server is true.
- import_tasks: oozie.yml
  when: cdh_oozie_server

# Zeppelin tasks, imported only when cdh_zeppelin_node is true.
- import_tasks: zeppelin.yml
  when: cdh_zeppelin_node

View File

@ -0,0 +1,36 @@
---
# Install Zeppelin from the binary tarball under cdh_zeppelin_home, configure
# its environment file and run it as a service under a dedicated system account.
- block:
    - name: Create a user that will run the zeppelin service
      user:
        name: '{{ cdh_zeppelin_user }}'
        home: '{{ cdh_zeppelin_home }}'
        createhome: False
        shell: /usr/sbin/nologin
        system: True

    - name: Create the zeppelin home directory
      file:
        dest: '{{ cdh_zeppelin_home }}'
        state: directory
        mode: '0755'

    # Only these directories are owned by the service account; the unpacked
    # distribution itself stays owned by root.
    - name: Create the zeppelin data directories
      file:
        dest: '{{ item }}'
        state: directory
        owner: '{{ cdh_zeppelin_user }}'
        group: '{{ cdh_zeppelin_group }}'
      with_items: '{{ cdh_zeppelin_work_dirs }}'

    # The variable name is spelled "chd_" in the role defaults; keep that
    # spelling here so the lookup resolves.
    - name: Download zeppelin
      get_url:
        url: '{{ cdh_zeppelin_download_url }}'
        dest: '/srv/{{ chd_zeppelin_archive }}'

    # "creates" makes the unpack a no-op once the distribution directory exists.
    - name: Unpack the zeppelin distribution
      unarchive:
        remote_src: True
        src: '/srv/{{ chd_zeppelin_archive }}'
        dest: '{{ cdh_zeppelin_home }}'
        owner: root
        group: root
        creates: '{{ cdh_zeppelin_home }}/{{ cdh_zeppelin_dir }}'

    - name: Install the zeppelin env file
      template:
        src: zeppelin-env.sh
        dest: '{{ cdh_zeppelin_home }}/{{ cdh_zeppelin_dir }}/conf/zeppelin-env.sh'
        owner: root
        group: root
        mode: '0444'
      notify: Restart zeppelin

    # A changed job definition now restarts the service, like the env file does.
    # NOTE(review): no unit file is installed on systemd hosts by this block.
    - name: Install the zeppelin upstart startup script
      template:
        src: zeppelin_upstart.conf
        dest: /etc/init/zeppelin.conf
        owner: root
        group: root
        mode: '0644'
      when: ansible_service_mgr != 'systemd'
      notify: Restart zeppelin

    - name: Ensure that the zeppelin service is enabled and running
      service:
        name: zeppelin
        state: started
        enabled: True

    # The nginx role is only a dependency when cdh_zeppelin_dedicated_node is
    # true, so apache2 is only stopped (and nginx restarted) under that same
    # condition; otherwise the handler would restart a service that the role
    # never installed.
    - name: Ensure that apache2 installed by CDM is stopped and disabled
      service:
        name: apache2
        state: stopped
        enabled: False
      when: cdh_zeppelin_dedicated_node
      notify: Restart nginx

  tags: ['cdh', 'zeppelin']

View File

@ -0,0 +1,86 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Static account list used for authentication.
# Entry format: <user> = <password>, <role>[, <role> ...]
# NOTE(review): the passwords below are stored in plain text and look like
# placeholder values - confirm they are replaced before this file is deployed.
[users]
# List of users with their password allowed to access Zeppelin.
# To use a different strategy (LDAP / Database / ...) check the shiro doc at http://shiro.apache.org/configuration.html#Configuration-INISections
admin = password1, admin
user1 = password2, role1, role2
user2 = password3, role3
user3 = password4, role2
# Sample LDAP configuration, for user Authentication, currently tested for single Realm
# Every realm sample in this section (Active Directory, LDAP, PAM, ZeppelinHub)
# is commented out; only the session manager settings and the login URL at the
# end of the section are active.
[main]
### A sample for configuring Active Directory Realm
#activeDirectoryRealm = org.apache.zeppelin.realm.ActiveDirectoryGroupRealm
#activeDirectoryRealm.systemUsername = userNameA
#use either systemPassword or hadoopSecurityCredentialPath, more details in http://zeppelin.apache.org/docs/latest/security/shiroauthentication.html
#activeDirectoryRealm.systemPassword = passwordA
#activeDirectoryRealm.hadoopSecurityCredentialPath = jceks://file/user/zeppelin/zeppelin.jceks
#activeDirectoryRealm.searchBase = CN=Users,DC=SOME_GROUP,DC=COMPANY,DC=COM
#activeDirectoryRealm.url = ldap://ldap.test.com:389
#activeDirectoryRealm.groupRolesMap = "CN=admin,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"admin","CN=finance,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"finance","CN=hr,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"hr"
#activeDirectoryRealm.authorizationCachingEnabled = false
### A sample for configuring LDAP Directory Realm
#ldapRealm = org.apache.zeppelin.realm.LdapGroupRealm
## search base for ldap groups (only relevant for LdapGroupRealm):
#ldapRealm.contextFactory.environment[ldap.searchBase] = dc=COMPANY,dc=COM
#ldapRealm.contextFactory.url = ldap://ldap.test.com:389
#ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM
#ldapRealm.contextFactory.authenticationMechanism = simple
### A sample PAM configuration
#pamRealm=org.apache.zeppelin.realm.PamRealm
#pamRealm.service=sshd
### A sample for configuring ZeppelinHub Realm
#zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm
## Url of ZeppelinHub
#zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com
#securityManager.realms = $zeppelinHubRealm
# Define the session manager object; it is attached to the security manager below.
sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager
### If caching of user is required then uncomment below lines
#cacheManager = org.apache.shiro.cache.MemoryConstrainedCacheManager
#securityManager.cacheManager = $cacheManager
securityManager.sessionManager = $sessionManager
# 86,400,000 milliseconds = 24 hour
securityManager.sessionManager.globalSessionTimeout = 86400000
# Login endpoint path.
shiro.loginUrl = /api/login
# Role definitions, one "<role> = <permissions>" entry per role. All four roles
# are assigned the same value (`*`), so they are not differentiated by
# permission in this section.
[roles]
role1 = *
role2 = *
role3 = *
admin = *
[urls]
# This section is used for url-based security.
# You can secure interpreter, configuration and credential information by urls. Comment or uncomment the below urls that you want to hide.
# anon means the access is anonymous.
# authc means Form based Auth Security
# To enforce security, keep `/** = anon` commented out and `/** = authc` active,
# as done at the end of this section.
# /api/version stays reachable without authentication.
/api/version = anon
# The admin-only restrictions below are currently disabled (commented out).
#/api/interpreter/** = authc, roles[admin]
#/api/configurations/** = authc, roles[admin]
#/api/credential/** = authc, roles[admin]
#/** = anon
/** = authc

View File

@ -0,0 +1,39 @@
#!/bin/bash
# Environment for the Zeppelin daemon. This file is an Ansible (Jinja2)
# template: every {{ ... }} expression is replaced when the role installs it.

# Working directories; they match the cdh_zeppelin_work_dirs role default.
export ZEPPELIN_LOG_DIR="{{ cdh_zeppelin_home }}/log"
export ZEPPELIN_PID_DIR="{{ cdh_zeppelin_home }}/run"
export ZEPPELIN_WAR_TEMPDIR="{{ cdh_zeppelin_home }}/base_tmp/tmp"
export ZEPPELIN_NOTEBOOK_DIR="{{ cdh_zeppelin_home }}/notebook"

# JVM heap settings for the server and for the interpreter processes.
export ZEPPELIN_MEM="-Xms4096m -Xmx4096m"
export ZEPPELIN_INTP_MEM="-Xms4096m -Xmx4096m"

# Root of the CDH parcel that provides Spark and Hadoop. Set the
# cdh_zeppelin_parcel_dir variable to point at a different parcel; the default
# is the path that was previously hard-coded here.
CDH_PARCEL_DIR="{{ cdh_zeppelin_parcel_dir | default('/opt/cloudera/parcels/CDH-5.9.3-1.cdh5.9.3.p0.4') }}"
export SPARK_HOME="${CDH_PARCEL_DIR}/lib/spark"
export DEFAULT_HADOOP_HOME="${CDH_PARCEL_DIR}/lib/hadoop"

export SPARK_JAR_HDFS_PATH=${SPARK_JAR_HDFS_PATH:-''}
export SPARK_LAUNCH_WITH_SCALA=0
export SPARK_LIBRARY_PATH=${SPARK_HOME}/lib
export SCALA_LIBRARY_PATH=${SPARK_HOME}/lib

# Hook for extra Python paths: empty by default, so PYTHONPATH is left untouched
# unless a value is filled in here.
SPARK_PYTHON_PATH=""
if [ -n "$SPARK_PYTHON_PATH" ]; then
  export PYTHONPATH="$PYTHONPATH:$SPARK_PYTHON_PATH"
fi

# A HADOOP_HOME already present in the environment wins over the parcel default.
export HADOOP_HOME=${HADOOP_HOME:-$DEFAULT_HADOOP_HOME}

# Append to LD_LIBRARY_PATH. The ${VAR:+...} form adds the ':' separator only
# when the variable already has a value, so no empty entry (which the dynamic
# loader treats as the current directory) ends up in the path.
if [ -n "$HADOOP_HOME" ]; then
  LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${HADOOP_HOME}/lib/native"
fi

# Hook for extra native library paths: empty by default.
SPARK_EXTRA_LIB_PATH=""
if [ -n "$SPARK_EXTRA_LIB_PATH" ]; then
  LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${SPARK_EXTRA_LIB_PATH}"
fi
export LD_LIBRARY_PATH

# Exported so that child processes see them too. SPARK_CONF_DIR is not set
# anywhere in this file, so it needs its own fallback; without one the default
# would expand to "/yarn-conf".
# NOTE(review): /etc/spark/conf is assumed to be the Spark client configuration
# directory on these hosts - confirm.
export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-${SPARK_CONF_DIR:-/etc/spark/conf}/yarn-conf}"
export HIVE_CONF_DIR="${HIVE_CONF_DIR:-/etc/hive/conf}"

# Spark master URL (e.g. spark://master_addr:7077); leaving MASTER empty would
# select local mode. Here Spark runs on YARN in client mode.
export MASTER=yarn-client

View File

@ -0,0 +1,19 @@
# Upstart job definition for the Zeppelin daemon (Ansible template: the
# {{ ... }} expressions are replaced with the role variables at install time).
description "Zeppelin"
# Start once local filesystems are mounted and a non-loopback interface is up.
start on (local-filesystems and net-device-up IFACE!=lo)
stop on shutdown
# Respawn the process on unexpected termination
respawn
# respawn the job up to 7 times within a 5 second period.
# If the job exceeds these values, it will be stopped and marked as failed.
respawn limit 7 5
# Run unprivileged
setuid {{ cdh_zeppelin_user }}
setgid {{ cdh_zeppelin_group }}
# Run from the unpacked zeppelin distribution so that the relative bin/ path
# in the exec line below resolves.
chdir {{ cdh_zeppelin_home }}/{{ cdh_zeppelin_dir }}
# NOTE(review): the "upstart" argument presumably keeps the daemon in the
# foreground so upstart can supervise it - confirm against zeppelin-daemon.sh.
exec bin/zeppelin-daemon.sh upstart