Import the old role, add some fixes.

This commit is contained in:
Andrea Dell'Amico 2020-07-21 16:33:40 +02:00
parent 69def21d12
commit fb3a9a3d34
18 changed files with 917 additions and 69 deletions

View File

@ -1,31 +1,22 @@
Role Name Role Name
========= =========
A brief description of the role goes here. A role that
Requirements
------------
Any pre-requisites that may not be covered by Ansible itself or the role should be mentioned here. For instance, if the role uses the EC2 module, it may be a good idea to mention in this section that the boto package is required.
Role Variables Role Variables
-------------- --------------
A description of the settable variables for this role should go here, including any variables that are in defaults/main.yml, vars/main.yml, and any variables that can/should be set via parameters to the role. Any variables that are read from other roles and/or the global scope (ie. hostvars, group vars, etc.) should be mentioned here as well. The most important variables are listed below:
``` yaml
users_system_users:
- { login: 'foo', name: "Foo Bar", home: '{{ users_home_dir }}', createhome: 'yes', ssh_key: '{{ foo_ssh_key }}', shell: '/bin/bash', admin: False, log_as_root: False }
```
Dependencies Dependencies
------------ ------------
A list of other roles hosted on Galaxy should go here, plus any details in regards to parameters that may need to be set for other roles, or variables that are used from other roles. None
Example Playbook
----------------
Including an example of how to use your role (for instance, with variables passed in as parameters) is always nice for users too:
- hosts: servers
roles:
- { role: username.rolename, x: 42 }
License License
------- -------
@ -35,4 +26,4 @@ EUPL-1.2
Author Information Author Information
------------------ ------------------
An optional section for the role authors to include contact information, or a website (HTML is not allowed). Andrea Dell'Amico, <andrea.dellamico@isti.cnr.it>

View File

@ -1,2 +1,85 @@
--- ---
# defaults file for ansible-role-template cdh_manager_install: False
cdh_major_version: 5
cdh_release_version: '16.2'
cdh_version: '{{ cdh_major_version }}.{{ cdh_release_version }}'
#cdh_manager_repo: 'deb https://archive.cloudera.com/cm5/ubuntu/trusty/amd64/cm/dists/trusty-cm{{ cdh_version }}/'
cdh_ubuntu_repo_key: 'http://archive.cloudera.com/cm5/ubuntu/xenial/amd64/cm/archive.key'
cdh_ubuntu_packages_repo: 'deb [arch=amd64] http://archive.cloudera.com/cm5/ubuntu/xenial/amd64/cm xenial-cm5 contrib'
cdh_ubuntu_manager_repo_url: 'https://archive.cloudera.com/cdh5/ubuntu/xenial/amd64/cdh/cloudera.list'
#cdh_manager_download_url: 'http://archive.cloudera.com/cm5/installer/5.9.3/cloudera-manager-installer.bin'
cdh_csd_directory: /opt/cloudera/csd
# Set it to true on the oozie server nodes
cdh_oozie_server: False
cdh_oozie_ext_libs_url: 'https://archive.cloudera.com/gplextras/misc/ext-2.2.zip'
cdh_use_separate_postgres_db: True
cdh_postgres_db_host: localhost
cdh_postgres_db_name: cdh
cdh_postgres_db_user: cdh
cdh_postgres_db_schema: cdh
cdh_postgresql_version: 10
postgresql_jdbc_driver_version: 42.2.14
###
# Spark2: the installation is completely manual, see
# https://www.cloudera.com/documentation/spark2/latest/topics/spark2_installing.html
#
# To integrate spark2 and oozie:
# https://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.6.0/bk_spark-component-guide/content/ch_oozie-spark-action.html#spark-config-oozie-spark2
# https://community.cloudera.com/t5/Advanced-Analytics-Apache-Spark/Spark2-classpath-issues-with-Oozie/td-p/59782
cdh_spark2_enabled: True
cdh_spark2_jar: SPARK2_ON_YARN-2.4.0.cloudera2.jar
cdh_spark2_csd_url: 'http://archive.cloudera.com/spark2/csd/{{ cdh_spark2_jar }}'
cdh_zeppelin_node: False
cdh_zeppelin_version: 0.8.2
cdh_zeppelin_dir: 'zeppelin-{{ cdh_zeppelin_version }}-bin-all'
chd_zeppelin_archive: '{{ cdh_zeppelin_dir }}.tgz'
cdh_zeppelin_download_url: 'http://mirror.nohup.it/apache/zeppelin/zeppelin-{{ cdh_zeppelin_version }}/{{ chd_zeppelin_archive }}'
cdh_zeppelin_user: zeppelin
cdh_zeppelin_group: '{{ cdh_zeppelin_user }}'
cdh_zeppelin_http_port: 8080
cdh_zeppelin_home: /srv/zeppelin
cdh_zeppelin_work_dirs:
- '{{ cdh_zeppelin_home }}/notebook'
- '{{ cdh_zeppelin_home }}/log'
- '{{ cdh_zeppelin_home }}/run'
- '{{ cdh_zeppelin_home }}/base_tmp/tmp'
cdh_zeppelin_conf_files:
- zeppelin-env.sh
- shiro.ini
- zeppelin-site.xml
cdh_zeppelin_ldap_auth: True
cdh_zeppelin_notebook_public: 'false'
cdh_zeppelin_dedicated_node: False
cdh_zeppelin_use_spark2: '{{ cdh_spark2_enabled }}'
cdh_impala_load_balancer: False
cdh_zeppelin_ldap_enabled: False
cdh_zeppelin_ldap_advanced_config: True
cdh_zeppelin_ldap_starttls: 'true'
cdh_zeppelin_search_bind_authentication: 'false'
cdh_zeppelin_username_pattern: "uid={0},ou=People,dc=mycompany,dc=com"
cdh_zeppelin_ldap_search_base: "dc=mycompany,dc=com"
cdh_zeppelin_ldap_users_base: "ou=People,dc=mycompany,dc=com"
cdh_zeppelin_user_objectclass: posixUser
cdh_zeppelin_ldap_group_base: "ou=Groups,dc=mycompany,dc=com"
cdh_zeppelin_ldap_group_obj_class: groupofnames
cdh_zeppelin_group_template: "cn={0},ou=Groups,dc=mycompany,dc=com"
cdh_zeppelin_group_search_filter: '(memberUid={0})'
cdh_zeppelin_ldap_nested_groups: 'false'
cdh_zeppelin_ldap_roles_by_group: 'cdh_hadoop: userrole, cdh_admin: adminrole'
cdh_zeppelin_ldap_bind_u: zeppelin
#cdh_zeppelin_ldap_bind_pwd: "use a vault file"
cdh_zeppelin_ldap_user_attr: uid
cdh_zeppelin_ldap_member_attr: member
cdh_zeppelin_ldap_group: zeppelin_hadoop
cdh_zeppelin_ldap_url: 'ldap://ldap.test.com:389'
cdh_zeppelin_secure_urls: True
cdh_zeppelin_secure_roles: 'authc, roles[adminrole]'

2
files/apt-99cloudera Normal file
View File

@ -0,0 +1,2 @@
APT::Get::AllowUnauthenticated "true";
Acquire::AllowInsecureRepositories "true";

View File

@ -1,2 +1,6 @@
--- ---
# handlers file for ansible-role-template - name: Restart cloudera-scm-server
service: name=cloudera-scm-server state=restarted
- name: Restart zeppelin
service: name=zeppelin state=restarted

View File

@ -1,61 +1,34 @@
galaxy_info: galaxy_info:
author: your name author: Andrea Dell'Amico
description: your description description: Systems Architect
company: ISTI-CNR company: ISTI-CNR
# If the issue tracker for your role is not on github, uncomment the
# next line and provide a value
issue_tracker_url: https://redmine-s2i2s.isti.cnr.it/projects/provisioning issue_tracker_url: https://redmine-s2i2s.isti.cnr.it/projects/provisioning
# Some suggested licenses: license: EUPL 1.2+
# - BSD (default)
# - MIT
# - GPLv2
# - GPLv3
# - Apache
# - CC-BY
license: EUPL-1.2
min_ansible_version: 2.8 min_ansible_version: 2.8
# If this a Container Enabled role, provide the minimum Ansible Container version.
# min_ansible_container_version:
# Optionally specify the branch Galaxy will use when accessing the GitHub
# repo for this role. During role install, if no tags are available,
# Galaxy will use this branch. During import Galaxy will access files on
# this branch. If Travis integration is configured, only notifications for this
# branch will be accepted. Otherwise, in all cases, the repo's default branch
# (usually master) will be used.
#github_branch:
#
# Provide a list of supported platforms, and for each platform a list of versions.
# If you don't wish to enumerate all versions for a particular platform, use 'all'.
# To view available platforms and versions (or releases), visit: # To view available platforms and versions (or releases), visit:
# https://galaxy.ansible.com/api/v1/platforms/ # https://galaxy.ansible.com/api/v1/platforms/
# #
# platforms: platforms:
# - name: Fedora - name: Ubuntu
# versions: versions:
# - all - bionic
# - 25 - name: EL
# - name: SomePlatform versions:
# versions: - 7
# - all
# - 1.0
# - 7
# - 99.99
galaxy_tags: [] galaxy_tags:
# List tags for your role here, one per line. A tag is a keyword that describes - hadoop
# and categorizes the role. Users find roles by searching for tags. Be sure to
# remove the '[]' above, if you add tags to this list.
#
# NOTE: A tag is limited to a single word comprised of alphanumeric characters.
# Maximum 20 tags per role.
dependencies: []
# List your role dependencies here, one per line. Be sure to remove the '[]' above,
# if you add dependencies to this list.
dependencies:
- src: git+https://gitea-s2i2s.isti.cnr.it/ISTI-ansible-roles/ansible-role-openjdk.git
version: master
name: openjdk
state: latest
- src: git+https://gitea-s2i2s.isti.cnr.it/ISTI-ansible-roles/ansible-role-nginx.git
version: master
name: nginx
state: latest

42
tasks/cdm.yml Normal file
View File

@ -0,0 +1,42 @@
---
- block:
- name: apt key for the Cloudera Manager repository
apt_key: url='{{ cdh_ubuntu_repo_key }}' state=present
- name: Disable the signature check, because it's broken
copy: src=apt-99cloudera dest=/etc/apt/apt.conf.d/99cloudera owner=root mode='0644'
when: cdh_major_version <= 5
- name: Install the Cloudera CDH packages repository
apt_repository: repo='{{ cdh_ubuntu_packages_repo }}' update_cache=yes
- name: Install the CDH Manager server
apt: pkg=cloudera-manager-server state=present update_cache=yes cache_valid_time=1800
- name: Install the CDH Manager daemons
apt: pkg=cloudera-manager-daemons state=present update_cache=yes cache_valid_time=1800
- name: Install the Cloudera Manager DB configuration
template: src=db.properties.j2 dest=/etc/cloudera-scm-server/db.properties mode=0640 owner=cloudera-scm group=cloudera-scm
tags: [ 'cdh', 'hadoop', 'cdh_m_conf' ]
- name: Ensure that the cloudera manager daemon is running and enabled
service: name=cloudera-scm-server state=restarted enabled=yes
when:
- cdh_manager_install
- ansible_distribution_file_variety == "Debian"
tags: [ 'cdh', 'hadoop' ]
- block:
- name: Install the Postgresql client
apt: pkg=postgresql-client state=latest update_cache=yes cache_valid_time=1800
when: ansible_distribution_file_variety == "Debian"
- name: Download the jdbc driver
get_url: url=https://jdbc.postgresql.org/download/postgresql-{{ postgresql_jdbc_driver_version }}.jar dest=/srv/postgresql.jar
when:
- cdh_manager_install
tags: [ 'cdh', 'hadoop', 'cdh_manager', 'cdh_pg' ]

View File

@ -1,2 +1,12 @@
--- ---
# tasks file for ansible-role-template - import_tasks: cdm.yml
when: cdh_manager_install
- import_tasks: spark2.yml
when: cdh_manager_install
- import_tasks: oozie.yml
when: cdh_oozie_server
- import_tasks: zeppelin.yml
when: cdh_zeppelin_node
- import_tasks: services-haproxy.yml
when: cdh_impala_load_balancer

12
tasks/oozie.yml Normal file
View File

@ -0,0 +1,12 @@
---
- block:
- name: Get the Oozie external libraries needet to activate the web console
get_url: url={{ cdh_oozie_ext_libs_url }} dest=/srv/
- name: Unarchive the Oozie ext libraries needed to activate the web console
unarchive: remote_src=yes src=/srv/ext-2.2.zip dest=/var/lib/oozie owner=oozie group=oozie
args:
creates: /var/lib/oozie/ext2.2
when: cdh_oozie_server
tags: [ 'cdh', 'hadoop', 'cdh_manager', 'oozie' ]

View File

@ -0,0 +1,7 @@
---
- block:
- name: Install the haproxy configuration
template: src=hue-hive-impala_haproxy.cfg dest=/etc/haproxy/haproxy.cfg owner=root group=root
notify: Reload haproxy
tags: [ 'cdh', 'haproxy', 'impala', 'hue', 'hive' ]

8
tasks/spark2.yml Normal file
View File

@ -0,0 +1,8 @@
---
- block:
- name: Download the Spark2 jar file
get_url: url={{ cdh_spark2_csd_url }} dest={{ cdh_csd_directory }} owner=cloudera-scm group=cloudera-scm
notify: Restart cloudera-scm-server
when: cdh_spark2_enabled
tags: [ 'cdh', 'hadoop', 'spark' ]

45
tasks/zeppelin.yml Normal file
View File

@ -0,0 +1,45 @@
---
- block:
- name: Create a user that will run the zeppelin service
user: name={{ cdh_zeppelin_user }} home={{ cdh_zeppelin_home }} createhome=no shell=/usr/sbin/nologin system=yes
- name: Create the zeppelin home directory
file: dest={{ cdh_zeppelin_home }} state=directory mode=0755
- name: Create the zeppelin data directories
file: dest={{ item }} state=directory owner={{ cdh_zeppelin_user }} group={{ cdh_zeppelin_group }}
with_items: '{{ cdh_zeppelin_work_dirs }}'
- name: Download zeppelin
get_url: url={{ cdh_zeppelin_download_url }} dest=/srv/{{ chd_zeppelin_archive }}
- name: Unpack the zeppelin distribution
unarchive: remote_src=yes src=/srv/{{ chd_zeppelin_archive }} dest={{ cdh_zeppelin_home }} owner=root group=root
args:
creates: '{{ cdh_zeppelin_home }}/{{ cdh_zeppelin_dir }}'
- name: zeppelin wants to write into the conf directory
file: dest={{ cdh_zeppelin_home }}/{{ cdh_zeppelin_dir }}/conf owner={{ cdh_zeppelin_user }} group={{ cdh_zeppelin_group }}
- name: Install the zeppelin configuration files
template: src={{ item }} dest={{ cdh_zeppelin_home }}/{{ cdh_zeppelin_dir }}/conf/{{ item }} owner=root group={{ cdh_zeppelin_group }} mode=0440
with_items: '{{ cdh_zeppelin_conf_files }}'
notify: Restart zeppelin
tags: [ 'cdh', 'zeppelin', 'zeppelin_conf' ]
- name: Install the zeppelin upstart startup script
template: src=zeppelin_upstart.conf dest=/etc/init/zeppelin.conf
when: ansible_service_mgr != 'systemd'
- name: Install the zeppelin systemd startup script
template: src=zeppelin_systemd.service dest=/etc/systemd/system/zeppelin.service
when: ansible_service_mgr == 'systemd'
- name: Ensure that the zeppelin service is enabled and running
service: name=zeppelin state=started enabled=yes
- name: Ensure that apache2 installed by CDM is stopped and disabled
service: name=apache2 state=stopped enabled=no
notify: Restart nginx
tags: [ 'cdh', 'zeppelin' ]

View File

@ -0,0 +1,27 @@
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#
# This file describes the database connection.
#
# The database type
# Currently 'mysql', 'postgresql' and 'oracle' are valid databases.
com.cloudera.cmf.db.type=postgresql
# The database host
# If a non standard port is needed, use 'hostname:port'
com.cloudera.cmf.db.host={{ cdh_postgres_db_host }}
# The database name
com.cloudera.cmf.db.name={{ cdh_postgres_db_name }}
# The database user
com.cloudera.cmf.db.user={{ cdh_postgres_db_user }}
# The database user's password
com.cloudera.cmf.db.password={{ cdh_postgres_db_pwd }}
# The db setup type
# By default, it is set to INIT
# If scm-server uses Embedded DB then it is set to EMBEDDED
# If scm-server uses External DB then it is set to EXTERNAL
com.cloudera.cmf.db.setupType=EXTERNAL

View File

@ -0,0 +1,84 @@
global
# References:
# https://www.cloudera.com/documentation/enterprise/5-13-x/topics/impala_proxy.html
# https://www.cloudera.com/documentation/enterprise/5-13-x/topics/hue_sec_ha.html#concept_ef1_1pf_51b
#
# To have these messages end up in /var/log/haproxy.log you will
# need to:
#
# 1) configure syslog to accept network log events. This is done
# by adding the '-r' option to the SYSLOGD_OPTIONS in
# /etc/sysconfig/syslog
#
# 2) configure local2 events to go to the /var/log/haproxy.log
# file. A line like the following can be added to
# /etc/sysconfig/syslog
#
# local2.* /var/log/haproxy.log
#
log 127.0.0.1 local0
log 127.0.0.1 local1 notice
chroot /var/lib/haproxy
pidfile /run/haproxy.pid
maxconn 4000
user haproxy
group haproxy
daemon
# turn on stats unix socket
stats socket /var/lib/haproxy/stats
#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#
# You might need to adjust timing values to prevent timeouts.
#---------------------------------------------------------------------
defaults
mode http
log global
option httplog
option dontlognull
option http-server-close
option forwardfor except 127.0.0.0/8
option redispatch
retries 3
maxconn 3000
contimeout 5000
clitimeout 50000
srvtimeout 50000
#
# This sets up the admin page for HA Proxy at port 25002.
#
# listen stats :25002
# balance
# mode http
# stats enable
# stats auth username:password
# This is the setup for Impala. Impala client connect to load_balancer_host:25003.
# HAProxy will balance connections among the list of servers listed below.
# The list of Impalad is listening at port 21000 for beeswax (impala-shell) or original ODBC driver.
# For JDBC or ODBC version 2.x driver, use port 21050 instead of 21000.
listen impala :25003
mode tcp
option tcplog
balance leastconn
{% for host in groups['workers'] %}
server {{ host }} {{host }}:21000 check
{% endfor %}
# Setup for Hue or other JDBC-enabled applications.
# In particular, Hue requires sticky sessions.
# The application connects to load_balancer_host:21051, and HAProxy balances
# connections to the associated hosts, where Impala listens for JDBC
# requests on port 21050.
listen impalajdbc :21051
mode tcp
option tcplog
balance source
{% for host in groups['workers'] %}
server {{ host }} {{host }}:21050 check
{% endfor %}

122
templates/shiro.ini Normal file
View File

@ -0,0 +1,122 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
[users]
# List of users with their password allowed to access Zeppelin.
# To use a different strategy (LDAP / Database / ...) check the shiro doc at http://shiro.apache.org/configuration.html#Configuration-INISections
{% if not cdh_zeppelin_ldap_enabled %}
admin = {{ cdh_zeppelin_admin_password | default('admin') }}, admin
{% endif %}
# Sample LDAP configuration, for user Authentication, currently tested for single Realm
[main]
{% if cdh_zeppelin_ldap_enabled %}
{% if cdh_zeppelin_ldap_advanced_config %}
ldapRealm = org.apache.zeppelin.realm.LdapRealm
ldapRealm.contextFactory.url = {{ cdh_zeppelin_ldap_url }}
ldapRealm.contextFactory.authenticationMechanism=simple
ldapRealm.contextFactory.systemUsername = {{ cdh_zeppelin_ldap_bind_u }}
ldapRealm.contextFactory.systemPassword = {{ cdh_zeppelin_ldap_bind_pwd }}
ldapRealm.authorizationEnabled = true
#ldapRealm.userDnTemplate = {{ cdh_zeppelin_username_pattern }}
# Ability to set ldap paging Size if needed default is 100
#ldapRealm.pagingSize = 200
#ldapRealm.searchBase = {{ cdh_zeppelin_ldap_search_base }}
ldapRealm.userSearchBase = {{ cdh_zeppelin_ldap_users_base }}
ldapRealm.userObjectClass = {{ cdh_zeppelin_user_objectclass }}
ldapRealm.groupSearchBase = {{ cdh_zeppelin_ldap_group_base }}
ldapRealm.groupObjectClass = {{ cdh_zeppelin_ldap_group_obj_class }}
ldapRealm.groupSearchFilter = '{{ cdh_zeppelin_group_search_filter }}'
# Allow userSearchAttribute to be customized
ldapRealm.userSearchAttributeName = {{ cdh_zeppelin_ldap_user_attr }}
ldapRealm.memberAttribute = {{ cdh_zeppelin_ldap_member_attr }}
# force usernames returned from ldap to lowercase useful for AD
#ldapRealm.userLowerCase = true
#ldapRealm.memberAttributeValueTemplate = {{cdh_zeppelin_group_template}}
# enable support for nested groups using the LDAPMATCHINGRULEINCHAIN operator
#ldapRealm.groupSearchEnableMatchingRuleInChain = {{ cdh_zeppelin_ldap_nested_groups }}
# optional mapping from physical groups to logical application roles
ldapRealm.rolesByGroup = {{ cdh_zeppelin_ldap_roles_by_group }}
# optional list of roles that are allowed to authenticate. In case not present all groups are allowed to authenticate (login).
# This changes nothing for url specific permissions that will continue to work as specified in [urls].
#ldapRealm.allowedRolesForAuthentication = adminrole,userrole
#ldapRealm.permissionsByRole = userrole = *:ToDoItem::*, adminrole = *
{% else %}
### A sample for configuring LDAP Directory Realm
ldapRealm = org.apache.zeppelin.realm.LdapGroupRealm
## search base for ldap groups (only relevant for LdapGroupRealm):
ldapRealm.contextFactory.environment[ldap.searchBase] = {{ cdh_zeppelin_ldap_search_base }}
ldapRealm.contextFactory.url = {{ cdh_zeppelin_ldap_url }}
ldapRealm.userDnTemplate = {{ cdh_zeppelin_username_pattern }}
ldapRealm.contextFactory.authenticationMechanism = simple
{% endif %}
securityManager.realms = $ldapRealm
{% endif %}
### A sample PAM configuration
#pamRealm=org.apache.zeppelin.realm.PamRealm
#pamRealm.service=sshd
### A sample for configuring ZeppelinHub Realm
#zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm
## Url of ZeppelinHub
#zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com
#securityManager.realms = $zeppelinHubRealm
sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager
### If caching of user is required then uncomment below lines
#cacheManager = org.apache.shiro.cache.MemoryConstrainedCacheManager
#securityManager.cacheManager = $cacheManager
### Enables 'HttpOnly' flag in Zeppelin cookies
cookie = org.apache.shiro.web.servlet.SimpleCookie
cookie.name = JSESSIONID
cookie.httpOnly = true
### Uncomment the below line only when Zeppelin is running over HTTPS
cookie.secure = true
sessionManager.sessionIdCookie = $cookie
securityManager.sessionManager = $sessionManager
# 86,400,000 milliseconds = 24 hour
securityManager.sessionManager.globalSessionTimeout = 86400000
shiro.loginUrl = /api/login
[roles]
admin = *
user = *
[urls]
# This section is used for url-based security.
# You can secure interpreter, configuration and credential information by urls. Comment or uncomment the below urls that you want to hide.
# anon means the access is anonymous.
# authc means Form based Auth Security
# To enfore security, comment the line below and uncomment the next one
/api/version = anon
{% if not cdh_zeppelin_secure_urls %}
/** = anon
{% else %}
{% if cdh_zeppelin_ldap_advanced_config %}
/api/interpreter/setting/restart/** = authc
/api/interpreter/** = {{ cdh_zeppelin_secure_roles }}
/api/configurations/** = {{ cdh_zeppelin_secure_roles }}
/api/credential/** = {{ cdh_zeppelin_secure_roles }}
/** = authc
{% else %}
/** = authc
{% endif %}
{% endif %}

43
templates/zeppelin-env.sh Normal file
View File

@ -0,0 +1,43 @@
#!/bin/bash
export ZEPPELIN_LOG_DIR="{{ cdh_zeppelin_home }}/log"
export ZEPPELIN_PID_DIR="{{ cdh_zeppelin_home }}/run"
export ZEPPELIN_WAR_TEMPDIR="{{ cdh_zeppelin_home }}/base_tmp/tmp"
export ZEPPELIN_NOTEBOOK_DIR="{{ cdh_zeppelin_home }}/notebook"
export ZEPPELIN_MEM="-Xms4096m -Xmx4096m"
export ZEPPELIN_INTP_MEM="-Xms4096m -Xmx4096m"
{% if cdh_zeppelin_use_spark2 %}
export SPARK_HOME=/opt/cloudera/parcels/SPARK2-2.2.0.cloudera2-1.cdh5.12.0.p0.232957/lib/spark2
{% else %}
# export MASTER= # Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode.
export SPARK_HOME=/opt/cloudera/parcels/CDH-5.9.3-1.cdh5.9.3.p0.4/lib/spark
{% endif %}
export DEFAULT_HADOOP_HOME=/opt/cloudera/parcels/CDH-5.9.3-1.cdh5.9.3.p0.4/lib/hadoop
export SPARK_JAR_HDFS_PATH=${SPARK_JAR_HDFS_PATH:-''}
export SPARK_LAUNCH_WITH_SCALA=0
export SPARK_LIBRARY_PATH=${SPARK_HOME}/lib
export SCALA_LIBRARY_PATH=${SPARK_HOME}/lib
SPARK_PYTHON_PATH=""
if [ -n "$SPARK_PYTHON_PATH" ]; then
export PYTHONPATH="$PYTHONPATH:$SPARK_PYTHON_PATH"
fi
export HADOOP_HOME=${HADOOP_HOME:-$DEFAULT_HADOOP_HOME}
if [ -n "$HADOOP_HOME" ]; then
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${HADOOP_HOME}/lib/native
fi
SPARK_EXTRA_LIB_PATH=""
if [ -n "$SPARK_EXTRA_LIB_PATH" ]; then
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$SPARK_EXTRA_LIB_PATH
fi
export LD_LIBRARY_PATH
HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-$SPARK_CONF_DIR/yarn-conf}
HIVE_CONF_DIR=${HIVE_CONF_DIR:-/etc/hive/conf}
export MASTER=yarn-client

360
templates/zeppelin-site.xml Normal file
View File

@ -0,0 +1,360 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>
<property>
<name>zeppelin.server.addr</name>
<value>0.0.0.0</value>
<description>Server address</description>
</property>
<property>
<name>zeppelin.server.port</name>
<value>8080</value>
<description>Server port.</description>
</property>
<property>
<name>zeppelin.server.ssl.port</name>
<value>8443</value>
<description>Server ssl port. (used when ssl property is set to true)</description>
</property>
<property>
<name>zeppelin.server.context.path</name>
<value>/</value>
<description>Context Path of the Web Application</description>
</property>
<property>
<name>zeppelin.war.tempdir</name>
<value>webapps</value>
<description>Location of jetty temporary directory</description>
</property>
<property>
<name>zeppelin.notebook.dir</name>
<value>notebook</value>
<description>path or URI for notebook persist</description>
</property>
<property>
<name>zeppelin.notebook.homescreen</name>
<value></value>
<description>id of notebook to be displayed in homescreen. ex) 2A94M5J1Z Empty value displays default home screen</description>
</property>
<property>
<name>zeppelin.notebook.homescreen.hide</name>
<value>false</value>
<description>hide homescreen notebook from list when this value set to true</description>
</property>
<!-- Amazon S3 notebook storage -->
<!-- Creates the following directory structure: s3://{bucket}/{username}/{notebook-id}/note.json -->
<!--
<property>
<name>zeppelin.notebook.s3.user</name>
<value>user</value>
<description>user name for s3 folder structure</description>
</property>
<property>
<name>zeppelin.notebook.s3.bucket</name>
<value>zeppelin</value>
<description>bucket name for notebook storage</description>
</property>
<property>
<name>zeppelin.notebook.s3.endpoint</name>
<value>s3.amazonaws.com</value>
<description>endpoint for s3 bucket</description>
</property>
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.S3NotebookRepo</value>
<description>notebook persistence layer implementation</description>
</property>
-->
<!-- Additionally, encryption is supported for notebook data stored in S3 -->
<!-- Use the AWS KMS to encrypt data -->
<!-- If used, the EC2 role assigned to the EMR cluster must have rights to use the given key -->
<!-- See https://aws.amazon.com/kms/ and http://docs.aws.amazon.com/kms/latest/developerguide/concepts.html -->
<!--
<property>
<name>zeppelin.notebook.s3.kmsKeyID</name>
<value>AWS-KMS-Key-UUID</value>
<description>AWS KMS key ID used to encrypt notebook data in S3</description>
</property>
-->
<!-- provide region of your KMS key -->
<!-- See http://docs.aws.amazon.com/general/latest/gr/rande.html#kms_region for region codes names -->
<!--
<property>
<name>zeppelin.notebook.s3.kmsKeyRegion</name>
<value>us-east-1</value>
<description>AWS KMS key region in your AWS account</description>
</property>
-->
<!-- Use a custom encryption materials provider to encrypt data -->
<!-- No configuration is given to the provider, so you must use system properties or another means to configure -->
<!-- See https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/model/EncryptionMaterialsProvider.html -->
<!--
<property>
<name>zeppelin.notebook.s3.encryptionMaterialsProvider</name>
<value>provider implementation class name</value>
<description>Custom encryption materials provider used to encrypt notebook data in S3</description>
</property>
-->
<!-- If using Azure for storage use the following settings -->
<!--
<property>
<name>zeppelin.notebook.azure.connectionString</name>
<value>DefaultEndpointsProtocol=https;AccountName=<accountName>;AccountKey=<accountKey></value>
<description>Azure account credentials</description>
</property>
<property>
<name>zeppelin.notebook.azure.share</name>
<value>zeppelin</value>
<description>share name for notebook storage</description>
</property>
<property>
<name>zeppelin.notebook.azure.user</name>
<value>user</value>
<description>optional user name for Azure folder structure</description>
</property>
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.AzureNotebookRepo</value>
<description>notebook persistence layer implementation</description>
</property>
-->
<!-- Notebook storage layer using local file system
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.VFSNotebookRepo</value>
<description>local notebook persistence layer implementation</description>
</property>
-->
<!-- For connecting your Zeppelin with ZeppelinHub -->
<!--
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.GitNotebookRepo, org.apache.zeppelin.notebook.repo.zeppelinhub.ZeppelinHubRepo</value>
<description>two notebook persistence layers (versioned local + ZeppelinHub)</description>
</property>
-->
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.GitNotebookRepo</value>
<description>versioned notebook persistence layer implementation</description>
</property>
<property>
<name>zeppelin.notebook.one.way.sync</name>
<value>false</value>
<description>If there are multiple notebook storages, should we treat the first one as the only source of truth?</description>
</property>
<property>
<name>zeppelin.interpreter.dir</name>
<value>interpreter</value>
<description>Interpreter implementation base directory</description>
</property>
<property>
<name>zeppelin.interpreter.localRepo</name>
<value>local-repo</value>
<description>Local repository for interpreter's additional dependency loading</description>
</property>
<property>
<name>zeppelin.interpreter.dep.mvnRepo</name>
<value>http://repo1.maven.org/maven2/</value>
<description>Remote principal repository for interpreter's additional dependency loading</description>
</property>
<property>
<name>zeppelin.dep.localrepo</name>
<value>local-repo</value>
<description>Local repository for dependency loader</description>
</property>
<property>
<name>zeppelin.helium.npm.registry</name>
<value>http://registry.npmjs.org/</value>
<description>Remote Npm registry for Helium dependency loader</description>
</property>
<property>
<name>zeppelin.interpreters</name>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.python.PythonInterpreterPandasSql,org.apache.zeppelin.python.PythonCondaInterpreter,org.apache.zeppelin.python.PythonDockerInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivyPySpark3Interpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter,org.apache.zeppelin.pig.PigInterpreter,org.apache.zeppelin.pig.PigQueryInterpreter,org.apache.zeppelin.scio.ScioInterpreter</value>
<description>Comma separated interpreter configurations. First interpreter become a default</description>
</property>
<property>
<name>zeppelin.interpreter.group.order</name>
<value>spark,md,angular,sh,livy,alluxio,file,psql,flink,python,ignite,lens,cassandra,geode,kylin,elasticsearch,scalding,jdbc,hbase,bigquery,beam</value>
<description></description>
</property>
<property>
<name>zeppelin.interpreter.connect.timeout</name>
<value>30000</value>
<description>Interpreter process connect timeout in msec.</description>
</property>
<property>
<name>zeppelin.interpreter.output.limit</name>
<value>102400</value>
<description>Output message from interpreter exceeding the limit will be truncated</description>
</property>
<property>
<name>zeppelin.ssl</name>
<value>false</value>
<description>Should SSL be used by the servers?</description>
</property>
<property>
<name>zeppelin.ssl.client.auth</name>
<value>false</value>
<description>Should client authentication be used for SSL connections?</description>
</property>
<property>
<name>zeppelin.ssl.keystore.path</name>
<value>keystore</value>
<description>Path to keystore relative to Zeppelin configuration directory</description>
</property>
<property>
<name>zeppelin.ssl.keystore.type</name>
<value>JKS</value>
<description>The format of the given keystore (e.g. JKS or PKCS12)</description>
</property>
<property>
<name>zeppelin.ssl.keystore.password</name>
<value>change me</value>
<description>Keystore password. Can be obfuscated by the Jetty Password tool</description>
</property>
<!--
<property>
<name>zeppelin.ssl.key.manager.password</name>
<value>change me</value>
<description>Key Manager password. Defaults to keystore password. Can be obfuscated.</description>
</property>
-->
<property>
<name>zeppelin.ssl.truststore.path</name>
<value>truststore</value>
<description>Path to truststore relative to Zeppelin configuration directory. Defaults to the keystore path</description>
</property>
<property>
<name>zeppelin.ssl.truststore.type</name>
<value>JKS</value>
<description>The format of the given truststore (e.g. JKS or PKCS12). Defaults to the same type as the keystore type</description>
</property>
<!--
<property>
<name>zeppelin.ssl.truststore.password</name>
<value>change me</value>
<description>Truststore password. Can be obfuscated by the Jetty Password tool. Defaults to the keystore password</description>
</property>
-->
<property>
<name>zeppelin.server.allowed.origins</name>
<value>*</value>
<description>Allowed sources for REST and WebSocket requests (i.e. http://onehost:8080,http://otherhost.com). If you leave * you are vulnerable to https://issues.apache.org/jira/browse/ZEPPELIN-173</description>
</property>
<property>
<name>zeppelin.anonymous.allowed</name>
<value>true</value>
<description>Anonymous user allowed by default</description>
</property>
<property>
<name>zeppelin.notebook.public</name>
<value>{{ cdh_zeppelin_notebook_public }}</value>
<description>Make notebook public by default when created, private otherwise</description>
</property>
<property>
<name>zeppelin.websocket.max.text.message.size</name>
<value>1024000</value>
<description>Size in characters of the maximum text message to be received by websocket. Defaults to 1024000</description>
</property>
<property>
<name>zeppelin.server.default.dir.allowed</name>
<value>false</value>
<description>Enable directory listings on server.</description>
</property>
<!--
<property>
<name>zeppelin.server.jetty.name</name>
<value>Jetty(7.6.0.v20120127)</value>
<description>Hardcoding Application Server name to Prevent Fingerprinting</description>
</property>
-->
<!--
<property>
<name>zeppelin.server.xframe.options</name>
<value>SAMEORIGIN</value>
<description>The X-Frame-Options HTTP response header can be used to indicate whether or not a browser should be allowed to render a page in a frame/iframe/object.</description>
</property>
-->
<!--
<property>
<name>zeppelin.server.strict.transport</name>
<value>max-age=631138519</value>
<description>The HTTP Strict-Transport-Security response header is a security feature that lets a web site tell browsers that it should only be communicated with using HTTPS, instead of using HTTP. Enable this when Zeppelin is running on HTTPS. Value is in Seconds, the default value is equivalent to 20 years.</description>
</property>
-->
<!--
<property>
<name>zeppelin.server.xxss.protection</name>
<value>1</value>
<description>The HTTP X-XSS-Protection response header is a feature of Internet Explorer, Chrome and Safari that stops pages from loading when they detect reflected cross-site scripting (XSS) attacks. When value is set to 1 and a cross-site scripting attack is detected, the browser will sanitize the page (remove the unsafe parts).</description>
</property>
-->
</configuration>

View File

@ -0,0 +1,12 @@
[Unit]
Description=Zeppelin
After=network.target
[Service]
Type=simple
User={{ cdh_zeppelin_user }}
Group={{ cdh_zeppelin_group }}
WorkingDirectory={{ cdh_zeppelin_home }}/{{ cdh_zeppelin_dir }}
ExecStart={{ cdh_zeppelin_home }}/{{ cdh_zeppelin_dir }}/bin/zeppelin-daemon.sh upstart

View File

@ -0,0 +1,23 @@
description "Zeppelin"
start on (local-filesystems and net-device-up IFACE!=lo)
stop on shutdown
# Respawn the process on unexpected termination
respawn
# respawn the job up to 7 times within a 5 second period.
# If the job exceeds these values, it will be stopped and marked as failed.
respawn limit 7 5
# Run unprivileged
setuid {{ cdh_zeppelin_user }}
setgid {{ cdh_zeppelin_group }}
pre-start script
mkdir -p /srv/zeppelin/base_tmp/tmp
end script
# zeppelin was installed in /usr/share/zeppelin in this example
chdir {{ cdh_zeppelin_home }}/{{ cdh_zeppelin_dir }}
exec bin/zeppelin-daemon.sh upstart