CKAN Ansible role: supervisor-managed harvest consumers, optional LDAP plugin,
configurable log directory.

Summary of this patch (free text placed before the first "diff --git" header;
git apply and patch(1) skip it):
  * defaults: adds ckan_logdir, ckan_harvester_run, the ckan_ldap_* settings
    and ckan_gather_fetch_pkgs; turns ckan_geonetwork_harvester off by
    default; builds ckan.site_url from ansible_fqdn; shortens the default
    ckan.plugins list.
  * handlers: adds a handler that makes supervisord pick up the harvesting
    program definitions.
  * tasks: uses ckan_logdir for log paths, corrects the ckanext-spatial
    initdb task name and tags, runs the tracker update at 03:00, adds the
    LDAP plugin download/install tasks, installs supervisor and its
    configuration, and optionally installs an hourly "harvester run" cron job.
  * templates: adds the supervisor configuration for the gather and fetch
    consumers; adds a lock file to the tracker update script so that
    a second run exits while one is in progress.

diff --git a/ckan/defaults/main.yml b/ckan/defaults/main.yml
index 8adc2e5f..5809d594 100644
--- a/ckan/defaults/main.yml
+++ b/ckan/defaults/main.yml
@@ -18,12 +18,15 @@ ckan_file_storage_dir: /var/lib/ckan/dev
 ckan_config_file: /etc/ckan/default/production.ini
 ckan_solr_port: 8983
 ckan_shell_user: ckan
+ckan_logdir: /var/log/ckan
 
 # The order is important
-ckan_geonetwork_harvester: True
+ckan_geonetwork_harvester: False
 ckan_ckanext_harvester_url: 'git+https://github.com/ckan/ckanext-harvest.git#egg=ckanext-harvest'
 ckan_ckanext_spatial_url: 'git+https://github.com/okfn/ckanext-spatial.git#egg=ckanext-spatial'
 ckan_geonetwork_harvester_url: 'https://github.com/geosolutions-it/ckanext-geonetwork.git'
+# Set this to true to install a cron job that regularly runs the harvesters
+ckan_harvester_run: False
 ckan_pdfview: False
 ckan_ckanext_pdfview_url: ckanext-pdfview
 ckan_privatedatasets: False
@@ -35,6 +38,18 @@ ckan_hierarchy_name: hierarchy_display hierarchy_form
 ckan_pages: False
 ckan_pages_url: 'git+https://github.com/ckan/ckanext-pages.git#egg=ckanext-pages'
 ckan_pages_name: pages
+ckan_ldap: False
+#ckan_ldap_url: 'git+https://github.com/NaturalHistoryMuseum/ckanext-ldap'
+ckan_ldap_url: 'https://github.com/NaturalHistoryMuseum/ckanext-ldap'
+ckan_ldap_name: ldap
+ckan_ldap_uri: 'ldap://ldap.example.org'
+ckan_ldap_base_dn: ''
+ckan_ldap_search_filter: 'uid={login}'
+ckan_ldap_user_fullname: 'cn'
+ckan_ldap_username: uid
+ckan_ldap_email: mail
+ckan_ldap_prevent_edits: True
+ckan_ldap_fallback: True
 
 users_system_users:
   - { login: '{{ ckan_shell_user }}', name: "CKAN user", home: '/usr/lib', createhome: 'no', ssh_key: '{{ francesco_mangiacrapa }}', shell: '/bin/bash', admin: False, log_as_root: True }
@@ -66,12 +81,12 @@ apache_additional_modules:
 ckan_production_ini_opts:
   - { section: 'app:main', option: 'ckan.site_id', value: 'd4s_dev', state: 'present' }
   - { section: 'app:main', option: 'sqlalchemy.url', value: 'postgresql://{{ ckan_db_user }}:{{ ckan_db_pwd }}@{{ psql_db_host }}/{{ ckan_db_name }}', state: 'present' }
-  - { section: 'app:main', option: 'ckan.site_url', value: 'http://ckan-d-d4s.d4science.org', state: 'present' }
+  - { section: 'app:main', option: 'ckan.site_url', value: 'http://{{ ansible_fqdn }}', state: 'present' }
   - { section: 'app:main', option: 'solr_url', value: 'http://127.0.0.1:{{ ckan_solr_port }}/solr', state: 'present' }
   - { section: 'app:main', option: 'ckan.datastore.write_url', value: 'postgresql://{{ ckan_db_user }}:{{ ckan_db_pwd }}@{{ psql_db_host }}/{{ ckan_datastore_db_name }}', state: 'present' }
   - { section: 'app:main', option: 'ckan.datastore.read_url', value: 'postgresql://{{ ckan_datastore_db_reader }}:{{ ckan_db_pwd }}@{{ psql_db_host }}/{{ ckan_datastore_db_name }}', state: 'present' }
 # - { section: 'app:main', option: 'ckan.plugins', value: 'stats text_view image_view recline_view', state: 'present' }
-  - { section: 'app:main', option: 'ckan.plugins', value: 'stats text_view image_view recline_view datastore datapusher harvest ckan_harvester spatial_metadata spatial_query csw_harvester waf_harvester doc_harvester geonetwork_harvester pdf_view {{ ckan_privatedatasets_name }} {{ ckan_hierarchy_name }} {{ ckan_pages_name }}', state: 'present' }
+  - { section: 'app:main', option: 'ckan.plugins', value: 'stats text_view image_view recline_view datastore datapusher harvest', state: 'present' }
   - { section: 'app:main', option: 'ckan.site_title', value: 'D4Science CKAN development installation', state: 'present' }
   - { section: 'app:main', option: 'ckan.site_logo', value: '/base/images/ckan-logo.png', state: 'present' }
   - { section: 'app:main', option: 'ckan.datapusher.url', value: 'http://127.0.0.1:8800', state: 'present' }
@@ -93,3 +108,5 @@ ckan_production_ini_opts:
   - { section: 'app:main', option: 'ckanext.pages.group_menu', value: 'false', state: 'absent' }
   - { section: 'app:main', option: 'ckanext.pages.organization_menu', value: 'false', state: 'absent' }
 
+ckan_gather_fetch_pkgs:
+  - supervisor
diff --git a/ckan/handlers/main.yml b/ckan/handlers/main.yml
index dd8f55ac..addf2604 100644
--- a/ckan/handlers/main.yml
+++ b/ckan/handlers/main.yml
@@ -4,3 +4,6 @@
 
 - name: Restart CKAN
   service: name=apache2 state=restarted sleep=10
+
+- name: Reconfigure the supervisor daemon
+  shell: supervisorctl reread && supervisorctl update
diff --git a/ckan/tasks/main.yml b/ckan/tasks/main.yml
index 78401bbb..7da5237b 100644
--- a/ckan/tasks/main.yml
+++ b/ckan/tasks/main.yml
@@ -44,7 +44,7 @@
   tags: [ 'ckan', 'ckan_user' ]
 
 - name: Create a log directory for the jobs run by the ckan user
-  file: dest=/var/log/ckan state=directory owner={{ ckan_shell_user }} group={{ ckan_shell_user }}
+  file: dest={{ ckan_logdir }} state=directory owner={{ ckan_shell_user }} group={{ ckan_shell_user }}
   tags: [ 'ckan', 'ckan_user' ]
 
 - name: Install some plugins dependencies inside the CKAN virtualenv
@@ -94,12 +94,12 @@
   when: ckan_geonetwork_harvester
   tags: [ 'ckan', 'ckan_spatial', 'ckan_plugins' ]
 
-- name: Initialize the CKAN ckanext-harvest plugin
+- name: Initialize the CKAN ckanext-spatial plugin
   become: True
   become_user: '{{ ckan_shell_user }}'
   shell: . /usr/lib/ckan/default/bin/activate ; paster --plugin=ckanext-spatial spatial initdb --config={{ ckan_config_file }}
   when: ( ckanext_spatial_install | changed )
-  tags: [ 'ckan', 'ckan_harvest', 'ckan_plugins' ]
+  tags: [ 'ckan', 'ckan_spatial', 'ckan_plugins' ]
 
 - name: Download the CKAN Geonetwork plugin code
   become: True
@@ -123,7 +123,7 @@
   tags: [ 'ckan', 'ckan_geonetwork', 'ckan_plugins', 'tracker' ]
 
 - name: Install the cron job that runs the tracker update script
-  cron: name="tracker update" minute="0" job="{{ ckan_virtenv }}/bin/tracker_update > /var/log/ckan/tracker_update.log 2>&1" user={{ ckan_shell_user }}
+  cron: name="tracker update" minute="0" hour="3" job="{{ ckan_virtenv }}/bin/tracker_update > {{ ckan_logdir }}/tracker_update.log 2>&1" user={{ ckan_shell_user }}
   when: ckan_geonetwork_harvester
   tags: [ 'ckan', 'ckan_geonetwork', 'ckan_plugins', 'tracker' ]
 
@@ -159,6 +159,22 @@
   notify: Restart CKAN
   tags: [ 'ckan', 'ckan_pages', 'ckan_plugins' ]
 
+- name: Download the CKAN LDAP plugin code
+  become: True
+  become_user: '{{ ckan_shell_user }}'
+  git: repo={{ ckan_ldap_url }} dest=/usr/lib/ckan/default/src/ckanext-ldap
+  when: ckan_ldap
+  register: install_ldap_plugin
+  tags: [ 'ckan', 'ckan_ldap', 'ckan_plugins' ]
+
+- name: Install the CKAN LDAP plugin code
+  become: True
+  become_user: '{{ ckan_shell_user }}'
+  shell: . /usr/lib/ckan/default/bin/activate ; cd /usr/lib/ckan/default/src/ckanext-ldap ; python setup.py develop
+  when: ( install_ldap_plugin | changed )
+  notify: Restart CKAN
+  tags: [ 'ckan', 'ckan_ldap', 'ckan_plugins' ]
+
 # - name: Overwrite the base.py ckanext-spatial plugin file to enable authentication against the Geonetwork nodes
 #   copy: src=base.py dest=/usr/lib/ckan/default/src/ckanext-spatial/ckanext/spatial/harvesters/base.py owner={{ ckan_shell_user }} group={{ ckan_shell_user }} mode=0644 backup=yes
 #   notify: Restart CKAN
@@ -174,6 +190,21 @@
 #   notify: Restart CKAN
 #   tags: [ 'ckan', 'ckan_pages', 'ckan_plugins', 'ckan_ckanext_spatial' ]
 
+- name: Install the supervisor daemon needed to automate the gather and fetch operations
+  apt: pkg={{ item }} state=present
+  with_items: '{{ ckan_gather_fetch_pkgs }}'
+  tags: [ 'ckan', 'ckan_harvest' ]
+
+- name: Install the gather and fetch supervisor configuration
+  template: src=ckan_harvesting.conf.j2 dest=/etc/supervisor/conf.d/ckan_harvesting.conf owner=root group=root mode=0644
+  notify: Reconfigure the supervisor daemon
+  tags: [ 'ckan', 'ckan_harvest' ]
+
+- name: Install a cron job that regularly runs the harvesters
+  cron: name="CKAN harvester" minute="0" job="{{ ckan_virtenv }}/bin/paster --plugin=ckanext-harvest harvester run --config={{ ckan_config_file }} > {{ ckan_logdir }}/harvester_run.log 2>&1" user={{ ckan_shell_user }}
+  when: ckan_harvester_run
+  tags: [ 'ckan', 'ckan_harvest', 'ckan_harvest_cron' ]
+
 - name: Restart apache
   service: name=apache state=restarted enabled=yes
   when: ( ckan_install | changed )
@@ -184,3 +215,6 @@
   when: ( ckan_install | changed )
   tags: ckan
 
+- name: Ensure that supervisord is running and enabled
+  service: name=supervisor state=started enabled=yes
+  tags: [ 'ckan', 'ckan_harvest' ]
diff --git a/ckan/templates/ckan_harvesting.conf.j2 b/ckan/templates/ckan_harvesting.conf.j2
new file mode 100644
index 00000000..d050b836
--- /dev/null
+++ b/ckan/templates/ckan_harvesting.conf.j2
@@ -0,0 +1,31 @@
+; ===============================
+; ckan harvester
+; ===============================
+
+[program:ckan_gather_consumer]
+
+command={{ ckan_virtenv }}/bin/paster --plugin=ckanext-harvest harvester gather_consumer --config={{ ckan_config_file }}
+
+; user that owns virtual environment.
+user={{ ckan_shell_user }}
+
+numprocs=1
+stdout_logfile={{ ckan_logdir }}/gather_consumer.log
+stderr_logfile={{ ckan_logdir }}/gather_consumer.log
+autostart=true
+autorestart=true
+startsecs=10
+
+[program:ckan_fetch_consumer]
+
+command={{ ckan_virtenv }}/bin/paster --plugin=ckanext-harvest harvester fetch_consumer --config={{ ckan_config_file }}
+
+; user that owns virtual environment.
+user={{ ckan_shell_user }}
+
+numprocs=1
+stdout_logfile={{ ckan_logdir }}/fetch_consumer.log
+stderr_logfile={{ ckan_logdir }}/fetch_consumer.log
+autostart=true
+autorestart=true
+startsecs=10
diff --git a/ckan/templates/tracker_update.sh.j2 b/ckan/templates/tracker_update.sh.j2
index 5c0bd41a..fa93312a 100644
--- a/ckan/templates/tracker_update.sh.j2
+++ b/ckan/templates/tracker_update.sh.j2
@@ -1,8 +1,17 @@
 #!/bin/bash
 
+LOCK_DIR={{ ckan_logdir }}
+LOCK_FILE=$LOCK_DIR/.index_rebuild.lock
 . {{ ckan_virtenv }}/bin/activate
 
+if [ -f $LOCK_FILE ] ; then
+    echo 'A lock file is present, exiting'
+    exit 2
+fi
+
+echo "cron pid: ${$}" > $LOCK_FILE
 paster --plugin=ckan tracking update -c {{ ckan_config_file }}
 paster --plugin=ckan search-index rebuild -r -c {{ ckan_config_file }}
+rm -f $LOCK_FILE
 
 exit 0