library/roles/ckan: Split the role in 'ckan' and 'ckan-solr' because solr can run on a different host.

This commit is contained in:
Andrea Dell'Amico 2016-06-18 16:06:16 +02:00
parent 76b8a3af4f
commit 4358e9fc3a
17 changed files with 276 additions and 79 deletions

View File

@ -0,0 +1,2 @@
---
ckan_solr_port: 8983

View File

@ -0,0 +1,3 @@
---
- name: Solr Restart
service: name=tomcat-instance-{{ ckan_solr_port }} state=restarted

View File

@ -0,0 +1,12 @@
---
- name: Install the solr schema used by CKAN
file: src=/usr/lib/ckan/default/src/ckan/ckan/config/solr/schema.xml dest={{ tomcat_m_instances_base_path }}/{{ ckan_solr_port }}/solr/data/solr/collection1/conf/schema.xml state=link force=yes
when: not ckan_geonetwork_harvester
notify: Solr Restart
tags: [ 'ckan', 'solr', 'solr_schema' ]
- name: Install the solr schema used by CKAN, modified with the spatial fields
copy: src=schema.xml dest={{ tomcat_m_instances_base_path }}/{{ ckan_solr_port }}/solr/data/solr/collection1/conf/schema.xml force=yes
when: ckan_geonetwork_harvester
notify: Solr Restart
tags: [ 'ckan', 'solr', 'solr_schema' ]

187
ckan/ckan/files/schema.xml Normal file
View File

@ -0,0 +1,187 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
NB Please copy changes to this file into the multilingual schema:
ckanext/multilingual/solr/schema.xml
-->
<!-- We update the version when there is a backward-incompatible change to this
schema. In this case the version should be set to the next CKAN version number.
(x.y but not x.y.z since it needs to be a float) -->
<schema name="ckan" version="2.3">
<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
<fieldtype name="binary" class="solr.BinaryField"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
<!-- A general unstemmed text field - good if one does not know the language of the field -->
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
</types>
<fields>
<field name="index_id" type="string" indexed="true" stored="true" required="true" />
<field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="site_id" type="string" indexed="true" stored="true" required="true" />
<field name="title" type="text" indexed="true" stored="true" />
<field name="entity_type" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="dataset_type" type="string" indexed="true" stored="true" />
<field name="state" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="name" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="revision_id" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="version" type="string" indexed="true" stored="true" />
<field name="url" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="ckan_url" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="download_url" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="notes" type="text" indexed="true" stored="true"/>
<field name="author" type="textgen" indexed="true" stored="true" />
<field name="author_email" type="textgen" indexed="true" stored="true" />
<field name="maintainer" type="textgen" indexed="true" stored="true" />
<field name="maintainer_email" type="textgen" indexed="true" stored="true" />
<field name="license" type="string" indexed="true" stored="true" />
<field name="license_id" type="string" indexed="true" stored="true" />
<field name="ratings_count" type="int" indexed="true" stored="false" />
<field name="ratings_average" type="float" indexed="true" stored="false" />
<field name="tags" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="groups" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="organization" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="capacity" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="res_name" type="textgen" indexed="true" stored="true" multiValued="true" />
<field name="res_description" type="textgen" indexed="true" stored="true" multiValued="true"/>
<field name="res_format" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="res_url" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="res_type" type="string" indexed="true" stored="true" multiValued="true"/>
<!-- Fields needed by the spatial extension-->
<field name="bbox_area" type="float" indexed="true" stored="true" />
<field name="maxx" type="float" indexed="true" stored="true" />
<field name="maxy" type="float" indexed="true" stored="true" />
<field name="minx" type="float" indexed="true" stored="true" />
<field name="miny" type="float" indexed="true" stored="true" />
<!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema -->
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="urls" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="depends_on" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="dependency_of" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="derives_from" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="has_derivation" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="links_to" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="linked_from" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="child_of" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="parent_of" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="views_total" type="int" indexed="true" stored="false"/>
<field name="views_recent" type="int" indexed="true" stored="false"/>
<field name="resources_accessed_total" type="int" indexed="true" stored="false"/>
<field name="resources_accessed_recent" type="int" indexed="true" stored="false"/>
<field name="metadata_created" type="date" indexed="true" stored="true" multiValued="false"/>
<field name="metadata_modified" type="date" indexed="true" stored="true" multiValued="false"/>
<field name="indexed_ts" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
<!-- Copy the title field into titleString, and treat as a string
(rather than text type). This allows us to sort on the titleString -->
<field name="title_string" type="string" indexed="true" stored="false" />
<field name="data_dict" type="string" indexed="false" stored="true" />
<field name="validated_data_dict" type="string" indexed="false" stored="true" />
<field name="_version_" type="string" indexed="true" stored="true"/>
<dynamicField name="*_date" type="date" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="extras_*" type="text" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="res_extras_*" type="text" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="vocab_*" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*" type="string" indexed="true" stored="false"/>
</fields>
<uniqueKey>index_id</uniqueKey>
<defaultSearchField>text</defaultSearchField>
<solrQueryParser defaultOperator="AND"/>
<copyField source="url" dest="urls"/>
<copyField source="ckan_url" dest="urls"/>
<copyField source="download_url" dest="urls"/>
<copyField source="res_url" dest="urls"/>
<copyField source="extras_*" dest="text"/>
<copyField source="res_extras_*" dest="text"/>
<copyField source="vocab_*" dest="text"/>
<copyField source="urls" dest="text"/>
<copyField source="name" dest="text"/>
<copyField source="title" dest="text"/>
<copyField source="text" dest="text"/>
<copyField source="license" dest="text"/>
<copyField source="notes" dest="text"/>
<copyField source="tags" dest="text"/>
<copyField source="groups" dest="text"/>
<copyField source="organization" dest="text"/>
<copyField source="res_name" dest="text"/>
<copyField source="res_description" dest="text"/>
<copyField source="maintainer" dest="text"/>
<copyField source="author" dest="text"/>
</schema>

View File

@ -1,7 +1,4 @@
---
- name: Solr Restart
service: name=tomcat-instance-{{ ckan_solr_port }} state=restarted
- name: Restart CKAN
service: name=apache2 state=restarted sleep=10

View File

@ -1,52 +1,4 @@
---
- name: Download the CKAN distribution
get_url: url='{{ ckan_package_url }}' dest=/srv/{{ ckan_deb_file }}
tags: ckan
- name: Install the CKAN deb package
apt: deb=/srv/{{ ckan_deb_file }}
register: ckan_install
tags: ckan
- name: Configure the CKAN production configuration file
ini_file: dest={{ ckan_config_file }} section={{ item.section }} option={{ item.option }} value={{ item.value }} state={{ item.state }} backup=yes
with_items: '{{ ckan_production_ini_opts }}'
notify: Restart CKAN
tags: [ 'ckan', 'ckan_ini' ]
- name: Install the solr schema used by CKAN
file: src=/usr/lib/ckan/default/src/ckan/ckan/config/solr/schema.xml dest={{ tomcat_m_instances_base_path }}/{{ ckan_solr_port }}/solr/data/solr/collection1/conf/schema.xml state=link force=yes
when: not ckan_geonetwork_harvester
notify: Solr Restart
tags: [ 'ckan', 'solr', 'solr_schema' ]
- name: Install the solr schema used by CKAN, modified with the spatial fields
copy: src=schema.xml dest={{ tomcat_m_instances_base_path }}/{{ ckan_solr_port }}/solr/data/solr/collection1/conf/schema.xml force=yes
when: ckan_geonetwork_harvester
notify: Solr Restart
tags: [ 'ckan', 'solr', 'solr_schema' ]
- name: Create the base directory for the CKAN file storage
file: dest={{ ckan_file_storage_dir }} state=directory owner={{ apache_user }} mode=0700
tags: ckan
- name: Initialize the CKAN databases
shell: ckan db init ; ckan datastore set-permissions | su - postgres -c 'psql --set ON_ERROR_STOP=1'
when: ( ckan_install | changed )
tags: ckan
- name: Create the pip cache directory with the right permissions
file: dest={{ ckan_libdir }}/.cache owner={{ ckan_shell_user }} group={{ ckan_shell_user }} state=directory
tags: [ 'ckan', 'ckan_user' ]
- name: Assign the CKAN virtenv dir to the ckan user
file: dest={{ ckan_virtenv }} recurse=yes owner={{ ckan_shell_user }} group={{ ckan_shell_user }}
tags: [ 'ckan', 'ckan_user' ]
- name: Create a log directory for the jobs run by the ckan user
file: dest={{ ckan_logdir }} state=directory owner={{ ckan_shell_user }} group={{ ckan_shell_user }}
tags: [ 'ckan', 'ckan_user' ]
- name: Install some plugins dependencies inside the CKAN virtualenv
become: True
become_user: '{{ ckan_shell_user }}'
@ -190,31 +142,3 @@
# notify: Restart CKAN
# tags: [ 'ckan', 'ckan_pages', 'ckan_plugins', 'ckan_ckanext_spatial' ]
- name: Install the supervisor daemon needed to automate the gather and fetch operations
apt: pkg={{ item }} state=present
with_items: '{{ ckan_gather_fetch_pkgs }}'
tags: [ 'ckan', 'ckan_harvest' ]
- name: Install the gather and fetch supervisor configuration
template: src=ckan_harvesting.conf.j2 dest=/etc/supervisor/conf.d/ckan_harvesting.conf owner=root group=root mode=0644
notify: Reconfigure the supervisor daemon
tags: [ 'ckan', 'ckan_harvest' ]
- name: Install a cron job that
cron: name="CKAN harvester" minute="0" job="{{ ckan_virtenv }}/bin/paster --plugin=ckanext-harvest harvester run --config={{ ckan_config_file }} > {{ ckan_logdir }}/harvester_run.log 2>&1" user={{ ckan_shell_user }}
when: ckan_harvester_run
tags: [ 'ckan', 'ckan_harvest', 'ckan_harvest_cron' ]
- name: Restart apache
service: name=apache state=restarted enabled=yes
when: ( ckan_install | changed )
tags: ckan
- name: Restart nginx
service: name=nginx state=restarted enabled=yes
when: ( ckan_install | changed )
tags: ckan
- name: Ensure that supervisord is running and enabled
service: name=supervisor state=started enabled=yes
tags: [ 'ckan', 'ckan_harvest' ]

37
ckan/ckan/tasks/ckan.yml Normal file
View File

@ -0,0 +1,37 @@
---
- name: Download the CKAN distribution
get_url: url='{{ ckan_package_url }}' dest=/srv/{{ ckan_deb_file }}
tags: ckan
- name: Install the CKAN deb package
apt: deb=/srv/{{ ckan_deb_file }}
register: ckan_install
tags: ckan
- name: Configure the CKAN production configuration file
ini_file: dest={{ ckan_config_file }} section={{ item.section }} option={{ item.option }} value={{ item.value }} state={{ item.state }} backup=yes
with_items: '{{ ckan_production_ini_opts }}'
notify: Restart CKAN
tags: [ 'ckan', 'ckan_ini' ]
- name: Create the base directory for the CKAN file storage
file: dest={{ ckan_file_storage_dir }} state=directory owner={{ apache_user }} mode=0700
tags: ckan
- name: Initialize the CKAN databases
shell: ckan db init ; ckan datastore set-permissions | su - postgres -c 'psql --set ON_ERROR_STOP=1'
when: ( ckan_install | changed )
tags: ckan
- name: Create the pip cache directory with the right permissions
file: dest={{ ckan_libdir }}/.cache owner={{ ckan_shell_user }} group={{ ckan_shell_user }} state=directory
tags: [ 'ckan', 'ckan_user' ]
- name: Assign the CKAN virtenv dir to the ckan user
file: dest={{ ckan_virtenv }} recurse=yes owner={{ ckan_shell_user }} group={{ ckan_shell_user }}
tags: [ 'ckan', 'ckan_user' ]
- name: Create a log directory for the jobs run by the ckan user
file: dest={{ ckan_logdir }} state=directory owner={{ ckan_shell_user }} group={{ ckan_shell_user }}
tags: [ 'ckan', 'ckan_user' ]

View File

@ -0,0 +1,11 @@
---
- name: Restart apache
service: name=apache state=restarted enabled=yes
when: ( ckan_install | changed )
tags: ckan
- name: Restart nginx
service: name=nginx state=restarted enabled=yes
when: ( ckan_install | changed )
tags: ckan

5
ckan/ckan/tasks/main.yml Normal file
View File

@ -0,0 +1,5 @@
---
- include: ckan.yml
- include: ckan-plugins.yml
- include: enable-ckan.yml
- include: supervisor.yml

View File

@ -0,0 +1,19 @@
---
- name: Install the supervisor daemon needed to automate the gather and fetch operations
apt: pkg={{ item }} state=present
with_items: '{{ ckan_gather_fetch_pkgs }}'
tags: [ 'ckan', 'ckan_harvest' ]
- name: Install the gather and fetch supervisor configuration
template: src=ckan_harvesting.conf.j2 dest=/etc/supervisor/conf.d/ckan_harvesting.conf owner=root group=root mode=0644
notify: Reconfigure the supervisor daemon
tags: [ 'ckan', 'ckan_harvest' ]
- name: Install a cron job that run the harvesters
cron: name="CKAN harvester" minute="0" job="{{ ckan_virtenv }}/bin/paster --plugin=ckanext-harvest harvester run --config={{ ckan_config_file }} > {{ ckan_logdir }}/harvester_run.log 2>&1" user={{ ckan_shell_user }}
when: ckan_harvester_run
tags: [ 'ckan', 'ckan_harvest', 'ckan_harvest_cron' ]
- name: Ensure that supervisord is running and enabled
service: name=supervisor state=started enabled=yes
tags: [ 'ckan', 'ckan_harvest' ]