forked from ISTI-ansible-roles/ansible-roles
ckan -> library/roles/ckan
postgresql_extensions -> library/roles/postgresql_extensions d4science-ghn-cluster: The ganglia, nagios and iptables roles are now dependencies of the 'common' role.
This commit is contained in:
parent
1dfc4a8a79
commit
8023613031
|
@ -0,0 +1,894 @@
|
||||||
|
import re
|
||||||
|
import cgitb
|
||||||
|
import warnings
|
||||||
|
import urllib2
|
||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
from string import Template
|
||||||
|
from urlparse import urlparse
|
||||||
|
from datetime import datetime
|
||||||
|
import uuid
|
||||||
|
import hashlib
|
||||||
|
import dateutil
|
||||||
|
import mimetypes
|
||||||
|
|
||||||
|
|
||||||
|
from pylons import config
|
||||||
|
from owslib import wms
|
||||||
|
import requests
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from ckan import plugins as p
|
||||||
|
from ckan import model
|
||||||
|
from ckan.lib.helpers import json
|
||||||
|
from ckan import logic
|
||||||
|
from ckan.lib.navl.validators import not_empty
|
||||||
|
from ckan.lib.search.index import PackageSearchIndex
|
||||||
|
|
||||||
|
from ckanext.harvest.harvesters.base import HarvesterBase
|
||||||
|
from ckanext.harvest.model import HarvestObject
|
||||||
|
|
||||||
|
from ckanext.spatial.validation import Validators, all_validators
|
||||||
|
from ckanext.spatial.model import ISODocument
|
||||||
|
from ckanext.spatial.interfaces import ISpatialHarvester
|
||||||
|
|
||||||
|
# Module-level logger for this harvester module.
log = logging.getLogger(__name__)

# Validation profiles used when neither the harvest source config nor the
# ini file sets 'validator_profiles' (see SpatialHarvester._get_validator).
DEFAULT_VALIDATOR_PROFILES = ['iso19139']
|
||||||
|
|
||||||
|
|
||||||
|
def text_traceback():
    '''
    Return a plain-text rendering of the exception currently being handled.

    Formats ``sys.exc_info()`` with ``cgitb`` and keeps only the portion
    after the "the original traceback:" marker, silencing any warnings
    raised while formatting.
    '''
    marker = 'the original traceback:'
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        formatted = cgitb.text(sys.exc_info())
        return marker.join(formatted.split(marker)[1:]).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def guess_standard(content):
    '''
    Guess the metadata standard of an XML document from its closing tags.

    Returns 'iso' for ISO 19139 (gmd) or 19115-2 (gmi) documents, 'fgdc'
    for FGDC documents, and 'unknown' otherwise. The match is
    case-insensitive.
    '''
    lowered = content.lower()
    # Closing-tag markers, already lower-cased, mapped to their standard.
    markers = (
        ('</gmd:md_metadata>', 'iso'),
        ('</gmi:mi_metadata>', 'iso'),
        ('</metadata>', 'fgdc'),
    )
    for marker, standard in markers:
        if marker in lowered:
            return standard
    return 'unknown'
|
||||||
|
|
||||||
|
|
||||||
|
def guess_resource_format(url, use_mimetypes=True):
    '''
    Given a URL try to guess the best format to assign to the resource

    The function looks for common patterns in popular geospatial services and
    file extensions, so it may not be 100% accurate. It just looks at the
    provided URL, it does not attempt to perform any remote check.

    if 'use_mimetypes' is True (default value), the mimetypes module will be
    used if no match was found before.

    Returns None if no format could be guessed.
    '''
    url = url.lower().strip()

    # Substring patterns identifying well-known geospatial services.
    resource_types = {
        # OGC
        'wms': ('service=wms', 'geoserver/wms', 'mapserver/wmsserver', 'com.esri.wms.Esrimap', 'service/wms'),
        'wfs': ('service=wfs', 'geoserver/wfs', 'mapserver/wfsserver', 'com.esri.wfs.Esrimap'),
        'wcs': ('service=wcs', 'geoserver/wcs', 'imageserver/wcsserver', 'mapserver/wcsserver'),
        'sos': ('service=sos',),
        'csw': ('service=csw',),
        # ESRI
        'kml': ('mapserver/generatekml',),
        'arcims': ('com.esri.esrimap.esrimap',),
        'arcgis_rest': ('arcgis/rest/services',),
    }

    # .items() instead of the Py2-only .iteritems(): equivalent here and
    # forward-compatible with Python 3.
    for resource_type, parts in resource_types.items():
        if any(part in url for part in parts):
            return resource_type

    # File extensions not reliably covered by the mimetypes module.
    file_types = {
        'kml': ('kml',),
        'kmz': ('kmz',),
        'gml': ('gml',),
    }

    for file_type, extensions in file_types.items():
        if any(url.endswith(extension) for extension in extensions):
            return file_type

    # Fall back to the stdlib guess (returns e.g. 'application/pdf');
    # guess_type returns (None, None) when it cannot tell.
    resource_format = mimetypes.guess_type(url)[0]
    if resource_format:
        return resource_format

    return None
|
||||||
|
|
||||||
|
|
||||||
|
class SpatialHarvester(HarvesterBase):
    # Base class shared by the spatial harvesters. Subclasses provide the
    # gather/fetch stages; the import stage and helpers live here.

    # Cached name of the user performing harvest actions
    # (lazily resolved by _get_user_name)
    _user_name = None

    # Cached site user dict returned by the 'get_site_user' action
    _site_user = None

    # Parsed harvest-source JSON configuration (see _set_source_config)
    source_config = {}

    # When True, import_stage re-imports the object regardless of its
    # recorded status or modification date
    force_import = False

    # GeoJSON polygon template used to build the 'spatial' extra from a
    # bounding box (corners listed counter-clockwise, ring closed)
    extent_template = Template('''
    {"type": "Polygon", "coordinates": [[[$xmin, $ymin], [$xmax, $ymin], [$xmax, $ymax], [$xmin, $ymax], [$xmin, $ymin]]]}
    ''')
|
||||||
|
|
||||||
|
## IHarvester
|
||||||
|
|
||||||
|
def validate_config(self, source_config):
|
||||||
|
if not source_config:
|
||||||
|
return source_config
|
||||||
|
|
||||||
|
try:
|
||||||
|
source_config_obj = json.loads(source_config)
|
||||||
|
|
||||||
|
if 'validator_profiles' in source_config_obj:
|
||||||
|
if not isinstance(source_config_obj['validator_profiles'], list):
|
||||||
|
raise ValueError('validator_profiles must be a list')
|
||||||
|
|
||||||
|
# Check if all profiles exist
|
||||||
|
existing_profiles = [v.name for v in all_validators]
|
||||||
|
unknown_profiles = set(source_config_obj['validator_profiles']) - set(existing_profiles)
|
||||||
|
|
||||||
|
if len(unknown_profiles) > 0:
|
||||||
|
raise ValueError('Unknown validation profile(s): %s' % ','.join(unknown_profiles))
|
||||||
|
|
||||||
|
if 'default_tags' in source_config_obj:
|
||||||
|
if not isinstance(source_config_obj['default_tags'],list):
|
||||||
|
raise ValueError('default_tags must be a list')
|
||||||
|
|
||||||
|
if 'default_extras' in source_config_obj:
|
||||||
|
if not isinstance(source_config_obj['default_extras'],dict):
|
||||||
|
raise ValueError('default_extras must be a dictionary')
|
||||||
|
|
||||||
|
for key in ('override_extras'):
|
||||||
|
if key in source_config_obj:
|
||||||
|
if not isinstance(source_config_obj[key],bool):
|
||||||
|
raise ValueError('%s must be boolean' % key)
|
||||||
|
|
||||||
|
except ValueError, e:
|
||||||
|
raise e
|
||||||
|
|
||||||
|
return source_config
|
||||||
|
|
||||||
|
##
|
||||||
|
|
||||||
|
## SpatialHarvester
|
||||||
|
|
||||||
|
|
||||||
|
def get_package_dict(self, iso_values, harvest_object):
|
||||||
|
'''
|
||||||
|
Constructs a package_dict suitable to be passed to package_create or
|
||||||
|
package_update. See documentation on
|
||||||
|
ckan.logic.action.create.package_create for more details
|
||||||
|
|
||||||
|
Extensions willing to modify the dict should do so implementing the
|
||||||
|
ISpatialHarvester interface
|
||||||
|
|
||||||
|
import ckan.plugins as p
|
||||||
|
from ckanext.spatial.interfaces import ISpatialHarvester
|
||||||
|
|
||||||
|
class MyHarvester(p.SingletonPlugin):
|
||||||
|
|
||||||
|
p.implements(ISpatialHarvester, inherit=True)
|
||||||
|
|
||||||
|
def get_package_dict(self, context, data_dict):
|
||||||
|
|
||||||
|
package_dict = data_dict['package_dict']
|
||||||
|
|
||||||
|
package_dict['extras'].append(
|
||||||
|
{'key': 'my-custom-extra', 'value': 'my-custom-value'}
|
||||||
|
)
|
||||||
|
|
||||||
|
return package_dict
|
||||||
|
|
||||||
|
If a dict is not returned by this function, the import stage will be cancelled.
|
||||||
|
|
||||||
|
:param iso_values: Dictionary with parsed values from the ISO 19139
|
||||||
|
XML document
|
||||||
|
:type iso_values: dict
|
||||||
|
:param harvest_object: HarvestObject domain object (with access to
|
||||||
|
job and source objects)
|
||||||
|
:type harvest_object: HarvestObject
|
||||||
|
|
||||||
|
:returns: A dataset dictionary (package_dict)
|
||||||
|
:rtype: dict
|
||||||
|
'''
|
||||||
|
|
||||||
|
tags = []
|
||||||
|
if 'tags' in iso_values:
|
||||||
|
for tag in iso_values['tags']:
|
||||||
|
tag = tag[:50] if len(tag) > 50 else tag
|
||||||
|
tags.append({'name': tag})
|
||||||
|
|
||||||
|
# Add default_tags from config
|
||||||
|
default_tags = self.source_config.get('default_tags',[])
|
||||||
|
if default_tags:
|
||||||
|
for tag in default_tags:
|
||||||
|
tags.append({'name': tag})
|
||||||
|
|
||||||
|
package_dict = {
|
||||||
|
'title': iso_values['title'],
|
||||||
|
'notes': iso_values['abstract'],
|
||||||
|
'tags': tags,
|
||||||
|
'resources': [],
|
||||||
|
}
|
||||||
|
|
||||||
|
# We need to get the owner organization (if any) from the harvest
|
||||||
|
# source dataset
|
||||||
|
source_dataset = model.Package.get(harvest_object.source.id)
|
||||||
|
if source_dataset.owner_org:
|
||||||
|
package_dict['owner_org'] = source_dataset.owner_org
|
||||||
|
|
||||||
|
# Package name
|
||||||
|
package = harvest_object.package
|
||||||
|
if package is None or package.title != iso_values['title']:
|
||||||
|
name = self._gen_new_name(iso_values['title'])
|
||||||
|
if not name:
|
||||||
|
name = self._gen_new_name(str(iso_values['guid']))
|
||||||
|
if not name:
|
||||||
|
raise Exception('Could not generate a unique name from the title or the GUID. Please choose a more unique title.')
|
||||||
|
package_dict['name'] = name
|
||||||
|
else:
|
||||||
|
package_dict['name'] = package.name
|
||||||
|
|
||||||
|
extras = {
|
||||||
|
'guid': harvest_object.guid,
|
||||||
|
'spatial_harvester': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Just add some of the metadata as extras, not the whole lot
|
||||||
|
for name in [
|
||||||
|
# Essentials
|
||||||
|
'spatial-reference-system',
|
||||||
|
'guid',
|
||||||
|
# Usefuls
|
||||||
|
'dataset-reference-date',
|
||||||
|
'metadata-language', # Language
|
||||||
|
'metadata-date', # Released
|
||||||
|
'coupled-resource',
|
||||||
|
'contact-email',
|
||||||
|
'frequency-of-update',
|
||||||
|
'spatial-data-service-type',
|
||||||
|
]:
|
||||||
|
extras[name] = iso_values[name]
|
||||||
|
|
||||||
|
if len(iso_values.get('progress', [])):
|
||||||
|
extras['progress'] = iso_values['progress'][0]
|
||||||
|
else:
|
||||||
|
extras['progress'] = ''
|
||||||
|
|
||||||
|
if len(iso_values.get('resource-type', [])):
|
||||||
|
extras['resource-type'] = iso_values['resource-type'][0]
|
||||||
|
else:
|
||||||
|
extras['resource-type'] = ''
|
||||||
|
|
||||||
|
extras['licence'] = iso_values.get('use-constraints', '')
|
||||||
|
|
||||||
|
def _extract_first_license_url(licences):
|
||||||
|
for licence in licences:
|
||||||
|
o = urlparse(licence)
|
||||||
|
if o.scheme and o.netloc:
|
||||||
|
return licence
|
||||||
|
return None
|
||||||
|
|
||||||
|
if len(extras['licence']):
|
||||||
|
license_url_extracted = _extract_first_license_url(extras['licence'])
|
||||||
|
if license_url_extracted:
|
||||||
|
extras['licence_url'] = license_url_extracted
|
||||||
|
|
||||||
|
|
||||||
|
# Metadata license ID check for package
|
||||||
|
use_constraints = iso_values.get('use-constraints')
|
||||||
|
if use_constraints:
|
||||||
|
|
||||||
|
context = {'model': model, 'session': model.Session, 'user': self._get_user_name()}
|
||||||
|
license_list = p.toolkit.get_action('license_list')(context, {})
|
||||||
|
|
||||||
|
for constraint in use_constraints:
|
||||||
|
package_license = None
|
||||||
|
|
||||||
|
for license in license_list:
|
||||||
|
if constraint.lower() == license.get('id') or constraint == license.get('url'):
|
||||||
|
package_license = license.get('id')
|
||||||
|
break
|
||||||
|
|
||||||
|
if package_license:
|
||||||
|
package_dict['license_id'] = package_license
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
|
extras['access_constraints'] = iso_values.get('limitations-on-public-access', '')
|
||||||
|
|
||||||
|
# Grpahic preview
|
||||||
|
browse_graphic = iso_values.get('browse-graphic')
|
||||||
|
if browse_graphic:
|
||||||
|
browse_graphic = browse_graphic[0]
|
||||||
|
extras['graphic-preview-file'] = browse_graphic.get('file')
|
||||||
|
if browse_graphic.get('description'):
|
||||||
|
extras['graphic-preview-description'] = browse_graphic.get('description')
|
||||||
|
if browse_graphic.get('type'):
|
||||||
|
extras['graphic-preview-type'] = browse_graphic.get('type')
|
||||||
|
|
||||||
|
|
||||||
|
for key in ['temporal-extent-begin', 'temporal-extent-end']:
|
||||||
|
if len(iso_values[key]) > 0:
|
||||||
|
extras[key] = iso_values[key][0]
|
||||||
|
|
||||||
|
# Save responsible organization roles
|
||||||
|
if iso_values['responsible-organisation']:
|
||||||
|
parties = {}
|
||||||
|
for party in iso_values['responsible-organisation']:
|
||||||
|
if party['organisation-name'] in parties:
|
||||||
|
if not party['role'] in parties[party['organisation-name']]:
|
||||||
|
parties[party['organisation-name']].append(party['role'])
|
||||||
|
else:
|
||||||
|
parties[party['organisation-name']] = [party['role']]
|
||||||
|
extras['responsible-party'] = [{'name': k, 'roles': v} for k, v in parties.iteritems()]
|
||||||
|
|
||||||
|
if len(iso_values['bbox']) > 0:
|
||||||
|
bbox = iso_values['bbox'][0]
|
||||||
|
extras['bbox-east-long'] = bbox['east']
|
||||||
|
extras['bbox-north-lat'] = bbox['north']
|
||||||
|
extras['bbox-south-lat'] = bbox['south']
|
||||||
|
extras['bbox-west-long'] = bbox['west']
|
||||||
|
|
||||||
|
try:
|
||||||
|
xmin = float(bbox['west'])
|
||||||
|
xmax = float(bbox['east'])
|
||||||
|
ymin = float(bbox['south'])
|
||||||
|
ymax = float(bbox['north'])
|
||||||
|
except ValueError, e:
|
||||||
|
self._save_object_error('Error parsing bounding box value: {0}'.format(str(e)),
|
||||||
|
harvest_object, 'Import')
|
||||||
|
else:
|
||||||
|
# Construct a GeoJSON extent so ckanext-spatial can register the extent geometry
|
||||||
|
|
||||||
|
# Some publishers define the same two corners for the bbox (ie a point),
|
||||||
|
# that causes problems in the search if stored as polygon
|
||||||
|
if xmin == xmax or ymin == ymax:
|
||||||
|
extent_string = Template('{"type": "Point", "coordinates": [$x, $y]}').substitute(
|
||||||
|
x=xmin, y=ymin
|
||||||
|
)
|
||||||
|
self._save_object_error('Point extent defined instead of polygon',
|
||||||
|
harvest_object, 'Import')
|
||||||
|
else:
|
||||||
|
extent_string = self.extent_template.substitute(
|
||||||
|
xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax
|
||||||
|
)
|
||||||
|
|
||||||
|
extras['spatial'] = extent_string.strip()
|
||||||
|
else:
|
||||||
|
log.debug('No spatial extent defined for this object')
|
||||||
|
|
||||||
|
resource_locators = iso_values.get('resource-locator', []) +\
|
||||||
|
iso_values.get('resource-locator-identification', [])
|
||||||
|
|
||||||
|
if len(resource_locators):
|
||||||
|
for resource_locator in resource_locators:
|
||||||
|
url = resource_locator.get('url', '').strip()
|
||||||
|
if url:
|
||||||
|
resource = {}
|
||||||
|
resource['format'] = guess_resource_format(url)
|
||||||
|
if resource['format'] == 'wms' and config.get('ckanext.spatial.harvest.validate_wms', False):
|
||||||
|
# Check if the service is a view service
|
||||||
|
test_url = url.split('?')[0] if '?' in url else url
|
||||||
|
if self._is_wms(test_url):
|
||||||
|
resource['verified'] = True
|
||||||
|
resource['verified_date'] = datetime.now().isoformat()
|
||||||
|
|
||||||
|
resource.update(
|
||||||
|
{
|
||||||
|
'url': url,
|
||||||
|
'name': resource_locator.get('name') or p.toolkit._('Unnamed resource'),
|
||||||
|
'description': resource_locator.get('description') or '',
|
||||||
|
'resource_locator_protocol': resource_locator.get('protocol') or '',
|
||||||
|
'resource_locator_function': resource_locator.get('function') or '',
|
||||||
|
})
|
||||||
|
package_dict['resources'].append(resource)
|
||||||
|
|
||||||
|
|
||||||
|
# Add default_extras from config
|
||||||
|
default_extras = self.source_config.get('default_extras',{})
|
||||||
|
if default_extras:
|
||||||
|
override_extras = self.source_config.get('override_extras',False)
|
||||||
|
for key,value in default_extras.iteritems():
|
||||||
|
log.debug('Processing extra %s', key)
|
||||||
|
if not key in extras or override_extras:
|
||||||
|
# Look for replacement strings
|
||||||
|
if isinstance(value,basestring):
|
||||||
|
value = value.format(harvest_source_id=harvest_object.job.source.id,
|
||||||
|
harvest_source_url=harvest_object.job.source.url.strip('/'),
|
||||||
|
harvest_source_title=harvest_object.job.source.title,
|
||||||
|
harvest_job_id=harvest_object.job.id,
|
||||||
|
harvest_object_id=harvest_object.id)
|
||||||
|
extras[key] = value
|
||||||
|
|
||||||
|
extras_as_dict = []
|
||||||
|
for key, value in extras.iteritems():
|
||||||
|
if isinstance(value, (list, dict)):
|
||||||
|
extras_as_dict.append({'key': key, 'value': json.dumps(value)})
|
||||||
|
else:
|
||||||
|
extras_as_dict.append({'key': key, 'value': value})
|
||||||
|
|
||||||
|
package_dict['extras'] = extras_as_dict
|
||||||
|
|
||||||
|
return package_dict
|
||||||
|
|
||||||
|
    def transform_to_iso(self, original_document, original_format, harvest_object):
        '''
        DEPRECATED: Use the transform_to_iso method of the ISpatialHarvester
        interface
        '''
        # Flag inspected by import_stage: if it is still False after the
        # call, a subclass overrode this method (old-style API) and a
        # deprecation warning is logged. The base implementation transforms
        # nothing and returns None.
        self.__base_transform_to_iso_called = True
        return None
|
||||||
|
|
||||||
|
def import_stage(self, harvest_object):
|
||||||
|
context = {
|
||||||
|
'model': model,
|
||||||
|
'session': model.Session,
|
||||||
|
'user': self._get_user_name(),
|
||||||
|
}
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__ + '.import')
|
||||||
|
log.debug('Import stage for harvest object: %s', harvest_object.id)
|
||||||
|
|
||||||
|
if not harvest_object:
|
||||||
|
log.error('No harvest object received')
|
||||||
|
return False
|
||||||
|
|
||||||
|
self._set_source_config(harvest_object.source.config)
|
||||||
|
|
||||||
|
if self.force_import:
|
||||||
|
status = 'change'
|
||||||
|
else:
|
||||||
|
status = self._get_object_extra(harvest_object, 'status')
|
||||||
|
|
||||||
|
# Get the last harvested object (if any)
|
||||||
|
previous_object = model.Session.query(HarvestObject) \
|
||||||
|
.filter(HarvestObject.guid==harvest_object.guid) \
|
||||||
|
.filter(HarvestObject.current==True) \
|
||||||
|
.first()
|
||||||
|
|
||||||
|
if status == 'delete':
|
||||||
|
# Delete package
|
||||||
|
context.update({
|
||||||
|
'ignore_auth': True,
|
||||||
|
})
|
||||||
|
p.toolkit.get_action('package_delete')(context, {'id': harvest_object.package_id})
|
||||||
|
log.info('Deleted package {0} with guid {1}'.format(harvest_object.package_id, harvest_object.guid))
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Check if it is a non ISO document
|
||||||
|
original_document = self._get_object_extra(harvest_object, 'original_document')
|
||||||
|
original_format = self._get_object_extra(harvest_object, 'original_format')
|
||||||
|
if original_document and original_format:
|
||||||
|
#DEPRECATED use the ISpatialHarvester interface method
|
||||||
|
self.__base_transform_to_iso_called = False
|
||||||
|
content = self.transform_to_iso(original_document, original_format, harvest_object)
|
||||||
|
if not self.__base_transform_to_iso_called:
|
||||||
|
log.warn('Deprecation warning: calling transform_to_iso directly is deprecated. ' +
|
||||||
|
'Please use the ISpatialHarvester interface method instead.')
|
||||||
|
|
||||||
|
for harvester in p.PluginImplementations(ISpatialHarvester):
|
||||||
|
content = harvester.transform_to_iso(original_document, original_format, harvest_object)
|
||||||
|
|
||||||
|
if content:
|
||||||
|
harvest_object.content = content
|
||||||
|
else:
|
||||||
|
self._save_object_error('Transformation to ISO failed', harvest_object, 'Import')
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
if harvest_object.content is None:
|
||||||
|
self._save_object_error('Empty content for object {0}'.format(harvest_object.id), harvest_object, 'Import')
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Validate ISO document
|
||||||
|
is_valid, profile, errors = self._validate_document(harvest_object.content, harvest_object)
|
||||||
|
if not is_valid:
|
||||||
|
# If validation errors were found, import will stop unless
|
||||||
|
# configuration per source or per instance says otherwise
|
||||||
|
continue_import = p.toolkit.asbool(config.get('ckanext.spatial.harvest.continue_on_validation_errors', False)) or \
|
||||||
|
self.source_config.get('continue_on_validation_errors')
|
||||||
|
if not continue_import:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Parse ISO document
|
||||||
|
try:
|
||||||
|
|
||||||
|
iso_parser = ISODocument(harvest_object.content)
|
||||||
|
iso_values = iso_parser.read_values()
|
||||||
|
except Exception, e:
|
||||||
|
self._save_object_error('Error parsing ISO document for object {0}: {1}'.format(harvest_object.id, str(e)),
|
||||||
|
harvest_object, 'Import')
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Flag previous object as not current anymore
|
||||||
|
if previous_object and not self.force_import:
|
||||||
|
previous_object.current = False
|
||||||
|
previous_object.add()
|
||||||
|
|
||||||
|
# Update GUID with the one on the document
|
||||||
|
iso_guid = iso_values['guid']
|
||||||
|
if iso_guid and harvest_object.guid != iso_guid:
|
||||||
|
# First make sure there already aren't current objects
|
||||||
|
# with the same guid
|
||||||
|
existing_object = model.Session.query(HarvestObject.id) \
|
||||||
|
.filter(HarvestObject.guid==iso_guid) \
|
||||||
|
.filter(HarvestObject.current==True) \
|
||||||
|
.first()
|
||||||
|
if existing_object:
|
||||||
|
self._save_object_error('Object {0} already has this guid {1}'.format(existing_object.id, iso_guid),
|
||||||
|
harvest_object, 'Import')
|
||||||
|
return False
|
||||||
|
|
||||||
|
harvest_object.guid = iso_guid
|
||||||
|
harvest_object.add()
|
||||||
|
|
||||||
|
# Generate GUID if not present (i.e. it's a manual import)
|
||||||
|
if not harvest_object.guid:
|
||||||
|
m = hashlib.md5()
|
||||||
|
m.update(harvest_object.content.encode('utf8', 'ignore'))
|
||||||
|
harvest_object.guid = m.hexdigest()
|
||||||
|
harvest_object.add()
|
||||||
|
|
||||||
|
# Get document modified date
|
||||||
|
try:
|
||||||
|
metadata_modified_date = dateutil.parser.parse(iso_values['metadata-date'], ignoretz=True)
|
||||||
|
except ValueError:
|
||||||
|
self._save_object_error('Could not extract reference date for object {0} ({1})'
|
||||||
|
.format(harvest_object.id, iso_values['metadata-date']), harvest_object, 'Import')
|
||||||
|
return False
|
||||||
|
|
||||||
|
harvest_object.metadata_modified_date = metadata_modified_date
|
||||||
|
harvest_object.add()
|
||||||
|
|
||||||
|
|
||||||
|
# Build the package dict
|
||||||
|
package_dict = self.get_package_dict(iso_values, harvest_object)
|
||||||
|
for harvester in p.PluginImplementations(ISpatialHarvester):
|
||||||
|
package_dict = harvester.get_package_dict(context, {
|
||||||
|
'package_dict': package_dict,
|
||||||
|
'iso_values': iso_values,
|
||||||
|
'xml_tree': iso_parser.xml_tree,
|
||||||
|
'harvest_object': harvest_object,
|
||||||
|
})
|
||||||
|
if not package_dict:
|
||||||
|
log.error('No package dict returned, aborting import for object {0}'.format(harvest_object.id))
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Create / update the package
|
||||||
|
context.update({
|
||||||
|
'extras_as_string': True,
|
||||||
|
'api_version': '2',
|
||||||
|
'return_id_only': True})
|
||||||
|
|
||||||
|
if self._site_user and context['user'] == self._site_user['name']:
|
||||||
|
context['ignore_auth'] = True
|
||||||
|
|
||||||
|
|
||||||
|
# The default package schema does not like Upper case tags
|
||||||
|
tag_schema = logic.schema.default_tags_schema()
|
||||||
|
tag_schema['name'] = [not_empty, unicode]
|
||||||
|
|
||||||
|
# Flag this object as the current one
|
||||||
|
harvest_object.current = True
|
||||||
|
harvest_object.add()
|
||||||
|
|
||||||
|
if status == 'new':
|
||||||
|
package_schema = logic.schema.default_create_package_schema()
|
||||||
|
package_schema['tags'] = tag_schema
|
||||||
|
context['schema'] = package_schema
|
||||||
|
|
||||||
|
# We need to explicitly provide a package ID, otherwise ckanext-spatial
|
||||||
|
# won't be be able to link the extent to the package.
|
||||||
|
package_dict['id'] = unicode(uuid.uuid4())
|
||||||
|
package_schema['id'] = [unicode]
|
||||||
|
|
||||||
|
# Save reference to the package on the object
|
||||||
|
harvest_object.package_id = package_dict['id']
|
||||||
|
harvest_object.add()
|
||||||
|
# Defer constraints and flush so the dataset can be indexed with
|
||||||
|
# the harvest object id (on the after_show hook from the harvester
|
||||||
|
# plugin)
|
||||||
|
model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
|
||||||
|
model.Session.flush()
|
||||||
|
|
||||||
|
try:
|
||||||
|
package_id = p.toolkit.get_action('package_create')(context, package_dict)
|
||||||
|
log.info('Created new package %s with guid %s', package_id, harvest_object.guid)
|
||||||
|
except p.toolkit.ValidationError, e:
|
||||||
|
self._save_object_error('Validation Error: %s' % str(e.error_summary), harvest_object, 'Import')
|
||||||
|
return False
|
||||||
|
|
||||||
|
elif status == 'change':
|
||||||
|
|
||||||
|
# Check if the modified date is more recent
|
||||||
|
if not self.force_import and previous_object and harvest_object.metadata_modified_date <= previous_object.metadata_modified_date:
|
||||||
|
|
||||||
|
# Assign the previous job id to the new object to
|
||||||
|
# avoid losing history
|
||||||
|
harvest_object.harvest_job_id = previous_object.job.id
|
||||||
|
harvest_object.add()
|
||||||
|
|
||||||
|
# Delete the previous object to avoid cluttering the object table
|
||||||
|
previous_object.delete()
|
||||||
|
|
||||||
|
# Reindex the corresponding package to update the reference to the
|
||||||
|
# harvest object
|
||||||
|
if ((config.get('ckanext.spatial.harvest.reindex_unchanged', True) != 'False'
|
||||||
|
or self.source_config.get('reindex_unchanged') != 'False')
|
||||||
|
and harvest_object.package_id):
|
||||||
|
context.update({'validate': False, 'ignore_auth': True})
|
||||||
|
try:
|
||||||
|
package_dict = logic.get_action('package_show')(context,
|
||||||
|
{'id': harvest_object.package_id})
|
||||||
|
except p.toolkit.ObjectNotFound:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
for extra in package_dict.get('extras', []):
|
||||||
|
if extra['key'] == 'harvest_object_id':
|
||||||
|
extra['value'] = harvest_object.id
|
||||||
|
if package_dict:
|
||||||
|
package_index = PackageSearchIndex()
|
||||||
|
package_index.index_package(package_dict)
|
||||||
|
|
||||||
|
log.info('Document with GUID %s unchanged, skipping...' % (harvest_object.guid))
|
||||||
|
else:
|
||||||
|
package_schema = logic.schema.default_update_package_schema()
|
||||||
|
package_schema['tags'] = tag_schema
|
||||||
|
context['schema'] = package_schema
|
||||||
|
|
||||||
|
package_dict['id'] = harvest_object.package_id
|
||||||
|
try:
|
||||||
|
package_id = p.toolkit.get_action('package_update')(context, package_dict)
|
||||||
|
log.info('Updated package %s with guid %s', package_id, harvest_object.guid)
|
||||||
|
except p.toolkit.ValidationError, e:
|
||||||
|
self._save_object_error('Validation Error: %s' % str(e.error_summary), harvest_object, 'Import')
|
||||||
|
return False
|
||||||
|
|
||||||
|
model.Session.commit()
|
||||||
|
|
||||||
|
return True
|
||||||
|
##
|
||||||
|
|
||||||
|
def _is_wms(self, url):
|
||||||
|
'''
|
||||||
|
Checks if the provided URL actually points to a Web Map Service.
|
||||||
|
Uses owslib WMS reader to parse the response.
|
||||||
|
'''
|
||||||
|
try:
|
||||||
|
capabilities_url = wms.WMSCapabilitiesReader().capabilities_url(url)
|
||||||
|
res = urllib2.urlopen(capabilities_url, None, 10)
|
||||||
|
xml = res.read()
|
||||||
|
|
||||||
|
s = wms.WebMapService(url, xml=xml)
|
||||||
|
return isinstance(s.contents, dict) and s.contents != {}
|
||||||
|
except Exception, e:
|
||||||
|
log.error('WMS check for %s failed with exception: %s' % (url, str(e)))
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _get_object_extra(self, harvest_object, key):
|
||||||
|
'''
|
||||||
|
Helper function for retrieving the value from a harvest object extra,
|
||||||
|
given the key
|
||||||
|
'''
|
||||||
|
for extra in harvest_object.extras:
|
||||||
|
if extra.key == key:
|
||||||
|
return extra.value
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _set_source_config(self, config_str):
|
||||||
|
'''
|
||||||
|
Loads the source configuration JSON object into a dict for
|
||||||
|
convenient access
|
||||||
|
'''
|
||||||
|
if config_str:
|
||||||
|
self.source_config = json.loads(config_str)
|
||||||
|
log.debug('Using config: %r', self.source_config)
|
||||||
|
else:
|
||||||
|
self.source_config = {}
|
||||||
|
|
||||||
|
def _get_validator(self):
|
||||||
|
'''
|
||||||
|
Returns the validator object using the relevant profiles
|
||||||
|
|
||||||
|
The profiles to be used are assigned in the following order:
|
||||||
|
|
||||||
|
1. 'validator_profiles' property of the harvest source config object
|
||||||
|
2. 'ckan.spatial.validator.profiles' configuration option in the ini file
|
||||||
|
3. Default value as defined in DEFAULT_VALIDATOR_PROFILES
|
||||||
|
'''
|
||||||
|
if not hasattr(self, '_validator'):
|
||||||
|
if hasattr(self, 'source_config') and self.source_config.get('validator_profiles', None):
|
||||||
|
profiles = self.source_config.get('validator_profiles')
|
||||||
|
elif config.get('ckan.spatial.validator.profiles', None):
|
||||||
|
profiles = [
|
||||||
|
x.strip() for x in
|
||||||
|
config.get('ckan.spatial.validator.profiles').split(',')
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
profiles = DEFAULT_VALIDATOR_PROFILES
|
||||||
|
self._validator = Validators(profiles=profiles)
|
||||||
|
|
||||||
|
# Add any custom validators from extensions
|
||||||
|
for plugin_with_validators in p.PluginImplementations(ISpatialHarvester):
|
||||||
|
custom_validators = plugin_with_validators.get_validators()
|
||||||
|
for custom_validator in custom_validators:
|
||||||
|
if custom_validator not in all_validators:
|
||||||
|
self._validator.add_validator(custom_validator)
|
||||||
|
|
||||||
|
|
||||||
|
return self._validator
|
||||||
|
|
||||||
|
def _get_user_name(self):
|
||||||
|
'''
|
||||||
|
Returns the name of the user that will perform the harvesting actions
|
||||||
|
(deleting, updating and creating datasets)
|
||||||
|
|
||||||
|
By default this will be the internal site admin user. This is the
|
||||||
|
recommended setting, but if necessary it can be overridden with the
|
||||||
|
`ckanext.spatial.harvest.user_name` config option, eg to support the
|
||||||
|
old hardcoded 'harvest' user:
|
||||||
|
|
||||||
|
ckanext.spatial.harvest.user_name = harvest
|
||||||
|
|
||||||
|
'''
|
||||||
|
if self._user_name:
|
||||||
|
return self._user_name
|
||||||
|
|
||||||
|
context = {'model': model,
|
||||||
|
'ignore_auth': True,
|
||||||
|
'defer_commit': True, # See ckan/ckan#1714
|
||||||
|
}
|
||||||
|
self._site_user = p.toolkit.get_action('get_site_user')(context, {})
|
||||||
|
|
||||||
|
config_user_name = config.get('ckanext.spatial.harvest.user_name')
|
||||||
|
if config_user_name:
|
||||||
|
self._user_name = config_user_name
|
||||||
|
else:
|
||||||
|
self._user_name = self._site_user['name']
|
||||||
|
|
||||||
|
return self._user_name
|
||||||
|
|
||||||
|
def _get_content(self, url):
|
||||||
|
'''
|
||||||
|
DEPRECATED: Use _get_content_as_unicode instead
|
||||||
|
'''
|
||||||
|
|
||||||
|
parts = urlparse.urlparse(url)
|
||||||
|
if parts.username and parts.password:
|
||||||
|
auth_url = url.rsplit('/', 1)[0]
|
||||||
|
auth_url = auth_url + '/xml.user.login'
|
||||||
|
auth_url = urlparse.urlunparse((
|
||||||
|
parts.scheme,
|
||||||
|
parts.netloc,
|
||||||
|
parts.path
|
||||||
|
))
|
||||||
|
log.error('Authenticate agains Geonetwork. User is %s and password is %s', parts.username, parts.password)
|
||||||
|
auth_data = minidom.Document()
|
||||||
|
root = auth_data.createElement('request')
|
||||||
|
auth_data.appendChild(root)
|
||||||
|
username_tag = auth_data.createElement('username')
|
||||||
|
user_data = auth_data.createTextNode(parts.username)
|
||||||
|
username_tag.appendChild(user_data)
|
||||||
|
root.appendChild(username_tag)
|
||||||
|
password_tag = auth_data.createElement('password')
|
||||||
|
password_data = auth_data.createTextNode(parts.password)
|
||||||
|
password_tag.appendChild(password_data)
|
||||||
|
root.appendChild(password_tag)
|
||||||
|
xml_auth_data = auth_data.toprettyxml(indent=" ")
|
||||||
|
|
||||||
|
req_headers = {'Content-Type': 'application/xml'}
|
||||||
|
|
||||||
|
sess = requests.Session()
|
||||||
|
req = sess.post(url=auth_url, data=xml_auth_data, headers=req_headers)
|
||||||
|
opener = urllib2.build_opener()
|
||||||
|
opener.addheaders.append(('Set-Cookie', req.cookie))
|
||||||
|
|
||||||
|
url = url.replace(' ', '%20')
|
||||||
|
if opener:
|
||||||
|
http_response = opener.open(url)
|
||||||
|
else:
|
||||||
|
http_response = urllib2.urlopen(url)
|
||||||
|
return http_response.read()
|
||||||
|
|
||||||
|
def _get_content_as_unicode(self, url):
|
||||||
|
'''
|
||||||
|
Get remote content as unicode.
|
||||||
|
|
||||||
|
We let requests handle the conversion [1] , which will use the
|
||||||
|
content-type header first or chardet if the header is missing
|
||||||
|
(requests uses its own embedded chardet version).
|
||||||
|
|
||||||
|
As we will be storing and serving the contents as unicode, we actually
|
||||||
|
replace the original XML encoding declaration with an UTF-8 one.
|
||||||
|
|
||||||
|
|
||||||
|
[1] http://github.com/kennethreitz/requests/blob/63243b1e3b435c7736acf1e51c0f6fa6666d861d/requests/models.py#L811
|
||||||
|
|
||||||
|
'''
|
||||||
|
parts = urlparse.urlparse(url)
|
||||||
|
if parts.username and parts.password:
|
||||||
|
auth_url = url.rsplit('/', 1)[0]
|
||||||
|
auth_url = auth_url + '/xml.user.login'
|
||||||
|
auth_url = urlparse.urlunparse((
|
||||||
|
parts.scheme,
|
||||||
|
parts.netloc,
|
||||||
|
parts.path
|
||||||
|
))
|
||||||
|
log.error('Authenticate against Geonetwork. User is %s and password is %s', parts.username, parts.password)
|
||||||
|
auth_data = minidom.Document()
|
||||||
|
root = auth_data.createElement('request')
|
||||||
|
auth_data.appendChild(root)
|
||||||
|
username_tag = auth_data.createElement('username')
|
||||||
|
user_data = auth_data.createTextNode(parts.username)
|
||||||
|
username_tag.appendChild(user_data)
|
||||||
|
root.appendChild(username_tag)
|
||||||
|
password_tag = auth_data.createElement('password')
|
||||||
|
password_data = auth_data.createTextNode(parts.password)
|
||||||
|
password_tag.appendChild(password_data)
|
||||||
|
root.appendChild(password_tag)
|
||||||
|
xml_auth_data = auth_data.toprettyxml(indent=" ")
|
||||||
|
|
||||||
|
req_headers = {'Content-Type': 'application/xml'}
|
||||||
|
|
||||||
|
geo_session = requests.Session()
|
||||||
|
geo_session.post(url=auth_url, data=xml_auth_data, headers=req_headers)
|
||||||
|
|
||||||
|
url = url.replace(' ', '%20')
|
||||||
|
if geo_session:
|
||||||
|
response = geo_session.get(url, timeout=10)
|
||||||
|
else:
|
||||||
|
response = requests.get(url, timeout=10)
|
||||||
|
|
||||||
|
content = response.text
|
||||||
|
|
||||||
|
# Remove original XML declaration
|
||||||
|
content = re.sub('<\?xml(.*)\?>', '', content)
|
||||||
|
|
||||||
|
# Get rid of the BOM and other rubbish at the beginning of the file
|
||||||
|
content = re.sub('.*?<', '<', content, 1)
|
||||||
|
content = content[content.index('<'):]
|
||||||
|
|
||||||
|
return content
|
||||||
|
|
||||||
|
    def _validate_document(self, document_string, harvest_object, validator=None):
        '''
        Validates an XML document with the default, or if present, the
        provided validators.

        It will create a HarvestObjectError for each validation error found,
        so they can be shown properly on the frontend.

        Returns a tuple, with a boolean showing whether the validation passed
        or not, the profile used and a list of errors (tuples with error
        message and error lines if present).
        '''
        # Fall back to the validators built from the source / site config
        if not validator:
            validator = self._get_validator()

        # Strip the XML declaration before parsing (lxml refuses unicode
        # strings that still carry an explicit encoding declaration)
        document_string = re.sub('<\?xml(.*)\?>', '', document_string)

        try:
            xml = etree.fromstring(document_string)
        except etree.XMLSyntaxError, e:
            # Unparseable document: record the error against the harvest
            # object and report the validation as failed with no profile
            self._save_object_error('Could not parse XML file: {0}'.format(str(e)), harvest_object, 'Import')
            return False, None, []

        valid, profile, errors = validator.is_valid(xml)
        if not valid:
            log.error('Validation errors found using profile {0} for object with GUID {1}'.format(profile, harvest_object.guid))
            # One HarvestObjectError per validation error so they can be
            # displayed individually on the frontend
            for error in errors:
                self._save_object_error(error[0], harvest_object, 'Validation', line=error[1])

        return valid, profile, errors
|
|
@ -0,0 +1,187 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
NB Please copy changes to this file into the multilingual schema:
|
||||||
|
ckanext/multilingual/solr/schema.xml
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- We update the version when there is a backward-incompatible change to this
|
||||||
|
schema. In this case the version should be set to the next CKAN version number.
|
||||||
|
(x.y but not x.y.z since it needs to be a float) -->
|
||||||
|
<schema name="ckan" version="2.3">
|
||||||
|
|
||||||
|
<types>
|
||||||
|
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
||||||
|
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
||||||
|
<fieldtype name="binary" class="solr.BinaryField"/>
|
||||||
|
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
|
||||||
|
|
||||||
|
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- A general unstemmed text field - good if one does not know the language of the field -->
|
||||||
|
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
</types>
|
||||||
|
|
||||||
|
|
||||||
|
<fields>
|
||||||
|
<field name="index_id" type="string" indexed="true" stored="true" required="true" />
|
||||||
|
<field name="id" type="string" indexed="true" stored="true" required="true" />
|
||||||
|
<field name="site_id" type="string" indexed="true" stored="true" required="true" />
|
||||||
|
<field name="title" type="text" indexed="true" stored="true" />
|
||||||
|
<field name="entity_type" type="string" indexed="true" stored="true" omitNorms="true" />
|
||||||
|
<field name="dataset_type" type="string" indexed="true" stored="true" />
|
||||||
|
<field name="state" type="string" indexed="true" stored="true" omitNorms="true" />
|
||||||
|
<field name="name" type="string" indexed="true" stored="true" omitNorms="true" />
|
||||||
|
<field name="revision_id" type="string" indexed="true" stored="true" omitNorms="true" />
|
||||||
|
<field name="version" type="string" indexed="true" stored="true" />
|
||||||
|
<field name="url" type="string" indexed="true" stored="true" omitNorms="true" />
|
||||||
|
<field name="ckan_url" type="string" indexed="true" stored="true" omitNorms="true" />
|
||||||
|
<field name="download_url" type="string" indexed="true" stored="true" omitNorms="true" />
|
||||||
|
<field name="notes" type="text" indexed="true" stored="true"/>
|
||||||
|
<field name="author" type="textgen" indexed="true" stored="true" />
|
||||||
|
<field name="author_email" type="textgen" indexed="true" stored="true" />
|
||||||
|
<field name="maintainer" type="textgen" indexed="true" stored="true" />
|
||||||
|
<field name="maintainer_email" type="textgen" indexed="true" stored="true" />
|
||||||
|
<field name="license" type="string" indexed="true" stored="true" />
|
||||||
|
<field name="license_id" type="string" indexed="true" stored="true" />
|
||||||
|
<field name="ratings_count" type="int" indexed="true" stored="false" />
|
||||||
|
<field name="ratings_average" type="float" indexed="true" stored="false" />
|
||||||
|
<field name="tags" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="groups" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="organization" type="string" indexed="true" stored="true" multiValued="false"/>
|
||||||
|
|
||||||
|
<field name="capacity" type="string" indexed="true" stored="true" multiValued="false"/>
|
||||||
|
|
||||||
|
<field name="res_name" type="textgen" indexed="true" stored="true" multiValued="true" />
|
||||||
|
<field name="res_description" type="textgen" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="res_format" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="res_url" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="res_type" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
|
||||||
|
<!-- Fields needed by the spatial extension-->
|
||||||
|
<field name="bbox_area" type="float" indexed="true" stored="true" />
|
||||||
|
<field name="maxx" type="float" indexed="true" stored="true" />
|
||||||
|
<field name="maxy" type="float" indexed="true" stored="true" />
|
||||||
|
<field name="minx" type="float" indexed="true" stored="true" />
|
||||||
|
<field name="miny" type="float" indexed="true" stored="true" />
|
||||||
|
|
||||||
|
   <!-- catchall field, containing all other searchable text fields (implemented
        via copyField further on in this schema) -->
|
||||||
|
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="urls" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
|
||||||
|
<field name="depends_on" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="dependency_of" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="derives_from" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="has_derivation" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="links_to" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="linked_from" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="child_of" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="parent_of" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="views_total" type="int" indexed="true" stored="false"/>
|
||||||
|
<field name="views_recent" type="int" indexed="true" stored="false"/>
|
||||||
|
<field name="resources_accessed_total" type="int" indexed="true" stored="false"/>
|
||||||
|
<field name="resources_accessed_recent" type="int" indexed="true" stored="false"/>
|
||||||
|
|
||||||
|
<field name="metadata_created" type="date" indexed="true" stored="true" multiValued="false"/>
|
||||||
|
<field name="metadata_modified" type="date" indexed="true" stored="true" multiValued="false"/>
|
||||||
|
|
||||||
|
<field name="indexed_ts" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
|
||||||
|
|
||||||
|
<!-- Copy the title field into titleString, and treat as a string
|
||||||
|
(rather than text type). This allows us to sort on the titleString -->
|
||||||
|
<field name="title_string" type="string" indexed="true" stored="false" />
|
||||||
|
|
||||||
|
<field name="data_dict" type="string" indexed="false" stored="true" />
|
||||||
|
<field name="validated_data_dict" type="string" indexed="false" stored="true" />
|
||||||
|
|
||||||
|
<field name="_version_" type="string" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
<dynamicField name="*_date" type="date" indexed="true" stored="true" multiValued="false"/>
|
||||||
|
|
||||||
|
<dynamicField name="extras_*" type="text" indexed="true" stored="true" multiValued="false"/>
|
||||||
|
<dynamicField name="res_extras_*" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<dynamicField name="vocab_*" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<dynamicField name="*" type="string" indexed="true" stored="false"/>
|
||||||
|
</fields>
|
||||||
|
|
||||||
|
<uniqueKey>index_id</uniqueKey>
|
||||||
|
<defaultSearchField>text</defaultSearchField>
|
||||||
|
<solrQueryParser defaultOperator="AND"/>
|
||||||
|
|
||||||
|
<copyField source="url" dest="urls"/>
|
||||||
|
<copyField source="ckan_url" dest="urls"/>
|
||||||
|
<copyField source="download_url" dest="urls"/>
|
||||||
|
<copyField source="res_url" dest="urls"/>
|
||||||
|
<copyField source="extras_*" dest="text"/>
|
||||||
|
<copyField source="res_extras_*" dest="text"/>
|
||||||
|
<copyField source="vocab_*" dest="text"/>
|
||||||
|
<copyField source="urls" dest="text"/>
|
||||||
|
<copyField source="name" dest="text"/>
|
||||||
|
<copyField source="title" dest="text"/>
|
||||||
|
<copyField source="text" dest="text"/>
|
||||||
|
<copyField source="license" dest="text"/>
|
||||||
|
<copyField source="notes" dest="text"/>
|
||||||
|
<copyField source="tags" dest="text"/>
|
||||||
|
<copyField source="groups" dest="text"/>
|
||||||
|
<copyField source="organization" dest="text"/>
|
||||||
|
<copyField source="res_name" dest="text"/>
|
||||||
|
<copyField source="res_description" dest="text"/>
|
||||||
|
<copyField source="maintainer" dest="text"/>
|
||||||
|
<copyField source="author" dest="text"/>
|
||||||
|
|
||||||
|
</schema>
|
|
@ -0,0 +1,6 @@
|
||||||
|
---
|
||||||
|
- name: Solr Restart
|
||||||
|
service: name=tomcat-instance-{{ ckan_solr_port }} state=restarted
|
||||||
|
|
||||||
|
- name: Restart CKAN
|
||||||
|
service: name=apache2 state=restarted sleep=10
|
|
@ -0,0 +1,181 @@
|
||||||
|
---
|
||||||
|
- name: Download the CKAN distribution
|
||||||
|
get_url: url='{{ ckan_package_url }}' dest=/srv/{{ ckan_deb_file }}
|
||||||
|
tags: ckan
|
||||||
|
|
||||||
|
- name: Install the CKAN deb package
|
||||||
|
apt: deb=/srv/{{ ckan_deb_file }}
|
||||||
|
register: ckan_install
|
||||||
|
tags: ckan
|
||||||
|
|
||||||
|
- name: Configure the CKAN production configuration file
|
||||||
|
ini_file: dest={{ ckan_config_file }} section={{ item.section }} option={{ item.option }} value={{ item.value }} state={{ item.state }} backup=yes
|
||||||
|
with_items: '{{ ckan_production_ini_opts }}'
|
||||||
|
notify: Restart CKAN
|
||||||
|
tags: [ 'ckan', 'ckan_ini' ]
|
||||||
|
|
||||||
|
- name: Install the solr schema used by CKAN
|
||||||
|
file: src=/usr/lib/ckan/default/src/ckan/ckan/config/solr/schema.xml dest={{ tomcat_m_instances_base_path }}/{{ ckan_solr_port }}/solr/data/solr/collection1/conf/schema.xml state=link force=yes
|
||||||
|
when: not ckan_geonetwork_harvester
|
||||||
|
notify: Solr Restart
|
||||||
|
tags: [ 'ckan', 'solr', 'solr_schema' ]
|
||||||
|
|
||||||
|
- name: Install the solr schema used by CKAN, modified with the spatial fields
|
||||||
|
copy: src=schema.xml dest={{ tomcat_m_instances_base_path }}/{{ ckan_solr_port }}/solr/data/solr/collection1/conf/schema.xml force=yes
|
||||||
|
when: ckan_geonetwork_harvester
|
||||||
|
notify: Solr Restart
|
||||||
|
tags: [ 'ckan', 'solr', 'solr_schema' ]
|
||||||
|
|
||||||
|
- name: Create the base directory for the CKAN file storage
|
||||||
|
file: dest={{ ckan_file_storage_dir }} state=directory owner={{ apache_user }} mode=0700
|
||||||
|
tags: ckan
|
||||||
|
|
||||||
|
- name: Initialize the CKAN databases
|
||||||
|
shell: ckan db init ; ckan datastore set-permissions | su - postgres -c 'psql --set ON_ERROR_STOP=1'
|
||||||
|
when: ( ckan_install | changed )
|
||||||
|
tags: ckan
|
||||||
|
|
||||||
|
- name: Assign the CKAN virtenv dir to the ckan user
|
||||||
|
file: dest={{ ckan_virtenv }} recurse=yes owner={{ ckan_shell_user }} group={{ ckan_shell_user }}
|
||||||
|
tags: [ 'ckan', 'ckan_user' ]
|
||||||
|
|
||||||
|
- name: Create a log directory for the jobs run by the ckan user
|
||||||
|
file: dest=/var/log/ckan state=directory owner={{ ckan_shell_user }} group={{ ckan_shell_user }}
|
||||||
|
tags: [ 'ckan', 'ckan_user' ]
|
||||||
|
|
||||||
|
- name: Install some plugins dependencies inside the CKAN virtualenv
|
||||||
|
become: True
|
||||||
|
become_user: '{{ ckan_shell_user }}'
|
||||||
|
pip: name={{ item }} virtualenv={{ ckan_virtenv }}
|
||||||
|
with_items: '{{ ckan_pip_dependencies }}'
|
||||||
|
when: ckan_geonetwork_harvester
|
||||||
|
tags: [ 'ckan', 'geonetwork', 'ckan_plugins', 'ckan_pip_deps' ]
|
||||||
|
|
||||||
|
- name: Download the CKAN ckanext-harvest plugin
|
||||||
|
become: True
|
||||||
|
become_user: '{{ ckan_shell_user }}'
|
||||||
|
pip: name='{{ ckan_ckanext_harvester_url }}' virtualenv={{ ckan_virtenv }}
|
||||||
|
notify: Restart CKAN
|
||||||
|
when: ckan_geonetwork_harvester
|
||||||
|
register: ckanext_harvest_install
|
||||||
|
tags: [ 'ckan', 'geonetwork', 'ckan_plugins' ]
|
||||||
|
|
||||||
|
- name: Download the CKAN ckanext-harvest requirements
|
||||||
|
become: True
|
||||||
|
become_user: '{{ ckan_shell_user }}'
|
||||||
|
pip: requirements={{ ckan_virtenv }}/src/ckanext-harvest/pip-requirements.txt virtualenv={{ ckan_virtenv }}
|
||||||
|
when: ckan_geonetwork_harvester
|
||||||
|
tags: [ 'ckan', 'geonetwork', 'ckan_plugins' ]
|
||||||
|
|
||||||
|
- name: Initialize the CKAN ckanext-harvest plugin
|
||||||
|
become: True
|
||||||
|
become_user: '{{ ckan_shell_user }}'
|
||||||
|
shell: . /usr/lib/ckan/default/bin/activate ; paster --plugin=ckanext-harvest harvester initdb --config={{ ckan_config_file }}
|
||||||
|
when: ( ckanext_harvest_install | changed )
|
||||||
|
tags: [ 'ckan', 'geonetwork', 'ckan_plugins' ]
|
||||||
|
|
||||||
|
- name: Download the CKAN ckanext-spatial plugin
|
||||||
|
become: True
|
||||||
|
become_user: '{{ ckan_shell_user }}'
|
||||||
|
pip: name='{{ ckan_ckanext_spatial_url }}' virtualenv={{ ckan_virtenv }}
|
||||||
|
notify: Restart CKAN
|
||||||
|
when: ckan_geonetwork_harvester
|
||||||
|
register: ckanext_spatial_install
|
||||||
|
tags: [ 'ckan', 'ckan_spatial', 'ckan_plugins' ]
|
||||||
|
|
||||||
|
- name: Download the CKAN ckanext-spatial requirements
|
||||||
|
become: True
|
||||||
|
become_user: '{{ ckan_shell_user }}'
|
||||||
|
pip: requirements={{ ckan_virtenv }}/src/ckanext-spatial/pip-requirements.txt virtualenv={{ ckan_virtenv }}
|
||||||
|
when: ckan_geonetwork_harvester
|
||||||
|
tags: [ 'ckan', 'ckan_spatial', 'ckan_plugins' ]
|
||||||
|
|
||||||
|
# Run the ckanext-spatial DB initialization once, right after the plugin has
# been installed into the virtualenv.
- name: Initialize the CKAN ckanext-spatial plugin
  become: True
  become_user: '{{ ckan_shell_user }}'
  shell: . /usr/lib/ckan/default/bin/activate ; paster --plugin=ckanext-spatial spatial initdb --config={{ ckan_config_file }}
  when: ( ckanext_spatial_install | changed )
  tags: [ 'ckan', 'ckan_spatial', 'ckan_plugins' ]
|
||||||
|
|
||||||
|
- name: Download the CKAN Geonetwork plugin code
|
||||||
|
become: True
|
||||||
|
become_user: '{{ ckan_shell_user }}'
|
||||||
|
git: repo={{ ckan_geonetwork_harvester_url }} dest=/usr/lib/ckan/default/src/ckanext-geonetwork
|
||||||
|
when: ckan_geonetwork_harvester
|
||||||
|
register: install_geonetwork_harvester
|
||||||
|
tags: [ 'ckan', 'ckan_geonetwork', 'ckan_plugins' ]
|
||||||
|
|
||||||
|
- name: Install the CKAN Geonetwork plugin code
|
||||||
|
become: True
|
||||||
|
become_user: '{{ ckan_shell_user }}'
|
||||||
|
shell: . /usr/lib/ckan/default/bin/activate ; cd /usr/lib/ckan/default/src/ckanext-geonetwork ; python setup.py develop
|
||||||
|
when: ( install_geonetwork_harvester | changed )
|
||||||
|
notify: Restart CKAN
|
||||||
|
tags: [ 'ckan', 'ckan_geonetwork', 'ckan_plugins' ]
|
||||||
|
|
||||||
|
- name: Install the script that updates the tracking data
|
||||||
|
template: src=tracker_update.sh.j2 dest={{ ckan_virtenv }}/bin/tracker_update owner={{ ckan_shell_user }} group={{ ckan_shell_user }} mode=0555
|
||||||
|
when: ckan_geonetwork_harvester
|
||||||
|
tags: [ 'ckan', 'ckan_geonetwork', 'ckan_plugins', 'tracker' ]
|
||||||
|
|
||||||
|
- name: Install the cron job that runs the tracker update script
|
||||||
|
cron: name="tracker update" minute="0" job="{{ ckan_virtenv }}/bin/tracker_update > /var/log/ckan/tracker_update.log 2>&1" user={{ ckan_shell_user }}
|
||||||
|
when: ckan_geonetwork_harvester
|
||||||
|
tags: [ 'ckan', 'ckan_geonetwork', 'ckan_plugins', 'tracker' ]
|
||||||
|
|
||||||
|
- name: Download the CKAN PDF viewer plugin
|
||||||
|
become: True
|
||||||
|
become_user: '{{ ckan_shell_user }}'
|
||||||
|
pip: name='{{ ckan_ckanext_pdfview_url }}' virtualenv={{ ckan_virtenv }}
|
||||||
|
when: ckan_pdfview
|
||||||
|
notify: Restart CKAN
|
||||||
|
tags: [ 'ckan', 'ckan_pdfview', 'ckan_plugins' ]
|
||||||
|
|
||||||
|
- name: Download the CKAN Privatedatasets extension
|
||||||
|
become: True
|
||||||
|
become_user: '{{ ckan_shell_user }}'
|
||||||
|
pip: name='{{ ckan_privatedatasets_url }}' virtualenv={{ ckan_virtenv }}
|
||||||
|
when: ckan_privatedatasets
|
||||||
|
notify: Restart CKAN
|
||||||
|
tags: [ 'ckan', 'ckan_privdatasets', 'ckan_plugins' ]
|
||||||
|
|
||||||
|
- name: Download the CKAN hierarchy plugin code
|
||||||
|
become: True
|
||||||
|
become_user: '{{ ckan_shell_user }}'
|
||||||
|
pip: name='{{ ckan_hierarchy_url }}' virtualenv={{ ckan_virtenv }}
|
||||||
|
when: ckan_hierarchy
|
||||||
|
notify: Restart CKAN
|
||||||
|
tags: [ 'ckan', 'ckan_hierarchy', 'ckan_plugins' ]
|
||||||
|
|
||||||
|
- name: Download the CKAN pages plugin code
|
||||||
|
become: True
|
||||||
|
become_user: '{{ ckan_shell_user }}'
|
||||||
|
pip: name='{{ ckan_pages_url }}' virtualenv={{ ckan_virtenv }}
|
||||||
|
when: ckan_pages
|
||||||
|
notify: Restart CKAN
|
||||||
|
tags: [ 'ckan', 'ckan_pages', 'ckan_plugins' ]
|
||||||
|
|
||||||
|
- name: Overwrite the base.py ckanext-spatial plugin file to enable authentication against the Geonetwork nodes
|
||||||
|
copy: src=base.py dest=/usr/lib/ckan/default/src/ckanext-spatial/ckanext/spatial/harvesters/base.py owner={{ ckan_shell_user }} group={{ ckan_shell_user }} mode=0644 backup=yes
|
||||||
|
notify: Restart CKAN
|
||||||
|
tags: [ 'ckan', 'ckan_pages', 'ckan_plugins', 'ckan_geo_auth' ]
|
||||||
|
|
||||||
|
# The Debian/Ubuntu service is named 'apache2' (as the 'Restart CKAN' handler
# in this role already uses), not 'apache'.
- name: Restart apache
  service: name=apache2 state=restarted enabled=yes
  when: ( ckan_install | changed )
  tags: ckan
|
||||||
|
|
||||||
|
- name: Restart nginx
|
||||||
|
service: name=nginx state=restarted enabled=yes
|
||||||
|
when: ( ckan_install | changed )
|
||||||
|
tags: ckan
|
||||||
|
|
||||||
|
# To create the first sysadmin user:
|
||||||
|
# . /usr/lib/ckan/default/bin/activate
|
||||||
|
# cd /usr/lib/ckan/default/src/ckan
|
||||||
|
# You have to create your first CKAN sysadmin user from the command line. For example, to create a user called seanh and make him a sysadmin:
|
||||||
|
|
||||||
|
# paster sysadmin add seanh -c /etc/ckan/default/production.ini
|
||||||
|
#
|
||||||
|
# To create some test data:
|
||||||
|
# paster create-test-data -c /etc/ckan/default/production.ini
|
|
@ -0,0 +1,8 @@
|
||||||
|
#!/bin/bash

# Update CKAN's page-view tracking data and then rebuild the search index so
# the refreshed counts become available to search/sorting. Intended to be run
# periodically (see the 'tracker update' cron job installed by the ckan role).

. {{ ckan_virtenv }}/bin/activate

paster --plugin=ckan tracking update -c {{ ckan_config_file }}
paster --plugin=ckan search-index rebuild -r -c {{ ckan_config_file }}

exit 0
|
|
@ -0,0 +1,10 @@
|
||||||
|
---
# Create the PostgreSQL extensions listed for each database.
# psql_db_data is expected to be a list of dicts, each with a 'name' key and
# an 'extensions' list — TODO confirm against the role defaults.
- name: Add postgres extensions to the databases, if needed
  become: True
  become_user: postgres
  postgresql_ext: name={{ item.1 }} db={{ item.0.name }} port={{ psql_db_port }}
  with_subelements:
    - '{{ psql_db_data | default([]) }}'
    - extensions
  tags: [ 'postgresql', 'postgres', 'pg_extensions' ]
|
||||||
|
|
Loading…
Reference in New Issue