Patch for the Ansible "postgresql" role: WAL archiving and pgpool online recovery.

What the patch does
  * defaults: new psql_log_dir, pg_backup_base_dir, pgpool recovery timeouts and
    the psql_wal_files_conf list of postgresql.conf settings.
  * handlers: pgpool2 is restarted instead of reloaded; new handler that
    restarts postgresql after the WAL settings change.
  * tasks: configure-access.yml restarts postgresql inline instead of
    notifying a handler; postgres_pgpool.yml adds the recovery system user,
    its ssh key, the WAL settings, a sudoers file and a daily cleanup cron job;
    postgresql-backup.yml creates the WAL archive and base backup directories.
  * templates: rewritten stage 1 / stage 2 recovery scripts, renamed remote
    start script, parametrised WAL cleanup script, removed wal_archive_cleanup.

Review amendments applied in this version
  * pgpool_recovery_stage_1.j2 / pgpool_recovery_stage_2.j2: dropped the
    trailing "EOF" line. There is no here-document in either script, so bash
    ran it as a command and the script always exited with status 127.
  * configure-access.yml: every configfile task registered the same variable.
    A task skipped by "when" still overwrites a registered variable, so only
    the last task decided whether the server was restarted. Each task now
    registers its own result and the restart condition ORs them together.
    default({}) keeps the condition valid when the registering tasks are
    excluded by tags.
  * postgres_pgpool.yml: "when" is already evaluated as an expression, so the
    Jinja delimiters around pgpool_recovery_user were removed.
  * pgpool_recovery_stage_1.j2: "rm -f" so that an empty archive directory is
    not reported as an error.
  * Script comments no longer claim that archive_command honours the
    backup_in_progress flag file; the command defined in defaults/main.yml
    does not test for it.

Notes for whoever applies this
  * The line structure was rebuilt from a copy whose line breaks were lost.
    Indentation of context lines and the position of blank lines are a best
    reconstruction; check them against the target tree.
  * Hunk line counts were updated for the amendments. The blob ids on the
    "index" lines are those of the original patch.

diff --git a/postgresql/defaults/main.yml b/postgresql/defaults/main.yml
index 375c780b..79c831f2 100644
--- a/postgresql/defaults/main.yml
+++ b/postgresql/defaults/main.yml
@@ -20,6 +20,7 @@ psql_db_size_c: 170000000
 psql_listen_on_ext_int: False
 psql_use_alternate_data_dir: False
 psql_data_dir: '/var/lib/postgresql/{{ psql_version }}'
+psql_log_dir: /var/log/postgresql
 psql_conf_parameters:
   - { name: 'max_connections', value: '100', set: 'False' }
   - { name: 'ssl', value: 'true', set: 'False' }
@@ -28,7 +29,6 @@ psql_conf_parameters:
   - { name: 'work_mem', value: '1MB', set: 'False' }
   - { name: 'maintenance_work_mem', value: '16MB', set: 'False' }
   - { name: 'max_stack_depth', value: '2MB', set: 'False' }
-  - { name: 'wal_level', value: 'minimal', set: 'False' }
   - { name: 'checkpoint_segments', value: '3', set: 'False' }
   - { name: 'max_files_per_process', value: '1000', set: 'False' }
 
@@ -96,13 +96,15 @@ pgpool_insert_lock: 'on'
 pgpool_lobj_lock_table: ''
 pgpool_replication_stop_on_mismatch: 'on'
 pgpool_failover_if_affected_tuples_mismatch: 'off'
+pgpool_recovery_timeout: 30
+pgpool_client_idle_limit_in_recovery: -1
 pgpool_load_balance_mode: 'on'
 pgpool_ignore_leading_white_space: 'on'
 pgpool_recovery_user: postgres
 # pgpool_recovery_user_pwd: use a vault file for this one
 pgpool_recovery_stage1_script: pgpool_recovery_stage_1
 pgpool_recovery_stage2_script: pgpool_recovery_stage_2
-pgpool_remote_start_script: postgresql_remote_start
+pgpool_remote_start_script: pgpool_remote_start
 pgpool_white_function_list: ''
 pgpool_black_function_list: 'nextval,setval'
 pgpool_allow_sql_comments: 'on'
@@ -115,6 +117,23 @@ pgpool_memqcache_memcached_port: 11211
 pgpool_memqcache_expire: 0
 pgpool_memqcache_auto_cache_invalidation: 'on'
 
+# WAL files archiving is mandatory for pgpool recovery
+psql_wal_files_archiving_enabled: '{{ psql_pgpool_install }}'
+psql_restart_after_wal_enabling: True
+psql_wal_archiving_log_dir: '{{ psql_data_dir }}/archive_log'
+psql_base_backup_dir: '{{ pg_backup_base_dir }}/base_backup'
+psql_wal_files_conf:
+  - { name: 'wal_level', value: 'archive', set: '{{ psql_wal_files_archiving_enabled }}' }
+  - { name: 'wal_sync_method', value: 'fdatasync', set: '{{ psql_wal_files_archiving_enabled }}' }
+  - { name: 'full_page_writes', value: 'on', set: '{{ psql_wal_files_archiving_enabled }}' }
+  - { name: 'wal_log_hints', value: 'on', set: '{{ psql_wal_files_archiving_enabled }}' }
+  - { name: 'archive_mode', value: 'on', set: '{{ psql_wal_files_archiving_enabled }}' }
+  - { name: 'archive_command', value: "'test ! -f {{ psql_wal_archiving_log_dir }}/%f && cp %p {{ psql_wal_archiving_log_dir }}/%f'", set: '{{ psql_wal_files_archiving_enabled }}' }
+  - { name: 'archive_timeout', value: '120', set: '{{ psql_wal_files_archiving_enabled }}' }
+  - { name: 'max_wal_senders', value: '5', set: '{{ psql_wal_files_archiving_enabled }}' }
+  - { name: 'wal_sender_timeout', value: '60s', set: '{{ psql_wal_files_archiving_enabled }}' }
+  - { name: 'max_replication_slots', value: '5', set: '{{ psql_wal_files_archiving_enabled }}' }
+
 # postgis
 postgres_install_gis_extensions: False
 
@@ -130,7 +149,8 @@ pg_backup_retain_copies: 15
 pg_backup_build_db_list: "no"
 # Dynamically created from psql_db_data if pg_backup_db_list is not set
 #pg_backup_db_list: '{{ psql_db_name}}'
-pg_backup_destdir: /var/lib/pgsql/backups
+pg_backup_base_dir: /var/lib/pgsql
+pg_backup_destdir: '{{ pg_backup_base_dir }}/backups'
 pg_backup_logdir: /var/log/postgresql
 pg_backup_logfile: '{{ pg_backup_logdir }}/postgresql-backup.log'
 pg_backup_use_auth: "yes"
diff --git a/postgresql/handlers/main.yml b/postgresql/handlers/main.yml
index a9a803a6..54aa620a 100644
--- a/postgresql/handlers/main.yml
+++ b/postgresql/handlers/main.yml
@@ -13,5 +13,11 @@
   ignore_errors: True
 
 - name: Restart pgpool2
-  service: name=pgpool2 state=reloaded
+  service: name=pgpool2 state=restarted
   when: pgpool_enabled
+
+- name: Restart postgresql with pgpool config
+  service: name=postgresql state=restarted
+  when:
+    - postgresql_enabled
+    - psql_restart_after_wal_enabling
diff --git a/postgresql/tasks/configure-access.yml b/postgresql/tasks/configure-access.yml
index 48987cf3..50ef24d6 100644
--- a/postgresql/tasks/configure-access.yml
+++ b/postgresql/tasks/configure-access.yml
@@ -25,12 +25,12 @@
 
 - name: Set the postgresql listen port
   action: configfile path=/etc/postgresql/{{ psql_version }}/main/postgresql.conf key=port value="{{ psql_db_port }}"
-  notify: Restart postgresql
+  register: psql_conf_port_result
   tags: [ 'postgresql', 'postgres', 'pg_conf' ]
 
 - name: We want postgres listen on the public IP
   action: configfile path=/etc/postgresql/{{ psql_version }}/main/postgresql.conf key=listen_addresses value="'*'"
-  notify: Restart postgresql
+  register: psql_conf_listen_ext_result
   when:
     - psql_listen_on_ext_int
     - psql_db_data is defined
@@ -38,7 +38,7 @@
 
 - name: If postgresql is only accessed from localhost make it listen only on the localhost interface
   action: configfile path=/etc/postgresql/{{ psql_version }}/main/postgresql.conf key=listen_addresses value="'localhost'"
-  notify: Restart postgresql
+  register: psql_conf_listen_local_result
   when:
     - not psql_listen_on_ext_int
     - psql_db_data is defined
@@ -46,19 +46,19 @@
 
 - name: Log the connections
   action: configfile path=/etc/postgresql/{{ psql_version }}/main/postgresql.conf key=log_connections value="on"
-  notify: Restart postgresql
+  register: psql_conf_log_conn_result
   when: psql_db_data is defined
   tags: [ 'postgresql', 'postgres', 'pg_conf' ]
 
 - name: Log the disconnections
   action: configfile path=/etc/postgresql/{{ psql_version }}/main/postgresql.conf key=log_disconnections value="on"
-  notify: Restart postgresql
+  register: psql_conf_log_disconn_result
   when: psql_db_data is defined
   tags: [ 'postgresql', 'postgres', 'pg_conf' ]
 
 - name: Log the hostnames
   action: configfile path=/etc/postgresql/{{ psql_version }}/main/postgresql.conf key=log_hostname value="on"
-  notify: Restart postgresql
+  register: psql_conf_log_hostname_result
   when:
     - psql_listen_on_ext_int
     - psql_db_data is defined
@@ -71,3 +71,16 @@
     - postgresql.conf
   tags: [ 'postgresql', 'postgres', 'pg_hba', 'pg_conf' ]
 
+# One result variable per task: a skipped task still overwrites a shared one.
+- name: Restart the postgresql server after changing parameters that need a restart
+  service: name=postgresql state=restarted
+  when:
+    - postgresql_enabled
+    - >-
+      ( ( psql_conf_port_result | default({}) | changed ) or
+      ( psql_conf_listen_ext_result | default({}) | changed ) or
+      ( psql_conf_listen_local_result | default({}) | changed ) or
+      ( psql_conf_log_conn_result | default({}) | changed ) or
+      ( psql_conf_log_disconn_result | default({}) | changed ) or
+      ( psql_conf_log_hostname_result | default({}) | changed ) )
+  tags: [ 'postgresql', 'postgres', 'pg_hba', 'pg_conf' ]
diff --git a/postgresql/tasks/postgres_pgpool.yml b/postgresql/tasks/postgres_pgpool.yml
index 9bb14dc6..e48256ec 100644
--- a/postgresql/tasks/postgres_pgpool.yml
+++ b/postgresql/tasks/postgres_pgpool.yml
@@ -6,7 +6,7 @@
   notify: Restart postgresql
   tags: [ 'postgresql', 'postgres', 'pgpool' ]
 
-- name: Add the user that will manage the recovery, if not postgres
+- name: Add the postgres user that will manage the recovery, if not postgres
   become: True
   become_user: postgres
   postgresql_user: user={{ pgpool_recovery_user }} password={{ pgpool_recovery_user_pwd }} role_attr_flags=REPLICATION port={{ psql_db_port }}
@@ -27,6 +27,17 @@
   notify: Reload postgresql
   tags: [ 'postgresql', 'postgres', 'pgpool' ]
 
+- name: Add the system user that will manage the recovery, if not postgres
+  user: user={{ pgpool_recovery_user }} password={{ pgpool_recovery_user_pwd | password_hash('sha512') }} groups=postgres shell=/bin/bash system=yes
+  when:
+    - ( pgpool_recovery_user != 'postgres' )
+    - pgpool_recovery_user_pwd is defined
+  tags: [ 'postgresql', 'postgres', 'pgpool' ]
+
+- name: Create the ssh keys for the recovery user
+  user: user={{ pgpool_recovery_user }} generate_ssh_key=yes ssh_key_bits=2048 ssh_key_file=.ssh/id_rsa ssh_key_type=rsa
+  tags: [ 'postgresql', 'postgres', 'pgpool' ]
+
 - name: Install the pgpool recovery and remote restart scripts. They assume that the postgresql hosts can talk to each other
   template: src={{ item.1 }}.j2 dest={{ item.0.backend_data_directory }}/{{ item.1 }} owner=postgres group=postgres mode=0500
   with_nested:
@@ -34,3 +45,23 @@
     - [ '{{ pgpool_recovery_stage1_script }}', '{{ pgpool_recovery_stage2_script }}', '{{ pgpool_remote_start_script }}' ]
   tags: [ 'postgresql', 'postgres', 'pgpool' ]
 
+- name: Set the postgresql configuration parameters needed by pgpool
+  action: configfile path=/etc/postgresql/{{ psql_version }}/main/postgresql.conf key={{ item.name }} value="{{ item.value }}"
+  with_items: '{{ psql_wal_files_conf }}'
+  when:
+    - item.set
+    - psql_wal_files_archiving_enabled
+  notify: Restart postgresql with pgpool config
+  tags: [ 'postgresql', 'postgres', 'pg_conf', 'pgpool' ]
+
+- name: Install the sudoers config that permits the postgres user to restart the service after a recovery
+  template: src=postgresql-sudoers.j2 dest=/etc/sudoers.d/postgres-pgpool owner=root group=root mode=0440
+  tags: [ 'postgres', 'postgresql', 'sudo', 'pgpool' ]
+
+- name: Install a script that cleans up the wal log archives
+  template: src=postgresql_wal_backup_and_removal.j2 dest=/usr/local/sbin/postgresql_wal_backup_and_removal owner=root group=root mode=0755
+  tags: [ 'postgresql', 'postgres', 'pgpool' ]
+
+- name: Install a cron job to cleanup the wal log archives
+  cron: name="Clean up the postgresql WAL log archives" user=postgres job="/usr/local/sbin/postgresql_wal_backup_and_removal > {{ psql_log_dir }}/wal_removal.log 2>&1" special_time=daily
+  tags: [ 'postgresql', 'postgres', 'pgpool' ]
diff --git a/postgresql/tasks/postgresql-backup.yml b/postgresql/tasks/postgresql-backup.yml
index 2deb076e..3cda498a 100644
--- a/postgresql/tasks/postgresql-backup.yml
+++ b/postgresql/tasks/postgresql-backup.yml
@@ -18,3 +18,12 @@
 - name: Create the postgresql backups data directory
   file: dest={{ pg_backup_destdir }} owner=postgres group=postgres mode=700 recurse=yes state=directory
   tags: [ 'pg_backup', 'postgresql', 'postgres' ]
+
+- name: Create the postgresql WAL files backup directories
+  file: dest={{ item }} owner=postgres group=postgres mode=700 recurse=yes state=directory
+  with_items:
+    - '{{ psql_wal_archiving_log_dir }}'
+    - '{{ psql_base_backup_dir }}'
+  when: psql_wal_files_archiving_enabled
+  tags: [ 'pg_backup', 'postgresql', 'postgres' ]
+
diff --git a/postgresql/templates/pgpool.conf.j2 b/postgresql/templates/pgpool.conf.j2
index 678b0799..2875b48b 100644
--- a/postgresql/templates/pgpool.conf.j2
+++ b/postgresql/templates/pgpool.conf.j2
@@ -430,11 +430,11 @@ recovery_1st_stage_command = '{{ pgpool_recovery_stage1_script }}'
                                    # Executes a command in first stage
 recovery_2nd_stage_command = '{{ pgpool_recovery_stage2_script }}'
                                    # Executes a command in second stage
-recovery_timeout = 90
+recovery_timeout = {{ pgpool_recovery_timeout }}
                                    # Timeout in seconds to wait for the
                                    # recovering node's postmaster to start up
                                    # 0 means no wait
-client_idle_limit_in_recovery = 0
+client_idle_limit_in_recovery = {{ pgpool_client_idle_limit_in_recovery }}
                                    # Client is disconnected after being idle
                                    # for that many seconds in the second stage
                                    # of online recovery
diff --git a/postgresql/templates/pgpool_recovery_stage_1.j2 b/postgresql/templates/pgpool_recovery_stage_1.j2
index 85234b37..2add0fe7 100644
--- a/postgresql/templates/pgpool_recovery_stage_1.j2
+++ b/postgresql/templates/pgpool_recovery_stage_1.j2
@@ -1,10 +1,30 @@
 #!/bin/bash
-DATA=$1
+# PGpool stage 1 recovery script
+# Reference: http://michael.stapelberg.de/Artikel/replicated_postgresql_with_pgpool
+#
+TS=$(date +%Y-%m-%d_%H-%M-%S)
+MASTER_HOST=$(hostname -f)
+MASTER_DATA=$1
 RECOVERY_TARGET=$2
 RECOVERY_DATA=$3
 
-psql -c "select pg_start_backup('pgpool-recovery')" postgres
-echo "restore_command = 'scp $HOSTNAME:{{ psql_data_dir }}/archive_log/%f %p'" > {{ psql_data_dir }}/recovery.conf
-tar -C {{ psql_data_dir }}/ -zcf pgsql.tar.gz main
-psql -c 'select pg_stop_backup()' postgres
-scp pgsql.tar.gz $RECOVERY_TARGET:$RECOVERY_DATA
+# Move the PostgreSQL data directory out of our way.
+ssh -T $RECOVERY_TARGET \
+    "[ -d $RECOVERY_DATA ] && mv $RECOVERY_DATA $RECOVERY_DATA.$TS"
+
+# We only use archived WAL logs during recoveries, so delete all
+# logs from the last recovery to limit the growth.
+rm -f $MASTER_DATA/archive_log/*
+
+# Flag file marking a recovery in progress. NOTE: the archive_command set in
+# defaults/main.yml does not test for it, so WAL files are archived regardless.
+touch $MASTER_DATA/archive_log/backup_in_progress
+
+# Perform a backup of the database.
+ssh -T $RECOVERY_TARGET \
+    "pg_basebackup -h $MASTER_HOST -D $RECOVERY_DATA --xlog"
+
+# Configure the restore_command to use the archive_log WALs that the
+# stage 2 recovery script copies over.
+echo "restore_command = 'cp $RECOVERY_DATA/archive_log/%f %p'" | \
+    ssh -T $RECOVERY_TARGET "cat > $RECOVERY_DATA/recovery.conf"
diff --git a/postgresql/templates/pgpool_recovery_stage_2.j2 b/postgresql/templates/pgpool_recovery_stage_2.j2
index 9d6a12de..2a821e7c 100644
--- a/postgresql/templates/pgpool_recovery_stage_2.j2
+++ b/postgresql/templates/pgpool_recovery_stage_2.j2
@@ -1,11 +1,11 @@
 #!/bin/bash
 # Online recovery 2nd stage script
+# Reference: http://michael.stapelberg.de/Artikel/replicated_postgresql_with_pgpool
 #
-datadir=$1 # master dabatase cluster
-DEST=$2 # hostname of the DB node to be recovered
-DESTDIR=$3 # database cluster of the DB node to be recovered
+MASTER_DATA=$1
+RECOVERY_TARGET=$2
+RECOVERY_DATA=$3
 port={{ psql_db_port }} # PostgreSQL port number
-archdir={{ psql_data_dir }}/archive_log # archive log directory
 
 # Force to flush current value of sequences to xlog
 psql -p $port -t -c 'SELECT datname FROM pg_database WHERE NOT datistemplate AND datallowconn' template1|
@@ -16,4 +16,14 @@ do
   fi
 done
 
-psql -p $port -c "SELECT pgpool_switch_xlog('$archdir')" template1
+# Flush all transactions to disk. Since pgpool stopped all connections,
+# there cannot be any data that does not reside on disk until the
+# to-be-recovered host is back on line.
+psql -p $port -c "SELECT pgpool_switch_xlog('$MASTER_DATA/archive_log')" template1
+
+# Copy over all archive logs at once.
+rsync -avx --delete $MASTER_DATA/archive_log/ \
+    $RECOVERY_TARGET:$RECOVERY_DATA/archive_log/
+
+# Remove the flag file created by the stage 1 recovery script.
+rm $MASTER_DATA/archive_log/backup_in_progress
diff --git a/postgresql/templates/postgresql_remote_start.j2 b/postgresql/templates/pgpool_remote_start.j2
similarity index 63%
rename from postgresql/templates/postgresql_remote_start.j2
rename to postgresql/templates/pgpool_remote_start.j2
index 6c44c255..cb2d0157 100644
--- a/postgresql/templates/postgresql_remote_start.j2
+++ b/postgresql/templates/pgpool_remote_start.j2
@@ -3,6 +3,6 @@
 DEST=$1
 DESTDIR=$2
 # Deploy a base backup
-ssh -T $DEST 'cd {{ psql_data_dir }}; tar zxf pgsql.tar.gz' 2>/dev/null 1>/dev/null < /dev/null
+#ssh -T $DEST 'cd {{ psql_data_dir }}; tar zxf pgsql.tar.gz' 2>/dev/null 1>/dev/null < /dev/null
 # Startup PostgreSQL server
 ssh -T $DEST sudo /etc/init.d/postgresql start 2>/dev/null 1>/dev/null < /dev/null
diff --git a/postgresql/templates/postgresql-sudoers.j2 b/postgresql/templates/postgresql-sudoers.j2
new file mode 100644
index 00000000..9726c80c
--- /dev/null
+++ b/postgresql/templates/postgresql-sudoers.j2
@@ -0,0 +1,3 @@
+postgres ALL=(ALL) NOPASSWD: /etc/init.d/postgres*
+
+
diff --git a/postgresql/templates/postgresql_wal_backup_and_removal.j2 b/postgresql/templates/postgresql_wal_backup_and_removal.j2
index bdb4ae20..22d64548 100644
--- a/postgresql/templates/postgresql_wal_backup_and_removal.j2
+++ b/postgresql/templates/postgresql_wal_backup_and_removal.j2
@@ -1,16 +1,16 @@
 #!/bin/bash
 
-BASE_BACKUP_DIR=/var/lib/pgsql/base_backup
-WAL_ARCHIVES_LOG_DIR=/var/lib/postgresql/9.5/archive_log
+BASE_BACKUP_DIR={{ psql_base_backup_dir }}
+WAL_ARCHIVES_LOG_DIR={{ psql_wal_archiving_log_dir }}
 WAL_LATEST_BACKUP=
 
 # The base backup dir needs to be empty
 rm -f $BASE_BACKUP_DIR/*
-pg_basebackup -F t -z -D /var/lib/pgsql/base_backup
+pg_basebackup -F t -z -D $BASE_BACKUP_DIR
 
 cd $WAL_ARCHIVES_LOG_DIR
-WAL_LATEST_BACKUP=$( /bin/ls -1 *.backup | sort | tail -1 )
+WAL_LATEST_BACKUP=$( /bin/ls -1tr *.backup | tail -1 )
 pg_archivecleanup $WAL_ARCHIVES_LOG_DIR $WAL_LATEST_BACKUP
 
 exit $?
 
diff --git a/postgresql/templates/wal_archive_cleanup.j2 b/postgresql/templates/wal_archive_cleanup.j2
deleted file mode 100644
index 76792740..00000000
--- a/postgresql/templates/wal_archive_cleanup.j2
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-su - postgres -c "/usr/local/sbin/postgresql_wal_backup_and_removal > /var/log/postgresql/wal_removal.log 2>&1"
-
-exit 0
-