diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..723ef36f4 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.idea \ No newline at end of file diff --git a/ENVIRONMENT.rst b/ENVIRONMENT.rst index 6875fa68b..22685a593 100644 --- a/ENVIRONMENT.rst +++ b/ENVIRONMENT.rst @@ -17,6 +17,7 @@ Environment Configuration Settings - **PGROOT**: a directory where we put the pgdata (by default /home/postgres/pgroot). One may adjust it to point to the mount point of the persistent volume, such as EBS. - **WALE_TMPDIR**: directory to store WAL-E temporary files. PGROOT/../tmp by default, make sure it has a few GBs of free space. - **PGDATA**: location of PostgreSQL data directory, by default PGROOT/pgdata. +- **WAL_DIRECTORY**: location where the write-ahead log should be stored. By default, the WAL is stored inside PGDATA. This option is useful if you plan to use separate disks for WAL and data. - **PGUSER_STANDBY**: username for the replication user, 'standby' by default. - **PGPASSWORD_STANDBY**: a password for the replication user, 'standby' by default. - **STANDBY_HOST**: hostname or IP address of the primary to stream from. 
diff --git a/postgres-appliance/bootstrap/clone_with_wale.py b/postgres-appliance/bootstrap/clone_with_wale.py index 9e0adc1c5..a0ec6fd2c 100755 --- a/postgres-appliance/bootstrap/clone_with_wale.py +++ b/postgres-appliance/bootstrap/clone_with_wale.py @@ -22,6 +22,7 @@ def read_configuration(): parser = argparse.ArgumentParser(description="Script to clone from S3 with support for point-in-time-recovery") parser.add_argument('--scope', required=True, help='target cluster name') parser.add_argument('--datadir', required=True, help='target cluster postgres data directory') + parser.add_argument('--waldir', default='', help='target cluster postgres wal directory') parser.add_argument('--recovery-target-time', help='the timestamp up to which recovery will proceed (including time zone)', dest='recovery_target_time_string') @@ -29,7 +30,7 @@ def read_configuration(): 'command to fetch that backup without running it') args = parser.parse_args() - options = namedtuple('Options', 'name datadir recovery_target_time dry_run') + options = namedtuple('Options', 'name datadir recovery_target_time dry_run waldir') if args.recovery_target_time_string: recovery_target_time = parse(args.recovery_target_time_string) if recovery_target_time.tzinfo is None: @@ -37,7 +38,7 @@ def read_configuration(): else: recovery_target_time = None - return options(args.scope, args.datadir, recovery_target_time, args.dry_run) + return options(args.scope, args.datadir, recovery_target_time, args.dry_run, args.waldir) def build_wale_command(command, datadir=None, backup=None): @@ -178,10 +179,28 @@ def run_clone_from_s3(options): return 0 +def create_symbolic_link_wal_directory(pg_data, wal_dir): + pg_wal = f'{pg_data}/pg_wal' + logger.info(f"Examining whether WAL already exists or not. 
directory={pg_wal}") + + if not os.path.isdir(pg_wal): + create_symbolic_link_wal_dir = ['ln', '-s', wal_dir, pg_wal] + ret = subprocess.call(create_symbolic_link_wal_dir) + if ret == 0: + logger.info(f"Successfully created a wal directory with symbolic link to {wal_dir}") + else: + raise Exception("Creating a separate wal directory failed with exit code {0}".format(ret)) + else: + logger.info(f"Wal directory with symbolic link to {wal_dir} already exists.") + + def main(): options = read_configuration() try: run_clone_from_s3(options) + logger.info(f'Found waldir={options.waldir}') + if options.waldir and not options.dry_run: + create_symbolic_link_wal_directory(options.datadir, options.waldir) except Exception: logger.exception("Clone failed") return 1 diff --git a/postgres-appliance/scripts/basebackup.sh b/postgres-appliance/scripts/basebackup.sh index 7c8fc68dc..dbd0b6a2f 100755 --- a/postgres-appliance/scripts/basebackup.sh +++ b/postgres-appliance/scripts/basebackup.sh @@ -14,6 +14,9 @@ while getopts ":-:" optchar; do retries=* ) RETRIES=${OPTARG#*=} ;; + wal_dir=* ) + WAL_DIR=${OPTARG#*=} + ;; esac done @@ -27,6 +30,12 @@ else PG_BASEBACKUP_OPTS=() fi +if [[ -n "$WAL_DIR" ]]; then + PG_WAL_OPTS=(--waldir="$WAL_DIR") +else + PG_WAL_OPTS=() +fi + WAL_FAST=$(dirname "$DATA_DIR")/wal_fast readonly WAL_FAST mkdir -p "$WAL_FAST" @@ -97,7 +106,7 @@ fi ATTEMPT=0 while [[ $((ATTEMPT++)) -le $RETRIES ]]; do - pg_basebackup --pgdata="${DATA_DIR}" "${PG_WAL_OPTS[@]}" "${PG_BASEBACKUP_OPTS[@]}" --dbname="${CONNSTR}" & + pg_basebackup --pgdata="${DATA_DIR}" "${PG_WAL_OPTS[@]}" "${PG_BASEBACKUP_OPTS[@]}" --dbname="${CONNSTR}" & basebackup_pid=$! wait $basebackup_pid EXITCODE=$? 
diff --git a/postgres-appliance/scripts/configure_spilo.py b/postgres-appliance/scripts/configure_spilo.py index 3195327fb..13ac246da 100755 --- a/postgres-appliance/scripts/configure_spilo.py +++ b/postgres-appliance/scripts/configure_spilo.py @@ -229,7 +229,7 @@ def deep_update(a, b): method: clone_with_wale clone_with_wale: command: envdir "{{CLONE_WALE_ENV_DIR}}" python3 /scripts/clone_with_wale.py - --recovery-target-time="{{CLONE_TARGET_TIME}}" + --recovery-target-time="{{CLONE_TARGET_TIME}}" --waldir="{{WAL_DIRECTORY}}" recovery_conf: restore_command: envdir "{{CLONE_WALE_ENV_DIR}}" timeout "{{WAL_RESTORE_TIMEOUT}}" /scripts/restore_command.sh "%f" "%p" @@ -254,6 +254,9 @@ def deep_update(a, b): --port={{CLONE_PORT}} --user="{{CLONE_USER}}" {{/CLONE_WITH_BASEBACKUP}} initdb: + {{#WAL_DIRECTORY}} + - waldir: "{{WAL_DIRECTORY}}" + {{/WAL_DIRECTORY}} - encoding: UTF8 - locale: {{INITDB_LOCALE}}.UTF-8 - data-checksums @@ -370,10 +373,12 @@ def deep_update(a, b): threshold_backup_size_percentage: {{WALE_BACKUP_THRESHOLD_PERCENTAGE}} retries: 2 no_master: 1 + wal_dir: "{{WAL_DIRECTORY}}" {{/USE_WALE}} basebackup_fast_xlog: command: /scripts/basebackup.sh retries: 2 + wal_dir: "{{WAL_DIRECTORY}}" {{#STANDBY_WITH_WALE}} bootstrap_standby_with_wale: command: envdir "{{STANDBY_WALE_ENV_DIR}}" bash /scripts/wale_restore.sh @@ -381,6 +386,7 @@ def deep_update(a, b): threshold_backup_size_percentage: {{WALE_BACKUP_THRESHOLD_PERCENTAGE}} retries: 2 no_master: 1 + wal_dir: "{{WAL_DIRECTORY}}" {{/STANDBY_WITH_WALE}} ''' @@ -524,6 +530,7 @@ def get_placeholders(provider): placeholders.setdefault('PGROOT', os.path.join(placeholders['PGHOME'], 'pgroot')) placeholders.setdefault('WALE_TMPDIR', os.path.abspath(os.path.join(placeholders['PGROOT'], '../tmp'))) placeholders.setdefault('PGDATA', os.path.join(placeholders['PGROOT'], 'pgdata')) + placeholders.setdefault('WAL_DIRECTORY', '') placeholders.setdefault('HUMAN_ROLE', 'zalandos') placeholders.setdefault('PGUSER_STANDBY', 
'standby') placeholders.setdefault('PGPASSWORD_STANDBY', 'standby') diff --git a/postgres-appliance/scripts/restore_command.sh b/postgres-appliance/scripts/restore_command.sh index a4bb88939..7164ef5ee 100755 --- a/postgres-appliance/scripts/restore_command.sh +++ b/postgres-appliance/scripts/restore_command.sh @@ -27,7 +27,7 @@ readonly wal_destination=$2 wal_dir=$(dirname "$wal_destination") readonly wal_dir -wal_fast_source=$(dirname "$(dirname "$(realpath "$wal_dir")")")/wal_fast/$wal_filename +wal_fast_source=$(dirname "$PGDATA")/wal_fast/$wal_filename readonly wal_fast_source [[ -f $wal_fast_source ]] && exec mv "${wal_fast_source}" "${wal_destination}" diff --git a/postgres-appliance/scripts/wale_restore.sh b/postgres-appliance/scripts/wale_restore.sh index 497afe30f..9b758c518 100755 --- a/postgres-appliance/scripts/wale_restore.sh +++ b/postgres-appliance/scripts/wale_restore.sh @@ -27,6 +27,9 @@ while getopts ":-:" optchar; do no_master=*|no-master=* ) NO_MASTER=${OPTARG#*=} ;; + wal_dir=* ) + WAL_DIR=${OPTARG#*=} + ;; esac done @@ -87,8 +90,16 @@ while true; do if $WAL_E backup-fetch "$DATA_DIR" LATEST; then version=$(<"$DATA_DIR/PG_VERSION") [[ "$version" =~ \. ]] && wal_name=xlog || wal_name=wal - readonly wal_dir=$DATA_DIR/pg_$wal_name - [[ ! -d $wal_dir ]] && rm -f "$wal_dir" && mkdir "$wal_dir" + readonly pg_wal_location=$DATA_DIR/pg_$wal_name + + # Only create a symbolic link when a separate WAL directory is specified. + if [[ -n "$WAL_DIR" ]]; then + PG_WAL_OPTS=(ln -s "$WAL_DIR" "$pg_wal_location") + else + PG_WAL_OPTS=(mkdir "$pg_wal_location") + fi + + [[ ! 
-d $pg_wal_location ]] && rm -f "$pg_wal_location" && "${PG_WAL_OPTS[@]}" # remove broken symlinks from PGDATA find "$DATA_DIR" -xtype l -delete exit 0 diff --git a/postgres-appliance/tests/test_spilo.sh b/postgres-appliance/tests/test_spilo.sh index 478c44640..44f9f1345 100755 --- a/postgres-appliance/tests/test_spilo.sh +++ b/postgres-appliance/tests/test_spilo.sh @@ -227,6 +227,16 @@ function start_clone_with_basebackup_upgrade_container() { -d spilo3 } +function start_separate_wal_directory_container() { + local ID=$1 + + docker-compose run \ + -e SCOPE=separatewal \ + -e WAL_DIRECTORY="/home/postgres/wal" \ + --name "${PREFIX}separatewal$ID" \ + -d "spilo$ID" +} + function verify_clone_upgrade() { local type=$2 local from_version=$3 @@ -241,6 +251,18 @@ function verify_archive_mode_is_on() { [ "$archive_mode" = "on" ] } +function verify_wal_outside_data_directory() { + local target_path="/home/postgres/wal" + is_symbolic_link=$( + docker_exec "$1" " + [ -L '/home/postgres/pgdata/pgroot/data/pg_wal' ] && + readlink -f '/home/postgres/pgdata/pgroot/data/pg_wal' | grep -q \"$target_path\" && + echo true || + echo false" + ) + [ "$is_symbolic_link" = true ] +} + # TEST SUITE 1 - In-place major upgrade 12->13->...->16 # TEST SUITE 2 - Major upgrade 12->16 after wal-e clone (with CLONE_PGVERSION set) @@ -248,6 +270,7 @@ function verify_archive_mode_is_on() { # TEST SUITE 4 - Major upgrade 12->13 after wal-e clone (no CLONE_PGVERSION) # TEST SUITE 5 - Replica bootstrap with wal-e # TEST SUITE 6 - Major upgrade 13->14 after clone with basebackup +# TEST SUITE 7 - Form a fresh cluster that persists WALs outside of data directory function test_spilo() { # TEST SUITE 1 local container=$1 @@ -345,6 +368,11 @@ function test_spilo() { basebackup_container=$(start_clone_with_basebackup_upgrade_container "$upgrade_container") # SCOPE=upgrade2 PGVERSION=14 CLONE: _SCOPE=upgrade log_info "[TS6] Started $basebackup_container for testing major upgrade 13->14 after clone with 
basebackup" + # TEST SUITE 7 + local separate_wal_container="${PREFIX}separatewal1" + start_separate_wal_directory_container 1 # WAL_DIRECTORY="/home/postgres/wal" SCOPE=separatewal + start_separate_wal_directory_container 2 # WAL_DIRECTORY="/home/postgres/wal" SCOPE=separatewal + log_info "[TS7] Started a fresh cluster to test for persisting WALs on a specified location" # TEST SUITE 1 # run_test test_pg_upgrade_to_16_check_failed "$container" # pg_upgrade --check complains about timescaledb @@ -367,6 +395,12 @@ function test_spilo() { log_info "[TS6] Testing in-place major upgrade 13->14 after clone with basebackup" run_test verify_clone_upgrade "$basebackup_container" "basebackup" 13 14 run_test verify_archive_mode_is_on "$basebackup_container" + + # TEST SUITE 7 + wait_all_streaming "$separate_wal_container" 1 + wait_zero_lag "$separate_wal_container" 1 + run_test verify_wal_outside_data_directory "$separate_wal_container" + run_test verify_wal_outside_data_directory "${PREFIX}separatewal2" } function main() {