Skip to content

chore: log upgrade failures remotely to project #635

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions ansible/files/admin_api_scripts/pg_upgrade_common.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#! /usr/bin/env bash

# Common functions and variables used by pg_upgrade_initiate.sh and pg_upgrade_complete.sh

# Where upgrade logs are reported: the target project ref, and the file that
# holds the API key used to authenticate against it.
REPORTING_CREDENTIALS_FILE="/root/upgrade-reporting-credentials"
REPORTING_PROJECT_REF="ihmaxnjpcccasmrbkpvo"

# The key is left empty when the credentials file is not present.
if [ -f "$REPORTING_CREDENTIALS_FILE" ]; then
    REPORTING_ANON_KEY=$(<"$REPORTING_CREDENTIALS_FILE")
else
    REPORTING_ANON_KEY=""
fi

# Invoke psql against the local "postgres" database as supabase_admin.
# Arguments: any extra psql options/arguments, forwarded unchanged.
# Returns:   psql's exit status.
run_sql() {
    local -a connection=(-h localhost -U supabase_admin -d postgres)
    psql "${connection[@]}" "$@"
}

# Upload the contents of a log file to the remote error_logs endpoint.
#
# Globals:
#   REPORTING_ANON_KEY    (read) API key; the upload is skipped when empty/unset
#   REPORTING_PROJECT_REF (read) project ref used to build the endpoint URL
# Arguments:
#   $1 - path of the log file to upload
#   $2 - optional value for the "step" field (default: "completion")
# Outputs:
#   A "Skipping log upload." notice on stdout when nothing is sent.
# Returns:
#   0 when the upload is skipped or succeeds; non-zero when hostname, jq or
#   curl fails.
function ship_logs {
    # Everything is local so the caller's LOG_FILE and the shell's HOSTNAME
    # are left untouched.
    local log_file=${1:-}
    local step=${2:-completion}
    local host ref payload

    if [ -z "${REPORTING_ANON_KEY:-}" ]; then
        echo "No reporting key found. Skipping log upload."
        return 0
    fi

    if [ ! -f "$log_file" ]; then
        echo "No log file found. Skipping log upload."
        return 0
    fi

    if [ ! -s "$log_file" ]; then
        echo "Log file is empty. Skipping log upload."
        return 0
    fi

    # The reported ref is whatever follows the last "-" in the hostname.
    host=$(hostname) || return
    ref="${host##*-}"

    # Let jq build the whole document so every field is properly JSON-escaped.
    payload=$(jq -Rsc --arg ref "$ref" --arg step "$step" \
        '{ref: $ref, step: $step, content: .}' < "$log_file") || return

    # Send the body on stdin: a large log passed as a single command-line
    # argument can exceed the kernel's per-argument size limit.
    curl -sf -X POST "https://${REPORTING_PROJECT_REF}.supabase.co/rest/v1/error_logs" \
        -H "apikey: ${REPORTING_ANON_KEY}" \
        -H 'Content-type: application/json' \
        --data-binary @- <<< "$payload"
}

# Run a command until it succeeds, up to a maximum number of attempts.
#
# Arguments:
#   $1    - maximum number of attempts
#   $2... - the command (and its arguments) to run
# Outputs:
#   One line on stdout per failed attempt, reporting the exit code.
# Returns:
#   0 as soon as the command succeeds; otherwise the exit code of the last
#   failed attempt.
#
# The pause before the next attempt doubles each time: 2s, 4s, 8s, ...
function retry {
    local retries=$1
    shift

    local count=0
    # Kept local so no globals named after the `exit`/`wait` builtins leak
    # into the calling script.
    local rc delay
    until "$@"; do
        rc=$?
        delay=$((2 ** (count + 1)))
        count=$((count + 1))
        if [ "$count" -lt "$retries" ]; then
            echo "Command $* exited with code $rc, retrying..."
            sleep "$delay"
        else
            echo "Command $* exited with code $rc, no more retries left."
            return "$rc"
        fi
    done
    return 0
}
50 changes: 31 additions & 19 deletions ansible/files/admin_api_scripts/pg_upgrade_complete.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@

set -eEuo pipefail

run_sql() {
psql -h localhost -U supabase_admin -d postgres "$@"
}
# shellcheck disable=SC1091
source ./pg_upgrade_common.sh

LOG_FILE="/tmp/pg-upgrade-complete.log"

cleanup() {
function cleanup {
UPGRADE_STATUS=${1:-"failed"}
EXIT_CODE=${?:-0}

echo "$UPGRADE_STATUS" > /tmp/pg-upgrade-status

ship_logs "$LOG_FILE" || true

exit "$EXIT_CODE"
}

Expand All @@ -29,41 +32,50 @@ function complete_pg_upgrade {
echo "running" > /tmp/pg-upgrade-status

echo "1. Mounting data disk"
mount -a -v
retry 3 mount -a -v

# copying custom configurations
echo "2. Copying custom configurations"
cp -R /data/conf/* /etc/postgresql-custom/
chown -R postgres:postgres /var/lib/postgresql/data
chown -R postgres:postgres /data/pgdata
retry 3 copy_configs

echo "3. Starting postgresql"
service postgresql start
retry 3 service postgresql start

echo "4. Running generated SQL files"
retry 3 run_generated_sql

sleep 5

echo "5. Restarting postgresql"
retry 3 service postgresql restart

echo "6. Starting vacuum analyze"
retry 3 start_vacuum_analyze
}

# Copy the custom configuration from the data disk into
# /etc/postgresql-custom/ and hand the data directories to the postgres user.
#
# Returns: 0 when every step succeeds, otherwise the status of the first
#          step that failed.
function copy_configs {
    # This function is run as the `until` condition inside `retry`, where
    # `set -e` is ignored; without the explicit checks a failed cp would be
    # masked by the exit status of the final chown and never retried.
    cp -R /data/conf/* /etc/postgresql-custom/ || return
    chown -R postgres:postgres /var/lib/postgresql/data || return
    chown -R postgres:postgres /data/pgdata
}

function run_generated_sql {
if [ -d /data/sql ]; then
for FILE in /data/sql/*.sql; do
if [ -f "$FILE" ]; then
run_sql -f "$FILE"
fi
done
fi

sleep 5

echo "5. Restarting postgresql"
service postgresql restart

echo "6. Starting vacuum analyze"
start_vacuum_analyze
}

function start_vacuum_analyze {
echo "complete" > /tmp/pg-upgrade-status
su -c 'vacuumdb --all --analyze-in-stages' -s "$SHELL" postgres
echo "Upgrade job completed"
cleanup "complete"
}

trap cleanup ERR

complete_pg_upgrade >>/var/log/pg-upgrade-complete.log 2>&1 &

complete_pg_upgrade >> $LOG_FILE 2>&1 &
15 changes: 10 additions & 5 deletions ansible/files/admin_api_scripts/pg_upgrade_initiate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ PG13_EXTENSIONS_TO_DISABLE=(

set -eEuo pipefail

# shellcheck disable=SC1091
source ./pg_upgrade_common.sh

LOG_FILE="/var/log/pg-upgrade-initiate.log"

PGVERSION=$1
IS_DRY_RUN=${2:-false}
if [ "$IS_DRY_RUN" != false ]; then
Expand All @@ -35,12 +40,9 @@ fi

MOUNT_POINT="/data_migration"

run_sql() {
psql -h localhost -U supabase_admin -d postgres "$@"
}

POST_UPGRADE_EXTENSION_SCRIPT="/tmp/pg_upgrade/pg_upgrade_extensions.sql"
OLD_PGVERSION=$(run_sql -A -t -c "SHOW server_version;")

# If upgrading from older major PG versions, disable specific extensions
if [[ "$OLD_PGVERSION" =~ 14* ]]; then
EXTENSIONS_TO_DISABLE+=("${PG14_EXTENSIONS_TO_DISABLE[@]}")
Expand All @@ -61,6 +63,9 @@ cleanup() {
if [ -d "${MOUNT_POINT}/pgdata/pg_upgrade_output.d/" ]; then
echo "Copying pg_upgrade output to /var/log"
cp -R "${MOUNT_POINT}/pgdata/pg_upgrade_output.d/" /var/log/ || true
ship_logs "$LOG_FILE" || true
tail -n +1 /var/log/pg_upgrade_output.d/*/* > /var/log/pg_upgrade_output.d/pg_upgrade.log || true
ship_logs "/var/log/pg_upgrade_output.d/pg_upgrade.log" || true
fi

if [ -L /var/lib/postgresql ]; then
Expand Down Expand Up @@ -279,6 +284,6 @@ echo "running" > /tmp/pg-upgrade-status
if [ "$IS_DRY_RUN" = true ]; then
initiate_upgrade
else
initiate_upgrade >> /var/log/pg-upgrade-initiate.log 2>&1 &
initiate_upgrade >> "$LOG_FILE" 2>&1 &
echo "Upgrade initiate job completed"
fi
2 changes: 1 addition & 1 deletion common.vars.pkr.hcl
Original file line number Diff line number Diff line change
@@ -1 +1 @@
postgres-version = "15.1.0.79"
postgres-version = "15.1.0.80"