From 367d366dec985069bcba2161a611f8767bc0e6e5 Mon Sep 17 00:00:00 2001 From: BigDaddyAman <139612136+BigDaddyAman@users.noreply.github.com> Date: Tue, 23 Dec 2025 11:08:14 +0800 Subject: [PATCH] Add parallel pg_dump support and document PG_DUMP_JOBS --- README.md | 140 ++++++++++++++++++++++++++++++++++++------------------ main.py | 61 +++++++++++++++++------- 2 files changed, 137 insertions(+), 64 deletions(-) diff --git a/README.md b/README.md index 04bcb01..10a3f21 100644 --- a/README.md +++ b/README.md @@ -1,74 +1,120 @@ # Postgres-to-R2 Backup A lightweight automation service that creates scheduled PostgreSQL backups and securely uploads them to **Cloudflare R2 object storage**. -Designed for **Railway deployments**, with built-in support for Docker and cron scheduling. +Designed specifically as a **Railway deployment template**, with built-in support for Docker and cron scheduling. --- ## โœจ Features -- ๐Ÿ“ฆ **Automated Backups** โ€” scheduled daily or hourly backups of your PostgreSQL database -- ๐Ÿ” **Optional Encryption** โ€” compress with gzip or encrypt with 7z and password-protection -- โ˜๏ธ **Cloudflare R2 Integration** โ€” seamless upload to your R2 bucket -- ๐Ÿงน **Retention Policy** โ€” keep a fixed number of backups, auto-clean old ones -- ๐Ÿ”— **Flexible Database URL** โ€” supports both private and public PostgreSQL URLs -- ๐Ÿณ **Docker Ready** โ€” lightweight container for portable deployment +- ๐Ÿ“ฆ **Automated Backups** โ€” scheduled daily or hourly PostgreSQL backups +- ๐Ÿ” **Optional Encryption** โ€” gzip compression or 7z encryption with password +- โ˜๏ธ **Cloudflare R2 Integration** โ€” seamless S3-compatible uploads +- ๐Ÿงน **Retention Policy** โ€” automatically delete old backups +- ๐Ÿ”— **Flexible Database URLs** โ€” supports private and public PostgreSQL URLs +- โšก **Optimized Performance** โ€” parallel pg_dump and multipart R2 uploads +- ๐Ÿณ **Docker Ready** โ€” portable, lightweight container +- ๐Ÿš€ **Railway Template 
First** โ€” no fork required for normal usage --- -## ๐Ÿš€ Deployment on Railway +## ๐Ÿš€ Deployment on Railway (Recommended) -1. **Fork this repository** -2. **Create a new project** on [Railway](https://railway.app/) -3. **Add environment variables** in Railway dashboard: - -```env -DATABASE_URL= # Your PostgreSQL database URL (private) -DATABASE_PUBLIC_URL= # Public database URL (optional) -USE_PUBLIC_URL=false # Set to true to use DATABASE_PUBLIC_URL -DUMP_FORMAT=dump # Options: sql, plain, dump, custom, tar -FILENAME_PREFIX=backup # Prefix for backup files -MAX_BACKUPS=7 # Number of backups to keep -R2_ACCESS_KEY= # Cloudflare R2 access key -R2_SECRET_KEY= # Cloudflare R2 secret key -R2_BUCKET_NAME= # R2 bucket name -R2_ENDPOINT= # R2 endpoint URL -BACKUP_PASSWORD= # Optional: password for 7z encryption -BACKUP_TIME=00:00 # Daily backup time in UTC (HH:MM format) -``` - -### Quick Deploy -Click the button below to deploy directly to Railway: +1. Click the **Deploy on Railway** button below +2. Railway will create a new project using the latest version of this repository +3. Add the required environment variables in the Railway dashboard +4. 
(Optional) Configure a cron job for your desired backup schedule [![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/e-ywUS?referralCode=nIQTyp&utm_medium=integration&utm_source=template&utm_campaign=generic) --- +## ๐Ÿ”ง Environment Variables + +```env +DATABASE_URL= # PostgreSQL database URL (private) +DATABASE_PUBLIC_URL= # Public PostgreSQL URL (optional) +USE_PUBLIC_URL=false # Set true to use DATABASE_PUBLIC_URL + +DUMP_FORMAT=dump # sql | plain | dump | custom | tar +FILENAME_PREFIX=backup # Backup filename prefix +MAX_BACKUPS=7 # Number of backups to retain +PG_DUMP_JOBS=1 # Optional: parallel pg_dump jobs (use 2โ€“4 for 1โ€“2GB DBs) + +R2_ACCESS_KEY= # Cloudflare R2 access key +R2_SECRET_KEY= # Cloudflare R2 secret key +R2_BUCKET_NAME= # R2 bucket name +R2_ENDPOINT= # R2 endpoint URL + +BACKUP_PASSWORD= # Optional: enables 7z encryption +BACKUP_TIME=00:00 # Daily backup time (UTC, HH:MM) +``` + +--- + +## โšก Performance Optimization (Optional) + +For larger databases (โ‰ˆ1โ€“2 GB), you can significantly speed up backups by enabling +parallel PostgreSQL dumps. + +### Parallel pg_dump + +Set the number of parallel jobs: + +```env +PG_DUMP_JOBS=4 +``` + +**Notes** +- Note: PostgreSQL supports `pg_dump --jobs` only with the **directory** output format; `custom` and `tar` dumps cannot be parallelized, so verify your `DUMP_FORMAT` before relying on this setting +- Default is `1` (safe for all users) +- Recommended values: `2โ€“4` +- Higher values may overload small databases + +This feature is **fully optional** and disabled by default. + +--- + ## โฐ Railway Cron Jobs -You can configure the backup schedule using Railway's built-in cron jobs in the dashboard: +You can configure the backup schedule using **Railway Cron Jobs**: -1. Go to your project settings -2. Navigate to **Deployments** > **Cron** -3. Add a new cron job pointing to your service +1. Open your Railway project +2. Go to **Deployments โ†’ Cron** +3. 
Add a cron job targeting this service -Common cron expressions: +### Common Cron Expressions | Schedule | Cron Expression | Description | -|----------|----------------|-------------| -| Hourly | `0 * * * *` | Run once every hour | -| Daily (midnight) | `0 0 * * *` | Run once per day at midnight | -| Twice Daily | `0 */12 * * *` | Run every 12 hours | -| Weekly | `0 0 * * 0` | Run once per week (Sunday) | -| Monthly | `0 0 1 * *` | Run once per month | +|--------|----------------|------------| +| Hourly | `0 * * * *` | Every hour | +| Daily | `0 0 * * *` | Once per day (UTC midnight) | +| Twice Daily | `0 */12 * * *` | Every 12 hours | +| Weekly | `0 0 * * 0` | Every Sunday | +| Monthly | `0 0 1 * *` | First day of the month | -Pro Tips: -- Use [crontab.guru](https://crontab.guru) to verify your cron expressions -- All times are in UTC -- Configure backup retention (`MAX_BACKUPS`) according to your schedule -```` +**Tips** +- All cron times are **UTC** +- Use https://crontab.guru to validate expressions +- Adjust `MAX_BACKUPS` to match your schedule -๐Ÿ“œ License +--- + +## ๐Ÿ›  Development & Contributions + +Fork this repository **only if you plan to**: + +- Modify the backup logic +- Add features or integrations +- Submit pull requests +- Run locally for development + +For normal usage, deploying via the **Railway template** is recommended. + +--- + +## ๐Ÿ“œ License + +This project is open source under the **MIT License**. -This project is open source under the MIT License. You are free to use, modify, and distribute it with attribution. 
diff --git a/main.py b/main.py index 4d44a1f..ae136be 100644 --- a/main.py +++ b/main.py @@ -27,6 +27,7 @@ DUMP_FORMAT = os.environ.get("DUMP_FORMAT", "dump") BACKUP_PASSWORD = os.environ.get("BACKUP_PASSWORD") USE_PUBLIC_URL = os.environ.get("USE_PUBLIC_URL", "false").lower() == "true" BACKUP_TIME = os.environ.get("BACKUP_TIME", "00:00") +PG_DUMP_JOBS = int(os.environ.get("PG_DUMP_JOBS", "1")) def log(msg): print(msg, flush=True) @@ -75,24 +76,38 @@ def run_backup(): timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S') backup_file = f"{FILENAME_PREFIX}_{timestamp}.{ext}" - if BACKUP_PASSWORD: - compressed_file = f"{backup_file}.7z" - else: - compressed_file = f"{backup_file}.gz" + compressed_file = ( + f"{backup_file}.7z" if BACKUP_PASSWORD else f"{backup_file}.gz" + ) compressed_file_r2 = f"{BACKUP_PREFIX}{compressed_file}" - ##Create backup + # -------------------------- + # Create backup + # -------------------------- try: log(f"[INFO] Creating backup {backup_file}") - subprocess.run( - ["pg_dump", f"--dbname={database_url}", "-F", pg_format, "-f", backup_file], - check=True - ) + + dump_cmd = [ + "pg_dump", + f"--dbname={database_url}", + "-F", pg_format, + "--no-owner", + "--no-acl", + "-f", backup_file + ] + + if pg_format in ("c", "t") and PG_DUMP_JOBS > 1: + dump_cmd.insert(-2, f"--jobs={PG_DUMP_JOBS}") + log(f"[INFO] Using parallel pg_dump with {PG_DUMP_JOBS} jobs") + + subprocess.run(dump_cmd, check=True) if BACKUP_PASSWORD: log("[INFO] Encrypting backup with 7z...") - with py7zr.SevenZipFile(compressed_file, 'w', password=BACKUP_PASSWORD) as archive: + with py7zr.SevenZipFile( + compressed_file, "w", password=BACKUP_PASSWORD + ) as archive: archive.write(backup_file) log("[SUCCESS] Backup encrypted successfully") else: @@ -113,11 +128,11 @@ def run_backup(): ## Upload to R2 if os.path.exists(compressed_file): size = os.path.getsize(compressed_file) - log(f"[INFO] Final backup size: {size/1024/1024:.2f} MB") - + log(f"[INFO] Final backup 
size: {size / 1024 / 1024:.2f} MB") + try: client = boto3.client( - 's3', + "s3", endpoint_url=R2_ENDPOINT, aws_access_key_id=R2_ACCESS_KEY, aws_secret_access_key=R2_SECRET_KEY @@ -139,11 +154,23 @@ def run_backup(): log(f"[SUCCESS] Backup uploaded: {compressed_file_r2}") - objects = client.list_objects_v2(Bucket=R2_BUCKET_NAME, Prefix=BACKUP_PREFIX) - if 'Contents' in objects: - backups = sorted(objects['Contents'], key=lambda x: x['LastModified'], reverse=True) + objects = client.list_objects_v2( + Bucket=R2_BUCKET_NAME, + Prefix=BACKUP_PREFIX + ) + + if "Contents" in objects: + backups = sorted( + objects["Contents"], + key=lambda x: x["LastModified"], + reverse=True + ) + for obj in backups[MAX_BACKUPS:]: - client.delete_object(Bucket=R2_BUCKET_NAME, Key=obj['Key']) + client.delete_object( + Bucket=R2_BUCKET_NAME, + Key=obj["Key"] + ) log(f"[INFO] Deleted old backup: {obj['Key']}") except Exception as e: