From 367d366dec985069bcba2161a611f8767bc0e6e5 Mon Sep 17 00:00:00 2001 From: BigDaddyAman <139612136+BigDaddyAman@users.noreply.github.com> Date: Tue, 23 Dec 2025 11:08:14 +0800 Subject: [PATCH] Add parallel pg_dump support and document PG_DUMP_JOBS --- README.md | 140 ++++++++++++++++++++++++++++++++++++------------------ main.py | 61 +++++++++++++++++------- 2 files changed, 137 insertions(+), 64 deletions(-) diff --git a/README.md b/README.md index 04bcb01..10a3f21 100644 --- a/README.md +++ b/README.md @@ -1,74 +1,120 @@ # Postgres-to-R2 Backup A lightweight automation service that creates scheduled PostgreSQL backups and securely uploads them to **Cloudflare R2 object storage**. -Designed for **Railway deployments**, with built-in support for Docker and cron scheduling. +Designed specifically as a **Railway deployment template**, with built-in support for Docker and cron scheduling. --- ## โœจ Features -- ๐Ÿ“ฆ **Automated Backups** โ€” scheduled daily or hourly backups of your PostgreSQL database -- ๐Ÿ” **Optional Encryption** โ€” compress with gzip or encrypt with 7z and password-protection -- โ˜๏ธ **Cloudflare R2 Integration** โ€” seamless upload to your R2 bucket -- ๐Ÿงน **Retention Policy** โ€” keep a fixed number of backups, auto-clean old ones -- ๐Ÿ”— **Flexible Database URL** โ€” supports both private and public PostgreSQL URLs -- ๐Ÿณ **Docker Ready** โ€” lightweight container for portable deployment +- ๐Ÿ“ฆ **Automated Backups** โ€” scheduled daily or hourly PostgreSQL backups +- ๐Ÿ” **Optional Encryption** โ€” gzip compression or 7z encryption with password +- โ˜๏ธ **Cloudflare R2 Integration** โ€” seamless S3-compatible uploads +- ๐Ÿงน **Retention Policy** โ€” automatically delete old backups +- ๐Ÿ”— **Flexible Database URLs** โ€” supports private and public PostgreSQL URLs +- โšก **Optimized Performance** โ€” parallel pg_dump and multipart R2 uploads +- ๐Ÿณ **Docker Ready** โ€” portable, lightweight container +- ๐Ÿš€ **Railway Template 
First** โ€” no fork required for normal usage --- -## ๐Ÿš€ Deployment on Railway +## ๐Ÿš€ Deployment on Railway (Recommended) -1. **Fork this repository** -2. **Create a new project** on [Railway](https://railway.app/) -3. **Add environment variables** in Railway dashboard: - -```env -DATABASE_URL= # Your PostgreSQL database URL (private) -DATABASE_PUBLIC_URL= # Public database URL (optional) -USE_PUBLIC_URL=false # Set to true to use DATABASE_PUBLIC_URL -DUMP_FORMAT=dump # Options: sql, plain, dump, custom, tar -FILENAME_PREFIX=backup # Prefix for backup files -MAX_BACKUPS=7 # Number of backups to keep -R2_ACCESS_KEY= # Cloudflare R2 access key -R2_SECRET_KEY= # Cloudflare R2 secret key -R2_BUCKET_NAME= # R2 bucket name -R2_ENDPOINT= # R2 endpoint URL -BACKUP_PASSWORD= # Optional: password for 7z encryption -BACKUP_TIME=00:00 # Daily backup time in UTC (HH:MM format) -``` - -### Quick Deploy -Click the button below to deploy directly to Railway: +1. Click the **Deploy on Railway** button below +2. Railway will create a new project using the latest version of this repository +3. Add the required environment variables in the Railway dashboard +4. 
(Optional) Configure a cron job for your desired backup schedule [![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/e-ywUS?referralCode=nIQTyp&utm_medium=integration&utm_source=template&utm_campaign=generic) --- +## ๐Ÿ”ง Environment Variables + +```env +DATABASE_URL= # PostgreSQL database URL (private) +DATABASE_PUBLIC_URL= # Public PostgreSQL URL (optional) +USE_PUBLIC_URL=false # Set true to use DATABASE_PUBLIC_URL + +DUMP_FORMAT=dump # sql | plain | dump | custom | tar +FILENAME_PREFIX=backup # Backup filename prefix +MAX_BACKUPS=7 # Number of backups to retain +PG_DUMP_JOBS=1 # Optional: parallel pg_dump jobs (use 2โ€“4 for 1โ€“2GB DBs) + +R2_ACCESS_KEY= # Cloudflare R2 access key +R2_SECRET_KEY= # Cloudflare R2 secret key +R2_BUCKET_NAME= # R2 bucket name +R2_ENDPOINT= # R2 endpoint URL + +BACKUP_PASSWORD= # Optional: enables 7z encryption +BACKUP_TIME=00:00 # Daily backup time (UTC, HH:MM) +``` + +--- + +## โšก Performance Optimization (Optional) + +For larger databases (โ‰ˆ1โ€“2 GB), you can significantly speed up backups by enabling +parallel PostgreSQL dumps. + +### Parallel pg_dump + +Set the number of parallel jobs: + +```env +PG_DUMP_JOBS=4 +``` + +**Notes** +- Note: PostgreSQL supports `pg_dump --jobs` only with the **directory** output format; `custom` and `tar` dumps cannot be parallelized, so verify your `DUMP_FORMAT` before relying on this setting +- Default is `1` (safe for all users) +- Recommended values: `2โ€“4` +- Higher values may overload small databases + +This feature is **fully optional** and disabled by default. + +--- + ## โฐ Railway Cron Jobs -You can configure the backup schedule using Railway's built-in cron jobs in the dashboard: +You can configure the backup schedule using **Railway Cron Jobs**: -1. Go to your project settings -2. Navigate to **Deployments** > **Cron** -3. Add a new cron job pointing to your service +1. Open your Railway project +2. Go to **Deployments โ†’ Cron** +3. 
Add a cron job targeting this service -Common cron expressions: +### Common Cron Expressions | Schedule | Cron Expression | Description | -|----------|----------------|-------------| -| Hourly | `0 * * * *` | Run once every hour | -| Daily (midnight) | `0 0 * * *` | Run once per day at midnight | -| Twice Daily | `0 */12 * * *` | Run every 12 hours | -| Weekly | `0 0 * * 0` | Run once per week (Sunday) | -| Monthly | `0 0 1 * *` | Run once per month | +|--------|----------------|------------| +| Hourly | `0 * * * *` | Every hour | +| Daily | `0 0 * * *` | Once per day (UTC midnight) | +| Twice Daily | `0 */12 * * *` | Every 12 hours | +| Weekly | `0 0 * * 0` | Every Sunday | +| Monthly | `0 0 1 * *` | First day of the month | -Pro Tips: -- Use [crontab.guru](https://crontab.guru) to verify your cron expressions -- All times are in UTC -- Configure backup retention (`MAX_BACKUPS`) according to your schedule -```` +**Tips** +- All cron times are **UTC** +- Use https://crontab.guru to validate expressions +- Adjust `MAX_BACKUPS` to match your schedule -๐Ÿ“œ License +--- + +## ๐Ÿ›  Development & Contributions + +Fork this repository **only if you plan to**: + +- Modify the backup logic +- Add features or integrations +- Submit pull requests +- Run locally for development + +For normal usage, deploying via the **Railway template** is recommended. + +--- + +## ๐Ÿ“œ License + +This project is open source under the **MIT License**. -This project is open source under the MIT License. You are free to use, modify, and distribute it with attribution. 
diff --git a/main.py b/main.py index 4d44a1f..ae136be 100644 --- a/main.py +++ b/main.py @@ -27,6 +27,7 @@ DUMP_FORMAT = os.environ.get("DUMP_FORMAT", "dump") BACKUP_PASSWORD = os.environ.get("BACKUP_PASSWORD") USE_PUBLIC_URL = os.environ.get("USE_PUBLIC_URL", "false").lower() == "true" BACKUP_TIME = os.environ.get("BACKUP_TIME", "00:00") +PG_DUMP_JOBS = int(os.environ.get("PG_DUMP_JOBS", "1")) def log(msg): print(msg, flush=True) @@ -75,24 +76,38 @@ def run_backup(): timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S') backup_file = f"{FILENAME_PREFIX}_{timestamp}.{ext}" - if BACKUP_PASSWORD: - compressed_file = f"{backup_file}.7z" - else: - compressed_file = f"{backup_file}.gz" + compressed_file = ( + f"{backup_file}.7z" if BACKUP_PASSWORD else f"{backup_file}.gz" + ) compressed_file_r2 = f"{BACKUP_PREFIX}{compressed_file}" - ##Create backup + # -------------------------- + # Create backup + # -------------------------- try: log(f"[INFO] Creating backup {backup_file}") - subprocess.run( - ["pg_dump", f"--dbname={database_url}", "-F", pg_format, "-f", backup_file], - check=True - ) + + dump_cmd = [ + "pg_dump", + f"--dbname={database_url}", + "-F", pg_format, + "--no-owner", + "--no-acl", + "-f", backup_file + ] + + if pg_format in ("c", "t") and PG_DUMP_JOBS > 1: + dump_cmd.insert(-2, f"--jobs={PG_DUMP_JOBS}") + log(f"[INFO] Using parallel pg_dump with {PG_DUMP_JOBS} jobs") + + subprocess.run(dump_cmd, check=True) if BACKUP_PASSWORD: log("[INFO] Encrypting backup with 7z...") - with py7zr.SevenZipFile(compressed_file, 'w', password=BACKUP_PASSWORD) as archive: + with py7zr.SevenZipFile( + compressed_file, "w", password=BACKUP_PASSWORD + ) as archive: archive.write(backup_file) log("[SUCCESS] Backup encrypted successfully") else: @@ -113,11 +128,11 @@ def run_backup(): ## Upload to R2 if os.path.exists(compressed_file): size = os.path.getsize(compressed_file) - log(f"[INFO] Final backup size: {size/1024/1024:.2f} MB") - + log(f"[INFO] Final backup 
size: {size / 1024 / 1024:.2f} MB") + try: client = boto3.client( - 's3', + "s3", endpoint_url=R2_ENDPOINT, aws_access_key_id=R2_ACCESS_KEY, aws_secret_access_key=R2_SECRET_KEY @@ -139,11 +154,23 @@ def run_backup(): log(f"[SUCCESS] Backup uploaded: {compressed_file_r2}") - objects = client.list_objects_v2(Bucket=R2_BUCKET_NAME, Prefix=BACKUP_PREFIX) - if 'Contents' in objects: - backups = sorted(objects['Contents'], key=lambda x: x['LastModified'], reverse=True) + objects = client.list_objects_v2( + Bucket=R2_BUCKET_NAME, + Prefix=BACKUP_PREFIX + ) + + if "Contents" in objects: + backups = sorted( + objects["Contents"], + key=lambda x: x["LastModified"], + reverse=True + ) + for obj in backups[MAX_BACKUPS:]: - client.delete_object(Bucket=R2_BUCKET_NAME, Key=obj['Key']) + client.delete_object( + Bucket=R2_BUCKET_NAME, + Key=obj["Key"] + ) log(f"[INFO] Deleted old backup: {obj['Key']}") except Exception as e: