From 6d45e93666588cf3c402abef440829e0d5ee8706 Mon Sep 17 00:00:00 2001
From: BigDaddyAman <139612136+BigDaddyAman@users.noreply.github.com>
Date: Sat, 13 Sep 2025 21:38:54 +0800
Subject: [PATCH] Initial release: PostgreSQL backup bot with R2 support

---
 .gitignore       |  50 +++++++++++++++
 Dockerfile       |  14 +++++
 MIT License.md   |  21 +++++++
 README.md        |  69 +++++++++++++++++++++
 main.py          | 154 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |   5 ++
 6 files changed, 313 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 MIT License.md
 create mode 100644 README.md
 create mode 100644 main.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..efb6e2a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,50 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual Environment
+venv/
+env/
+ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Environment variables
+.env
+.env.local
+
+# Backup files
+*.gz
+*.7z
+*.dump
+*.sql
+*.tar
+
+# Logs
+*.log
+
+# OS
+.DS_Store
+Thumbs.db
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..e7a4e9d
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,14 @@
+FROM python:3.12-slim
+
+RUN apt-get update && \
+    apt-get install -y postgresql-client gcc libpq-dev gzip && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r /app/requirements.txt
+
+COPY main.py /app/main.py
+
+WORKDIR /app
+
+CMD ["python", "main.py"]
\ No newline at end of file
diff --git a/MIT License.md b/MIT License.md
new file mode 100644
index 0000000..629dc36
--- /dev/null
+++ b/MIT License.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Aman
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..87aacae
--- /dev/null
+++ b/README.md
@@ -0,0 +1,69 @@
+# PostgreSQL Backup Bot for Railway
+
+A lightweight automation bot that creates scheduled PostgreSQL backups and securely uploads them to **Cloudflare R2 object storage**.
+Designed for **Railway deployments**, with built-in support for Docker and cron scheduling.
+
+---
+
+## ✨ Features
+
+- 📦 **Automated Backups** — scheduled daily or hourly backups of your PostgreSQL database
+- 🔐 **Optional Encryption** — compress with gzip or encrypt with 7z and password-protection
+- ☁️ **Cloudflare R2 Integration** — seamless upload to your R2 bucket
+- 🧹 **Retention Policy** — keep a fixed number of backups, auto-clean old ones
+- 🔗 **Flexible Database URL** — supports both private and public PostgreSQL URLs
+- 🐳 **Docker Ready** — lightweight container for portable deployment
+
+---
+
+## 🚀 Deployment on Railway
+
+1. **Fork this repository**
+2. **Create a new project** on [Railway](https://railway.app/)
+3. **Add environment variables** in Railway dashboard:
+
+```env
+DATABASE_URL=           # Your PostgreSQL database URL (private)
+DATABASE_PUBLIC_URL=    # Public database URL (optional)
+USE_PUBLIC_URL=false    # Set to true to use DATABASE_PUBLIC_URL
+DUMP_FORMAT=dump        # Options: sql, plain, dump, custom, tar
+FILENAME_PREFIX=backup  # Prefix for backup files
+MAX_BACKUPS=7           # Number of backups to keep
+R2_ACCESS_KEY=          # Cloudflare R2 access key
+R2_SECRET_KEY=          # Cloudflare R2 secret key
+R2_BUCKET_NAME=         # R2 bucket name
+R2_ENDPOINT=            # R2 endpoint URL
+BACKUP_PASSWORD=        # Optional: password for 7z encryption
+BACKUP_TIME=00:00       # Daily backup time in UTC (HH:MM format)
+```
+
+---
+
+## ⏰ Railway Cron Jobs
+
+You can configure the backup schedule using Railway's built-in cron jobs in the dashboard:
+
+1. Go to your project settings
+2. Navigate to **Deployments** > **Cron**
+3. Add a new cron job pointing to your service
+
+Common cron expressions:
+
+| Schedule | Cron Expression | Description |
+|----------|----------------|-------------|
+| Hourly | `0 * * * *` | Run once every hour |
+| Daily (midnight) | `0 0 * * *` | Run once per day at midnight |
+| Twice Daily | `0 */12 * * *` | Run every 12 hours |
+| Weekly | `0 0 * * 0` | Run once per week (Sunday) |
+| Monthly | `0 0 1 * *` | Run once per month |
+
+Pro Tips:
+- Use [crontab.guru](https://crontab.guru) to verify your cron expressions
+- All times are in UTC
+- Configure backup retention (`MAX_BACKUPS`) according to your schedule
+---
+
+## 📜 License
+
+This project is open source under the MIT License.
+You are free to use, modify, and distribute it with attribution.
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..8cb0349
--- /dev/null
+++ b/main.py
@@ -0,0 +1,154 @@
+import os
+import subprocess
+import boto3
+from datetime import datetime, timezone
+from urllib.parse import urlparse
+from dotenv import load_dotenv
+import time
+import schedule
+import py7zr
+import shutil
+
+load_dotenv()
+
+# --------------------------
+# Environment variables
+# --------------------------
+DATABASE_URL = os.environ.get("DATABASE_URL")
+DATABASE_PUBLIC_URL = os.environ.get("DATABASE_PUBLIC_URL")
+R2_ACCESS_KEY = os.environ.get("R2_ACCESS_KEY")
+R2_SECRET_KEY = os.environ.get("R2_SECRET_KEY")
+R2_BUCKET_NAME = os.environ.get("R2_BUCKET_NAME")
+R2_ENDPOINT = os.environ.get("R2_ENDPOINT")
+MAX_BACKUPS = os.environ.get("MAX_BACKUPS", "7")
+BACKUP_PREFIX = os.environ.get("BACKUP_PREFIX", "")
+FILENAME_PREFIX = os.environ.get("FILENAME_PREFIX", "backup")
+DUMP_FORMAT = os.environ.get("DUMP_FORMAT", "dump")
+BACKUP_PASSWORD = os.environ.get("BACKUP_PASSWORD")
+USE_PUBLIC_URL = os.environ.get("USE_PUBLIC_URL", "false").lower() == "true"
+BACKUP_TIME = os.environ.get("BACKUP_TIME", "00:00")
+
+def log(msg):
+    """Print msg and flush right away so it is not held back in an output buffer."""
+    print(msg, flush=True)
+
+# A malformed MAX_BACKUPS must not crash the bot at import time, and a value
+# below 1 would make the retention step delete the backup that was just uploaded.
+try:
+    MAX_BACKUPS = int(MAX_BACKUPS)
+    if MAX_BACKUPS < 1:
+        raise ValueError(MAX_BACKUPS)
+except ValueError:
+    log("[WARNING] Invalid MAX_BACKUPS value. Using default: 7")
+    MAX_BACKUPS = 7
+
+# Normalise BACKUP_TIME to zero-padded HH:MM; schedule rejects e.g. "7:30" for daily jobs.
+try:
+    hour, minute = (int(part) for part in BACKUP_TIME.split(":"))
+    if not (0 <= hour <= 23 and 0 <= minute <= 59):
+        raise ValueError(BACKUP_TIME)
+    BACKUP_TIME = f"{hour:02d}:{minute:02d}"
+except ValueError:
+    log("[WARNING] Invalid BACKUP_TIME format. Using default: 00:00")
+    BACKUP_TIME = "00:00"
+
+def get_database_url():
+    """Return the URL selected by USE_PUBLIC_URL; raise ValueError if it is not set."""
+    if USE_PUBLIC_URL:
+        if not DATABASE_PUBLIC_URL:
+            raise ValueError("[ERROR] DATABASE_PUBLIC_URL not set but USE_PUBLIC_URL=true!")
+        return DATABASE_PUBLIC_URL
+    if not DATABASE_URL:
+        raise ValueError("[ERROR] DATABASE_URL not set!")
+    return DATABASE_URL
+
+def _create_archive(database_url, pg_format, backup_file, compressed_file):
+    """Dump the database to backup_file and pack it into compressed_file; return True on success."""
+    try:
+        log(f"[INFO] Creating backup {backup_file}")
+        dump_cmd = ["pg_dump", f"--dbname={database_url}", "-F", pg_format, "-f", backup_file]
+        subprocess.run(dump_cmd, check=True)
+        if BACKUP_PASSWORD:
+            log("[INFO] Encrypting backup with 7z...")
+            with py7zr.SevenZipFile(compressed_file, 'w', password=BACKUP_PASSWORD) as archive:
+                archive.write(backup_file)
+            log("[SUCCESS] Backup encrypted successfully")
+        else:
+            log("[INFO] Compressing backup with gzip...")
+            subprocess.run(["gzip", "-f", backup_file], check=True)
+            log("[SUCCESS] Backup compressed successfully")
+        return True
+    except subprocess.CalledProcessError as e:
+        # str(e) embeds the full command line, i.e. the database URL with its
+        # password, so report only the program name and its exit status.
+        log(f"[ERROR] Backup creation failed: {e.cmd[0]} exited with status {e.returncode}")
+    except Exception as e:
+        log(f"[ERROR] Compression/encryption failed: {e}")
+    finally:
+        if os.path.exists(backup_file):
+            os.remove(backup_file)
+    # Only reached after a failure: drop any half-written archive.
+    if os.path.exists(compressed_file):
+        os.remove(compressed_file)
+    return False
+
+def _upload_and_prune(compressed_file, compressed_file_r2):
+    """Upload the archive to R2, then delete this bot's backups beyond the newest MAX_BACKUPS."""
+    client = boto3.client('s3', endpoint_url=R2_ENDPOINT, aws_access_key_id=R2_ACCESS_KEY,
+                          aws_secret_access_key=R2_SECRET_KEY)
+    with open(compressed_file, "rb") as f:
+        client.upload_fileobj(f, R2_BUCKET_NAME, compressed_file_r2)
+    log(f"[SUCCESS] Backup uploaded: {compressed_file_r2}")
+    # Match only keys written by this bot: the bare BACKUP_PREFIX (default "")
+    # also matches unrelated objects, which retention would then delete.
+    key_prefix = f"{BACKUP_PREFIX}{FILENAME_PREFIX}_"
+    # A single list_objects_v2 call is capped at 1000 keys, so paginate.
+    paginator = client.get_paginator("list_objects_v2")
+    pages = paginator.paginate(Bucket=R2_BUCKET_NAME, Prefix=key_prefix)
+    backups = [obj for page in pages for obj in page.get("Contents", [])]
+    backups.sort(key=lambda obj: obj["LastModified"], reverse=True)
+    for obj in backups[MAX_BACKUPS:]:
+        client.delete_object(Bucket=R2_BUCKET_NAME, Key=obj["Key"])
+        log(f"[INFO] Deleted old backup: {obj['Key']}")
+
+def run_backup():
+    """Run one backup cycle; failures are logged, not raised, so the scheduler loop keeps running."""
+    if shutil.which("pg_dump") is None:
+        log("[ERROR] pg_dump not found. Install postgresql-client.")
+        return
+    try:
+        database_url = get_database_url()
+    except ValueError as e:
+        log(str(e))
+        return
+    if not R2_BUCKET_NAME:
+        log("[ERROR] R2_BUCKET_NAME not set!")
+        return
+    log(f"[INFO] Using {'public' if USE_PUBLIC_URL else 'private'} database URL")
+    format_map = {"sql": ("p", "sql"), "plain": ("p", "sql"), "dump": ("c", "dump"),
+                  "custom": ("c", "dump"), "tar": ("t", "tar")}
+    pg_format, ext = format_map.get(DUMP_FORMAT.lower(), ("c", "dump"))
+    timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
+    backup_file = f"{FILENAME_PREFIX}_{timestamp}.{ext}"
+    compressed_file = f"{backup_file}.7z" if BACKUP_PASSWORD else f"{backup_file}.gz"
+
+    if not _create_archive(database_url, pg_format, backup_file, compressed_file):
+        return
+    try:
+        _upload_and_prune(compressed_file, f"{BACKUP_PREFIX}{compressed_file}")
+    except Exception as e:
+        log(f"[ERROR] R2 operation failed: {e}")
+    finally:
+        if os.path.exists(compressed_file):
+            os.remove(compressed_file)
+
+if __name__ == "__main__":
+    log("[INFO] Starting backup scheduler...")
+    log(f"[INFO] Scheduled backup time: {BACKUP_TIME} UTC")
+    # NOTE(review): no tz is passed to at(), so the job presumably fires on the
+    # process-local clock; the "UTC" above holds only if the host runs in UTC - confirm.
+    schedule.every().day.at(BACKUP_TIME).do(run_backup)
+    run_backup()
+    while True:
+        schedule.run_pending()
+        time.sleep(60)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a007127
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+boto3==1.40.30
+psycopg2-binary==2.9.10
+python-dotenv==1.1.1
+py7zr==1.0.0
+schedule==1.2.2
\ No newline at end of file