commit 10213621f227367987a22aaf4911e10cef0b16db
Author: Kaki Filem Team
Date:   Sat Jan 31 20:35:10 2026 +0800

    Initial commit

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..1557907
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,19 @@
+# Database
+DATABASE_URL=
+DATABASE_PUBLIC_URL=
+USE_PUBLIC_URL=false
+
+# S3 / R2 (S3-compatible storage)
+R2_ENDPOINT=
+R2_BUCKET_NAME=
+R2_ACCESS_KEY=
+R2_SECRET_KEY=
+S3_REGION=us-east-1
+
+# Backup settings
+MAX_BACKUPS=7
+BACKUP_PREFIX=
+FILENAME_PREFIX=backup
+DUMP_FORMAT=dump
+BACKUP_PASSWORD=
+BACKUP_TIME=00:00
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..efb6e2a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,50 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual Environment
+venv/
+env/
+ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Environment variables
+.env
+.env.local
+
+# Backup files
+*.gz
+*.7z
+*.dump
+*.sql
+*.tar
+
+# Logs
+*.log
+
+# OS
+.DS_Store
+Thumbs.db
\ No newline at end of file
diff --git a/CONTRIBUTING b/CONTRIBUTING
new file mode 100644
index 0000000..25d249a
--- /dev/null
+++ b/CONTRIBUTING
@@ -0,0 +1,29 @@
+# Contributing to Postgres-to-R2 Backup
+
+Thanks for your interest in contributing 🎉
+All contributions are welcome — bug reports, documentation improvements, and code changes.
+
+---
+
+## Getting Started
+
+1. Fork the repository
+2. Clone your fork locally
+3. Create a feature branch from `main`
+4. Submit a pull request against `main`
+
+---
+
+## Development
+
+### Requirements
+- Python 3.9+
+- PostgreSQL client tools (`pg_dump`)
+- pip
+- (Optional) Docker
+
+### Local Setup
+```bash
+pip install -r requirements.txt
+python main.py
+```
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..6c4ba7d
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,26 @@
+FROM python:3.12-slim AS builder
+
+RUN apt-get update && \
+    apt-get install -y gcc libpq-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN pip install --upgrade pip setuptools wheel
+
+COPY requirements.txt /app/requirements.txt
+RUN pip install --prefix=/install -r /app/requirements.txt
+
+FROM python:3.12-slim
+
+ENV PYTHONUNBUFFERED=1
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends postgresql-client gzip && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder /install /usr/local
+
+COPY main.py /app/main.py
+
+WORKDIR /app
+
+CMD ["python", "main.py"]
diff --git a/MIT License.md b/MIT License.md
new file mode 100644
index 0000000..629dc36
--- /dev/null
+++ b/MIT License.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Aman
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c570c96
--- /dev/null
+++ b/README.md
@@ -0,0 +1,237 @@
+![License](https://img.shields.io/badge/license-MIT-blue.svg)
+![Python](https://img.shields.io/badge/python-3.12-blue)
+![Storage](https://img.shields.io/badge/storage-S3--compatible-orange)
+![Database](https://img.shields.io/badge/database-PostgreSQL-336791)
+![Deploy](https://img.shields.io/badge/deploy-Railway-purple)
+![Docker](https://img.shields.io/badge/docker-supported-blue)
+
+# Postgres-to-R2 Backup (S3-Compatible)
+
+A lightweight automation service that creates scheduled PostgreSQL backups and securely uploads them to **S3-compatible object storage**
+such as **Cloudflare R2, AWS S3, Wasabi, Backblaze B2, or MinIO**.
+Designed specifically as a **Railway deployment template**, with built-in support for Docker and cron scheduling.
+
+---
+
+## ✨ Features
+
+- 📦 **Automated Backups** — scheduled daily or hourly PostgreSQL backups
+- 🔐 **Optional Encryption** — gzip compression or password-protected 7z encryption
+- ☁️ **Cloudflare R2 Integration** — seamless S3-compatible storage support
+- 🧹 **Retention Policy** — automatically delete old backups
+- 🔗 **Flexible Database URLs** — supports private and public PostgreSQL URLs
+- ⚡ **Optimized Performance** — multipart, multi-threaded S3 uploads
+- 🐳 **Docker Ready** — portable, lightweight container
+- 🚀 **Railway Template First** — no fork required for normal usage
+- 🪣 **S3-Compatible Storage** — works with R2, AWS S3, Wasabi, B2, MinIO
+
+---
+
+## 🚀 Deployment on Railway
+
+1. Click the **Deploy on Railway** button below
+2. Railway will create a new project using the latest version of this repository
+3. Add the required environment variables in the Railway dashboard
+4. (Optional) Configure a cron job for your desired backup schedule
+
+[![Deploy on Railway](https://railway.com/button.svg)](https://railway.com/deploy/postgres-to-r2-backup?referralCode=nIQTyp&utm_medium=integration&utm_source=template&utm_campaign=generic)
+
+---
+
+## 🔧 Environment Variables (S3-Compatible)
+
+```env
+DATABASE_URL=            # PostgreSQL database URL (private)
+DATABASE_PUBLIC_URL=     # Public PostgreSQL URL (optional)
+USE_PUBLIC_URL=false     # Set true to use DATABASE_PUBLIC_URL
+
+DUMP_FORMAT=dump         # sql | plain | dump | custom | tar
+FILENAME_PREFIX=backup   # Backup filename prefix
+MAX_BACKUPS=7            # Number of backups to retain
+
+R2_ENDPOINT=             # S3 endpoint URL
+R2_BUCKET_NAME=          # Bucket name
+R2_ACCESS_KEY=           # Access key
+R2_SECRET_KEY=           # Secret key
+S3_REGION=us-east-1      # Required for AWS S3 (ignored by R2/MinIO)
+
+BACKUP_PASSWORD=         # Optional: enables 7z encryption
+BACKUP_TIME=00:00        # Daily backup time (UTC, HH:MM)
+```
+
+> Variable names use `R2_*` for historical reasons, but **any S3-compatible provider** can be used by changing the endpoint and credentials.
+> For AWS S3 users: ensure `S3_REGION` matches your bucket's region.
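+
+### Restoring a Backup
+
+A minimal restore sketch; which command applies depends on `DUMP_FORMAT` and `BACKUP_PASSWORD` above, and the filenames below are illustrative (they follow the `FILENAME_PREFIX` + UTC-timestamp pattern produced by `main.py`):
+
+```bash
+# Encrypted backups: extract the 7z archive first (needs the 7z CLI; prompts for BACKUP_PASSWORD)
+7z x backup_20260131_000000.dump.7z
+
+# dump/custom format (.dump): restore with pg_restore
+pg_restore --no-owner --no-acl --dbname "$DATABASE_URL" backup_20260131_000000.dump
+
+# sql/plain format (.sql.gz): pipe into psql
+gunzip -c backup_20260131_000000.sql.gz | psql "$DATABASE_URL"
+```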
+ +--- + +## โ˜๏ธ Supported S3-Compatible Providers + +This project uses the **standard AWS S3 API via boto3**, and works with: + +- Cloudflare R2 (recommended) +- AWS S3 +- Wasabi +- Backblaze B2 (S3 API) +- MinIO (self-hosted) + +### Example Endpoints + +| Provider | Endpoint Example | +|--------|------------------| +| Cloudflare R2 | `https://.r2.cloudflarestorage.com` | +| AWS S3 | `https://s3.amazonaws.com` | +| Wasabi | `https://s3.wasabisys.com` | +| Backblaze B2 | `https://s3.us-west-004.backblazeb2.com` | +| MinIO | `http://localhost:9000` | + +--- + +## โฐ Railway Cron Jobs + +You can configure the backup schedule using **Railway Cron Jobs**: + +1. Open your Railway project +2. Go to **Deployments โ†’ Cron** +3. Add a cron job targeting this service + +### Common Cron Expressions + +| Schedule | Cron Expression | Description | +|--------|----------------|------------| +| Hourly | `0 * * * *` | Every hour | +| Daily | `0 0 * * *` | Once per day (UTC midnight) | +| Twice Daily | `0 */12 * * *` | Every 12 hours | +| Weekly | `0 0 * * 0` | Every Sunday | +| Monthly | `0 0 1 * *` | First day of the month | + +**Tips** +- All cron times are **UTC** +- Use https://crontab.guru to validate expressions +- Adjust `MAX_BACKUPS` to match your schedule + +--- + +## ๐Ÿ–ฅ๏ธ Running Locally or on Other Platforms + +It can run on **any platform** that supports: +- Python 3.9+ +- `pg_dump` (PostgreSQL client tools) +- Environment variables +- Long-running background processes or cron + +> Docker images use **Python 3.12** by default. +> Local execution supports **Python 3.9+**. + +### Supported Environments + +- Local machine (Linux / macOS / Windows*) +- VPS (Netcup, Hetzner, DigitalOcean, etc.) +- Docker containers +- Other PaaS providers (Heroku, Fly.io, Render, etc.) + +> *Windows is supported when `pg_dump` is installed and available in PATH.* + +### Local Requirements + +- Python 3.9+ +- PostgreSQL client tools (`pg_dump`) +- pip + +### Run Manually (Local) + +```bash +pip install -r requirements.txt +python main.py +``` + +### Run with Docker (Optional) + +Build and run the image locally: + +```bash +docker build -t postgres-to-r2-backup . +docker run --env-file .env postgres-to-r2-backup +``` + +> Ensure the container is allowed to run continuously when not using an external cron scheduler. + +> All scheduling uses **UTC** by default (e.g. Malaysia UTC+8 โ†’ set `BACKUP_TIME=16:00` for midnight). + +### Run from Prebuilt Docker Image + +If you downloaded a prebuilt Docker image archive (`.tar` or `.tar.gz`), you can run it without building locally: + +```bash +# Extract the archive (if compressed) +tar -xzf postgres-to-r2-backup_v1.0.0.tar.gz + +# Load the image into Docker +docker load -i postgres-to-r2-backup_v1.0.0.tar + +# Run the container +docker run --env-file .env postgres-to-r2-backup:v1.0.0 +``` + +> Prebuilt images are architecture-specific (amd64 / arm64). + +--- + +## ๐Ÿ” Security + +- **Do not expose PostgreSQL directly to the public internet.** + If your database is not on a private network, use a secure tunnel instead. + +- **Recommended: Cloudflare Tunnel** + When using a public database URL, it is strongly recommended to connect via a secure tunnel such as **Cloudflare Tunnel** rather than opening database ports. + +- **Protect credentials** + Store all secrets (database URLs, R2 keys, encryption passwords) using environment variables. + Never commit `.env` files to version control. 
+
+- **Encrypted backups (optional)**
+  Set `BACKUP_PASSWORD` to enable encrypted backups using 7z before uploading to S3-compatible storage.
+
+- **Least privilege access**
+  Use a PostgreSQL user with read-only access where possible, and restrict R2 credentials to the required bucket only.
+
+---
+
+## 🛠 Development & Contributions
+
+Fork this repository **only if you plan to**:
+
+- Modify the backup logic
+- Add features or integrations
+- Submit pull requests
+- Run locally for development
+
+---
+
+## 📜 License
+
+This project is open source under the **MIT License**.
+
+You are free to use, modify, and distribute it with attribution.
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..e4df4f0
--- /dev/null
+++ b/main.py
@@ -0,0 +1,186 @@
+import os
+import subprocess
+import boto3
+from botocore.config import Config
+from datetime import datetime, timezone
+from boto3.s3.transfer import TransferConfig
+from dotenv import load_dotenv
+import time
+import schedule
+import py7zr
+import shutil
+
+load_dotenv()
+
+## ENV
+
+DATABASE_URL = os.environ.get("DATABASE_URL")
+DATABASE_PUBLIC_URL = os.environ.get("DATABASE_PUBLIC_URL")
+R2_ACCESS_KEY = os.environ.get("R2_ACCESS_KEY")
+R2_SECRET_KEY = os.environ.get("R2_SECRET_KEY")
+R2_BUCKET_NAME = os.environ.get("R2_BUCKET_NAME")
+R2_ENDPOINT = os.environ.get("R2_ENDPOINT")
+MAX_BACKUPS = int(os.environ.get("MAX_BACKUPS", 7))
+BACKUP_PREFIX = os.environ.get("BACKUP_PREFIX", "")
+FILENAME_PREFIX = os.environ.get("FILENAME_PREFIX", "backup")
+DUMP_FORMAT = os.environ.get("DUMP_FORMAT", "dump")
+BACKUP_PASSWORD = os.environ.get("BACKUP_PASSWORD")
+USE_PUBLIC_URL = os.environ.get("USE_PUBLIC_URL", "false").lower() == "true"
+BACKUP_TIME = os.environ.get("BACKUP_TIME", "00:00")
+S3_REGION = os.environ.get("S3_REGION", "us-east-1")
+
+def log(msg):
+    print(msg, flush=True)
+
+## Validate BACKUP_TIME
+try:
+    hour, minute = BACKUP_TIME.split(":")
+    if not (0 <= int(hour) <= 23 and 0 <= int(minute) <= 59):
+        raise ValueError
+except ValueError:
+    log("[WARNING] Invalid BACKUP_TIME format. Using default: 00:00")
+    BACKUP_TIME = "00:00"
+
+def get_database_url():
+    if USE_PUBLIC_URL:
+        if not DATABASE_PUBLIC_URL:
+            raise ValueError("[ERROR] DATABASE_PUBLIC_URL not set but USE_PUBLIC_URL=true!")
+        return DATABASE_PUBLIC_URL
+
+    if not DATABASE_URL:
+        raise ValueError("[ERROR] DATABASE_URL not set!")
+    return DATABASE_URL
+
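+## run_backup pipeline: pg_dump -> gzip or password-protected 7z ->
+## upload to object storage -> prune everything beyond the newest
+## MAX_BACKUPS objects. Temporary files are removed in the finally blocks.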
+def run_backup():
+    if shutil.which("pg_dump") is None:
+        log("[ERROR] pg_dump not found. Install postgresql-client.")
+        return
+
+    database_url = get_database_url()
+    log(f"[INFO] Using {'public' if USE_PUBLIC_URL else 'private'} database URL")
+
+    ## Map DUMP_FORMAT to the pg_dump -F flag and file extension
+    format_map = {
+        "sql": ("p", "sql"),
+        "plain": ("p", "sql"),
+        "dump": ("c", "dump"),
+        "custom": ("c", "dump"),
+        "tar": ("t", "tar")
+    }
+    pg_format, ext = format_map.get(DUMP_FORMAT.lower(), ("c", "dump"))
+
+    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
+    backup_file = f"{FILENAME_PREFIX}_{timestamp}.{ext}"
+
+    compressed_file = (
+        f"{backup_file}.7z" if BACKUP_PASSWORD else f"{backup_file}.gz"
+    )
+
+    compressed_file_r2 = f"{BACKUP_PREFIX}{compressed_file}"
+
+    ## Create backup
+    try:
+        log(f"[INFO] Creating backup {backup_file}")
+
+        dump_cmd = [
+            "pg_dump",
+            f"--dbname={database_url}",
+            "-F", pg_format,
+            "--no-owner",
+            "--no-acl",
+            "-f", backup_file
+        ]
+
+        subprocess.run(dump_cmd, check=True)
+
+        if BACKUP_PASSWORD:
+            log("[INFO] Encrypting backup with 7z...")
+            with py7zr.SevenZipFile(compressed_file, "w", password=BACKUP_PASSWORD) as archive:
+                archive.write(backup_file)
+            log("[SUCCESS] Backup encrypted successfully")
+        else:
+            log("[INFO] Compressing backup with gzip...")
+            subprocess.run(["gzip", "-f", backup_file], check=True)
+            log("[SUCCESS] Backup compressed successfully")
+
+    except Exception as e:
+        log(f"[ERROR] Backup creation failed: {e}")
+        return
+    finally:
+        if os.path.exists(backup_file):
+            os.remove(backup_file)
+
+    ## Upload to R2
+    if os.path.exists(compressed_file):
+        size = os.path.getsize(compressed_file)
+        log(f"[INFO] Final backup size: {size / 1024 / 1024:.2f} MB")
+
+        try:
+            client = boto3.client(
+                "s3",
+                endpoint_url=R2_ENDPOINT,
+                aws_access_key_id=R2_ACCESS_KEY,
+                aws_secret_access_key=R2_SECRET_KEY,
+                region_name=S3_REGION,
+                ## Path-style addressing for broad S3 compatibility (R2, MinIO, etc.)
+                config=Config(
+                    s3={"addressing_style": "path"}
+                )
+            )
+
+            config = TransferConfig(
+                multipart_threshold=8 * 1024 * 1024,
+                multipart_chunksize=8 * 1024 * 1024,
+                max_concurrency=4,
+                use_threads=True
+            )
+
+            client.upload_file(
+                compressed_file,
+                R2_BUCKET_NAME,
+                compressed_file_r2,
+                Config=config
+            )
+
+            log(f"[SUCCESS] Backup uploaded: {compressed_file_r2}")
+
+            objects = client.list_objects_v2(
+                Bucket=R2_BUCKET_NAME,
+                Prefix=BACKUP_PREFIX
+            )
+
+            ## Keep only the newest MAX_BACKUPS objects under BACKUP_PREFIX
+            if "Contents" in objects:
+                backups = sorted(
+                    objects["Contents"],
+                    key=lambda x: x["LastModified"],
+                    reverse=True
+                )
+
+                for obj in backups[MAX_BACKUPS:]:
+                    client.delete_object(
+                        Bucket=R2_BUCKET_NAME,
+                        Key=obj["Key"]
+                    )
+                    log(f"[INFO] Deleted old backup: {obj['Key']}")
+
+        except Exception as e:
+            log(f"[ERROR] R2 operation failed: {e}")
+        finally:
+            if os.path.exists(compressed_file):
+                os.remove(compressed_file)
+
+if __name__ == "__main__":
+    log("[INFO] Starting backup scheduler...")
+    log(f"[INFO] Scheduled backup time: {BACKUP_TIME} UTC")
+
+    schedule.every().day.at(BACKUP_TIME).do(run_backup)
+
+    run_backup()
+
+    while True:
+        schedule.run_pending()
+        time.sleep(60)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..56d132c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+boto3==1.42.26
+psycopg2-binary==2.9.10
+python-dotenv==1.2.1
+py7zr==1.1.0
+schedule==1.2.2