From 10213621f227367987a22aaf4911e10cef0b16db Mon Sep 17 00:00:00 2001 From: Kaki Filem Team Date: Sat, 31 Jan 2026 20:35:10 +0800 Subject: [PATCH] Initial commit --- .env.example | 19 +++++ .gitignore | 50 +++++++++++ CONTRIBUTING | 28 +++++++ Dockerfile | 26 ++++++ MIT License.md | 21 +++++ README.md | 214 +++++++++++++++++++++++++++++++++++++++++++++++ main.py | 180 +++++++++++++++++++++++++++++++++++++++ requirements.txt | 5 ++ 8 files changed, 543 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 CONTRIBUTING create mode 100644 Dockerfile create mode 100644 MIT License.md create mode 100644 README.md create mode 100644 main.py create mode 100644 requirements.txt diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..1557907 --- /dev/null +++ b/.env.example @@ -0,0 +1,19 @@ +# Database +DATABASE_URL= +DATABASE_PUBLIC_URL= +USE_PUBLIC_URL=false + +# S3 / R2 (S3-compatible storage) +R2_ENDPOINT= +R2_BUCKET_NAME= +R2_ACCESS_KEY= +R2_SECRET_KEY= +S3_REGION=us-east-1 + +# Backup settings +MAX_BACKUPS=7 +BACKUP_PREFIX= +FILENAME_PREFIX=backup +DUMP_FORMAT=dump +BACKUP_PASSWORD= +BACKUP_TIME=00:00 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..efb6e2a --- /dev/null +++ b/.gitignore @@ -0,0 +1,50 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual Environment +venv/ +env/ +ENV/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Environment variables +.env +.env.local + +# Backup files +*.gz +*.7z +*.dump +*.sql +*.tar + +# Logs +*.log + +# OS +.DS_Store +Thumbs.db \ No newline at end of file diff --git a/CONTRIBUTING b/CONTRIBUTING new file mode 100644 index 0000000..25d249a --- /dev/null +++ b/CONTRIBUTING @@ -0,0 +1,28 @@ +# Contributing to Postgres-to-R2 Backup + +Thanks for your interest in contributing 🎉 +All 
contributions are welcome — bug reports, documentation improvements, and code changes. + +--- + +## Getting Started + +1. Fork the repository +2. Clone your fork locally +3. Create a feature branch from `main` +4. Submit a pull request against `main` + +--- + +## Development + +### Requirements +- Python 3.9+ +- PostgreSQL client tools (`pg_dump`) +- pip +- (Optional) Docker + +### Local Setup +```bash +pip install -r requirements.txt +python main.py diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6c4ba7d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,26 @@ +FROM python:3.12-slim AS builder + +RUN apt-get update && \ + apt-get install -y gcc libpq-dev && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip setuptools wheel + +COPY requirements.txt /app/requirements.txt +RUN pip install --prefix=/install -r /app/requirements.txt + +FROM python:3.12-slim + +ENV PYTHONUNBUFFERED=1 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends postgresql-client gzip && \ + rm -rf /var/lib/apt/lists/* + +COPY --from=builder /install /usr/local + +COPY main.py /app/main.py + +WORKDIR /app + +CMD ["python", "main.py"] diff --git a/MIT License.md b/MIT License.md new file mode 100644 index 0000000..629dc36 --- /dev/null +++ b/MIT License.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Aman + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..c570c96 --- /dev/null +++ b/README.md @@ -0,0 +1,214 @@ +![License](https://img.shields.io/badge/license-MIT-blue.svg) +![Python](https://img.shields.io/badge/python-3.12-blue) +![Storage](https://img.shields.io/badge/storage-S3--compatible-orange) +![Database](https://img.shields.io/badge/database-PostgreSQL-336791) +![Deploy](https://img.shields.io/badge/deploy-Railway-purple) +![Docker](https://img.shields.io/badge/docker-supported-blue) + +# Postgres-to-R2 Backup (S3-Compatible) + +A lightweight automation service that creates scheduled PostgreSQL backups and securely uploads them to **S3-compatible object storage** +such as **Cloudflare R2, AWS S3, Wasabi, Backblaze B2, or MinIO**. +Designed specifically as a **Railway deployment template**, with built-in support for Docker and cron scheduling. 
+ +--- + +## ✨ Features + +- 📦 **Automated Backups** — scheduled daily or hourly PostgreSQL backups +- 🔐 **Optional Encryption** — gzip compression or 7z encryption with password +- ☁️ **Cloudflare R2 Integration** — seamless S3-compatible storage support +- 🧹 **Retention Policy** — automatically delete old backups +- 🔗 **Flexible Database URLs** — supports private and public PostgreSQL URLs +- ⚡ **Optimized Performance** — parallel pg_dump and multipart S3 uploads +- 🐳 **Docker Ready** — portable, lightweight container +- 🚀 **Railway Template First** — no fork required for normal usage +- 🪣 **S3-Compatible Storage** — works with R2, AWS S3, Wasabi, B2, MinIO + +--- + +## 🚀 Deployment on Railway + +1. Click the **Deploy on Railway** button below +2. Railway will create a new project using the latest version of this repository +3. Add the required environment variables in the Railway dashboard +4. (Optional) Configure a cron job for your desired backup schedule + +[![Deploy on Railway](https://railway.com/button.svg)](https://railway.com/deploy/postgres-to-r2-backup?referralCode=nIQTyp&utm_medium=integration&utm_source=template&utm_campaign=generic) + +--- + +## 🔧 Environment Variables (S3-Compatible) + +```env +DATABASE_URL= # PostgreSQL database URL (private) +DATABASE_PUBLIC_URL= # Public PostgreSQL URL (optional) +USE_PUBLIC_URL=false # Set true to use DATABASE_PUBLIC_URL + +DUMP_FORMAT=dump # sql | plain | dump | custom | tar +FILENAME_PREFIX=backup # Backup filename prefix +MAX_BACKUPS=7 # Number of backups to retain + +R2_ENDPOINT= # S3 endpoint URL +R2_BUCKET_NAME= # Bucket name +R2_ACCESS_KEY= # Access key +R2_SECRET_KEY= # Secret key +S3_REGION=us-east-1 # Required for AWS S3 (ignored by R2/MinIO) + +BACKUP_PASSWORD= # Optional: enables 7z encryption +BACKUP_TIME=00:00 # Daily backup time (UTC, HH:MM) +``` + +> Variable names use `R2_*` for historical reasons, but **any S3-compatible provider** can be 
used by changing the endpoint and credentials. +> For AWS S3 users: ensure `S3_REGION` matches your bucket’s region. + +--- + +## ☁️ Supported S3-Compatible Providers + +This project uses the **standard AWS S3 API via boto3**, and works with: + +- Cloudflare R2 (recommended) +- AWS S3 +- Wasabi +- Backblaze B2 (S3 API) +- MinIO (self-hosted) + +### Example Endpoints + +| Provider | Endpoint Example | +|--------|------------------| +| Cloudflare R2 | `https://<ACCOUNT_ID>.r2.cloudflarestorage.com` | +| AWS S3 | `https://s3.amazonaws.com` | +| Wasabi | `https://s3.wasabisys.com` | +| Backblaze B2 | `https://s3.us-west-004.backblazeb2.com` | +| MinIO | `http://localhost:9000` | + +--- + +## ⏰ Railway Cron Jobs + +You can configure the backup schedule using **Railway Cron Jobs**: + +1. Open your Railway project +2. Go to **Deployments → Cron** +3. Add a cron job targeting this service + +### Common Cron Expressions + +| Schedule | Cron Expression | Description | +|--------|----------------|------------| +| Hourly | `0 * * * *` | Every hour | +| Daily | `0 0 * * *` | Once per day (UTC midnight) | +| Twice Daily | `0 */12 * * *` | Every 12 hours | +| Weekly | `0 0 * * 0` | Every Sunday | +| Monthly | `0 0 1 * *` | First day of the month | + +**Tips** +- All cron times are **UTC** +- Use https://crontab.guru to validate expressions +- Adjust `MAX_BACKUPS` to match your schedule + +--- + +## 🖥️ Running Locally or on Other Platforms + +It can run on **any platform** that supports: +- Python 3.9+ +- `pg_dump` (PostgreSQL client tools) +- Environment variables +- Long-running background processes or cron + +> Docker images use **Python 3.12** by default. +> Local execution supports **Python 3.9+**. + +### Supported Environments + +- Local machine (Linux / macOS / Windows*) +- VPS (Netcup, Hetzner, DigitalOcean, etc.) +- Docker containers +- Other PaaS providers (Heroku, Fly.io, Render, etc.) 
+ +> *Windows is supported when `pg_dump` is installed and available in PATH.* + +### Local Requirements + +- Python 3.9+ +- PostgreSQL client tools (`pg_dump`) +- pip + +### Run Manually (Local) + +```bash +pip install -r requirements.txt +python main.py +``` + +### Run with Docker (Optional) + +Build and run the image locally: + +```bash +docker build -t postgres-to-r2-backup . +docker run --env-file .env postgres-to-r2-backup +``` + +> Ensure the container is allowed to run continuously when not using an external cron scheduler. + +> All scheduling uses **UTC** by default (e.g. Malaysia UTC+8 → set `BACKUP_TIME=16:00` for midnight). + +### Run from Prebuilt Docker Image + +If you downloaded a prebuilt Docker image archive (`.tar` or `.tar.gz`), you can run it without building locally: + +```bash +# Extract the archive (if compressed) +tar -xzf postgres-to-r2-backup_v1.0.0.tar.gz + +# Load the image into Docker +docker load -i postgres-to-r2-backup_v1.0.0.tar + +# Run the container +docker run --env-file .env postgres-to-r2-backup:v1.0.0 +``` + +> Prebuilt images are architecture-specific (amd64 / arm64). + +--- + +## 🔐 Security + +- **Do not expose PostgreSQL directly to the public internet.** + If your database is not on a private network, use a secure tunnel instead. + +- **Recommended: Cloudflare Tunnel** + When using a public database URL, it is strongly recommended to connect via a secure tunnel such as **Cloudflare Tunnel** rather than opening database ports. + +- **Protect credentials** + Store all secrets (database URLs, R2 keys, encryption passwords) using environment variables. + Never commit `.env` files to version control. + +- **Encrypted backups (optional)** + Set `BACKUP_PASSWORD` to enable encrypted backups using 7z before uploading to S3-compatible storage. + +- **Least privilege access** + Use a PostgreSQL user with read-only access where possible, and restrict R2 credentials to the required bucket only. 
+
+---
+
+## 🛠 Development & Contributions
+
+Fork this repository **only if you plan to**:
+
+- Modify the backup logic
+- Add features or integrations
+- Submit pull requests
+- Run locally for development
+
+---
+
+## 📜 License
+
+This project is open source under the **MIT License**.
+
+You are free to use, modify, and distribute it with attribution.
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..e4df4f0
--- /dev/null
+++ b/main.py
@@ -0,0 +1,257 @@
+import os
+import subprocess
+import boto3
+from boto3.session import Config
+from datetime import datetime, timezone
+from boto3.s3.transfer import TransferConfig
+from dotenv import load_dotenv
+import time
+import schedule
+import py7zr
+import shutil
+
+load_dotenv()
+
+
+def log(msg):
+    """Print *msg* to stdout and flush immediately."""
+    print(msg, flush=True)
+
+
+def _int_env(name, default):
+    """Return env var *name* as an int; use *default* when unset, blank or invalid."""
+    raw = (os.environ.get(name) or "").strip()
+    if not raw:
+        return default
+    try:
+        return int(raw)
+    except ValueError:
+        log(f"[WARNING] Invalid {name} value. Using default: {default}")
+        return default
+
+
+def _normalize_backup_time(value):
+    """Return *value* as zero-padded HH:MM, or "00:00" when it is not a valid time.
+
+    schedule's ``at()`` rejects one-digit hours or minutes, so a value such as
+    "1:05" is rewritten to "01:05" instead of crashing at startup.
+    """
+    try:
+        hour, minute = (int(part) for part in value.split(":"))
+        if not (0 <= hour <= 23 and 0 <= minute <= 59):
+            raise ValueError
+    except ValueError:
+        log("[WARNING] Invalid BACKUP_TIME format. Using default: 00:00")
+        return "00:00"
+    return f"{hour:02d}:{minute:02d}"
+
+
+## ENV
+## Blank values (e.g. "FILENAME_PREFIX=" in .env) fall back to the defaults.
+
+DATABASE_URL = os.environ.get("DATABASE_URL")
+DATABASE_PUBLIC_URL = os.environ.get("DATABASE_PUBLIC_URL")
+R2_ACCESS_KEY = os.environ.get("R2_ACCESS_KEY")
+R2_SECRET_KEY = os.environ.get("R2_SECRET_KEY")
+R2_BUCKET_NAME = os.environ.get("R2_BUCKET_NAME")
+R2_ENDPOINT = os.environ.get("R2_ENDPOINT")
+MAX_BACKUPS = _int_env("MAX_BACKUPS", 7)
+BACKUP_PREFIX = os.environ.get("BACKUP_PREFIX", "")
+FILENAME_PREFIX = os.environ.get("FILENAME_PREFIX") or "backup"
+DUMP_FORMAT = os.environ.get("DUMP_FORMAT") or "dump"
+BACKUP_PASSWORD = os.environ.get("BACKUP_PASSWORD")
+USE_PUBLIC_URL = os.environ.get("USE_PUBLIC_URL", "false").lower() == "true"
+BACKUP_TIME = _normalize_backup_time(os.environ.get("BACKUP_TIME", "00:00"))
+S3_REGION = os.environ.get("S3_REGION") or "us-east-1"
+
+
+def get_database_url():
+    """Return the database URL selected by USE_PUBLIC_URL.
+
+    Raises:
+        ValueError: if the selected URL is not configured.
+    """
+    if USE_PUBLIC_URL:
+        if not DATABASE_PUBLIC_URL:
+            raise ValueError("[ERROR] DATABASE_PUBLIC_URL not set but USE_PUBLIC_URL=true!")
+        return DATABASE_PUBLIC_URL
+
+    if not DATABASE_URL:
+        raise ValueError("[ERROR] DATABASE_URL not set!")
+    return DATABASE_URL
+
+
+def _create_archive(database_url, pg_format, backup_file, compressed_file):
+    """Dump the database into *backup_file* and pack it into *compressed_file*.
+
+    Returns True on success. The plain dump is always removed afterwards, and a
+    half-written archive is removed when any step fails.
+    """
+    try:
+        log(f"[INFO] Creating backup {backup_file}")
+
+        dump_cmd = [
+            "pg_dump",
+            f"--dbname={database_url}",
+            "-F", pg_format,
+            "--no-owner",
+            "--no-acl",
+            "-f", backup_file
+        ]
+
+        subprocess.run(dump_cmd, check=True)
+
+        if BACKUP_PASSWORD:
+            log("[INFO] Encrypting backup with 7z...")
+            with py7zr.SevenZipFile(compressed_file, "w", password=BACKUP_PASSWORD) as archive:
+                archive.write(backup_file)
+            log("[SUCCESS] Backup encrypted successfully")
+        else:
+            log("[INFO] Compressing backup with gzip...")
+            subprocess.run(["gzip", "-f", backup_file], check=True)
+            log("[SUCCESS] Backup compressed successfully")
+        return True
+
+    except subprocess.CalledProcessError as e:
+        # str(e) embeds the full command line, i.e. the database URL with its
+        # password, so only the program name and exit status are logged.
+        log(f"[ERROR] Backup creation failed: {e.cmd[0]} exited with status {e.returncode}")
+    except Exception as e:
+        # py7zr / filesystem errors must not escape: an exception raised inside a
+        # scheduled job would propagate out of schedule.run_pending().
+        log(f"[ERROR] Backup creation failed: {e}")
+    finally:
+        if os.path.exists(backup_file):
+            os.remove(backup_file)
+
+    if os.path.exists(compressed_file):
+        os.remove(compressed_file)
+    return False
+
+
+def _prune_old_backups(client):
+    """Delete all but the newest MAX_BACKUPS backups written by this tool."""
+    # List only "<BACKUP_PREFIX><FILENAME_PREFIX>_" keys. With the default empty
+    # BACKUP_PREFIX a bare prefix listing covers the whole bucket, so unrelated
+    # objects would be counted and deleted as old backups.
+    prefix = f"{BACKUP_PREFIX}{FILENAME_PREFIX}_"
+
+    # list_objects_v2 returns at most 1000 keys per call; paginate to see them all.
+    backups = []
+    paginator = client.get_paginator("list_objects_v2")
+    for page in paginator.paginate(Bucket=R2_BUCKET_NAME, Prefix=prefix):
+        backups.extend(page.get("Contents", []))
+    backups.sort(key=lambda obj: obj["LastModified"], reverse=True)
+
+    # Keep at least one backup so MAX_BACKUPS=0 cannot delete the fresh upload.
+    for obj in backups[max(MAX_BACKUPS, 1):]:
+        client.delete_object(
+            Bucket=R2_BUCKET_NAME,
+            Key=obj["Key"]
+        )
+        log(f"[INFO] Deleted old backup: {obj['Key']}")
+
+
+def _upload_backup(compressed_file, object_key):
+    """Upload *compressed_file* as *object_key*, prune old backups, remove the local file."""
+    try:
+        size = os.path.getsize(compressed_file)
+        log(f"[INFO] Final backup size: {size / 1024 / 1024:.2f} MB")
+
+        client = boto3.client(
+            "s3",
+            endpoint_url=R2_ENDPOINT,
+            aws_access_key_id=R2_ACCESS_KEY,
+            aws_secret_access_key=R2_SECRET_KEY,
+            region_name=S3_REGION,
+            config=Config(
+                s3={"addressing_style": "path"}
+            )
+        )
+
+        config = TransferConfig(
+            multipart_threshold=8 * 1024 * 1024,
+            multipart_chunksize=8 * 1024 * 1024,
+            max_concurrency=4,
+            use_threads=True
+        )
+
+        client.upload_file(
+            compressed_file,
+            R2_BUCKET_NAME,
+            object_key,
+            Config=config
+        )
+
+        log(f"[SUCCESS] Backup uploaded: {object_key}")
+
+        _prune_old_backups(client)
+
+    except Exception as e:
+        log(f"[ERROR] R2 operation failed: {e}")
+    finally:
+        if os.path.exists(compressed_file):
+            os.remove(compressed_file)
+
+
+def run_backup():
+    """Create one backup, upload it and apply the retention policy.
+
+    Every failure is logged instead of raised, so a failed run does not stop the
+    scheduler loop.
+    """
+    if shutil.which("pg_dump") is None:
+        log("[ERROR] pg_dump not found. Install postgresql-client.")
+        return
+
+    try:
+        database_url = get_database_url()
+    except ValueError as e:
+        log(str(e))
+        return
+    log(f"[INFO] Using {'public' if USE_PUBLIC_URL else 'private'} database URL")
+
+    format_map = {
+        "sql": ("p", "sql"),
+        "plain": ("p", "sql"),
+        "dump": ("c", "dump"),
+        "custom": ("c", "dump"),
+        "tar": ("t", "tar")
+    }
+    pg_format, ext = format_map.get(DUMP_FORMAT.lower(), ("c", "dump"))
+
+    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
+    backup_file = f"{FILENAME_PREFIX}_{timestamp}.{ext}"
+
+    compressed_file = (
+        f"{backup_file}.7z" if BACKUP_PASSWORD else f"{backup_file}.gz"
+    )
+
+    compressed_file_r2 = f"{BACKUP_PREFIX}{compressed_file}"
+
+    if _create_archive(database_url, pg_format, backup_file, compressed_file):
+        _upload_backup(compressed_file, compressed_file_r2)
+
+
+def main():
+    """Run one backup immediately, then one every day at BACKUP_TIME (UTC)."""
+    # schedule fires jobs on the process's local clock. Pin the process to UTC so
+    # BACKUP_TIME means what the log line says (time.tzset() is Unix-only).
+    if hasattr(time, "tzset"):
+        os.environ["TZ"] = "UTC"
+        time.tzset()
+
+    log("[INFO] Starting backup scheduler...")
+    log(f"[INFO] Scheduled backup time: {BACKUP_TIME} UTC")
+
+    schedule.every().day.at(BACKUP_TIME).do(run_backup)
+
+    run_backup()
+
+    while True:
+        schedule.run_pending()
+        time.sleep(60)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..56d132c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+boto3==1.42.26
+psycopg2-binary==2.9.10
+python-dotenv==1.2.1
+py7zr==1.1.0
+schedule==1.2.2