# Patch summary (unified diff). Only this leading comment block is new; the patch payload below is unchanged.
#   * __init__.py      - new file, recorded as a binary diff.
#   * cli.py           - new argparse front end (prog "pg-r2-backup") with the subcommands run, doctor,
#                        config show, init and schedule; "run" calls run_backup imported from main.
#   * main.py          - every line is removed and re-added. Visible content changes: load_dotenv now receives
#                        find_dotenv(usecwd=True) with override=True, gzip is imported, and a new gzip_compress()
#                        helper replaces the external `gzip -f` subprocess call in run_backup.
#   * pyproject.toml   - new packaging metadata (version 1.0.5) declaring the console script
#                        pg-r2-backup = "cli.cli:main".
#   * requirements.txt - boto3 pin moves from 1.42.26 to 1.42.39; the other four pins read the same on both sides.
# NOTE(review): main.py / requirements.txt lines that look identical on the "-" and "+" sides presumably differ
#               only in whitespace or line endings - confirm against the raw patch.
# NOTE(review): cli.py is added at the repository root, while pyproject.toml declares packages = ["cli"] and the
#               entry point "cli.cli:main" - confirm that layout resolves once installed.
# NOTE(review): doctor() always lists DATABASE_URL as required, whereas get_database_url() reads
#               DATABASE_PUBLIC_URL when USE_PUBLIC_URL=true - confirm whether doctor should follow that switch.
diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..06d7405 Binary files /dev/null and b/__init__.py differ diff --git a/cli.py b/cli.py new file mode 100644 index 0000000..4b3d5db --- /dev/null +++ b/cli.py @@ -0,0 +1,166 @@ +import argparse +import shutil +import os +import sys +import textwrap +import importlib.metadata + +from main import run_backup + +def get_version(): + try: + return importlib.metadata.version("pg-r2-backup") + except importlib.metadata.PackageNotFoundError: + return "dev" + + +def mask(value, show=4): + if not value: + return "" + if len(value) <= show: + return "*" * len(value) + return value[:show] + "*" * (len(value) - show) + +def doctor(): + print("pg-r2-backup doctor\n") + + if shutil.which("pg_dump") is None: + print("[FAIL] pg_dump not found in PATH") + else: + print("[OK] pg_dump found") + + required_envs = [ + "DATABASE_URL", + "R2_ACCESS_KEY", + "R2_SECRET_KEY", + "R2_BUCKET_NAME", + "R2_ENDPOINT", + ] + + missing = [e for e in required_envs if not os.environ.get(e)] + + if missing: + print("\n[FAIL] Missing environment variables:") + for m in missing: + print(f" - {m}") + else: + print("\n[OK] Required environment variables set") + + use_public = os.environ.get("USE_PUBLIC_URL", "false").lower() == "true" + print(f"\nDatabase URL mode : {'public' if use_public else 'private'}") + + if os.environ.get("BACKUP_PASSWORD"): + print("Compression : 7z (encrypted)") + else: + print("Compression : gzip") + + print("\nDoctor check complete.") + + +def config_show(): + print("pg-r2-backup config\n") + + config = { + "USE_PUBLIC_URL": os.environ.get("USE_PUBLIC_URL", "false"), + "DUMP_FORMAT": os.environ.get("DUMP_FORMAT", "dump"), + "FILENAME_PREFIX": os.environ.get("FILENAME_PREFIX", "backup"), + "MAX_BACKUPS": os.environ.get("MAX_BACKUPS", "7"), + "BACKUP_TIME": os.environ.get("BACKUP_TIME", "00:00"), + "R2_BUCKET_NAME": os.environ.get("R2_BUCKET_NAME", ""), + "R2_ENDPOINT": os.environ.get("R2_ENDPOINT", ""), + 
"R2_ACCESS_KEY": mask(os.environ.get("R2_ACCESS_KEY")), + "R2_SECRET_KEY": mask(os.environ.get("R2_SECRET_KEY")), + } + + for k, v in config.items(): + print(f"{k:<16} : {v}") + + +def init_env(): + if os.path.exists(".env"): + print("[ERROR] .env already exists") + return + + example = ".env.example" + if not os.path.exists(example): + print("[ERROR] .env.example not found") + return + + shutil.copy(example, ".env") + print("[SUCCESS] .env created from .env.example") + print("Edit the file before running backups.") + + +def schedule_info(): + print(textwrap.dedent(""" + pg-r2-backup scheduling + + Linux / macOS (cron): + 0 0 * * * pg-r2-backup run + + Windows (Task Scheduler): + Program : pg-r2-backup + Args : run + Start in: folder containing .env (working directory) + + Railway / Docker: + Use the platform scheduler + """).strip()) + +def main(): + parser = argparse.ArgumentParser( + prog="pg-r2-backup", + description="PostgreSQL backup tool for Cloudflare R2", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=textwrap.dedent(""" + Examples: + pg-r2-backup doctor + pg-r2-backup run + pg-r2-backup config show + pg-r2-backup init + pg-r2-backup schedule + """) + ) + + parser.add_argument( + "--version", + action="version", + version=f"%(prog)s {get_version()}" + ) + + subparsers = parser.add_subparsers(dest="command") + + subparsers.add_parser("run", help="Run backup immediately") + subparsers.add_parser("doctor", help="Check environment & dependencies") + subparsers.add_parser("schedule", help="Show scheduling examples") + + config_parser = subparsers.add_parser("config", help="Show configuration") + config_sub = config_parser.add_subparsers(dest="subcommand") + config_sub.add_parser("show", help="Show current configuration") + + subparsers.add_parser("init", help="Create .env from .env.example") + + args = parser.parse_args() + + if args.command == "run": + run_backup() + + elif args.command == "doctor": + doctor() + + elif args.command == 
"config" and args.subcommand == "show": + config_show() + + elif args.command == "init": + init_env() + + elif args.command == "schedule": + schedule_info() + + else: + parser.print_help() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/main.py b/main.py index e4df4f0..6be2640 100644 --- a/main.py +++ b/main.py @@ -1,180 +1,188 @@ -import os -import subprocess -import boto3 -from boto3.session import Config -from datetime import datetime, timezone -from boto3.s3.transfer import TransferConfig -from dotenv import load_dotenv -import time -import schedule -import py7zr -import shutil - -load_dotenv() - -## ENV - -DATABASE_URL = os.environ.get("DATABASE_URL") -DATABASE_PUBLIC_URL = os.environ.get("DATABASE_PUBLIC_URL") -R2_ACCESS_KEY = os.environ.get("R2_ACCESS_KEY") -R2_SECRET_KEY = os.environ.get("R2_SECRET_KEY") -R2_BUCKET_NAME = os.environ.get("R2_BUCKET_NAME") -R2_ENDPOINT = os.environ.get("R2_ENDPOINT") -MAX_BACKUPS = int(os.environ.get("MAX_BACKUPS", 7)) -BACKUP_PREFIX = os.environ.get("BACKUP_PREFIX", "") -FILENAME_PREFIX = os.environ.get("FILENAME_PREFIX", "backup") -DUMP_FORMAT = os.environ.get("DUMP_FORMAT", "dump") -BACKUP_PASSWORD = os.environ.get("BACKUP_PASSWORD") -USE_PUBLIC_URL = os.environ.get("USE_PUBLIC_URL", "false").lower() == "true" -BACKUP_TIME = os.environ.get("BACKUP_TIME", "00:00") -S3_REGION = os.environ.get("S3_REGION", "us-east-1") - -def log(msg): - print(msg, flush=True) - -## Validate BACKUP_TIME -try: - hour, minute = BACKUP_TIME.split(":") - if not (0 <= int(hour) <= 23 and 0 <= int(minute) <= 59): - raise ValueError -except ValueError: - log("[WARNING] Invalid BACKUP_TIME format. 
Using default: 00:00") - BACKUP_TIME = "00:00" - -def get_database_url(): - if USE_PUBLIC_URL: - if not DATABASE_PUBLIC_URL: - raise ValueError("[ERROR] DATABASE_PUBLIC_URL not set but USE_PUBLIC_URL=true!") - return DATABASE_PUBLIC_URL - - if not DATABASE_URL: - raise ValueError("[ERROR] DATABASE_URL not set!") - return DATABASE_URL - -def run_backup(): - if shutil.which("pg_dump") is None: - log("[ERROR] pg_dump not found. Install postgresql-client.") - return - - database_url = get_database_url() - log(f"[INFO] Using {'public' if USE_PUBLIC_URL else 'private'} database URL") - - format_map = { - "sql": ("p", "sql"), - "plain": ("p", "sql"), - "dump": ("c", "dump"), - "custom": ("c", "dump"), - "tar": ("t", "tar") - } - pg_format, ext = format_map.get(DUMP_FORMAT.lower(), ("c", "dump")) - - timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") - backup_file = f"{FILENAME_PREFIX}_{timestamp}.{ext}" - - compressed_file = ( - f"{backup_file}.7z" if BACKUP_PASSWORD else f"{backup_file}.gz" - ) - - compressed_file_r2 = f"{BACKUP_PREFIX}{compressed_file}" - - ## Create backup - try: - log(f"[INFO] Creating backup {backup_file}") - - dump_cmd = [ - "pg_dump", - f"--dbname={database_url}", - "-F", pg_format, - "--no-owner", - "--no-acl", - "-f", backup_file - ] - - subprocess.run(dump_cmd, check=True) - - if BACKUP_PASSWORD: - log("[INFO] Encrypting backup with 7z...") - with py7zr.SevenZipFile(compressed_file, "w", password=BACKUP_PASSWORD) as archive: - archive.write(backup_file) - log("[SUCCESS] Backup encrypted successfully") - else: - log("[INFO] Compressing backup with gzip...") - subprocess.run(["gzip", "-f", backup_file], check=True) - log("[SUCCESS] Backup compressed successfully") - - except subprocess.CalledProcessError as e: - log(f"[ERROR] Backup creation failed: {e}") - return - finally: - if os.path.exists(backup_file): - os.remove(backup_file) - - ## Upload to R2 - if os.path.exists(compressed_file): - size = os.path.getsize(compressed_file) - 
log(f"[INFO] Final backup size: {size / 1024 / 1024:.2f} MB") - - try: - client = boto3.client( - "s3", - endpoint_url=R2_ENDPOINT, - aws_access_key_id=R2_ACCESS_KEY, - aws_secret_access_key=R2_SECRET_KEY, - region_name=S3_REGION, - config=Config( - s3={"addressing_style": "path"} - ) - ) - - config = TransferConfig( - multipart_threshold=8 * 1024 * 1024, - multipart_chunksize=8 * 1024 * 1024, - max_concurrency=4, - use_threads=True - ) - - client.upload_file( - compressed_file, - R2_BUCKET_NAME, - compressed_file_r2, - Config=config - ) - - log(f"[SUCCESS] Backup uploaded: {compressed_file_r2}") - - objects = client.list_objects_v2( - Bucket=R2_BUCKET_NAME, - Prefix=BACKUP_PREFIX - ) - - if "Contents" in objects: - backups = sorted( - objects["Contents"], - key=lambda x: x["LastModified"], - reverse=True - ) - - for obj in backups[MAX_BACKUPS:]: - client.delete_object( - Bucket=R2_BUCKET_NAME, - Key=obj["Key"] - ) - log(f"[INFO] Deleted old backup: {obj['Key']}") - - except Exception as e: - log(f"[ERROR] R2 operation failed: {e}") - finally: - if os.path.exists(compressed_file): - os.remove(compressed_file) - -if __name__ == "__main__": - log("[INFO] Starting backup scheduler...") - log(f"[INFO] Scheduled backup time: {BACKUP_TIME} UTC") - - schedule.every().day.at(BACKUP_TIME).do(run_backup) - - run_backup() - - while True: - schedule.run_pending() - time.sleep(60) +import os +import subprocess +import boto3 +from boto3.session import Config +from datetime import datetime, timezone +from boto3.s3.transfer import TransferConfig +from dotenv import load_dotenv, find_dotenv +import time +import schedule +import py7zr +import shutil +import gzip + +load_dotenv(find_dotenv(usecwd=True), override=True) + +## ENV + +DATABASE_URL = os.environ.get("DATABASE_URL") +DATABASE_PUBLIC_URL = os.environ.get("DATABASE_PUBLIC_URL") +R2_ACCESS_KEY = os.environ.get("R2_ACCESS_KEY") +R2_SECRET_KEY = os.environ.get("R2_SECRET_KEY") +R2_BUCKET_NAME = os.environ.get("R2_BUCKET_NAME") 
+R2_ENDPOINT = os.environ.get("R2_ENDPOINT") +MAX_BACKUPS = int(os.environ.get("MAX_BACKUPS", 7)) +BACKUP_PREFIX = os.environ.get("BACKUP_PREFIX", "") +FILENAME_PREFIX = os.environ.get("FILENAME_PREFIX", "backup") +DUMP_FORMAT = os.environ.get("DUMP_FORMAT", "dump") +BACKUP_PASSWORD = os.environ.get("BACKUP_PASSWORD") +USE_PUBLIC_URL = os.environ.get("USE_PUBLIC_URL", "false").lower() == "true" +BACKUP_TIME = os.environ.get("BACKUP_TIME", "00:00") +S3_REGION = os.environ.get("S3_REGION", "us-east-1") + +def log(msg): + print(msg, flush=True) + +## Validate BACKUP_TIME +try: + hour, minute = BACKUP_TIME.split(":") + if not (0 <= int(hour) <= 23 and 0 <= int(minute) <= 59): + raise ValueError +except ValueError: + log("[WARNING] Invalid BACKUP_TIME format. Using default: 00:00") + BACKUP_TIME = "00:00" + +def get_database_url(): + if USE_PUBLIC_URL: + if not DATABASE_PUBLIC_URL: + raise ValueError("[ERROR] DATABASE_PUBLIC_URL not set but USE_PUBLIC_URL=true!") + return DATABASE_PUBLIC_URL + + if not DATABASE_URL: + raise ValueError("[ERROR] DATABASE_URL not set!") + return DATABASE_URL + +def gzip_compress(src): + dst = src + ".gz" + with open(src, "rb") as f_in: + with gzip.open(dst, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + return dst + +def run_backup(): + if shutil.which("pg_dump") is None: + log("[ERROR] pg_dump not found. 
Install postgresql-client.") + return + + database_url = get_database_url() + log(f"[INFO] Using {'public' if USE_PUBLIC_URL else 'private'} database URL") + + format_map = { + "sql": ("p", "sql"), + "plain": ("p", "sql"), + "dump": ("c", "dump"), + "custom": ("c", "dump"), + "tar": ("t", "tar") + } + pg_format, ext = format_map.get(DUMP_FORMAT.lower(), ("c", "dump")) + + timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") + backup_file = f"{FILENAME_PREFIX}_{timestamp}.{ext}" + + compressed_file = ( + f"{backup_file}.7z" if BACKUP_PASSWORD else f"{backup_file}.gz" + ) + + compressed_file_r2 = f"{BACKUP_PREFIX}{compressed_file}" + + ## Create backup + try: + log(f"[INFO] Creating backup {backup_file}") + + dump_cmd = [ + "pg_dump", + f"--dbname={database_url}", + "-F", pg_format, + "--no-owner", + "--no-acl", + "-f", backup_file + ] + + subprocess.run(dump_cmd, check=True) + + if BACKUP_PASSWORD: + log("[INFO] Encrypting backup with 7z...") + with py7zr.SevenZipFile(compressed_file, "w", password=BACKUP_PASSWORD) as archive: + archive.write(backup_file) + log("[SUCCESS] Backup encrypted successfully") + else: + log("[INFO] Compressing backup with gzip...") + gzip_compress(backup_file) + log("[SUCCESS] Backup compressed successfully") + + except subprocess.CalledProcessError as e: + log(f"[ERROR] Backup creation failed: {e}") + return + finally: + if os.path.exists(backup_file): + os.remove(backup_file) + + ## Upload to R2 + if os.path.exists(compressed_file): + size = os.path.getsize(compressed_file) + log(f"[INFO] Final backup size: {size / 1024 / 1024:.2f} MB") + + try: + client = boto3.client( + "s3", + endpoint_url=R2_ENDPOINT, + aws_access_key_id=R2_ACCESS_KEY, + aws_secret_access_key=R2_SECRET_KEY, + region_name=S3_REGION, + config=Config( + s3={"addressing_style": "path"} + ) + ) + + config = TransferConfig( + multipart_threshold=8 * 1024 * 1024, + multipart_chunksize=8 * 1024 * 1024, + max_concurrency=4, + use_threads=True + ) + + 
client.upload_file( + compressed_file, + R2_BUCKET_NAME, + compressed_file_r2, + Config=config + ) + + log(f"[SUCCESS] Backup uploaded: {compressed_file_r2}") + + objects = client.list_objects_v2( + Bucket=R2_BUCKET_NAME, + Prefix=BACKUP_PREFIX + ) + + if "Contents" in objects: + backups = sorted( + objects["Contents"], + key=lambda x: x["LastModified"], + reverse=True + ) + + for obj in backups[MAX_BACKUPS:]: + client.delete_object( + Bucket=R2_BUCKET_NAME, + Key=obj["Key"] + ) + log(f"[INFO] Deleted old backup: {obj['Key']}") + + except Exception as e: + log(f"[ERROR] R2 operation failed: {e}") + finally: + if os.path.exists(compressed_file): + os.remove(compressed_file) + +if __name__ == "__main__": + log("[INFO] Starting backup scheduler...") + log(f"[INFO] Scheduled backup time: {BACKUP_TIME} UTC") + + schedule.every().day.at(BACKUP_TIME).do(run_backup) + + run_backup() + + while True: + schedule.run_pending() + time.sleep(60) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..ff32c0c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,35 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "pg-r2-backup" +version = "1.0.5" +description = "PostgreSQL backup tool for Cloudflare R2 (S3 Compatible)" +readme = "README.md" +requires-python = ">=3.9" + +authors = [ + { name = "Aman" } +] + +license = "MIT" + +dependencies = [ + "boto3", + "python-dotenv", + "schedule", + "py7zr" +] + +[project.urls] +Homepage = "https://github.com/BigDaddyAman/pg-r2-backup" +Repository = "https://github.com/BigDaddyAman/pg-r2-backup" +Issues = "https://github.com/BigDaddyAman/pg-r2-backup/issues" + +[tool.setuptools] +packages = ["cli"] +py-modules = ["main"] + +[project.scripts] +pg-r2-backup = "cli.cli:main" diff --git a/requirements.txt b/requirements.txt index 56d132c..ffb3b85 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -boto3==1.42.26 -psycopg2-binary==2.9.10 
-python-dotenv==1.2.1 -py7zr==1.1.0 -schedule==1.2.2 +boto3==1.42.39 +psycopg2-binary==2.9.10 +python-dotenv==1.2.1 +py7zr==1.1.0 +schedule==1.2.2