Add CLI interface and pyproject.toml for PyPI packaging

This commit is contained in:
KakiFilem Team 2026-02-02 19:33:18 +08:00
parent 3bbab71301
commit 2f7d5a7949
5 changed files with 394 additions and 185 deletions

BIN
__init__.py Normal file

Binary file not shown.

166
cli.py Normal file
View File

@ -0,0 +1,166 @@
import argparse
import shutil
import os
import sys
import textwrap
import importlib.metadata
from main import run_backup
def get_version():
    """Return the installed distribution version, or "dev" when run from source."""
    try:
        version = importlib.metadata.version("pg-r2-backup")
    except importlib.metadata.PackageNotFoundError:
        # Not installed as a package (e.g. running straight from a checkout).
        version = "dev"
    return version
def mask(value, show=4):
    """Mask *value* for display, keeping at most the first *show* characters.

    Falsy values (None, "") come back as an empty string; values no longer
    than *show* are fully replaced with '*'.
    """
    if not value:
        return ""
    visible = value[:show] if len(value) > show else ""
    return visible + "*" * (len(value) - len(visible))
def doctor():
    """Run environment diagnostics and print a pass/fail report to stdout."""
    print("pg-r2-backup doctor\n")
    # pg_dump must be on PATH for backups to work at all.
    if shutil.which("pg_dump") is not None:
        print("[OK] pg_dump found")
    else:
        print("[FAIL] pg_dump not found in PATH")
    # Environment variables the backup cannot run without.
    needed = (
        "DATABASE_URL",
        "R2_ACCESS_KEY",
        "R2_SECRET_KEY",
        "R2_BUCKET_NAME",
        "R2_ENDPOINT",
    )
    absent = [name for name in needed if not os.environ.get(name)]
    if not absent:
        print("\n[OK] Required environment variables set")
    else:
        print("\n[FAIL] Missing environment variables:")
        for name in absent:
            print(f" - {name}")
    # Informational settings (not pass/fail).
    mode = "public" if os.environ.get("USE_PUBLIC_URL", "false").lower() == "true" else "private"
    print(f"\nDatabase URL mode : {mode}")
    compression = "7z (encrypted)" if os.environ.get("BACKUP_PASSWORD") else "gzip"
    print(f"Compression : {compression}")
    print("\nDoctor check complete.")
def config_show():
    """Print the effective configuration, masking the R2 credentials."""
    print("pg-r2-backup config\n")
    env = os.environ.get
    settings = {
        "USE_PUBLIC_URL": env("USE_PUBLIC_URL", "false"),
        "DUMP_FORMAT": env("DUMP_FORMAT", "dump"),
        "FILENAME_PREFIX": env("FILENAME_PREFIX", "backup"),
        "MAX_BACKUPS": env("MAX_BACKUPS", "7"),
        "BACKUP_TIME": env("BACKUP_TIME", "00:00"),
        "R2_BUCKET_NAME": env("R2_BUCKET_NAME", ""),
        "R2_ENDPOINT": env("R2_ENDPOINT", ""),
        # Credentials are never printed in full.
        "R2_ACCESS_KEY": mask(env("R2_ACCESS_KEY")),
        "R2_SECRET_KEY": mask(env("R2_SECRET_KEY")),
    }
    for key, value in settings.items():
        print(f"{key:<16} : {value}")
def init_env():
    """Create .env in the current directory by copying .env.example.

    Refuses to overwrite an existing .env; requires .env.example to exist.
    """
    if os.path.exists(".env"):
        print("[ERROR] .env already exists")
        return
    template = ".env.example"
    if not os.path.exists(template):
        print("[ERROR] .env.example not found")
        return
    shutil.copy(template, ".env")
    print("[SUCCESS] .env created from .env.example")
    print("Edit the file before running backups.")
def schedule_info():
    """Print copy-paste scheduling examples for cron, Task Scheduler and Docker."""
    # NOTE(review): the inner indentation of this literal was lost in
    # extraction; reproduced flat here — confirm against the original file.
    print(textwrap.dedent("""
pg-r2-backup scheduling
Linux / macOS (cron):
0 0 * * * pg-r2-backup run
Windows (Task Scheduler):
Program : pg-r2-backup
Args : run
Start in: folder containing .env (working directory)
Railway / Docker:
Use the platform scheduler
""").strip())
def main():
    """CLI entry point: parse arguments and dispatch to the chosen subcommand.

    Prints help and exits with status 1 when no recognized command is given.
    """
    parser = argparse.ArgumentParser(
        prog="pg-r2-backup",
        description="PostgreSQL backup tool for Cloudflare R2",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # NOTE(review): the inner indentation of this literal was lost in
        # extraction; reproduced flat here — confirm against the original.
        epilog=textwrap.dedent("""
Examples:
pg-r2-backup doctor
pg-r2-backup run
pg-r2-backup config show
pg-r2-backup init
pg-r2-backup schedule
""")
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {get_version()}"
    )
    subparsers = parser.add_subparsers(dest="command")
    subparsers.add_parser("run", help="Run backup immediately")
    subparsers.add_parser("doctor", help="Check environment & dependencies")
    subparsers.add_parser("schedule", help="Show scheduling examples")
    # "config" has its own nested subcommand ("show").
    config_parser = subparsers.add_parser("config", help="Show configuration")
    config_sub = config_parser.add_subparsers(dest="subcommand")
    config_sub.add_parser("show", help="Show current configuration")
    subparsers.add_parser("init", help="Create .env from .env.example")
    args = parser.parse_args()
    # Dispatch; anything unmatched (including bare "config") falls through
    # to help + exit(1).
    if args.command == "run":
        run_backup()
    elif args.command == "doctor":
        doctor()
    elif args.command == "config" and args.subcommand == "show":
        config_show()
    elif args.command == "init":
        init_env()
    elif args.command == "schedule":
        schedule_info()
    else:
        parser.print_help()
        sys.exit(1)

368
main.py
View File

@ -1,180 +1,188 @@
import os
import subprocess
import boto3
from boto3.session import Config
from datetime import datetime, timezone
from boto3.s3.transfer import TransferConfig
from dotenv import load_dotenv
import time
import schedule
import py7zr
import shutil
load_dotenv()

## ENV
# Connection strings: private URL by default, public one when USE_PUBLIC_URL=true.
DATABASE_URL = os.environ.get("DATABASE_URL")
DATABASE_PUBLIC_URL = os.environ.get("DATABASE_PUBLIC_URL")
# Cloudflare R2 (S3-compatible) credentials and target bucket.
R2_ACCESS_KEY = os.environ.get("R2_ACCESS_KEY")
R2_SECRET_KEY = os.environ.get("R2_SECRET_KEY")
R2_BUCKET_NAME = os.environ.get("R2_BUCKET_NAME")
R2_ENDPOINT = os.environ.get("R2_ENDPOINT")
MAX_BACKUPS = int(os.environ.get("MAX_BACKUPS", 7))  # newest N objects kept in the bucket
BACKUP_PREFIX = os.environ.get("BACKUP_PREFIX", "")  # key prefix for uploaded objects
FILENAME_PREFIX = os.environ.get("FILENAME_PREFIX", "backup")
DUMP_FORMAT = os.environ.get("DUMP_FORMAT", "dump")  # sql|plain|dump|custom|tar
BACKUP_PASSWORD = os.environ.get("BACKUP_PASSWORD")  # when set, 7z-encrypt instead of gzip
USE_PUBLIC_URL = os.environ.get("USE_PUBLIC_URL", "false").lower() == "true"
BACKUP_TIME = os.environ.get("BACKUP_TIME", "00:00")  # daily HH:MM schedule
S3_REGION = os.environ.get("S3_REGION", "us-east-1")
def log(msg):
    """Emit *msg* immediately; flushing makes logs show up in real time
    under container/pipe buffering."""
    print(msg, flush=True)
## Validate BACKUP_TIME
try:
hour, minute = BACKUP_TIME.split(":")
if not (0 <= int(hour) <= 23 and 0 <= int(minute) <= 59):
raise ValueError
except ValueError:
log("[WARNING] Invalid BACKUP_TIME format. Using default: 00:00")
BACKUP_TIME = "00:00"
def get_database_url():
    """Return the database URL selected by USE_PUBLIC_URL.

    Raises ValueError when the selected variable is unset.
    """
    url = DATABASE_PUBLIC_URL if USE_PUBLIC_URL else DATABASE_URL
    if url:
        return url
    if USE_PUBLIC_URL:
        raise ValueError("[ERROR] DATABASE_PUBLIC_URL not set but USE_PUBLIC_URL=true!")
    raise ValueError("[ERROR] DATABASE_URL not set!")
def run_backup():
    """Dump the PostgreSQL database, compress it, upload to R2, prune old backups.

    Side effects: creates and removes temporary files in the current working
    directory; uploads one object to the configured R2 bucket and deletes
    objects beyond MAX_BACKUPS. Errors are logged, not raised.
    """
    # pg_dump must be on PATH; bail out with a log message otherwise.
    if shutil.which("pg_dump") is None:
        log("[ERROR] pg_dump not found. Install postgresql-client.")
        return
    database_url = get_database_url()
    log(f"[INFO] Using {'public' if USE_PUBLIC_URL else 'private'} database URL")
    # Map DUMP_FORMAT to (pg_dump -F flag, output file extension);
    # unknown values fall back to the custom format.
    format_map = {
        "sql": ("p", "sql"),
        "plain": ("p", "sql"),
        "dump": ("c", "dump"),
        "custom": ("c", "dump"),
        "tar": ("t", "tar")
    }
    pg_format, ext = format_map.get(DUMP_FORMAT.lower(), ("c", "dump"))
    # UTC timestamp in the filename keeps backups sortable and unique.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    backup_file = f"{FILENAME_PREFIX}_{timestamp}.{ext}"
    # Encrypted 7z when a password is configured, plain gzip otherwise.
    compressed_file = (
        f"{backup_file}.7z" if BACKUP_PASSWORD else f"{backup_file}.gz"
    )
    compressed_file_r2 = f"{BACKUP_PREFIX}{compressed_file}"
    ## Create backup
    try:
        log(f"[INFO] Creating backup {backup_file}")
        dump_cmd = [
            "pg_dump",
            f"--dbname={database_url}",
            "-F", pg_format,
            "--no-owner",
            "--no-acl",
            "-f", backup_file
        ]
        subprocess.run(dump_cmd, check=True)
        if BACKUP_PASSWORD:
            log("[INFO] Encrypting backup with 7z...")
            with py7zr.SevenZipFile(compressed_file, "w", password=BACKUP_PASSWORD) as archive:
                archive.write(backup_file)
            log("[SUCCESS] Backup encrypted successfully")
        else:
            log("[INFO] Compressing backup with gzip...")
            # NOTE(review): relies on the external `gzip` binary being on
            # PATH; -f replaces backup_file with backup_file.gz in place.
            subprocess.run(["gzip", "-f", backup_file], check=True)
            log("[SUCCESS] Backup compressed successfully")
    except subprocess.CalledProcessError as e:
        # NOTE(review): only subprocess failures are caught here; a py7zr
        # error would propagate to the caller — confirm that is intended.
        log(f"[ERROR] Backup creation failed: {e}")
        return
    finally:
        # Always drop the raw dump; only the compressed artifact is kept
        # (a no-op on the gzip path, where -f already consumed the file).
        if os.path.exists(backup_file):
            os.remove(backup_file)
    ## Upload to R2
    if os.path.exists(compressed_file):
        size = os.path.getsize(compressed_file)
        log(f"[INFO] Final backup size: {size / 1024 / 1024:.2f} MB")
        try:
            # Path-style addressing is used for the R2 endpoint.
            client = boto3.client(
                "s3",
                endpoint_url=R2_ENDPOINT,
                aws_access_key_id=R2_ACCESS_KEY,
                aws_secret_access_key=R2_SECRET_KEY,
                region_name=S3_REGION,
                config=Config(
                    s3={"addressing_style": "path"}
                )
            )
            # Multipart upload in 8 MB chunks, up to 4 concurrent threads.
            config = TransferConfig(
                multipart_threshold=8 * 1024 * 1024,
                multipart_chunksize=8 * 1024 * 1024,
                max_concurrency=4,
                use_threads=True
            )
            client.upload_file(
                compressed_file,
                R2_BUCKET_NAME,
                compressed_file_r2,
                Config=config
            )
            log(f"[SUCCESS] Backup uploaded: {compressed_file_r2}")
            # Retention: keep the MAX_BACKUPS most recent objects under the
            # prefix, delete the rest.
            # NOTE(review): list_objects_v2 is not paginated here — prefixes
            # with >1000 keys would be pruned incompletely; confirm.
            objects = client.list_objects_v2(
                Bucket=R2_BUCKET_NAME,
                Prefix=BACKUP_PREFIX
            )
            if "Contents" in objects:
                backups = sorted(
                    objects["Contents"],
                    key=lambda x: x["LastModified"],
                    reverse=True
                )
                for obj in backups[MAX_BACKUPS:]:
                    client.delete_object(
                        Bucket=R2_BUCKET_NAME,
                        Key=obj["Key"]
                    )
                    log(f"[INFO] Deleted old backup: {obj['Key']}")
        except Exception as e:
            log(f"[ERROR] R2 operation failed: {e}")
        finally:
            # Local compressed artifact is always removed after the attempt.
            if os.path.exists(compressed_file):
                os.remove(compressed_file)
if __name__ == "__main__":
    # Run one backup immediately, then repeat daily at BACKUP_TIME.
    # NOTE(review): schedule uses local wall-clock time; the log claims UTC —
    # confirm the host runs in UTC.
    log("[INFO] Starting backup scheduler...")
    log(f"[INFO] Scheduled backup time: {BACKUP_TIME} UTC")
    schedule.every().day.at(BACKUP_TIME).do(run_backup)
    run_backup()
    while True:
        schedule.run_pending()
        time.sleep(60)  # poll the schedule once a minute
import os
import subprocess
import boto3
from boto3.session import Config
from datetime import datetime, timezone
from boto3.s3.transfer import TransferConfig
from dotenv import load_dotenv, find_dotenv
import time
import schedule
import py7zr
import shutil
import gzip
# Locate .env starting from the current working directory (so the installed
# CLI picks up the .env next to where it is invoked) and let it override
# already-exported variables.
load_dotenv(find_dotenv(usecwd=True), override=True)

## ENV
# Connection strings: private URL by default, public one when USE_PUBLIC_URL=true.
DATABASE_URL = os.environ.get("DATABASE_URL")
DATABASE_PUBLIC_URL = os.environ.get("DATABASE_PUBLIC_URL")
# Cloudflare R2 (S3-compatible) credentials and target bucket.
R2_ACCESS_KEY = os.environ.get("R2_ACCESS_KEY")
R2_SECRET_KEY = os.environ.get("R2_SECRET_KEY")
R2_BUCKET_NAME = os.environ.get("R2_BUCKET_NAME")
R2_ENDPOINT = os.environ.get("R2_ENDPOINT")
MAX_BACKUPS = int(os.environ.get("MAX_BACKUPS", 7))  # newest N objects kept in the bucket
BACKUP_PREFIX = os.environ.get("BACKUP_PREFIX", "")  # key prefix for uploaded objects
FILENAME_PREFIX = os.environ.get("FILENAME_PREFIX", "backup")
DUMP_FORMAT = os.environ.get("DUMP_FORMAT", "dump")  # sql|plain|dump|custom|tar
BACKUP_PASSWORD = os.environ.get("BACKUP_PASSWORD")  # when set, 7z-encrypt instead of gzip
USE_PUBLIC_URL = os.environ.get("USE_PUBLIC_URL", "false").lower() == "true"
BACKUP_TIME = os.environ.get("BACKUP_TIME", "00:00")  # daily HH:MM schedule
S3_REGION = os.environ.get("S3_REGION", "us-east-1")
def log(msg):
    """Emit *msg* immediately; flushing makes logs show up in real time
    under container/pipe buffering."""
    print(msg, flush=True)
## Validate BACKUP_TIME
try:
hour, minute = BACKUP_TIME.split(":")
if not (0 <= int(hour) <= 23 and 0 <= int(minute) <= 59):
raise ValueError
except ValueError:
log("[WARNING] Invalid BACKUP_TIME format. Using default: 00:00")
BACKUP_TIME = "00:00"
def get_database_url():
    """Return the database URL selected by USE_PUBLIC_URL.

    Raises ValueError when the selected variable is unset.
    """
    url = DATABASE_PUBLIC_URL if USE_PUBLIC_URL else DATABASE_URL
    if url:
        return url
    if USE_PUBLIC_URL:
        raise ValueError("[ERROR] DATABASE_PUBLIC_URL not set but USE_PUBLIC_URL=true!")
    raise ValueError("[ERROR] DATABASE_URL not set!")
def gzip_compress(src):
    """Gzip-compress *src* into ``src + ".gz"`` and return the new path.

    Streams the file in chunks (no full read into memory); the source file
    is left in place.
    """
    dst = src + ".gz"
    with open(src, "rb") as fin, gzip.open(dst, "wb") as fout:
        shutil.copyfileobj(fin, fout)
    return dst
def run_backup():
    """Dump the PostgreSQL database, compress it, upload to R2, prune old backups.

    Side effects: creates and removes temporary files in the current working
    directory; uploads one object to the configured R2 bucket and deletes
    objects beyond MAX_BACKUPS. Errors are logged, not raised.
    """
    # pg_dump must be on PATH; bail out with a log message otherwise.
    if shutil.which("pg_dump") is None:
        log("[ERROR] pg_dump not found. Install postgresql-client.")
        return
    database_url = get_database_url()
    log(f"[INFO] Using {'public' if USE_PUBLIC_URL else 'private'} database URL")
    # Map DUMP_FORMAT to (pg_dump -F flag, output file extension);
    # unknown values fall back to the custom format.
    format_map = {
        "sql": ("p", "sql"),
        "plain": ("p", "sql"),
        "dump": ("c", "dump"),
        "custom": ("c", "dump"),
        "tar": ("t", "tar")
    }
    pg_format, ext = format_map.get(DUMP_FORMAT.lower(), ("c", "dump"))
    # UTC timestamp in the filename keeps backups sortable and unique.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    backup_file = f"{FILENAME_PREFIX}_{timestamp}.{ext}"
    # Encrypted 7z when a password is configured, plain gzip otherwise.
    compressed_file = (
        f"{backup_file}.7z" if BACKUP_PASSWORD else f"{backup_file}.gz"
    )
    compressed_file_r2 = f"{BACKUP_PREFIX}{compressed_file}"
    ## Create backup
    try:
        log(f"[INFO] Creating backup {backup_file}")
        dump_cmd = [
            "pg_dump",
            f"--dbname={database_url}",
            "-F", pg_format,
            "--no-owner",
            "--no-acl",
            "-f", backup_file
        ]
        subprocess.run(dump_cmd, check=True)
        if BACKUP_PASSWORD:
            log("[INFO] Encrypting backup with 7z...")
            with py7zr.SevenZipFile(compressed_file, "w", password=BACKUP_PASSWORD) as archive:
                archive.write(backup_file)
            log("[SUCCESS] Backup encrypted successfully")
        else:
            log("[INFO] Compressing backup with gzip...")
            # In-process gzip (no external binary); leaves backup_file in
            # place — the finally block below removes it.
            gzip_compress(backup_file)
            log("[SUCCESS] Backup compressed successfully")
    except subprocess.CalledProcessError as e:
        # NOTE(review): only pg_dump failures are caught here; py7zr or
        # gzip I/O errors would propagate to the caller — confirm intended.
        log(f"[ERROR] Backup creation failed: {e}")
        return
    finally:
        # Always drop the raw dump; only the compressed artifact is kept.
        if os.path.exists(backup_file):
            os.remove(backup_file)
    ## Upload to R2
    if os.path.exists(compressed_file):
        size = os.path.getsize(compressed_file)
        log(f"[INFO] Final backup size: {size / 1024 / 1024:.2f} MB")
        try:
            # Path-style addressing is used for the R2 endpoint.
            client = boto3.client(
                "s3",
                endpoint_url=R2_ENDPOINT,
                aws_access_key_id=R2_ACCESS_KEY,
                aws_secret_access_key=R2_SECRET_KEY,
                region_name=S3_REGION,
                config=Config(
                    s3={"addressing_style": "path"}
                )
            )
            # Multipart upload in 8 MB chunks, up to 4 concurrent threads.
            config = TransferConfig(
                multipart_threshold=8 * 1024 * 1024,
                multipart_chunksize=8 * 1024 * 1024,
                max_concurrency=4,
                use_threads=True
            )
            client.upload_file(
                compressed_file,
                R2_BUCKET_NAME,
                compressed_file_r2,
                Config=config
            )
            log(f"[SUCCESS] Backup uploaded: {compressed_file_r2}")
            # Retention: keep the MAX_BACKUPS most recent objects under the
            # prefix, delete the rest.
            # NOTE(review): list_objects_v2 is not paginated here — prefixes
            # with >1000 keys would be pruned incompletely; confirm.
            objects = client.list_objects_v2(
                Bucket=R2_BUCKET_NAME,
                Prefix=BACKUP_PREFIX
            )
            if "Contents" in objects:
                backups = sorted(
                    objects["Contents"],
                    key=lambda x: x["LastModified"],
                    reverse=True
                )
                for obj in backups[MAX_BACKUPS:]:
                    client.delete_object(
                        Bucket=R2_BUCKET_NAME,
                        Key=obj["Key"]
                    )
                    log(f"[INFO] Deleted old backup: {obj['Key']}")
        except Exception as e:
            log(f"[ERROR] R2 operation failed: {e}")
        finally:
            # Local compressed artifact is always removed after the attempt.
            if os.path.exists(compressed_file):
                os.remove(compressed_file)
if __name__ == "__main__":
    # Run one backup immediately, then repeat daily at BACKUP_TIME.
    # NOTE(review): schedule uses local wall-clock time; the log claims UTC —
    # confirm the host runs in UTC.
    log("[INFO] Starting backup scheduler...")
    log(f"[INFO] Scheduled backup time: {BACKUP_TIME} UTC")
    schedule.every().day.at(BACKUP_TIME).do(run_backup)
    run_backup()
    while True:
        schedule.run_pending()
        time.sleep(60)  # poll the schedule once a minute

35
pyproject.toml Normal file
View File

@ -0,0 +1,35 @@
# Build via PEP 517 with plain setuptools.
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project]
name = "pg-r2-backup"
version = "1.0.5"
description = "PostgreSQL backup tool for Cloudflare R2 (S3 Compatible)"
readme = "README.md"
requires-python = ">=3.9"
authors = [
    { name = "Aman" }
]
license = "MIT"
# Runtime dependencies (unpinned; requirements.txt carries the pins).
# NOTE(review): requirements.txt also pins psycopg2-binary, which is not
# listed here — confirm that omission is intentional.
dependencies = [
    "boto3",
    "python-dotenv",
    "schedule",
    "py7zr"
]

[project.urls]
Homepage = "https://github.com/BigDaddyAman/pg-r2-backup"
Repository = "https://github.com/BigDaddyAman/pg-r2-backup"
Issues = "https://github.com/BigDaddyAman/pg-r2-backup/issues"

# NOTE(review): packages = ["cli"] tells setuptools to ship a cli/ package
# directory, and the console script target "cli.cli:main" assumes cli.py
# lives inside that package — confirm the on-disk layout matches.
[tool.setuptools]
packages = ["cli"]
py-modules = ["main"]

[project.scripts]
pg-r2-backup = "cli.cli:main"

View File

@ -1,5 +1,5 @@
boto3==1.42.26
psycopg2-binary==2.9.10
python-dotenv==1.2.1
py7zr==1.1.0
schedule==1.2.2
boto3==1.42.39
psycopg2-binary==2.9.10
python-dotenv==1.2.1
py7zr==1.1.0
schedule==1.2.2