Add media retention cleanup command
This commit is contained in:
parent
3b6503a6ed
commit
507074b80e
10 changed files with 722 additions and 52 deletions
|
|
@ -7,11 +7,21 @@ import os
|
|||
import signal
|
||||
import sys
|
||||
from contextlib import suppress
|
||||
from pathlib import Path
|
||||
|
||||
from hypercorn.asyncio import serve as hypercorn_serve
|
||||
from hypercorn.config import Config as HypercornConfig
|
||||
|
||||
import repub.crawl as crawl_module
|
||||
from repub.cleanup import DEFAULT_MEDIA_DIRS, cleanup_media
|
||||
from repub.config import (
|
||||
AUDIO_DIR,
|
||||
FILE_DIR,
|
||||
IMAGE_DIR,
|
||||
VIDEO_DIR,
|
||||
build_base_settings,
|
||||
load_config,
|
||||
)
|
||||
from repub.web import SHUTDOWN_EVENT_KEY, create_app
|
||||
|
||||
FeedNameFilter = crawl_module.FeedNameFilter
|
||||
|
|
@ -61,11 +71,39 @@ def parse_args(argv: list[str] | None = None) -> tuple[str, argparse.Namespace]:
|
|||
default="repub.toml",
|
||||
help="Path to runtime config TOML file",
|
||||
)
|
||||
|
||||
cleanup_parser = subparsers.add_parser(
|
||||
"cleanup-media",
|
||||
help="Delete old unreferenced published media",
|
||||
)
|
||||
cleanup_parser.add_argument(
|
||||
"-c",
|
||||
"--config",
|
||||
default=None,
|
||||
help="Read output and media directory settings from runtime config TOML",
|
||||
)
|
||||
cleanup_parser.add_argument(
|
||||
"--feeds-dir",
|
||||
default=None,
|
||||
help="Published feeds directory to clean (default: config out_dir/feeds or out/feeds)",
|
||||
)
|
||||
cleanup_parser.add_argument(
|
||||
"--days",
|
||||
type=int,
|
||||
default=25,
|
||||
help="Delete unreferenced media older than this many days",
|
||||
)
|
||||
cleanup_parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="Report cleanup matches without deleting files",
|
||||
)
|
||||
|
||||
if not raw_args:
|
||||
raw_args = ["serve", "--dev-mode"]
|
||||
elif raw_args[0] in {"-c", "--config"}:
|
||||
raw_args = ["crawl", *raw_args]
|
||||
elif raw_args[0] not in {"serve", "crawl"}:
|
||||
elif raw_args[0] not in {"serve", "crawl", "cleanup-media"}:
|
||||
raw_args = ["serve", "--dev-mode", *raw_args]
|
||||
|
||||
args = parser.parse_args(raw_args)
|
||||
|
|
@ -73,6 +111,25 @@ def parse_args(argv: list[str] | None = None) -> tuple[str, argparse.Namespace]:
|
|||
return command, args
|
||||
|
||||
|
||||
def _cleanup_config(args: argparse.Namespace) -> tuple[Path, tuple[str, ...]]:
    """Resolve the feeds directory and media directory names for cleanup.

    Args:
        args: Parsed ``cleanup-media`` arguments. Reads ``args.feeds_dir``
            and ``args.config``.

    Returns:
        A ``(feeds_dir, media_dirs)`` pair. Without ``--config`` this is the
        ``--feeds-dir`` override (or ``out/feeds``) together with
        ``DEFAULT_MEDIA_DIRS``. With ``--config`` the media directory names
        come from the built settings, and the feeds directory falls back to
        ``<config.out_dir>/feeds`` when no override was given.

    Any exception raised by ``load_config`` or ``build_base_settings``
    propagates to the caller unchanged.
    """
    # Normalize the override once. An empty string counts as "not given" on
    # both paths, so ``--feeds-dir ""`` can no longer bypass the configured
    # output directory when ``--config`` is also supplied.
    feeds_override = Path(args.feeds_dir) if args.feeds_dir else None

    if args.config is None:
        feeds_dir = feeds_override if feeds_override is not None else Path("out/feeds")
        return feeds_dir, DEFAULT_MEDIA_DIRS

    config = load_config(args.config)
    settings = build_base_settings(config)

    # Order is image, audio, video, file; each falls back to the module-level
    # default when the setting is absent.
    media_dirs = (
        str(settings.get("REPUBLISHER_IMAGE_DIR", IMAGE_DIR)),
        str(settings.get("REPUBLISHER_AUDIO_DIR", AUDIO_DIR)),
        str(settings.get("REPUBLISHER_VIDEO_DIR", VIDEO_DIR)),
        str(settings.get("REPUBLISHER_FILE_DIR", FILE_DIR)),
    )

    if feeds_override is not None:
        return feeds_override, media_dirs
    return config.out_dir / "feeds", media_dirs
|
||||
|
||||
|
||||
def _install_signal_handlers(stop_event: asyncio.Event) -> None:
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
|
|
@ -116,6 +173,29 @@ def entrypoint(argv: list[str] | None = None) -> int:
|
|||
crawl_module.check_runtime = check_runtime
|
||||
return crawl_module.crawl_from_config(args.config)
|
||||
|
||||
if command == "cleanup-media":
|
||||
try:
|
||||
feeds_dir, media_dirs = _cleanup_config(args)
|
||||
except FileNotFoundError as error:
|
||||
missing_path = (
|
||||
Path(error.filename).expanduser()
|
||||
if error.filename
|
||||
else Path(args.config).expanduser()
|
||||
)
|
||||
logger.error("Config file not found: %s", missing_path)
|
||||
return 2
|
||||
except ValueError as error:
|
||||
logger.error("Invalid config: %s", error)
|
||||
return 2
|
||||
|
||||
result = cleanup_media(
|
||||
feeds_dir=feeds_dir,
|
||||
retention_days=args.days,
|
||||
dry_run=bool(args.dry_run),
|
||||
media_dirs=media_dirs,
|
||||
)
|
||||
return 1 if result.failures else 0
|
||||
|
||||
try:
|
||||
port = int(args.port)
|
||||
except ValueError:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue