2026-03-29 13:52:23 +02:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import argparse
|
2026-03-31 12:47:36 +02:00
|
|
|
import asyncio
|
2024-04-18 15:27:00 +02:00
|
|
|
import logging
|
2026-03-30 11:42:13 +02:00
|
|
|
import os
|
2026-03-31 12:47:36 +02:00
|
|
|
import signal
|
2026-03-29 13:52:23 +02:00
|
|
|
import sys
|
2026-03-31 12:47:36 +02:00
|
|
|
from contextlib import suppress
|
2026-05-27 13:04:47 +02:00
|
|
|
from pathlib import Path
|
2026-03-31 12:47:36 +02:00
|
|
|
|
|
|
|
|
from hypercorn.asyncio import serve as hypercorn_serve
|
|
|
|
|
from hypercorn.config import Config as HypercornConfig
|
2026-03-29 13:52:23 +02:00
|
|
|
|
2026-03-30 11:42:13 +02:00
|
|
|
import repub.crawl as crawl_module
|
2026-05-27 13:04:47 +02:00
|
|
|
from repub.cleanup import DEFAULT_MEDIA_DIRS, cleanup_media
|
|
|
|
|
from repub.config import (
|
|
|
|
|
AUDIO_DIR,
|
|
|
|
|
FILE_DIR,
|
|
|
|
|
IMAGE_DIR,
|
|
|
|
|
VIDEO_DIR,
|
|
|
|
|
build_base_settings,
|
|
|
|
|
load_config,
|
|
|
|
|
)
|
2026-03-31 12:47:36 +02:00
|
|
|
from repub.web import SHUTDOWN_EVENT_KEY, create_app
|
2024-04-18 15:27:00 +02:00
|
|
|
|
2026-03-30 11:42:13 +02:00
|
|
|
# Re-export selected crawl-module names so they are importable from this module.
FeedNameFilter = crawl_module.FeedNameFilter
check_runtime = crawl_module.check_runtime

# Names exposed by ``from <this module> import *``.
__all__ = [
    "FeedNameFilter",
    "check_runtime",
    "entrypoint",
    "parse_args",
]
|
2024-04-18 11:57:24 +02:00
|
|
|
|
2024-04-18 15:27:00 +02:00
|
|
|
# Module logger: emits DEBUG and above through its own stream handler and does
# not forward records to ancestor loggers.
logger = logging.getLogger(__name__)
logger.propagate = False
logger.setLevel(logging.DEBUG)

# Attach the handler only once, so re-executing this module body does not
# stack duplicate handlers on the same logger object.
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    logger.addHandler(handler)
|
2024-04-18 11:57:24 +02:00
|
|
|
|
|
|
|
|
|
2026-03-30 11:42:13 +02:00
|
|
|
def parse_args(argv: list[str] | None = None) -> tuple[str, argparse.Namespace]:
    """Parse the command line into ``(command, namespace)``.

    Args:
        argv: Argument tokens without the program name. When ``None``,
            ``sys.argv[1:]`` is used. The given list is copied, not mutated.

    Returns:
        A tuple of the selected sub-command name (``"serve"``, ``"crawl"`` or
        ``"cleanup-media"``) and the parsed ``argparse.Namespace``.

    Sub-command inference, applied before parsing:

    * a leading ``-c`` / ``--config`` selects ``crawl``;
    * no arguments, or a first token that is not a known sub-command,
      selects ``serve --dev-mode`` with the given tokens appended.
    """
    tokens = sys.argv[1:] if argv is None else list(argv)

    parser = argparse.ArgumentParser(description="Mirror RSS and Atom feeds")
    commands = parser.add_subparsers(dest="command")

    # --- serve -------------------------------------------------------------
    serve_cmd = commands.add_parser("serve", help="Start the republisher web UI")
    serve_cmd.add_argument(
        "--host",
        default=os.environ.get("REPUBLISHER_HOST", "127.0.0.1"),
        help="Host interface for the web UI",
    )
    # The port stays a string here; the caller converts and validates it.
    serve_cmd.add_argument(
        "--port",
        default=os.environ.get("REPUBLISHER_PORT", "8080"),
        help="Port for the web UI",
    )
    serve_cmd.add_argument(
        "--dev-mode",
        action="store_true",
        help="Serve published feeds from /feeds for local development",
    )

    # --- crawl -------------------------------------------------------------
    crawl_cmd = commands.add_parser("crawl", help="Run the feed crawler once")
    crawl_cmd.add_argument(
        "-c",
        "--config",
        default="repub.toml",
        help="Path to runtime config TOML file",
    )

    # --- cleanup-media -----------------------------------------------------
    cleanup_cmd = commands.add_parser(
        "cleanup-media",
        help="Delete old unreferenced published media",
    )
    cleanup_cmd.add_argument(
        "-c",
        "--config",
        default=None,
        help="Read output and media directory settings from runtime config TOML",
    )
    cleanup_cmd.add_argument(
        "--feeds-dir",
        default=None,
        help=(
            "Published feeds directory to clean "
            "(default: config out_dir/feeds or out/feeds)"
        ),
    )
    cleanup_cmd.add_argument(
        "--days",
        type=int,
        default=25,
        help="Delete unreferenced media older than this many days",
    )
    cleanup_cmd.add_argument(
        "--dry-run",
        action="store_true",
        help="Report cleanup matches without deleting files",
    )

    # Infer the sub-command when the first token does not name one.
    leading = tokens[0] if tokens else None
    if leading in ("-c", "--config"):
        tokens = ["crawl"] + tokens
    elif leading not in ("serve", "crawl", "cleanup-media"):
        # Covers both the empty command line and unknown leading tokens.
        tokens = ["serve", "--dev-mode"] + tokens

    parsed = parser.parse_args(tokens)
    return parsed.command or "serve", parsed
|
2026-03-29 13:52:23 +02:00
|
|
|
|
|
|
|
|
|
2026-05-27 13:04:47 +02:00
|
|
|
def _cleanup_config(args: argparse.Namespace) -> tuple[Path, tuple[str, ...]]:
    """Resolve the feeds directory and media sub-directories for cleanup.

    Args:
        args: Parsed ``cleanup-media`` arguments; ``feeds_dir`` and ``config``
            are read.

    Returns:
        ``(feeds_dir, media_dirs)``.

        * ``feeds_dir`` is the explicit ``--feeds-dir`` when one was given,
          otherwise ``<config.out_dir>/feeds`` when a config was given,
          otherwise ``out/feeds``.
        * ``media_dirs`` is ``DEFAULT_MEDIA_DIRS`` without a config, otherwise
          the image, audio, video and file directory settings (in that order)
          taken from ``build_base_settings(config)``.

    Raises:
        Whatever ``load_config`` / ``build_base_settings`` raise for a missing
        or invalid config file (the caller handles ``FileNotFoundError`` and
        ``ValueError``).
    """
    # An empty ``--feeds-dir`` is treated exactly like an omitted one on every
    # path, so the config-derived default is not silently bypassed.
    explicit_feeds_dir = Path(args.feeds_dir) if args.feeds_dir else None

    if args.config is None:
        feeds_dir = (
            explicit_feeds_dir if explicit_feeds_dir is not None else Path("out/feeds")
        )
        return feeds_dir, DEFAULT_MEDIA_DIRS

    config = load_config(args.config)
    settings = build_base_settings(config)
    media_dirs = (
        str(settings.get("REPUBLISHER_IMAGE_DIR", IMAGE_DIR)),
        str(settings.get("REPUBLISHER_AUDIO_DIR", AUDIO_DIR)),
        str(settings.get("REPUBLISHER_VIDEO_DIR", VIDEO_DIR)),
        str(settings.get("REPUBLISHER_FILE_DIR", FILE_DIR)),
    )

    if explicit_feeds_dir is not None:
        return explicit_feeds_dir, media_dirs
    return config.out_dir / "feeds", media_dirs
|
|
|
|
|
|
|
|
|
|
|
2026-03-31 12:47:36 +02:00
|
|
|
def _install_signal_handlers(stop_event: asyncio.Event) -> None:
|
|
|
|
|
loop = asyncio.get_running_loop()
|
|
|
|
|
|
|
|
|
|
def request_stop(*_: object) -> None:
|
|
|
|
|
if not stop_event.is_set():
|
|
|
|
|
stop_event.set()
|
|
|
|
|
|
|
|
|
|
for signum in (signal.SIGINT, signal.SIGTERM):
|
|
|
|
|
try:
|
|
|
|
|
loop.add_signal_handler(signum, request_stop)
|
|
|
|
|
except NotImplementedError:
|
|
|
|
|
signal.signal(signum, request_stop)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _serve_app(*, host: str, port: int, dev_mode: bool) -> None:
    """Serve the web application with Hypercorn until a stop is requested.

    Args:
        host: Interface to bind to.
        port: TCP port to bind to.
        dev_mode: Forwarded to ``create_app``.

    A shared ``asyncio.Event`` is wired to SIGINT/SIGTERM, stored on the app
    under ``SHUTDOWN_EVENT_KEY`` and used as Hypercorn's shutdown trigger. The
    event is always set when serving ends, whatever the reason.
    """
    shutdown_requested = asyncio.Event()
    _install_signal_handlers(shutdown_requested)

    web_app = create_app(dev_mode=dev_mode)
    # Expose the event to the application through its extensions mapping.
    web_app.extensions[SHUTDOWN_EVENT_KEY] = shutdown_requested

    server_config = HypercornConfig()
    server_config.bind = [f"{host}:{port}"]
    server_config.use_reloader = False
    # NOTE(review): "-" is understood to be Hypercorn's notation for logging
    # to the standard streams; confirm against the Hypercorn config docs.
    server_config.accesslog = "-"
    server_config.errorlog = "-"

    async def _until_stop_requested() -> None:
        await shutdown_requested.wait()

    try:
        await hypercorn_serve(
            web_app,
            server_config,
            shutdown_trigger=_until_stop_requested,
        )
    finally:
        # Mark shutdown even when the server ended on its own or with an error.
        shutdown_requested.set()
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 11:42:13 +02:00
|
|
|
def entrypoint(argv: list[str] | None = None) -> int:
    """Run the command-line interface and return a process exit code.

    Args:
        argv: Argument tokens without the program name; ``None`` means
            ``sys.argv[1:]`` (see ``parse_args``).

    Returns:
        * ``crawl``: whatever ``crawl_module.crawl_from_config`` returns.
        * ``cleanup-media``: ``0`` on success, ``1`` when the cleanup result
          reports failures, ``2`` when the config file is missing or invalid.
        * ``serve`` (the default): ``0`` after the server stops, ``2`` when
          the port is not an integer in the range 0-65535.
    """
    command, args = parse_args(argv)

    if command == "crawl":
        # Push this module's ``check_runtime`` binding onto the crawl module
        # before crawling. NOTE(review): presumably so that replacing the
        # alias on this module takes effect inside the crawl; confirm.
        crawl_module.check_runtime = check_runtime
        return crawl_module.crawl_from_config(args.config)

    if command == "cleanup-media":
        try:
            feeds_dir, media_dirs = _cleanup_config(args)
        except FileNotFoundError as error:
            # Prefer the path reported by the OS error; fall back to the
            # path the user passed.
            missing_path = (
                Path(error.filename).expanduser()
                if error.filename
                else Path(args.config).expanduser()
            )
            logger.error("Config file not found: %s", missing_path)
            return 2
        except ValueError as error:
            logger.error("Invalid config: %s", error)
            return 2

        result = cleanup_media(
            feeds_dir=feeds_dir,
            retention_days=args.days,
            dry_run=bool(args.dry_run),
            media_dirs=media_dirs,
        )
        return 1 if result.failures else 0

    # Everything else is "serve". The port arrives as a string (CLI flag or
    # REPUBLISHER_PORT), so convert and validate it here.
    try:
        port = int(args.port)
    except ValueError:
        port = None
    # Reject out-of-range values up front: they parse as integers but would
    # otherwise only fail later, at socket bind time, with a traceback.
    if port is None or not 0 <= port <= 65535:
        logger.error("Invalid REPUBLISHER_PORT/--port value: %s", args.port)
        return 2

    # Ctrl-C during startup or shutdown is a normal way to stop the server.
    with suppress(KeyboardInterrupt):
        asyncio.run(_serve_app(host=args.host, port=port, dev_mode=bool(args.dev_mode)))
    return 0
|
2026-03-29 13:52:23 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry: run the CLI and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(entrypoint())
|