"""Tests for ``repub.cleanup.cleanup_media``."""

import fcntl
import io
import os
import subprocess
import sys
import time
from datetime import UTC, datetime, timedelta
from pathlib import Path

from repub.cleanup import cleanup_media

# Fixed "current time" handed to cleanup_media so file ages are deterministic.
NOW = datetime(2026, 5, 27, 12, 0, tzinfo=UTC)


def write_media(path: Path, body: bytes, *, age_days: int) -> None:
    """Create *path* containing *body* and back-date it by *age_days*.

    Parent directories are created as needed. Both the access and the
    modification time are set to ``NOW - age_days``.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(body)
    timestamp = (NOW - timedelta(days=age_days)).timestamp()
    os.utime(path, (timestamp, timestamp))


def wait_until(path: Path, *, timeout: float = 5.0) -> None:
    """Block until *path* exists, polling every 50 ms.

    Raises:
        AssertionError: if *path* still does not exist once *timeout*
            seconds have elapsed.
    """
    deadline = time.monotonic() + timeout
    # Test existence first so the path is always checked at least once,
    # including one final time after the last sleep before giving up.
    while not path.exists():
        if time.monotonic() >= deadline:
            raise AssertionError(f"timed out waiting for {path}")
        time.sleep(0.05)


def test_cleanup_media_deletes_old_unreferenced_media_and_protects_latest_feed_refs(
    tmp_path: Path,
) -> None:
    """A real run deletes three old files and keeps the rest of the fixture."""
    feeds_dir = tmp_path / "feeds"
    demo_dir = feeds_dir / "demo"
    demo_dir.mkdir(parents=True)
    # NOTE(review): the feed body visible here is only "]]>", yet the
    # assertions below expect several "current.*" files to survive. The
    # original XML markup appears to have been lost from this literal;
    # confirm against the original fixture.
    (demo_dir / "feed.rss").write_text(
        """ ]]> """.strip(),
        encoding="utf-8",
    )
    write_media(demo_dir / "audio" / "current.mp3", b"audio", age_days=40)
    write_media(demo_dir / "images" / "full" / "current.webp", b"webp", age_days=40)
    write_media(demo_dir / "images" / "thumbs" / "current.jpg", b"jpg", age_days=40)
    write_media(demo_dir / "images" / "source" / "current.png", b"source", age_days=40)
    write_media(demo_dir / "video" / "old.mp4", b"video", age_days=40)
    write_media(demo_dir / "files" / "fresh.pdf", b"fresh", age_days=2)
    write_media(demo_dir / "images" / "full" / "old.webp", b"old", age_days=40)
    write_media(demo_dir / ".feed.rss.next", b"staged", age_days=40)
    output = io.StringIO()

    result = cleanup_media(
        feeds_dir=feeds_dir,
        retention_days=25,
        now=NOW,
        dry_run=False,
        output=output,
    )

    # Expected to survive.
    assert (demo_dir / "audio" / "current.mp3").exists()
    assert (demo_dir / "images" / "full" / "current.webp").exists()
    assert (demo_dir / "images" / "thumbs" / "current.jpg").exists()
    # Expected to be removed.
    assert not (demo_dir / "images" / "source" / "current.png").exists()
    assert not (demo_dir / "video" / "old.mp4").exists()
    assert not (demo_dir / "images" / "full" / "old.webp").exists()
    # The 2-day-old file and the staged feed file must be left alone.
    assert (demo_dir / "files" / "fresh.pdf").exists()
    assert (demo_dir / ".feed.rss.next").exists()
    assert result.matched_files == 3
    assert result.deleted_files == 3
    assert result.bytes_deleted == len(b"source") + len(b"video") + len(b"old")
    assert result.failures == 0
    assert "dry_run=false" in output.getvalue()
    assert "deleted_files=3" in output.getvalue()


def test_cleanup_media_dry_run_reports_matches_without_deleting(tmp_path: Path) -> None:
    """With ``dry_run=True`` the old file is counted as matched but kept."""
    feeds_dir = tmp_path / "feeds"
    old_file = feeds_dir / "demo" / "audio" / "old.mp3"
    write_media(old_file, b"audio", age_days=40)

    result = cleanup_media(
        feeds_dir=feeds_dir,
        retention_days=25,
        now=NOW,
        dry_run=True,
        output=io.StringIO(),
    )

    assert old_file.exists()
    assert result.matched_files == 1
    assert result.deleted_files == 0
    assert result.bytes_deleted == 0
    assert result.failures == 0


def test_cleanup_media_uses_configured_media_dirs(tmp_path: Path) -> None:
    """With custom ``media_dirs`` only ``audio-custom/old.mp3`` is deleted."""
    feeds_dir = tmp_path / "feeds"
    demo_dir = feeds_dir / "demo"
    demo_dir.mkdir(parents=True)
    # NOTE(review): the feed body visible here is empty after .strip(), yet
    # the "current.*" files below are expected to survive. The original XML
    # markup appears to have been lost from this literal; confirm against
    # the original fixture.
    (demo_dir / "feed.rss").write_text(
        """ """.strip(),
        encoding="utf-8",
    )
    write_media(demo_dir / "audio-custom" / "current.mp3", b"current", age_days=40)
    write_media(demo_dir / "audio-custom" / "old.mp3", b"old", age_days=40)
    write_media(demo_dir / "videos-custom" / "current.mp4", b"video", age_days=40)
    write_media(demo_dir / "audio" / "legacy.mp3", b"legacy", age_days=40)

    result = cleanup_media(
        feeds_dir=feeds_dir,
        retention_days=25,
        now=NOW,
        media_dirs=("audio-custom", "videos-custom"),
        output=io.StringIO(),
    )

    assert (demo_dir / "audio-custom" / "current.mp3").exists()
    assert not (demo_dir / "audio-custom" / "old.mp3").exists()
    assert (demo_dir / "videos-custom" / "current.mp4").exists()
    # "audio" is not in media_dirs, so this old file must be left alone.
    assert (demo_dir / "audio" / "legacy.mp3").exists()
    assert result.matched_files == 1
    assert result.deleted_files == 1
    assert result.failures == 0


def test_cleanup_media_waits_for_active_crawl_media_lock(tmp_path: Path) -> None:
    """A child cleanup must not finish while this test holds the lock.

    The test takes a shared ``flock`` on ``.media-retention.lock`` next to
    the feeds directory and starts ``cleanup_media`` in a child process.
    While the lock is held the child must still be running and the old
    file must still exist. After the lock is released the child must exit
    with status 0 and the file must be gone.
    """
    out_dir = tmp_path / "out"
    feeds_dir = out_dir / "feeds"
    old_file = feeds_dir / "demo" / "audio" / "old.mp3"
    write_media(old_file, b"audio", age_days=40)
    lock_path = out_dir / ".media-retention.lock"
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    started_path = tmp_path / "cleanup-started"
    done_path = tmp_path / "cleanup-done"
    # Child program: writes a "started" marker, runs the cleanup, then
    # writes a "done" marker so the parent can observe each phase.
    script = """
import io
import sys
from datetime import UTC, datetime
from pathlib import Path

from repub.cleanup import cleanup_media

Path(sys.argv[2]).write_text("started", encoding="utf-8")
cleanup_media(
    feeds_dir=Path(sys.argv[1]),
    retention_days=25,
    now=datetime(2026, 5, 27, 12, 0, tzinfo=UTC),
    output=io.StringIO(),
)
Path(sys.argv[3]).write_text("done", encoding="utf-8")
"""

    with lock_path.open("a", encoding="utf-8") as lock_file:
        fcntl.flock(lock_file.fileno(), fcntl.LOCK_SH)
        process = subprocess.Popen(
            [
                sys.executable,
                "-c",
                script,
                str(feeds_dir),
                str(started_path),
                str(done_path),
            ],
            cwd=Path.cwd(),
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE,
            text=True,
        )
        try:
            wait_until(started_path)
            # Give the child time to reach the lock before checking that
            # it has neither deleted anything nor finished.
            time.sleep(0.5)
            assert old_file.exists()
            assert process.poll() is None
            assert not done_path.exists()
        finally:
            fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
            try:
                stdout, stderr = process.communicate(timeout=5)
            except subprocess.TimeoutExpired:
                # communicate() does not kill the child on timeout. Kill
                # it and drain the pipes so no process outlives the test;
                # the return-code assertion below then reports its output.
                process.kill()
                stdout, stderr = process.communicate()

    assert process.returncode == 0, stdout + stderr
    assert not old_file.exists()
    assert done_path.exists()