output to out/feeds/*

This commit is contained in:
Abel Luck 2026-03-30 15:21:39 +02:00
parent beac981047
commit 6fd3b598ab
11 changed files with 298 additions and 16 deletions

View file

@ -141,12 +141,20 @@ def test_build_feed_settings_derives_output_paths_from_feed_slug(
assert feed_settings["REPUBLISHER_OUT_DIR"] == str(out_dir)
assert feed_settings["LOG_FILE"] == str(out_dir / "logs" / "info-marti.log")
assert feed_settings["HTTPCACHE_DIR"] == str(out_dir / "httpcache")
assert feed_settings["IMAGES_STORE"] == str(out_dir / "info-marti" / "images")
assert feed_settings["AUDIO_STORE"] == str(out_dir / "info-marti" / "audio")
assert feed_settings["VIDEO_STORE"] == str(out_dir / "info-marti" / "video")
assert feed_settings["FILES_STORE"] == str(out_dir / "info-marti" / "files")
assert feed_settings["IMAGES_STORE"] == str(
out_dir / "feeds" / "info-marti" / "images"
)
assert feed_settings["AUDIO_STORE"] == str(
out_dir / "feeds" / "info-marti" / "audio"
)
assert feed_settings["VIDEO_STORE"] == str(
out_dir / "feeds" / "info-marti" / "video"
)
assert feed_settings["FILES_STORE"] == str(
out_dir / "feeds" / "info-marti" / "files"
)
assert feed_settings["FEEDS"] == {
str(out_dir / "info-marti" / "feed.rss"): {
str(out_dir / "feeds" / "info-marti" / "feed.rss"): {
"format": "rss",
"postprocessing": [],
"feed_name": "info-marti",
@ -181,5 +189,9 @@ def test_build_feed_settings_uses_runtime_media_dir_overrides(tmp_path: Path) ->
assert feed_settings["REPUBLISHER_VIDEO_DIR"] == "videos-custom"
assert feed_settings["REPUBLISHER_AUDIO_DIR"] == "audio-custom"
assert feed_settings["VIDEO_STORE"] == str(out_dir / "gp-pod" / "videos-custom")
assert feed_settings["AUDIO_STORE"] == str(out_dir / "gp-pod" / "audio-custom")
assert feed_settings["VIDEO_STORE"] == str(
out_dir / "feeds" / "gp-pod" / "videos-custom"
)
assert feed_settings["AUDIO_STORE"] == str(
out_dir / "feeds" / "gp-pod" / "audio-custom"
)

View file

@ -1,6 +1,10 @@
from pathlib import Path
from scrapy.settings import Settings
from repub import entrypoint as entrypoint_module
from repub.spiders.rss_spider import RssFeedSpider
from repub.utils import FileType, local_audio_path, local_image_path
def test_entrypoint_supports_file_feed_urls(tmp_path: Path, monkeypatch) -> None:
@ -29,9 +33,33 @@ DOWNLOAD_TIMEOUT = 5
exit_code = entrypoint_module.entrypoint(["--config", str(config_path)])
output_path = tmp_path / "out" / "local-file" / "feed.rss"
output_path = tmp_path / "out" / "feeds" / "local-file" / "feed.rss"
assert exit_code == 0
assert output_path.exists()
output = output_path.read_text(encoding="utf-8")
assert "<title>Local Demo Feed</title>" in output
assert "<title>Local Demo Entry</title>" in output
def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
spider = RssFeedSpider(feed_name="demo", url="https://example.com/feed.rss")
spider.settings = Settings(
values={
"REPUBLISHER_IMAGE_DIR": "images",
"REPUBLISHER_FILE_DIR": "files",
"REPUBLISHER_AUDIO_DIR": "audio",
"REPUBLISHER_VIDEO_DIR": "video",
}
)
assert (
spider.rewrite_image_url("https://example.com/media/photo.jpg")
== f"images/{local_image_path('https://example.com/media/photo.jpg')}"
)
assert (
spider.rewrite_file_url(
FileType.AUDIO,
"https://example.com/media/podcast.mp3",
)
== f"audio/{local_audio_path('https://example.com/media/podcast.mp3')}"
)

View file

@ -129,7 +129,7 @@ def test_job_runtime_run_now_writes_log_and_stats_and_marks_success(
assert execution.bytes_count > 0
assert artifacts.log_path.exists()
assert artifacts.stats_path.exists()
output_path = tmp_path / "out" / "manual-source" / "feed.rss"
output_path = tmp_path / "out" / "feeds" / "manual-source" / "feed.rss"
assert output_path.exists()
output_text = output_path.read_text(encoding="utf-8")
assert "<title>Local Demo Feed</title>" in output_text
@ -291,7 +291,7 @@ def test_generate_pangea_feed_writes_pangea_rss_file(
log_path=tmp_path / "out" / "logs" / "pangea.log",
)
assert output_path == (tmp_path / "out" / "pangea-source" / "pangea.rss")
assert output_path == (tmp_path / "out" / "feeds" / "pangea-source" / "pangea.rss")
assert output_path.exists()
assert "Pangea Fixture" in output_path.read_text(encoding="utf-8")

View file

@ -1,6 +1,8 @@
from __future__ import annotations
import asyncio
import os
from datetime import UTC, datetime, timedelta
from pathlib import Path
from typing import Any, cast
@ -205,6 +207,7 @@ def test_render_dashboard_shows_dashboard_information_architecture(
assert "Operational snapshot" in body
assert "Running executions" in body
assert "Published feeds" in body
assert 'href="/sources"' in body
assert 'href="/runs"' in body
assert "Create source" in body
@ -246,6 +249,141 @@ def test_render_dashboard_describes_log_artifact_footprint(
asyncio.run(run())
def test_load_dashboard_view_lists_source_feed_artifacts(
monkeypatch, tmp_path: Path
) -> None:
db_path = tmp_path / "dashboard-feeds.db"
monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
app = create_app()
out_dir = tmp_path / "out"
log_dir = out_dir / "logs"
app.config["REPUB_LOG_DIR"] = log_dir
log_dir.mkdir(parents=True)
create_source(
name="Available source",
slug="available-source",
source_type="feed",
notes="",
spider_arguments="",
enabled=False,
cron_minute="*/5",
cron_hour="*",
cron_day_of_month="*",
cron_day_of_week="*",
cron_month="*",
feed_url="https://example.com/available.xml",
)
create_source(
name="Missing source",
slug="missing-source",
source_type="feed",
notes="",
spider_arguments="",
enabled=False,
cron_minute="*/5",
cron_hour="*",
cron_day_of_month="*",
cron_day_of_week="*",
cron_month="*",
feed_url="https://example.com/missing.xml",
)
feed_dir = out_dir / "feeds" / "available-source"
feed_dir.mkdir(parents=True)
feed_path = feed_dir / "feed.rss"
feed_path.write_bytes(b"x" * 1024)
(feed_dir / "audio.mp3").write_bytes(b"y" * 2048)
reference_time = datetime(2026, 3, 30, 12, 30, tzinfo=UTC)
updated_at = reference_time - timedelta(minutes=32)
updated_at_epoch = updated_at.timestamp()
os.utime(feed_path, (updated_at_epoch, updated_at_epoch))
source_feeds = cast(
tuple[dict[str, object], ...],
load_dashboard_view(log_dir=log_dir, now=reference_time)["source_feeds"],
)
assert source_feeds == (
{
"source": "Available source",
"slug": "available-source",
"feed_href": "/feeds/available-source/feed.rss",
"feed_status_label": "Available",
"feed_status_tone": "done",
"feed_exists": True,
"last_updated": "32 minutes ago",
"last_updated_iso": updated_at.isoformat(),
"artifact_footprint": "3.0 KB",
},
{
"source": "Missing source",
"slug": "missing-source",
"feed_href": "/feeds/missing-source/feed.rss",
"feed_status_label": "Missing",
"feed_status_tone": "failed",
"feed_exists": False,
"last_updated": "Never published",
"last_updated_iso": None,
"artifact_footprint": "0 B",
},
)
def test_render_dashboard_shows_source_feed_links_and_statuses(
monkeypatch, tmp_path: Path
) -> None:
db_path = tmp_path / "dashboard-feed-links.db"
monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
app = create_app()
app.config["REPUB_LOG_DIR"] = tmp_path / "out" / "logs"
create_source(
name="Published source",
slug="published-source",
source_type="feed",
notes="",
spider_arguments="",
enabled=False,
cron_minute="*/5",
cron_hour="*",
cron_day_of_month="*",
cron_day_of_week="*",
cron_month="*",
feed_url="https://example.com/published.xml",
)
create_source(
name="Missing source",
slug="missing-source",
source_type="feed",
notes="",
spider_arguments="",
enabled=False,
cron_minute="*/5",
cron_hour="*",
cron_day_of_month="*",
cron_day_of_week="*",
cron_month="*",
feed_url="https://example.com/missing.xml",
)
async def run() -> None:
published_feed = tmp_path / "out" / "feeds" / "published-source" / "feed.rss"
published_feed.parent.mkdir(parents=True)
published_feed.write_text("<rss/>\n", encoding="utf-8")
body = str(await render_dashboard(app))
assert "Published feeds" in body
assert 'href="/feeds/published-source/feed.rss"' in body
assert 'href="/feeds/missing-source/feed.rss"' in body
assert "Available" in body
assert "Missing" in body
assert "Never published" in body
asyncio.run(run())
def test_render_sources_shows_table_and_create_link() -> None:
async def run() -> None:
body = str(await render_sources())