"""Tests for repub's UI components, pages, and web actions.

Covers the small HTML component helpers (status badges, toggle fields,
action buttons) plus — further down in this module — page rendering,
Datastar SSE streaming, and the create/edit/settings action endpoints.
"""

from __future__ import annotations

import asyncio
import json
import os
import re
from datetime import UTC, datetime, timedelta
from pathlib import Path
from typing import Any, cast

import pytest

from repub.components import action_button, status_badge, toggle_field
from repub.datastar import RefreshBroker, render_sse_event, render_stream
from repub.jobs import load_dashboard_view
from repub.model import (
    Job,
    JobExecution,
    JobExecutionStatus,
    Source,
    SourceFeed,
    SourcePangea,
    create_source,
    load_max_concurrent_jobs,
    load_settings_form,
    save_setting,
)
from repub.pages.runs import runs_page
from repub.pages.sources import sources_page
from repub.web import (
    create_app,
    get_refresh_broker,
    get_tab_state_store,
    render_create_source,
    render_dashboard,
    render_edit_source,
    render_execution_logs,
    render_runs,
    render_settings,
    render_sources,
    versioned_static_asset_href,
)


def test_status_badge_uses_green_done_tone() -> None:
    """A 'done' tone badge renders with the emerald Tailwind utilities."""
    badge = str(status_badge(label="Succeeded", tone="done"))
    assert "bg-emerald-100 text-emerald-800" in badge
    assert "Succeeded" in badge


def test_toggle_field_active_state_utilities_exist_in_built_css() -> None:
    """The toggle's data-class utilities are present in the built stylesheet.

    Guards against the Tailwind build purging classes that are only
    referenced dynamically via Datastar ``data-class`` attributes.
    """
    markup = str(
        toggle_field(
            label="Enabled",
            description="Enable this source",
            signal_name="enabled",
            checked=True,
        )
    )
    # Read the compiled CSS artifact shipped alongside the package.
    css = (
        Path(__file__).resolve().parents[1] / "repub" / "static" / "app.css"
    ).read_text(encoding="utf-8")
    assert "data-class:bg-amber-500" in markup
    assert "data-class:translate-x-5" in markup
    assert ".bg-amber-500" in css
    assert ".translate-x-5" in css


def test_action_button_adds_cursor_pointer_for_active_buttons() -> None:
    """An enabled action button is a pointer-cursor ``type="button"``."""
    markup = str(action_button(label="Run now"))
    assert "cursor-pointer" in markup
    assert 'type="button"' in markup


def test_action_button_omits_post_handler_when_disabled() -> None:
    """A disabled button must not emit its ``@post`` handler at all."""
    markup = str(
        action_button(
            label="Queued",
            disabled=True,
            post_path="/actions/jobs/7/run-now",
        )
    )
    assert "cursor-not-allowed" in markup
    assert "@post(" not in markup


def test_action_button_supports_submit_variant() -> None:
markup = str( action_button( label="Save settings", tone="dark", button_type="submit", ) ) assert 'type="submit"' in markup assert "bg-slate-950" in markup assert "cursor-pointer" in markup def test_action_button_supports_datastar_pointerdown_post() -> None: markup = str( action_button( label="Delete", tone="danger", post_path="/actions/jobs/7/delete", ) ) assert 'data-on:pointerdown="@post('/actions/jobs/7/delete')"' in markup def test_runs_page_renders_completed_execution_end_time_as_relative_hoverable_time() -> ( None ): ended_at = "2026-01-15T10:00:00+00:00" body = str( runs_page( completed_executions=( { "source": "Completed source", "slug": "completed-source", "job_id": 7, "execution_id": 42, "ended_at": "2 hours ago", "ended_at_iso": ended_at, "status": "Succeeded", "status_tone": "done", "stats": "1 requests • 1 items • 1 bytes", "summary": "Worker exited successfully", "log_href": "/job/7/execution/42/logs", }, ) ) ) assert "data-ended-at" in body assert f'data-ended-at="{ended_at}"' in body assert f'datetime="{ended_at}"' in body assert f'title="{ended_at}"' in body assert ">2 hours ago<" in body def test_runs_page_renders_combined_running_jobs_table() -> None: body = str( runs_page( queued_executions=( { "source": "Queued source", "slug": "queued-source", "job_id": 7, "execution_id": 42, "queued_at": "2 minutes ago", "queued_at_iso": "2026-03-30T12:28:00+00:00", "queue_position": 1, "status": "Queued", "status_tone": "idle", "run_label": "Queued", "run_disabled": True, "run_post_path": "/actions/jobs/7/run-now", "cancel_post_path": "/actions/queued-executions/42/cancel", "move_up_disabled": True, "move_up_post_path": None, "move_down_disabled": True, "move_down_post_path": None, }, ) ) ) assert "Running jobs" in body assert "queued-source" in body assert ">Queued<" in body assert "/actions/queued-executions/42/cancel" in body def test_sources_page_removes_view_runs_action_and_last_run_caption() -> None: body = str( sources_page( sources=( { "name": 
"Source one", "slug": "source-one", "source_type": "Feed", "upstream": "https://example.com/feed.xml", "schedule": "cron: */5 * * * *", "last_run": "Never run", "state": "Enabled", "state_tone": "scheduled", }, ) ) ) assert ">Edit<" in body assert ">Delete<" in body assert "View runs" not in body assert "Never run" not in body def test_runs_page_renders_clear_completed_button_and_pagination() -> None: completed_executions = tuple( { "source": f"Completed source {index}", "slug": f"completed-source-{index}", "job_id": 7, "execution_id": index, "ended_at": "2 hours ago", "ended_at_iso": "2026-01-15T10:00:00+00:00", "status": "Succeeded", "status_tone": "done", "stats": "1 requests • 1 items • 1 bytes", "summary": "Worker exited successfully", "log_href": f"/job/7/execution/{index}/logs", } for index in range(1, 21) ) body = str( runs_page( completed_executions=completed_executions, completed_page=2, completed_page_size=20, completed_total_count=21, completed_total_pages=2, ) ) assert "/actions/completed-executions/clear" in body assert ">Clear history<" in body assert "Showing" in body assert "21" in body assert "@post('/actions/runs/completed-page/1')" in body assert "@post('/actions/runs/completed-page/2')" in body assert 'aria-current="page"' in body def test_root_get_serves_datastar_shim() -> None: async def run() -> None: client = create_app().test_client() response = await client.get("/") body = await response.get_data(as_text=True) stylesheet_href = versioned_static_asset_href("app.css") assert response.status_code == 200 assert response.headers["ETag"] assert body.startswith("") assert f'' in body assert ( '' in body ) assert 'data-signals:tabid="self.crypto.randomUUID().substring(0,8)"' in body assert 'data-init="@post(window.location.pathname +' in body assert "retryMaxCount: Infinity" in body assert "data-on:online__window=" in body assert '
None: href = versioned_static_asset_href("app.css") assert re.fullmatch(r"/static/app-[0-9a-f]{12}\.css", href) def test_versioned_static_asset_route_serves_registered_css_file() -> None: async def run() -> None: client = create_app().test_client() expected = ( Path(__file__).resolve().parents[1] / "repub" / "static" / "app.css" ).read_text(encoding="utf-8") response = await client.get("/static/app-deadbeefcafe.css") body = await response.get_data(as_text=True) assert response.status_code == 200 assert response.mimetype == "text/css" assert body == expected asyncio.run(run()) def test_versioned_static_asset_route_preserves_existing_hyphenated_files() -> None: async def run() -> None: client = create_app().test_client() response = await client.get("/static/datastar@1.0.0-RC.8.js") body = await response.get_data(as_text=True) assert response.status_code == 200 assert response.mimetype == "text/javascript" assert body.startswith("// Datastar v1.0.0-RC.8") asyncio.run(run()) def test_create_app_bootstraps_default_database_path( monkeypatch, tmp_path: Path ) -> None: monkeypatch.chdir(tmp_path) app = create_app() assert Path(app.config["REPUB_DB_PATH"]) == tmp_path / "republisher.db" assert (tmp_path / "republisher.db").exists() def test_root_get_honors_if_none_match() -> None: async def run() -> None: client = create_app().test_client() initial = await client.get("/") etag = initial.headers["ETag"] response = await client.get("/", headers={"If-None-Match": etag}) assert response.status_code == 304 assert response.headers["ETag"] == etag asyncio.run(run()) def test_dashboard_post_serves_morph_component() -> None: async def run() -> None: client = create_app().test_client() async with client.request("/?u=shim", method="POST") as connection: await connection.send_complete() chunk = await asyncio.wait_for(connection.receive(), timeout=1) raw_connection = cast(Any, connection) assert raw_connection.status_code == 200 assert raw_connection.headers["Content-Type"] == 
"text/event-stream" assert b"event: datastar-patch-elements" in chunk assert b"id: " in chunk assert b'
None: async def run() -> None: async def render() -> str: return '
same
' event_id, event = await render_sse_event(render) repeated_id, repeated_event = await render_sse_event( render, last_event_id=event_id ) assert repeated_id == event_id assert event is not None assert repeated_event is None asyncio.run(run()) def test_app_refresh_broker_publishes_events() -> None: async def run() -> None: app = create_app() broker = get_refresh_broker(app) queue = broker.subscribe() broker.publish() event = await asyncio.wait_for(queue.get(), timeout=1) assert event == "refresh-event" broker.unsubscribe(queue) asyncio.run(run()) def test_render_stream_yields_on_connect_and_refresh() -> None: async def run() -> None: queue = RefreshBroker().subscribe() renders = 0 async def render() -> str: nonlocal renders renders += 1 return f'
{renders}
' stream = render_stream(queue, render) first = await anext(stream) await queue.put("refresh-event") second = await anext(stream) await stream.aclose() assert "1
" in first assert "2
" in second asyncio.run(run()) def test_render_stream_uses_view_transition_for_queue_reorders() -> None: async def run() -> None: queue = RefreshBroker().subscribe() async def render() -> str: return '
queue
' stream = render_stream(queue, render, render_on_connect=False) await queue.put("queue-reordered") event = await anext(stream) await stream.aclose() assert "useViewTransition true" in str(event) asyncio.run(run()) def test_render_dashboard_shows_dashboard_information_architecture( monkeypatch, tmp_path: Path ) -> None: db_path = tmp_path / "dashboard-render.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) async def run() -> None: app = create_app() body = str(await render_dashboard(app)) assert "Operational snapshot" in body assert "Running executions" in body assert "Published feeds" in body assert 'href="/sources"' in body assert 'href="/runs"' in body assert "Create source" in body assert "lg:grid-cols-[14rem_minmax(0,1fr)]" in body assert "lg:px-5 lg:py-4" in body asyncio.run(run()) def test_render_dashboard_shows_empty_state_rows(monkeypatch, tmp_path: Path) -> None: db_path = tmp_path / "dashboard-empty.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) async def run() -> None: app = create_app() body = str(await render_dashboard(app)) assert "No job executions are running." in body assert "No feeds have been published yet." 
in body asyncio.run(run()) def test_load_dashboard_view_measures_log_artifact_path( monkeypatch, tmp_path: Path ) -> None: db_path = tmp_path / "dashboard-footprint.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) create_app() out_dir = tmp_path / "out" log_dir = out_dir / "logs" cache_dir = out_dir / "httpcache" log_dir.mkdir(parents=True) cache_dir.mkdir(parents=True) (log_dir / "run.log").write_bytes(b"x" * 1024) (cache_dir / "cache.bin").write_bytes(b"y" * 2048) snapshot = load_dashboard_view(log_dir=log_dir)["snapshot"] assert cast(dict[str, str], snapshot)["artifact_footprint"] == "3.0 KB" def test_render_dashboard_describes_log_artifact_footprint( monkeypatch, tmp_path: Path ) -> None: db_path = tmp_path / "dashboard-footprint-copy.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) async def run() -> None: app = create_app() body = str(await render_dashboard(app)) assert "Current artifact size under the output path." in body asyncio.run(run()) def test_load_dashboard_view_lists_source_feed_artifacts( monkeypatch, tmp_path: Path ) -> None: db_path = tmp_path / "dashboard-feeds.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) app = create_app() out_dir = tmp_path / "out" log_dir = out_dir / "logs" app.config["REPUB_LOG_DIR"] = log_dir log_dir.mkdir(parents=True) create_source( name="Available source", slug="available-source", source_type="feed", notes="", spider_arguments="", enabled=False, cron_minute="*/5", cron_hour="*", cron_day_of_month="*", cron_day_of_week="*", cron_month="*", feed_url="https://example.com/available.xml", ) create_source( name="Missing source", slug="missing-source", source_type="feed", notes="", spider_arguments="", enabled=False, cron_minute="*/5", cron_hour="*", cron_day_of_month="*", cron_day_of_week="*", cron_month="*", feed_url="https://example.com/missing.xml", ) feed_dir = out_dir / "feeds" / "available-source" feed_dir.mkdir(parents=True) feed_path = feed_dir / "feed.rss" 
feed_path.write_bytes(b"x" * 1024) (feed_dir / "audio.mp3").write_bytes(b"y" * 2048) reference_time = datetime(2026, 3, 30, 12, 30, tzinfo=UTC) updated_at = reference_time - timedelta(minutes=32) updated_at_epoch = updated_at.timestamp() os.utime(feed_path, (updated_at_epoch, updated_at_epoch)) source_feeds = cast( tuple[dict[str, object], ...], load_dashboard_view(log_dir=log_dir, now=reference_time)["source_feeds"], ) assert source_feeds == ( { "source": "Available source", "slug": "available-source", "feed_href": "/feeds/available-source/feed.rss", "feed_status_label": "Available", "feed_status_tone": "done", "feed_exists": True, "last_updated": "32 minutes ago", "last_updated_iso": updated_at.isoformat(), "artifact_footprint": "3.0 KB", }, { "source": "Missing source", "slug": "missing-source", "feed_href": "/feeds/missing-source/feed.rss", "feed_status_label": "Missing", "feed_status_tone": "failed", "feed_exists": False, "last_updated": "Never published", "last_updated_iso": None, "artifact_footprint": "0 B", }, ) def test_render_dashboard_shows_source_feed_links_and_statuses( monkeypatch, tmp_path: Path ) -> None: db_path = tmp_path / "dashboard-feed-links.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) app = create_app() app.config["REPUB_LOG_DIR"] = tmp_path / "out" / "logs" create_source( name="Published source", slug="published-source", source_type="feed", notes="", spider_arguments="", enabled=False, cron_minute="*/5", cron_hour="*", cron_day_of_month="*", cron_day_of_week="*", cron_month="*", feed_url="https://example.com/published.xml", ) create_source( name="Missing source", slug="missing-source", source_type="feed", notes="", spider_arguments="", enabled=False, cron_minute="*/5", cron_hour="*", cron_day_of_month="*", cron_day_of_week="*", cron_month="*", feed_url="https://example.com/missing.xml", ) async def run() -> None: published_feed = tmp_path / "out" / "feeds" / "published-source" / "feed.rss" 
published_feed.parent.mkdir(parents=True) published_feed.write_text("\n", encoding="utf-8") body = str(await render_dashboard(app)) assert "Published feeds" in body assert 'href="/feeds/published-source/feed.rss"' in body assert 'href="/feeds/missing-source/feed.rss"' in body assert "Available" in body assert "Missing" in body assert "Never published" in body asyncio.run(run()) def test_render_sources_shows_table_and_create_link() -> None: async def run() -> None: body = str(await render_sources()) assert ">Sources<" in body assert 'href="/sources/create"' in body assert "No sources yet." in body assert "guardian-feed" not in body assert "podcast-audio" not in body asyncio.run(run()) def test_render_sources_shows_live_sidebar_badges(monkeypatch, tmp_path: Path) -> None: db_path = tmp_path / "sources-sidebar.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) app = create_app() create_source( name="First source", slug="first-source", source_type="feed", notes="", spider_arguments="", enabled=True, cron_minute="0", cron_hour="*", cron_day_of_month="*", cron_day_of_week="*", cron_month="*", feed_url="https://example.com/first.xml", ) create_source( name="Second source", slug="second-source", source_type="feed", notes="", spider_arguments="", enabled=True, cron_minute="0", cron_hour="*", cron_day_of_month="*", cron_day_of_week="*", cron_month="*", feed_url="https://example.com/second.xml", ) async def run() -> None: body = str(await render_sources(app)) assert re.search( r'href="/sources"[^>]*>.*?Sources\s*]*>2', body, re.S, ) assert re.search( r'href="/runs"[^>]*>.*?Runs\s*]*>0', body, re.S, ) asyncio.run(run()) def test_render_dashboard_shows_live_sidebar_badges( monkeypatch, tmp_path: Path ) -> None: db_path = tmp_path / "dashboard-sidebar.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) app = create_app() create_source( name="Dashboard source", slug="dashboard-source", source_type="feed", notes="", spider_arguments="", enabled=True, 
cron_minute="0", cron_hour="*", cron_day_of_month="*", cron_day_of_week="*", cron_month="*", feed_url="https://example.com/dashboard.xml", ) async def run() -> None: body = str(await render_dashboard(app)) assert re.search( r'href="/sources"[^>]*>.*?Sources\s*]*>1', body, re.S, ) assert re.search( r'href="/runs"[^>]*>.*?Runs\s*]*>0', body, re.S, ) asyncio.run(run()) def test_render_sources_shows_delete_action_for_each_source( monkeypatch, tmp_path: Path ) -> None: db_path = tmp_path / "sources-delete-row.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) app = create_app() create_source( name="Delete me", slug="delete-me", source_type="feed", notes="", spider_arguments="", enabled=True, cron_minute="0", cron_hour="*", cron_day_of_month="*", cron_day_of_week="*", cron_month="*", feed_url="https://example.com/delete.xml", ) async def run() -> None: body = str(await render_sources(app)) assert "Delete" in body assert "data-on:pointerdown" in body assert "/actions/sources/delete-me/delete" in body asyncio.run(run()) def test_render_create_source_shows_dedicated_form_page() -> None: async def run() -> None: body = str(await render_create_source()) assert ">Create source<" in body assert "Source and job setup" in body assert "data-signals__ifmissing" in body assert "/actions/sources/create" in body assert 'data-show="$sourceType === 'feed'"' in body assert 'data-show="$sourceType === 'pangea'"' in body assert "jobEnabled" in body assert "onlyNewest" in body assert "includeAuthors" in body assert "excludeMedia" in body assert "includeContent" in body assert "convertImages" in body assert "convertVideo" in body assert "TEXT_ONLY" in body assert "breakingnews" in body assert "Pangea domain" in body assert "Feed URL" in body assert "Cron schedule" in body assert "Initial job state" in body assert "Pangea mobile articles" not in body assert "pangea-mobile" not in body assert "guardianproject.info" not in body assert ( "Primary Pangea mobile article mirror for the 
operator landing page." not in body ) assert "language=en,download_media=true" not in body assert 'id="spider-arguments"' in body assert "language=en\ndownload_media=true" not in body assert 'value="articles"' in body assert 'value="10"' in body assert 'value="3"' in body assert 'value="*/30"' in body assert 'value="*"' in body asyncio.run(run()) def test_render_edit_source_shows_existing_values(monkeypatch, tmp_path: Path) -> None: db_path = tmp_path / "edit-page.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) create_app() create_source( name="Kenya health desk", slug="kenya-health", source_type="pangea", notes="Regional health alerts.", spider_arguments="language=en\ndownload_media=true", enabled=True, convert_images=False, convert_video=False, cron_minute="0", cron_hour="*/6", cron_day_of_month="*", cron_day_of_week="*", cron_month="*", pangea_domain="example.org", pangea_category="Health", content_type="breakingnews", only_newest=True, max_articles=12, oldest_article=5, include_authors=True, exclude_media=False, include_content=True, content_format="MOBILE_3", ) async def run() -> None: body = str(await render_edit_source("kenya-health")) assert "Edit source" in body assert "/actions/sources/kenya-health/edit" in body assert "Kenya health desk" in body assert "kenya-health" in body assert 'id="source-slug"' in body assert ( 'id="source-slug" name="source-slug" type="text" value="kenya-health"' in body ) assert " disabled " in body assert "cursor-not-allowed bg-slate-100 text-slate-500" in body assert "example.org" in body assert "Health" in body assert "language=en\ndownload_media=true" in body assert "convertImages: false" in body assert "convertVideo: false" in body asyncio.run(run()) def test_render_settings_shows_current_max_concurrent_jobs( monkeypatch, tmp_path: Path ) -> None: db_path = tmp_path / "settings-page.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) create_app() save_setting("max_concurrent_jobs", 3) 
save_setting("feed_url", "https://mirror.example") async def run() -> None: app = create_app() body = str(await render_settings(app)) assert ">Settings<" in body assert "/actions/settings" in body assert 'value="3"' in body assert 'value="https://mirror.example"' in body assert "Max concurrent jobs" in body assert "Feed URL" in body assert "Example: http://localhost:8080" in body assert "Must include http:// or https://" in body assert 'type="submit"' in body assert "cursor-pointer" in body asyncio.run(run()) def test_create_source_action_creates_pangea_source_and_job_in_database( monkeypatch, tmp_path: Path ) -> None: db_path = tmp_path / "sources.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) async def run() -> None: app = create_app() client = app.test_client() response = await client.post( "/actions/sources/create", headers={"Datastar-Request": "true"}, json={ "sourceName": "Kenya health desk", "sourceSlug": "kenya-health", "sourceType": "pangea", "pangeaDomain": "example.org", "pangeaCategory": " Health ", "contentFormat": "MOBILE_3", "contentType": "breakingnews", "maxArticles": "12", "oldestArticle": "5", "sourceNotes": "Regional health alerts.", "spiderArguments": "language=en\ndownload_media=true", "cronMinute": "0", "cronHour": "*/6", "cronDayOfMonth": "*", "cronDayOfWeek": "*", "cronMonth": "*", "jobEnabled": True, "onlyNewest": True, "includeAuthors": True, "excludeMedia": False, }, ) body = await response.get_data(as_text=True) assert response.status_code == 200 assert "window.location = '/sources'" in body source = Source.get(Source.slug == "kenya-health") pangea = SourcePangea.get(SourcePangea.source == source) job = Job.get(Job.source == source) rendered_sources = str(await render_sources(app)) assert source.name == "Kenya health desk" assert source.source_type == "pangea" assert pangea.category_name == " Health " assert pangea.content_type == "breakingnews" assert pangea.include_content is True assert job.enabled is True assert 
job.convert_images is True assert job.convert_video is True assert job.spider_arguments == "language=en\ndownload_media=true" assert job.cron_hour == "*/6" assert "kenya-health" in rendered_sources assert "Enabled" in rendered_sources asyncio.run(run()) def test_create_source_action_creates_feed_source_and_job_in_database( monkeypatch, tmp_path: Path ) -> None: db_path = tmp_path / "feed-sources.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) async def run() -> None: app = create_app() client = app.test_client() response = await client.post( "/actions/sources/create", headers={"Datastar-Request": "true"}, json={ "sourceName": "NASA feed", "sourceSlug": "nasa-feed", "sourceType": "feed", "feedUrl": "https://www.nasa.gov/rss/dyn/breaking_news.rss", "sourceNotes": "Primary NASA mirror.", "spiderArguments": "", "cronMinute": "30", "cronHour": "*", "cronDayOfMonth": "*", "cronDayOfWeek": "*", "cronMonth": "*", "jobEnabled": False, }, ) body = await response.get_data(as_text=True) assert response.status_code == 200 assert "window.location = '/sources'" in body source = Source.get(Source.slug == "nasa-feed") feed = SourceFeed.get(SourceFeed.source == source) job = Job.get(Job.source == source) rendered_sources = str(await render_sources(app)) assert source.source_type == "feed" assert feed.feed_url == "https://www.nasa.gov/rss/dyn/breaking_news.rss" assert job.enabled is False assert "nasa-feed" in rendered_sources assert "https://www.nasa.gov/rss/dyn/breaking_news.rss" in rendered_sources assert "Disabled" in rendered_sources asyncio.run(run()) def test_edit_source_action_updates_existing_source_and_job_in_database( monkeypatch, tmp_path: Path ) -> None: db_path = tmp_path / "edit-source.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) create_app() create_source( name="Kenya health desk", slug="kenya-health", source_type="pangea", notes="Regional health alerts.", spider_arguments="language=en\ndownload_media=true", enabled=True, cron_minute="0", 
cron_hour="*/6", cron_day_of_month="*", cron_day_of_week="*", cron_month="*", pangea_domain="example.org", pangea_category="Health", content_type="breakingnews", only_newest=True, max_articles=12, oldest_article=5, include_authors=True, exclude_media=False, include_content=True, content_format="MOBILE_3", ) async def run() -> None: app = create_app() client = app.test_client() response = await client.post( "/actions/sources/kenya-health/edit", headers={"Datastar-Request": "true"}, json={ "sourceName": "Kenya health desk nightly", "sourceSlug": "kenya-health", "sourceType": "pangea", "pangeaDomain": "example.org", "pangeaCategory": "Nightly", "contentFormat": "TEXT_ONLY", "contentType": "articles", "maxArticles": "25", "oldestArticle": "7", "sourceNotes": "Updated nightly run.", "spiderArguments": "language=sw\ninclude_audio=false", "cronMinute": "15", "cronHour": "2", "cronDayOfMonth": "*", "cronDayOfWeek": "*", "cronMonth": "*", "jobEnabled": False, "convertImages": False, "convertVideo": False, "onlyNewest": False, "includeAuthors": False, "excludeMedia": True, "includeContent": True, }, ) body = await response.get_data(as_text=True) assert response.status_code == 200 assert "window.location = '/sources'" in body source = Source.get(Source.slug == "kenya-health") pangea = SourcePangea.get(SourcePangea.source == source) job = Job.get(Job.source == source) rendered_sources = str(await render_sources(app)) assert source.name == "Kenya health desk nightly" assert source.notes == "Updated nightly run." 
assert pangea.category_name == "Nightly" assert pangea.content_format == "TEXT_ONLY" assert pangea.max_articles == 25 assert pangea.include_authors is False assert pangea.exclude_media is True assert job.enabled is False assert job.convert_images is False assert job.convert_video is False assert job.spider_arguments == "language=sw\ninclude_audio=false" assert job.cron_hour == "2" assert "Kenya health desk nightly" in rendered_sources assert "example.org / Nightly" in rendered_sources assert "Disabled" in rendered_sources asyncio.run(run()) def test_edit_source_action_rejects_slug_changes(monkeypatch, tmp_path: Path) -> None: db_path = tmp_path / "edit-invalid.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) create_app() create_source( name="Kenya health desk", slug="kenya-health", source_type="pangea", notes="Regional health alerts.", spider_arguments="language=en\ndownload_media=true", enabled=True, cron_minute="0", cron_hour="*/6", cron_day_of_month="*", cron_day_of_week="*", cron_month="*", pangea_domain="example.org", pangea_category="Health", content_type="breakingnews", only_newest=True, max_articles=12, oldest_article=5, include_authors=True, exclude_media=False, include_content=True, content_format="MOBILE_3", ) async def run() -> None: app = create_app() client = app.test_client() response = await client.post( "/actions/sources/kenya-health/edit", headers={"Datastar-Request": "true"}, json={ "sourceName": "Kenya health desk", "sourceSlug": "kenya-health-renamed", "sourceType": "pangea", "pangeaDomain": "example.org", "pangeaCategory": "Health", "contentFormat": "MOBILE_3", "contentType": "breakingnews", "maxArticles": "12", "oldestArticle": "5", "sourceNotes": "Regional health alerts.", "spiderArguments": "language=en\ndownload_media=true", "cronMinute": "0", "cronHour": "*/6", "cronDayOfMonth": "*", "cronDayOfWeek": "*", "cronMonth": "*", "jobEnabled": True, "onlyNewest": True, "includeAuthors": True, "excludeMedia": False, "includeContent": 
True, }, ) body = await response.get_data(as_text=True) assert response.status_code == 200 assert "Slug is immutable." in body assert Source.get(Source.slug == "kenya-health").name == "Kenya health desk" assert Source.select().where(Source.slug == "kenya-health-renamed").count() == 0 asyncio.run(run()) def test_create_source_action_validates_duplicate_slug_and_pangea_type( monkeypatch, tmp_path: Path ) -> None: db_path = tmp_path / "duplicate.db" monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path)) async def run() -> None: app = create_app() Source.create( name="Guardian feed mirror", slug="guardian-feed", source_type="feed", ) client = app.test_client() response = await client.post( "/actions/sources/create", headers={"Datastar-Request": "true"}, json={ "sourceName": "Duplicate guardian", "sourceSlug": "guardian-feed", "sourceType": "pangea", "pangeaDomain": "example.org", "pangeaCategory": "News", "contentFormat": "WEB", "contentType": "not-a-real-type", "maxArticles": "ten", "oldestArticle": "3", "cronMinute": "0", "cronHour": "*", "cronDayOfMonth": "*", "cronDayOfWeek": "*", "cronMonth": "*", "jobEnabled": True, }, ) body = await response.get_data(as_text=True) assert response.status_code == 200 assert "Slug must be unique." in body assert "Content format is invalid." in body assert "Content type is invalid." in body assert "Max articles must be an integer." 
in body
        assert Source.select().where(Source.name == "Duplicate guardian").count() == 0

    asyncio.run(run())


def test_settings_action_updates_max_concurrent_jobs(
    monkeypatch, tmp_path: Path
) -> None:
    """POSTing valid settings persists both values and redirects back to /settings."""
    db_path = tmp_path / "settings-action.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        client = app.test_client()
        response = await client.post(
            "/actions/settings",
            headers={"Datastar-Request": "true"},
            json={
                "maxConcurrentJobs": "3",
                "feedUrl": "https://mirror.example",
            },
        )
        body = await response.get_data(as_text=True)
        assert response.status_code == 200
        # The action responds with a client-side redirect script rather than a 3xx.
        assert "window.location = '/settings'" in body
        assert load_max_concurrent_jobs() == 3
        assert load_settings_form()["feed_url"] == "https://mirror.example"
        # The persisted value must round-trip into the rendered settings form.
        assert 'value="3"' in str(await render_settings(app))

    asyncio.run(run())


def test_settings_action_rejects_non_positive_max_concurrent_jobs(
    monkeypatch, tmp_path: Path
) -> None:
    """A zero concurrency value is rejected with a message and the default (1) kept."""
    db_path = tmp_path / "settings-invalid.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        client = app.test_client()
        response = await client.post(
            "/actions/settings",
            headers={"Datastar-Request": "true"},
            json={"maxConcurrentJobs": "0", "feedUrl": "https://mirror.example"},
        )
        body = await response.get_data(as_text=True)
        assert response.status_code == 200
        assert "Max concurrent jobs must be at least 1." in body
        assert load_max_concurrent_jobs() == 1

    asyncio.run(run())


def test_settings_action_rejects_invalid_feed_url(monkeypatch, tmp_path: Path) -> None:
    """A scheme-less feed URL is rejected and the stored value stays empty."""
    db_path = tmp_path / "settings-invalid-url.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        client = app.test_client()
        response = await client.post(
            "/actions/settings",
            headers={"Datastar-Request": "true"},
            json={"maxConcurrentJobs": "2", "feedUrl": "mirror.example"},
        )
        body = await response.get_data(as_text=True)
        assert response.status_code == 200
        assert "Feed URL must be a valid URL." in body
        assert load_settings_form()["feed_url"] == ""

    asyncio.run(run())


def test_render_runs_shows_running_scheduled_and_completed_tables(
    monkeypatch, tmp_path: Path
) -> None:
    """The runs page renders all three tables plus per-execution log links."""
    db_path = tmp_path / "runs-render.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        source = create_source(
            name="Runs render source",
            slug="runs-render-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/30",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/runs.xml",
        )
        # create_source provisions the backing Job row; fetch it for assertions.
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            running_status=JobExecutionStatus.SUCCEEDED,
        )
        body = str(await render_runs(app))
        assert "Running jobs" in body
        assert "Scheduled jobs" in body
        assert "Completed job executions" in body
        assert "runs-render-source" in body
        assert f"/job/{job.id}/execution/{execution.get_id()}/logs" in body
        # Next-run cells expose the schedule timestamp and a relative "in …" label.
        assert "data-next-run-at" in body
        assert "in " in body

    asyncio.run(run())


def test_render_runs_uses_compact_shell_and_table_classes(
    monkeypatch, tmp_path: Path
) -> None:
    """The runs page layout uses the compact shell/table utility classes."""
    db_path = tmp_path / "runs-compact.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        body = str(await render_runs(app))
        assert "lg:grid-cols-[14rem_minmax(0,1fr)]" in body
        assert "lg:px-5 lg:py-4" in body
        assert "min-w-[64rem]" in body

    asyncio.run(run())


def test_render_runs_shows_empty_state_rows(monkeypatch, tmp_path: Path) -> None:
    """With no data, each table shows exactly one empty-state row."""
    db_path = tmp_path / "runs-empty.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        body = str(await render_runs(app))
        assert body.count("No jobs are running or queued.") == 1
        assert "No jobs are scheduled." in body
        assert "No job executions have completed yet." in body

    asyncio.run(run())


def test_runs_pagination_action_updates_only_the_current_tab(
    monkeypatch, tmp_path: Path
) -> None:
    """Pagination state is scoped per browser tab: only the paging tab receives an SSE patch."""
    db_path = tmp_path / "runs-tab-pagination.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        client = app.test_client()
        source = create_source(
            name="Paged runs source",
            slug="paged-runs-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/30",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/paged-runs.xml",
        )
        job = Job.get(Job.source == source)
        # 21 completed executions → two pages at a page size of 20.
        for minute in range(21):
            JobExecution.create(
                job=job,
                ended_at=datetime(2026, 3, 30, 12, minute, tzinfo=UTC),
                running_status=JobExecutionStatus.SUCCEEDED,
            )
        # Two concurrent Datastar connections simulate two open browser tabs.
        async with client.request(
            "/runs?u=shim",
            method="POST",
            headers={
                "Datastar-Request": "true",
                "Content-Type": "application/json",
            },
        ) as first_connection:
            async with client.request(
                "/runs?u=shim",
                method="POST",
                headers={
                    "Datastar-Request": "true",
                    "Content-Type": "application/json",
                },
            ) as second_connection:
                await first_connection.send(json.dumps({"tabid": "tab-1"}).encode())
                await second_connection.send(json.dumps({"tabid": "tab-2"}).encode())
                await first_connection.send_complete()
                await second_connection.send_complete()
                first_body = (
                    await asyncio.wait_for(first_connection.receive(), timeout=1)
                ).decode()
                second_body = (
                    await asyncio.wait_for(second_connection.receive(), timeout=1)
                ).decode()
                # Page 1 is current, so it must not render as a plain link.
                assert (
                    'href="/runs?completed_page=1" aria-current="page"'
                    not in first_body
                )
                assert (
                    'Showing 1 to ' '20 of ' '21 results'
                ) in first_body
                assert (
                    'Showing 1 to ' '20 of ' '21 results'
                ) in second_body
                # Page to 2 on tab-1 only.
                response = await client.post(
                    "/actions/runs/completed-page/2",
                    headers={"Datastar-Request": "true"},
                    json={"tabid": "tab-1"},
                )
                assert response.status_code == 204
                updated_first_body = (
                    await asyncio.wait_for(first_connection.receive(), timeout=1)
                ).decode()
                assert (
                    'Showing 21 to ' '21 of ' '21 results'
                ) in updated_first_body
                assert 'aria-current="page"' in updated_first_body
                # tab-2 must NOT receive a patch for tab-1's page change.
                with pytest.raises(asyncio.TimeoutError):
                    await asyncio.wait_for(second_connection.receive(), timeout=0.2)
                await second_connection.disconnect()
            await first_connection.disconnect()

    asyncio.run(run())


def test_runs_patch_creates_and_cleans_up_tab_state(
    monkeypatch, tmp_path: Path
) -> None:
    """Tab state is created on connect and removed once the connection drops."""
    db_path = tmp_path / "runs-tab-state.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        client = app.test_client()
        async with client.request(
            "/runs?u=shim",
            method="POST",
            headers={
                "Datastar-Request": "true",
                "Content-Type": "application/json",
            },
        ) as connection:
            await connection.send(json.dumps({"tabid": "tab-1"}).encode())
            await connection.send_complete()
            await asyncio.wait_for(connection.receive(), timeout=1)
            assert get_tab_state_store(app).get_tab_state("tab-1") == {}
            await connection.disconnect()
        # Yield once so the server-side cleanup task can run.
        await asyncio.sleep(0)
        assert get_tab_state_store(app).get_tab_state("tab-1") is None

    asyncio.run(run())


def test_render_runs_keeps_queued_execution_in_scheduled_jobs_table(
    monkeypatch, tmp_path: Path
) -> None:
    """A PENDING execution keeps its job in the scheduled table with a cancel action."""
    db_path = tmp_path / "runs-queued-render.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
    app = create_app()
    app.config["REPUB_LOG_DIR"] = log_dir
    queued_source = create_source(
        name="Queued source",
        slug="queued-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=True,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/queued.xml",
    )
    # Second source with no queued execution, for contrast in the same table.
    create_source(
        name="Scheduled source",
        slug="scheduled-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=True,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/scheduled.xml",
    )
    queued_job = Job.get(Job.source == queued_source)
    queued_execution = JobExecution.create(
        job=queued_job,
        running_status=JobExecutionStatus.PENDING,
    )

    async def run() -> None:
        body = str(await render_runs(app))
        assert "Running jobs" in body
        assert "Scheduled jobs" in body
        assert "queued-source" in body
        assert "scheduled-source" in body
        assert ">Queued<" in body
        assert (
            f"/actions/queued-executions/{int(queued_execution.get_id())}/cancel"
            in body
        )
        assert "Ready" in body

    asyncio.run(run())


def test_render_runs_shows_cancel_button_for_running_row_with_queued_follow_up(
    monkeypatch, tmp_path: Path
) -> None:
    """A running job with a queued follow-up shows both the log link and a Cancel action."""
    db_path = tmp_path / "runs-cancel-follow-up.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
    app = create_app()
    app.config["REPUB_LOG_DIR"] = log_dir
    source = create_source(
        name="Busy source",
        slug="busy-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=True,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/busy.xml",
    )
    job = Job.get(Job.source == source)
    running_execution = JobExecution.create(
        job=job,
        started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
        running_status=JobExecutionStatus.RUNNING,
    )
    pending_execution = JobExecution.create(
        job=job,
        running_status=JobExecutionStatus.PENDING,
    )

    async def run() -> None:
        body = str(await render_runs(app))
        assert (
            f"/job/{job.id}/execution/{int(running_execution.get_id())}/logs" in body
        )
        assert (
            f"/actions/queued-executions/{int(pending_execution.get_id())}/cancel"
            in body
        )
        assert ">Cancel<" in body
        assert "Running jobs" in body

    asyncio.run(run())


def test_render_runs_keeps_all_action_controls_visible_in_html_after_compaction() -> (
    None
):
    """Rendering runs_page directly from view dicts keeps every action control in the HTML."""
    body = str(
        runs_page(
            running_executions=(
                {
                    "source": "Running source",
                    "slug": "running-source",
                    "job_id": 1,
                    "execution_id": 11,
                    "started_at": "2026-03-30 12:00 UTC",
                    "runtime": "running for 10s",
                    "status": "Running",
                    "stats": "1 requests • 1 items • 1 byte",
                    "worker": "streaming stats from worker",
                    "log_href": "/job/1/execution/11/logs",
                    "cancel_label": "Stop",
                    "cancel_post_path": "/actions/executions/11/cancel",
                },
            ),
            queued_executions=(
                {
                    "source": "Queued source",
                    "slug": "queued-source",
                    "job_id": 2,
                    "execution_id": 22,
                    "queued_at": "2 minutes ago",
                    "queued_at_iso": "2026-03-30T12:28:00+00:00",
                    "queue_position": 1,
                    "status": "Queued",
                    "status_tone": "idle",
                    "run_label": "Queued",
                    "run_disabled": True,
                    "run_post_path": "/actions/jobs/2/run-now",
                    "cancel_post_path": "/actions/queued-executions/22/cancel",
                    "move_up_disabled": True,
                    "move_up_post_path": None,
                    "move_down_disabled": True,
                    "move_down_post_path": None,
                },
            ),
            upcoming_jobs=(
                {
                    "source": "Scheduled source",
                    "slug": "scheduled-source",
                    "job_id": 3,
                    "next_run": "in 5 minutes",
                    "next_run_at": "2026-03-30T12:35:00+00:00",
                    "schedule": "*/5 * * * *",
                    "enabled_label": "Enabled",
                    "enabled_tone": "scheduled",
                    "run_disabled": False,
                    "run_reason": "Ready",
                    "toggle_label": "Disable",
                    "toggle_post_path": "/actions/jobs/3/toggle-enabled",
                    "run_post_path": "/actions/jobs/3/run-now",
                    "delete_post_path": "/actions/jobs/3/delete",
                },
            ),
            completed_executions=(
                {
                    "source": "Completed source",
                    "slug": "completed-source",
                    "job_id": 4,
                    "execution_id": 44,
                    "ended_at": "2 minutes ago",
                    "ended_at_iso": "2026-03-30T12:28:00+00:00",
                    "status": "Succeeded",
                    "status_tone": "done",
                    "stats": "1 requests • 1 items • 1 byte",
                    "summary": "Worker exited successfully",
                    "log_href": "/job/4/execution/44/logs",
                },
            ),
        )
    )
    assert "Running jobs" in body
    assert ">Stop<" in body
    assert ">Cancel<" in body
    assert ">Run now<" in body
    assert ">Disable<" in body
    assert "/job/4/execution/44/logs" in body


def test_cancel_queued_execution_action_deletes_pending_row_without_touching_running_execution(
    monkeypatch, tmp_path: Path
) -> None:
    """Cancelling a queued execution deletes only the PENDING row, not the RUNNING one."""
    db_path = tmp_path / "cancel-queued-action.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        app.config["REPUB_LOG_DIR"] = log_dir
        client = app.test_client()
        source = create_source(
            name="Busy source",
            slug="busy-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/busy.xml",
        )
        job = Job.get(Job.source == source)
        running_execution = JobExecution.create(
            job=job,
            started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
            running_status=JobExecutionStatus.RUNNING,
        )
        pending_execution = JobExecution.create(
            job=job,
            running_status=JobExecutionStatus.PENDING,
        )
        response = await client.post(
            f"/actions/queued-executions/{int(pending_execution.get_id())}/cancel"
        )
        assert response.status_code == 204
        assert JobExecution.get_or_none(id=int(pending_execution.get_id())) is None
        assert (
            JobExecution.get_by_id(int(running_execution.get_id())).running_status
            == JobExecutionStatus.RUNNING
        )

    asyncio.run(run())


def test_clear_completed_executions_action_removes_history_and_log_artifacts(
    monkeypatch, tmp_path: Path
) -> None:
    """Clearing history deletes completed rows and their log files, sparing running ones."""
    db_path = tmp_path / "clear-completed-action.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        app.config["REPUB_LOG_DIR"] = log_dir
        client = app.test_client()
        source = create_source(
            name="History source",
            slug="history-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/history.xml",
        )
        job = Job.get(Job.source == source)
        completed_execution = JobExecution.create(
            job=job,
            running_status=JobExecutionStatus.SUCCEEDED,
            ended_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
        )
        running_execution = JobExecution.create(
            job=job,
            running_status=JobExecutionStatus.RUNNING,
            started_at=datetime(2026, 3, 30, 12, 5, tzinfo=UTC),
        )
        # Lay down every log artifact variant the clear action is expected to sweep.
        log_dir.mkdir(parents=True, exist_ok=True)
        completed_prefix = (
            log_dir / f"job-{job.id}-execution-{int(completed_execution.get_id())}"
        )
        running_log_path = (
            log_dir / f"job-{job.id}-execution-{int(running_execution.get_id())}.log"
        )
        for suffix in (".log", ".jsonl", ".pygea.log"):
            completed_prefix.with_suffix(suffix).write_text(
                "history", encoding="utf-8"
            )
        running_log_path.write_text("running", encoding="utf-8")
        response = await client.post("/actions/completed-executions/clear")
        assert response.status_code == 204
        assert JobExecution.get_or_none(id=int(completed_execution.get_id())) is None
        assert (
            JobExecution.get_or_none(id=int(running_execution.get_id())) is not None
        )
        for suffix in (".log", ".jsonl", ".pygea.log"):
            assert not completed_prefix.with_suffix(suffix).exists()
        assert running_log_path.exists()

    asyncio.run(run())


def test_move_queued_execution_action_reorders_queue(
    monkeypatch, tmp_path: Path
) -> None:
    """Moving a queued execution up reorders the queue and flips the move actions."""
    db_path = tmp_path / "move-queued-action.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        app.config["REPUB_LOG_DIR"] = log_dir
        client = app.test_client()
        first_source = create_source(
            name="First queued source",
            slug="first-queued-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/first.xml",
        )
        second_source = create_source(
            name="Second queued source",
            slug="second-queued-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/second.xml",
        )
        first_job = Job.get(Job.source == first_source)
        second_job = Job.get(Job.source == second_source)
        # Distinct created_at values fix the initial queue order: first, then second.
        first_execution = JobExecution.create(
            job=first_job,
            created_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
            running_status=JobExecutionStatus.PENDING,
        )
        second_execution = JobExecution.create(
            job=second_job,
            created_at=datetime(2026, 3, 30, 12, 5, tzinfo=UTC),
            running_status=JobExecutionStatus.PENDING,
        )
        response = await client.post(
            f"/actions/queued-executions/{int(second_execution.get_id())}/move-up"
        )
        assert response.status_code == 204
        body = str(await render_runs(app))
        # After the move, the second source renders above the first.
        assert body.index("second-queued-source") < body.index("first-queued-source")
        assert (
            f"/actions/queued-executions/{int(second_execution.get_id())}/move-down"
            in body
        )
        assert (
            f"/actions/queued-executions/{int(first_execution.get_id())}/move-up"
            in body
        )

    asyncio.run(run())


def test_toggle_job_enabled_action_removes_queued_execution(
    monkeypatch, tmp_path: Path
) -> None:
    """Disabling a job also drops its queued execution and its cancel control."""
    db_path = tmp_path / "toggle-removes-queue.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        client = app.test_client()
        source = create_source(
            name="Queued source",
            slug="queued-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/queued.xml",
        )
        job = Job.get(Job.source == source)
        queued_execution = JobExecution.create(
            job=job,
            running_status=JobExecutionStatus.PENDING,
        )
        response = await client.post(f"/actions/jobs/{job.id}/toggle-enabled")
        assert response.status_code == 204
        assert Job.get_by_id(job.id).enabled is False
        assert JobExecution.get_or_none(id=int(queued_execution.get_id())) is None
        body = str(await render_runs(app))
        assert (
            f"/actions/queued-executions/{int(queued_execution.get_id())}/cancel"
            not in body
        )
        assert "Disabled" in body

    asyncio.run(run())


def test_render_create_source_uses_shared_submit_button(
    monkeypatch, tmp_path: Path
) -> None:
    """The create-source form renders the shared submit-button styling."""
    db_path = tmp_path / "create-source-shared-submit.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        body = str(await render_create_source(app))
        assert 'type="submit"' in body
        assert "Create source" in body
        assert "cursor-pointer" in body
        assert "bg-slate-950" in body

    asyncio.run(run())


def test_render_execution_logs_uses_app_route(monkeypatch, tmp_path: Path) -> None:
    """The execution-log page reads the log file and links back via the app route."""
    db_path = tmp_path / "logs-render.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        log_dir = tmp_path / "out" / "logs"
        app = create_app()
        app.config["REPUB_LOG_DIR"] = log_dir
        source = create_source(
            name="Log render source",
            slug="log-render-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/30",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/logs.xml",
        )
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            running_status=JobExecutionStatus.RUNNING,
        )
        # Write a log file at the path the renderer derives from job/execution ids.
        log_path = log_dir / f"job-{job.id}-execution-{execution.get_id()}.log"
        log_path.parent.mkdir(parents=True, exist_ok=True)
        log_path.write_text(
            "\n".join(
                (
                    "scheduler: run_now requested",
                    "worker: starting simulated crawl",
                    "worker: waiting for more log lines ...",
                )
            ),
            encoding="utf-8",
        )
        body = str(
            await render_execution_logs(
                app, job_id=job.id, execution_id=int(execution.get_id())
            )
        )
        assert f"Job {job.id} / execution {execution.get_id()}" in body
        assert f"/job/{job.id}/execution/{execution.get_id()}/logs" in body
        assert "waiting for more log lines" in body

    asyncio.run(run())