"""Tests for the repub web app pages, actions and Datastar streaming helpers.

NOTE(review): this module was reconstructed from a copy whose statements had
been collapsed onto a few physical lines and whose HTML-looking substrings had
been stripped. Statement order and every visible literal byte are preserved;
each place where content was evidently lost carries its own NOTE(review) so
it can be restored from version control.
"""

from __future__ import annotations

import asyncio
from pathlib import Path
from typing import Any, cast

from repub.datastar import RefreshBroker, render_sse_event, render_stream
from repub.model import Job, Source, SourceFeed, SourcePangea
from repub.web import (
    create_app,
    get_refresh_broker,
    render_create_source,
    render_dashboard,
    render_execution_logs,
    render_runs,
    render_sources,
)


def test_root_get_serves_datastar_shim() -> None:
    """GET / answers 200 with an ETag and the Datastar bootstrap attributes."""

    async def run() -> None:
        client = create_app().test_client()
        response = await client.get("/")
        body = await response.get_data(as_text=True)

        assert response.status_code == 200
        assert response.headers["ETag"]
        # NOTE(review): the next two literals are empty in the recovered
        # source, which makes the checks vacuous; markup was presumably lost.
        assert body.startswith("")
        assert "" in body
        assert 'data-signals:tabid="self.crypto.randomUUID().substring(0,8)"' in body
        assert 'data-init="@post(window.location.pathname +' in body
        assert "retryMaxCount: Infinity" in body
        assert "data-on:online__window=" in body
        # NOTE(review): only a line break survived inside this literal.
        assert "\n" in body

    asyncio.run(run())


def test_create_app_bootstraps_default_database_path(
    monkeypatch, tmp_path: Path
) -> None:
    """create_app() run from a directory creates republisher.db inside it."""
    monkeypatch.chdir(tmp_path)

    app = create_app()

    assert Path(app.config["REPUB_DB_PATH"]) == tmp_path / "republisher.db"
    assert (tmp_path / "republisher.db").exists()


def test_root_get_honors_if_none_match() -> None:
    """Replaying the ETag through If-None-Match yields a 304 with that ETag."""

    async def run() -> None:
        client = create_app().test_client()
        initial = await client.get("/")
        etag = initial.headers["ETag"]

        response = await client.get("/", headers={"If-None-Match": etag})

        assert response.status_code == 304
        assert response.headers["ETag"] == etag

    asyncio.run(run())


def test_dashboard_post_serves_morph_component() -> None:
    """POST /?u=shim opens an event stream whose first chunk patches elements."""

    async def run() -> None:
        client = create_app().test_client()
        # NOTE(review): the nesting of the assertions under ``async with`` is
        # reconstructed; the collapsed source does not show where it ended.
        async with client.request("/?u=shim", method="POST") as connection:
            await connection.send_complete()
            chunk = await asyncio.wait_for(connection.receive(), timeout=1)
            # The cast only silences the type checker for the attribute reads.
            raw_connection = cast(Any, connection)

            assert raw_connection.status_code == 200
            assert raw_connection.headers["Content-Type"] == "text/event-stream"
            assert b"event: datastar-patch-elements" in chunk
            assert b"id: " in chunk
            # NOTE(review): this bytes literal was truncated right after its
            # opening quote; the expected markup must be restored.
            assert b"" in chunk

    asyncio.run(run())


# NOTE(review): the original name of this test was lost together with the end
# of the previous one; the name below is a descriptive placeholder.
def test_render_sse_event_skips_unchanged_render() -> None:
    """A second render_sse_event call with the same id returns no event."""

    async def run() -> None:
        async def render() -> str:
            # NOTE(review): wrapping markup around "same" appears to be lost.
            return "\nsame\n"

        event_id, event = await render_sse_event(render)
        repeated_id, repeated_event = await render_sse_event(
            render, last_event_id=event_id
        )

        assert repeated_id == event_id
        assert event is not None
        assert repeated_event is None

    asyncio.run(run())


def test_app_refresh_broker_publishes_events() -> None:
    """publish() delivers "refresh-event" to a subscribed queue."""

    async def run() -> None:
        app = create_app()
        broker = get_refresh_broker(app)
        queue = broker.subscribe()

        broker.publish()
        event = await asyncio.wait_for(queue.get(), timeout=1)

        assert event == "refresh-event"
        broker.unsubscribe(queue)

    asyncio.run(run())


def test_render_stream_yields_on_connect_and_refresh() -> None:
    """render_stream yields once up front and again after a queued refresh."""

    async def run() -> None:
        queue = RefreshBroker().subscribe()
        renders = 0

        async def render() -> str:
            nonlocal renders
            renders += 1
            # NOTE(review): wrapping markup around the counter appears lost.
            return f"\n{renders}\n"

        stream = render_stream(queue, render)
        first = await anext(stream)
        await queue.put("refresh-event")
        second = await anext(stream)
        await stream.aclose()

        # NOTE(review): the first expected literal kept only a line break
        # after the digit; its closing markup was presumably stripped.
        assert "1\n" in first
        assert "2" in second

    asyncio.run(run())


def test_render_dashboard_shows_dashboard_information_architecture() -> None:
    """The dashboard render contains its headings, nav links and a log link."""

    async def run() -> None:
        body = str(await render_dashboard())

        assert "Operational snapshot" in body
        assert "Running executions" in body
        assert 'href="/sources"' in body
        assert 'href="/runs"' in body
        assert "/job/7/execution/104/logs" in body
        assert "Create source" in body

    asyncio.run(run())


def test_render_sources_shows_table_and_create_link() -> None:
    """The sources render has the create link and none of the sample slugs."""

    async def run() -> None:
        body = str(await render_sources())

        assert "Configured feed and Pangea sources live here as tables" in body
        assert ">Sources<" in body
        assert 'href="/sources/create"' in body
        assert "guardian-feed" not in body
        assert "podcast-audio" not in body

    asyncio.run(run())


def test_render_create_source_shows_dedicated_form_page() -> None:
    """The create-source render exposes the form fields and default values."""

    async def run() -> None:
        body = str(await render_create_source())

        assert "Dedicated create page for the source form" in body
        assert "Source and job setup" in body
        assert "data-signals__ifmissing" in body
        assert "/actions/sources/create" in body
        # NOTE(review): the recovered source nested bare single quotes inside
        # single-quoted literals (a syntax error); the original may have used
        # an escaped form of the quote instead - confirm against the markup.
        assert "data-show=\"$sourceType === 'feed'\"" in body
        assert "data-show=\"$sourceType === 'pangea'\"" in body
        assert "jobEnabled" in body
        assert "onlyNewest" in body
        assert "includeAuthors" in body
        assert "excludeMedia" in body
        assert "includeContent" in body
        assert "TEXT_ONLY" in body
        assert "breakingnews" in body
        assert "Pangea domain" in body
        assert "Feed URL" in body
        assert "Cron schedule" in body
        assert "Initial job state" in body
        assert "Pangea mobile articles" not in body
        assert "pangea-mobile" not in body
        assert "guardianproject.info" not in body
        assert (
            "Primary Pangea mobile article mirror for the operator landing page."
            not in body
        )
        assert "language=en,download_media=true" not in body
        assert "language=en\ndownload_media=true" in body
        assert 'value="articles"' in body
        assert 'value="10"' in body
        assert 'value="3"' in body
        assert 'value="*/30"' in body
        assert 'value="*"' in body

    asyncio.run(run())


def test_create_source_action_creates_pangea_source_and_job_in_database(
    monkeypatch, tmp_path: Path
) -> None:
    """Posting a Pangea source stores Source, SourcePangea and Job rows."""
    db_path = tmp_path / "sources.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        client = app.test_client()

        response = await client.post(
            "/actions/sources/create",
            headers={"Datastar-Request": "true"},
            json={
                "sourceName": "Kenya health desk",
                "sourceSlug": "kenya-health",
                "sourceType": "pangea",
                "pangeaDomain": "example.org",
                "pangeaCategory": "Health",
                "contentFormat": "MOBILE_3",
                "contentType": "breakingnews",
                "maxArticles": "12",
                "oldestArticle": "5",
                "sourceNotes": "Regional health alerts.",
                "spiderArguments": "language=en\ndownload_media=true",
                "cronMinute": "0",
                "cronHour": "*/6",
                "cronDayOfMonth": "*",
                "cronDayOfWeek": "*",
                "cronMonth": "*",
                "jobEnabled": True,
                "onlyNewest": True,
                "includeAuthors": True,
                "excludeMedia": False,
            },
        )
        body = await response.get_data(as_text=True)

        assert response.status_code == 200
        assert "window.location = '/sources'" in body

        source = Source.get(Source.slug == "kenya-health")
        pangea = SourcePangea.get(SourcePangea.source == source)
        job = Job.get(Job.source == source)
        rendered_sources = str(await render_sources(app))

        assert source.name == "Kenya health desk"
        assert source.source_type == "pangea"
        assert pangea.content_type == "breakingnews"
        assert pangea.include_content is True
        assert job.enabled is True
        assert job.spider_arguments == "language=en\ndownload_media=true"
        assert job.cron_hour == "*/6"
        assert "kenya-health" in rendered_sources
        assert "example.org / Health" in rendered_sources
        assert "Enabled" in rendered_sources

    asyncio.run(run())


def test_create_source_action_creates_feed_source_and_job_in_database(
    monkeypatch, tmp_path: Path
) -> None:
    """Posting a feed source stores Source, SourceFeed and a disabled Job."""
    db_path = tmp_path / "feed-sources.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        client = app.test_client()

        response = await client.post(
            "/actions/sources/create",
            headers={"Datastar-Request": "true"},
            json={
                "sourceName": "NASA feed",
                "sourceSlug": "nasa-feed",
                "sourceType": "feed",
                "feedUrl": "https://www.nasa.gov/rss/dyn/breaking_news.rss",
                "sourceNotes": "Primary NASA mirror.",
                "spiderArguments": "",
                "cronMinute": "30",
                "cronHour": "*",
                "cronDayOfMonth": "*",
                "cronDayOfWeek": "*",
                "cronMonth": "*",
                "jobEnabled": False,
            },
        )
        body = await response.get_data(as_text=True)

        assert response.status_code == 200
        assert "window.location = '/sources'" in body

        source = Source.get(Source.slug == "nasa-feed")
        feed = SourceFeed.get(SourceFeed.source == source)
        job = Job.get(Job.source == source)
        rendered_sources = str(await render_sources(app))

        assert source.source_type == "feed"
        assert feed.feed_url == "https://www.nasa.gov/rss/dyn/breaking_news.rss"
        assert job.enabled is False
        assert "nasa-feed" in rendered_sources
        assert "https://www.nasa.gov/rss/dyn/breaking_news.rss" in rendered_sources
        assert "Disabled" in rendered_sources

    asyncio.run(run())


def test_create_source_action_validates_duplicate_slug_and_pangea_type(
    monkeypatch, tmp_path: Path
) -> None:
    """An invalid Pangea payload reports each error and stores no source."""
    db_path = tmp_path / "duplicate.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        # Seed a row so the posted slug collides with an existing one.
        Source.create(
            name="Guardian feed mirror",
            slug="guardian-feed",
            source_type="feed",
        )
        client = app.test_client()

        response = await client.post(
            "/actions/sources/create",
            headers={"Datastar-Request": "true"},
            json={
                "sourceName": "Duplicate guardian",
                "sourceSlug": "guardian-feed",
                "sourceType": "pangea",
                "pangeaDomain": "example.org",
                "pangeaCategory": "News",
                "contentFormat": "WEB",
                "contentType": "not-a-real-type",
                "maxArticles": "ten",
                "oldestArticle": "3",
                "cronMinute": "0",
                "cronHour": "*",
                "cronDayOfMonth": "*",
                "cronDayOfWeek": "*",
                "cronMonth": "*",
                "jobEnabled": True,
            },
        )
        body = await response.get_data(as_text=True)

        assert response.status_code == 200
        assert "Slug must be unique." in body
        assert "Content format is invalid." in body
        assert "Content type is invalid." in body
        assert "Max articles must be an integer." in body
        assert Source.select().where(Source.name == "Duplicate guardian").count() == 0

    asyncio.run(run())


def test_render_runs_shows_running_upcoming_and_completed_tables() -> None:
    """The runs render contains all three table headings and a log link."""

    async def run() -> None:
        body = str(await render_runs())

        assert "Running job executions" in body
        assert "Upcoming jobs" in body
        assert "Completed job executions" in body
        assert "Delete confirmation" in body
        assert "/job/11/execution/101/logs" in body
        assert "Already running" in body

    asyncio.run(run())


def test_render_execution_logs_uses_app_route() -> None:
    """The log render names the job/execution pair and its log route."""

    async def run() -> None:
        body = str(await render_execution_logs(job_id=7, execution_id=104))

        assert "Job 7 / execution 104" in body
        assert "/job/7/execution/104/logs" in body
        assert "Streaming text log view" in body
        assert "waiting for more log lines" in body

    asyncio.run(run())