DB-backed source creation

This commit is contained in:
Abel Luck 2026-03-30 13:37:25 +02:00
parent b9e288a22d
commit 847aeae772
5 changed files with 312 additions and 99 deletions

View file

@ -53,7 +53,6 @@ def test_initialize_database_bootstraps_schema_from_sql_files(tmp_path: Path) ->
assert table_names == {
"job",
"job_execution",
"settings",
"source",
"source_feed",
"source_pangea",

View file

@ -5,10 +5,10 @@ from pathlib import Path
from typing import Any, cast
from repub.datastar import RefreshBroker, render_sse_event, render_stream
from repub.model import Job, Source, SourceFeed, SourcePangea
from repub.web import (
create_app,
get_refresh_broker,
get_sources_dict,
render_create_source,
render_dashboard,
render_execution_logs,
@ -161,8 +161,8 @@ def test_render_sources_shows_table_and_create_link() -> None:
assert "Configured feed and Pangea sources live here as tables" in body
assert ">Sources<" in body
assert 'href="/sources/create"' in body
assert "guardian-feed" in body
assert "podcast-audio" in body
assert "guardian-feed" not in body
assert "podcast-audio" not in body
asyncio.run(run())
@ -181,17 +181,37 @@ def test_render_create_source_shows_dedicated_form_page() -> None:
assert "onlyNewest" in body
assert "includeAuthors" in body
assert "excludeMedia" in body
assert "includeContent" in body
assert "TEXT_ONLY" in body
assert "breakingnews" in body
assert "Pangea domain" in body
assert "Feed URL" in body
assert "Cron schedule" in body
assert "Initial job state" in body
assert "Pangea mobile articles" not in body
assert "pangea-mobile" not in body
assert "guardianproject.info" not in body
assert (
"Primary Pangea mobile article mirror for the operator landing page."
not in body
)
assert "language=en,download_media=true" not in body
assert "language=en\ndownload_media=true" in body
assert 'value="articles"' in body
assert 'value="10"' in body
assert 'value="3"' in body
assert 'value="*/30"' in body
assert 'value="*"' in body
asyncio.run(run())
def test_create_source_action_adds_new_source_to_in_memory_store() -> None:
def test_create_source_action_creates_pangea_source_and_job_in_database(
monkeypatch, tmp_path: Path
) -> None:
db_path = tmp_path / "sources.db"
monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
async def run() -> None:
app = create_app()
client = app.test_client()
@ -210,7 +230,7 @@ def test_create_source_action_adds_new_source_to_in_memory_store() -> None:
"maxArticles": "12",
"oldestArticle": "5",
"sourceNotes": "Regional health alerts.",
"spiderArguments": "language=en",
"spiderArguments": "language=en\ndownload_media=true",
"cronMinute": "0",
"cronHour": "*/6",
"cronDayOfMonth": "*",
@ -226,17 +246,89 @@ def test_create_source_action_adds_new_source_to_in_memory_store() -> None:
assert response.status_code == 200
assert "window.location = '/sources'" in body
assert "kenya-health" in get_sources_dict(app)
assert get_sources_dict(app)["kenya-health"]["content_type"] == "breakingnews"
source = Source.get(Source.slug == "kenya-health")
pangea = SourcePangea.get(SourcePangea.source == source)
job = Job.get(Job.source == source)
rendered_sources = str(await render_sources(app))
assert source.name == "Kenya health desk"
assert source.source_type == "pangea"
assert pangea.content_type == "breakingnews"
assert pangea.include_content is True
assert job.enabled is True
assert job.spider_arguments == "language=en\ndownload_media=true"
assert job.cron_hour == "*/6"
assert "kenya-health" in rendered_sources
assert "example.org / Health" in rendered_sources
assert "Enabled" in rendered_sources
asyncio.run(run())
def test_create_source_action_validates_duplicate_slug_and_pangea_type() -> None:
def test_create_source_action_creates_feed_source_and_job_in_database(
    monkeypatch, tmp_path: Path
) -> None:
    """Submit a feed-type source form and check the rows it leaves behind.

    ``REPUBLISHER_DB_PATH`` is set to a file under ``tmp_path`` before the app
    is created (presumably so the app opens a throwaway database -- the
    consumer of that variable is not visible in this test). The test then
    posts to ``/actions/sources/create`` and verifies the ``Source``,
    ``SourceFeed`` and ``Job`` records as well as the rendered sources page.
    """
    database_file = tmp_path / "feed-sources.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(database_file))

    feed_url = "https://www.nasa.gov/rss/dyn/breaking_news.rss"
    form_payload = {
        "sourceName": "NASA feed",
        "sourceSlug": "nasa-feed",
        "sourceType": "feed",
        "feedUrl": feed_url,
        "sourceNotes": "Primary NASA mirror.",
        "spiderArguments": "",
        "cronMinute": "30",
        "cronHour": "*",
        "cronDayOfMonth": "*",
        "cronDayOfWeek": "*",
        "cronMonth": "*",
        # The job is submitted disabled; the assertions below expect that
        # to be reflected both in the Job row and in the rendered listing.
        "jobEnabled": False,
    }

    async def scenario() -> None:
        app = create_app()
        http = app.test_client()

        reply = await http.post(
            "/actions/sources/create",
            headers={"Datastar-Request": "true"},
            json=form_payload,
        )
        reply_text = await reply.get_data(as_text=True)

        # The action answers with a script that redirects to the listing.
        assert reply.status_code == 200
        assert "window.location = '/sources'" in reply_text

        # Look up the rows the action is expected to have written.
        source = Source.get(Source.slug == "nasa-feed")
        feed = SourceFeed.get(SourceFeed.source == source)
        job = Job.get(Job.source == source)
        listing = str(await render_sources(app))

        assert source.source_type == "feed"
        assert feed.feed_url == feed_url
        assert job.enabled is False
        assert "nasa-feed" in listing
        assert feed_url in listing
        assert "Disabled" in listing

    asyncio.run(scenario())
def test_create_source_action_validates_duplicate_slug_and_pangea_type(
monkeypatch, tmp_path: Path
) -> None:
db_path = tmp_path / "duplicate.db"
monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
async def run() -> None:
app = create_app()
Source.create(
name="Guardian feed mirror",
slug="guardian-feed",
source_type="feed",
)
client = app.test_client()
response = await client.post(
"/actions/sources/create",
headers={"Datastar-Request": "true"},
@ -265,9 +357,7 @@ def test_create_source_action_validates_duplicate_slug_and_pangea_type() -> None
assert "Content format is invalid." in body
assert "Content type is invalid." in body
assert "Max articles must be an integer." in body
assert "Duplicate guardian" not in {
str(source["name"]) for source in get_sources_dict(app).values()
}
assert Source.select().where(Source.name == "Duplicate guardian").count() == 0
asyncio.run(run())