2026-03-30 11:42:13 +02:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
2026-03-30 12:34:38 +02:00
|
|
|
import asyncio
|
2026-03-30 12:27:45 +02:00
|
|
|
import hashlib
|
2026-03-30 13:11:37 +02:00
|
|
|
from collections.abc import AsyncGenerator, Awaitable, Callable
|
2026-03-30 13:37:25 +02:00
|
|
|
from typing import TypedDict, cast
|
2026-03-30 13:23:36 +02:00
|
|
|
from urllib.parse import urlparse
|
2026-03-30 12:13:04 +02:00
|
|
|
|
2026-03-30 12:27:45 +02:00
|
|
|
import htpy as h
|
2026-03-30 13:23:36 +02:00
|
|
|
from datastar_py import ServerSentEventGenerator as SSE
|
|
|
|
|
from datastar_py.quart import DatastarResponse, read_signals
|
2026-03-30 12:34:38 +02:00
|
|
|
from datastar_py.sse import DatastarEvent
|
|
|
|
|
from htpy import Renderable
|
2026-03-30 13:37:25 +02:00
|
|
|
from peewee import IntegrityError
|
2026-03-30 12:27:45 +02:00
|
|
|
from quart import Quart, Response, request, url_for
|
|
|
|
|
|
2026-03-30 12:34:38 +02:00
|
|
|
from repub.datastar import RefreshBroker, render_stream
|
2026-03-30 13:37:25 +02:00
|
|
|
from repub.model import (
|
|
|
|
|
create_source,
|
|
|
|
|
initialize_database,
|
2026-03-30 13:49:00 +02:00
|
|
|
load_source_form,
|
2026-03-30 13:37:25 +02:00
|
|
|
load_sources,
|
|
|
|
|
source_slug_exists,
|
2026-03-30 13:49:00 +02:00
|
|
|
update_source,
|
2026-03-30 13:37:25 +02:00
|
|
|
)
|
2026-03-30 13:11:37 +02:00
|
|
|
from repub.pages import (
|
|
|
|
|
create_source_page,
|
|
|
|
|
dashboard_page,
|
2026-03-30 13:49:00 +02:00
|
|
|
edit_source_page,
|
2026-03-30 13:11:37 +02:00
|
|
|
execution_logs_page,
|
|
|
|
|
runs_page,
|
|
|
|
|
shim_page,
|
|
|
|
|
sources_page,
|
|
|
|
|
)
|
2026-03-30 13:37:25 +02:00
|
|
|
from repub.pages.sources import PANGEA_CONTENT_FORMATS, PANGEA_CONTENT_TYPES
|
2026-03-30 12:27:45 +02:00
|
|
|
|
2026-03-30 12:34:38 +02:00
|
|
|
# Key under which the application's RefreshBroker is stored in ``app.extensions``.
REFRESH_BROKER_KEY = "repub.refresh_broker"
|
2026-03-30 13:11:37 +02:00
|
|
|
|
|
|
|
|
# Zero-argument callable returning an awaitable that resolves to the page to render.
RenderFunction = Callable[[], Awaitable[Renderable]]
|
2026-03-30 12:34:38 +02:00
|
|
|
|
2026-03-30 12:27:45 +02:00
|
|
|
|
2026-03-30 13:37:25 +02:00
|
|
|
class SourceFormData(TypedDict):
    """Validated source settings produced by ``validate_source_form``.

    The mapping is unpacked as keyword arguments into ``create_source`` and
    ``update_source``, so every key here is passed to those functions by name.
    """

    # Identity and general settings.
    name: str
    slug: str
    source_type: str
    enabled: bool
    notes: str
    spider_arguments: str

    # Schedule, one text value per cron field.
    cron_minute: str
    cron_hour: str
    cron_day_of_month: str
    cron_day_of_week: str
    cron_month: str

    # Settings read for "feed" sources.
    feed_url: str

    # Settings read for "pangea" sources.
    pangea_domain: str
    pangea_category: str
    content_format: str
    content_type: str
    max_articles: int | None
    oldest_article: int | None

    # Boolean switches read from the form.
    only_newest: bool
    include_authors: bool
    exclude_media: bool
    include_content: bool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Content format used for a Pangea source when the form leaves the field blank.
DEFAULT_PANGEA_CONTENT_FORMAT = "MOBILE_3"
|
|
|
|
|
# Content type used for a Pangea source when the form leaves the field blank.
DEFAULT_PANGEA_CONTENT_TYPE = "articles"
|
|
|
|
|
# Max-articles fallback for Pangea sources; kept as text because it stands in
# for the raw form value before integer parsing.
DEFAULT_PANGEA_MAX_ARTICLES = "10"
|
|
|
|
|
# Oldest-article fallback for Pangea sources; kept as text because it stands in
# for the raw form value before integer parsing.
DEFAULT_PANGEA_OLDEST_ARTICLE = "3"
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 12:27:45 +02:00
|
|
|
def _render_shim_page(*, stylesheet_href: str, datastar_src: str) -> tuple[str, str]:
    """Render the shim page and compute an ETag from its content.

    Args:
        stylesheet_href: URL used for the stylesheet ``<link>`` in the head.
        datastar_src: URL forwarded to ``shim_page`` as ``datastar_src``.

    Returns:
        A ``(body, etag)`` pair: the rendered page as a string and the hex
        SHA-256 digest of its UTF-8 encoding.
    """
    title = h.title["Republisher Admin UI"]
    stylesheet = h.link(rel="stylesheet", href=stylesheet_href)
    page = shim_page(datastar_src=datastar_src, head=(title, stylesheet))
    html = str(page)
    digest = hashlib.sha256(html.encode("utf-8"))
    return html, digest.hexdigest()
|
2026-03-30 11:42:13 +02:00
|
|
|
|
|
|
|
|
|
2026-03-30 13:11:37 +02:00
|
|
|
def create_app() -> Quart:
    """Create the admin UI application and register all of its routes.

    The path returned by ``initialize_database`` is stored as a string in
    ``app.config["REPUB_DB_PATH"]`` and a new ``RefreshBroker`` is stored in
    ``app.extensions`` under ``REFRESH_BROKER_KEY``.

    Every GET route serves the same shim page. The POST routes on the same
    paths return page patch streams, and the ``/actions/...`` routes handle
    the source create/edit form submissions.

    Returns:
        The configured ``Quart`` application.
    """
    app = Quart(__name__)
    app.config["REPUB_DB_PATH"] = str(initialize_database())
    app.extensions[REFRESH_BROKER_KEY] = RefreshBroker()

    def form_error(message: str) -> DatastarResponse:
        # Report a form problem by patching the error signal and clearing success.
        return DatastarResponse(
            SSE.patch_signals({"_formError": message, "_formSuccess": ""})
        )

    @app.get("/")
    @app.get("/sources")
    @app.get("/sources/create")
    @app.get("/sources/<string:slug>/edit")
    @app.get("/runs")
    @app.get("/job/<int:job_id>/execution/<int:execution_id>/logs")
    async def page_shim(
        slug: str | None = None,
        job_id: int | None = None,
        execution_id: int | None = None,
    ) -> Response:
        # The route parameters are accepted only so the URL rules match; the
        # shim page itself does not depend on them.
        del slug, job_id, execution_id
        html, etag = _render_shim_page(
            stylesheet_href=url_for("static", filename="app.css"),
            datastar_src=url_for("static", filename="datastar@1.0.0-RC.8.js"),
        )
        if request.if_none_match.contains(etag):
            # The client already holds this exact page: answer without a body.
            reply = Response(status=304)
        else:
            reply = Response(html, mimetype="text/html")
        reply.set_etag(etag)
        return reply

    @app.post("/")
    async def dashboard_patch() -> DatastarResponse:
        return _page_patch_response(app, render_dashboard)

    @app.post("/sources")
    async def sources_patch() -> DatastarResponse:
        def render() -> Awaitable[Renderable]:
            return render_sources(app)

        return _page_patch_response(app, render)

    @app.post("/sources/create")
    async def create_source_patch() -> DatastarResponse:
        def render() -> Awaitable[Renderable]:
            return render_create_source(app)

        return _page_patch_response(app, render)

    @app.post("/sources/<string:slug>/edit")
    async def edit_source_patch(slug: str) -> DatastarResponse:
        def render() -> Awaitable[Renderable]:
            return render_edit_source(slug)

        return _page_patch_response(app, render)

    @app.post("/actions/sources/create")
    async def create_source_action() -> DatastarResponse:
        signals = cast(dict[str, object], await read_signals())
        source, error = validate_source_form(signals, slug_exists=source_slug_exists)
        if error is not None:
            return form_error(error)

        assert source is not None
        try:
            create_source(**source)
        except IntegrityError:
            # The insert can still hit a duplicate slug after validation passed.
            return form_error("Slug must be unique.")
        trigger_refresh(app)
        return DatastarResponse(SSE.redirect("/sources"))

    @app.post("/actions/sources/<string:slug>/edit")
    async def edit_source_action(slug: str) -> DatastarResponse:
        def slug_taken(candidate: str) -> bool:
            # The source being edited does not count as a conflict with itself.
            return candidate != slug and source_slug_exists(candidate)

        signals = cast(dict[str, object], await read_signals())
        source, error = validate_source_form(
            signals,
            slug_exists=slug_taken,
            immutable_slug=slug,
        )
        if error is not None:
            return form_error(error)

        assert source is not None
        if update_source(slug, **source) is None:
            return form_error("Source does not exist.")
        trigger_refresh(app)
        return DatastarResponse(SSE.redirect("/sources"))

    @app.post("/runs")
    async def runs_patch() -> DatastarResponse:
        return _page_patch_response(app, render_runs)

    @app.post("/job/<int:job_id>/execution/<int:execution_id>/logs")
    async def logs_patch(job_id: int, execution_id: int) -> DatastarResponse:
        return _page_patch_response(
            app,
            lambda: render_execution_logs(job_id=job_id, execution_id=execution_id),
        )

    return app
|
2026-03-30 12:34:38 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_refresh_broker(app: Quart) -> RefreshBroker:
    """Return the broker stored in ``app.extensions`` under ``REFRESH_BROKER_KEY``.

    Raises:
        KeyError: If no broker has been stored on ``app``.
    """
    broker = app.extensions[REFRESH_BROKER_KEY]
    return cast(RefreshBroker, broker)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def trigger_refresh(app: Quart, event: object = "refresh-event") -> None:
    """Publish ``event`` on the refresh broker attached to ``app``."""
    broker = get_refresh_broker(app)
    broker.publish(event)
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 13:11:37 +02:00
|
|
|
async def render_dashboard() -> Renderable:
    """Build the dashboard page."""
    page = dashboard_page()
    return page
|
2026-03-30 12:34:38 +02:00
|
|
|
|
|
|
|
|
|
2026-03-30 13:23:36 +02:00
|
|
|
async def render_sources(app: Quart | None = None) -> Renderable:
    """Build the sources page.

    Sources are loaded only when ``app`` is given; without an app the page is
    rendered with ``sources=None``.
    """
    if app is None:
        rows = None
    else:
        rows = load_sources()
    return sources_page(sources=rows)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def render_create_source(app: Quart | None = None) -> Renderable:
    """Build the create-source page; ``app`` is accepted but not used."""
    del app
    page = create_source_page()
    return page
|
2026-03-30 12:34:38 +02:00
|
|
|
|
|
|
|
|
|
2026-03-30 13:49:00 +02:00
|
|
|
async def render_edit_source(slug: str) -> Renderable:
    """Build the edit page for the source identified by ``slug``.

    When ``load_source_form`` finds nothing for ``slug``, the sources page is
    rendered with an empty tuple of sources instead.
    """
    form = load_source_form(slug)
    if form is not None:
        return edit_source_page(
            slug=slug,
            source=form,
            action_path=f"/actions/sources/{slug}/edit",
        )
    return sources_page(sources=())
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 13:11:37 +02:00
|
|
|
async def render_runs() -> Renderable:
    """Build the runs page."""
    page = runs_page()
    return page
|
2026-03-30 12:34:38 +02:00
|
|
|
|
|
|
|
|
|
2026-03-30 13:11:37 +02:00
|
|
|
async def render_execution_logs(*, job_id: int, execution_id: int) -> Renderable:
    """Build the log page for one execution of one job."""
    page = execution_logs_page(job_id=job_id, execution_id=execution_id)
    return page
|
2026-03-30 12:48:32 +02:00
|
|
|
|
|
|
|
|
|
2026-03-30 13:11:37 +02:00
|
|
|
def _page_patch_response(app: Quart, render: RenderFunction) -> DatastarResponse:
    """Subscribe to the refresh broker and stream rendered pages to the client.

    Args:
        app: Application whose refresh broker is subscribed to.
        render: Callable handed to ``render_stream`` to produce the page.

    Returns:
        A ``DatastarResponse`` over the event stream. The subscription is
        released again when that stream finishes or is closed.
    """
    broker = get_refresh_broker(app)
    subscription = broker.subscribe()
    resume_from = request.headers.get("last-event-id")
    events = render_stream(
        subscription,
        render=render,
        last_event_id=resume_from,
    )
    return DatastarResponse(_unsubscribe_on_close(subscription, events, app))
|
2026-03-30 12:48:32 +02:00
|
|
|
|
|
|
|
|
|
2026-03-30 13:11:37 +02:00
|
|
|
async def _unsubscribe_on_close(
    queue: object, stream: AsyncGenerator[DatastarEvent, None], app: Quart
) -> AsyncGenerator[DatastarEvent, None]:
    """Relay every event from ``stream`` and unsubscribe ``queue`` afterwards.

    The unsubscribe runs in a ``finally`` clause, so it happens whether the
    stream ends normally, raises, or this generator is closed early.
    """
    try:
        async for item in stream:
            yield item
    finally:
        subscription = cast(asyncio.Queue[object], queue)
        get_refresh_broker(app).unsubscribe(subscription)
|
2026-03-30 13:23:36 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def validate_source_form(
    signals: dict[str, object] | None,
    *,
    slug_exists: Callable[[str], bool],
    immutable_slug: str | None = None,
) -> tuple[SourceFormData | None, str | None]:
    """Validate submitted source form signals and build the source payload.

    Args:
        signals: Form signals keyed by their camelCase names, or ``None`` when
            nothing was submitted.
        slug_exists: Predicate reporting whether a slug is already taken.
        immutable_slug: When given, the submitted slug must equal this value.

    Returns:
        ``(source, None)`` when the form is valid, otherwise ``(None, message)``
        where ``message`` is every validation error joined by single spaces.
    """
    if signals is None:
        return None, "Missing form data."

    # Plain text fields, each trimmed by ``_read_string``.
    source_name = _read_string(signals, "sourceName")
    source_slug = _read_string(signals, "sourceSlug")
    source_type = _read_string(signals, "sourceType")
    source_notes = _read_string(signals, "sourceNotes")
    spider_arguments = _normalize_multiline(_read_string(signals, "spiderArguments"))
    feed_url = _read_string(signals, "feedUrl")
    pangea_domain = _read_string(signals, "pangeaDomain")
    pangea_category = _read_string(signals, "pangeaCategory")
    content_format = _read_string(signals, "contentFormat")
    content_type = _read_string(signals, "contentType")
    max_articles = _read_string(signals, "maxArticles")
    oldest_article = _read_string(signals, "oldestArticle")
    cron_minute = _read_string(signals, "cronMinute")
    cron_hour = _read_string(signals, "cronHour")
    cron_day_of_month = _read_string(signals, "cronDayOfMonth")
    cron_day_of_week = _read_string(signals, "cronDayOfWeek")
    cron_month = _read_string(signals, "cronMonth")

    problems: list[str] = []

    if not source_name:
        problems.append("Source name is required.")

    if not source_slug:
        problems.append("Slug is required.")
    elif immutable_slug is not None and source_slug != immutable_slug:
        problems.append("Slug is immutable.")
    elif slug_exists(source_slug):
        problems.append("Slug must be unique.")

    if source_type == "feed":
        if not feed_url:
            problems.append("Feed URL is required for feed sources.")
        elif not _is_valid_url(feed_url):
            problems.append("Feed URL must be a valid URL.")
    elif source_type == "pangea":
        # Blank Pangea fields fall back to defaults before being checked.
        content_format = content_format or DEFAULT_PANGEA_CONTENT_FORMAT
        content_type = content_type or DEFAULT_PANGEA_CONTENT_TYPE
        max_articles = max_articles or DEFAULT_PANGEA_MAX_ARTICLES
        oldest_article = oldest_article or DEFAULT_PANGEA_OLDEST_ARTICLE
        pangea_checks = (
            (pangea_domain == "", "Pangea domain is required."),
            (pangea_category == "", "Category name is required."),
            (content_format not in PANGEA_CONTENT_FORMATS, "Content format is invalid."),
            (content_type not in PANGEA_CONTENT_TYPES, "Content type is invalid."),
            (_parse_int(max_articles) is None, "Max articles must be an integer."),
            (_parse_int(oldest_article) is None, "Oldest article must be an integer."),
        )
        problems.extend(message for failed, message in pangea_checks if failed)
    else:
        problems.append("Source type must be feed or pangea.")

    cron_values = (
        cron_minute,
        cron_hour,
        cron_day_of_month,
        cron_day_of_week,
        cron_month,
    )
    if "" in cron_values:
        problems.append("All cron fields are required.")

    if problems:
        return None, " ".join(problems)

    source: SourceFormData = {
        "name": source_name,
        "slug": source_slug,
        "source_type": source_type,
        "notes": source_notes,
        "spider_arguments": spider_arguments,
        "feed_url": feed_url,
        "pangea_domain": pangea_domain,
        "pangea_category": pangea_category,
        "content_format": content_format,
        "content_type": content_type,
        "max_articles": _parse_int(max_articles),
        "oldest_article": _parse_int(oldest_article),
        "enabled": _read_bool(signals, "jobEnabled"),
        "only_newest": _read_bool(signals, "onlyNewest", default=True),
        "include_authors": _read_bool(signals, "includeAuthors", default=True),
        "exclude_media": _read_bool(signals, "excludeMedia", default=False),
        "include_content": _read_bool(signals, "includeContent", default=True),
        "cron_minute": cron_minute,
        "cron_hour": cron_hour,
        "cron_day_of_month": cron_day_of_month,
        "cron_day_of_week": cron_day_of_week,
        "cron_month": cron_month,
    }
    return source, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _read_string(signals: dict[str, object], key: str) -> str:
|
|
|
|
|
return str(signals.get(key, "")).strip()
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 13:37:25 +02:00
|
|
|
def _read_bool(signals: dict[str, object], key: str, *, default: bool = False) -> bool:
|
|
|
|
|
value = signals.get(key, default)
|
2026-03-30 13:23:36 +02:00
|
|
|
if isinstance(value, bool):
|
|
|
|
|
return value
|
|
|
|
|
if isinstance(value, str):
|
|
|
|
|
return value.lower() in {"true", "1", "on", "yes"}
|
|
|
|
|
return bool(value)
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 13:37:25 +02:00
|
|
|
def _normalize_multiline(value: str) -> str:
|
|
|
|
|
return value.replace("\r\n", "\n").replace("\r", "\n")
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 13:23:36 +02:00
|
|
|
def _parse_int(value: str) -> int | None:
|
|
|
|
|
try:
|
|
|
|
|
return int(value)
|
|
|
|
|
except ValueError:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _is_valid_url(value: str) -> bool:
|
|
|
|
|
parsed = urlparse(value)
|
|
|
|
|
return parsed.scheme in {"http", "https"} and parsed.netloc != ""
|