db backed source creation
This commit is contained in:
parent
b9e288a22d
commit
847aeae772
5 changed files with 312 additions and 99 deletions
117
repub/web.py
117
repub/web.py
|
|
@ -3,7 +3,7 @@ from __future__ import annotations
|
|||
import asyncio
|
||||
import hashlib
|
||||
from collections.abc import AsyncGenerator, Awaitable, Callable
|
||||
from typing import cast
|
||||
from typing import TypedDict, cast
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import htpy as h
|
||||
|
|
@ -11,10 +11,16 @@ from datastar_py import ServerSentEventGenerator as SSE
|
|||
from datastar_py.quart import DatastarResponse, read_signals
|
||||
from datastar_py.sse import DatastarEvent
|
||||
from htpy import Renderable
|
||||
from peewee import IntegrityError
|
||||
from quart import Quart, Response, request, url_for
|
||||
|
||||
from repub.datastar import RefreshBroker, render_stream
|
||||
from repub.model import initialize_database
|
||||
from repub.model import (
|
||||
create_source,
|
||||
initialize_database,
|
||||
load_sources,
|
||||
source_slug_exists,
|
||||
)
|
||||
from repub.pages import (
|
||||
create_source_page,
|
||||
dashboard_page,
|
||||
|
|
@ -23,18 +29,44 @@ from repub.pages import (
|
|||
shim_page,
|
||||
sources_page,
|
||||
)
|
||||
from repub.pages.sources import (
|
||||
DEFAULT_SOURCES,
|
||||
PANGEA_CONTENT_FORMATS,
|
||||
PANGEA_CONTENT_TYPES,
|
||||
)
|
||||
from repub.pages.sources import PANGEA_CONTENT_FORMATS, PANGEA_CONTENT_TYPES
|
||||
|
||||
REFRESH_BROKER_KEY = "repub.refresh_broker"
|
||||
SOURCES_KEY = "repub.sources"
|
||||
|
||||
RenderFunction = Callable[[], Awaitable[Renderable]]
|
||||
|
||||
|
||||
class SourceFormData(TypedDict):
|
||||
name: str
|
||||
slug: str
|
||||
source_type: str
|
||||
notes: str
|
||||
spider_arguments: str
|
||||
enabled: bool
|
||||
cron_minute: str
|
||||
cron_hour: str
|
||||
cron_day_of_month: str
|
||||
cron_day_of_week: str
|
||||
cron_month: str
|
||||
feed_url: str
|
||||
pangea_domain: str
|
||||
pangea_category: str
|
||||
content_format: str
|
||||
content_type: str
|
||||
max_articles: int | None
|
||||
oldest_article: int | None
|
||||
only_newest: bool
|
||||
include_authors: bool
|
||||
exclude_media: bool
|
||||
include_content: bool
|
||||
|
||||
|
||||
DEFAULT_PANGEA_CONTENT_FORMAT = "MOBILE_3"
|
||||
DEFAULT_PANGEA_CONTENT_TYPE = "articles"
|
||||
DEFAULT_PANGEA_MAX_ARTICLES = "10"
|
||||
DEFAULT_PANGEA_OLDEST_ARTICLE = "3"
|
||||
|
||||
|
||||
def _render_shim_page(*, stylesheet_href: str, datastar_src: str) -> tuple[str, str]:
|
||||
head = (
|
||||
h.title["Republisher Admin UI"],
|
||||
|
|
@ -49,7 +81,6 @@ def create_app() -> Quart:
|
|||
app = Quart(__name__)
|
||||
app.config["REPUB_DB_PATH"] = str(initialize_database())
|
||||
app.extensions[REFRESH_BROKER_KEY] = RefreshBroker()
|
||||
app.extensions[SOURCES_KEY] = _default_sources_dict()
|
||||
|
||||
@app.get("/")
|
||||
@app.get("/sources")
|
||||
|
|
@ -90,7 +121,7 @@ def create_app() -> Quart:
|
|||
signals = cast(dict[str, object], await read_signals())
|
||||
source, error = validate_source_form(
|
||||
signals,
|
||||
existing_sources=get_sources_dict(app),
|
||||
slug_exists=source_slug_exists,
|
||||
)
|
||||
if error is not None:
|
||||
return DatastarResponse(
|
||||
|
|
@ -98,7 +129,14 @@ def create_app() -> Quart:
|
|||
)
|
||||
|
||||
assert source is not None
|
||||
get_sources_dict(app)[str(source["slug"])] = source
|
||||
try:
|
||||
create_source(**source)
|
||||
except IntegrityError:
|
||||
return DatastarResponse(
|
||||
SSE.patch_signals(
|
||||
{"_formError": "Slug must be unique.", "_formSuccess": ""}
|
||||
)
|
||||
)
|
||||
trigger_refresh(app)
|
||||
return DatastarResponse(SSE.redirect("/sources"))
|
||||
|
||||
|
|
@ -128,12 +166,8 @@ async def render_dashboard() -> Renderable:
|
|||
return dashboard_page()
|
||||
|
||||
|
||||
def get_sources_dict(app: Quart) -> dict[str, dict[str, object]]:
|
||||
return cast(dict[str, dict[str, object]], app.extensions[SOURCES_KEY])
|
||||
|
||||
|
||||
async def render_sources(app: Quart | None = None) -> Renderable:
|
||||
sources = None if app is None else tuple(get_sources_dict(app).values())
|
||||
sources = None if app is None else load_sources()
|
||||
return sources_page(sources=sources)
|
||||
|
||||
|
||||
|
|
@ -170,15 +204,11 @@ async def _unsubscribe_on_close(
|
|||
get_refresh_broker(app).unsubscribe(cast(asyncio.Queue[object], queue))
|
||||
|
||||
|
||||
def _default_sources_dict() -> dict[str, dict[str, object]]:
|
||||
return {source["slug"]: dict(source) for source in DEFAULT_SOURCES}
|
||||
|
||||
|
||||
def validate_source_form(
|
||||
signals: dict[str, object] | None,
|
||||
*,
|
||||
existing_sources: dict[str, dict[str, object]],
|
||||
) -> tuple[dict[str, object] | None, str | None]:
|
||||
slug_exists: Callable[[str], bool],
|
||||
) -> tuple[SourceFormData | None, str | None]:
|
||||
if signals is None:
|
||||
return None, "Missing form data."
|
||||
|
||||
|
|
@ -193,7 +223,7 @@ def validate_source_form(
|
|||
max_articles = _read_string(signals, "maxArticles")
|
||||
oldest_article = _read_string(signals, "oldestArticle")
|
||||
source_notes = _read_string(signals, "sourceNotes")
|
||||
spider_arguments = _read_string(signals, "spiderArguments")
|
||||
spider_arguments = _normalize_multiline(_read_string(signals, "spiderArguments"))
|
||||
cron_minute = _read_string(signals, "cronMinute")
|
||||
cron_hour = _read_string(signals, "cronHour")
|
||||
cron_day_of_month = _read_string(signals, "cronDayOfMonth")
|
||||
|
|
@ -205,7 +235,7 @@ def validate_source_form(
|
|||
errors.append("Source name is required.")
|
||||
if source_slug == "":
|
||||
errors.append("Slug is required.")
|
||||
elif source_slug in existing_sources:
|
||||
elif slug_exists(source_slug):
|
||||
errors.append("Slug must be unique.")
|
||||
|
||||
if source_type not in {"feed", "pangea"}:
|
||||
|
|
@ -218,6 +248,10 @@ def validate_source_form(
|
|||
errors.append("Feed URL must be a valid URL.")
|
||||
|
||||
if source_type == "pangea":
|
||||
content_format = content_format or DEFAULT_PANGEA_CONTENT_FORMAT
|
||||
content_type = content_type or DEFAULT_PANGEA_CONTENT_TYPE
|
||||
max_articles = max_articles or DEFAULT_PANGEA_MAX_ARTICLES
|
||||
oldest_article = oldest_article or DEFAULT_PANGEA_OLDEST_ARTICLE
|
||||
if pangea_domain == "":
|
||||
errors.append("Pangea domain is required.")
|
||||
if pangea_category == "":
|
||||
|
|
@ -245,33 +279,24 @@ def validate_source_form(
|
|||
return None, " ".join(errors)
|
||||
|
||||
enabled = _read_bool(signals, "jobEnabled")
|
||||
source = {
|
||||
source: SourceFormData = {
|
||||
"name": source_name,
|
||||
"slug": source_slug,
|
||||
"source_type": "Feed" if source_type == "feed" else "Pangea",
|
||||
"upstream": (
|
||||
feed_url
|
||||
if source_type == "feed"
|
||||
else f"{pangea_domain} / {pangea_category}"
|
||||
),
|
||||
"schedule": f"cron: {cron_minute} {cron_hour} {cron_day_of_month} {cron_month} {cron_day_of_week}",
|
||||
"last_run": "Never run",
|
||||
"state": "Enabled" if enabled else "Disabled",
|
||||
"state_tone": "scheduled" if enabled else "idle",
|
||||
"source_type": source_type,
|
||||
"notes": source_notes,
|
||||
"spider_arguments": spider_arguments,
|
||||
"source_kind": source_type,
|
||||
"feed_url": feed_url,
|
||||
"pangea_domain": pangea_domain,
|
||||
"pangea_category": pangea_category,
|
||||
"content_format": content_format,
|
||||
"content_type": content_type,
|
||||
"max_articles": max_articles,
|
||||
"oldest_article": oldest_article,
|
||||
"job_enabled": enabled,
|
||||
"only_newest": _read_bool(signals, "onlyNewest"),
|
||||
"include_authors": _read_bool(signals, "includeAuthors"),
|
||||
"exclude_media": _read_bool(signals, "excludeMedia"),
|
||||
"max_articles": _parse_int(max_articles),
|
||||
"oldest_article": _parse_int(oldest_article),
|
||||
"enabled": enabled,
|
||||
"only_newest": _read_bool(signals, "onlyNewest", default=True),
|
||||
"include_authors": _read_bool(signals, "includeAuthors", default=True),
|
||||
"exclude_media": _read_bool(signals, "excludeMedia", default=False),
|
||||
"include_content": _read_bool(signals, "includeContent", default=True),
|
||||
"cron_minute": cron_minute,
|
||||
"cron_hour": cron_hour,
|
||||
"cron_day_of_month": cron_day_of_month,
|
||||
|
|
@ -285,8 +310,8 @@ def _read_string(signals: dict[str, object], key: str) -> str:
|
|||
return str(signals.get(key, "")).strip()
|
||||
|
||||
|
||||
def _read_bool(signals: dict[str, object], key: str) -> bool:
|
||||
value = signals.get(key, False)
|
||||
def _read_bool(signals: dict[str, object], key: str, *, default: bool = False) -> bool:
|
||||
value = signals.get(key, default)
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if isinstance(value, str):
|
||||
|
|
@ -294,6 +319,10 @@ def _read_bool(signals: dict[str, object], key: str) -> bool:
|
|||
return bool(value)
|
||||
|
||||
|
||||
def _normalize_multiline(value: str) -> str:
|
||||
return value.replace("\r\n", "\n").replace("\r", "\n")
|
||||
|
||||
|
||||
def _parse_int(value: str) -> int | None:
|
||||
try:
|
||||
return int(value)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue