# republisher/repub/web.py

from __future__ import annotations
import asyncio
import hashlib
from collections.abc import AsyncGenerator, Awaitable, Callable
from contextlib import suppress
from datetime import timedelta
from pathlib import Path
from typing import TypedDict, cast
from urllib.parse import urlparse
import htpy as h
from datastar_py import ServerSentEventGenerator as SSE
from datastar_py.quart import DatastarResponse, read_signals
from datastar_py.sse import DatastarEvent
from htpy import Renderable
from peewee import IntegrityError
from quart import Quart, Response, request, send_from_directory, url_for
from repub.datastar import RefreshBroker, TabStateStore, render_stream
from repub.jobs import (
COMPLETED_EXECUTION_PAGE_SIZE,
JobRuntime,
clear_completed_executions,
load_dashboard_view,
load_execution_log_view,
load_runs_view,
)
from repub.model import (
Job,
create_source,
delete_job_source,
delete_source,
initialize_database,
load_settings_form,
load_source_form,
load_sources,
save_setting,
source_slug_exists,
update_source,
)
from repub.pages import (
create_source_page,
dashboard_page_with_data,
edit_source_page,
execution_logs_page,
runs_page,
settings_page,
shim_page,
sources_page,
)
from repub.pages.sources import PANGEA_CONTENT_FORMATS, PANGEA_CONTENT_TYPES
REFRESH_BROKER_KEY = "repub.refresh_broker"
JOB_RUNTIME_KEY = "repub.job_runtime"
TAB_STATE_STORE_KEY = "repub.tab_state_store"
TAB_STATE_CLEANER_TASK_KEY = "repub.tab_state_cleaner_task"
SHUTDOWN_EVENT_KEY = "repub.shutdown_event"
DEFAULT_LOG_DIR = Path("out/logs")
DEFAULT_FEEDS_DIR = Path("out/feeds")
RUNS_TAB_STATE_KEY = "runs"
TAB_STATE_CLEAN_INTERVAL = timedelta(seconds=10)
PatchRenderFunction = Callable[[str | None], Awaitable[Renderable]]
class SourceFormData(TypedDict):
name: str
slug: str
source_type: str
notes: str
spider_arguments: str
enabled: bool
convert_images: bool
convert_video: bool
cron_minute: str
cron_hour: str
cron_day_of_month: str
cron_day_of_week: str
cron_month: str
feed_url: str
pangea_domain: str
pangea_category: str
content_format: str
content_type: str
max_articles: int | None
oldest_article: int | None
only_newest: bool
include_authors: bool
exclude_media: bool
include_content: bool
class SettingsFormData(TypedDict):
max_concurrent_jobs: int
feed_url: str
DEFAULT_PANGEA_CONTENT_FORMAT = "MOBILE_3"
DEFAULT_PANGEA_CONTENT_TYPE = "articles"
DEFAULT_PANGEA_MAX_ARTICLES = "10"
DEFAULT_PANGEA_OLDEST_ARTICLE = "3"
STATIC_DIR = Path(__file__).resolve().parent / "static"
CACHE_BUSTED_STATIC_ASSETS = frozenset({"app.css"})
CACHE_BUSTED_HASH_LENGTH = 12
def _render_shim_page(
*, stylesheet_href: str, datastar_src: str, current_path: str
) -> tuple[str, str]:
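    """Render the HTML shim page and return the body together with its SHA-256 ETag."""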
head = (
h.title["Republisher Admin UI"],
h.link(rel="stylesheet", href=stylesheet_href),
)
body = str(
shim_page(datastar_src=datastar_src, current_path=current_path, head=head)
)
etag = hashlib.sha256(body.encode("utf-8")).hexdigest()
return body, etag
def versioned_static_asset_filename(filename: str) -> str:
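    """Return the asset filename with a content-hash suffix.

    Hash values here are illustrative only: "app.css" maps to something like
    "app-0123456789ab.css" (the first 12 hex characters of the file's SHA-256
    digest, per CACHE_BUSTED_HASH_LENGTH).
    """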
_require_cache_busted_static_asset(filename)
asset_path = STATIC_DIR / filename
truncated_hash = hashlib.sha256(asset_path.read_bytes()).hexdigest()[
:CACHE_BUSTED_HASH_LENGTH
]
return f"{asset_path.stem}-{truncated_hash}{asset_path.suffix}"
def versioned_static_asset_href(filename: str) -> str:
return f"/static/{versioned_static_asset_filename(filename)}"
def _require_cache_busted_static_asset(filename: str) -> None:
if filename not in CACHE_BUSTED_STATIC_ASSETS:
raise ValueError(f"Unsupported cache-busted static asset: {filename}")
def create_app(*, dev_mode: bool = False) -> Quart:
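    """Create and configure the Quart application: config, extensions, routes, and lifecycle hooks."""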
app = Quart(__name__)
app.config["REPUB_DB_PATH"] = str(initialize_database())
app.config.setdefault("REPUB_LOG_DIR", DEFAULT_LOG_DIR)
app.config.setdefault("REPUB_FEEDS_DIR", DEFAULT_FEEDS_DIR)
app.config["REPUB_DEV_MODE"] = dev_mode
app.extensions[REFRESH_BROKER_KEY] = RefreshBroker()
app.extensions[JOB_RUNTIME_KEY] = None
app.extensions[TAB_STATE_STORE_KEY] = TabStateStore()
app.extensions[TAB_STATE_CLEANER_TASK_KEY] = None
app.extensions[SHUTDOWN_EVENT_KEY] = None
@app.get("/feeds/<path:feed_path>")
async def published_feed(feed_path: str) -> Response:
if not bool(app.config["REPUB_DEV_MODE"]):
return Response(status=404)
response = await send_from_directory(
str(Path(app.config["REPUB_FEEDS_DIR"])),
feed_path,
)
if Path(feed_path).suffix == ".rss":
response.mimetype = "application/rss+xml"
return response
@app.get("/static/<string:asset_name>-<string:asset_hash>.<string:extension>")
async def versioned_static_asset(
asset_name: str, asset_hash: str, extension: str
) -> Response:
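        """Serve a hash-suffixed static asset; known cache-busted assets get year-long immutable cache headers."""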
logical_filename = f"{asset_name}.{extension}"
requested_filename = f"{asset_name}-{asset_hash}.{extension}"
if logical_filename in CACHE_BUSTED_STATIC_ASSETS:
response = await send_from_directory(str(STATIC_DIR), logical_filename)
response.cache_control.public = True
response.cache_control.max_age = 31536000
response.cache_control.immutable = True
return response
response = await send_from_directory(str(STATIC_DIR), requested_filename)
return response
@app.get("/")
@app.get("/sources")
@app.get("/sources/create")
@app.get("/sources/<string:slug>/edit")
@app.get("/runs")
@app.get("/settings")
@app.get("/job/<int:job_id>/execution/<int:execution_id>/logs")
async def page_shim(
slug: str | None = None,
job_id: int | None = None,
execution_id: int | None = None,
) -> Response:
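        """Serve the shared HTML shim for every page route; the actual page content arrives via the Datastar patch endpoints. Responses carry an ETag for 304 revalidation."""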
del slug, job_id, execution_id
body, etag = _render_shim_page(
stylesheet_href=versioned_static_asset_href("app.css"),
datastar_src=url_for("static", filename="datastar@1.0.0-RC.8.js"),
current_path=request.path,
)
if request.if_none_match.contains(etag):
response = Response(status=304)
response.set_etag(etag)
return response
response = Response(body, mimetype="text/html")
response.set_etag(etag)
return response
@app.post("/")
async def dashboard_patch() -> DatastarResponse:
return await _page_patch_response(app, lambda _tab_id: render_dashboard(app))
@app.post("/sources")
async def sources_patch() -> DatastarResponse:
return await _page_patch_response(app, lambda _tab_id: render_sources(app))
@app.post("/sources/create")
async def create_source_patch() -> DatastarResponse:
return await _page_patch_response(
app, lambda _tab_id: render_create_source(app)
)
@app.post("/sources/<string:slug>/edit")
async def edit_source_patch(slug: str) -> DatastarResponse:
return await _page_patch_response(
app, lambda _tab_id: render_edit_source(slug, app)
)
@app.post("/settings")
async def settings_patch() -> DatastarResponse:
return await _page_patch_response(app, lambda _tab_id: render_settings(app))
@app.post("/actions/sources/create")
async def create_source_action() -> DatastarResponse:
signals = cast(dict[str, object] | None, await read_signals())
source, error = validate_source_form(
signals,
slug_exists=source_slug_exists,
)
if error is not None:
return DatastarResponse(
SSE.patch_signals({"_formError": error, "_formSuccess": ""})
)
assert source is not None
try:
create_source(**source)
except IntegrityError:
return DatastarResponse(
SSE.patch_signals(
{"_formError": "Slug must be unique.", "_formSuccess": ""}
)
)
get_job_runtime(app).sync_jobs()
trigger_refresh(app)
return DatastarResponse(SSE.redirect("/sources"))
@app.post("/actions/sources/<string:slug>/edit")
async def edit_source_action(slug: str) -> DatastarResponse:
signals = cast(dict[str, object] | None, await read_signals())
source, error = validate_source_form(
signals,
slug_exists=lambda candidate: candidate != slug
and source_slug_exists(candidate),
immutable_slug=slug,
)
if error is not None:
return DatastarResponse(
SSE.patch_signals({"_formError": error, "_formSuccess": ""})
)
assert source is not None
if update_source(slug, **source) is None:
return DatastarResponse(
SSE.patch_signals(
{"_formError": "Source does not exist.", "_formSuccess": ""}
)
)
get_job_runtime(app).sync_jobs()
trigger_refresh(app)
return DatastarResponse(SSE.redirect("/sources"))
@app.post("/actions/sources/<string:slug>/delete")
async def delete_source_action(slug: str) -> Response:
delete_source(slug)
get_job_runtime(app).sync_jobs()
trigger_refresh(app)
return Response(status=204)
@app.post("/actions/settings")
async def update_settings_action() -> DatastarResponse:
signals = cast(dict[str, object] | None, await read_signals())
settings, error = validate_settings_form(signals)
if error is not None:
return DatastarResponse(
SSE.patch_signals({"_formError": error, "_formSuccess": ""})
)
assert settings is not None
save_setting("max_concurrent_jobs", settings["max_concurrent_jobs"])
save_setting("feed_url", settings["feed_url"])
trigger_refresh(app)
return DatastarResponse(SSE.redirect("/settings"))
@app.post("/runs")
async def runs_patch() -> DatastarResponse:
return await _page_patch_response(
app,
lambda tab_id: render_runs(app, tab_id=tab_id),
)
@app.post("/actions/runs/completed-page/<int:page>")
async def set_completed_runs_page_action(page: int) -> Response:
signals = await _read_optional_signals()
tab_id = _read_tab_id(signals)
if tab_id is None:
return Response(status=400)
get_tab_state_store(app).update_page_state(
tab_id,
RUNS_TAB_STATE_KEY,
lambda state: {**state, "completed_page": max(1, page)},
)
trigger_refresh(app, tab_id=tab_id)
return Response(status=204)
@app.post("/actions/jobs/<int:job_id>/run-now")
async def run_job_now_action(job_id: int) -> Response:
get_job_runtime(app).run_job_now(job_id, reason="manual")
trigger_refresh(app)
return Response(status=204)
@app.post("/actions/jobs/<int:job_id>/toggle-enabled")
async def toggle_job_enabled_action(job_id: int) -> Response:
job = Job.get_or_none(id=job_id)
if job is not None:
get_job_runtime(app).set_job_enabled(job_id, enabled=not job.enabled)
trigger_refresh(app)
return Response(status=204)
@app.post("/actions/jobs/<int:job_id>/delete")
async def delete_job_action(job_id: int) -> Response:
delete_job_source(job_id)
get_job_runtime(app).sync_jobs()
trigger_refresh(app)
return Response(status=204)
@app.post("/actions/executions/<int:execution_id>/cancel")
async def cancel_execution_action(execution_id: int) -> Response:
get_job_runtime(app).request_execution_cancel(execution_id)
trigger_refresh(app)
return Response(status=204)
@app.post("/actions/queued-executions/<int:execution_id>/cancel")
async def cancel_queued_execution_action(execution_id: int) -> Response:
get_job_runtime(app).cancel_queued_execution(execution_id)
trigger_refresh(app)
return Response(status=204)
@app.post("/actions/queued-executions/<int:execution_id>/move-up")
async def move_queued_execution_up_action(execution_id: int) -> Response:
get_job_runtime(app).move_queued_execution(execution_id, direction="up")
return Response(status=204)
@app.post("/actions/queued-executions/<int:execution_id>/move-down")
async def move_queued_execution_down_action(execution_id: int) -> Response:
get_job_runtime(app).move_queued_execution(execution_id, direction="down")
return Response(status=204)
@app.post("/actions/completed-executions/clear")
async def clear_completed_executions_action() -> Response:
clear_completed_executions(log_dir=app.config["REPUB_LOG_DIR"])
trigger_refresh(app)
return Response(status=204)
@app.post("/job/<int:job_id>/execution/<int:execution_id>/logs")
async def logs_patch(job_id: int, execution_id: int) -> DatastarResponse:
async def render() -> Renderable:
return await render_execution_logs(
app, job_id=job_id, execution_id=execution_id
)
return await _page_patch_response(app, lambda _tab_id: render())
@app.before_serving
async def start_runtime() -> None:
get_job_runtime(app).start()
app.extensions[TAB_STATE_CLEANER_TASK_KEY] = asyncio.create_task(
_clean_tab_state_periodically(app)
)
@app.after_serving
async def stop_runtime() -> None:
cleaner = cast(
asyncio.Task[None] | None, app.extensions.get(TAB_STATE_CLEANER_TASK_KEY)
)
if cleaner is not None:
cleaner.cancel()
with suppress(asyncio.CancelledError):
await cleaner
get_job_runtime(app).shutdown()
return app
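
# A minimal sketch of serving the app (assumed invocation; the real entry point
# lives outside this module):
#
#     app = create_app(dev_mode=True)
#     app.run()  # Quart development server; use an ASGI server in production
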
def get_refresh_broker(app: Quart) -> RefreshBroker:
return cast(RefreshBroker, app.extensions[REFRESH_BROKER_KEY])
def get_shutdown_event(app: Quart) -> asyncio.Event | None:
return cast(asyncio.Event | None, app.extensions.get(SHUTDOWN_EVENT_KEY))
def get_tab_state_store(app: Quart) -> TabStateStore:
return cast(TabStateStore, app.extensions[TAB_STATE_STORE_KEY])
def get_job_runtime(app: Quart) -> JobRuntime:
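    """Return the shared JobRuntime, creating it lazily on first access."""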
runtime = cast(JobRuntime | None, app.extensions.get(JOB_RUNTIME_KEY))
if runtime is None:
runtime = JobRuntime(
log_dir=app.config["REPUB_LOG_DIR"],
refresh_callback=lambda event="refresh-event": trigger_refresh(app, event),
)
app.extensions[JOB_RUNTIME_KEY] = runtime
return runtime
def trigger_refresh(
app: Quart, event: object = "refresh-event", *, tab_id: str | None = None
) -> None:
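    """Publish a refresh event so subscribed SSE streams re-render, optionally scoped to one tab."""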
get_refresh_broker(app).publish(event, tab_id=tab_id)
async def render_dashboard(app: Quart | None = None) -> Renderable:
if app is None:
return dashboard_page_with_data()
view = load_dashboard_view(log_dir=app.config["REPUB_LOG_DIR"])
return dashboard_page_with_data(
snapshot=cast(dict[str, str], view["snapshot"]),
running_executions=cast(tuple[dict[str, object], ...], view["running"]),
queued_executions=cast(tuple[dict[str, object], ...], view["queued"]),
source_feeds=cast(tuple[dict[str, object], ...], view["source_feeds"]),
)
async def render_sources(app: Quart | None = None) -> Renderable:
if app is None:
return sources_page()
sources = load_sources()
return sources_page(
sources=sources,
running_count=len(
load_runs_view(log_dir=app.config["REPUB_LOG_DIR"])["running"]
),
)
async def render_create_source(app: Quart | None = None) -> Renderable:
if app is None:
return create_source_page()
sidebar_counts = _load_sidebar_counts(app)
return create_source_page(
source_count=sidebar_counts["source_count"],
running_count=sidebar_counts["running_count"],
)
async def render_edit_source(slug: str, app: Quart | None = None) -> Renderable:
source = load_source_form(slug)
if source is None:
return sources_page(sources=())
return edit_source_page(
slug=slug,
source=source,
action_path=f"/actions/sources/{slug}/edit",
**({} if app is None else _load_sidebar_counts(app)),
)
async def render_runs(
app: Quart | None = None, *, tab_id: str | None = None
) -> Renderable:
if app is None:
return runs_page()
tab_state = get_tab_state_store(app).get_page_state(tab_id, RUNS_TAB_STATE_KEY)
resolved_completed_page = max(1, _read_int(tab_state.get("completed_page"), 1))
view = load_runs_view(
log_dir=app.config["REPUB_LOG_DIR"],
completed_page=resolved_completed_page,
completed_page_size=COMPLETED_EXECUTION_PAGE_SIZE,
)
return runs_page(
running_executions=cast(tuple[dict[str, object], ...], view["running"]),
queued_executions=cast(tuple[dict[str, object], ...], view["queued"]),
upcoming_jobs=cast(tuple[dict[str, object], ...], view["upcoming"]),
completed_executions=cast(tuple[dict[str, object], ...], view["completed"]),
completed_page=cast(int, view["completed_page"]),
completed_page_size=cast(int, view["completed_page_size"]),
completed_total_count=cast(int, view["completed_total_count"]),
completed_total_pages=cast(int, view["completed_total_pages"]),
source_count=len(load_sources()),
)
async def render_settings(app: Quart | None = None) -> Renderable:
if app is None:
return settings_page(settings=load_settings_form())
sidebar_counts = _load_sidebar_counts(app)
return settings_page(
settings=load_settings_form(),
source_count=sidebar_counts["source_count"],
running_count=sidebar_counts["running_count"],
)
async def render_execution_logs(
app: Quart | None = None, *, job_id: int, execution_id: int
) -> Renderable:
if app is None:
return execution_logs_page(job_id=job_id, execution_id=execution_id)
log_view = load_execution_log_view(
log_dir=app.config["REPUB_LOG_DIR"],
job_id=job_id,
execution_id=execution_id,
)
return execution_logs_page(
job_id=job_id,
execution_id=execution_id,
log_view={
"title": log_view.title,
"description": log_view.description,
"status_label": log_view.status_label,
"status_tone": log_view.status_tone,
"log_text": log_view.log_text,
"error_message": log_view.error_message,
},
)
async def _page_patch_response(
app: Quart, render: PatchRenderFunction
) -> DatastarResponse:
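    """Subscribe to refresh events and stream re-rendered page patches over SSE."""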
signals = await _read_optional_signals()
tab_id = _read_tab_id(signals)
if tab_id is not None:
get_tab_state_store(app).connect(tab_id)
queue = get_refresh_broker(app).subscribe(tab_id=tab_id)
stream = render_stream(
queue,
render=lambda: render(tab_id),
last_event_id=request.headers.get("last-event-id"),
shutdown_event=get_shutdown_event(app),
)
return DatastarResponse(_unsubscribe_on_close(queue, stream, app, tab_id=tab_id))
async def _unsubscribe_on_close(
queue: object,
stream: AsyncGenerator[DatastarEvent, None],
app: Quart,
*,
tab_id: str | None,
) -> AsyncGenerator[DatastarEvent, None]:
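    """Proxy the event stream, unsubscribing from the broker and releasing tab state when it closes."""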
try:
async for event in stream:
yield event
finally:
get_refresh_broker(app).unsubscribe(cast(asyncio.Queue[object], queue))
if tab_id is not None:
get_tab_state_store(app).disconnect(tab_id)
def _load_sidebar_counts(app: Quart) -> dict[str, int]:
return {
"source_count": len(load_sources()),
"running_count": len(
load_runs_view(log_dir=app.config["REPUB_LOG_DIR"])["running"]
),
}
async def _clean_tab_state_periodically(app: Quart) -> None:
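    """Periodically evict stale tab state; the task is cancelled at shutdown."""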
while True:
await asyncio.sleep(TAB_STATE_CLEAN_INTERVAL.total_seconds())
get_tab_state_store(app).cleanup_stale()
async def _read_optional_signals() -> dict[str, object] | None:
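    """Return Datastar signals when the request has a body or a JSON content type; return None otherwise or on parse failure."""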
content_type = request.headers.get("Content-Type", "")
if request.content_length in (None, 0) and "application/json" not in content_type:
return None
try:
return cast(dict[str, object] | None, await read_signals())
except Exception:
return None
def _read_tab_id(signals: dict[str, object] | None) -> str | None:
if signals is None:
return None
tab_id = _read_string(signals, "tabid")
return tab_id or None
def validate_source_form(
signals: dict[str, object] | None,
*,
slug_exists: Callable[[str], bool],
immutable_slug: str | None = None,
) -> tuple[SourceFormData | None, str | None]:
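    """Validate source-form signals.

    Returns (source_data, None) on success, or (None, error_message) when
    validation fails.
    """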
if signals is None:
return None, "Missing form data."
source_name = _read_string(signals, "sourceName")
source_slug = _read_string(signals, "sourceSlug")
source_type = _read_string(signals, "sourceType")
feed_url = _read_string(signals, "feedUrl")
pangea_domain = _read_string(signals, "pangeaDomain")
pangea_category = _read_string(signals, "pangeaCategory", strip=False)
content_format = _read_string(signals, "contentFormat")
content_type = _read_string(signals, "contentType")
max_articles = _read_string(signals, "maxArticles")
oldest_article = _read_string(signals, "oldestArticle")
source_notes = _read_string(signals, "sourceNotes")
spider_arguments = _normalize_multiline(_read_string(signals, "spiderArguments"))
cron_minute = _read_string(signals, "cronMinute")
cron_hour = _read_string(signals, "cronHour")
cron_day_of_month = _read_string(signals, "cronDayOfMonth")
cron_day_of_week = _read_string(signals, "cronDayOfWeek")
cron_month = _read_string(signals, "cronMonth")
errors: list[str] = []
if source_name == "":
errors.append("Source name is required.")
if source_slug == "":
errors.append("Slug is required.")
elif immutable_slug is not None and source_slug != immutable_slug:
errors.append("Slug is immutable.")
elif slug_exists(source_slug):
errors.append("Slug must be unique.")
if source_type not in {"feed", "pangea"}:
errors.append("Source type must be feed or pangea.")
if source_type == "feed":
if feed_url == "":
errors.append("Feed URL is required for feed sources.")
elif not _is_valid_url(feed_url):
errors.append("Feed URL must be a valid URL.")
if source_type == "pangea":
content_format = content_format or DEFAULT_PANGEA_CONTENT_FORMAT
content_type = content_type or DEFAULT_PANGEA_CONTENT_TYPE
max_articles = max_articles or DEFAULT_PANGEA_MAX_ARTICLES
oldest_article = oldest_article or DEFAULT_PANGEA_OLDEST_ARTICLE
if pangea_domain == "":
errors.append("Pangea domain is required.")
if pangea_category == "":
errors.append("Category name is required.")
if content_format not in PANGEA_CONTENT_FORMATS:
errors.append("Content format is invalid.")
if content_type not in PANGEA_CONTENT_TYPES:
errors.append("Content type is invalid.")
if _parse_int(max_articles) is None:
errors.append("Max articles must be an integer.")
if _parse_int(oldest_article) is None:
errors.append("Oldest article must be an integer.")
cron_values = (
cron_minute,
cron_hour,
cron_day_of_month,
cron_day_of_week,
cron_month,
)
if any(value == "" for value in cron_values):
errors.append("All cron fields are required.")
if errors:
return None, " ".join(errors)
enabled = _read_bool(signals, "jobEnabled")
source: SourceFormData = {
"name": source_name,
"slug": source_slug,
"source_type": source_type,
"notes": source_notes,
"spider_arguments": spider_arguments,
"feed_url": feed_url,
"pangea_domain": pangea_domain,
"pangea_category": pangea_category,
"content_format": content_format,
"content_type": content_type,
"max_articles": _parse_int(max_articles),
"oldest_article": _parse_int(oldest_article),
"enabled": enabled,
"convert_images": _read_bool(signals, "convertImages", default=True),
"convert_video": _read_bool(signals, "convertVideo", default=True),
"only_newest": _read_bool(signals, "onlyNewest", default=True),
"include_authors": _read_bool(signals, "includeAuthors", default=True),
"exclude_media": _read_bool(signals, "excludeMedia", default=False),
"include_content": _read_bool(signals, "includeContent", default=True),
"cron_minute": cron_minute,
"cron_hour": cron_hour,
"cron_day_of_month": cron_day_of_month,
"cron_day_of_week": cron_day_of_week,
"cron_month": cron_month,
}
return source, None
def validate_settings_form(
signals: dict[str, object] | None,
) -> tuple[SettingsFormData | None, str | None]:
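    """Validate settings-form signals; returns (settings, None) or (None, error_message)."""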
if signals is None:
return None, "Missing form data."
max_concurrent_jobs = _parse_int(_read_string(signals, "maxConcurrentJobs"))
feed_url = _read_string(signals, "feedUrl").rstrip("/")
if max_concurrent_jobs is None:
return None, "Max concurrent jobs must be an integer."
if max_concurrent_jobs < 1:
return None, "Max concurrent jobs must be at least 1."
if feed_url != "" and not _is_valid_url(feed_url):
return None, "Feed URL must be a valid URL."
return {
"max_concurrent_jobs": max_concurrent_jobs,
"feed_url": feed_url,
}, None
def _read_string(signals: dict[str, object], key: str, *, strip: bool = True) -> str:
value = str(signals.get(key, ""))
return value.strip() if strip else value
def _read_bool(signals: dict[str, object], key: str, *, default: bool = False) -> bool:
value = signals.get(key, default)
if isinstance(value, bool):
return value
if isinstance(value, str):
return value.lower() in {"true", "1", "on", "yes"}
return bool(value)
def _normalize_multiline(value: str) -> str:
return value.replace("\r\n", "\n").replace("\r", "\n")
def _parse_int(value: str) -> int | None:
try:
return int(value)
except ValueError:
return None
def _read_int(value: object, default: int) -> int:
if isinstance(value, bool):
return int(value)
if isinstance(value, int):
return value
if isinstance(value, str):
parsed = _parse_int(value)
return default if parsed is None else parsed
return default
def _is_valid_url(value: str) -> bool:
parsed = urlparse(value)
return parsed.scheme in {"http", "https"} and parsed.netloc != ""