output to out/feeds/*
parent beac981047
commit 6fd3b598ab
11 changed files with 298 additions and 16 deletions
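For orientation, a minimal sketch (not part of the diff) of what the two helpers introduced below resolve to; the import path repub.config and the "info-marti" slug are taken from the hunks and tests in this commit:

    from pathlib import Path

    from repub.config import feed_output_dir, feed_output_path

    out_dir = Path("out")
    # Directory holding all artifacts for one feed: out/feeds/info-marti
    print(feed_output_dir(out_dir=out_dir, feed_slug="info-marti"))
    # RSS file written by the Scrapy FEEDS export: out/feeds/info-marti/feed.rss
    print(feed_output_path(out_dir=out_dir, feed_slug="info-marti"))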
@@ -30,6 +30,14 @@ class RepublisherConfig:
     scrapy_settings: dict[str, Any]
 
 
+def feed_output_dir(*, out_dir: Path, feed_slug: str) -> Path:
+    return out_dir / "feeds" / feed_slug
+
+
+def feed_output_path(*, out_dir: Path, feed_slug: str) -> Path:
+    return feed_output_dir(out_dir=out_dir, feed_slug=feed_slug) / "feed.rss"
+
+
 def _resolve_path(base_path: Path, value: str) -> Path:
     path = Path(value).expanduser()
     if not path.is_absolute():
@@ -173,7 +181,7 @@ def build_feed_settings(
     out_dir: Path,
     feed_slug: str,
 ) -> Settings:
-    feed_dir = out_dir / feed_slug
+    feed_dir = feed_output_dir(out_dir=out_dir, feed_slug=feed_slug)
     image_dir = base_settings.get("REPUBLISHER_IMAGE_DIR", IMAGE_DIR)
     video_dir = base_settings.get("REPUBLISHER_VIDEO_DIR", VIDEO_DIR)
     audio_dir = base_settings.get("REPUBLISHER_AUDIO_DIR", AUDIO_DIR)
@@ -192,7 +200,7 @@ def build_feed_settings(
         {
             "REPUBLISHER_OUT_DIR": str(out_dir),
             "FEEDS": {
-                str(feed_dir / "feed.rss"): {
+                str(feed_output_path(out_dir=out_dir, feed_slug=feed_slug)): {
                     "format": "rss",
                     "postprocessing": [],
                     "feed_name": feed_slug,
@@ -11,6 +11,7 @@ from repub.config import (
     FeedConfig,
     build_base_settings,
     build_feed_settings,
+    feed_output_dir,
     load_config,
 )
 from repub.media import check_runtime
@@ -30,7 +31,9 @@ class FeedNameFilter:
 def prepare_output_dirs(out_dir: Path, feed_name: str) -> None:
     (out_dir / "logs").mkdir(parents=True, exist_ok=True)
     (out_dir / "httpcache").mkdir(parents=True, exist_ok=True)
-    (out_dir / feed_name).mkdir(parents=True, exist_ok=True)
+    feed_output_dir(out_dir=out_dir, feed_slug=feed_name).mkdir(
+        parents=True, exist_ok=True
+    )
 
 
 def create_feed_crawler(
@@ -19,6 +19,7 @@ from repub.config import (
     RepublisherConfig,
     build_base_settings,
     build_feed_settings,
+    feed_output_dir,
 )
 from repub.crawl import prepare_output_dirs
 from repub.model import (
@@ -136,6 +137,7 @@ def generate_pangea_feed(
 ) -> Path:
     resolved_out_dir = Path(out_dir).resolve()
     resolved_log_path = Path(log_path).resolve()
+    pangea_out_dir = feed_output_dir(out_dir=resolved_out_dir, feed_slug=slug)
     config = PygeaConfig(
         config_path=resolved_out_dir / "pygea-runtime.toml",
         domain=domain,
@@ -161,7 +163,7 @@ def generate_pangea_feed(
         results=ResultsConfig(
             output_to_file_p=True,
             output_file_name="pangea.rss",
-            output_directory=resolved_out_dir,
+            output_directory=pangea_out_dir.parent,
         ),
         logging=LoggingConfig(
             log_file=resolved_log_path,
@@ -11,6 +11,7 @@ from typing import Callable, TextIO, cast
 from apscheduler.schedulers.background import BackgroundScheduler
 from apscheduler.triggers.cron import CronTrigger
 
+from repub.config import feed_output_dir, feed_output_path
 from repub.model import Job, JobExecution, JobExecutionStatus, Source, database, utc_now
 
 SCHEDULER_JOB_PREFIX = "job-"
@@ -401,6 +402,7 @@ def load_dashboard_view(
     runs_view = load_runs_view(log_dir=log_dir, now=reference_time)
     output_dir = Path(log_dir).parent
     with database.connection_context():
+        sources = tuple(Source.select().order_by(Source.name.asc()))
         failed_last_day = (
             JobExecution.select()
             .where(
@@ -416,6 +418,10 @@ def load_dashboard_view(
     footprint_bytes = _directory_size(output_dir)
     return {
         "running": runs_view["running"],
+        "source_feeds": tuple(
+            _project_source_feed(source, output_dir, reference_time)
+            for source in sources
+        ),
         "snapshot": {
             "running_now": str(len(runs_view["running"])),
             "upcoming_today": str(upcoming_ready),
@@ -605,6 +611,35 @@ def _project_completed_execution(
     }
 
 
+def _project_source_feed(
+    source: Source, output_dir: Path, reference_time: datetime
+) -> dict[str, object]:
+    source_slug = str(source.slug)
+    source_dir = feed_output_dir(out_dir=output_dir, feed_slug=source_slug)
+    feed_path = feed_output_path(out_dir=output_dir, feed_slug=source_slug)
+    feed_exists = feed_path.exists()
+    updated_at = (
+        datetime.fromtimestamp(feed_path.stat().st_mtime, tz=UTC)
+        if feed_exists
+        else None
+    )
+    return {
+        "source": source.name,
+        "slug": source_slug,
+        "feed_href": f"/feeds/{source_slug}/feed.rss",
+        "feed_status_label": "Available" if feed_exists else "Missing",
+        "feed_status_tone": "done" if feed_exists else "failed",
+        "feed_exists": feed_exists,
+        "last_updated": (
+            _humanize_relative_time(reference_time, updated_at)
+            if updated_at is not None
+            else "Never published"
+        ),
+        "last_updated_iso": updated_at.isoformat() if updated_at is not None else None,
+        "artifact_footprint": _format_bytes(_directory_size(source_dir)),
+    }
+
+
 def _execution_status_label(execution: JobExecution) -> str:
     status = JobExecutionStatus(execution.running_status)
     return {
@@ -13,6 +13,7 @@ from repub.components import (
     muted_action_link,
     stat_card,
     status_badge,
+    table_section,
 )
 
 
@@ -188,6 +189,56 @@ def running_executions_table(
     ]
 
 
+def _source_feed_row(source_feed: Mapping[str, object]) -> tuple[Node, ...]:
+    last_updated_iso = source_feed.get("last_updated_iso")
+    last_updated = (
+        h.time(
+            datetime=str(last_updated_iso),
+            title=str(last_updated_iso),
+            class_="font-medium text-slate-900",
+        )[str(source_feed["last_updated"])]
+        if last_updated_iso is not None
+        else h.p(class_="font-medium text-slate-900")[str(source_feed["last_updated"])]
+    )
+    return (
+        h.div[
+            h.div(class_="font-semibold text-slate-950")[str(source_feed["source"])],
+            h.p(class_="mt-0.5 font-mono text-[11px] text-slate-500")[
+                str(source_feed["slug"])
+            ],
+        ],
+        h.div(class_="min-w-64")[
+            inline_link(
+                href=str(source_feed["feed_href"]),
+                label=str(source_feed["feed_href"]),
+                tone="amber",
+            )
+        ],
+        status_badge(
+            label=str(source_feed["feed_status_label"]),
+            tone=str(source_feed["feed_status_tone"]),
+        ),
+        last_updated,
+        h.p(class_="font-medium text-slate-900")[
+            str(source_feed["artifact_footprint"])
+        ],
+    )
+
+
+def published_feeds_table(
+    *, source_feeds: tuple[Mapping[str, object], ...] | None = None
+) -> Renderable:
+    rows = tuple(_source_feed_row(source_feed) for source_feed in (source_feeds or ()))
+    return table_section(
+        eyebrow="Published feeds",
+        title="Published feeds",
+        subtitle="Per-source public feed paths under /feeds, with current availability and disk usage.",
+        headers=("Source", "Feed URL", "Status", "Last updated", "Disk usage"),
+        rows=rows,
+        actions=muted_action_link(href="/sources", label="Manage sources"),
+    )
+
+
 def dashboard_page() -> Renderable:
     return dashboard_page_with_data()
 
@@ -196,6 +247,7 @@ def dashboard_page_with_data(
     *,
     snapshot: Mapping[str, str] | None = None,
     running_executions: tuple[Mapping[str, object], ...] | None = None,
+    source_feeds: tuple[Mapping[str, object], ...] | None = None,
 ) -> Renderable:
     return h.main(
         id="morph",
@@ -207,6 +259,7 @@ def dashboard_page_with_data(
                 dashboard_header(),
                 operational_snapshot(snapshot=snapshot),
                 running_executions_table(running_executions=running_executions),
+                published_feeds_table(source_feeds=source_feeds),
             ]
         ],
     ]
@@ -8,7 +8,7 @@ from scrapy.utils.spider import iterate_spider_output
 
 from repub.items import ChannelElementItem, ElementItem
 from repub.rss import CDATA, CONTENT, ITUNES, MEDIA, E, munge_cdata_html, normalize_date
-from repub.utils import FileType, determine_file_type, local_file_path
+from repub.utils import FileType, determine_file_type, local_file_path, local_image_path
 
 
 class BaseRssFeedSpider(Spider):
@@ -34,13 +34,15 @@ class BaseRssFeedSpider(Spider):
 
     def rewrite_file_url(self, file_type: FileType, url):
         file_dir = self.settings["REPUBLISHER_FILE_DIR"]
+        local_path = local_file_path(url)
         if file_type == FileType.IMAGE:
             file_dir = self.settings["REPUBLISHER_IMAGE_DIR"]
+            local_path = local_image_path(url)
         elif file_type == FileType.VIDEO:
             file_dir = self.settings["REPUBLISHER_VIDEO_DIR"]
         elif file_type == FileType.AUDIO:
             file_dir = self.settings["REPUBLISHER_AUDIO_DIR"]
-        return f"/{file_dir}/{local_file_path(url)}"
+        return f"{file_dir}/{local_path}"
 
     def rewrite_image_url(self, url):
         return self.rewrite_file_url(FileType.IMAGE, url)
@@ -284,6 +284,7 @@ async def render_dashboard(app: Quart | None = None) -> Renderable:
     return dashboard_page_with_data(
         snapshot=cast(dict[str, str], view["snapshot"]),
         running_executions=cast(tuple[dict[str, object], ...], view["running"]),
+        source_feeds=cast(tuple[dict[str, object], ...], view["source_feeds"]),
     )
 
 
@@ -141,12 +141,20 @@ def test_build_feed_settings_derives_output_paths_from_feed_slug(
     assert feed_settings["REPUBLISHER_OUT_DIR"] == str(out_dir)
     assert feed_settings["LOG_FILE"] == str(out_dir / "logs" / "info-marti.log")
     assert feed_settings["HTTPCACHE_DIR"] == str(out_dir / "httpcache")
-    assert feed_settings["IMAGES_STORE"] == str(out_dir / "info-marti" / "images")
-    assert feed_settings["AUDIO_STORE"] == str(out_dir / "info-marti" / "audio")
-    assert feed_settings["VIDEO_STORE"] == str(out_dir / "info-marti" / "video")
-    assert feed_settings["FILES_STORE"] == str(out_dir / "info-marti" / "files")
+    assert feed_settings["IMAGES_STORE"] == str(
+        out_dir / "feeds" / "info-marti" / "images"
+    )
+    assert feed_settings["AUDIO_STORE"] == str(
+        out_dir / "feeds" / "info-marti" / "audio"
+    )
+    assert feed_settings["VIDEO_STORE"] == str(
+        out_dir / "feeds" / "info-marti" / "video"
+    )
+    assert feed_settings["FILES_STORE"] == str(
+        out_dir / "feeds" / "info-marti" / "files"
+    )
     assert feed_settings["FEEDS"] == {
-        str(out_dir / "info-marti" / "feed.rss"): {
+        str(out_dir / "feeds" / "info-marti" / "feed.rss"): {
             "format": "rss",
             "postprocessing": [],
             "feed_name": "info-marti",
@@ -181,5 +189,9 @@ def test_build_feed_settings_uses_runtime_media_dir_overrides(tmp_path: Path) ->
 
     assert feed_settings["REPUBLISHER_VIDEO_DIR"] == "videos-custom"
     assert feed_settings["REPUBLISHER_AUDIO_DIR"] == "audio-custom"
-    assert feed_settings["VIDEO_STORE"] == str(out_dir / "gp-pod" / "videos-custom")
-    assert feed_settings["AUDIO_STORE"] == str(out_dir / "gp-pod" / "audio-custom")
+    assert feed_settings["VIDEO_STORE"] == str(
+        out_dir / "feeds" / "gp-pod" / "videos-custom"
+    )
+    assert feed_settings["AUDIO_STORE"] == str(
+        out_dir / "feeds" / "gp-pod" / "audio-custom"
+    )
@@ -1,6 +1,10 @@
 from pathlib import Path
 
+from scrapy.settings import Settings
+
 from repub import entrypoint as entrypoint_module
+from repub.spiders.rss_spider import RssFeedSpider
+from repub.utils import FileType, local_audio_path, local_image_path
 
 
 def test_entrypoint_supports_file_feed_urls(tmp_path: Path, monkeypatch) -> None:
@@ -29,9 +33,33 @@ DOWNLOAD_TIMEOUT = 5
 
     exit_code = entrypoint_module.entrypoint(["--config", str(config_path)])
 
-    output_path = tmp_path / "out" / "local-file" / "feed.rss"
+    output_path = tmp_path / "out" / "feeds" / "local-file" / "feed.rss"
     assert exit_code == 0
     assert output_path.exists()
     output = output_path.read_text(encoding="utf-8")
     assert "<title>Local Demo Feed</title>" in output
     assert "<title>Local Demo Entry</title>" in output
+
+
+def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
+    spider = RssFeedSpider(feed_name="demo", url="https://example.com/feed.rss")
+    spider.settings = Settings(
+        values={
+            "REPUBLISHER_IMAGE_DIR": "images",
+            "REPUBLISHER_FILE_DIR": "files",
+            "REPUBLISHER_AUDIO_DIR": "audio",
+            "REPUBLISHER_VIDEO_DIR": "video",
+        }
+    )
+
+    assert (
+        spider.rewrite_image_url("https://example.com/media/photo.jpg")
+        == f"images/{local_image_path('https://example.com/media/photo.jpg')}"
+    )
+    assert (
+        spider.rewrite_file_url(
+            FileType.AUDIO,
+            "https://example.com/media/podcast.mp3",
+        )
+        == f"audio/{local_audio_path('https://example.com/media/podcast.mp3')}"
+    )
@@ -129,7 +129,7 @@ def test_job_runtime_run_now_writes_log_and_stats_and_marks_success(
     assert execution.bytes_count > 0
     assert artifacts.log_path.exists()
     assert artifacts.stats_path.exists()
-    output_path = tmp_path / "out" / "manual-source" / "feed.rss"
+    output_path = tmp_path / "out" / "feeds" / "manual-source" / "feed.rss"
     assert output_path.exists()
     output_text = output_path.read_text(encoding="utf-8")
     assert "<title>Local Demo Feed</title>" in output_text
@@ -291,7 +291,7 @@ def test_generate_pangea_feed_writes_pangea_rss_file(
         log_path=tmp_path / "out" / "logs" / "pangea.log",
     )
 
-    assert output_path == (tmp_path / "out" / "pangea-source" / "pangea.rss")
+    assert output_path == (tmp_path / "out" / "feeds" / "pangea-source" / "pangea.rss")
     assert output_path.exists()
     assert "Pangea Fixture" in output_path.read_text(encoding="utf-8")
 
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 import asyncio
+import os
+from datetime import UTC, datetime, timedelta
 from pathlib import Path
 from typing import Any, cast
 
@@ -205,6 +207,7 @@ def test_render_dashboard_shows_dashboard_information_architecture(
 
         assert "Operational snapshot" in body
         assert "Running executions" in body
+        assert "Published feeds" in body
        assert 'href="/sources"' in body
         assert 'href="/runs"' in body
         assert "Create source" in body
@@ -246,6 +249,141 @@ def test_render_dashboard_describes_log_artifact_footprint(
     asyncio.run(run())
 
 
+def test_load_dashboard_view_lists_source_feed_artifacts(
+    monkeypatch, tmp_path: Path
+) -> None:
+    db_path = tmp_path / "dashboard-feeds.db"
+    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
+    app = create_app()
+    out_dir = tmp_path / "out"
+    log_dir = out_dir / "logs"
+    app.config["REPUB_LOG_DIR"] = log_dir
+    log_dir.mkdir(parents=True)
+
+    create_source(
+        name="Available source",
+        slug="available-source",
+        source_type="feed",
+        notes="",
+        spider_arguments="",
+        enabled=False,
+        cron_minute="*/5",
+        cron_hour="*",
+        cron_day_of_month="*",
+        cron_day_of_week="*",
+        cron_month="*",
+        feed_url="https://example.com/available.xml",
+    )
+    create_source(
+        name="Missing source",
+        slug="missing-source",
+        source_type="feed",
+        notes="",
+        spider_arguments="",
+        enabled=False,
+        cron_minute="*/5",
+        cron_hour="*",
+        cron_day_of_month="*",
+        cron_day_of_week="*",
+        cron_month="*",
+        feed_url="https://example.com/missing.xml",
+    )
+
+    feed_dir = out_dir / "feeds" / "available-source"
+    feed_dir.mkdir(parents=True)
+    feed_path = feed_dir / "feed.rss"
+    feed_path.write_bytes(b"x" * 1024)
+    (feed_dir / "audio.mp3").write_bytes(b"y" * 2048)
+    reference_time = datetime(2026, 3, 30, 12, 30, tzinfo=UTC)
+    updated_at = reference_time - timedelta(minutes=32)
+    updated_at_epoch = updated_at.timestamp()
+    os.utime(feed_path, (updated_at_epoch, updated_at_epoch))
+
+    source_feeds = cast(
+        tuple[dict[str, object], ...],
+        load_dashboard_view(log_dir=log_dir, now=reference_time)["source_feeds"],
+    )
+
+    assert source_feeds == (
+        {
+            "source": "Available source",
+            "slug": "available-source",
+            "feed_href": "/feeds/available-source/feed.rss",
+            "feed_status_label": "Available",
+            "feed_status_tone": "done",
+            "feed_exists": True,
+            "last_updated": "32 minutes ago",
+            "last_updated_iso": updated_at.isoformat(),
+            "artifact_footprint": "3.0 KB",
+        },
+        {
+            "source": "Missing source",
+            "slug": "missing-source",
+            "feed_href": "/feeds/missing-source/feed.rss",
+            "feed_status_label": "Missing",
+            "feed_status_tone": "failed",
+            "feed_exists": False,
+            "last_updated": "Never published",
+            "last_updated_iso": None,
+            "artifact_footprint": "0 B",
+        },
+    )
+
+
+def test_render_dashboard_shows_source_feed_links_and_statuses(
+    monkeypatch, tmp_path: Path
+) -> None:
+    db_path = tmp_path / "dashboard-feed-links.db"
+    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
+    app = create_app()
+    app.config["REPUB_LOG_DIR"] = tmp_path / "out" / "logs"
+
+    create_source(
+        name="Published source",
+        slug="published-source",
+        source_type="feed",
+        notes="",
+        spider_arguments="",
+        enabled=False,
+        cron_minute="*/5",
+        cron_hour="*",
+        cron_day_of_month="*",
+        cron_day_of_week="*",
+        cron_month="*",
+        feed_url="https://example.com/published.xml",
+    )
+    create_source(
+        name="Missing source",
+        slug="missing-source",
+        source_type="feed",
+        notes="",
+        spider_arguments="",
+        enabled=False,
+        cron_minute="*/5",
+        cron_hour="*",
+        cron_day_of_month="*",
+        cron_day_of_week="*",
+        cron_month="*",
+        feed_url="https://example.com/missing.xml",
+    )
+
+    async def run() -> None:
+        published_feed = tmp_path / "out" / "feeds" / "published-source" / "feed.rss"
+        published_feed.parent.mkdir(parents=True)
+        published_feed.write_text("<rss/>\n", encoding="utf-8")
+
+        body = str(await render_dashboard(app))
+
+        assert "Published feeds" in body
+        assert 'href="/feeds/published-source/feed.rss"' in body
+        assert 'href="/feeds/missing-source/feed.rss"' in body
+        assert "Available" in body
+        assert "Missing" in body
+        assert "Never published" in body
+
+    asyncio.run(run())
+
+
 def test_render_sources_shows_table_and_create_link() -> None:
     async def run() -> None:
         body = str(await render_sources())