implement scrapy + pangea job runner

This commit is contained in:
Abel Luck 2026-03-30 15:04:41 +02:00
parent 916968c579
commit 8af28c2f68
8 changed files with 888 additions and 163 deletions

View file

@ -6,6 +6,7 @@ from typing import Any, cast
from repub.components import status_badge
from repub.datastar import RefreshBroker, render_sse_event, render_stream
from repub.jobs import load_dashboard_view
from repub.model import (
Job,
JobExecution,
@ -15,6 +16,7 @@ from repub.model import (
SourcePangea,
create_source,
)
from repub.pages.runs import runs_page
from repub.web import (
create_app,
get_refresh_broker,
@ -34,6 +36,37 @@ def test_status_badge_uses_green_done_tone() -> None:
assert "Succeeded" in badge
def test_runs_page_renders_completed_execution_end_time_as_relative_hoverable_time() -> (
    None
):
    """A completed row shows the relative end time plus hoverable ISO metadata.

    The rendered markup must carry the ISO timestamp in a ``data-ended-at``
    attribute, the ``datetime`` attribute, and the ``title`` tooltip, while the
    visible text stays the human-friendly relative phrase.
    """
    iso_timestamp = "2026-01-15T10:00:00+00:00"
    completed_row = {
        "source": "Completed source",
        "slug": "completed-source",
        "job_id": 7,
        "execution_id": 42,
        "ended_at": "2 hours ago",
        "ended_at_iso": iso_timestamp,
        "status": "Succeeded",
        "status_tone": "done",
        "stats": "1 requests • 1 items • 1 bytes",
        "summary": "Worker exited successfully",
        "log_href": "/job/7/execution/42/logs",
    }
    body = str(runs_page(completed_executions=(completed_row,)))
    # Machine-readable timestamp must appear in all three attribute positions.
    assert "data-ended-at" in body
    assert f'data-ended-at="{iso_timestamp}"' in body
    assert f'datetime="{iso_timestamp}"' in body
    assert f'title="{iso_timestamp}"' in body
    # Visible text remains the relative phrase, not the raw ISO value.
    assert ">2 hours ago<" in body
def test_root_get_serves_datastar_shim() -> None:
async def run() -> None:
client = create_app().test_client()
@ -179,6 +212,40 @@ def test_render_dashboard_shows_dashboard_information_architecture(
asyncio.run(run())
def test_load_dashboard_view_measures_log_artifact_path(
    monkeypatch, tmp_path: Path
) -> None:
    """The dashboard snapshot reports total artifact size under the output path.

    Writes 1 KiB of logs and 2 KiB of HTTP cache beneath a shared output
    directory and expects the snapshot's ``artifact_footprint`` to report the
    combined "3.0 KB".
    """
    monkeypatch.setenv(
        "REPUBLISHER_DB_PATH", str(tmp_path / "dashboard-footprint.db")
    )
    create_app()
    output_root = tmp_path / "out"
    logs_dir = output_root / "logs"
    cache_dir = output_root / "httpcache"
    for directory in (logs_dir, cache_dir):
        directory.mkdir(parents=True)
    (logs_dir / "run.log").write_bytes(b"x" * 1024)
    (cache_dir / "cache.bin").write_bytes(b"y" * 2048)
    view = load_dashboard_view(log_dir=logs_dir)
    snapshot = view["snapshot"]
    assert cast(dict[str, str], snapshot)["artifact_footprint"] == "3.0 KB"
def test_render_dashboard_describes_log_artifact_footprint(
    monkeypatch, tmp_path: Path
) -> None:
    """The rendered dashboard explains what the artifact-footprint figure means."""
    monkeypatch.setenv(
        "REPUBLISHER_DB_PATH", str(tmp_path / "dashboard-footprint-copy.db")
    )

    async def run() -> None:
        rendered = str(await render_dashboard(create_app()))
        assert "Current artifact size under the output path." in rendered

    asyncio.run(run())
def test_render_sources_shows_table_and_create_link() -> None:
async def run() -> None:
body = str(await render_sources())