republisher/tests/test_scheduler_runtime.py

from __future__ import annotations

import asyncio
import json
import socketserver
import subprocess
import sys
import threading
import time
from datetime import UTC, datetime, timedelta
from http.server import BaseHTTPRequestHandler
from pathlib import Path

from repub.job_runner import generate_pangea_feed
from repub.jobs import JobArtifacts, JobRuntime, load_runs_view
from repub.model import (
    Job,
    JobExecution,
    JobExecutionStatus,
    Source,
    create_source,
    initialize_database,
    save_setting,
)
from repub.web import create_app, get_job_runtime, render_execution_logs, render_runs

FIXTURE_FEED_PATH = (
    Path(__file__).resolve().parents[1] / "demo" / "fixtures" / "local-feed.rss"
).resolve()


def initialize_runtime_database(db_path: Path) -> None:
    initialize_database(db_path)
    save_setting("feed_url", "http://localhost:8080")


def test_job_runtime_syncs_enabled_jobs_into_apscheduler(tmp_path: Path) -> None:
    initialize_runtime_database(tmp_path / "scheduler.db")
    enabled_source = create_source(
        name="Enabled source",
        slug="enabled-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=True,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/enabled.xml",
    )
    disabled_source = create_source(
        name="Disabled source",
        slug="disabled-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="15",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/disabled.xml",
    )
    enabled_job = Job.get(Job.source == enabled_source)
    disabled_job = Job.get(Job.source == disabled_source)

    runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
    try:
        runtime.start()
        runtime.sync_jobs()

        scheduled_ids = {job.id for job in runtime.scheduler.get_jobs()}

        assert f"job-{enabled_job.id}" in scheduled_ids
        assert f"job-{disabled_job.id}" not in scheduled_ids

        enabled_job.enabled = False
        enabled_job.save()
        runtime.sync_jobs()

        scheduled_ids = {job.id for job in runtime.scheduler.get_jobs()}
        assert f"job-{enabled_job.id}" not in scheduled_ids
    finally:
        runtime.shutdown()


def test_job_runtime_run_now_writes_log_and_stats_and_marks_success(
    tmp_path: Path,
) -> None:
    initialize_runtime_database(tmp_path / "run-now.db")
    source = create_source(
        name="Manual source",
        slug="manual-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url=FIXTURE_FEED_PATH.as_uri(),
    )
    job = Job.get(Job.source == source)

    runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
    try:
        runtime.start()
        execution_id = runtime.run_job_now(job.id, reason="manual")
        assert execution_id is not None
        execution = _wait_for_terminal_execution(execution_id)
        artifacts = JobArtifacts.for_execution(
            log_dir=tmp_path / "out" / "logs",
            job_id=job.id,
            execution_id=execution_id,
        )

        assert execution.running_status == JobExecutionStatus.SUCCEEDED
        assert execution.started_at is not None
        assert execution.ended_at is not None
        assert execution.requests_count > 0
        assert execution.items_count > 0
        assert execution.bytes_count > 0
        assert artifacts.log_path.exists()
        assert artifacts.stats_path.exists()
        output_path = tmp_path / "out" / "feeds" / "manual-source" / "feed.rss"
        assert output_path.exists()
        output_text = output_path.read_text(encoding="utf-8")
        assert "<title>Local Demo Feed</title>" in output_text
        assert "<title>Local Demo Entry</title>" in output_text

        stats_lines = [
            json.loads(line)
            for line in artifacts.stats_path.read_text(encoding="utf-8").splitlines()
        ]
        assert len(stats_lines) >= 2
        assert stats_lines[-1]["requests_count"] == execution.requests_count
    finally:
        runtime.shutdown()


def test_job_runtime_respects_max_concurrent_jobs_setting(tmp_path: Path) -> None:
    db_path = tmp_path / "max-concurrency.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        first_source = create_source(
            name="First source",
            slug="first-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        second_source = create_source(
            name="Second source",
            slug="second-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        first_job = Job.get(Job.source == first_source)
        second_job = Job.get(Job.source == second_source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            first_execution_id = runtime.run_job_now(first_job.id, reason="manual")

            assert first_execution_id is not None
            _wait_for_running_execution(first_execution_id)

            second_execution_id = runtime.run_job_now(second_job.id, reason="manual")

            assert second_execution_id is not None
            second_execution = _wait_for_execution_status(
                second_execution_id,
                JobExecutionStatus.PENDING,
            )
            assert (
                JobExecution.select()
                .where(JobExecution.running_status == JobExecutionStatus.RUNNING)
                .count()
                == 1
            )
            assert second_execution.started_at is None
            assert (
                JobExecution.select()
                .where(JobExecution.running_status == JobExecutionStatus.PENDING)
                .count()
                == 1
            )
            runtime.request_execution_cancel(first_execution_id)
            finished_execution = _wait_for_terminal_execution(first_execution_id)
            assert finished_execution.running_status == JobExecutionStatus.CANCELED
        finally:
            runtime.shutdown()


def test_job_runtime_starts_queued_execution_after_capacity_opens(
    tmp_path: Path,
) -> None:
    db_path = tmp_path / "drain-queue.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        first_source = create_source(
            name="First source",
            slug="first-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        second_source = create_source(
            name="Second source",
            slug="second-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=FIXTURE_FEED_PATH.as_uri(),
        )
        first_job = Job.get(Job.source == first_source)
        second_job = Job.get(Job.source == second_source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            first_execution_id = runtime.run_job_now(first_job.id, reason="manual")
            assert first_execution_id is not None
            _wait_for_running_execution(first_execution_id)

            second_execution_id = runtime.run_job_now(second_job.id, reason="manual")
            assert second_execution_id is not None
            _wait_for_execution_status(second_execution_id, JobExecutionStatus.PENDING)

            runtime.request_execution_cancel(first_execution_id)
            finished_execution = _wait_for_terminal_execution(first_execution_id)
            assert finished_execution.running_status == JobExecutionStatus.CANCELED

            _wait_for_running_execution(second_execution_id)
            drained_execution = _wait_for_terminal_execution(second_execution_id)
            assert drained_execution.running_status == JobExecutionStatus.SUCCEEDED
            assert drained_execution.started_at is not None
        finally:
            runtime.shutdown()


def test_job_runtime_deduplicates_manual_queue_requests(tmp_path: Path) -> None:
    db_path = tmp_path / "queue-dedup.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        blocking_source = create_source(
            name="Blocking source",
            slug="blocking-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        queued_source = create_source(
            name="Queued source",
            slug="queued-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/queued.xml",
        )
        blocking_job = Job.get(Job.source == blocking_source)
        queued_job = Job.get(Job.source == queued_source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            blocking_execution_id = runtime.run_job_now(
                blocking_job.id, reason="manual"
            )
            assert blocking_execution_id is not None
            _wait_for_running_execution(blocking_execution_id)

            first_pending_id = runtime.run_job_now(queued_job.id, reason="manual")
            second_pending_id = runtime.run_job_now(queued_job.id, reason="manual")

            assert first_pending_id is not None
            assert second_pending_id == first_pending_id
            assert (
                JobExecution.select()
                .where(
                    (JobExecution.job == queued_job)
                    & (JobExecution.running_status == JobExecutionStatus.PENDING)
                )
                .count()
                == 1
            )
        finally:
            runtime.shutdown()


def test_job_runtime_allows_one_running_and_one_pending_per_job(
    tmp_path: Path,
) -> None:
    db_path = tmp_path / "running-plus-pending.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Busy source",
            slug="busy-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            running_execution_id = runtime.run_job_now(job.id, reason="manual")
            assert running_execution_id is not None
            _wait_for_running_execution(running_execution_id)

            pending_execution_id = runtime.run_job_now(job.id, reason="manual")
            duplicate_pending_id = runtime.run_job_now(job.id, reason="manual")
            runtime.run_scheduled_job(job.id)

            assert pending_execution_id is not None
            assert duplicate_pending_id == pending_execution_id
            assert (
                JobExecution.select()
                .where(JobExecution.job == job)
                .where(JobExecution.running_status == JobExecutionStatus.RUNNING)
                .count()
                == 1
            )
            assert (
                JobExecution.select()
                .where(JobExecution.job == job)
                .where(JobExecution.running_status == JobExecutionStatus.PENDING)
                .count()
                == 1
            )
        finally:
            runtime.shutdown()


def test_job_runtime_start_drains_pending_rows_created_before_start(
    tmp_path: Path,
) -> None:
    db_path = tmp_path / "startup-drain.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    source = create_source(
        name="Queued source",
        slug="queued-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url=FIXTURE_FEED_PATH.as_uri(),
    )
    job = Job.get(Job.source == source)
    pending_execution = JobExecution.create(
        job=job,
        running_status=JobExecutionStatus.PENDING,
    )

    runtime = JobRuntime(log_dir=log_dir)
    try:
        runtime.start()
        _wait_for_running_execution(int(pending_execution.get_id()))
        drained_execution = _wait_for_terminal_execution(
            int(pending_execution.get_id())
        )

        assert drained_execution.running_status == JobExecutionStatus.SUCCEEDED
        assert drained_execution.started_at is not None
    finally:
        runtime.shutdown()


def test_job_runtime_scheduled_runs_use_the_persistent_queue(
    tmp_path: Path,
) -> None:
    db_path = tmp_path / "scheduled-queue.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        first_source = create_source(
            name="First scheduled source",
            slug="first-scheduled-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        second_source = create_source(
            name="Second scheduled source",
            slug="second-scheduled-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/second-scheduled.xml",
        )
        first_job = Job.get(Job.source == first_source)
        second_job = Job.get(Job.source == second_source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            runtime.run_scheduled_job(first_job.id)
            first_execution = JobExecution.get(JobExecution.job == first_job)
            _wait_for_running_execution(int(first_execution.get_id()))

            runtime.run_scheduled_job(second_job.id)
            second_execution = JobExecution.get(JobExecution.job == second_job)

            assert second_execution.running_status == JobExecutionStatus.PENDING
            assert second_execution.started_at is None
        finally:
            runtime.shutdown()


def test_job_runtime_cancel_pending_follow_up_keeps_running_worker_alive(
    tmp_path: Path,
) -> None:
    db_path = tmp_path / "cancel-pending.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Cancelable queued source",
            slug="cancelable-queued-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            running_execution_id = runtime.run_job_now(job.id, reason="manual")
            assert running_execution_id is not None
            _wait_for_running_execution(running_execution_id)

            pending_execution_id = runtime.run_job_now(job.id, reason="manual")
            assert pending_execution_id is not None
            _wait_for_execution_status(pending_execution_id, JobExecutionStatus.PENDING)

            assert runtime.cancel_queued_execution(pending_execution_id) is True
            assert JobExecution.get_or_none(id=pending_execution_id) is None
            assert (
                JobExecution.get_by_id(running_execution_id).running_status
                == JobExecutionStatus.RUNNING
            )
        finally:
            runtime.shutdown()


def test_job_runtime_cancel_marks_execution_canceled(tmp_path: Path) -> None:
    initialize_runtime_database(tmp_path / "cancel.db")
    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Cancelable source",
            slug="cancelable-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)

        runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
        try:
            runtime.start()
            execution_id = runtime.run_job_now(job.id, reason="manual")
            assert execution_id is not None
            _wait_for_running_execution(execution_id)

            runtime.request_execution_cancel(execution_id)
            execution = _wait_for_terminal_execution(execution_id)
            artifacts = JobArtifacts.for_execution(
                log_dir=tmp_path / "out" / "logs",
                job_id=job.id,
                execution_id=execution_id,
            )

            assert execution.running_status == JobExecutionStatus.CANCELED
            assert execution.ended_at is not None
            assert execution.stop_requested_at is not None
            assert "graceful stop requested" in artifacts.log_path.read_text(
                encoding="utf-8"
            )
        finally:
            runtime.shutdown()


def test_job_runtime_start_reconciles_stale_running_execution(tmp_path: Path) -> None:
    initialize_runtime_database(tmp_path / "stale-running.db")
    source = create_source(
        name="Stale source",
        slug="stale-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/stale.xml",
    )
    job = Job.get(Job.source == source)
    execution = JobExecution.create(
        job=job,
        started_at="2026-03-30 12:30:00+00:00",
        running_status=JobExecutionStatus.RUNNING,
    )
    artifacts = JobArtifacts.for_execution(
        log_dir=tmp_path / "out" / "logs",
        job_id=job.id,
        execution_id=int(execution.get_id()),
    )
    artifacts.log_path.parent.mkdir(parents=True, exist_ok=True)
    artifacts.log_path.write_text(
        "worker: process lost during app restart\n",
        encoding="utf-8",
    )

    runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
    try:
        runtime.start()
        reconciled_execution = JobExecution.get_by_id(execution.get_id())

        assert reconciled_execution.running_status == JobExecutionStatus.FAILED
        assert reconciled_execution.ended_at is not None
        assert "marked failed after app restart" in artifacts.log_path.read_text(
            encoding="utf-8"
        )
    finally:
        runtime.shutdown()


def test_job_runtime_publishes_refresh_while_jobs_are_running(tmp_path: Path) -> None:
    initialize_runtime_database(tmp_path / "runtime-refresh.db")
    source = create_source(
        name="Running source",
        slug="running-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/running.xml",
    )
    job = Job.get(Job.source == source)
    JobExecution.create(
        job=job,
        started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
        running_status=JobExecutionStatus.RUNNING,
    )
    events: list[object] = []

    runtime = JobRuntime(
        log_dir=tmp_path / "out" / "logs",
        refresh_callback=events.append,
    )
    runtime._last_runtime_refresh_at = time.monotonic() - 2.0
    runtime.poll_workers()

    assert "refresh-event" in events


def test_job_runtime_start_reattaches_live_worker_after_app_restart(
    tmp_path: Path,
) -> None:
    db_path = tmp_path / "live-worker.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Live worker source",
            slug="live-worker-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            started_at=datetime.now(UTC),
            running_status=JobExecutionStatus.RUNNING,
        )
        artifacts = JobArtifacts.for_execution(
            log_dir=log_dir,
            job_id=job.id,
            execution_id=int(execution.get_id()),
        )
        artifacts.log_path.parent.mkdir(parents=True, exist_ok=True)
        log_handle = artifacts.log_path.open("a", encoding="utf-8", buffering=1)
        process = subprocess.Popen(
            [
                sys.executable,
                "-u",
                "-m",
                "repub.job_runner",
                "--job-id",
                str(job.id),
                "--execution-id",
                str(execution.get_id()),
                "--db-path",
                str(db_path),
                "--out-dir",
                str(log_dir.parent),
                "--stats-path",
                str(artifacts.stats_path),
            ],
            stdout=log_handle,
            stderr=subprocess.STDOUT,
            text=True,
        )

        runtime = JobRuntime(log_dir=log_dir)
        try:
            time.sleep(0.1)
            runtime.start()

            running_execution = JobExecution.get_by_id(execution.get_id())
            assert running_execution.running_status == JobExecutionStatus.RUNNING
            assert running_execution.ended_at is None

            completed_execution = _wait_for_terminal_execution(int(execution.get_id()))
            assert completed_execution.running_status == JobExecutionStatus.SUCCEEDED
            assert "reattached" in artifacts.log_path.read_text(encoding="utf-8")
        finally:
            runtime.shutdown()
            if process.poll() is None:
                process.kill()
                process.wait(timeout=2)
            log_handle.close()


def test_job_runtime_start_restores_live_worker_marked_failed_by_restart_bug(
    tmp_path: Path,
) -> None:
    db_path = tmp_path / "restore-live-worker.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Recovered worker source",
            slug="recovered-worker-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            started_at=datetime.now(UTC),
            ended_at=datetime.now(UTC),
            running_status=JobExecutionStatus.FAILED,
        )
        artifacts = JobArtifacts.for_execution(
            log_dir=log_dir,
            job_id=job.id,
            execution_id=int(execution.get_id()),
        )
        artifacts.log_path.parent.mkdir(parents=True, exist_ok=True)
        log_handle = artifacts.log_path.open("a", encoding="utf-8", buffering=1)
        process = subprocess.Popen(
            [
                sys.executable,
                "-u",
                "-m",
                "repub.job_runner",
                "--job-id",
                str(job.id),
                "--execution-id",
                str(execution.get_id()),
                "--db-path",
                str(db_path),
                "--out-dir",
                str(log_dir.parent),
                "--stats-path",
                str(artifacts.stats_path),
            ],
            stdout=log_handle,
            stderr=subprocess.STDOUT,
            text=True,
        )

        runtime = JobRuntime(log_dir=log_dir)
        try:
            time.sleep(0.1)
            runtime.start()

            restored_execution = JobExecution.get_by_id(execution.get_id())
            assert restored_execution.running_status == JobExecutionStatus.RUNNING
            assert restored_execution.ended_at is None

            completed_execution = _wait_for_terminal_execution(int(execution.get_id()))
            assert completed_execution.running_status == JobExecutionStatus.SUCCEEDED
            assert "restored execution state" in artifacts.log_path.read_text(
                encoding="utf-8"
            )
        finally:
            runtime.shutdown()
            if process.poll() is None:
                process.kill()
                process.wait(timeout=2)
            log_handle.close()


def test_generate_pangea_feed_writes_pangea_rss_file(
    monkeypatch, tmp_path: Path
) -> None:
    class StubPangeaFeed:
        def __init__(self, config, feeds):
            self.config = config
            self.feed = feeds[0]

        def acquire_content(self) -> None:
            return None

        def generate_feed(self) -> None:
            return None

        def disgorge(self, slug: str):
            output_path = self.config.results.output_directory / slug / "pangea.rss"
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_text(
                "<rss><channel><title>Pangea Fixture</title></channel></rss>\n",
                encoding="utf-8",
            )
            return output_path

    monkeypatch.setattr(
        "repub.job_runner.pangea_feed_class",
        lambda: StubPangeaFeed,
    )

    output_path = generate_pangea_feed(
        name="Pangea source",
        slug="pangea-source",
        domain="example.org",
        category_name="News",
        content_type="articles",
        only_newest=True,
        max_articles=10,
        oldest_article=3,
        include_authors=True,
        exclude_media=False,
        include_content=True,
        content_format="MOBILE_3",
        out_dir=tmp_path / "out",
        log_path=tmp_path / "out" / "logs" / "pangea.log",
    )

    assert output_path == (tmp_path / "out" / "feeds" / "pangea-source" / "pangea.rss")
    assert output_path.exists()
    assert "Pangea Fixture" in output_path.read_text(encoding="utf-8")


def test_load_runs_view_humanizes_completed_execution_end_time(
    monkeypatch, tmp_path: Path
) -> None:
    db_path = tmp_path / "runs-view.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    app = create_app()
    app.config["REPUB_LOG_DIR"] = log_dir
    source = create_source(
        name="Completed source",
        slug="completed-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/completed.xml",
    )
    job = Job.get(Job.source == source)
    reference_time = datetime(2026, 1, 15, 12, 0, tzinfo=UTC)
    ended_at = reference_time - timedelta(hours=2)
    JobExecution.create(
        job=job,
        running_status=JobExecutionStatus.SUCCEEDED,
        ended_at=ended_at,
    )

    view = load_runs_view(log_dir=app.config["REPUB_LOG_DIR"], now=reference_time)
    completed = view["completed"][0]

    assert completed["ended_at"] == "2 hours ago"
    assert completed["ended_at_iso"] == ended_at.isoformat()


def test_render_runs_uses_database_backed_jobs_and_executions(
    monkeypatch, tmp_path: Path
) -> None:
    db_path = tmp_path / "runs-page.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    app = create_app()
    app.config["REPUB_LOG_DIR"] = log_dir
    save_setting("feed_url", "http://localhost:8080")
    source = create_source(
        name="Runs page source",
        slug="runs-page-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=True,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url=FIXTURE_FEED_PATH.as_uri(),
    )
    job = Job.get(Job.source == source)
    runtime = get_job_runtime(app)
    runtime.start()
    try:
        execution_id = runtime.run_job_now(job.id, reason="manual")
        assert execution_id is not None
        execution = _wait_for_terminal_execution(execution_id)

        async def run() -> None:
            body = str(await render_runs(app))

            assert "runs-page-source" in body
            assert "Running jobs" in body
            assert "Scheduled jobs" in body
            assert "Completed job executions" in body
            assert f"/job/{job.id}/execution/{execution.get_id()}/logs" in body
            assert "Succeeded" in body
            assert "Run now" in body

        asyncio.run(run())
    finally:
        runtime.shutdown()


def test_render_execution_logs_handles_missing_execution_and_missing_log_file(
    monkeypatch, tmp_path: Path
) -> None:
    db_path = tmp_path / "log-errors.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    app = create_app()
    app.config["REPUB_LOG_DIR"] = log_dir
    source = create_source(
        name="Log source",
        slug="log-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/log-source.xml",
    )
    job = Job.get(Job.source == source)
    execution = JobExecution.create(
        job=job,
        running_status=JobExecutionStatus.FAILED,
    )

    async def run() -> None:
        missing_execution = str(
            await render_execution_logs(app, job_id=job.id, execution_id=9999)
        )
        missing_log = str(
            await render_execution_logs(app, job_id=job.id, execution_id=execution.id)
        )

        assert "Execution log unavailable" in missing_execution
        assert "Execution does not exist." in missing_execution
        assert "Execution log unavailable" in missing_log
        assert "Log file has not been created yet." in missing_log

    asyncio.run(run())


def test_delete_job_action_removes_source_job_and_execution_history(
    monkeypatch, tmp_path: Path
) -> None:
    db_path = tmp_path / "delete-job.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        client = app.test_client()

        source = create_source(
            name="Delete source",
            slug="delete-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/30",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/delete.xml",
        )
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            running_status=JobExecutionStatus.SUCCEEDED,
        )

        response = await client.post(f"/actions/jobs/{job.id}/delete")

        assert response.status_code == 204
        assert Source.get_or_none(Source.slug == "delete-source") is None
        assert Job.get_or_none(id=job.id) is None
        assert JobExecution.get_or_none(id=int(execution.get_id())) is None

    asyncio.run(run())


def test_delete_source_action_removes_source_job_and_execution_history(
    monkeypatch, tmp_path: Path
) -> None:
    db_path = tmp_path / "delete-source.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        client = app.test_client()

        source = create_source(
            name="Delete source row",
            slug="delete-source-row",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/30",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/delete-source-row.xml",
        )
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            running_status=JobExecutionStatus.SUCCEEDED,
        )

        response = await client.post("/actions/sources/delete-source-row/delete")

        assert response.status_code == 204
        assert Source.get_or_none(Source.slug == "delete-source-row") is None
        assert Job.get_or_none(id=job.id) is None
        assert JobExecution.get_or_none(id=int(execution.get_id())) is None

    asyncio.run(run())


def _wait_for_running_execution(
    execution_id: int, *, timeout_seconds: float = 2.0
) -> JobExecution:
    deadline = time.monotonic() + timeout_seconds
    while time.monotonic() < deadline:
        execution = JobExecution.get_by_id(execution_id)
        if execution.running_status == JobExecutionStatus.RUNNING:
            return execution
        time.sleep(0.02)
    raise AssertionError(f"execution {execution_id} never entered RUNNING state")


def _wait_for_execution_status(
    execution_id: int,
    status: JobExecutionStatus,
    *,
    timeout_seconds: float = 2.0,
) -> JobExecution:
    deadline = time.monotonic() + timeout_seconds
    while time.monotonic() < deadline:
        execution = JobExecution.get_by_id(execution_id)
        if execution.running_status == status:
            return execution
        time.sleep(0.02)
    raise AssertionError(f"execution {execution_id} never entered {status.name}")


def _wait_for_terminal_execution(
    execution_id: int, *, timeout_seconds: float = 4.0
) -> JobExecution:
    deadline = time.monotonic() + timeout_seconds
    while time.monotonic() < deadline:
        execution = JobExecution.get_by_id(execution_id)
        if execution.running_status in {
            JobExecutionStatus.SUCCEEDED,
            JobExecutionStatus.FAILED,
            JobExecutionStatus.CANCELED,
        }:
            return execution
        time.sleep(0.02)
    raise AssertionError(f"execution {execution_id} did not finish in time")


class _SlowFeedRequestHandler(BaseHTTPRequestHandler):
    def do_GET(self) -> None:  # noqa: N802
        time.sleep(2.0)
        payload = FIXTURE_FEED_PATH.read_bytes()
        self.send_response(200)
        self.send_header("Content-Type", "application/rss+xml; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def log_message(self, format: str, *args: object) -> None:
        del format, args


class _ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
    allow_reuse_address = True


class _slow_feed_server:
    def __enter__(self) -> str:
        self._server = _ThreadedTCPServer(("127.0.0.1", 0), _SlowFeedRequestHandler)
        self._thread = threading.Thread(
            target=self._server.serve_forever,
            kwargs={"poll_interval": 0.01},
            daemon=True,
        )
        self._thread.start()
        host = str(self._server.server_address[0])
        port = int(self._server.server_address[1])
        return f"http://{host}:{port}/slow-feed.rss"

    def __exit__(self, exc_type, exc, tb) -> None:
        del exc_type, exc, tb
        self._server.shutdown()
        self._server.server_close()
        self._thread.join(timeout=1)