"""Integration tests for the repub job runtime, runner, and web views.

These tests exercise the APScheduler-backed ``JobRuntime`` (scheduling,
manual runs, queueing/concurrency limits, cancellation, and crash/restart
reconciliation), the pangea feed generator, and the Quart/Flask-style web
views that render runs and execution logs.  Most tests build a real SQLite
database in ``tmp_path`` and, where a long-running job is needed, serve a
fixture RSS feed from a deliberately slow local HTTP server.
"""

from __future__ import annotations

import asyncio
import json
import socketserver
import subprocess
import sys
import threading
import time
from datetime import UTC, datetime, timedelta
from http.server import BaseHTTPRequestHandler
from pathlib import Path

from repub.job_runner import generate_pangea_feed
from repub.jobs import JobArtifacts, JobRuntime, load_runs_view
from repub.model import (
    Job,
    JobExecution,
    JobExecutionStatus,
    Source,
    create_source,
    initialize_database,
    save_setting,
)
from repub.web import create_app, get_job_runtime, render_execution_logs, render_runs

# Checked-in RSS fixture served to spiders during tests.
FIXTURE_FEED_PATH = (
    Path(__file__).resolve().parents[1] / "demo" / "fixtures" / "local-feed.rss"
).resolve()


def initialize_runtime_database(db_path: Path) -> None:
    """Create the test database and seed the minimal settings the runtime reads."""
    initialize_database(db_path)
    save_setting("feed_url", "http://localhost:8080")


def test_job_runtime_syncs_enabled_jobs_into_apscheduler(tmp_path: Path) -> None:
    """sync_jobs() registers enabled jobs with APScheduler and drops disabled ones."""
    initialize_runtime_database(tmp_path / "scheduler.db")
    enabled_source = create_source(
        name="Enabled source",
        slug="enabled-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=True,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/enabled.xml",
    )
    disabled_source = create_source(
        name="Disabled source",
        slug="disabled-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="15",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/disabled.xml",
    )
    enabled_job = Job.get(Job.source == enabled_source)
    disabled_job = Job.get(Job.source == disabled_source)

    runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
    try:
        runtime.start()
        runtime.sync_jobs()

        # Scheduler job ids follow the "job-<db id>" convention.
        scheduled_ids = {job.id for job in runtime.scheduler.get_jobs()}
        assert f"job-{enabled_job.id}" in scheduled_ids
        assert f"job-{disabled_job.id}" not in scheduled_ids

        # Disabling a job and re-syncing must unschedule it.
        enabled_job.enabled = False
        enabled_job.save()
        runtime.sync_jobs()
        scheduled_ids = {job.id for job in runtime.scheduler.get_jobs()}
        assert f"job-{enabled_job.id}" not in scheduled_ids
    finally:
        runtime.shutdown()


def test_job_runtime_run_now_writes_log_and_stats_and_marks_success(
    tmp_path: Path,
) -> None:
    """A manual run produces a log, a stats file, output feed, and SUCCEEDED state."""
    initialize_runtime_database(tmp_path / "run-now.db")
    source = create_source(
        name="Manual source",
        slug="manual-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        # file:// URI so the job succeeds without network access.
        feed_url=FIXTURE_FEED_PATH.as_uri(),
    )
    job = Job.get(Job.source == source)

    runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
    try:
        runtime.start()
        execution_id = runtime.run_job_now(job.id, reason="manual")
        assert execution_id is not None

        execution = _wait_for_terminal_execution(execution_id)
        artifacts = JobArtifacts.for_execution(
            log_dir=tmp_path / "out" / "logs",
            job_id=job.id,
            execution_id=execution_id,
        )

        assert execution.running_status == JobExecutionStatus.SUCCEEDED
        assert execution.started_at is not None
        assert execution.ended_at is not None
        # Counters must have been copied from the worker's stats output.
        assert execution.requests_count > 0
        assert execution.items_count > 0
        assert execution.bytes_count > 0
        assert artifacts.log_path.exists()
        assert artifacts.stats_path.exists()

        output_path = tmp_path / "out" / "feeds" / "manual-source" / "feed.rss"
        assert output_path.exists()
        output_text = output_path.read_text(encoding="utf-8")
        assert "Local Demo Feed" in output_text
        assert "Local Demo Entry" in output_text

        # Stats file is JSON-lines; the last line holds the final totals.
        stats_lines = [
            json.loads(line)
            for line in artifacts.stats_path.read_text(encoding="utf-8").splitlines()
        ]
        assert len(stats_lines) >= 2
        assert stats_lines[-1]["requests_count"] == execution.requests_count
    finally:
        runtime.shutdown()


def test_job_runtime_respects_max_concurrent_jobs_setting(tmp_path: Path) -> None:
    """With max_concurrent_jobs=1, a second run stays PENDING while the first runs."""
    db_path = tmp_path / "max-concurrency.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        first_source = create_source(
            name="First source",
            slug="first-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        second_source = create_source(
            name="Second source",
            slug="second-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        first_job = Job.get(Job.source == first_source)
        second_job = Job.get(Job.source == second_source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            first_execution_id = runtime.run_job_now(first_job.id, reason="manual")
            assert first_execution_id is not None
            _wait_for_running_execution(first_execution_id)

            second_execution_id = runtime.run_job_now(second_job.id, reason="manual")
            assert second_execution_id is not None
            second_execution = _wait_for_execution_status(
                second_execution_id,
                JobExecutionStatus.PENDING,
            )

            # Exactly one RUNNING and one queued PENDING row may exist.
            assert (
                JobExecution.select()
                .where(JobExecution.running_status == JobExecutionStatus.RUNNING)
                .count()
                == 1
            )
            assert second_execution.started_at is None
            assert (
                JobExecution.select()
                .where(JobExecution.running_status == JobExecutionStatus.PENDING)
                .count()
                == 1
            )

            # Cancel the blocker so teardown does not wait on the slow server.
            runtime.request_execution_cancel(first_execution_id)
            finished_execution = _wait_for_terminal_execution(first_execution_id)
            assert finished_execution.running_status == JobExecutionStatus.CANCELED
        finally:
            runtime.shutdown()


def test_job_runtime_starts_queued_execution_after_capacity_opens(
    tmp_path: Path,
) -> None:
    """A queued execution starts automatically once the running one finishes."""
    db_path = tmp_path / "drain-queue.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        first_source = create_source(
            name="First source",
            slug="first-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        second_source = create_source(
            name="Second source",
            slug="second-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            # Second job uses the fast local fixture so it can succeed quickly.
            feed_url=FIXTURE_FEED_PATH.as_uri(),
        )
        first_job = Job.get(Job.source == first_source)
        second_job = Job.get(Job.source == second_source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            first_execution_id = runtime.run_job_now(first_job.id, reason="manual")
            assert first_execution_id is not None
            _wait_for_running_execution(first_execution_id)

            second_execution_id = runtime.run_job_now(second_job.id, reason="manual")
            assert second_execution_id is not None
            _wait_for_execution_status(second_execution_id, JobExecutionStatus.PENDING)

            # Free the single slot; the queued job should then run to success.
            runtime.request_execution_cancel(first_execution_id)
            finished_execution = _wait_for_terminal_execution(first_execution_id)
            assert finished_execution.running_status == JobExecutionStatus.CANCELED

            _wait_for_running_execution(second_execution_id)
            drained_execution = _wait_for_terminal_execution(second_execution_id)
            assert drained_execution.running_status == JobExecutionStatus.SUCCEEDED
            assert drained_execution.started_at is not None
        finally:
            runtime.shutdown()


def test_job_runtime_deduplicates_manual_queue_requests(tmp_path: Path) -> None:
    """Re-queueing the same job returns the existing PENDING execution id."""
    db_path = tmp_path / "queue-dedup.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        blocking_source = create_source(
            name="Blocking source",
            slug="blocking-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        queued_source = create_source(
            name="Queued source",
            slug="queued-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/queued.xml",
        )
        blocking_job = Job.get(Job.source == blocking_source)
        queued_job = Job.get(Job.source == queued_source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            blocking_execution_id = runtime.run_job_now(
                blocking_job.id, reason="manual"
            )
            assert blocking_execution_id is not None
            _wait_for_running_execution(blocking_execution_id)

            first_pending_id = runtime.run_job_now(queued_job.id, reason="manual")
            second_pending_id = runtime.run_job_now(queued_job.id, reason="manual")
            assert first_pending_id is not None
            # Second request is deduplicated onto the first PENDING row.
            assert second_pending_id == first_pending_id
            assert (
                JobExecution.select()
                .where(
                    (JobExecution.job == queued_job)
                    & (JobExecution.running_status == JobExecutionStatus.PENDING)
                )
                .count()
                == 1
            )
        finally:
            runtime.shutdown()


def test_job_runtime_allows_one_running_and_one_pending_per_job(
    tmp_path: Path,
) -> None:
    """Per job, at most one RUNNING plus one PENDING execution may coexist."""
    db_path = tmp_path / "running-plus-pending.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Busy source",
            slug="busy-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            running_execution_id = runtime.run_job_now(job.id, reason="manual")
            assert running_execution_id is not None
            _wait_for_running_execution(running_execution_id)

            # Manual re-runs and a scheduled trigger all collapse onto one
            # pending follow-up execution.
            pending_execution_id = runtime.run_job_now(job.id, reason="manual")
            duplicate_pending_id = runtime.run_job_now(job.id, reason="manual")
            runtime.run_scheduled_job(job.id)

            assert pending_execution_id is not None
            assert duplicate_pending_id == pending_execution_id
            assert (
                JobExecution.select()
                .where(JobExecution.job == job)
                .where(JobExecution.running_status == JobExecutionStatus.RUNNING)
                .count()
                == 1
            )
            assert (
                JobExecution.select()
                .where(JobExecution.job == job)
                .where(JobExecution.running_status == JobExecutionStatus.PENDING)
                .count()
                == 1
            )
        finally:
            runtime.shutdown()


def test_job_runtime_start_drains_pending_rows_created_before_start(
    tmp_path: Path,
) -> None:
    """PENDING rows persisted before start() are picked up and executed."""
    db_path = tmp_path / "startup-drain.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    source = create_source(
        name="Queued source",
        slug="queued-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url=FIXTURE_FEED_PATH.as_uri(),
    )
    job = Job.get(Job.source == source)
    # Simulate a queue row left behind by a previous app process.
    pending_execution = JobExecution.create(
        job=job,
        running_status=JobExecutionStatus.PENDING,
    )

    runtime = JobRuntime(log_dir=log_dir)
    try:
        runtime.start()
        _wait_for_running_execution(int(pending_execution.get_id()))
        drained_execution = _wait_for_terminal_execution(
            int(pending_execution.get_id())
        )
        assert drained_execution.running_status == JobExecutionStatus.SUCCEEDED
        assert drained_execution.started_at is not None
    finally:
        runtime.shutdown()


def test_job_runtime_scheduled_runs_use_the_persistent_queue(
    tmp_path: Path,
) -> None:
    """Scheduled triggers beyond capacity land in the persistent PENDING queue."""
    db_path = tmp_path / "scheduled-queue.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        first_source = create_source(
            name="First scheduled source",
            slug="first-scheduled-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        second_source = create_source(
            name="Second scheduled source",
            slug="second-scheduled-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/second-scheduled.xml",
        )
        first_job = Job.get(Job.source == first_source)
        second_job = Job.get(Job.source == second_source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            runtime.run_scheduled_job(first_job.id)
            first_execution = JobExecution.get(JobExecution.job == first_job)
            _wait_for_running_execution(int(first_execution.get_id()))

            runtime.run_scheduled_job(second_job.id)
            second_execution = JobExecution.get(JobExecution.job == second_job)
            assert second_execution.running_status == JobExecutionStatus.PENDING
            assert second_execution.started_at is None
        finally:
            runtime.shutdown()


def test_job_runtime_cancel_pending_follow_up_keeps_running_worker_alive(
    tmp_path: Path,
) -> None:
    """Canceling a queued follow-up deletes its row without touching the worker."""
    db_path = tmp_path / "cancel-pending.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Cancelable queued source",
            slug="cancelable-queued-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            running_execution_id = runtime.run_job_now(job.id, reason="manual")
            assert running_execution_id is not None
            _wait_for_running_execution(running_execution_id)

            pending_execution_id = runtime.run_job_now(job.id, reason="manual")
            assert pending_execution_id is not None
            _wait_for_execution_status(pending_execution_id, JobExecutionStatus.PENDING)

            assert runtime.cancel_queued_execution(pending_execution_id) is True
            # The pending row is removed entirely, not marked CANCELED.
            assert JobExecution.get_or_none(id=pending_execution_id) is None
            assert (
                JobExecution.get_by_id(running_execution_id).running_status
                == JobExecutionStatus.RUNNING
            )
        finally:
            runtime.shutdown()


def test_job_runtime_cancel_marks_execution_canceled(tmp_path: Path) -> None:
    """Canceling a running execution records CANCELED plus stop metadata and logs."""
    initialize_runtime_database(tmp_path / "cancel.db")
    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Cancelable source",
            slug="cancelable-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)

        runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
        try:
            runtime.start()
            execution_id = runtime.run_job_now(job.id, reason="manual")
            assert execution_id is not None
            _wait_for_running_execution(execution_id)

            runtime.request_execution_cancel(execution_id)
            execution = _wait_for_terminal_execution(execution_id)
            artifacts = JobArtifacts.for_execution(
                log_dir=tmp_path / "out" / "logs",
                job_id=job.id,
                execution_id=execution_id,
            )

            assert execution.running_status == JobExecutionStatus.CANCELED
            assert execution.ended_at is not None
            assert execution.stop_requested_at is not None
            assert "graceful stop requested" in artifacts.log_path.read_text(
                encoding="utf-8"
            )
        finally:
            runtime.shutdown()


def test_job_runtime_start_reconciles_stale_running_execution(tmp_path: Path) -> None:
    """start() marks a RUNNING row with no live worker process as FAILED."""
    initialize_runtime_database(tmp_path / "stale-running.db")
    source = create_source(
        name="Stale source",
        slug="stale-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/stale.xml",
    )
    job = Job.get(Job.source == source)
    # Fabricate a RUNNING execution whose worker process no longer exists.
    execution = JobExecution.create(
        job=job,
        started_at="2026-03-30 12:30:00+00:00",
        running_status=JobExecutionStatus.RUNNING,
    )
    artifacts = JobArtifacts.for_execution(
        log_dir=tmp_path / "out" / "logs",
        job_id=job.id,
        execution_id=int(execution.get_id()),
    )
    artifacts.log_path.parent.mkdir(parents=True, exist_ok=True)
    artifacts.log_path.write_text(
        "worker: process lost during app restart\n",
        encoding="utf-8",
    )

    runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
    try:
        runtime.start()
        reconciled_execution = JobExecution.get_by_id(execution.get_id())
        assert reconciled_execution.running_status == JobExecutionStatus.FAILED
        assert reconciled_execution.ended_at is not None
        assert "marked failed after app restart" in artifacts.log_path.read_text(
            encoding="utf-8"
        )
    finally:
        runtime.shutdown()


def test_job_runtime_publishes_refresh_while_jobs_are_running(tmp_path: Path) -> None:
    """poll_workers() emits a "refresh-event" to the refresh callback.

    NOTE(review): this test calls ``poll_workers`` on a runtime that was
    never started and never shuts it down — presumably intentional since no
    workers are spawned, but worth confirming against ``JobRuntime``.
    """
    initialize_runtime_database(tmp_path / "runtime-refresh.db")
    source = create_source(
        name="Running source",
        slug="running-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/running.xml",
    )
    job = Job.get(Job.source == source)
    JobExecution.create(
        job=job,
        started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
        running_status=JobExecutionStatus.RUNNING,
    )

    events: list[object] = []
    runtime = JobRuntime(
        log_dir=tmp_path / "out" / "logs",
        refresh_callback=events.append,
    )
    # Backdate the throttle timestamp so the next poll is allowed to publish.
    runtime._last_runtime_refresh_at = time.monotonic() - 2.0
    runtime.poll_workers()

    assert "refresh-event" in events


def test_job_runtime_start_reattaches_live_worker_after_app_restart(
    tmp_path: Path,
) -> None:
    """start() reattaches to a still-live worker process instead of failing it."""
    db_path = tmp_path / "live-worker.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)

    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Live worker source",
            slug="live-worker-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            started_at=datetime.now(UTC),
            running_status=JobExecutionStatus.RUNNING,
        )
        artifacts = JobArtifacts.for_execution(
            log_dir=log_dir,
            job_id=job.id,
            execution_id=int(execution.get_id()),
        )
        artifacts.log_path.parent.mkdir(parents=True, exist_ok=True)
        # Launch a real worker process out-of-band, the way a previous app
        # instance would have, writing its output into the execution log.
        log_handle = artifacts.log_path.open("a", encoding="utf-8", buffering=1)
        process = subprocess.Popen(
            [
                sys.executable,
                "-u",
                "-m",
                "repub.job_runner",
                "--job-id",
                str(job.id),
                "--execution-id",
                str(execution.get_id()),
                "--db-path",
                str(db_path),
                "--out-dir",
                str(log_dir.parent),
                "--stats-path",
                str(artifacts.stats_path),
            ],
            stdout=log_handle,
            stderr=subprocess.STDOUT,
            text=True,
        )

        runtime = JobRuntime(log_dir=log_dir)
        try:
            # Give the worker a moment to come up before the runtime scans.
            time.sleep(0.1)
            runtime.start()
            running_execution = JobExecution.get_by_id(execution.get_id())
            assert running_execution.running_status == JobExecutionStatus.RUNNING
            assert running_execution.ended_at is None

            completed_execution = _wait_for_terminal_execution(int(execution.get_id()))
            assert completed_execution.running_status == JobExecutionStatus.SUCCEEDED
            assert "reattached" in artifacts.log_path.read_text(encoding="utf-8")
        finally:
            runtime.shutdown()
            if process.poll() is None:
                process.kill()
            process.wait(timeout=2)
            log_handle.close()


def test_job_runtime_start_restores_live_worker_marked_failed_by_restart_bug(
    tmp_path: Path,
) -> None:
    """start() flips a wrongly-FAILED row back to RUNNING when its worker is alive."""
    db_path = tmp_path / "restore-live-worker.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)

    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Recovered worker source",
            slug="recovered-worker-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)
        # Row already marked FAILED/ended by the buggy restart path, even
        # though its worker process is still alive.
        execution = JobExecution.create(
            job=job,
            started_at=datetime.now(UTC),
            ended_at=datetime.now(UTC),
            running_status=JobExecutionStatus.FAILED,
        )
        artifacts = JobArtifacts.for_execution(
            log_dir=log_dir,
            job_id=job.id,
            execution_id=int(execution.get_id()),
        )
        artifacts.log_path.parent.mkdir(parents=True, exist_ok=True)
        log_handle = artifacts.log_path.open("a", encoding="utf-8", buffering=1)
        process = subprocess.Popen(
            [
                sys.executable,
                "-u",
                "-m",
                "repub.job_runner",
                "--job-id",
                str(job.id),
                "--execution-id",
                str(execution.get_id()),
                "--db-path",
                str(db_path),
                "--out-dir",
                str(log_dir.parent),
                "--stats-path",
                str(artifacts.stats_path),
            ],
            stdout=log_handle,
            stderr=subprocess.STDOUT,
            text=True,
        )

        runtime = JobRuntime(log_dir=log_dir)
        try:
            time.sleep(0.1)
            runtime.start()
            restored_execution = JobExecution.get_by_id(execution.get_id())
            assert restored_execution.running_status == JobExecutionStatus.RUNNING
            assert restored_execution.ended_at is None

            completed_execution = _wait_for_terminal_execution(int(execution.get_id()))
            assert completed_execution.running_status == JobExecutionStatus.SUCCEEDED
            assert "restored execution state" in artifacts.log_path.read_text(
                encoding="utf-8"
            )
        finally:
            runtime.shutdown()
            if process.poll() is None:
                process.kill()
            process.wait(timeout=2)
            log_handle.close()


def test_generate_pangea_feed_writes_pangea_rss_file(
    monkeypatch, tmp_path: Path
) -> None:
    """generate_pangea_feed() writes <out>/feeds/<slug>/pangea.rss via the feed class."""

    class StubPangeaFeed:
        # Minimal stand-in for the real pangea feed implementation.
        def __init__(self, config, feeds):
            self.config = config
            self.feed = feeds[0]

        def acquire_content(self) -> None:
            return None

        def generate_feed(self) -> None:
            return None

        def disgorge(self, slug: str):
            # Mirrors the real output layout: <output_directory>/<slug>/pangea.rss
            output_path = self.config.results.output_directory / slug / "pangea.rss"
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_text(
                "Pangea Fixture\n",
                encoding="utf-8",
            )
            return output_path

    monkeypatch.setattr(
        "repub.job_runner.pangea_feed_class",
        lambda: StubPangeaFeed,
    )

    output_path = generate_pangea_feed(
        name="Pangea source",
        slug="pangea-source",
        domain="example.org",
        category_name="News",
        content_type="articles",
        only_newest=True,
        max_articles=10,
        oldest_article=3,
        include_authors=True,
        exclude_media=False,
        include_content=True,
        content_format="MOBILE_3",
        out_dir=tmp_path / "out",
        log_path=tmp_path / "out" / "logs" / "pangea.log",
    )

    assert output_path == (tmp_path / "out" / "feeds" / "pangea-source" / "pangea.rss")
    assert output_path.exists()
    assert "Pangea Fixture" in output_path.read_text(encoding="utf-8")


def test_load_runs_view_humanizes_completed_execution_end_time(
    monkeypatch, tmp_path: Path
) -> None:
    """load_runs_view() renders both a humanized and an ISO end timestamp."""
    db_path = tmp_path / "runs-view.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
    app = create_app()
    app.config["REPUB_LOG_DIR"] = log_dir

    source = create_source(
        name="Completed source",
        slug="completed-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/completed.xml",
    )
    job = Job.get(Job.source == source)
    # Fixed reference "now" makes the humanized delta deterministic.
    reference_time = datetime(2026, 1, 15, 12, 0, tzinfo=UTC)
    ended_at = reference_time - timedelta(hours=2)
    JobExecution.create(
        job=job,
        running_status=JobExecutionStatus.SUCCEEDED,
        ended_at=ended_at,
    )

    view = load_runs_view(log_dir=app.config["REPUB_LOG_DIR"], now=reference_time)
    completed = view["completed"][0]
    assert completed["ended_at"] == "2 hours ago"
    assert completed["ended_at_iso"] == ended_at.isoformat()


def test_render_runs_uses_database_backed_jobs_and_executions(
    monkeypatch, tmp_path: Path
) -> None:
    """render_runs() surfaces jobs and completed executions from the database."""
    db_path = tmp_path / "runs-page.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
    app = create_app()
    app.config["REPUB_LOG_DIR"] = log_dir
    save_setting("feed_url", "http://localhost:8080")

    source = create_source(
        name="Runs page source",
        slug="runs-page-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=True,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url=FIXTURE_FEED_PATH.as_uri(),
    )
    job = Job.get(Job.source == source)
    runtime = get_job_runtime(app)
    runtime.start()
    try:
        execution_id = runtime.run_job_now(job.id, reason="manual")
        assert execution_id is not None
        execution = _wait_for_terminal_execution(execution_id)

        async def run() -> None:
            body = str(await render_runs(app))
            assert "runs-page-source" in body
            assert "Running jobs" in body
            assert "Scheduled jobs" in body
            assert "Completed job executions" in body
            assert f"/job/{job.id}/execution/{execution.get_id()}/logs" in body
            assert "Succeeded" in body
            assert "Run now" in body

        asyncio.run(run())
    finally:
        runtime.shutdown()


def test_render_execution_logs_handles_missing_execution_and_missing_log_file(
    monkeypatch, tmp_path: Path
) -> None:
    """render_execution_logs() degrades gracefully for unknown ids and absent logs."""
    db_path = tmp_path / "log-errors.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))
    app = create_app()
    app.config["REPUB_LOG_DIR"] = log_dir

    source = create_source(
        name="Log source",
        slug="log-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/log-source.xml",
    )
    job = Job.get(Job.source == source)
    # Execution row exists but no log file was ever written for it.
    execution = JobExecution.create(
        job=job,
        running_status=JobExecutionStatus.FAILED,
    )

    async def run() -> None:
        missing_execution = str(
            await render_execution_logs(app, job_id=job.id, execution_id=9999)
        )
        missing_log = str(
            await render_execution_logs(app, job_id=job.id, execution_id=execution.id)
        )
        assert "Execution log unavailable" in missing_execution
        assert "Execution does not exist." in missing_execution
        assert "Execution log unavailable" in missing_log
        assert "Log file has not been created yet." in missing_log

    asyncio.run(run())


def test_delete_job_action_removes_source_job_and_execution_history(
    monkeypatch, tmp_path: Path
) -> None:
    """POST /actions/jobs/<id>/delete cascades to source, job, and executions."""
    db_path = tmp_path / "delete-job.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        client = app.test_client()
        source = create_source(
            name="Delete source",
            slug="delete-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/30",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/delete.xml",
        )
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            running_status=JobExecutionStatus.SUCCEEDED,
        )

        response = await client.post(f"/actions/jobs/{job.id}/delete")

        assert response.status_code == 204
        assert Source.get_or_none(Source.slug == "delete-source") is None
        assert Job.get_or_none(id=job.id) is None
        assert JobExecution.get_or_none(id=int(execution.get_id())) is None

    asyncio.run(run())


def test_delete_source_action_removes_source_job_and_execution_history(
    monkeypatch, tmp_path: Path
) -> None:
    """POST /actions/sources/<slug>/delete cascades to source, job, and executions."""
    db_path = tmp_path / "delete-source.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(db_path))

    async def run() -> None:
        app = create_app()
        client = app.test_client()
        source = create_source(
            name="Delete source row",
            slug="delete-source-row",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/30",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/delete-source-row.xml",
        )
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            running_status=JobExecutionStatus.SUCCEEDED,
        )

        response = await client.post("/actions/sources/delete-source-row/delete")

        assert response.status_code == 204
        assert Source.get_or_none(Source.slug == "delete-source-row") is None
        assert Job.get_or_none(id=job.id) is None
        assert JobExecution.get_or_none(id=int(execution.get_id())) is None

    asyncio.run(run())


def _wait_for_running_execution(
    execution_id: int, *, timeout_seconds: float = 2.0
) -> JobExecution:
    """Poll until the execution is RUNNING; fail the test on timeout."""
    deadline = time.monotonic() + timeout_seconds
    while time.monotonic() < deadline:
        execution = JobExecution.get_by_id(execution_id)
        if execution.running_status == JobExecutionStatus.RUNNING:
            return execution
        time.sleep(0.02)
    raise AssertionError(f"execution {execution_id} never entered RUNNING state")


def _wait_for_execution_status(
    execution_id: int,
    status: JobExecutionStatus,
    *,
    timeout_seconds: float = 2.0,
) -> JobExecution:
    """Poll until the execution reaches ``status``; fail the test on timeout."""
    deadline = time.monotonic() + timeout_seconds
    while time.monotonic() < deadline:
        execution = JobExecution.get_by_id(execution_id)
        if execution.running_status == status:
            return execution
        time.sleep(0.02)
    raise AssertionError(f"execution {execution_id} never entered {status.name}")


def _wait_for_terminal_execution(
    execution_id: int, *, timeout_seconds: float = 4.0
) -> JobExecution:
    """Poll until the execution reaches any terminal state; fail on timeout."""
    deadline = time.monotonic() + timeout_seconds
    while time.monotonic() < deadline:
        execution = JobExecution.get_by_id(execution_id)
        if execution.running_status in {
            JobExecutionStatus.SUCCEEDED,
            JobExecutionStatus.FAILED,
            JobExecutionStatus.CANCELED,
        }:
            return execution
        time.sleep(0.02)
    raise AssertionError(f"execution {execution_id} did not finish in time")


class _SlowFeedRequestHandler(BaseHTTPRequestHandler):
    """Serves the fixture feed after a 2s delay so jobs stay RUNNING in tests."""

    def do_GET(self) -> None:  # noqa: N802
        time.sleep(2.0)
        payload = FIXTURE_FEED_PATH.read_bytes()
        self.send_response(200)
        self.send_header("Content-Type", "application/rss+xml; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def log_message(self, format: str, *args: object) -> None:
        # Silence per-request stderr logging during tests.
        del format, args


class _ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
    """TCP server that handles each request in its own thread."""

    allow_reuse_address = True


class _slow_feed_server:
    """Context manager serving the slow fixture feed; yields its base URL."""

    def __enter__(self) -> str:
        # Port 0 lets the OS pick a free port, avoiding collisions.
        self._server = _ThreadedTCPServer(("127.0.0.1", 0), _SlowFeedRequestHandler)
        self._thread = threading.Thread(
            target=self._server.serve_forever,
            kwargs={"poll_interval": 0.01},
            daemon=True,
        )
        self._thread.start()
        host = str(self._server.server_address[0])
        port = int(self._server.server_address[1])
        return f"http://{host}:{port}/slow-feed.rss"

    def __exit__(self, exc_type, exc, tb) -> None:
        del exc_type, exc, tb
        self._server.shutdown()
        self._server.server_close()
        self._thread.join(timeout=1)