2026-03-30 14:02:39 +02:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
import json
|
2026-03-30 15:04:41 +02:00
|
|
|
import socketserver
|
2026-03-30 15:53:04 +02:00
|
|
|
import subprocess
|
|
|
|
|
import sys
|
2026-03-30 15:04:41 +02:00
|
|
|
import threading
|
2026-03-30 14:02:39 +02:00
|
|
|
import time
|
2026-03-30 15:04:41 +02:00
|
|
|
from datetime import UTC, datetime, timedelta
|
|
|
|
|
from http.server import BaseHTTPRequestHandler
|
2026-03-30 14:02:39 +02:00
|
|
|
from pathlib import Path
|
|
|
|
|
|
2026-03-30 15:04:41 +02:00
|
|
|
from repub.job_runner import generate_pangea_feed
|
|
|
|
|
from repub.jobs import JobArtifacts, JobRuntime, load_runs_view
|
2026-03-30 14:02:39 +02:00
|
|
|
from repub.model import (
|
|
|
|
|
Job,
|
|
|
|
|
JobExecution,
|
|
|
|
|
JobExecutionStatus,
|
|
|
|
|
Source,
|
|
|
|
|
create_source,
|
|
|
|
|
initialize_database,
|
2026-03-30 18:26:02 +02:00
|
|
|
save_setting,
|
2026-03-30 14:02:39 +02:00
|
|
|
)
|
|
|
|
|
from repub.web import create_app, get_job_runtime, render_execution_logs, render_runs
|
|
|
|
|
|
2026-03-30 15:04:41 +02:00
|
|
|
# Absolute path to the demo RSS fixture; tests serve it via a file:// URI
# (see FIXTURE_FEED_PATH.as_uri() usages below).
FIXTURE_FEED_PATH = (
    Path(__file__).resolve().parents[1] / "demo" / "fixtures" / "local-feed.rss"
).resolve()
|
|
|
|
|
|
2026-03-30 14:02:39 +02:00
|
|
|
|
2026-03-31 12:14:47 +02:00
|
|
|
def initialize_runtime_database(db_path: Path) -> None:
    """Create a fresh database at *db_path* and seed the base feed_url setting.

    Shared setup for every runtime test in this module.
    """
    initialize_database(db_path)
    # The runtime/web layer reads this setting; tests use a fixed local value.
    save_setting("feed_url", "http://localhost:8080")
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 14:02:39 +02:00
|
|
|
def test_job_runtime_syncs_enabled_jobs_into_apscheduler(tmp_path: Path) -> None:
    """sync_jobs registers only enabled jobs and drops jobs toggled off."""
    initialize_runtime_database(tmp_path / "scheduler.db")
    shared_cron = {
        "cron_hour": "*",
        "cron_day_of_month": "*",
        "cron_day_of_week": "*",
        "cron_month": "*",
    }
    enabled_source = create_source(
        name="Enabled source",
        slug="enabled-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=True,
        cron_minute="*/5",
        feed_url="https://example.com/enabled.xml",
        **shared_cron,
    )
    disabled_source = create_source(
        name="Disabled source",
        slug="disabled-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="15",
        feed_url="https://example.com/disabled.xml",
        **shared_cron,
    )
    enabled_job = Job.get(Job.source == enabled_source)
    disabled_job = Job.get(Job.source == disabled_source)

    runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
    try:
        runtime.start()
        runtime.sync_jobs()

        registered = {entry.id for entry in runtime.scheduler.get_jobs()}
        assert f"job-{enabled_job.id}" in registered
        assert f"job-{disabled_job.id}" not in registered

        # Toggling the job off must remove it on the next sync pass.
        enabled_job.enabled = False
        enabled_job.save()
        runtime.sync_jobs()

        registered = {entry.id for entry in runtime.scheduler.get_jobs()}
        assert f"job-{enabled_job.id}" not in registered
    finally:
        runtime.shutdown()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_job_runtime_run_now_writes_log_and_stats_and_marks_success(
    tmp_path: Path,
) -> None:
    """A manual run against the local fixture feed succeeds end-to-end:
    execution row is terminal SUCCEEDED with counters, log/stats artifacts
    exist, and the rendered feed file contains the fixture titles."""
    initialize_runtime_database(tmp_path / "run-now.db")
    source = create_source(
        name="Manual source",
        slug="manual-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        # file:// URI so the worker needs no network access.
        feed_url=FIXTURE_FEED_PATH.as_uri(),
    )
    job = Job.get(Job.source == source)

    runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
    try:
        runtime.start()
        execution_id = runtime.run_job_now(job.id, reason="manual")
        assert execution_id is not None
        execution = _wait_for_terminal_execution(execution_id)
        artifacts = JobArtifacts.for_execution(
            log_dir=tmp_path / "out" / "logs",
            job_id=job.id,
            execution_id=execution_id,
        )

        assert execution.running_status == JobExecutionStatus.SUCCEEDED
        assert execution.started_at is not None
        assert execution.ended_at is not None
        assert execution.requests_count > 0
        assert execution.items_count > 0
        assert execution.bytes_count > 0
        assert artifacts.log_path.exists()
        assert artifacts.stats_path.exists()
        # Output feed is written under out/feeds/<slug>/feed.rss.
        output_path = tmp_path / "out" / "feeds" / "manual-source" / "feed.rss"
        assert output_path.exists()
        output_text = output_path.read_text(encoding="utf-8")
        assert "<title>Local Demo Feed</title>" in output_text
        assert "<title>Local Demo Entry</title>" in output_text

        # Stats file is JSON-lines; the final line mirrors the DB counters.
        stats_lines = [
            json.loads(line)
            for line in artifacts.stats_path.read_text(encoding="utf-8").splitlines()
        ]
        assert len(stats_lines) >= 2
        assert stats_lines[-1]["requests_count"] == execution.requests_count
    finally:
        runtime.shutdown()
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 18:26:02 +02:00
|
|
|
def test_job_runtime_respects_max_concurrent_jobs_setting(tmp_path: Path) -> None:
    """With max_concurrent_jobs=1, a second manual run stays PENDING while
    the first is RUNNING; canceling the first ends it as CANCELED."""
    db_path = tmp_path / "max-concurrency.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    # The slow feed server keeps the first execution running long enough to
    # observe queueing behavior deterministically.
    with _slow_feed_server() as feed_url:
        first_source = create_source(
            name="First source",
            slug="first-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        second_source = create_source(
            name="Second source",
            slug="second-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        first_job = Job.get(Job.source == first_source)
        second_job = Job.get(Job.source == second_source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            first_execution_id = runtime.run_job_now(first_job.id, reason="manual")

            assert first_execution_id is not None
            _wait_for_running_execution(first_execution_id)

            second_execution_id = runtime.run_job_now(second_job.id, reason="manual")

            assert second_execution_id is not None
            second_execution = _wait_for_execution_status(
                second_execution_id,
                JobExecutionStatus.PENDING,
            )
            # Exactly one RUNNING row: the concurrency cap is enforced.
            assert (
                JobExecution.select()
                .where(JobExecution.running_status == JobExecutionStatus.RUNNING)
                .count()
                == 1
            )
            assert second_execution.started_at is None
            assert (
                JobExecution.select()
                .where(JobExecution.running_status == JobExecutionStatus.PENDING)
                .count()
                == 1
            )
            runtime.request_execution_cancel(first_execution_id)
            finished_execution = _wait_for_terminal_execution(first_execution_id)
            assert finished_execution.running_status == JobExecutionStatus.CANCELED
        finally:
            runtime.shutdown()
|
|
|
|
|
|
|
|
|
|
|
2026-03-31 09:24:46 +02:00
|
|
|
def test_job_runtime_starts_queued_execution_after_capacity_opens(
    tmp_path: Path,
) -> None:
    """Canceling the running execution frees the single slot; the queued
    execution then runs and succeeds."""
    db_path = tmp_path / "drain-queue.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        first_source = create_source(
            name="First source",
            slug="first-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        second_source = create_source(
            name="Second source",
            slug="second-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            # Fast local fixture so the drained execution finishes quickly.
            feed_url=FIXTURE_FEED_PATH.as_uri(),
        )
        first_job = Job.get(Job.source == first_source)
        second_job = Job.get(Job.source == second_source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            first_execution_id = runtime.run_job_now(first_job.id, reason="manual")
            assert first_execution_id is not None
            _wait_for_running_execution(first_execution_id)

            second_execution_id = runtime.run_job_now(second_job.id, reason="manual")
            assert second_execution_id is not None
            _wait_for_execution_status(second_execution_id, JobExecutionStatus.PENDING)

            runtime.request_execution_cancel(first_execution_id)
            finished_execution = _wait_for_terminal_execution(first_execution_id)
            assert finished_execution.running_status == JobExecutionStatus.CANCELED

            # Queue drains: the pending execution starts and completes.
            _wait_for_running_execution(second_execution_id)
            drained_execution = _wait_for_terminal_execution(second_execution_id)
            assert drained_execution.running_status == JobExecutionStatus.SUCCEEDED
            assert drained_execution.started_at is not None
        finally:
            runtime.shutdown()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_job_runtime_deduplicates_manual_queue_requests(tmp_path: Path) -> None:
    """Two manual run requests for the same job while capacity is exhausted
    collapse into a single PENDING execution row."""
    db_path = tmp_path / "queue-dedup.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        blocking_source = create_source(
            name="Blocking source",
            slug="blocking-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        queued_source = create_source(
            name="Queued source",
            slug="queued-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/queued.xml",
        )
        blocking_job = Job.get(Job.source == blocking_source)
        queued_job = Job.get(Job.source == queued_source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            blocking_execution_id = runtime.run_job_now(
                blocking_job.id, reason="manual"
            )
            assert blocking_execution_id is not None
            _wait_for_running_execution(blocking_execution_id)

            # Both requests land while the slot is occupied.
            first_pending_id = runtime.run_job_now(queued_job.id, reason="manual")
            second_pending_id = runtime.run_job_now(queued_job.id, reason="manual")

            assert first_pending_id is not None
            # The duplicate request returns the existing pending execution id.
            assert second_pending_id == first_pending_id
            assert (
                JobExecution.select()
                .where(
                    (JobExecution.job == queued_job)
                    & (JobExecution.running_status == JobExecutionStatus.PENDING)
                )
                .count()
                == 1
            )
        finally:
            runtime.shutdown()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_job_runtime_allows_one_running_and_one_pending_per_job(
    tmp_path: Path,
) -> None:
    """Per job: at most one RUNNING plus one PENDING execution, regardless of
    additional manual requests or a scheduled trigger."""
    db_path = tmp_path / "running-plus-pending.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Busy source",
            slug="busy-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            running_execution_id = runtime.run_job_now(job.id, reason="manual")
            assert running_execution_id is not None
            _wait_for_running_execution(running_execution_id)

            pending_execution_id = runtime.run_job_now(job.id, reason="manual")
            duplicate_pending_id = runtime.run_job_now(job.id, reason="manual")
            # A scheduled trigger for the same job must not add a third row.
            runtime.run_scheduled_job(job.id)

            assert pending_execution_id is not None
            assert duplicate_pending_id == pending_execution_id
            assert (
                JobExecution.select()
                .where(JobExecution.job == job)
                .where(JobExecution.running_status == JobExecutionStatus.RUNNING)
                .count()
                == 1
            )
            assert (
                JobExecution.select()
                .where(JobExecution.job == job)
                .where(JobExecution.running_status == JobExecutionStatus.PENDING)
                .count()
                == 1
            )
        finally:
            runtime.shutdown()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_job_runtime_start_drains_pending_rows_created_before_start(
    tmp_path: Path,
) -> None:
    """A PENDING execution row that predates runtime.start() is picked up,
    run, and completed successfully once the runtime comes up."""
    initialize_runtime_database(tmp_path / "startup-drain.db")
    source = create_source(
        name="Queued source",
        slug="queued-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url=FIXTURE_FEED_PATH.as_uri(),
    )
    job = Job.get(Job.source == source)
    # Simulate a queue entry left over from before the process started.
    queued_row = JobExecution.create(
        job=job,
        running_status=JobExecutionStatus.PENDING,
    )
    queued_id = int(queued_row.get_id())

    runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
    try:
        runtime.start()
        _wait_for_running_execution(queued_id)
        completed = _wait_for_terminal_execution(queued_id)

        assert completed.running_status == JobExecutionStatus.SUCCEEDED
        assert completed.started_at is not None
    finally:
        runtime.shutdown()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_job_runtime_scheduled_runs_use_the_persistent_queue(
    tmp_path: Path,
) -> None:
    """Scheduled triggers go through the same DB-backed queue: with one slot
    busy, a second scheduled run is persisted as PENDING, not started."""
    db_path = tmp_path / "scheduled-queue.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        first_source = create_source(
            name="First scheduled source",
            slug="first-scheduled-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        second_source = create_source(
            name="Second scheduled source",
            slug="second-scheduled-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/second-scheduled.xml",
        )
        first_job = Job.get(Job.source == first_source)
        second_job = Job.get(Job.source == second_source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            runtime.run_scheduled_job(first_job.id)
            first_execution = JobExecution.get(JobExecution.job == first_job)
            _wait_for_running_execution(int(first_execution.get_id()))

            runtime.run_scheduled_job(second_job.id)
            second_execution = JobExecution.get(JobExecution.job == second_job)

            assert second_execution.running_status == JobExecutionStatus.PENDING
            assert second_execution.started_at is None
        finally:
            runtime.shutdown()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_job_runtime_cancel_pending_follow_up_keeps_running_worker_alive(
    tmp_path: Path,
) -> None:
    """Canceling a queued (PENDING) follow-up deletes its row without
    disturbing the job's currently RUNNING execution."""
    db_path = tmp_path / "cancel-pending.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    save_setting("max_concurrent_jobs", 1)

    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Cancelable queued source",
            slug="cancelable-queued-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)

        runtime = JobRuntime(log_dir=log_dir)
        try:
            runtime.start()
            running_execution_id = runtime.run_job_now(job.id, reason="manual")
            assert running_execution_id is not None
            _wait_for_running_execution(running_execution_id)

            pending_execution_id = runtime.run_job_now(job.id, reason="manual")
            assert pending_execution_id is not None
            _wait_for_execution_status(pending_execution_id, JobExecutionStatus.PENDING)

            assert runtime.cancel_queued_execution(pending_execution_id) is True
            # The pending row is removed outright...
            assert JobExecution.get_or_none(id=pending_execution_id) is None
            # ...while the in-flight execution keeps running.
            assert (
                JobExecution.get_by_id(running_execution_id).running_status
                == JobExecutionStatus.RUNNING
            )
        finally:
            runtime.shutdown()
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 14:02:39 +02:00
|
|
|
def test_job_runtime_cancel_marks_execution_canceled(tmp_path: Path) -> None:
    """request_execution_cancel on a RUNNING execution ends it as CANCELED,
    records stop/end timestamps, and logs the graceful-stop message."""
    initialize_runtime_database(tmp_path / "cancel.db")
    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Cancelable source",
            slug="cancelable-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)

        runtime = JobRuntime(log_dir=tmp_path / "out" / "logs")
        try:
            runtime.start()
            execution_id = runtime.run_job_now(job.id, reason="manual")
            assert execution_id is not None
            _wait_for_running_execution(execution_id)

            runtime.request_execution_cancel(execution_id)
            execution = _wait_for_terminal_execution(execution_id)
            artifacts = JobArtifacts.for_execution(
                log_dir=tmp_path / "out" / "logs",
                job_id=job.id,
                execution_id=execution_id,
            )

            assert execution.running_status == JobExecutionStatus.CANCELED
            assert execution.ended_at is not None
            assert execution.stop_requested_at is not None
            assert "graceful stop requested" in artifacts.log_path.read_text(
                encoding="utf-8"
            )
        finally:
            runtime.shutdown()
|
2026-03-30 14:02:39 +02:00
|
|
|
|
|
|
|
|
|
2026-03-30 14:18:55 +02:00
|
|
|
def test_job_runtime_start_reconciles_stale_running_execution(tmp_path: Path) -> None:
    """A RUNNING row with no live worker behind it is marked FAILED on
    runtime.start(), and the reconciliation is appended to its log."""
    initialize_runtime_database(tmp_path / "stale-running.db")
    log_dir = tmp_path / "out" / "logs"
    source = create_source(
        name="Stale source",
        slug="stale-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/stale.xml",
    )
    job = Job.get(Job.source == source)
    # Fabricate an execution that claims to be running but has no process.
    stale_row = JobExecution.create(
        job=job,
        started_at="2026-03-30 12:30:00+00:00",
        running_status=JobExecutionStatus.RUNNING,
    )
    artifacts = JobArtifacts.for_execution(
        log_dir=log_dir,
        job_id=job.id,
        execution_id=int(stale_row.get_id()),
    )
    artifacts.log_path.parent.mkdir(parents=True, exist_ok=True)
    artifacts.log_path.write_text(
        "worker: process lost during app restart\n",
        encoding="utf-8",
    )

    runtime = JobRuntime(log_dir=log_dir)
    try:
        runtime.start()
        reconciled = JobExecution.get_by_id(stale_row.get_id())

        assert reconciled.running_status == JobExecutionStatus.FAILED
        assert reconciled.ended_at is not None
        assert "marked failed after app restart" in artifacts.log_path.read_text(
            encoding="utf-8"
        )
    finally:
        runtime.shutdown()
|
|
|
|
|
|
|
|
|
|
|
2026-03-31 10:23:46 +02:00
|
|
|
def test_job_runtime_publishes_refresh_while_jobs_are_running(tmp_path: Path) -> None:
    """poll_workers emits a refresh event via the callback while an
    execution is RUNNING and the refresh interval has elapsed."""
    initialize_runtime_database(tmp_path / "runtime-refresh.db")
    source = create_source(
        name="Running source",
        slug="running-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/running.xml",
    )
    job = Job.get(Job.source == source)
    JobExecution.create(
        job=job,
        started_at=datetime(2026, 3, 30, 12, 0, tzinfo=UTC),
        running_status=JobExecutionStatus.RUNNING,
    )

    received: list[object] = []
    runtime = JobRuntime(
        log_dir=tmp_path / "out" / "logs",
        refresh_callback=received.append,
    )
    # Backdate the last refresh so the interval check passes immediately.
    # NOTE(review): reaches into a private attribute by design of this test.
    runtime._last_runtime_refresh_at = time.monotonic() - 2.0
    runtime.poll_workers()

    assert "refresh-event" in received
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 15:53:04 +02:00
|
|
|
def test_job_runtime_start_reattaches_live_worker_after_app_restart(
    tmp_path: Path,
) -> None:
    """If a worker subprocess is still alive when the app restarts, start()
    reattaches to it instead of failing the execution; the worker then
    completes normally and the log records the reattachment."""
    db_path = tmp_path / "live-worker.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Live worker source",
            slug="live-worker-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            started_at=datetime.now(UTC),
            running_status=JobExecutionStatus.RUNNING,
        )
        artifacts = JobArtifacts.for_execution(
            log_dir=log_dir,
            job_id=job.id,
            execution_id=int(execution.get_id()),
        )
        artifacts.log_path.parent.mkdir(parents=True, exist_ok=True)
        # Line-buffered append handle so worker output lands in the log file.
        log_handle = artifacts.log_path.open("a", encoding="utf-8", buffering=1)
        # Launch a real worker process, as the runtime itself would.
        process = subprocess.Popen(
            [
                sys.executable,
                "-u",
                "-m",
                "repub.job_runner",
                "--job-id",
                str(job.id),
                "--execution-id",
                str(execution.get_id()),
                "--db-path",
                str(db_path),
                "--out-dir",
                str(log_dir.parent),
                "--stats-path",
                str(artifacts.stats_path),
            ],
            stdout=log_handle,
            stderr=subprocess.STDOUT,
            text=True,
        )

        runtime = JobRuntime(log_dir=log_dir)
        try:
            # Give the worker a moment to come up before "restarting" the app.
            time.sleep(0.1)
            runtime.start()

            running_execution = JobExecution.get_by_id(execution.get_id())
            assert running_execution.running_status == JobExecutionStatus.RUNNING
            assert running_execution.ended_at is None

            completed_execution = _wait_for_terminal_execution(int(execution.get_id()))
            assert completed_execution.running_status == JobExecutionStatus.SUCCEEDED
            assert "reattached" in artifacts.log_path.read_text(encoding="utf-8")
        finally:
            runtime.shutdown()
            if process.poll() is None:
                process.kill()
            process.wait(timeout=2)
            log_handle.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_job_runtime_start_restores_live_worker_marked_failed_by_restart_bug(
    tmp_path: Path,
) -> None:
    """An execution wrongly persisted as FAILED (with ended_at set) whose
    worker process is actually still alive is restored to RUNNING on
    start(), completes, and the restoration is logged."""
    db_path = tmp_path / "restore-live-worker.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_runtime_database(db_path)
    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Recovered worker source",
            slug="recovered-worker-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)
        # Row mimics the restart bug: terminal FAILED state, ended_at set.
        execution = JobExecution.create(
            job=job,
            started_at=datetime.now(UTC),
            ended_at=datetime.now(UTC),
            running_status=JobExecutionStatus.FAILED,
        )
        artifacts = JobArtifacts.for_execution(
            log_dir=log_dir,
            job_id=job.id,
            execution_id=int(execution.get_id()),
        )
        artifacts.log_path.parent.mkdir(parents=True, exist_ok=True)
        log_handle = artifacts.log_path.open("a", encoding="utf-8", buffering=1)
        # A real worker process is still running against this execution.
        process = subprocess.Popen(
            [
                sys.executable,
                "-u",
                "-m",
                "repub.job_runner",
                "--job-id",
                str(job.id),
                "--execution-id",
                str(execution.get_id()),
                "--db-path",
                str(db_path),
                "--out-dir",
                str(log_dir.parent),
                "--stats-path",
                str(artifacts.stats_path),
            ],
            stdout=log_handle,
            stderr=subprocess.STDOUT,
            text=True,
        )

        runtime = JobRuntime(log_dir=log_dir)
        try:
            time.sleep(0.1)
            runtime.start()

            restored_execution = JobExecution.get_by_id(execution.get_id())
            assert restored_execution.running_status == JobExecutionStatus.RUNNING
            assert restored_execution.ended_at is None

            completed_execution = _wait_for_terminal_execution(int(execution.get_id()))
            assert completed_execution.running_status == JobExecutionStatus.SUCCEEDED
            assert "restored execution state" in artifacts.log_path.read_text(
                encoding="utf-8"
            )
        finally:
            runtime.shutdown()
            if process.poll() is None:
                process.kill()
            process.wait(timeout=2)
            log_handle.close()
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 15:10:47 +02:00
|
|
|
def test_generate_pangea_feed_writes_pangea_rss_file(
    monkeypatch, tmp_path: Path
) -> None:
    """generate_pangea_feed writes out/feeds/<slug>/pangea.rss via the
    (stubbed) pangea feed class and returns the written path."""

    class StubPangeaFeed:
        """Minimal stand-in exposing the interface generate_pangea_feed uses."""

        def __init__(self, config, feeds):
            self.config = config
            self.feed = feeds[0]

        def acquire_content(self) -> None:
            return None

        def generate_feed(self) -> None:
            return None

        def disgorge(self, slug: str):
            # Write a fixed fixture payload where the real class would
            # render the generated feed.
            target = self.config.results.output_directory / slug / "pangea.rss"
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(
                "<rss><channel><title>Pangea Fixture</title></channel></rss>\n",
                encoding="utf-8",
            )
            return target

    monkeypatch.setattr(
        "repub.job_runner.pangea_feed_class",
        lambda: StubPangeaFeed,
    )

    output_path = generate_pangea_feed(
        name="Pangea source",
        slug="pangea-source",
        domain="example.org",
        category_name="News",
        content_type="articles",
        only_newest=True,
        max_articles=10,
        oldest_article=3,
        include_authors=True,
        exclude_media=False,
        include_content=True,
        content_format="MOBILE_3",
        out_dir=tmp_path / "out",
        log_path=tmp_path / "out" / "logs" / "pangea.log",
    )

    expected_path = tmp_path / "out" / "feeds" / "pangea-source" / "pangea.rss"
    assert output_path == expected_path
    assert output_path.exists()
    assert "Pangea Fixture" in output_path.read_text(encoding="utf-8")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_load_runs_view_humanizes_completed_execution_end_time(
    monkeypatch, tmp_path: Path
) -> None:
    """Completed runs expose both a humanized and an ISO end timestamp."""
    database_file = tmp_path / "runs-view.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(database_file))

    app = create_app()
    app.config["REPUB_LOG_DIR"] = log_dir
    source = create_source(
        name="Completed source",
        slug="completed-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/completed.xml",
    )
    job = Job.get(Job.source == source)

    # Pin "now" so the humanized delta is deterministic.
    now_reference = datetime(2026, 1, 15, 12, 0, tzinfo=UTC)
    finished_at = now_reference - timedelta(hours=2)
    JobExecution.create(
        job=job,
        running_status=JobExecutionStatus.SUCCEEDED,
        ended_at=finished_at,
    )

    view = load_runs_view(log_dir=app.config["REPUB_LOG_DIR"], now=now_reference)
    completed = view["completed"][0]

    assert completed["ended_at"] == "2 hours ago"
    assert completed["ended_at_iso"] == finished_at.isoformat()
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 14:02:39 +02:00
|
|
|
def test_render_runs_uses_database_backed_jobs_and_executions(
    monkeypatch, tmp_path: Path
) -> None:
    """The runs page renders jobs and execution history straight from the DB.

    A real job run is driven through the runtime against a local fixture feed,
    then the rendered HTML is checked for the expected sections and links.
    """
    database_file = tmp_path / "runs-page.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(database_file))

    app = create_app()
    app.config["REPUB_LOG_DIR"] = log_dir
    save_setting("feed_url", "http://localhost:8080")
    source = create_source(
        name="Runs page source",
        slug="runs-page-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=True,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url=FIXTURE_FEED_PATH.as_uri(),
    )
    job = Job.get(Job.source == source)
    runtime = get_job_runtime(app)
    runtime.start()
    try:
        execution_id = runtime.run_job_now(job.id, reason="manual")
        assert execution_id is not None
        execution = _wait_for_terminal_execution(execution_id)

        async def run() -> None:
            html = str(await render_runs(app))

            assert "runs-page-source" in html
            assert "Running jobs" in html
            assert "Scheduled jobs" in html
            assert "Completed job executions" in html
            assert f"/job/{job.id}/execution/{execution.get_id()}/logs" in html
            assert "Succeeded" in html
            assert "Run now" in html

        asyncio.run(run())
    finally:
        # Always stop the scheduler so background threads don't leak.
        runtime.shutdown()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_render_execution_logs_handles_missing_execution_and_missing_log_file(
    monkeypatch, tmp_path: Path
) -> None:
    """The log view degrades gracefully for unknown executions and absent files."""
    database_file = tmp_path / "log-errors.db"
    log_dir = tmp_path / "out" / "logs"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(database_file))

    app = create_app()
    app.config["REPUB_LOG_DIR"] = log_dir
    source = create_source(
        name="Log source",
        slug="log-source",
        source_type="feed",
        notes="",
        spider_arguments="",
        enabled=False,
        cron_minute="*/5",
        cron_hour="*",
        cron_day_of_month="*",
        cron_day_of_week="*",
        cron_month="*",
        feed_url="https://example.com/log-source.xml",
    )
    job = Job.get(Job.source == source)
    # Execution exists in the DB, but no log file was ever written for it.
    execution = JobExecution.create(
        job=job,
        running_status=JobExecutionStatus.FAILED,
    )

    async def run() -> None:
        unknown_execution_page = str(
            await render_execution_logs(app, job_id=job.id, execution_id=9999)
        )
        absent_log_page = str(
            await render_execution_logs(app, job_id=job.id, execution_id=execution.id)
        )

        assert "Execution log unavailable" in unknown_execution_page
        assert "Execution does not exist." in unknown_execution_page
        assert "Execution log unavailable" in absent_log_page
        assert "Log file has not been created yet." in absent_log_page

    asyncio.run(run())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_delete_job_action_removes_source_job_and_execution_history(
    monkeypatch, tmp_path: Path
) -> None:
    """POSTing the job delete action cascades to the source and its executions."""
    database_file = tmp_path / "delete-job.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(database_file))

    async def run() -> None:
        app = create_app()
        client = app.test_client()

        source = create_source(
            name="Delete source",
            slug="delete-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/30",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/delete.xml",
        )
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            running_status=JobExecutionStatus.SUCCEEDED,
        )

        response = await client.post(f"/actions/jobs/{job.id}/delete")

        assert response.status_code == 204
        # Source, job, and execution history must all be gone.
        assert Source.get_or_none(Source.slug == "delete-source") is None
        assert Job.get_or_none(id=job.id) is None
        assert JobExecution.get_or_none(id=int(execution.get_id())) is None

    asyncio.run(run())
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 17:25:37 +02:00
|
|
|
def test_delete_source_action_removes_source_job_and_execution_history(
    monkeypatch, tmp_path: Path
) -> None:
    """POSTing the source delete action removes the source, job, and run history."""
    database_file = tmp_path / "delete-source.db"
    monkeypatch.setenv("REPUBLISHER_DB_PATH", str(database_file))

    async def run() -> None:
        app = create_app()
        client = app.test_client()

        source = create_source(
            name="Delete source row",
            slug="delete-source-row",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=True,
            cron_minute="*/30",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url="https://example.com/delete-source-row.xml",
        )
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            running_status=JobExecutionStatus.SUCCEEDED,
        )

        response = await client.post("/actions/sources/delete-source-row/delete")

        assert response.status_code == 204
        # Deleting by slug must cascade exactly like deleting by job id.
        assert Source.get_or_none(Source.slug == "delete-source-row") is None
        assert Job.get_or_none(id=job.id) is None
        assert JobExecution.get_or_none(id=int(execution.get_id())) is None

    asyncio.run(run())
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 14:02:39 +02:00
|
|
|
def _wait_for_running_execution(
    execution_id: int, *, timeout_seconds: float = 2.0
) -> JobExecution:
    """Poll the DB until the execution reports RUNNING, or fail at the deadline."""
    give_up_at = time.monotonic() + timeout_seconds
    while time.monotonic() < give_up_at:
        current = JobExecution.get_by_id(execution_id)
        if current.running_status == JobExecutionStatus.RUNNING:
            return current
        # Short sleep keeps the poll responsive without busy-waiting.
        time.sleep(0.02)
    raise AssertionError(f"execution {execution_id} never entered RUNNING state")
|
|
|
|
|
|
|
|
|
|
|
2026-03-31 09:24:46 +02:00
|
|
|
def _wait_for_execution_status(
    execution_id: int,
    status: JobExecutionStatus,
    *,
    timeout_seconds: float = 2.0,
) -> JobExecution:
    """Poll the DB until the execution reaches *status*, or fail at the deadline."""
    give_up_at = time.monotonic() + timeout_seconds
    while time.monotonic() < give_up_at:
        current = JobExecution.get_by_id(execution_id)
        if current.running_status == status:
            return current
        # Short sleep keeps the poll responsive without busy-waiting.
        time.sleep(0.02)
    raise AssertionError(f"execution {execution_id} never entered {status.name}")
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 14:02:39 +02:00
|
|
|
def _wait_for_terminal_execution(
    execution_id: int, *, timeout_seconds: float = 4.0
) -> JobExecution:
    """Poll the DB until the execution reaches a terminal state, or fail."""
    terminal_states = (
        JobExecutionStatus.SUCCEEDED,
        JobExecutionStatus.FAILED,
        JobExecutionStatus.CANCELED,
    )
    give_up_at = time.monotonic() + timeout_seconds
    while time.monotonic() < give_up_at:
        current = JobExecution.get_by_id(execution_id)
        if current.running_status in terminal_states:
            return current
        # Short sleep keeps the poll responsive without busy-waiting.
        time.sleep(0.02)
    raise AssertionError(f"execution {execution_id} did not finish in time")
|
2026-03-30 15:04:41 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class _SlowFeedRequestHandler(BaseHTTPRequestHandler):
    """Serves the fixture RSS feed after a fixed delay.

    Used by tests that need an HTTP feed endpoint slow enough to trigger
    client-side timeouts.
    """

    def do_GET(self) -> None:  # noqa: N802
        # Delay long enough that short-timeout clients give up first.
        time.sleep(2.0)
        payload = FIXTURE_FEED_PATH.read_bytes()
        self.send_response(200)
        self.send_header("Content-Type", "application/rss+xml; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def log_message(self, format: str, *args: object) -> None:
        # Suppress BaseHTTPRequestHandler's stderr access logging in tests.
        del format, args
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class _ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
    """TCP server that handles each request in its own thread."""

    # Allow immediate rebinding of the port between test runs (SO_REUSEADDR).
    allow_reuse_address = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class _slow_feed_server:
    """Context manager that runs the slow feed server on an ephemeral port.

    ``__enter__`` starts a daemon thread serving ``_SlowFeedRequestHandler``
    and returns the URL of the slow feed endpoint; ``__exit__`` shuts the
    server down and joins the thread.
    """

    def __enter__(self) -> str:
        # Port 0 lets the OS pick a free ephemeral port.
        self._server = _ThreadedTCPServer(("127.0.0.1", 0), _SlowFeedRequestHandler)
        self._thread = threading.Thread(
            target=self._server.serve_forever,
            # Tight poll interval so shutdown() returns quickly in tests.
            kwargs={"poll_interval": 0.01},
            daemon=True,
        )
        self._thread.start()
        host = str(self._server.server_address[0])
        port = int(self._server.server_address[1])
        return f"http://{host}:{port}/slow-feed.rss"

    def __exit__(self, exc_type, exc, tb) -> None:
        del exc_type, exc, tb
        self._server.shutdown()
        self._server.server_close()
        # Bounded join: the thread is a daemon, so a stuck server can't hang teardown.
        self._thread.join(timeout=1)
|