Add persistent job run queue

Abel Luck 2026-03-31 09:24:46 +02:00
parent 2bd0651478
commit 0b3b1b2731
8 changed files with 1047 additions and 27 deletions
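
The tests in this commit pin down the queue contract: when the runtime is at `max_concurrent_jobs` capacity, `run_job_now` now creates a PENDING `JobExecution` row and returns its id instead of `None`; a repeated manual request for the same job returns the existing pending id rather than enqueueing a duplicate; and PENDING rows are drained at `start()` and whenever a running worker finishes. A minimal sketch of the admission step, reconstructed from those tests rather than from the runtime module (`enqueue_job_run` is a hypothetical stand-in name; `Job`, `JobExecution`, and `JobExecutionStatus` are the project models):

    # Sketch only: reconstructed from the tests below, not from the runtime
    # implementation. enqueue_job_run is a hypothetical stand-in name.
    def enqueue_job_run(job_id: int) -> int:
        job = Job.get_by_id(job_id)
        # Dedup rule: at most one PENDING execution per job, so a repeated
        # manual request hands back the same execution id.
        existing = JobExecution.get_or_none(
            (JobExecution.job == job)
            & (JobExecution.running_status == JobExecutionStatus.PENDING)
        )
        if existing is not None:
            return int(existing.get_id())
        # Otherwise persist a new PENDING row; the runtime starts it right
        # away if a worker slot is free, or drains it once capacity opens.
        execution = JobExecution.create(
            job=job, running_status=JobExecutionStatus.PENDING
        )
        return int(execution.get_id())

Whether the new row starts immediately is a separate capacity check; the drain-after-cancel and drain-at-startup tests below exercise both paths.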

@@ -186,13 +186,24 @@ def test_job_runtime_respects_max_concurrent_jobs_setting(tmp_path: Path) -> None:
             second_execution_id = runtime.run_job_now(second_job.id, reason="manual")
-            assert second_execution_id is None
+            assert second_execution_id is not None
+            second_execution = _wait_for_execution_status(
+                second_execution_id,
+                JobExecutionStatus.PENDING,
+            )
             assert (
                 JobExecution.select()
                 .where(JobExecution.running_status == JobExecutionStatus.RUNNING)
                 .count()
                 == 1
             )
+            assert second_execution.started_at is None
+            assert (
+                JobExecution.select()
+                .where(JobExecution.running_status == JobExecutionStatus.PENDING)
+                .count()
+                == 1
+            )
             runtime.request_execution_cancel(first_execution_id)
             finished_execution = _wait_for_terminal_execution(first_execution_id)
             assert finished_execution.running_status == JobExecutionStatus.CANCELED
@@ -200,6 +211,332 @@ def test_job_runtime_respects_max_concurrent_jobs_setting(tmp_path: Path) -> None:
             runtime.shutdown()


+def test_job_runtime_starts_queued_execution_after_capacity_opens(
+    tmp_path: Path,
+) -> None:
+    db_path = tmp_path / "drain-queue.db"
+    log_dir = tmp_path / "out" / "logs"
+    initialize_database(db_path)
+    save_setting("max_concurrent_jobs", 1)
+    with _slow_feed_server() as feed_url:
+        first_source = create_source(
+            name="First source",
+            slug="first-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=False,
+            cron_minute="*/5",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url=feed_url,
+        )
+        second_source = create_source(
+            name="Second source",
+            slug="second-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=False,
+            cron_minute="*/5",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url=FIXTURE_FEED_PATH.as_uri(),
+        )
+        first_job = Job.get(Job.source == first_source)
+        second_job = Job.get(Job.source == second_source)
+        runtime = JobRuntime(log_dir=log_dir)
+        try:
+            runtime.start()
+            first_execution_id = runtime.run_job_now(first_job.id, reason="manual")
+            assert first_execution_id is not None
+            _wait_for_running_execution(first_execution_id)
+            second_execution_id = runtime.run_job_now(second_job.id, reason="manual")
+            assert second_execution_id is not None
+            _wait_for_execution_status(second_execution_id, JobExecutionStatus.PENDING)
+            runtime.request_execution_cancel(first_execution_id)
+            finished_execution = _wait_for_terminal_execution(first_execution_id)
+            assert finished_execution.running_status == JobExecutionStatus.CANCELED
+            _wait_for_running_execution(second_execution_id)
+            drained_execution = _wait_for_terminal_execution(second_execution_id)
+            assert drained_execution.running_status == JobExecutionStatus.SUCCEEDED
+            assert drained_execution.started_at is not None
+        finally:
+            runtime.shutdown()
+
+
+def test_job_runtime_deduplicates_manual_queue_requests(tmp_path: Path) -> None:
+    db_path = tmp_path / "queue-dedup.db"
+    log_dir = tmp_path / "out" / "logs"
+    initialize_database(db_path)
+    save_setting("max_concurrent_jobs", 1)
+    with _slow_feed_server() as feed_url:
+        blocking_source = create_source(
+            name="Blocking source",
+            slug="blocking-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=False,
+            cron_minute="*/5",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url=feed_url,
+        )
+        queued_source = create_source(
+            name="Queued source",
+            slug="queued-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=False,
+            cron_minute="*/5",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url="https://example.com/queued.xml",
+        )
+        blocking_job = Job.get(Job.source == blocking_source)
+        queued_job = Job.get(Job.source == queued_source)
+        runtime = JobRuntime(log_dir=log_dir)
+        try:
+            runtime.start()
+            blocking_execution_id = runtime.run_job_now(
+                blocking_job.id, reason="manual"
+            )
+            assert blocking_execution_id is not None
+            _wait_for_running_execution(blocking_execution_id)
+            first_pending_id = runtime.run_job_now(queued_job.id, reason="manual")
+            second_pending_id = runtime.run_job_now(queued_job.id, reason="manual")
+            assert first_pending_id is not None
+            assert second_pending_id == first_pending_id
+            assert (
+                JobExecution.select()
+                .where(
+                    (JobExecution.job == queued_job)
+                    & (JobExecution.running_status == JobExecutionStatus.PENDING)
+                )
+                .count()
+                == 1
+            )
+        finally:
+            runtime.shutdown()
+
+
+def test_job_runtime_allows_one_running_and_one_pending_per_job(
+    tmp_path: Path,
+) -> None:
+    db_path = tmp_path / "running-plus-pending.db"
+    log_dir = tmp_path / "out" / "logs"
+    initialize_database(db_path)
+    save_setting("max_concurrent_jobs", 1)
+    with _slow_feed_server() as feed_url:
+        source = create_source(
+            name="Busy source",
+            slug="busy-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=False,
+            cron_minute="*/5",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url=feed_url,
+        )
+        job = Job.get(Job.source == source)
+        runtime = JobRuntime(log_dir=log_dir)
+        try:
+            runtime.start()
+            running_execution_id = runtime.run_job_now(job.id, reason="manual")
+            assert running_execution_id is not None
+            _wait_for_running_execution(running_execution_id)
+            pending_execution_id = runtime.run_job_now(job.id, reason="manual")
+            duplicate_pending_id = runtime.run_job_now(job.id, reason="manual")
+            runtime.run_scheduled_job(job.id)
+            assert pending_execution_id is not None
+            assert duplicate_pending_id == pending_execution_id
+            assert (
+                JobExecution.select()
+                .where(JobExecution.job == job)
+                .where(JobExecution.running_status == JobExecutionStatus.RUNNING)
+                .count()
+                == 1
+            )
+            assert (
+                JobExecution.select()
+                .where(JobExecution.job == job)
+                .where(JobExecution.running_status == JobExecutionStatus.PENDING)
+                .count()
+                == 1
+            )
+        finally:
+            runtime.shutdown()
+
+
+def test_job_runtime_start_drains_pending_rows_created_before_start(
+    tmp_path: Path,
+) -> None:
+    db_path = tmp_path / "startup-drain.db"
+    log_dir = tmp_path / "out" / "logs"
+    initialize_database(db_path)
+    source = create_source(
+        name="Queued source",
+        slug="queued-source",
+        source_type="feed",
+        notes="",
+        spider_arguments="",
+        enabled=False,
+        cron_minute="*/5",
+        cron_hour="*",
+        cron_day_of_month="*",
+        cron_day_of_week="*",
+        cron_month="*",
+        feed_url=FIXTURE_FEED_PATH.as_uri(),
+    )
+    job = Job.get(Job.source == source)
+    pending_execution = JobExecution.create(
+        job=job,
+        running_status=JobExecutionStatus.PENDING,
+    )
+    runtime = JobRuntime(log_dir=log_dir)
+    try:
+        runtime.start()
+        _wait_for_running_execution(int(pending_execution.get_id()))
+        drained_execution = _wait_for_terminal_execution(
+            int(pending_execution.get_id())
+        )
+        assert drained_execution.running_status == JobExecutionStatus.SUCCEEDED
+        assert drained_execution.started_at is not None
+    finally:
+        runtime.shutdown()
+
+
+def test_job_runtime_scheduled_runs_use_the_persistent_queue(
+    tmp_path: Path,
+) -> None:
+    db_path = tmp_path / "scheduled-queue.db"
+    log_dir = tmp_path / "out" / "logs"
+    initialize_database(db_path)
+    save_setting("max_concurrent_jobs", 1)
+    with _slow_feed_server() as feed_url:
+        first_source = create_source(
+            name="First scheduled source",
+            slug="first-scheduled-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=True,
+            cron_minute="*",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url=feed_url,
+        )
+        second_source = create_source(
+            name="Second scheduled source",
+            slug="second-scheduled-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=True,
+            cron_minute="*",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url="https://example.com/second-scheduled.xml",
+        )
+        first_job = Job.get(Job.source == first_source)
+        second_job = Job.get(Job.source == second_source)
+        runtime = JobRuntime(log_dir=log_dir)
+        try:
+            runtime.start()
+            runtime.run_scheduled_job(first_job.id)
+            first_execution = JobExecution.get(JobExecution.job == first_job)
+            _wait_for_running_execution(int(first_execution.get_id()))
+            runtime.run_scheduled_job(second_job.id)
+            second_execution = JobExecution.get(JobExecution.job == second_job)
+            assert second_execution.running_status == JobExecutionStatus.PENDING
+            assert second_execution.started_at is None
+        finally:
+            runtime.shutdown()
+
+
+def test_job_runtime_cancel_pending_follow_up_keeps_running_worker_alive(
+    tmp_path: Path,
+) -> None:
+    db_path = tmp_path / "cancel-pending.db"
+    log_dir = tmp_path / "out" / "logs"
+    initialize_database(db_path)
+    save_setting("max_concurrent_jobs", 1)
+    with _slow_feed_server() as feed_url:
+        source = create_source(
+            name="Cancelable queued source",
+            slug="cancelable-queued-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=False,
+            cron_minute="*/5",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url=feed_url,
+        )
+        job = Job.get(Job.source == source)
+        runtime = JobRuntime(log_dir=log_dir)
+        try:
+            runtime.start()
+            running_execution_id = runtime.run_job_now(job.id, reason="manual")
+            assert running_execution_id is not None
+            _wait_for_running_execution(running_execution_id)
+            pending_execution_id = runtime.run_job_now(job.id, reason="manual")
+            assert pending_execution_id is not None
+            _wait_for_execution_status(pending_execution_id, JobExecutionStatus.PENDING)
+            assert runtime.cancel_queued_execution(pending_execution_id) is True
+            assert JobExecution.get_or_none(id=pending_execution_id) is None
+            assert (
+                JobExecution.get_by_id(running_execution_id).running_status
+                == JobExecutionStatus.RUNNING
+            )
+        finally:
+            runtime.shutdown()
+
+
 def test_job_runtime_cancel_marks_execution_canceled(tmp_path: Path) -> None:
     initialize_database(tmp_path / "cancel.db")
     with _slow_feed_server() as feed_url:
@@ -571,7 +908,7 @@ def test_render_runs_uses_database_backed_jobs_and_executions(
assert "runs-page-source" in body
assert "Running job executions" in body
assert "Upcoming jobs" in body
assert "Scheduled jobs" in body
assert "Completed job executions" in body
assert f"/job/{job.id}/execution/{execution.get_id()}/logs" in body
assert "Succeeded" in body
@@ -719,6 +1056,21 @@ def _wait_for_running_execution(
raise AssertionError(f"execution {execution_id} never entered RUNNING state")
def _wait_for_execution_status(
execution_id: int,
status: JobExecutionStatus,
*,
timeout_seconds: float = 2.0,
) -> JobExecution:
deadline = time.monotonic() + timeout_seconds
while time.monotonic() < deadline:
execution = JobExecution.get_by_id(execution_id)
if execution.running_status == status:
return execution
time.sleep(0.02)
raise AssertionError(f"execution {execution_id} never entered {status.name}")
def _wait_for_terminal_execution(
execution_id: int, *, timeout_seconds: float = 4.0
) -> JobExecution:
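
The hunk ends inside `_wait_for_terminal_execution`, so its body is not part of this excerpt. Mirroring `_wait_for_execution_status` above, a polling loop along these lines would satisfy every call site in the tests; the exact terminal-status set is an assumption (SUCCEEDED and CANCELED appear in the tests, FAILED is inferred):

    # Assumed shape only; the real body is cut off by the hunk above.
    _TERMINAL_STATUSES = {
        JobExecutionStatus.SUCCEEDED,
        JobExecutionStatus.FAILED,  # inferred; not shown in this excerpt
        JobExecutionStatus.CANCELED,
    }

    def _wait_for_terminal_execution(
        execution_id: int, *, timeout_seconds: float = 4.0
    ) -> JobExecution:
        deadline = time.monotonic() + timeout_seconds
        while time.monotonic() < deadline:
            # Poll the row until it leaves PENDING/RUNNING or we time out.
            execution = JobExecution.get_by_id(execution_id)
            if execution.running_status in _TERMINAL_STATUSES:
                return execution
            time.sleep(0.02)
        raise AssertionError(
            f"execution {execution_id} never reached a terminal status"
        )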