Add persistent job run queue
parent 2bd0651478
commit 0b3b1b2731
8 changed files with 1047 additions and 27 deletions
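
The diff below pins down the contract of the new persistent queue: when the runtime is already at the `max_concurrent_jobs` limit, `run_job_now` no longer returns `None`; it records a PENDING `JobExecution` row, returns that row's id, and the runtime promotes the row to RUNNING once a slot frees up. As a minimal sketch of the enqueue path the tests imply (not this commit's implementation; `_running_count` and `_start_worker` are hypothetical helpers):

    # Sketch only: the capacity-gated enqueue behavior the tests below expect.
    # _running_count() and _start_worker() are hypothetical placeholders.
    def run_job_now(self, job_id: int, *, reason: str) -> int | None:
        job = Job.get_or_none(Job.id == job_id)
        if job is None:
            return None
        pending = JobExecution.get_or_none(
            (JobExecution.job == job)
            & (JobExecution.running_status == JobExecutionStatus.PENDING)
        )
        if pending is not None:
            # Deduplicate: a repeated request reuses the queued row.
            return int(pending.get_id())
        execution = JobExecution.create(
            job=job, running_status=JobExecutionStatus.PENDING
        )
        if self._running_count() < self._max_concurrent_jobs:
            self._start_worker(execution)  # flips the row to RUNNING, sets started_at
        return int(execution.get_id())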
@@ -186,13 +186,24 @@ def test_job_runtime_respects_max_concurrent_jobs_setting(tmp_path: Path) -> None:
             second_execution_id = runtime.run_job_now(second_job.id, reason="manual")
 
-            assert second_execution_id is None
+            assert second_execution_id is not None
+            second_execution = _wait_for_execution_status(
+                second_execution_id,
+                JobExecutionStatus.PENDING,
+            )
             assert (
                 JobExecution.select()
                 .where(JobExecution.running_status == JobExecutionStatus.RUNNING)
                 .count()
                 == 1
             )
+            assert second_execution.started_at is None
+            assert (
+                JobExecution.select()
+                .where(JobExecution.running_status == JobExecutionStatus.PENDING)
+                .count()
+                == 1
+            )
             runtime.request_execution_cancel(first_execution_id)
             finished_execution = _wait_for_terminal_execution(first_execution_id)
             assert finished_execution.running_status == JobExecutionStatus.CANCELED
 
@@ -200,6 +211,332 @@ def test_job_runtime_respects_max_concurrent_jobs_setting(tmp_path: Path) -> None:
         finally:
             runtime.shutdown()
 
 
+def test_job_runtime_starts_queued_execution_after_capacity_opens(
+    tmp_path: Path,
+) -> None:
+    db_path = tmp_path / "drain-queue.db"
+    log_dir = tmp_path / "out" / "logs"
+    initialize_database(db_path)
+    save_setting("max_concurrent_jobs", 1)
+
+    with _slow_feed_server() as feed_url:
+        first_source = create_source(
+            name="First source",
+            slug="first-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=False,
+            cron_minute="*/5",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url=feed_url,
+        )
+        second_source = create_source(
+            name="Second source",
+            slug="second-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=False,
+            cron_minute="*/5",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url=FIXTURE_FEED_PATH.as_uri(),
+        )
+        first_job = Job.get(Job.source == first_source)
+        second_job = Job.get(Job.source == second_source)
+
+        runtime = JobRuntime(log_dir=log_dir)
+        try:
+            runtime.start()
+            first_execution_id = runtime.run_job_now(first_job.id, reason="manual")
+            assert first_execution_id is not None
+            _wait_for_running_execution(first_execution_id)
+
+            second_execution_id = runtime.run_job_now(second_job.id, reason="manual")
+            assert second_execution_id is not None
+            _wait_for_execution_status(second_execution_id, JobExecutionStatus.PENDING)
+
+            runtime.request_execution_cancel(first_execution_id)
+            finished_execution = _wait_for_terminal_execution(first_execution_id)
+            assert finished_execution.running_status == JobExecutionStatus.CANCELED
+
+            _wait_for_running_execution(second_execution_id)
+            drained_execution = _wait_for_terminal_execution(second_execution_id)
+            assert drained_execution.running_status == JobExecutionStatus.SUCCEEDED
+            assert drained_execution.started_at is not None
+        finally:
+            runtime.shutdown()
+
+
+def test_job_runtime_deduplicates_manual_queue_requests(tmp_path: Path) -> None:
+    db_path = tmp_path / "queue-dedup.db"
+    log_dir = tmp_path / "out" / "logs"
+    initialize_database(db_path)
+    save_setting("max_concurrent_jobs", 1)
+
+    with _slow_feed_server() as feed_url:
+        blocking_source = create_source(
+            name="Blocking source",
+            slug="blocking-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=False,
+            cron_minute="*/5",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url=feed_url,
+        )
+        queued_source = create_source(
+            name="Queued source",
+            slug="queued-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=False,
+            cron_minute="*/5",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url="https://example.com/queued.xml",
+        )
+        blocking_job = Job.get(Job.source == blocking_source)
+        queued_job = Job.get(Job.source == queued_source)
+
+        runtime = JobRuntime(log_dir=log_dir)
+        try:
+            runtime.start()
+            blocking_execution_id = runtime.run_job_now(
+                blocking_job.id, reason="manual"
+            )
+            assert blocking_execution_id is not None
+            _wait_for_running_execution(blocking_execution_id)
+
+            first_pending_id = runtime.run_job_now(queued_job.id, reason="manual")
+            second_pending_id = runtime.run_job_now(queued_job.id, reason="manual")
+
+            assert first_pending_id is not None
+            assert second_pending_id == first_pending_id
+            assert (
+                JobExecution.select()
+                .where(
+                    (JobExecution.job == queued_job)
+                    & (JobExecution.running_status == JobExecutionStatus.PENDING)
+                )
+                .count()
+                == 1
+            )
+        finally:
+            runtime.shutdown()
+
+
+def test_job_runtime_allows_one_running_and_one_pending_per_job(
+    tmp_path: Path,
+) -> None:
+    db_path = tmp_path / "running-plus-pending.db"
+    log_dir = tmp_path / "out" / "logs"
+    initialize_database(db_path)
+    save_setting("max_concurrent_jobs", 1)
+
+    with _slow_feed_server() as feed_url:
+        source = create_source(
+            name="Busy source",
+            slug="busy-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=False,
+            cron_minute="*/5",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url=feed_url,
+        )
+        job = Job.get(Job.source == source)
+
+        runtime = JobRuntime(log_dir=log_dir)
+        try:
+            runtime.start()
+            running_execution_id = runtime.run_job_now(job.id, reason="manual")
+            assert running_execution_id is not None
+            _wait_for_running_execution(running_execution_id)
+
+            pending_execution_id = runtime.run_job_now(job.id, reason="manual")
+            duplicate_pending_id = runtime.run_job_now(job.id, reason="manual")
+            runtime.run_scheduled_job(job.id)
+
+            assert pending_execution_id is not None
+            assert duplicate_pending_id == pending_execution_id
+            assert (
+                JobExecution.select()
+                .where(JobExecution.job == job)
+                .where(JobExecution.running_status == JobExecutionStatus.RUNNING)
+                .count()
+                == 1
+            )
+            assert (
+                JobExecution.select()
+                .where(JobExecution.job == job)
+                .where(JobExecution.running_status == JobExecutionStatus.PENDING)
+                .count()
+                == 1
+            )
+        finally:
+            runtime.shutdown()
+
+
+def test_job_runtime_start_drains_pending_rows_created_before_start(
+    tmp_path: Path,
+) -> None:
+    db_path = tmp_path / "startup-drain.db"
+    log_dir = tmp_path / "out" / "logs"
+    initialize_database(db_path)
+    source = create_source(
+        name="Queued source",
+        slug="queued-source",
+        source_type="feed",
+        notes="",
+        spider_arguments="",
+        enabled=False,
+        cron_minute="*/5",
+        cron_hour="*",
+        cron_day_of_month="*",
+        cron_day_of_week="*",
+        cron_month="*",
+        feed_url=FIXTURE_FEED_PATH.as_uri(),
+    )
+    job = Job.get(Job.source == source)
+    pending_execution = JobExecution.create(
+        job=job,
+        running_status=JobExecutionStatus.PENDING,
+    )
+
+    runtime = JobRuntime(log_dir=log_dir)
+    try:
+        runtime.start()
+        _wait_for_running_execution(int(pending_execution.get_id()))
+        drained_execution = _wait_for_terminal_execution(
+            int(pending_execution.get_id())
+        )
+
+        assert drained_execution.running_status == JobExecutionStatus.SUCCEEDED
+        assert drained_execution.started_at is not None
+    finally:
+        runtime.shutdown()
+
+
+def test_job_runtime_scheduled_runs_use_the_persistent_queue(
+    tmp_path: Path,
+) -> None:
+    db_path = tmp_path / "scheduled-queue.db"
+    log_dir = tmp_path / "out" / "logs"
+    initialize_database(db_path)
+    save_setting("max_concurrent_jobs", 1)
+
+    with _slow_feed_server() as feed_url:
+        first_source = create_source(
+            name="First scheduled source",
+            slug="first-scheduled-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=True,
+            cron_minute="*",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url=feed_url,
+        )
+        second_source = create_source(
+            name="Second scheduled source",
+            slug="second-scheduled-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=True,
+            cron_minute="*",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url="https://example.com/second-scheduled.xml",
+        )
+        first_job = Job.get(Job.source == first_source)
+        second_job = Job.get(Job.source == second_source)
+
+        runtime = JobRuntime(log_dir=log_dir)
+        try:
+            runtime.start()
+            runtime.run_scheduled_job(first_job.id)
+            first_execution = JobExecution.get(JobExecution.job == first_job)
+            _wait_for_running_execution(int(first_execution.get_id()))
+
+            runtime.run_scheduled_job(second_job.id)
+            second_execution = JobExecution.get(JobExecution.job == second_job)
+
+            assert second_execution.running_status == JobExecutionStatus.PENDING
+            assert second_execution.started_at is None
+        finally:
+            runtime.shutdown()
+
+
+def test_job_runtime_cancel_pending_follow_up_keeps_running_worker_alive(
+    tmp_path: Path,
+) -> None:
+    db_path = tmp_path / "cancel-pending.db"
+    log_dir = tmp_path / "out" / "logs"
+    initialize_database(db_path)
+    save_setting("max_concurrent_jobs", 1)
+
+    with _slow_feed_server() as feed_url:
+        source = create_source(
+            name="Cancelable queued source",
+            slug="cancelable-queued-source",
+            source_type="feed",
+            notes="",
+            spider_arguments="",
+            enabled=False,
+            cron_minute="*/5",
+            cron_hour="*",
+            cron_day_of_month="*",
+            cron_day_of_week="*",
+            cron_month="*",
+            feed_url=feed_url,
+        )
+        job = Job.get(Job.source == source)
+
+        runtime = JobRuntime(log_dir=log_dir)
+        try:
+            runtime.start()
+            running_execution_id = runtime.run_job_now(job.id, reason="manual")
+            assert running_execution_id is not None
+            _wait_for_running_execution(running_execution_id)
+
+            pending_execution_id = runtime.run_job_now(job.id, reason="manual")
+            assert pending_execution_id is not None
+            _wait_for_execution_status(pending_execution_id, JobExecutionStatus.PENDING)
+
+            assert runtime.cancel_queued_execution(pending_execution_id) is True
+            assert JobExecution.get_or_none(id=pending_execution_id) is None
+            assert (
+                JobExecution.get_by_id(running_execution_id).running_status
+                == JobExecutionStatus.RUNNING
+            )
+        finally:
+            runtime.shutdown()
+
+
 def test_job_runtime_cancel_marks_execution_canceled(tmp_path: Path) -> None:
     initialize_database(tmp_path / "cancel.db")
     with _slow_feed_server() as feed_url:
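
Because the queue rows are ordinary database records, they survive a restart; test_job_runtime_start_drains_pending_rows_created_before_start above depends on start() draining rows that already exist. A sketch of that drain step, under the same assumed helpers as the sketch near the top:

    # Sketch only: on startup, promote persisted PENDING rows oldest-first
    # until the concurrency limit is reached; the rest stay queued.
    def _drain_pending_queue(self) -> None:
        pending_rows = (
            JobExecution.select()
            .where(JobExecution.running_status == JobExecutionStatus.PENDING)
            .order_by(JobExecution.id)
        )
        for execution in pending_rows:
            if self._running_count() >= self._max_concurrent_jobs:
                break
            self._start_worker(execution)

Presumably the same drain runs whenever a worker finishes or is canceled, which is what lets the blocked executions in these tests start after request_execution_cancel frees a slot.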
@@ -571,7 +908,7 @@ def test_render_runs_uses_database_backed_jobs_and_executions(
     assert "runs-page-source" in body
     assert "Running job executions" in body
-    assert "Upcoming jobs" in body
+    assert "Scheduled jobs" in body
     assert "Completed job executions" in body
     assert f"/job/{job.id}/execution/{execution.get_id()}/logs" in body
     assert "Succeeded" in body
 
@@ -719,6 +1056,21 @@ def _wait_for_running_execution(
     raise AssertionError(f"execution {execution_id} never entered RUNNING state")
 
 
+def _wait_for_execution_status(
+    execution_id: int,
+    status: JobExecutionStatus,
+    *,
+    timeout_seconds: float = 2.0,
+) -> JobExecution:
+    deadline = time.monotonic() + timeout_seconds
+    while time.monotonic() < deadline:
+        execution = JobExecution.get_by_id(execution_id)
+        if execution.running_status == status:
+            return execution
+        time.sleep(0.02)
+    raise AssertionError(f"execution {execution_id} never entered {status.name}")
+
+
 def _wait_for_terminal_execution(
     execution_id: int, *, timeout_seconds: float = 4.0
 ) -> JobExecution:
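
The new _wait_for_execution_status helper generalizes the existing RUNNING and terminal waiters to an arbitrary target status; the queue tests above call it like this:

    # Poll (2 s default timeout) until the follow-up run is visibly queued.
    pending = _wait_for_execution_status(pending_execution_id, JobExecutionStatus.PENDING)
    assert pending.started_at is None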