fix attach workers

This commit is contained in:
Abel Luck 2026-03-30 15:53:04 +02:00
parent 0c36ee6662
commit ec4bdf1096
3 changed files with 354 additions and 6 deletions

View file

@@ -3,6 +3,8 @@ from __future__ import annotations
import asyncio
import json
import socketserver
import subprocess
import sys
import threading
import time
from datetime import UTC, datetime, timedelta
@@ -226,6 +228,161 @@ def test_job_runtime_start_reconciles_stale_running_execution(tmp_path: Path) ->
runtime.shutdown()
def test_job_runtime_start_reattaches_live_worker_after_app_restart(
    tmp_path: Path,
) -> None:
    """Check that a fresh ``JobRuntime`` picks up a worker that is still alive.

    The test records a ``RUNNING`` execution, launches ``repub.job_runner`` for
    it as an independent subprocess, and only then starts a new ``JobRuntime``
    (standing in for an application restart). It asserts that the execution
    stays ``RUNNING`` with no ``ended_at`` right after ``start()``, eventually
    becomes ``SUCCEEDED``, and that the execution log mentions "reattached".
    """
    db_path = tmp_path / "live-worker.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_database(db_path)

    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Live worker source",
            slug="live-worker-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            started_at=datetime.now(UTC),
            running_status=JobExecutionStatus.RUNNING,
        )
        execution_id = int(execution.get_id())
        artifacts = JobArtifacts.for_execution(
            log_dir=log_dir,
            job_id=job.id,
            execution_id=execution_id,
        )
        artifacts.log_path.parent.mkdir(parents=True, exist_ok=True)

        worker_command = [
            sys.executable,
            "-u",
            "-m",
            "repub.job_runner",
            "--job-id",
            str(job.id),
            "--execution-id",
            str(execution.get_id()),
            "--db-path",
            str(db_path),
            "--out-dir",
            str(log_dir.parent),
            "--stats-path",
            str(artifacts.stats_path),
        ]

        # The ``with`` block guarantees the log handle is closed even when
        # spawning the worker or building the runtime fails.
        with artifacts.log_path.open(
            "a", encoding="utf-8", buffering=1
        ) as log_handle:
            process = subprocess.Popen(
                worker_command,
                stdout=log_handle,
                stderr=subprocess.STDOUT,
                text=True,
            )
            runtime = None
            try:
                # Constructed inside the ``try`` so that a failure here still
                # reaches the ``finally`` clause and the worker gets reaped.
                runtime = JobRuntime(log_dir=log_dir)
                # NOTE(review): fixed delay, presumably to let the worker get
                # going before the runtime starts - confirm it is long enough
                # on slow CI machines.
                time.sleep(0.1)
                runtime.start()

                running_execution = JobExecution.get_by_id(execution.get_id())
                assert (
                    running_execution.running_status == JobExecutionStatus.RUNNING
                )
                assert running_execution.ended_at is None

                completed_execution = _wait_for_terminal_execution(execution_id)
                assert (
                    completed_execution.running_status
                    == JobExecutionStatus.SUCCEEDED
                )
                assert "reattached" in artifacts.log_path.read_text(
                    encoding="utf-8"
                )
            finally:
                if runtime is not None:
                    runtime.shutdown()
                # Never leave the worker subprocess running past the test.
                if process.poll() is None:
                    process.kill()
                    process.wait(timeout=2)
def test_job_runtime_start_restores_live_worker_marked_failed_by_restart_bug(
    tmp_path: Path,
) -> None:
    """Check that a live worker wrongly recorded as ``FAILED`` is restored.

    The execution row is created already in the ``FAILED`` state with an
    ``ended_at`` timestamp, while a real ``repub.job_runner`` subprocess for
    that execution is started alongside it. After a new ``JobRuntime`` starts,
    the test asserts the row is back to ``RUNNING`` with ``ended_at`` cleared,
    that it eventually becomes ``SUCCEEDED``, and that the execution log
    contains "restored execution state".
    """
    db_path = tmp_path / "restore-live-worker.db"
    log_dir = tmp_path / "out" / "logs"
    initialize_database(db_path)

    with _slow_feed_server() as feed_url:
        source = create_source(
            name="Recovered worker source",
            slug="recovered-worker-source",
            source_type="feed",
            notes="",
            spider_arguments="",
            enabled=False,
            cron_minute="*/5",
            cron_hour="*",
            cron_day_of_month="*",
            cron_day_of_week="*",
            cron_month="*",
            feed_url=feed_url,
        )
        job = Job.get(Job.source == source)
        execution = JobExecution.create(
            job=job,
            started_at=datetime.now(UTC),
            ended_at=datetime.now(UTC),
            running_status=JobExecutionStatus.FAILED,
        )
        execution_id = int(execution.get_id())
        artifacts = JobArtifacts.for_execution(
            log_dir=log_dir,
            job_id=job.id,
            execution_id=execution_id,
        )
        artifacts.log_path.parent.mkdir(parents=True, exist_ok=True)

        worker_command = [
            sys.executable,
            "-u",
            "-m",
            "repub.job_runner",
            "--job-id",
            str(job.id),
            "--execution-id",
            str(execution.get_id()),
            "--db-path",
            str(db_path),
            "--out-dir",
            str(log_dir.parent),
            "--stats-path",
            str(artifacts.stats_path),
        ]

        # The ``with`` block guarantees the log handle is closed even when
        # spawning the worker or building the runtime fails.
        with artifacts.log_path.open(
            "a", encoding="utf-8", buffering=1
        ) as log_handle:
            process = subprocess.Popen(
                worker_command,
                stdout=log_handle,
                stderr=subprocess.STDOUT,
                text=True,
            )
            runtime = None
            try:
                # Constructed inside the ``try`` so that a failure here still
                # reaches the ``finally`` clause and the worker gets reaped.
                runtime = JobRuntime(log_dir=log_dir)
                # NOTE(review): fixed delay, presumably to let the worker get
                # going before the runtime starts - confirm it is long enough
                # on slow CI machines.
                time.sleep(0.1)
                runtime.start()

                restored_execution = JobExecution.get_by_id(execution.get_id())
                assert (
                    restored_execution.running_status == JobExecutionStatus.RUNNING
                )
                assert restored_execution.ended_at is None

                completed_execution = _wait_for_terminal_execution(execution_id)
                assert (
                    completed_execution.running_status
                    == JobExecutionStatus.SUCCEEDED
                )
                assert "restored execution state" in artifacts.log_path.read_text(
                    encoding="utf-8"
                )
            finally:
                if runtime is not None:
                    runtime.shutdown()
                # Never leave the worker subprocess running past the test.
                if process.poll() is None:
                    process.kill()
                    process.wait(timeout=2)
def test_generate_pangea_feed_writes_pangea_rss_file(
monkeypatch, tmp_path: Path
) -> None: