Reconcile stale executions on startup

This commit is contained in:
Abel Luck 2026-03-30 14:18:55 +02:00
parent 90674e6515
commit 916968c579
2 changed files with 86 additions and 0 deletions

View file

@ -80,6 +80,7 @@ class JobRuntime:
if self._started:
return
self._reconcile_stale_executions()
self.scheduler.start()
self.scheduler.add_job(
self.poll_workers,
@ -311,6 +312,39 @@ class JobRuntime:
if self.refresh_callback is not None:
self.refresh_callback()
def _reconcile_stale_executions(self) -> None:
    """Close out executions that are still recorded as RUNNING.

    Every ``JobExecution`` row whose status is ``RUNNING`` is treated as
    stale: a note is appended to its log file, ``ended_at`` is set to the
    current time, and the status becomes ``CANCELED`` when a stop had been
    requested for it, otherwise ``FAILED``. A refresh is triggered
    afterwards if at least one row was updated.

    NOTE(review): presumably safe only before the scheduler starts, when no
    execution owned by this process can legitimately be RUNNING -- confirm
    against the caller.
    """
    with database.connection_context():
        # Materialise the query up front so the rows can be updated while
        # looping and the emptiness check below works after the context exits.
        stale_executions = tuple(
            JobExecution.select(JobExecution, Job)
            .join(Job)
            .where(JobExecution.running_status == JobExecutionStatus.RUNNING)
        )
        for execution in stale_executions:
            job = cast(Job, execution.job)
            artifacts = JobArtifacts.for_execution(
                log_dir=self.log_dir,
                job_id=_job_id(job),
                execution_id=_execution_id(execution),
            )

            # Decide the final status and the log note together so the log
            # never says "failed" for an execution recorded as canceled.
            if execution.stop_requested_at is not None:
                final_status = JobExecutionStatus.CANCELED
                note = "scheduler: execution marked canceled after app restart\n"
            else:
                final_status = JobExecutionStatus.FAILED
                note = "scheduler: execution marked failed after app restart\n"

            # The log directory may not exist yet for this execution.
            artifacts.log_path.parent.mkdir(parents=True, exist_ok=True)
            with artifacts.log_path.open("a", encoding="utf-8") as log_handle:
                log_handle.write(note)

            execution.ended_at = utc_now()
            execution.running_status = final_status
            execution.save()

    if stale_executions:
        self._trigger_refresh()
def load_runs_view(
*, log_dir: str | Path, now: datetime | None = None