221 lines
6.7 KiB
Python
221 lines
6.7 KiB
Python
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import sys
|
||
|
|
from dataclasses import dataclass
|
||
|
|
from datetime import UTC, datetime, timedelta
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import TextIO, cast
|
||
|
|
|
||
|
|
from repub.db import get_database_connection, initialize_database
|
||
|
|
from repub.jobs import JobArtifacts
|
||
|
|
from repub.model import Job, JobExecution, JobExecutionStatus, database
|
||
|
|
|
||
|
|
# Default retention windows, in days, used when the caller does not pass
# explicit values to cleanup_job_executions().
DEFAULT_SUCCESSFUL_EXECUTION_RETENTION_DAYS = 7
DEFAULT_UNSUCCESSFUL_EXECUTION_RETENTION_DAYS = 90

# Statuses that are aged out against the "unsuccessful" cutoff rather than
# the "successful" one.
UNSUCCESSFUL_EXECUTION_STATUSES = (
    JobExecutionStatus.FAILED,
    JobExecutionStatus.CANCELED,
)
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass
class JobExecutionRetentionResult:
    """Configuration and running totals of a single retention pass.

    The first four fields describe how the pass was set up and have no
    defaults. The remaining fields are counters that start at zero and are
    incremented by the cleanup code as it works.
    """

    # --- how the pass was configured -------------------------------------
    log_dir: Path
    successful_cutoff: datetime
    unsuccessful_cutoff: datetime
    dry_run: bool

    # --- tallies, all zero until the pass updates them --------------------
    matched_executions: int = 0
    deleted_executions: int = 0
    matched_files: int = 0
    deleted_files: int = 0
    bytes_deleted: int = 0
    failures: int = 0
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass(frozen=True)
class _ExecutionRetentionCandidate:
    """Immutable record of one execution selected for retention cleanup."""

    # Primary key of the execution row.
    execution_id: int
    # Primary key of the job the execution belongs to.
    job_id: int
    # Status the execution finished with.
    status: JobExecutionStatus
    # End time of the execution.
    ended_at: datetime
|
||
|
|
|
||
|
|
|
||
|
|
# Upper bound on the number of ids bound into a single DELETE ... IN (...)
# statement. Database engines cap the bound parameters per statement (older
# SQLite builds default to 999), so a large backlog is removed in slices.
_DELETE_BATCH_SIZE = 500


def cleanup_job_executions(
    *,
    log_dir: str | Path,
    successful_days: int = DEFAULT_SUCCESSFUL_EXECUTION_RETENTION_DAYS,
    unsuccessful_days: int = DEFAULT_UNSUCCESSFUL_EXECUTION_RETENTION_DAYS,
    now: datetime | None = None,
    dry_run: bool = False,
    output: TextIO | None = None,
) -> JobExecutionRetentionResult:
    """Remove aged-out job executions together with their artifact files.

    Candidates come from ``_retention_candidates``. For each one a
    "matched" line is printed and its artifact files are handed to
    ``_cleanup_artifacts``. Unless this is a dry run or an artifact could
    not be removed, the execution id is queued, and all queued rows are
    deleted afterwards. A one-line summary is printed at the end.

    Args:
        log_dir: Root directory passed to ``JobArtifacts.for_execution``;
            it is resolved to an absolute path first.
        successful_days: Age in days after which succeeded executions are
            removed.
        unsuccessful_days: Age in days after which executions in
            ``UNSUCCESSFUL_EXECUTION_STATUSES`` are removed.
        now: Reference time for both cutoffs; the current UTC time when
            omitted. Naive values are treated as UTC.
        dry_run: When true, report matches but delete neither files nor
            rows.
        output: Stream receiving the progress and summary lines. ``None``
            (the default) means whatever ``sys.stdout`` is at call time,
            so stdout redirection set up after import is honoured.

    Returns:
        The ``JobExecutionRetentionResult`` holding the cutoffs used and
        the counters accumulated during the pass.
    """
    if get_database_connection() is None:
        initialize_database()

    # Resolve the stream per call: a ``sys.stdout`` default in the signature
    # would be frozen at import time.
    stream = sys.stdout if output is None else output

    reference_time = _coerce_datetime(datetime.now(UTC) if now is None else now)
    successful_cutoff = reference_time - timedelta(days=successful_days)
    unsuccessful_cutoff = reference_time - timedelta(days=unsuccessful_days)
    resolved_log_dir = Path(log_dir).resolve()
    result = JobExecutionRetentionResult(
        log_dir=resolved_log_dir,
        successful_cutoff=successful_cutoff,
        unsuccessful_cutoff=unsuccessful_cutoff,
        dry_run=dry_run,
    )

    candidates = _retention_candidates(
        successful_cutoff=successful_cutoff,
        unsuccessful_cutoff=unsuccessful_cutoff,
    )
    execution_ids_to_delete: list[int] = []
    for candidate in candidates:
        result.matched_executions += 1
        print(
            "job retention: matched "
            f"execution_id={candidate.execution_id} "
            f"job_id={candidate.job_id} "
            f"status={candidate.status.name} "
            f"ended_at={candidate.ended_at.isoformat()}",
            file=stream,
        )
        artifacts = JobArtifacts.for_execution(
            log_dir=resolved_log_dir,
            job_id=candidate.job_id,
            execution_id=candidate.execution_id,
        )
        artifact_cleanup_succeeded = _cleanup_artifacts(
            artifacts=artifacts,
            result=result,
            dry_run=dry_run,
            output=stream,
        )
        # Keep the row when files could not be removed, so the execution is
        # matched again on a later pass.
        if dry_run or not artifact_cleanup_succeeded:
            continue
        execution_ids_to_delete.append(candidate.execution_id)

    if execution_ids_to_delete:
        with database.writer():
            execution_primary_key = getattr(JobExecution, "_meta").primary_key
            # Every batch runs inside the same writer() block that the
            # single DELETE used before; only the statement size changes.
            for start in range(0, len(execution_ids_to_delete), _DELETE_BATCH_SIZE):
                batch = tuple(
                    execution_ids_to_delete[start : start + _DELETE_BATCH_SIZE]
                )
                result.deleted_executions += (
                    JobExecution.delete()
                    .where(execution_primary_key.in_(batch))
                    .execute()
                )

    print(
        "job retention: "
        f"dry_run={_bool_text(result.dry_run)} "
        f"successful_cutoff={result.successful_cutoff.isoformat()} "
        f"unsuccessful_cutoff={result.unsuccessful_cutoff.isoformat()} "
        f"root={result.log_dir} "
        f"matched_executions={result.matched_executions} "
        f"deleted_executions={result.deleted_executions} "
        f"matched_files={result.matched_files} "
        f"deleted_files={result.deleted_files} "
        f"bytes_deleted={result.bytes_deleted} "
        f"failures={result.failures}",
        file=stream,
    )
    return result
|
||
|
|
|
||
|
|
|
||
|
|
def _retention_candidates(
    *, successful_cutoff: datetime, unsuccessful_cutoff: datetime
) -> tuple[_ExecutionRetentionCandidate, ...]:
    """Return the finished executions that ended before their cutoff.

    The query loads every execution that has an end time and whose status
    is succeeded or one of ``UNSUCCESSFUL_EXECUTION_STATUSES``, joined with
    its job. The age check runs in Python after the end time has been
    normalised with ``_coerce_datetime``.

    Args:
        successful_cutoff: Succeeded executions that ended at or after this
            instant are kept.
        unsuccessful_cutoff: Same threshold for the unsuccessful statuses.

    Returns:
        One ``_ExecutionRetentionCandidate`` per expired execution.
    """
    with database.reader():
        finished_statuses = (
            JobExecutionStatus.SUCCEEDED,
            *UNSUCCESSFUL_EXECUTION_STATUSES,
        )
        is_finished = JobExecution.running_status.in_(finished_statuses)
        has_end_time = JobExecution.ended_at.is_null(False)
        # Materialise the rows while the reader context is still open.
        rows = tuple(
            JobExecution.select(JobExecution, Job)
            .join(Job)
            .where((is_finished) & (has_end_time))
        )

    expired: list[_ExecutionRetentionCandidate] = []
    for row in rows:
        status = JobExecutionStatus(int(row.running_status))
        ended_at = _coerce_datetime(cast(datetime | str, row.ended_at))

        # Choose the threshold that applies to this status; any other
        # status is never a candidate.
        if status == JobExecutionStatus.SUCCEEDED:
            cutoff = successful_cutoff
        elif status in UNSUCCESSFUL_EXECUTION_STATUSES:
            cutoff = unsuccessful_cutoff
        else:
            continue

        if ended_at >= cutoff:
            continue

        owning_job = cast(Job, row.job)
        expired.append(
            _ExecutionRetentionCandidate(
                execution_id=int(row.get_id()),
                job_id=int(owning_job.get_id()),
                status=status,
                ended_at=ended_at,
            )
        )
    return tuple(expired)
|
||
|
|
|
||
|
|
|
||
|
|
def _cleanup_artifacts(
    *,
    artifacts: JobArtifacts,
    result: JobExecutionRetentionResult,
    dry_run: bool,
    output: TextIO,
) -> bool:
    """Delete the artifact files of one execution and update the tallies.

    Every path from ``artifacts.existing_paths()`` counts as a matched
    file. Outside of a dry run the file is unlinked, and its size is added
    to ``result.bytes_deleted`` on success. A file that is already gone
    when it is inspected or unlinked is skipped without counting as a
    failure.

    Args:
        artifacts: Object whose ``existing_paths()`` yields the files to
            remove.
        result: Counters updated in place (``matched_files``,
            ``deleted_files``, ``bytes_deleted``, ``failures``).
        dry_run: When true, files are counted but not removed.
        output: Stream that receives one line per stat/delete failure.

    Returns:
        ``True`` when no file raised an error other than "not found",
        ``False`` otherwise.
    """
    succeeded = True
    for path in artifacts.existing_paths():
        result.matched_files += 1
        try:
            file_size = path.stat().st_size
        except FileNotFoundError:
            # The file vanished after it was listed. Nothing is left to
            # remove, so treat it like the same race at unlink() below.
            continue
        except OSError as error:
            result.failures += 1
            succeeded = False
            print(
                f"job retention: stat failed path={path} error={error}",
                file=output,
            )
            continue

        if dry_run:
            continue

        try:
            path.unlink()
        except FileNotFoundError:
            # Removed by someone else between stat() and unlink().
            continue
        except OSError as error:
            result.failures += 1
            succeeded = False
            print(
                f"job retention: delete failed path={path} error={error}",
                file=output,
            )
            continue
        result.deleted_files += 1
        result.bytes_deleted += file_size
    return succeeded
|
||
|
|
|
||
|
|
|
||
|
|
def _coerce_datetime(value: datetime | str) -> datetime:
|
||
|
|
if isinstance(value, datetime):
|
||
|
|
if value.tzinfo is None:
|
||
|
|
return value.replace(tzinfo=UTC)
|
||
|
|
return value.astimezone(UTC)
|
||
|
|
|
||
|
|
parsed = datetime.fromisoformat(value)
|
||
|
|
if parsed.tzinfo is None:
|
||
|
|
return parsed.replace(tzinfo=UTC)
|
||
|
|
return parsed.astimezone(UTC)
|
||
|
|
|
||
|
|
|
||
|
|
def _bool_text(value: bool) -> str:
    """Render a flag as the lowercase word ``true`` or ``false``."""
    if value:
        return "true"
    return "false"
|