implement scrapy + pygea job runner
This commit is contained in:
parent
916968c579
commit
8af28c2f68
8 changed files with 888 additions and 163 deletions
|
|
@ -188,14 +188,12 @@ class JobRuntime:
|
|||
str(job_id),
|
||||
"--execution-id",
|
||||
str(execution_id),
|
||||
"--db-path",
|
||||
str(database.database),
|
||||
"--out-dir",
|
||||
str(self.log_dir.parent),
|
||||
"--stats-path",
|
||||
str(artifacts.stats_path),
|
||||
"--duration-seconds",
|
||||
str(self.worker_duration_seconds),
|
||||
"--interval-seconds",
|
||||
str(self.worker_stats_interval_seconds),
|
||||
"--failure-probability",
|
||||
str(self.worker_failure_probability),
|
||||
],
|
||||
stdout=log_handle,
|
||||
stderr=subprocess.STDOUT,
|
||||
|
|
@ -390,7 +388,7 @@ def load_runs_view(
|
|||
for job in jobs
|
||||
),
|
||||
"completed": tuple(
|
||||
_project_completed_execution(execution, resolved_log_dir)
|
||||
_project_completed_execution(execution, resolved_log_dir, reference_time)
|
||||
for execution in completed_executions
|
||||
),
|
||||
}
|
||||
|
|
@ -401,6 +399,7 @@ def load_dashboard_view(
|
|||
) -> dict[str, object]:
|
||||
reference_time = now or datetime.now(UTC)
|
||||
runs_view = load_runs_view(log_dir=log_dir, now=reference_time)
|
||||
output_dir = Path(log_dir).parent
|
||||
with database.connection_context():
|
||||
failed_last_day = (
|
||||
JobExecution.select()
|
||||
|
|
@ -414,7 +413,7 @@ def load_dashboard_view(
|
|||
upcoming_ready = sum(
|
||||
1 for job in runs_view["upcoming"] if str(job["run_reason"]) == "Ready"
|
||||
)
|
||||
footprint_bytes = _directory_size(Path(log_dir))
|
||||
footprint_bytes = _directory_size(output_dir)
|
||||
return {
|
||||
"running": runs_view["running"],
|
||||
"snapshot": {
|
||||
|
|
@ -538,7 +537,7 @@ def _project_upcoming_job(
|
|||
"slug": job.source.slug,
|
||||
"job_id": job_id,
|
||||
"next_run": (
|
||||
_humanize_future_time(reference_time, next_run)
|
||||
_humanize_relative_time(reference_time, next_run)
|
||||
if next_run is not None
|
||||
else ("Running now" if running_execution is not None else "Not scheduled")
|
||||
),
|
||||
|
|
@ -565,7 +564,7 @@ def _project_upcoming_job(
|
|||
|
||||
|
||||
def _project_completed_execution(
|
||||
execution: JobExecution, log_dir: Path
|
||||
execution: JobExecution, log_dir: Path, reference_time: datetime
|
||||
) -> dict[str, object]:
|
||||
job = cast(Job, execution.job)
|
||||
job_id = _job_id(job)
|
||||
|
|
@ -573,18 +572,22 @@ def _project_completed_execution(
|
|||
artifacts = JobArtifacts.for_execution(
|
||||
log_dir=log_dir, job_id=job_id, execution_id=execution_id
|
||||
)
|
||||
ended_at = (
|
||||
_coerce_datetime(cast(datetime | str, execution.ended_at))
|
||||
if execution.ended_at is not None
|
||||
else None
|
||||
)
|
||||
return {
|
||||
"source": job.source.name,
|
||||
"slug": job.source.slug,
|
||||
"job_id": job_id,
|
||||
"execution_id": execution_id,
|
||||
"ended_at": (
|
||||
_coerce_datetime(cast(datetime | str, execution.ended_at)).strftime(
|
||||
"%Y-%m-%d %H:%M UTC"
|
||||
)
|
||||
if execution.ended_at is not None
|
||||
_humanize_relative_time(reference_time, ended_at)
|
||||
if ended_at is not None
|
||||
else "Pending"
|
||||
),
|
||||
"ended_at_iso": ended_at.isoformat() if ended_at is not None else None,
|
||||
"status": _execution_status_label(execution),
|
||||
"status_tone": _execution_status_tone(execution),
|
||||
"stats": _stats_summary(execution),
|
||||
|
|
@ -678,20 +681,25 @@ def _format_bytes(value: int) -> str:
|
|||
return f"{value / (1024 * 1024 * 1024):.1f} GB"
|
||||
|
||||
|
||||
def _humanize_future_time(reference_time: datetime, target_time: datetime) -> str:
|
||||
def _humanize_relative_time(reference_time: datetime, target_time: datetime) -> str:
|
||||
delta_seconds = int(round((target_time - reference_time).total_seconds()))
|
||||
if delta_seconds <= 0:
|
||||
if delta_seconds == 0:
|
||||
return "now"
|
||||
|
||||
absolute_delta_seconds = abs(delta_seconds)
|
||||
units = (
|
||||
("day", 24 * 60 * 60),
|
||||
("hour", 60 * 60),
|
||||
("minute", 60),
|
||||
)
|
||||
for label, size in units:
|
||||
if delta_seconds >= size:
|
||||
count = max(1, round(delta_seconds / size))
|
||||
if absolute_delta_seconds >= size:
|
||||
count = max(1, round(absolute_delta_seconds / size))
|
||||
suffix = "" if count == 1 else "s"
|
||||
return f"in {count} {label}{suffix}"
|
||||
if delta_seconds > 0:
|
||||
return f"in {count} {label}{suffix}"
|
||||
return f"{count} {label}{suffix} ago"
|
||||
|
||||
return f"in {delta_seconds} seconds"
|
||||
if delta_seconds > 0:
|
||||
return f"in {absolute_delta_seconds} seconds"
|
||||
return f"{absolute_delta_seconds} seconds ago"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue