add sqlite database
This commit is contained in:
parent
06066c2394
commit
b9e288a22d
5 changed files with 440 additions and 0 deletions
168
repub/model.py
Normal file
168
repub/model.py
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from datetime import UTC, datetime
|
||||
from enum import IntEnum
|
||||
from importlib import resources
|
||||
from importlib.resources.abc import Traversable
|
||||
from pathlib import Path
|
||||
|
||||
from peewee import (
|
||||
BooleanField,
|
||||
Check,
|
||||
DateTimeField,
|
||||
ForeignKeyField,
|
||||
IntegerField,
|
||||
Model,
|
||||
SqliteDatabase,
|
||||
TextField,
|
||||
)
|
||||
|
||||
# Database file used when neither an explicit path nor the
# REPUBLISHER_DB_PATH environment variable selects one
# (see resolve_database_path).
DEFAULT_DB_PATH: Path = Path("republisher.db")

# PRAGMA settings handed to peewee both when the module-level handle is
# created and again when it is (re)initialised in initialize_database.
# Key order is kept as written.
# NOTE(review): cache_size is positive, which SQLite documents as a page
# count rather than a size in KiB -- confirm 15625 pages is what is intended.
DATABASE_PRAGMAS: dict[str, int | str] = {
    "busy_timeout": 5000,
    "cache_size": 15625,
    "foreign_keys": 1,
    "journal_mode": "wal",
    "page_size": 4096,
    "synchronous": "normal",
    "temp_store": "memory",
}

# Pattern describing the packaged schema scripts. Nothing in the visible part
# of this module reads it; schema_paths filters on the ".sql" suffix directly.
SCHEMA_GLOB: str = "*.sql"
|
||||
|
||||
# Shared database handle for every model in this module. It is created with
# ``None`` instead of a file name; the real path is bound later by
# ``database.init(...)`` inside ``initialize_database``.
database = SqliteDatabase(None, pragmas=DATABASE_PRAGMAS)
|
||||
|
||||
|
||||
class JobExecutionStatus(IntEnum):
    """Integer codes for the ``running_status`` column of ``JobExecution``.

    ``JobExecution.running_status`` defaults to ``PENDING`` and carries a
    ``CHECK (running_status BETWEEN 0 AND 4)`` constraint, so every member
    defined here has to stay inside that range.
    """

    PENDING = 0
    RUNNING = 1
    SUCCEEDED = 2
    FAILED = 3
    CANCELED = 4
|
||||
|
||||
|
||||
def utc_now() -> datetime:
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
def resolve_database_path(db_path: str | Path | None = None) -> Path:
    """Return the absolute filesystem path of the SQLite database file.

    The location is chosen in this order:

    1. ``db_path``, when the caller supplies one;
    2. the ``REPUBLISHER_DB_PATH`` environment variable, when it is set to a
       non-empty value;
    3. ``DEFAULT_DB_PATH``.

    Args:
        db_path: Explicit database location, or ``None`` to consult the
            environment and then the default.

    Returns:
        The chosen path with ``~`` expanded and resolved to an absolute path.
    """
    if db_path is None:
        # An exported-but-empty variable (``REPUBLISHER_DB_PATH=``) must not
        # win: ``Path("")`` is the current directory, not a database file.
        chosen = os.environ.get("REPUBLISHER_DB_PATH") or DEFAULT_DB_PATH
    else:
        chosen = db_path
    return Path(chosen).expanduser().resolve()
|
||||
|
||||
|
||||
def schema_paths() -> tuple[Traversable, ...]:
    """Return the packaged schema scripts, ordered by file name.

    Looks inside the ``sql`` resource directory of the ``repub`` package and
    keeps every entry whose name ends in ``.sql``.

    Returns:
        A tuple of ``Traversable`` entries sorted ascending by ``name``, which
        is the order ``initialize_database`` executes them in.
    """
    sql_dir = resources.files("repub").joinpath("sql")
    scripts = [entry for entry in sql_dir.iterdir() if entry.name.endswith(".sql")]
    scripts.sort(key=lambda entry: entry.name)
    return tuple(scripts)
|
||||
|
||||
|
||||
def initialize_database(db_path: str | Path | None = None) -> Path:
    """Bind the shared ``database`` handle to a file and apply the schema.

    The target path comes from ``resolve_database_path``; missing parent
    directories are created. Any connection currently open on ``database`` is
    closed, the handle is re-initialised with ``DATABASE_PRAGMAS``, and every
    script returned by ``schema_paths`` is executed in order. The connection
    is closed again before returning, whether or not a script failed.

    Args:
        db_path: Explicit database location, or ``None`` to let
            ``resolve_database_path`` decide.

    Returns:
        The resolved path of the database file.
    """
    target = resolve_database_path(db_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Drop any connection still open before pointing the handle at the
    # (possibly different) target file.
    if not database.is_closed():
        database.close()
    database.init(str(target), pragmas=DATABASE_PRAGMAS)

    database.connect(reuse_if_open=True)
    try:
        # executescript is called on the object returned by
        # database.connection(), not on the peewee handle itself.
        raw_connection = database.connection()
        for script in schema_paths():
            sql_text = script.read_text(encoding="utf-8")
            raw_connection.executescript(sql_text)
    finally:
        database.close()

    return target
|
||||
|
||||
|
||||
class BaseModel(Model):
    """Common parent that attaches each model to the module-level ``database``."""

    class Meta:
        # The right-hand side is the module-level handle defined above.
        database = database
|
||||
|
||||
|
||||
class Source(BaseModel):
    """Row of the ``source`` table.

    ``slug`` is unique, and ``source_type`` is limited by a CHECK constraint
    to ``'feed'`` or ``'pangea'``.
    """

    # Both timestamps default to the time the instance is created.
    created_at = DateTimeField(default=utc_now)
    updated_at = DateTimeField(default=utc_now)

    name = TextField()
    slug = TextField(unique=True)
    source_type = TextField(
        constraints=[Check("source_type IN ('feed', 'pangea')")],
    )
    notes = TextField(default="")

    class Meta:
        table_name = "source"
|
||||
|
||||
|
||||
class SourceFeed(BaseModel):
    """Row of the ``source_feed`` table, keyed by its ``Source``.

    The foreign key to ``Source`` is also the primary key, so a source has at
    most one row here; it is reachable from ``Source`` via ``feed_config``.
    """

    source = ForeignKeyField(Source, primary_key=True, backref="feed_config")
    feed_url = TextField()

    # Both nullable. NOTE(review): presumably HTTP cache validators kept from
    # the last fetch -- confirm against the code that writes them.
    etag = TextField(null=True)
    last_modified = TextField(null=True)

    class Meta:
        table_name = "source_feed"
|
||||
|
||||
|
||||
class SourcePangea(BaseModel):
    """Row of the ``source_pangea`` table, keyed by its ``Source``.

    The foreign key to ``Source`` is also the primary key, so a source has at
    most one row here; it is reachable from ``Source`` via ``pangea_config``.
    None of the option columns declares a default, so each must be supplied
    when a row is created.
    """

    source = ForeignKeyField(Source, primary_key=True, backref="pangea_config")

    # Text options.
    domain = TextField()
    category_name = TextField()
    content_type = TextField()

    # Boolean and integer options, kept in their original column order.
    only_newest = BooleanField()
    max_articles = IntegerField()
    oldest_article = IntegerField()
    include_authors = BooleanField()
    exclude_media = BooleanField()
    include_content = BooleanField()

    content_format = TextField()

    class Meta:
        table_name = "source_pangea"
|
||||
|
||||
|
||||
class Job(BaseModel):
    """Row of the ``job`` table.

    ``source`` is a unique foreign key, so each ``Source`` has at most one
    job; it is reachable from ``Source`` via ``job``.
    """

    source = ForeignKeyField(Source, unique=True, backref="job")

    # Both timestamps default to the time the instance is created.
    created_at = DateTimeField(default=utc_now)
    updated_at = DateTimeField(default=utc_now)

    enabled = BooleanField()
    spider_arguments = TextField(default="")

    # Schedule columns, stored as text.
    # NOTE(review): presumably the five fields of a cron expression --
    # confirm the accepted syntax with whatever consumes them.
    cron_minute = TextField()
    cron_hour = TextField()
    cron_day_of_month = TextField()
    cron_day_of_week = TextField()
    cron_month = TextField()

    class Meta:
        table_name = "job"
|
||||
|
||||
|
||||
class JobExecution(BaseModel):
    """Row of the ``job_execution`` table.

    Each row belongs to a ``Job`` and is reachable from it via ``executions``.
    ``running_status`` starts at ``JobExecutionStatus.PENDING`` and is limited
    by a CHECK constraint to the range 0-4. Every ``*_count`` column starts
    at 0.
    """

    job = ForeignKeyField(Job, backref="executions")

    # created_at is filled in on creation; the other two stay NULL until set.
    created_at = DateTimeField(default=utc_now)
    started_at = DateTimeField(null=True)
    ended_at = DateTimeField(null=True)

    running_status = IntegerField(
        constraints=[Check("running_status BETWEEN 0 AND 4")],
        default=JobExecutionStatus.PENDING,
    )

    # Integer counters, all defaulting to zero.
    requests_count = IntegerField(default=0)
    items_count = IntegerField(default=0)
    warnings_count = IntegerField(default=0)
    errors_count = IntegerField(default=0)
    bytes_count = IntegerField(default=0)
    retries_count = IntegerField(default=0)
    exceptions_count = IntegerField(default=0)
    cache_size_count = IntegerField(default=0)
    cache_object_count = IntegerField(default=0)

    # NOTE(review): the "{}" default suggests a JSON document -- confirm.
    raw_stats = TextField(default="{}")

    class Meta:
        table_name = "job_execution"
|
||||
Loading…
Add table
Add a link
Reference in a new issue