Persist processing-log retention settings and wire cleanup defaults

This commit is contained in:
2026-02-21 12:05:48 -03:00
parent 992f897878
commit 4beab4bc09
9 changed files with 284 additions and 16 deletions

View File

@@ -7,6 +7,7 @@ from sqlalchemy.orm import Session
from app.db.base import get_session
from app.schemas.processing_logs import ProcessingLogEntryResponse, ProcessingLogListResponse
from app.services.app_settings import read_processing_log_retention_settings
from app.services.processing_logs import (
cleanup_processing_logs,
clear_processing_logs,
@@ -42,16 +43,28 @@ def get_processing_logs(
@router.post("/trim")
def trim_processing_logs(
    keep_document_sessions: int | None = Query(default=None, ge=0, le=20),
    keep_unbound_entries: int | None = Query(default=None, ge=0, le=400),
    session: Session = Depends(get_session),
) -> dict[str, int]:
    """Deletes old processing logs using query values or persisted retention defaults.

    Either limit may be omitted from the request; omitted limits fall back to the
    persisted retention settings (defaults: 2 document sessions, 80 unbound entries).
    Returns the counters produced by ``cleanup_processing_logs``.
    """
    # NOTE: the diff residue duplicated the old non-optional parameters and call
    # kwargs; this body is the resolved post-image with the None-fallback logic.
    retention_defaults = read_processing_log_retention_settings()
    if keep_document_sessions is None:
        keep_document_sessions = int(retention_defaults.get("keep_document_sessions", 2))
    if keep_unbound_entries is None:
        keep_unbound_entries = int(retention_defaults.get("keep_unbound_entries", 80))
    result = cleanup_processing_logs(
        session=session,
        keep_document_sessions=keep_document_sessions,
        keep_unbound_entries=keep_unbound_entries,
    )
    session.commit()
    return result

View File

@@ -10,6 +10,7 @@ from app.schemas.settings import (
HandwritingStyleSettingsResponse,
HandwritingSettingsUpdateRequest,
OcrTaskSettingsResponse,
ProcessingLogRetentionSettingsResponse,
ProviderSettingsResponse,
RoutingTaskSettingsResponse,
SummaryTaskSettingsResponse,
@@ -35,6 +36,7 @@ def _build_response(payload: dict) -> AppSettingsResponse:
upload_defaults_payload = payload.get("upload_defaults", {})
display_payload = payload.get("display", {})
processing_log_retention_payload = payload.get("processing_log_retention", {})
providers_payload = payload.get("providers", [])
tasks_payload = payload.get("tasks", {})
handwriting_style_payload = payload.get("handwriting_style_clustering", {})
@@ -55,6 +57,10 @@ def _build_response(payload: dict) -> AppSettingsResponse:
cards_per_page=int(display_payload.get("cards_per_page", 12)),
log_typing_animation_enabled=bool(display_payload.get("log_typing_animation_enabled", True)),
),
processing_log_retention=ProcessingLogRetentionSettingsResponse(
keep_document_sessions=int(processing_log_retention_payload.get("keep_document_sessions", 2)),
keep_unbound_entries=int(processing_log_retention_payload.get("keep_unbound_entries", 80)),
),
handwriting_style_clustering=HandwritingStyleSettingsResponse(
enabled=bool(handwriting_style_payload.get("enabled", True)),
embed_model=str(handwriting_style_payload.get("embed_model", "ts/clip-vit-b-p32")),
@@ -159,6 +165,10 @@ def set_app_settings(payload: AppSettingsUpdateRequest) -> AppSettingsResponse:
if payload.display is not None:
display_payload = payload.display.model_dump(exclude_none=True)
processing_log_retention_payload = None
if payload.processing_log_retention is not None:
processing_log_retention_payload = payload.processing_log_retention.model_dump(exclude_none=True)
handwriting_style_payload = None
if payload.handwriting_style_clustering is not None:
handwriting_style_payload = payload.handwriting_style_clustering.model_dump(exclude_none=True)
@@ -174,6 +184,7 @@ def set_app_settings(payload: AppSettingsUpdateRequest) -> AppSettingsResponse:
tasks=tasks_payload,
upload_defaults=upload_defaults_payload,
display=display_payload,
processing_log_retention=processing_log_retention_payload,
handwriting_style=handwriting_style_payload,
predefined_paths=predefined_paths_payload,
predefined_tags=predefined_tags_payload,

View File

@@ -127,6 +127,20 @@ class DisplaySettingsUpdateRequest(BaseModel):
log_typing_animation_enabled: bool | None = None
class ProcessingLogRetentionSettingsResponse(BaseModel):
    """Retention limits applied when pruning processing pipeline log entries."""

    # How many of the most recent per-document log sessions survive a cleanup.
    keep_document_sessions: int = Field(ge=0, le=20, default=2)
    # How many log events without a bound document survive a cleanup.
    keep_unbound_entries: int = Field(ge=0, le=400, default=80)
class ProcessingLogRetentionSettingsUpdateRequest(BaseModel):
    """Partial update payload for processing log retention limits.

    Fields left as ``None`` keep their currently persisted values.
    """

    keep_document_sessions: int | None = Field(ge=0, le=20, default=None)
    keep_unbound_entries: int | None = Field(ge=0, le=400, default=None)
class PredefinedPathEntryResponse(BaseModel):
"""Represents one predefined logical path with global discoverability scope."""
@@ -200,6 +214,7 @@ class AppSettingsResponse(BaseModel):
upload_defaults: UploadDefaultsResponse
display: DisplaySettingsResponse
processing_log_retention: ProcessingLogRetentionSettingsResponse
handwriting_style_clustering: HandwritingStyleSettingsResponse
predefined_paths: list[PredefinedPathEntryResponse] = Field(default_factory=list)
predefined_tags: list[PredefinedTagEntryResponse] = Field(default_factory=list)
@@ -212,6 +227,7 @@ class AppSettingsUpdateRequest(BaseModel):
upload_defaults: UploadDefaultsUpdateRequest | None = None
display: DisplaySettingsUpdateRequest | None = None
processing_log_retention: ProcessingLogRetentionSettingsUpdateRequest | None = None
handwriting_style_clustering: HandwritingStyleSettingsUpdateRequest | None = None
predefined_paths: list[PredefinedPathEntryUpdateRequest] | None = None
predefined_tags: list[PredefinedTagEntryUpdateRequest] | None = None

View File

@@ -15,6 +15,7 @@ TASK_OCR_HANDWRITING = "ocr_handwriting"
TASK_SUMMARY_GENERATION = "summary_generation"
TASK_ROUTING_CLASSIFICATION = "routing_classification"
HANDWRITING_STYLE_SETTINGS_KEY = "handwriting_style_clustering"
PROCESSING_LOG_RETENTION_SETTINGS_KEY = "processing_log_retention"
PREDEFINED_PATHS_SETTINGS_KEY = "predefined_paths"
PREDEFINED_TAGS_SETTINGS_KEY = "predefined_tags"
DEFAULT_HANDWRITING_STYLE_EMBED_MODEL = "ts/clip-vit-b-p32"
@@ -65,6 +66,10 @@ def _default_settings() -> dict[str, Any]:
"cards_per_page": 12,
"log_typing_animation_enabled": True,
},
PROCESSING_LOG_RETENTION_SETTINGS_KEY: {
"keep_document_sessions": 2,
"keep_unbound_entries": 80,
},
PREDEFINED_PATHS_SETTINGS_KEY: [],
PREDEFINED_TAGS_SETTINGS_KEY: [],
HANDWRITING_STYLE_SETTINGS_KEY: {
@@ -148,6 +153,18 @@ def _clamp_cards_per_page(value: int) -> int:
return max(1, min(200, value))
def _clamp_processing_log_document_sessions(value: int) -> int:
"""Clamps the number of recent document log sessions kept during cleanup."""
return max(0, min(20, value))
def _clamp_processing_log_unbound_entries(value: int) -> int:
"""Clamps retained unbound processing log events kept during cleanup."""
return max(0, min(400, value))
def _clamp_predefined_entries_limit(value: int) -> int:
"""Clamps maximum count for predefined tag/path catalog entries."""
@@ -401,6 +418,28 @@ def _normalize_display_settings(payload: dict[str, Any], defaults: dict[str, Any
}
def _normalize_processing_log_retention(payload: dict[str, Any], defaults: dict[str, Any]) -> dict[str, int]:
    """Normalizes processing log retention settings used by API and worker cleanup defaults.

    Non-dict payloads are treated as empty; every value is coerced via ``_safe_int``
    and clamped to its allowed range, with ``defaults`` (then hard-coded 2/80)
    supplying fallbacks.
    """
    source = payload if isinstance(payload, dict) else {}
    # Clamp the fallbacks first so a malformed defaults dict cannot leak
    # out-of-range values through the payload lookups below.
    fallback_sessions = _clamp_processing_log_document_sessions(
        _safe_int(defaults.get("keep_document_sessions", 2), 2)
    )
    fallback_unbound = _clamp_processing_log_unbound_entries(
        _safe_int(defaults.get("keep_unbound_entries", 80), 80)
    )
    sessions = _clamp_processing_log_document_sessions(
        _safe_int(source.get("keep_document_sessions", fallback_sessions), fallback_sessions)
    )
    unbound = _clamp_processing_log_unbound_entries(
        _safe_int(source.get("keep_unbound_entries", fallback_unbound), fallback_unbound)
    )
    return {"keep_document_sessions": sessions, "keep_unbound_entries": unbound}
def _normalize_predefined_paths(
payload: Any,
existing_items: list[dict[str, Any]] | None = None,
@@ -567,6 +606,10 @@ def _sanitize_settings(payload: dict[str, Any]) -> dict[str, Any]:
normalized_tasks = _normalize_tasks(tasks_payload, provider_ids)
upload_defaults = _normalize_upload_defaults(payload.get("upload_defaults", {}), defaults["upload_defaults"])
display_settings = _normalize_display_settings(payload.get("display", {}), defaults["display"])
processing_log_retention = _normalize_processing_log_retention(
payload.get(PROCESSING_LOG_RETENTION_SETTINGS_KEY, {}),
defaults[PROCESSING_LOG_RETENTION_SETTINGS_KEY],
)
predefined_paths = _normalize_predefined_paths(
payload.get(PREDEFINED_PATHS_SETTINGS_KEY, []),
existing_items=payload.get(PREDEFINED_PATHS_SETTINGS_KEY, []),
@@ -583,6 +626,7 @@ def _sanitize_settings(payload: dict[str, Any]) -> dict[str, Any]:
return {
"upload_defaults": upload_defaults,
"display": display_settings,
PROCESSING_LOG_RETENTION_SETTINGS_KEY: processing_log_retention,
PREDEFINED_PATHS_SETTINGS_KEY: predefined_paths,
PREDEFINED_TAGS_SETTINGS_KEY: predefined_tags,
HANDWRITING_STYLE_SETTINGS_KEY: handwriting_style_settings,
@@ -645,6 +689,10 @@ def read_app_settings() -> dict[str, Any]:
return {
"upload_defaults": payload.get("upload_defaults", {"logical_path": "Inbox", "tags": []}),
"display": payload.get("display", {"cards_per_page": 12, "log_typing_animation_enabled": True}),
PROCESSING_LOG_RETENTION_SETTINGS_KEY: payload.get(
PROCESSING_LOG_RETENTION_SETTINGS_KEY,
_default_settings()[PROCESSING_LOG_RETENTION_SETTINGS_KEY],
),
PREDEFINED_PATHS_SETTINGS_KEY: payload.get(PREDEFINED_PATHS_SETTINGS_KEY, []),
PREDEFINED_TAGS_SETTINGS_KEY: payload.get(PREDEFINED_TAGS_SETTINGS_KEY, []),
HANDWRITING_STYLE_SETTINGS_KEY: payload.get(HANDWRITING_STYLE_SETTINGS_KEY, {}),
@@ -687,16 +735,23 @@ def update_app_settings(
tasks: dict[str, dict[str, Any]] | None = None,
upload_defaults: dict[str, Any] | None = None,
display: dict[str, Any] | None = None,
processing_log_retention: dict[str, Any] | None = None,
handwriting_style: dict[str, Any] | None = None,
predefined_paths: list[dict[str, Any]] | None = None,
predefined_tags: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
"""Updates app settings, persists them, and returns API-safe values."""
"""Updates app settings blocks, persists them, and returns API-safe values."""
current_payload = _read_raw_settings()
next_payload: dict[str, Any] = {
"upload_defaults": dict(current_payload.get("upload_defaults", {"logical_path": "Inbox", "tags": []})),
"display": dict(current_payload.get("display", {"cards_per_page": 12, "log_typing_animation_enabled": True})),
PROCESSING_LOG_RETENTION_SETTINGS_KEY: dict(
current_payload.get(
PROCESSING_LOG_RETENTION_SETTINGS_KEY,
_default_settings()[PROCESSING_LOG_RETENTION_SETTINGS_KEY],
)
),
PREDEFINED_PATHS_SETTINGS_KEY: list(current_payload.get(PREDEFINED_PATHS_SETTINGS_KEY, [])),
PREDEFINED_TAGS_SETTINGS_KEY: list(current_payload.get(PREDEFINED_TAGS_SETTINGS_KEY, [])),
HANDWRITING_STYLE_SETTINGS_KEY: dict(
@@ -766,6 +821,13 @@ def update_app_settings(
next_display["log_typing_animation_enabled"] = bool(display["log_typing_animation_enabled"])
next_payload["display"] = next_display
if processing_log_retention is not None and isinstance(processing_log_retention, dict):
next_retention = dict(next_payload.get(PROCESSING_LOG_RETENTION_SETTINGS_KEY, {}))
for key in ("keep_document_sessions", "keep_unbound_entries"):
if key in processing_log_retention:
next_retention[key] = processing_log_retention[key]
next_payload[PROCESSING_LOG_RETENTION_SETTINGS_KEY] = next_retention
if handwriting_style is not None and isinstance(handwriting_style, dict):
next_handwriting_style = dict(next_payload.get(HANDWRITING_STYLE_SETTINGS_KEY, {}))
for key in (
@@ -828,6 +890,17 @@ def read_handwriting_style_settings() -> dict[str, Any]:
)
def read_processing_log_retention_settings() -> dict[str, int]:
    """Returns normalized processing log retention defaults used by worker and trim APIs."""
    stored = _read_raw_settings().get(PROCESSING_LOG_RETENTION_SETTINGS_KEY, {})
    fallback = _default_settings()[PROCESSING_LOG_RETENTION_SETTINGS_KEY]
    # Normalization clamps/coerces both the stored block and the fallbacks.
    return _normalize_processing_log_retention(stored, fallback)
def read_predefined_paths_settings() -> list[dict[str, Any]]:
"""Returns normalized predefined logical path catalog entries."""

View File

@@ -5,10 +5,15 @@ from datetime import UTC, datetime
from pathlib import Path
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.db.base import SessionLocal
from app.models.document import Document, DocumentStatus
from app.services.app_settings import read_handwriting_provider_settings, read_handwriting_style_settings
from app.services.app_settings import (
read_handwriting_provider_settings,
read_handwriting_style_settings,
read_processing_log_retention_settings,
)
from app.services.extractor import (
IMAGE_EXTENSIONS,
extract_archive_members,
@@ -32,6 +37,17 @@ from app.services.storage import absolute_path, compute_sha256, store_bytes, wri
from app.worker.queue import get_processing_queue
def _cleanup_processing_logs_with_settings(session: Session) -> None:
    """Trims old processing log entries using the persisted retention limits."""
    limits = read_processing_log_retention_settings()
    keep_sessions = int(limits.get("keep_document_sessions", 2))
    keep_unbound = int(limits.get("keep_unbound_entries", 80))
    cleanup_processing_logs(
        session=session,
        keep_document_sessions=keep_sessions,
        keep_unbound_entries=keep_unbound,
    )
def _create_archive_member_document(
parent: Document,
member_name: str,
@@ -204,7 +220,7 @@ def process_document_task(document_id: str) -> None:
document=document,
payload_json={"status": document.status.value},
)
cleanup_processing_logs(session=session, keep_document_sessions=2, keep_unbound_entries=80)
_cleanup_processing_logs_with_settings(session=session)
session.commit()
for child_id in child_ids:
queue.enqueue("app.worker.tasks.process_document_task", child_id)
@@ -239,7 +255,7 @@ def process_document_task(document_id: str) -> None:
document=document,
payload_json={"status": document.status.value},
)
cleanup_processing_logs(session=session, keep_document_sessions=2, keep_unbound_entries=80)
_cleanup_processing_logs_with_settings(session=session)
session.commit()
return
@@ -330,7 +346,7 @@ def process_document_task(document_id: str) -> None:
document=document,
payload_json={"status": document.status.value},
)
cleanup_processing_logs(session=session, keep_document_sessions=2, keep_unbound_entries=80)
_cleanup_processing_logs_with_settings(session=session)
session.commit()
return
@@ -362,7 +378,7 @@ def process_document_task(document_id: str) -> None:
document=document,
payload_json={"status": document.status.value},
)
cleanup_processing_logs(session=session, keep_document_sessions=2, keep_unbound_entries=80)
_cleanup_processing_logs_with_settings(session=session)
session.commit()
return
@@ -540,5 +556,5 @@ def process_document_task(document_id: str) -> None:
document=document,
payload_json={"status": document.status.value},
)
cleanup_processing_logs(session=session, keep_document_sessions=2, keep_unbound_entries=80)
_cleanup_processing_logs_with_settings(session=session)
session.commit()