Persist processing-log retention settings and wire cleanup defaults

This commit is contained in:
2026-02-21 12:05:48 -03:00
parent 992f897878
commit 4beab4bc09
9 changed files with 284 additions and 16 deletions

View File

@@ -15,6 +15,7 @@ TASK_OCR_HANDWRITING = "ocr_handwriting"
TASK_SUMMARY_GENERATION = "summary_generation"
TASK_ROUTING_CLASSIFICATION = "routing_classification"
HANDWRITING_STYLE_SETTINGS_KEY = "handwriting_style_clustering"
PROCESSING_LOG_RETENTION_SETTINGS_KEY = "processing_log_retention"
PREDEFINED_PATHS_SETTINGS_KEY = "predefined_paths"
PREDEFINED_TAGS_SETTINGS_KEY = "predefined_tags"
DEFAULT_HANDWRITING_STYLE_EMBED_MODEL = "ts/clip-vit-b-p32"
@@ -65,6 +66,10 @@ def _default_settings() -> dict[str, Any]:
"cards_per_page": 12,
"log_typing_animation_enabled": True,
},
PROCESSING_LOG_RETENTION_SETTINGS_KEY: {
"keep_document_sessions": 2,
"keep_unbound_entries": 80,
},
PREDEFINED_PATHS_SETTINGS_KEY: [],
PREDEFINED_TAGS_SETTINGS_KEY: [],
HANDWRITING_STYLE_SETTINGS_KEY: {
@@ -148,6 +153,18 @@ def _clamp_cards_per_page(value: int) -> int:
return max(1, min(200, value))
def _clamp_processing_log_document_sessions(value: int) -> int:
"""Clamps the number of recent document log sessions kept during cleanup."""
return max(0, min(20, value))
def _clamp_processing_log_unbound_entries(value: int) -> int:
"""Clamps retained unbound processing log events kept during cleanup."""
return max(0, min(400, value))
def _clamp_predefined_entries_limit(value: int) -> int:
"""Clamps maximum count for predefined tag/path catalog entries."""
@@ -401,6 +418,28 @@ def _normalize_display_settings(payload: dict[str, Any], defaults: dict[str, Any
}
def _normalize_processing_log_retention(payload: dict[str, Any], defaults: dict[str, Any]) -> dict[str, int]:
    """Builds a clamped processing log retention mapping from a raw payload and its defaults.

    A payload that is not a dict is treated as empty. Each default is read from
    ``defaults`` (2 sessions / 80 unbound entries when the key is absent), passed
    through ``_safe_int`` and clamped. Each payload value is then passed through
    ``_safe_int`` with the clamped default as its second argument and clamped again.
    """
    source = payload if isinstance(payload, dict) else {}

    # Resolve the defaults first so they can back any missing payload keys.
    fallback_sessions = _clamp_processing_log_document_sessions(
        _safe_int(defaults.get("keep_document_sessions", 2), 2)
    )
    fallback_unbound = _clamp_processing_log_unbound_entries(
        _safe_int(defaults.get("keep_unbound_entries", 80), 80)
    )

    raw_sessions = source.get("keep_document_sessions", fallback_sessions)
    kept_sessions = _clamp_processing_log_document_sessions(
        _safe_int(raw_sessions, fallback_sessions)
    )

    raw_unbound = source.get("keep_unbound_entries", fallback_unbound)
    kept_unbound = _clamp_processing_log_unbound_entries(
        _safe_int(raw_unbound, fallback_unbound)
    )

    return {
        "keep_document_sessions": kept_sessions,
        "keep_unbound_entries": kept_unbound,
    }
def _normalize_predefined_paths(
payload: Any,
existing_items: list[dict[str, Any]] | None = None,
@@ -567,6 +606,10 @@ def _sanitize_settings(payload: dict[str, Any]) -> dict[str, Any]:
normalized_tasks = _normalize_tasks(tasks_payload, provider_ids)
upload_defaults = _normalize_upload_defaults(payload.get("upload_defaults", {}), defaults["upload_defaults"])
display_settings = _normalize_display_settings(payload.get("display", {}), defaults["display"])
processing_log_retention = _normalize_processing_log_retention(
payload.get(PROCESSING_LOG_RETENTION_SETTINGS_KEY, {}),
defaults[PROCESSING_LOG_RETENTION_SETTINGS_KEY],
)
predefined_paths = _normalize_predefined_paths(
payload.get(PREDEFINED_PATHS_SETTINGS_KEY, []),
existing_items=payload.get(PREDEFINED_PATHS_SETTINGS_KEY, []),
@@ -583,6 +626,7 @@ def _sanitize_settings(payload: dict[str, Any]) -> dict[str, Any]:
return {
"upload_defaults": upload_defaults,
"display": display_settings,
PROCESSING_LOG_RETENTION_SETTINGS_KEY: processing_log_retention,
PREDEFINED_PATHS_SETTINGS_KEY: predefined_paths,
PREDEFINED_TAGS_SETTINGS_KEY: predefined_tags,
HANDWRITING_STYLE_SETTINGS_KEY: handwriting_style_settings,
@@ -645,6 +689,10 @@ def read_app_settings() -> dict[str, Any]:
return {
"upload_defaults": payload.get("upload_defaults", {"logical_path": "Inbox", "tags": []}),
"display": payload.get("display", {"cards_per_page": 12, "log_typing_animation_enabled": True}),
PROCESSING_LOG_RETENTION_SETTINGS_KEY: payload.get(
PROCESSING_LOG_RETENTION_SETTINGS_KEY,
_default_settings()[PROCESSING_LOG_RETENTION_SETTINGS_KEY],
),
PREDEFINED_PATHS_SETTINGS_KEY: payload.get(PREDEFINED_PATHS_SETTINGS_KEY, []),
PREDEFINED_TAGS_SETTINGS_KEY: payload.get(PREDEFINED_TAGS_SETTINGS_KEY, []),
HANDWRITING_STYLE_SETTINGS_KEY: payload.get(HANDWRITING_STYLE_SETTINGS_KEY, {}),
@@ -687,16 +735,23 @@ def update_app_settings(
tasks: dict[str, dict[str, Any]] | None = None,
upload_defaults: dict[str, Any] | None = None,
display: dict[str, Any] | None = None,
processing_log_retention: dict[str, Any] | None = None,
handwriting_style: dict[str, Any] | None = None,
predefined_paths: list[dict[str, Any]] | None = None,
predefined_tags: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
"""Updates app settings, persists them, and returns API-safe values."""
"""Updates app settings blocks, persists them, and returns API-safe values."""
current_payload = _read_raw_settings()
next_payload: dict[str, Any] = {
"upload_defaults": dict(current_payload.get("upload_defaults", {"logical_path": "Inbox", "tags": []})),
"display": dict(current_payload.get("display", {"cards_per_page": 12, "log_typing_animation_enabled": True})),
PROCESSING_LOG_RETENTION_SETTINGS_KEY: dict(
current_payload.get(
PROCESSING_LOG_RETENTION_SETTINGS_KEY,
_default_settings()[PROCESSING_LOG_RETENTION_SETTINGS_KEY],
)
),
PREDEFINED_PATHS_SETTINGS_KEY: list(current_payload.get(PREDEFINED_PATHS_SETTINGS_KEY, [])),
PREDEFINED_TAGS_SETTINGS_KEY: list(current_payload.get(PREDEFINED_TAGS_SETTINGS_KEY, [])),
HANDWRITING_STYLE_SETTINGS_KEY: dict(
@@ -766,6 +821,13 @@ def update_app_settings(
next_display["log_typing_animation_enabled"] = bool(display["log_typing_animation_enabled"])
next_payload["display"] = next_display
if processing_log_retention is not None and isinstance(processing_log_retention, dict):
next_retention = dict(next_payload.get(PROCESSING_LOG_RETENTION_SETTINGS_KEY, {}))
for key in ("keep_document_sessions", "keep_unbound_entries"):
if key in processing_log_retention:
next_retention[key] = processing_log_retention[key]
next_payload[PROCESSING_LOG_RETENTION_SETTINGS_KEY] = next_retention
if handwriting_style is not None and isinstance(handwriting_style, dict):
next_handwriting_style = dict(next_payload.get(HANDWRITING_STYLE_SETTINGS_KEY, {}))
for key in (
@@ -828,6 +890,17 @@ def read_handwriting_style_settings() -> dict[str, Any]:
)
def read_processing_log_retention_settings() -> dict[str, int]:
    """Loads the stored processing log retention block and returns it normalized.

    Reads the raw settings, takes the retention section (an empty dict when the
    key is absent) and normalizes it against the built-in retention defaults.
    """
    stored_settings = _read_raw_settings()
    retention_defaults = _default_settings()[PROCESSING_LOG_RETENTION_SETTINGS_KEY]
    stored_retention = stored_settings.get(PROCESSING_LOG_RETENTION_SETTINGS_KEY, {})
    return _normalize_processing_log_retention(stored_retention, retention_defaults)
def read_predefined_paths_settings() -> list[dict[str, Any]]:
"""Returns normalized predefined logical path catalog entries."""