diff --git a/backend/app/api/routes_processing_logs.py b/backend/app/api/routes_processing_logs.py index d436107..fcbd051 100644 --- a/backend/app/api/routes_processing_logs.py +++ b/backend/app/api/routes_processing_logs.py @@ -7,6 +7,7 @@ from sqlalchemy.orm import Session from app.db.base import get_session from app.schemas.processing_logs import ProcessingLogEntryResponse, ProcessingLogListResponse +from app.services.app_settings import read_processing_log_retention_settings from app.services.processing_logs import ( cleanup_processing_logs, clear_processing_logs, @@ -42,16 +43,28 @@ def get_processing_logs( @router.post("/trim") def trim_processing_logs( - keep_document_sessions: int = Query(default=2, ge=0, le=20), - keep_unbound_entries: int = Query(default=80, ge=0, le=400), + keep_document_sessions: int | None = Query(default=None, ge=0, le=20), + keep_unbound_entries: int | None = Query(default=None, ge=0, le=400), session: Session = Depends(get_session), ) -> dict[str, int]: - """Deletes old processing logs while keeping recent document sessions and unbound events.""" + """Deletes old processing logs using query values or persisted retention defaults.""" + + retention_defaults = read_processing_log_retention_settings() + resolved_keep_document_sessions = ( + keep_document_sessions + if keep_document_sessions is not None + else int(retention_defaults.get("keep_document_sessions", 2)) + ) + resolved_keep_unbound_entries = ( + keep_unbound_entries + if keep_unbound_entries is not None + else int(retention_defaults.get("keep_unbound_entries", 80)) + ) result = cleanup_processing_logs( session=session, - keep_document_sessions=keep_document_sessions, - keep_unbound_entries=keep_unbound_entries, + keep_document_sessions=resolved_keep_document_sessions, + keep_unbound_entries=resolved_keep_unbound_entries, ) session.commit() return result diff --git a/backend/app/api/routes_settings.py b/backend/app/api/routes_settings.py index a8e0a65..865eb71 100644 --- a/backend/app/api/routes_settings.py +++ b/backend/app/api/routes_settings.py @@ -10,6 +10,7 @@ from app.schemas.settings import ( HandwritingStyleSettingsResponse, HandwritingSettingsUpdateRequest, OcrTaskSettingsResponse, + ProcessingLogRetentionSettingsResponse, ProviderSettingsResponse, RoutingTaskSettingsResponse, SummaryTaskSettingsResponse, @@ -35,6 +36,7 @@ def _build_response(payload: dict) -> AppSettingsResponse: upload_defaults_payload = payload.get("upload_defaults", {}) display_payload = payload.get("display", {}) + processing_log_retention_payload = payload.get("processing_log_retention", {}) providers_payload = payload.get("providers", []) tasks_payload = payload.get("tasks", {}) handwriting_style_payload = payload.get("handwriting_style_clustering", {}) @@ -55,6 +57,10 @@ def _build_response(payload: dict) -> AppSettingsResponse: cards_per_page=int(display_payload.get("cards_per_page", 12)), log_typing_animation_enabled=bool(display_payload.get("log_typing_animation_enabled", True)), ), + processing_log_retention=ProcessingLogRetentionSettingsResponse( + keep_document_sessions=int(processing_log_retention_payload.get("keep_document_sessions", 2)), + keep_unbound_entries=int(processing_log_retention_payload.get("keep_unbound_entries", 80)), + ), handwriting_style_clustering=HandwritingStyleSettingsResponse( enabled=bool(handwriting_style_payload.get("enabled", True)), embed_model=str(handwriting_style_payload.get("embed_model", "ts/clip-vit-b-p32")), @@ -159,6 +165,10 @@ def set_app_settings(payload: AppSettingsUpdateRequest) -> AppSettingsResponse: if payload.display is not None: display_payload = payload.display.model_dump(exclude_none=True) + processing_log_retention_payload = None + if payload.processing_log_retention is not None: + processing_log_retention_payload = payload.processing_log_retention.model_dump(exclude_none=True) + handwriting_style_payload = None if payload.handwriting_style_clustering is not None: handwriting_style_payload = payload.handwriting_style_clustering.model_dump(exclude_none=True) @@ -174,6 +184,7 @@ def set_app_settings(payload: AppSettingsUpdateRequest) -> AppSettingsResponse: tasks=tasks_payload, upload_defaults=upload_defaults_payload, display=display_payload, + processing_log_retention=processing_log_retention_payload, handwriting_style=handwriting_style_payload, predefined_paths=predefined_paths_payload, predefined_tags=predefined_tags_payload, diff --git a/backend/app/schemas/settings.py b/backend/app/schemas/settings.py index 29abd4c..2bbae5e 100644 --- a/backend/app/schemas/settings.py +++ b/backend/app/schemas/settings.py @@ -127,6 +127,20 @@ class DisplaySettingsUpdateRequest(BaseModel): log_typing_animation_enabled: bool | None = None +class ProcessingLogRetentionSettingsResponse(BaseModel): + """Represents retention limits used when pruning processing pipeline logs.""" + + keep_document_sessions: int = Field(default=2, ge=0, le=20) + keep_unbound_entries: int = Field(default=80, ge=0, le=400) + + +class ProcessingLogRetentionSettingsUpdateRequest(BaseModel): + """Represents partial updates for processing log retention limits.""" + + keep_document_sessions: int | None = Field(default=None, ge=0, le=20) + keep_unbound_entries: int | None = Field(default=None, ge=0, le=400) + + class PredefinedPathEntryResponse(BaseModel): """Represents one predefined logical path with global discoverability scope.""" @@ -200,6 +214,7 @@ class AppSettingsResponse(BaseModel): upload_defaults: UploadDefaultsResponse display: DisplaySettingsResponse + processing_log_retention: ProcessingLogRetentionSettingsResponse handwriting_style_clustering: HandwritingStyleSettingsResponse predefined_paths: list[PredefinedPathEntryResponse] = Field(default_factory=list) predefined_tags: list[PredefinedTagEntryResponse] = Field(default_factory=list) @@ -212,6 +227,7 @@ class AppSettingsUpdateRequest(BaseModel): upload_defaults: UploadDefaultsUpdateRequest | None = None display: DisplaySettingsUpdateRequest | None = None + processing_log_retention: ProcessingLogRetentionSettingsUpdateRequest | None = None handwriting_style_clustering: HandwritingStyleSettingsUpdateRequest | None = None predefined_paths: list[PredefinedPathEntryUpdateRequest] | None = None predefined_tags: list[PredefinedTagEntryUpdateRequest] | None = None diff --git a/backend/app/services/app_settings.py b/backend/app/services/app_settings.py index 5709fac..c3d6117 100644 --- a/backend/app/services/app_settings.py +++ b/backend/app/services/app_settings.py @@ -15,6 +15,7 @@ TASK_OCR_HANDWRITING = "ocr_handwriting" TASK_SUMMARY_GENERATION = "summary_generation" TASK_ROUTING_CLASSIFICATION = "routing_classification" HANDWRITING_STYLE_SETTINGS_KEY = "handwriting_style_clustering" +PROCESSING_LOG_RETENTION_SETTINGS_KEY = "processing_log_retention" PREDEFINED_PATHS_SETTINGS_KEY = "predefined_paths" PREDEFINED_TAGS_SETTINGS_KEY = "predefined_tags" DEFAULT_HANDWRITING_STYLE_EMBED_MODEL = "ts/clip-vit-b-p32" @@ -65,6 +66,10 @@ def _default_settings() -> dict[str, Any]: "cards_per_page": 12, "log_typing_animation_enabled": True, }, + PROCESSING_LOG_RETENTION_SETTINGS_KEY: { + "keep_document_sessions": 2, + "keep_unbound_entries": 80, + }, PREDEFINED_PATHS_SETTINGS_KEY: [], PREDEFINED_TAGS_SETTINGS_KEY: [], HANDWRITING_STYLE_SETTINGS_KEY: { @@ -148,6 +153,18 @@ def _clamp_cards_per_page(value: int) -> int: return max(1, min(200, value)) +def _clamp_processing_log_document_sessions(value: int) -> int: + """Clamps the number of recent document log sessions kept during cleanup.""" + + return max(0, min(20, value)) + + +def _clamp_processing_log_unbound_entries(value: int) -> int: + """Clamps retained unbound processing log events kept during cleanup.""" + + return max(0, min(400, value)) + + def _clamp_predefined_entries_limit(value: int) -> int: """Clamps maximum count for predefined tag/path catalog entries.""" @@ -401,6 +418,28 @@ def _normalize_display_settings(payload: dict[str, Any], defaults: dict[str, Any } +def _normalize_processing_log_retention(payload: dict[str, Any], defaults: dict[str, Any]) -> dict[str, int]: + """Normalizes processing log retention settings used by API and worker cleanup defaults.""" + + if not isinstance(payload, dict): + payload = {} + + default_keep_document_sessions = _clamp_processing_log_document_sessions( + _safe_int(defaults.get("keep_document_sessions", 2), 2) + ) + default_keep_unbound_entries = _clamp_processing_log_unbound_entries( + _safe_int(defaults.get("keep_unbound_entries", 80), 80) + ) + return { + "keep_document_sessions": _clamp_processing_log_document_sessions( + _safe_int(payload.get("keep_document_sessions", default_keep_document_sessions), default_keep_document_sessions) + ), + "keep_unbound_entries": _clamp_processing_log_unbound_entries( + _safe_int(payload.get("keep_unbound_entries", default_keep_unbound_entries), default_keep_unbound_entries) + ), + } + + def _normalize_predefined_paths( payload: Any, existing_items: list[dict[str, Any]] | None = None, @@ -567,6 +606,10 @@ def _sanitize_settings(payload: dict[str, Any]) -> dict[str, Any]: normalized_tasks = _normalize_tasks(tasks_payload, provider_ids) upload_defaults = _normalize_upload_defaults(payload.get("upload_defaults", {}), defaults["upload_defaults"]) display_settings = _normalize_display_settings(payload.get("display", {}), defaults["display"]) + processing_log_retention = _normalize_processing_log_retention( + payload.get(PROCESSING_LOG_RETENTION_SETTINGS_KEY, {}), + defaults[PROCESSING_LOG_RETENTION_SETTINGS_KEY], + ) predefined_paths = _normalize_predefined_paths( payload.get(PREDEFINED_PATHS_SETTINGS_KEY, []), existing_items=payload.get(PREDEFINED_PATHS_SETTINGS_KEY, []), @@ -583,6 +626,7 @@ def _sanitize_settings(payload: dict[str, Any]) -> dict[str, Any]: return { "upload_defaults": upload_defaults, "display": display_settings, + PROCESSING_LOG_RETENTION_SETTINGS_KEY: processing_log_retention, PREDEFINED_PATHS_SETTINGS_KEY: predefined_paths, PREDEFINED_TAGS_SETTINGS_KEY: predefined_tags, HANDWRITING_STYLE_SETTINGS_KEY: handwriting_style_settings, @@ -645,6 +689,10 @@ def read_app_settings() -> dict[str, Any]: return { "upload_defaults": payload.get("upload_defaults", {"logical_path": "Inbox", "tags": []}), "display": payload.get("display", {"cards_per_page": 12, "log_typing_animation_enabled": True}), + PROCESSING_LOG_RETENTION_SETTINGS_KEY: payload.get( + PROCESSING_LOG_RETENTION_SETTINGS_KEY, + _default_settings()[PROCESSING_LOG_RETENTION_SETTINGS_KEY], + ), PREDEFINED_PATHS_SETTINGS_KEY: payload.get(PREDEFINED_PATHS_SETTINGS_KEY, []), PREDEFINED_TAGS_SETTINGS_KEY: payload.get(PREDEFINED_TAGS_SETTINGS_KEY, []), HANDWRITING_STYLE_SETTINGS_KEY: payload.get(HANDWRITING_STYLE_SETTINGS_KEY, {}), @@ -687,16 +735,23 @@ def update_app_settings( tasks: dict[str, dict[str, Any]] | None = None, upload_defaults: dict[str, Any] | None = None, display: dict[str, Any] | None = None, + processing_log_retention: dict[str, Any] | None = None, handwriting_style: dict[str, Any] | None = None, predefined_paths: list[dict[str, Any]] | None = None, predefined_tags: list[dict[str, Any]] | None = None, ) -> dict[str, Any]: - """Updates app settings, persists them, and returns API-safe values.""" + """Updates app settings blocks, persists them, and returns API-safe values.""" current_payload = _read_raw_settings() next_payload: dict[str, Any] = { "upload_defaults": dict(current_payload.get("upload_defaults", {"logical_path": "Inbox", "tags": []})), "display": dict(current_payload.get("display", {"cards_per_page": 12, "log_typing_animation_enabled": True})), + PROCESSING_LOG_RETENTION_SETTINGS_KEY: dict( + current_payload.get( + PROCESSING_LOG_RETENTION_SETTINGS_KEY, + _default_settings()[PROCESSING_LOG_RETENTION_SETTINGS_KEY], + ) + ), PREDEFINED_PATHS_SETTINGS_KEY: list(current_payload.get(PREDEFINED_PATHS_SETTINGS_KEY, [])), PREDEFINED_TAGS_SETTINGS_KEY: list(current_payload.get(PREDEFINED_TAGS_SETTINGS_KEY, [])), HANDWRITING_STYLE_SETTINGS_KEY: dict( @@ -766,6 +821,13 @@ def update_app_settings( next_display["log_typing_animation_enabled"] = bool(display["log_typing_animation_enabled"]) next_payload["display"] = next_display + if processing_log_retention is not None and isinstance(processing_log_retention, dict): + next_retention = dict(next_payload.get(PROCESSING_LOG_RETENTION_SETTINGS_KEY, {})) + for key in ("keep_document_sessions", "keep_unbound_entries"): + if key in processing_log_retention: + next_retention[key] = processing_log_retention[key] + next_payload[PROCESSING_LOG_RETENTION_SETTINGS_KEY] = next_retention + if handwriting_style is not None and isinstance(handwriting_style, dict): next_handwriting_style = dict(next_payload.get(HANDWRITING_STYLE_SETTINGS_KEY, {})) for key in ( @@ -828,6 +890,17 @@ def read_handwriting_style_settings() -> dict[str, Any]: ) +def read_processing_log_retention_settings() -> dict[str, int]: + """Returns normalized processing log retention defaults used by worker and trim APIs.""" + + payload = _read_raw_settings() + defaults = _default_settings()[PROCESSING_LOG_RETENTION_SETTINGS_KEY] + return _normalize_processing_log_retention( + payload.get(PROCESSING_LOG_RETENTION_SETTINGS_KEY, {}), + defaults, + ) + + def read_predefined_paths_settings() -> list[dict[str, Any]]: """Returns normalized predefined logical path catalog entries.""" diff --git a/backend/app/worker/tasks.py b/backend/app/worker/tasks.py index fb160f6..f5295e4 100644 --- a/backend/app/worker/tasks.py +++ b/backend/app/worker/tasks.py @@ -5,10 +5,15 @@ from datetime import UTC, datetime from pathlib import Path from sqlalchemy import select +from sqlalchemy.orm import Session from app.db.base import SessionLocal from app.models.document import Document, DocumentStatus -from app.services.app_settings import read_handwriting_provider_settings, read_handwriting_style_settings +from app.services.app_settings import ( + read_handwriting_provider_settings, + read_handwriting_style_settings, + read_processing_log_retention_settings, +) from app.services.extractor import ( IMAGE_EXTENSIONS, extract_archive_members, @@ -32,6 +37,17 @@ from app.services.storage import absolute_path, compute_sha256, store_bytes, wri from app.worker.queue import get_processing_queue +def _cleanup_processing_logs_with_settings(session: Session) -> None: + """Applies configured processing log retention while trimming old log entries.""" + + retention = read_processing_log_retention_settings() + cleanup_processing_logs( + session=session, + keep_document_sessions=int(retention.get("keep_document_sessions", 2)), + keep_unbound_entries=int(retention.get("keep_unbound_entries", 80)), + ) + + def _create_archive_member_document( parent: Document, member_name: str, @@ -204,7 +220,7 @@ def process_document_task(document_id: str) -> None: document=document, payload_json={"status": document.status.value}, ) - cleanup_processing_logs(session=session, keep_document_sessions=2, keep_unbound_entries=80) + _cleanup_processing_logs_with_settings(session=session) session.commit() for child_id in child_ids: queue.enqueue("app.worker.tasks.process_document_task", child_id) @@ -239,7 +255,7 @@ def process_document_task(document_id: str) -> None: document=document, payload_json={"status": document.status.value}, ) - cleanup_processing_logs(session=session, keep_document_sessions=2, keep_unbound_entries=80) + _cleanup_processing_logs_with_settings(session=session) session.commit() return @@ -330,7 +346,7 @@ def process_document_task(document_id: str) -> None: document=document, payload_json={"status": document.status.value}, ) - cleanup_processing_logs(session=session, keep_document_sessions=2, keep_unbound_entries=80) + _cleanup_processing_logs_with_settings(session=session) session.commit() return @@ -362,7 +378,7 @@ def process_document_task(document_id: str) -> None: document=document, payload_json={"status": document.status.value}, ) - cleanup_processing_logs(session=session, keep_document_sessions=2, keep_unbound_entries=80) + _cleanup_processing_logs_with_settings(session=session) session.commit() return @@ -540,5 +556,5 @@ def process_document_task(document_id: str) -> None: document=document, payload_json={"status": document.status.value}, ) - cleanup_processing_logs(session=session, keep_document_sessions=2, keep_unbound_entries=80) + _cleanup_processing_logs_with_settings(session=session) session.commit() diff --git a/backend/tests/test_processing_log_retention_settings.py b/backend/tests/test_processing_log_retention_settings.py new file mode 100644 index 0000000..ab75fd3 --- /dev/null +++ b/backend/tests/test_processing_log_retention_settings.py @@ -0,0 +1,135 @@ +"""Unit coverage for persisted processing log retention settings behavior.""" + +from __future__ import annotations + +import sys +import unittest +from pathlib import Path +from types import ModuleType +from unittest.mock import patch + + +BACKEND_ROOT = Path(__file__).resolve().parents[1] +if str(BACKEND_ROOT) not in sys.path: + sys.path.insert(0, str(BACKEND_ROOT)) + +if "pydantic_settings" not in sys.modules: + pydantic_settings_stub = ModuleType("pydantic_settings") + + class _BaseSettings: + """Minimal BaseSettings replacement for dependency-light unit test execution.""" + + def __init__(self, **kwargs: object) -> None: + for key, value in kwargs.items(): + setattr(self, key, value) + + def _settings_config_dict(**kwargs: object) -> dict[str, object]: + """Returns configuration values using dict semantics expected by settings module.""" + + return kwargs + + pydantic_settings_stub.BaseSettings = _BaseSettings + pydantic_settings_stub.SettingsConfigDict = _settings_config_dict + sys.modules["pydantic_settings"] = pydantic_settings_stub + +from app.schemas.settings import AppSettingsUpdateRequest, ProcessingLogRetentionSettingsUpdateRequest +from app.services import app_settings + + +def _sample_current_payload() -> dict: + """Builds a sanitized payload used as in-memory persistence fixture for update tests.""" + + return app_settings._sanitize_settings(app_settings._default_settings()) + + +class ProcessingLogRetentionSettingsTests(unittest.TestCase): + """Verifies defaulting, sanitization, schema mapping, and update merge behavior.""" + + def test_sanitize_settings_uses_default_retention_values(self) -> None: + """Defaults are restored when persisted payload omits retention settings.""" + + sanitized = app_settings._sanitize_settings({}) + self.assertEqual( + sanitized["processing_log_retention"], + { + "keep_document_sessions": 2, + "keep_unbound_entries": 80, + }, + ) + + def test_sanitize_settings_clamps_retention_values(self) -> None: + """Retention values are clamped to same bounds enforced by trim endpoint query rules.""" + + sanitized = app_settings._sanitize_settings( + { + "processing_log_retention": { + "keep_document_sessions": 99, + "keep_unbound_entries": -5, + } + } + ) + self.assertEqual( + sanitized["processing_log_retention"], + { + "keep_document_sessions": 20, + "keep_unbound_entries": 0, + }, + ) + + def test_update_request_schema_accepts_processing_log_retention_payload(self) -> None: + """Settings PATCH schema keeps retention fields in serialized payloads.""" + + request_payload = AppSettingsUpdateRequest( + processing_log_retention=ProcessingLogRetentionSettingsUpdateRequest( + keep_document_sessions=7, + ) + ) + self.assertEqual( + request_payload.model_dump(exclude_none=True)["processing_log_retention"], + {"keep_document_sessions": 7}, + ) + + def test_update_app_settings_merges_retention_block_and_sanitizes_values(self) -> None: + """Settings updates merge partial retention values and persist sanitized results.""" + + current_payload = _sample_current_payload() + + with ( + patch.object(app_settings, "_read_raw_settings", return_value=current_payload), + patch.object(app_settings, "read_app_settings", return_value={"processing_log_retention": {}}), + patch.object(app_settings, "_write_settings") as write_settings_mock, + ): + app_settings.update_app_settings( + processing_log_retention={ + "keep_document_sessions": 9, + "keep_unbound_entries": 999, + } + ) + + written_payload = write_settings_mock.call_args.args[0] + self.assertEqual( + written_payload["processing_log_retention"], + { + "keep_document_sessions": 9, + "keep_unbound_entries": 400, + }, + ) + + def test_read_processing_log_retention_settings_returns_defaults_when_key_missing(self) -> None: + """Reader falls back to defaults when persisted payload omits retention key.""" + + payload_without_retention = _sample_current_payload() + payload_without_retention.pop("processing_log_retention", None) + with patch.object(app_settings, "_read_raw_settings", return_value=payload_without_retention): + retention = app_settings.read_processing_log_retention_settings() + self.assertEqual( + retention, + { + "keep_document_sessions": 2, + "keep_unbound_entries": 80, + }, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/doc/README.md b/doc/README.md index e4c135e..3521b19 100644 --- a/doc/README.md +++ b/doc/README.md @@ -6,7 +6,7 @@ This directory contains technical documentation for DMS. - `../README.md` - project overview, setup, and quick operations - `architecture-overview.md` - backend, frontend, and infrastructure architecture -- `api-contract.md` - API endpoint contract grouped by route module +- `api-contract.md` - API endpoint contract grouped by route module, including settings and processing-log trim defaults - `data-model-reference.md` - database entity definitions and lifecycle states -- `operations-and-configuration.md` - runtime operations, ports, volumes, and configuration values +- `operations-and-configuration.md` - runtime operations, ports, volumes, and persisted settings configuration - `frontend-design-foundation.md` - frontend visual system, tokens, and UI implementation rules diff --git a/doc/api-contract.md b/doc/api-contract.md index e596462..d37b888 100644 --- a/doc/api-contract.md +++ b/doc/api-contract.md @@ -90,7 +90,8 @@ Primary implementation modules: - Query: `offset`, `limit`, `document_id` - Response model: `ProcessingLogListResponse` - `POST /processing/logs/trim` - - Query: `keep_document_sessions`, `keep_unbound_entries` + - Query: optional `keep_document_sessions`, `keep_unbound_entries` + - Behavior: omitted query values fall back to persisted `/settings.processing_log_retention` - Response: trim counters - `POST /processing/logs/clear` - Response: clear counters @@ -127,4 +128,4 @@ Processing log schemas in `backend/app/schemas/processing_logs.py`: - `ProcessingLogListResponse` Settings schemas in `backend/app/schemas/settings.py`: -- Provider, task, upload-default, display, predefined paths or tags, handwriting-style, and legacy handwriting models grouped under `AppSettingsResponse` and `AppSettingsUpdateRequest`. +- Provider, task, upload-default, display, processing-log retention, predefined paths or tags, handwriting-style, and legacy handwriting models grouped under `AppSettingsResponse` and `AppSettingsUpdateRequest`. diff --git a/doc/operations-and-configuration.md b/doc/operations-and-configuration.md index 5b1c4bb..483aace 100644 --- a/doc/operations-and-configuration.md +++ b/doc/operations-and-configuration.md @@ -97,11 +97,14 @@ Application-level settings managed from the UI are persisted by backend settings Settings include: - upload defaults - display options +- processing-log retention options (`keep_document_sessions`, `keep_unbound_entries`) - provider configuration - OCR, summary, and routing task settings - predefined paths and tags - handwriting-style clustering settings +Retention settings are used by worker cleanup and by `POST /api/v1/processing/logs/trim` when trim query values are not provided. + ## Validation Checklist After operational or configuration changes, verify: