Harden settings read sanitization for invalid providers

This commit is contained in:
2026-02-21 15:15:43 -03:00
parent a69702f099
commit 1cb6bfee58
5 changed files with 162 additions and 3 deletions

View File

@@ -587,7 +587,7 @@ def _normalize_handwriting_style_settings(payload: dict[str, Any], defaults: dic
def _sanitize_settings(payload: dict[str, Any]) -> dict[str, Any]: def _sanitize_settings(payload: dict[str, Any]) -> dict[str, Any]:
"""Sanitizes all persisted settings into a stable normalized structure.""" """Sanitizes persisted settings into a stable structure while tolerating corrupt provider rows."""
if not isinstance(payload, dict): if not isinstance(payload, dict):
payload = {} payload = {}
@@ -603,7 +603,14 @@ def _sanitize_settings(payload: dict[str, Any]) -> dict[str, Any]:
if not isinstance(provider_payload, dict): if not isinstance(provider_payload, dict):
continue continue
fallback = defaults["providers"][0] fallback = defaults["providers"][0]
candidate = _normalize_provider(provider_payload, fallback_id=f"provider-{index + 1}", fallback_values=fallback) try:
candidate = _normalize_provider(
provider_payload,
fallback_id=f"provider-{index + 1}",
fallback_values=fallback,
)
except AppSettingsValidationError:
continue
if candidate["id"] in seen_provider_ids: if candidate["id"] in seen_provider_ids:
continue continue
seen_provider_ids.add(candidate["id"]) seen_provider_ids.add(candidate["id"])

View File

@@ -0,0 +1,149 @@
"""Unit coverage for resilient provider sanitization in persisted app settings."""
from __future__ import annotations
import sys
import unittest
from pathlib import Path
from types import ModuleType
from typing import Any
from unittest.mock import patch
BACKEND_ROOT = Path(__file__).resolve().parents[1]
if str(BACKEND_ROOT) not in sys.path:
sys.path.insert(0, str(BACKEND_ROOT))
if "pydantic_settings" not in sys.modules:
pydantic_settings_stub = ModuleType("pydantic_settings")
class _BaseSettings:
"""Minimal BaseSettings replacement for dependency-light unit test execution."""
def __init__(self, **kwargs: object) -> None:
for key, value in kwargs.items():
setattr(self, key, value)
def _settings_config_dict(**kwargs: object) -> dict[str, object]:
"""Returns configuration values using dict semantics expected by settings module."""
return kwargs
pydantic_settings_stub.BaseSettings = _BaseSettings
pydantic_settings_stub.SettingsConfigDict = _settings_config_dict
sys.modules["pydantic_settings"] = pydantic_settings_stub
from app.services import app_settings
def _sample_current_payload() -> dict[str, Any]:
"""Builds a sanitized payload used as in-memory persistence fixture for update tests."""
return app_settings._sanitize_settings(app_settings._default_settings())
class AppSettingsProviderResilienceTests(unittest.TestCase):
"""Verifies read-path resilience for corrupt persisted providers without weakening writes."""
def test_sanitize_settings_skips_invalid_persisted_provider_entries(self) -> None:
"""Invalid persisted providers are skipped and tasks rebind to remaining valid providers."""
payload = {
"providers": [
{
"id": "insecure-provider",
"label": "Insecure Provider",
"provider_type": "openai_compatible",
"base_url": "http://api.openai.com/v1",
"timeout_seconds": 45,
"api_key": "",
},
{
"id": "secure-provider",
"label": "Secure Provider",
"provider_type": "openai_compatible",
"base_url": "https://api.openai.com/v1",
"timeout_seconds": 45,
"api_key": "",
},
],
"tasks": {
app_settings.TASK_OCR_HANDWRITING: {"provider_id": "insecure-provider"},
app_settings.TASK_SUMMARY_GENERATION: {"provider_id": "insecure-provider"},
app_settings.TASK_ROUTING_CLASSIFICATION: {"provider_id": "insecure-provider"},
},
}
sanitized = app_settings._sanitize_settings(payload)
self.assertEqual([provider["id"] for provider in sanitized["providers"]], ["secure-provider"])
self.assertEqual(
sanitized["tasks"][app_settings.TASK_OCR_HANDWRITING]["provider_id"],
"secure-provider",
)
self.assertEqual(
sanitized["tasks"][app_settings.TASK_SUMMARY_GENERATION]["provider_id"],
"secure-provider",
)
self.assertEqual(
sanitized["tasks"][app_settings.TASK_ROUTING_CLASSIFICATION]["provider_id"],
"secure-provider",
)
def test_sanitize_settings_uses_default_provider_when_all_persisted_entries_are_invalid(self) -> None:
"""Default provider is restored when all persisted provider rows are invalid."""
payload = {
"providers": [
{
"id": "insecure-provider",
"label": "Insecure Provider",
"provider_type": "openai_compatible",
"base_url": "http://api.openai.com/v1",
"timeout_seconds": 45,
"api_key": "",
}
]
}
sanitized = app_settings._sanitize_settings(payload)
defaults = app_settings._default_settings()
default_provider_id = defaults["providers"][0]["id"]
self.assertEqual(sanitized["providers"][0]["id"], default_provider_id)
self.assertEqual(sanitized["providers"][0]["base_url"], defaults["providers"][0]["base_url"])
self.assertEqual(
sanitized["tasks"][app_settings.TASK_OCR_HANDWRITING]["provider_id"],
default_provider_id,
)
self.assertEqual(
sanitized["tasks"][app_settings.TASK_SUMMARY_GENERATION]["provider_id"],
default_provider_id,
)
self.assertEqual(
sanitized["tasks"][app_settings.TASK_ROUTING_CLASSIFICATION]["provider_id"],
default_provider_id,
)
def test_update_app_settings_keeps_provider_base_url_validation_strict(self) -> None:
"""Provider write updates still reject invalid base URLs instead of silently sanitizing."""
current_payload = _sample_current_payload()
current_provider = current_payload["providers"][0]
provider_update = {
"id": current_provider["id"],
"label": current_provider["label"],
"provider_type": current_provider["provider_type"],
"base_url": "http://api.openai.com/v1",
"timeout_seconds": current_provider["timeout_seconds"],
}
with (
patch.object(app_settings, "_read_raw_settings", return_value=current_payload),
patch.object(app_settings, "_write_settings") as write_settings_mock,
):
with self.assertRaises(app_settings.AppSettingsValidationError):
app_settings.update_app_settings(providers=[provider_update])
write_settings_mock.assert_not_called()
if __name__ == "__main__":
unittest.main()

View File

@@ -8,5 +8,5 @@ This directory contains technical documentation for DMS.
- `architecture-overview.md` - backend, frontend, and infrastructure architecture - `architecture-overview.md` - backend, frontend, and infrastructure architecture
- `api-contract.md` - API endpoint contract grouped by route module, including token auth roles, upload limits, and settings or processing-log security constraints - `api-contract.md` - API endpoint contract grouped by route module, including token auth roles, upload limits, and settings or processing-log security constraints
- `data-model-reference.md` - database entity definitions and lifecycle states - `data-model-reference.md` - database entity definitions and lifecycle states
- `operations-and-configuration.md` - runtime operations, hardened compose defaults, security environment variables, and persisted settings configuration - `operations-and-configuration.md` - runtime operations, hardened compose defaults, security environment variables, and persisted settings configuration and read-sanitization behavior
- `frontend-design-foundation.md` - frontend visual system, tokens, UI implementation rules, processing-log timeline behavior, and settings helper-copy guidance - `frontend-design-foundation.md` - frontend visual system, tokens, UI implementation rules, processing-log timeline behavior, and settings helper-copy guidance

View File

@@ -124,6 +124,7 @@ Primary implementation modules:
- `GET /settings` - `GET /settings`
- Response model: `AppSettingsResponse` - Response model: `AppSettingsResponse`
- persisted providers with invalid base URLs are ignored during read sanitization; response falls back to remaining valid providers or secure defaults
- `PATCH /settings` - `PATCH /settings`
- Body model: `AppSettingsUpdateRequest` - Body model: `AppSettingsUpdateRequest`
- Response model: `AppSettingsResponse` - Response model: `AppSettingsResponse`

View File

@@ -134,6 +134,8 @@ Settings include:
- predefined paths and tags - predefined paths and tags
- handwriting-style clustering settings - handwriting-style clustering settings
Read sanitization is resilient to corrupt persisted provider rows. If a persisted provider entry fails URL validation, the entry is skipped and defaults are used when no valid provider remains. This prevents unrelated read endpoints from failing due to stale invalid provider data.
Retention settings are used by worker cleanup and by `POST /api/v1/processing/logs/trim` when trim query values are not provided. Retention settings are used by worker cleanup and by `POST /api/v1/processing/logs/trim` when trim query values are not provided.
## Security Controls ## Security Controls