"""Unit coverage for resilient provider sanitization in persisted app settings.""" from __future__ import annotations import sys import unittest from pathlib import Path from types import ModuleType from typing import Any from unittest.mock import patch BACKEND_ROOT = Path(__file__).resolve().parents[1] if str(BACKEND_ROOT) not in sys.path: sys.path.insert(0, str(BACKEND_ROOT)) if "pydantic_settings" not in sys.modules: pydantic_settings_stub = ModuleType("pydantic_settings") class _BaseSettings: """Minimal BaseSettings replacement for dependency-light unit test execution.""" def __init__(self, **kwargs: object) -> None: for key, value in kwargs.items(): setattr(self, key, value) def _settings_config_dict(**kwargs: object) -> dict[str, object]: """Returns configuration values using dict semantics expected by settings module.""" return kwargs pydantic_settings_stub.BaseSettings = _BaseSettings pydantic_settings_stub.SettingsConfigDict = _settings_config_dict sys.modules["pydantic_settings"] = pydantic_settings_stub from app.services import app_settings def _sample_current_payload() -> dict[str, Any]: """Builds a sanitized payload used as in-memory persistence fixture for update tests.""" return app_settings._sanitize_settings(app_settings._default_settings()) class AppSettingsProviderResilienceTests(unittest.TestCase): """Verifies read-path resilience for corrupt persisted providers without weakening writes.""" def test_sanitize_settings_skips_invalid_persisted_provider_entries(self) -> None: """Invalid persisted providers are skipped and tasks rebind to remaining valid providers.""" payload = { "providers": [ { "id": "insecure-provider", "label": "Insecure Provider", "provider_type": "openai_compatible", "base_url": "http://api.openai.com/v1", "timeout_seconds": 45, "api_key": "", }, { "id": "secure-provider", "label": "Secure Provider", "provider_type": "openai_compatible", "base_url": "https://api.openai.com/v1", "timeout_seconds": 45, "api_key": "", }, ], "tasks": { app_settings.TASK_OCR_HANDWRITING: {"provider_id": "insecure-provider"}, app_settings.TASK_SUMMARY_GENERATION: {"provider_id": "insecure-provider"}, app_settings.TASK_ROUTING_CLASSIFICATION: {"provider_id": "insecure-provider"}, }, } sanitized = app_settings._sanitize_settings(payload) self.assertEqual([provider["id"] for provider in sanitized["providers"]], ["secure-provider"]) self.assertEqual( sanitized["tasks"][app_settings.TASK_OCR_HANDWRITING]["provider_id"], "secure-provider", ) self.assertEqual( sanitized["tasks"][app_settings.TASK_SUMMARY_GENERATION]["provider_id"], "secure-provider", ) self.assertEqual( sanitized["tasks"][app_settings.TASK_ROUTING_CLASSIFICATION]["provider_id"], "secure-provider", ) def test_sanitize_settings_uses_default_provider_when_all_persisted_entries_are_invalid(self) -> None: """Default provider is restored when all persisted provider rows are invalid.""" payload = { "providers": [ { "id": "insecure-provider", "label": "Insecure Provider", "provider_type": "openai_compatible", "base_url": "http://api.openai.com/v1", "timeout_seconds": 45, "api_key": "", } ] } sanitized = app_settings._sanitize_settings(payload) defaults = app_settings._default_settings() default_provider_id = defaults["providers"][0]["id"] self.assertEqual(sanitized["providers"][0]["id"], default_provider_id) self.assertEqual(sanitized["providers"][0]["base_url"], defaults["providers"][0]["base_url"]) self.assertEqual( sanitized["tasks"][app_settings.TASK_OCR_HANDWRITING]["provider_id"], default_provider_id, ) self.assertEqual( sanitized["tasks"][app_settings.TASK_SUMMARY_GENERATION]["provider_id"], default_provider_id, ) self.assertEqual( sanitized["tasks"][app_settings.TASK_ROUTING_CLASSIFICATION]["provider_id"], default_provider_id, ) def test_update_app_settings_keeps_provider_base_url_validation_strict(self) -> None: """Provider write updates still reject invalid base URLs instead of silently sanitizing.""" current_payload = _sample_current_payload() current_provider = current_payload["providers"][0] provider_update = { "id": current_provider["id"], "label": current_provider["label"], "provider_type": current_provider["provider_type"], "base_url": "http://api.openai.com/v1", "timeout_seconds": current_provider["timeout_seconds"], } with ( patch.object(app_settings, "_read_raw_settings", return_value=current_payload), patch.object(app_settings, "_write_settings") as write_settings_mock, ): with self.assertRaises(app_settings.AppSettingsValidationError): app_settings.update_app_settings(providers=[provider_update]) write_settings_mock.assert_not_called() def test_sanitize_settings_migrates_legacy_plaintext_api_key_to_encrypted_field(self) -> None: """Legacy plaintext API keys are still readable and emitted with encrypted storage representation.""" payload = { "providers": [ { "id": "secure-provider", "label": "Secure Provider", "provider_type": "openai_compatible", "base_url": "https://api.openai.com/v1", "timeout_seconds": 45, "api_key": "legacy-plaintext-secret", } ], "tasks": { app_settings.TASK_OCR_HANDWRITING: {"provider_id": "secure-provider"}, app_settings.TASK_SUMMARY_GENERATION: {"provider_id": "secure-provider"}, app_settings.TASK_ROUTING_CLASSIFICATION: {"provider_id": "secure-provider"}, }, } with patch.object(app_settings, "_derive_provider_api_key_key", return_value=b"k" * 32): sanitized = app_settings._sanitize_settings(payload) provider = sanitized["providers"][0] self.assertEqual(provider["api_key"], "legacy-plaintext-secret") self.assertTrue( str(provider.get("api_key_encrypted", "")).startswith( f"{app_settings.PROVIDER_API_KEY_CIPHERTEXT_PREFIX}:" ) ) def test_serialize_settings_for_storage_excludes_plaintext_api_key(self) -> None: """Storage payload serialization persists encrypted provider API keys only.""" payload = _sample_current_payload() payload["providers"][0]["api_key"] = "storage-secret" payload["providers"][0]["api_key_encrypted"] = "" with patch.object(app_settings, "_derive_provider_api_key_key", return_value=b"s" * 32): storage_payload = app_settings._serialize_settings_for_storage(payload) provider_storage = storage_payload["providers"][0] self.assertNotIn("api_key", provider_storage) self.assertTrue( str(provider_storage.get("api_key_encrypted", "")).startswith( f"{app_settings.PROVIDER_API_KEY_CIPHERTEXT_PREFIX}:" ) ) def test_read_handwriting_provider_settings_revalidates_dns(self) -> None: """OCR runtime provider settings enforce DNS revalidation before creating outbound clients.""" runtime_payload = { "provider": { "id": "openai-default", "provider_type": "openai_compatible", "base_url": "https://api.openai.com/v1", "timeout_seconds": 45, "api_key": "runtime-secret", }, "task": { "enabled": True, "model": "gpt-4.1-mini", "prompt": "prompt", }, } with ( patch.object(app_settings, "read_task_runtime_settings", return_value=runtime_payload), patch.object( app_settings, "normalize_and_validate_provider_base_url", return_value="https://api.openai.com/v1", ) as normalize_mock, ): runtime_settings = app_settings.read_handwriting_provider_settings() normalize_mock.assert_called_once_with("https://api.openai.com/v1", resolve_dns=True) self.assertEqual(runtime_settings["openai_base_url"], "https://api.openai.com/v1") self.assertEqual(runtime_settings["openai_api_key"], "runtime-secret") if __name__ == "__main__": unittest.main()