Harden security controls from REPORT findings

This commit is contained in:
2026-03-01 13:32:08 -03:00
parent da5cbc2c01
commit bdd97d1c62
20 changed files with 1455 additions and 97 deletions

View File

@@ -1,7 +1,13 @@
"""Persistent single-user application settings service backed by host-mounted storage."""
import base64
import binascii
import hashlib
import hmac
import json
import os
import re
import secrets
from pathlib import Path
from typing import Any
@@ -57,6 +63,172 @@ DEFAULT_ROUTING_PROMPT = (
"Confidence must be between 0 and 1."
)
# Marker placed (followed by ":") in front of encoded provider API key ciphertext;
# stored values without this marker are treated as legacy plaintext when decrypting.
PROVIDER_API_KEY_CIPHERTEXT_PREFIX = "enc-v1"
# File name, relative to the settings storage root, of the locally persisted master key.
PROVIDER_API_KEY_KEYFILE_NAME = ".settings-api-key"
# Byte prefix mixed into every HMAC input used to derive keystream blocks.
PROVIDER_API_KEY_STREAM_CONTEXT = b"dcm-provider-api-key-stream"
# Byte prefix mixed into the HMAC input used to compute the integrity tag, so the
# tag computation never shares an input with keystream derivation.
PROVIDER_API_KEY_AUTH_CONTEXT = b"dcm-provider-api-key-auth"
# Number of random nonce bytes generated for each encryption.
PROVIDER_API_KEY_NONCE_BYTES = 16
# Length of the appended integrity tag; matches the 32-byte HMAC-SHA256 digest size.
PROVIDER_API_KEY_TAG_BYTES = 32
def _settings_api_key_path() -> Path:
    """Builds the location of the persisted symmetric encryption key under the storage root."""
    storage_root = settings.storage_root
    return storage_root / PROVIDER_API_KEY_KEYFILE_NAME
def _write_private_text_file(path: Path, content: str) -> None:
    """Writes text files with restrictive owner-only permissions for local secret material.

    Args:
        path: Destination file. Missing parent directories are created.
        content: Text to persist as UTF-8; any existing file content is replaced.

    Raises:
        OSError: If the directory cannot be created, or the file cannot be opened,
            re-permissioned, or written.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # The 0o600 mode passed here only takes effect when this call creates the file;
    # an already existing file keeps whatever permissions it had.
    file_descriptor = os.open(str(path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(file_descriptor, "w", encoding="utf-8") as handle:
        # Tighten permissions BEFORE writing so secret content never lands in a file
        # that still carries looser permissions from an earlier writer.
        os.chmod(path, 0o600)
        handle.write(content)
def _urlsafe_b64encode_no_padding(data: bytes) -> str:
    """Returns the URL-safe base64 text for the given bytes with trailing "=" padding removed."""
    encoded = base64.urlsafe_b64encode(data)
    unpadded = encoded.rstrip(b"=")
    return unpadded.decode("ascii")
def _urlsafe_b64decode_no_padding(data: str) -> bytes:
    """Restores any omitted trailing "=" padding and decodes the URL-safe base64 text."""
    missing_padding = -len(data) % 4
    restored = f"{data}{'=' * missing_padding}"
    return base64.urlsafe_b64decode(restored.encode("ascii"))
def _derive_provider_api_key_key() -> bytes:
    """Resolves the 32-byte master key used to encrypt provider API keys for settings storage.

    Resolution order:
      1. A configured ``settings.app_settings_encryption_key``: its first 32 bytes when it
         decodes as URL-safe base64 to at least 32 bytes, otherwise its SHA-256 digest.
      2. A previously persisted key file that decodes to at least 32 bytes.
      3. A freshly generated random key, which is written to the key file.
    """
    passphrase = settings.app_settings_encryption_key.strip()
    if passphrase:
        try:
            configured_bytes = _urlsafe_b64decode_no_padding(passphrase)
        except (binascii.Error, ValueError):
            # Not base64 at all: fall through to hashing the configured text.
            configured_bytes = b""
        if len(configured_bytes) >= 32:
            return configured_bytes[:32]
        return hashlib.sha256(passphrase.encode("utf-8")).digest()

    key_file = _settings_api_key_path()
    if key_file.exists():
        try:
            stored_text = key_file.read_text(encoding="utf-8").strip()
            stored_bytes = _urlsafe_b64decode_no_padding(stored_text)
        except (OSError, UnicodeDecodeError, binascii.Error, ValueError):
            # Unreadable or malformed key file: a new key is generated below.
            stored_bytes = b""
        if len(stored_bytes) >= 32:
            return stored_bytes[:32]

    fresh_key = secrets.token_bytes(32)
    _write_private_text_file(key_file, _urlsafe_b64encode_no_padding(fresh_key))
    return fresh_key
def _xor_bytes(left: bytes, right: bytes) -> bytes:
    """Applies byte-wise XOR for equal-length byte sequences.

    Args:
        left: First operand.
        right: Second operand; must have the same length as ``left``.

    Returns:
        A bytes object of the same length where each byte is ``left[i] ^ right[i]``.

    Raises:
        ValueError: If the operands differ in length. Without this check ``zip`` would
            silently truncate to the shorter operand and drop trailing bytes.
    """
    if len(left) != len(right):
        raise ValueError("XOR operands must have equal length")
    return bytes(first ^ second for first, second in zip(left, right))
def _derive_stream_cipher_bytes(master_key: bytes, nonce: bytes, length: int) -> bytes:
    """Produces ``length`` deterministic keystream bytes from counter-indexed HMAC-SHA256 blocks.

    Each block is the HMAC-SHA256 of the stream context, the nonce, and a 4-byte
    big-endian block counter starting at zero; blocks are concatenated and truncated.
    """
    blocks: list[bytes] = []
    produced = 0
    block_index = 0
    while produced < length:
        message = PROVIDER_API_KEY_STREAM_CONTEXT + nonce + block_index.to_bytes(4, "big")
        digest = hmac.new(master_key, message, hashlib.sha256).digest()
        blocks.append(digest)
        produced += len(digest)
        block_index += 1
    return b"".join(blocks)[:length]
def _encrypt_provider_api_key(value: str) -> str:
    """Produces the prefixed, authenticated ciphertext stored for one provider API key.

    Blank input yields an empty string. Otherwise the result is the ciphertext prefix,
    a colon, and the unpadded URL-safe base64 of nonce + masked key bytes + HMAC tag.
    """
    secret_text = value.strip()
    if secret_text == "":
        return ""

    secret_bytes = secret_text.encode("utf-8")
    key = _derive_provider_api_key_key()
    nonce = secrets.token_bytes(PROVIDER_API_KEY_NONCE_BYTES)
    masked = _xor_bytes(
        secret_bytes,
        _derive_stream_cipher_bytes(key, nonce, len(secret_bytes)),
    )
    # The tag covers both the nonce and the masked bytes.
    authenticator = hmac.new(key, PROVIDER_API_KEY_AUTH_CONTEXT + nonce + masked, hashlib.sha256)
    token = _urlsafe_b64encode_no_padding(nonce + masked + authenticator.digest())
    return ":".join((PROVIDER_API_KEY_CIPHERTEXT_PREFIX, token))
def _decrypt_provider_api_key(value: str) -> str:
    """Recovers a provider API key from stored ciphertext, refusing tampered payloads.

    Blank input yields an empty string, and values without the ciphertext prefix are
    returned stripped as-is. Raises AppSettingsValidationError when the payload is
    missing, not valid base64, truncated, fails the integrity check, or is not UTF-8.
    """
    candidate = value.strip()
    if not candidate:
        return ""
    marker = f"{PROVIDER_API_KEY_CIPHERTEXT_PREFIX}:"
    if not candidate.startswith(marker):
        return candidate

    # Everything after the first colon is the encoded payload.
    _, _, encoded_payload = candidate.partition(":")
    if encoded_payload == "":
        raise AppSettingsValidationError("Provider API key ciphertext is missing payload bytes")
    try:
        payload = _urlsafe_b64decode_no_padding(encoded_payload)
    except (binascii.Error, ValueError) as error:
        raise AppSettingsValidationError("Provider API key ciphertext is not valid base64") from error

    if len(payload) < PROVIDER_API_KEY_NONCE_BYTES + PROVIDER_API_KEY_TAG_BYTES:
        raise AppSettingsValidationError("Provider API key ciphertext payload is truncated")
    nonce = payload[:PROVIDER_API_KEY_NONCE_BYTES]
    received_tag = payload[-PROVIDER_API_KEY_TAG_BYTES:]
    ciphertext = payload[PROVIDER_API_KEY_NONCE_BYTES:-PROVIDER_API_KEY_TAG_BYTES]

    master_key = _derive_provider_api_key_key()
    authenticator = hmac.new(
        master_key,
        PROVIDER_API_KEY_AUTH_CONTEXT + nonce + ciphertext,
        hashlib.sha256,
    )
    # The tag is verified before any bytes are unmasked.
    if not hmac.compare_digest(received_tag, authenticator.digest()):
        raise AppSettingsValidationError("Provider API key ciphertext integrity check failed")

    unmasked = _xor_bytes(
        ciphertext,
        _derive_stream_cipher_bytes(master_key, nonce, len(ciphertext)),
    )
    try:
        decoded = unmasked.decode("utf-8")
    except UnicodeDecodeError as error:
        raise AppSettingsValidationError("Provider API key ciphertext is not valid UTF-8") from error
    return decoded.strip()
def _read_provider_api_key(provider_payload: dict[str, Any]) -> str:
    """Extracts the provider API key from an encrypted or legacy plaintext settings payload.

    A non-blank string under "api_key_encrypted" takes precedence; if it cannot be
    decrypted the result is an empty string. Otherwise the stripped "api_key" value is
    returned, or an empty string when it is absent or None.
    """
    stored_ciphertext = provider_payload.get("api_key_encrypted")
    has_ciphertext = isinstance(stored_ciphertext, str) and bool(stored_ciphertext.strip())
    if has_ciphertext:
        try:
            return _decrypt_provider_api_key(stored_ciphertext)
        except AppSettingsValidationError:
            return ""

    legacy_value = provider_payload.get("api_key")
    return "" if legacy_value is None else str(legacy_value).strip()
def _default_settings() -> dict[str, Any]:
"""Builds default settings including providers and model task bindings."""
@@ -243,8 +415,17 @@ def _normalize_provider(
if provider_type != "openai_compatible":
provider_type = "openai_compatible"
api_key_value = payload.get("api_key", fallback_values.get("api_key", defaults["api_key"]))
api_key = str(api_key_value).strip() if api_key_value is not None else ""
payload_api_key = _read_provider_api_key(payload)
fallback_api_key = _read_provider_api_key(fallback_values)
default_api_key = _read_provider_api_key(defaults)
if "api_key" in payload and payload.get("api_key") is not None:
api_key = str(payload.get("api_key")).strip()
elif payload_api_key:
api_key = payload_api_key
elif fallback_api_key:
api_key = fallback_api_key
else:
api_key = default_api_key
raw_base_url = str(payload.get("base_url", fallback_values.get("base_url", defaults["base_url"]))).strip()
if not raw_base_url:
@@ -266,6 +447,7 @@ def _normalize_provider(
)
),
"api_key": api_key,
"api_key_encrypted": _encrypt_provider_api_key(api_key),
}
@@ -653,6 +835,26 @@ def _sanitize_settings(payload: dict[str, Any]) -> dict[str, Any]:
}
def _serialize_settings_for_storage(payload: dict[str, Any]) -> dict[str, Any]:
    """Converts sanitized runtime payload into storage-safe form without plaintext provider keys.

    Args:
        payload: Runtime settings mapping; its optional "providers" entry is a list of
            provider dictionaries.

    Returns:
        A shallow copy of ``payload`` whose "providers" list holds copies of the provider
        dictionaries with "api_key" removed and "api_key_encrypted" set. A non-blank
        plaintext key is freshly encrypted; otherwise any existing ciphertext is kept.
        Entries that are not dictionaries are dropped. The input is not mutated.
    """
    storage_payload = dict(payload)
    providers_storage: list[dict[str, Any]] = []
    # A missing or None "providers" entry is treated as an empty list.
    for provider in payload.get("providers") or []:
        if not isinstance(provider, dict):
            continue
        provider_storage = dict(provider)
        raw_api_key = provider_storage.pop("api_key", None)
        raw_encrypted = provider_storage.get("api_key_encrypted")
        # None must become "": str(None) would yield the literal text "None", which
        # would then be encrypted or stored as if it were a real credential.
        plaintext_api_key = "" if raw_api_key is None else str(raw_api_key).strip()
        encrypted_api_key = "" if raw_encrypted is None else str(raw_encrypted).strip()
        if plaintext_api_key:
            encrypted_api_key = _encrypt_provider_api_key(plaintext_api_key)
        provider_storage["api_key_encrypted"] = encrypted_api_key
        providers_storage.append(provider_storage)
    storage_payload["providers"] = providers_storage
    return storage_payload
def ensure_app_settings() -> None:
"""Creates a settings file with defaults when no persisted settings are present."""
@@ -662,7 +864,7 @@ def ensure_app_settings() -> None:
return
defaults = _sanitize_settings(_default_settings())
path.write_text(json.dumps(defaults, indent=2), encoding="utf-8")
_write_private_text_file(path, json.dumps(_serialize_settings_for_storage(defaults), indent=2))
def _read_raw_settings() -> dict[str, Any]:
@@ -682,7 +884,8 @@ def _write_settings(payload: dict[str, Any]) -> None:
path = _settings_path()
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
storage_payload = _serialize_settings_for_storage(payload)
_write_private_text_file(path, json.dumps(storage_payload, indent=2))
def read_app_settings() -> dict[str, Any]:
@@ -879,16 +1082,21 @@ def update_app_settings(
def read_handwriting_provider_settings() -> dict[str, Any]:
"""Returns OCR settings in legacy shape for the handwriting transcription service."""
"""Returns OCR settings in legacy shape with DNS-revalidated provider base URL safety checks."""
runtime = read_task_runtime_settings(TASK_OCR_HANDWRITING)
provider = runtime["provider"]
task = runtime["task"]
raw_base_url = str(provider.get("base_url", settings.default_openai_base_url))
try:
normalized_base_url = normalize_and_validate_provider_base_url(raw_base_url, resolve_dns=True)
except ValueError as error:
raise AppSettingsValidationError(str(error)) from error
return {
"provider": provider["provider_type"],
"enabled": bool(task.get("enabled", True)),
"openai_base_url": str(provider.get("base_url", settings.default_openai_base_url)),
"openai_base_url": normalized_base_url,
"openai_model": str(task.get("model", settings.default_openai_model)),
"openai_timeout_seconds": int(provider.get("timeout_seconds", settings.default_openai_timeout_seconds)),
"openai_api_key": str(provider.get("api_key", "")),