Harden auth, redaction, upload size checks, and compose token requirements
This commit is contained in:
@@ -2,14 +2,118 @@
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import BigInteger, DateTime, ForeignKey, String, Text
|
||||
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
from sqlalchemy.orm import Mapped, mapped_column, validates
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.db.base import Base
|
||||
|
||||
|
||||
# Settings object resolved when this module is imported. The sanitizers below
# read ``processing_log_max_payload_chars`` and ``processing_log_max_text_chars``
# from it to bound stored log content.
settings = get_settings()
|
||||
|
||||
|
||||
# Substrings that mark a payload key as credential-bearing. Matching is done
# on the lower-cased key, so every marker here is lower case.
SENSITIVE_KEY_MARKERS = (
    "api_key",
    "apikey",
    "authorization",
    "bearer",
    "token",
    "secret",
    "password",
    "credential",
    "cookie",
)

# Patterns applied in order to free text; each match is replaced wholesale.
SENSITIVE_TEXT_PATTERNS = (
    # "Authorization: Bearer <token>" / "authorization=bearer <token>".
    re.compile(r"(?i)\bauthorization\b\s*[:=]\s*bearer\s+[a-z0-9._~+/\-]+=*"),
    # A bare "Bearer <token>" without the header name.
    re.compile(r"(?i)\bbearer\s+[a-z0-9._~+/\-]+=*"),
    # Three dot-separated segments of 8+ URL-safe characters (JWT-shaped).
    re.compile(r"\b[a-z0-9_-]{8,}\.[a-z0-9_-]{8,}\.[a-z0-9_-]{8,}\b", flags=re.IGNORECASE),
    # "sk-" followed by at least 16 alphanumerics.
    re.compile(r"(?i)\bsk-[a-z0-9]{16,}\b"),
    # key=value / key: value assignments for well-known secret names.
    re.compile(r"(?i)\b(api[_-]?key|token|secret|password)\b\s*[:=]\s*['\"]?[^\s,'\";]+['\"]?"),
)

# Placeholder written in place of every redacted value or text segment.
REDACTED_TEXT = "[REDACTED]"

# Upper bounds on how many dict entries / sequence items are kept per level.
MAX_PAYLOAD_KEYS = 80
MAX_PAYLOAD_LIST_ITEMS = 80
|
||||
|
||||
|
||||
def _truncate(value: str, limit: int) -> str:
|
||||
"""Truncates long log fields to configured bounds with stable suffix marker."""
|
||||
|
||||
normalized = value.strip()
|
||||
if len(normalized) <= limit:
|
||||
return normalized
|
||||
return normalized[: max(0, limit - 3)] + "..."
|
||||
|
||||
|
||||
def _is_sensitive_key(key: str) -> bool:
    """Reports whether a payload key name looks like it holds credential data.

    The key is stripped and lower-cased, then checked for any substring listed
    in ``SENSITIVE_KEY_MARKERS``.
    """

    lowered = key.strip().lower()
    for marker in SENSITIVE_KEY_MARKERS:
        if marker in lowered:
            return True
    return False
|
||||
|
||||
|
||||
def _redact_sensitive_text(value: str) -> str:
    """Replaces credential-like segments of log text with the redaction marker.

    Every pattern in ``SENSITIVE_TEXT_PATTERNS`` is applied in order, each one
    operating on the output of the previous pass; unmatched text is kept.
    """

    def _marker(_match) -> str:
        # Callable replacement: the marker is inserted literally, with no
        # backslash/group-reference processing by ``re``.
        return REDACTED_TEXT

    scrubbed = value
    for matcher in SENSITIVE_TEXT_PATTERNS:
        scrubbed = matcher.sub(_marker, scrubbed)
    return scrubbed
|
||||
|
||||
|
||||
def sanitize_processing_log_payload_value(value: Any, *, parent_key: str | None = None) -> Any:
    """Sanitizes payload structures by redacting sensitive fields and bounding size.

    Args:
        value: Arbitrary payload value (mapping, sequence, scalar, or other object).
        parent_key: Key under which ``value`` was stored, if any. Items of a
            list or tuple inherit the key of their container.

    Returns:
        ``REDACTED_TEXT`` when ``parent_key`` looks sensitive. Otherwise a
        sanitized copy in which:

        * dicts keep at most ``MAX_PAYLOAD_KEYS`` entries, with keys coerced
          to ``str`` and values sanitized recursively;
        * lists and tuples become lists of at most ``MAX_PAYLOAD_LIST_ITEMS``
          recursively sanitized items;
        * ``int``, ``float``, ``bool`` and ``None`` pass through unchanged;
        * strings, and the ``str()`` form of any other object, are redacted
          and then truncated to ``settings.processing_log_max_payload_chars``.
    """

    if parent_key and _is_sensitive_key(parent_key):
        return REDACTED_TEXT

    if isinstance(value, dict):
        sanitized: dict[str, Any] = {}
        for index, (raw_key, raw_value) in enumerate(value.items()):
            if index >= MAX_PAYLOAD_KEYS:
                break
            key = str(raw_key)
            sanitized[key] = sanitize_processing_log_payload_value(raw_value, parent_key=key)
        return sanitized

    if isinstance(value, (list, tuple)):
        return [
            sanitize_processing_log_payload_value(item, parent_key=parent_key)
            for item in value[:MAX_PAYLOAD_LIST_ITEMS]
        ]

    if isinstance(value, (int, float, bool)) or value is None:
        return value

    text = value if isinstance(value, str) else str(value)
    # Redact BEFORE truncating, for strings and stringified objects alike.
    # Truncating first can cut a credential so that it no longer matches any
    # pattern (leaking its prefix), and redacting last can push the result
    # past the configured bound.
    redacted = _redact_sensitive_text(text)
    return _truncate(redacted, settings.processing_log_max_payload_chars)
|
||||
|
||||
|
||||
def sanitize_processing_log_text(value: str | None) -> str | None:
    """Cleans a prompt/response text field for storage.

    Returns ``None`` for ``None`` input and for text that is empty after
    stripping whitespace. Otherwise the stripped text is redacted and then
    clamped to ``settings.processing_log_max_text_chars``.
    """

    stripped = value.strip() if value is not None else ""
    if not stripped:
        return None
    return _truncate(
        _redact_sensitive_text(stripped),
        settings.processing_log_max_text_chars,
    )
|
||||
|
||||
|
||||
class ProcessingLogEntry(Base):
|
||||
"""Stores a timestamped processing event with optional model prompt and response text."""
|
||||
|
||||
@@ -31,3 +135,17 @@ class ProcessingLogEntry(Base):
|
||||
prompt_text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
response_text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
payload_json: Mapped[dict] = mapped_column(JSONB, nullable=False, default=dict)
|
||||
|
||||
@validates("prompt_text", "response_text")
def _validate_text_fields(self, key: str, value: str | None) -> str | None:
    """Passes assigned prompt/response text through the log text sanitizer.

    ``key`` is the name of the attribute being set; it is not consulted,
    both fields are treated the same way.
    """

    cleaned = sanitize_processing_log_text(value)
    return cleaned
|
||||
|
||||
@validates("payload_json")
def _validate_payload_json(self, key: str, value: dict[str, Any] | None) -> dict[str, Any]:
    """Sanitizes an assigned structured payload, falling back to an empty dict.

    Dict values go through ``sanitize_processing_log_payload_value``; any
    other value (including ``None``) is replaced by ``{}``.
    """

    if isinstance(value, dict):
        return sanitize_processing_log_payload_value(value)
    return {}
|
||||
|
||||
Reference in New Issue
Block a user