"""Persistence helpers for writing and querying processing pipeline log events."""

from typing import Any
from uuid import UUID

from sqlalchemy import delete, func, select
from sqlalchemy.orm import Session

from app.core.config import get_settings
from app.models.document import Document
from app.models.processing_log import ProcessingLogEntry

settings = get_settings()

MAX_STAGE_LENGTH = 64
MAX_EVENT_LENGTH = 256
MAX_LEVEL_LENGTH = 16
MAX_PROVIDER_LENGTH = 128
MAX_MODEL_LENGTH = 256
MAX_DOCUMENT_FILENAME_LENGTH = 512
MAX_PROMPT_LENGTH = 200000
MAX_RESPONSE_LENGTH = 200000
DEFAULT_KEEP_DOCUMENT_SESSIONS = 2
DEFAULT_KEEP_UNBOUND_ENTRIES = 80
PROCESSING_LOG_AUTOCOMMIT_SESSION_KEY = "processing_log_autocommit"


def _trim(value: str | None, max_length: int) -> str | None:
    """Strip surrounding whitespace and cap the text length for safe log persistence.

    Returns ``None`` for ``None`` or whitespace-only input; overlong text is
    truncated so the result (including the ``...`` marker) fits ``max_length``.
    """
    if value is None:
        return None
    text = value.strip()
    if not text:
        return None
    if len(text) > max_length:
        return text[: max_length - 3] + "..."
    return text
# Caps applied when converting payloads into metadata-only descriptors.
_METADATA_MAX_KEYS = 80
_METADATA_PREVIEW_ITEMS = 20


def _safe_payload(payload_json: dict[str, Any] | None) -> dict[str, Any]:
    """Normalizes payload persistence mode using metadata-only defaults for sensitive content.

    When ``processing_log_store_payload_text`` is disabled, raw payload values are
    replaced with non-sensitive metadata descriptors; non-dict input yields ``{}``.
    """
    if not isinstance(payload_json, dict):
        return {}
    if settings.processing_log_store_payload_text:
        return payload_json
    return _metadata_only_payload(payload_json)


def _metadata_only_payload(payload_json: dict[str, Any]) -> dict[str, Any]:
    """Converts payload content into metadata descriptors without persisting raw text values.

    Only the first ``_METADATA_MAX_KEYS`` keys are retained to bound entry size.
    """
    metadata: dict[str, Any] = {}
    for index, (raw_key, raw_value) in enumerate(payload_json.items()):
        if index >= _METADATA_MAX_KEYS:
            break
        metadata[str(raw_key)] = _metadata_only_payload_value(raw_value)
    return metadata


def _metadata_only_payload_value(value: Any) -> Any:
    """Converts one payload value into a non-sensitive metadata representation.

    Scalars (numbers, bools, ``None``) pass through; strings/bytes are reduced to
    length descriptors; containers are summarized recursively.
    """
    if isinstance(value, dict):
        return _metadata_only_payload(value)
    if isinstance(value, (list, tuple)):
        items = list(value)
        return {
            "item_count": len(items),
            "items_preview": [
                _metadata_only_payload_value(item)
                for item in items[:_METADATA_PREVIEW_ITEMS]
            ],
        }
    if isinstance(value, str):
        normalized = value.strip()
        return {
            "text_chars": len(normalized),
            # True when non-empty text existed but was deliberately not stored.
            "text_omitted": bool(normalized),
        }
    if isinstance(value, bytes):
        return {"binary_bytes": len(value)}
    if isinstance(value, (int, float, bool)) or value is None:
        return value
    return {"value_type": type(value).__name__}


def set_processing_log_autocommit(session: Session, enabled: bool) -> None:
    """Toggles per-session immediate commit behavior for processing log events."""
    session.info[PROCESSING_LOG_AUTOCOMMIT_SESSION_KEY] = bool(enabled)


def is_processing_log_autocommit_enabled(session: Session) -> bool:
    """Returns whether processing logs are committed immediately for the current session."""
    return bool(session.info.get(PROCESSING_LOG_AUTOCOMMIT_SESSION_KEY, False))


def log_processing_event(
    session: Session,
    stage: str,
    event: str,
    *,
    level: str = "info",
    document: Document | None = None,
    document_id: UUID | None = None,
    document_filename: str | None = None,
    provider_id: str | None = None,
    model_name: str | None = None,
    prompt_text: str | None = None,
    response_text: str | None = None,
    payload_json: dict[str, Any] | None = None,
) -> None:
    """Persists one processing log entry linked to an optional document context.

    A ``document`` object takes precedence over the explicit ``document_id`` /
    ``document_filename`` values. Prompt/response text is stored only when
    ``processing_log_store_model_io_text`` is enabled; the payload is always
    passed through :func:`_safe_payload`. Commits immediately when the session
    has autocommit enabled via :func:`set_processing_log_autocommit`.
    """
    resolved_document_id = document.id if document is not None else document_id
    resolved_document_filename = (
        document.original_filename if document is not None else document_filename
    )
    store_model_io = settings.processing_log_store_model_io_text
    entry = ProcessingLogEntry(
        # Fall back to safe defaults when trimming yields None (empty input).
        level=_trim(level, MAX_LEVEL_LENGTH) or "info",
        stage=_trim(stage, MAX_STAGE_LENGTH) or "pipeline",
        event=_trim(event, MAX_EVENT_LENGTH) or "event",
        document_id=resolved_document_id,
        document_filename=_trim(resolved_document_filename, MAX_DOCUMENT_FILENAME_LENGTH),
        provider_id=_trim(provider_id, MAX_PROVIDER_LENGTH),
        model_name=_trim(model_name, MAX_MODEL_LENGTH),
        prompt_text=_trim(prompt_text, MAX_PROMPT_LENGTH) if store_model_io else None,
        response_text=_trim(response_text, MAX_RESPONSE_LENGTH) if store_model_io else None,
        payload_json=_safe_payload(payload_json),
    )
    session.add(entry)
    if is_processing_log_autocommit_enabled(session):
        session.commit()


def count_processing_logs(session: Session, document_id: UUID | None = None) -> int:
    """Counts persisted processing logs, optionally restricted to one document."""
    statement = select(func.count()).select_from(ProcessingLogEntry)
    if document_id is not None:
        statement = statement.where(ProcessingLogEntry.document_id == document_id)
    return int(session.execute(statement).scalar_one())


def list_processing_logs(
    session: Session,
    *,
    limit: int,
    offset: int,
    document_id: UUID | None = None,
) -> list[ProcessingLogEntry]:
    """Lists processing logs ordered by newest-first with optional document filter."""
    statement = select(ProcessingLogEntry)
    if document_id is not None:
        statement = statement.where(ProcessingLogEntry.document_id == document_id)
    statement = (
        statement.order_by(
            ProcessingLogEntry.created_at.desc(), ProcessingLogEntry.id.desc()
        )
        .offset(offset)
        .limit(limit)
    )
    # SQLAlchemy 2.0 ``ScalarResult.all()`` returns a Sequence; materialize it
    # so the value matches the declared ``list`` return type.
    return list(session.execute(statement).scalars().all())


def cleanup_processing_logs(
    session: Session,
    *,
    keep_document_sessions: int = DEFAULT_KEEP_DOCUMENT_SESSIONS,
    keep_unbound_entries: int = DEFAULT_KEEP_UNBOUND_ENTRIES,
) -> dict[str, int]:
    """Deletes old log entries while keeping recent document sessions and unbound events.

    Negative keep counts are clamped to zero (delete everything in that
    category). Returns the number of deleted document-bound and unbound entries.
    """
    deleted_document_entries = _prune_document_entries(
        session, max(0, keep_document_sessions)
    )
    deleted_unbound_entries = _prune_unbound_entries(
        session, max(0, keep_unbound_entries)
    )
    return {
        "deleted_document_entries": deleted_document_entries,
        "deleted_unbound_entries": deleted_unbound_entries,
    }


def _prune_document_entries(session: Session, keep_sessions: int) -> int:
    """Deletes document-bound entries outside the ``keep_sessions`` most recently active documents."""
    recent_document_rows = session.execute(
        select(
            ProcessingLogEntry.document_id,
            func.max(ProcessingLogEntry.created_at).label("last_seen"),
        )
        .where(ProcessingLogEntry.document_id.is_not(None))
        .group_by(ProcessingLogEntry.document_id)
        .order_by(func.max(ProcessingLogEntry.created_at).desc())
        .limit(keep_sessions)
    ).all()
    keep_document_ids = [row[0] for row in recent_document_rows if row[0] is not None]
    statement = delete(ProcessingLogEntry).where(
        ProcessingLogEntry.document_id.is_not(None)
    )
    # With no sessions to keep, the unfiltered statement deletes every
    # document-bound entry; chained ``.where`` clauses combine with AND.
    if keep_document_ids:
        statement = statement.where(
            ProcessingLogEntry.document_id.notin_(keep_document_ids)
        )
    return int(session.execute(statement).rowcount or 0)


def _prune_unbound_entries(session: Session, keep_entries: int) -> int:
    """Deletes unbound (document-less) entries beyond the ``keep_entries`` newest ones."""
    keep_unbound_rows = session.execute(
        select(ProcessingLogEntry.id)
        .where(ProcessingLogEntry.document_id.is_(None))
        .order_by(ProcessingLogEntry.created_at.desc(), ProcessingLogEntry.id.desc())
        .limit(keep_entries)
    ).all()
    keep_unbound_ids = [row[0] for row in keep_unbound_rows]
    statement = delete(ProcessingLogEntry).where(
        ProcessingLogEntry.document_id.is_(None)
    )
    if keep_unbound_ids:
        statement = statement.where(ProcessingLogEntry.id.notin_(keep_unbound_ids))
    return int(session.execute(statement).rowcount or 0)


def clear_processing_logs(session: Session) -> dict[str, int]:
    """Deletes all persisted processing log entries and returns the deletion count."""
    deleted_entries = int(session.execute(delete(ProcessingLogEntry)).rowcount or 0)
    return {"deleted_entries": deleted_entries}