"""Search endpoints for full-text and metadata document discovery.""" from fastapi import APIRouter, Depends, Query from sqlalchemy import Text, cast, func, select from sqlalchemy.orm import Session from app.api.auth import AuthContext, require_user_or_admin from app.api.routes_documents import _apply_discovery_filters, _scope_document_statement_for_auth_context from app.db.base import get_session from app.models.document import Document, DocumentStatus from app.schemas.documents import DocumentResponse, SearchResponse router = APIRouter() @router.get("", response_model=SearchResponse) def search_documents( query: str = Query(min_length=2), offset: int = Query(default=0, ge=0), limit: int = Query(default=50, ge=1, le=200), include_trashed: bool = Query(default=False), only_trashed: bool = Query(default=False), path_filter: str | None = Query(default=None), tag_filter: str | None = Query(default=None), type_filter: str | None = Query(default=None), processed_from: str | None = Query(default=None), processed_to: str | None = Query(default=None), auth_context: AuthContext = Depends(require_user_or_admin), session: Session = Depends(get_session), ) -> SearchResponse: """Searches documents using PostgreSQL full-text ranking plus metadata matching.""" vector = func.to_tsvector( "simple", func.coalesce(Document.original_filename, "") + " " + func.coalesce(Document.logical_path, "") + " " + func.coalesce(Document.extracted_text, "") + " " + func.coalesce(cast(Document.tags, Text), ""), ) ts_query = func.plainto_tsquery("simple", query) rank = func.ts_rank_cd(vector, ts_query) search_filter = ( vector.op("@@")(ts_query) | Document.original_filename.ilike(f"%{query}%") | Document.logical_path.ilike(f"%{query}%") | cast(Document.tags, Text).ilike(f"%{query}%") ) statement = select(Document).where(search_filter) statement = _scope_document_statement_for_auth_context(statement, auth_context) if only_trashed: statement = statement.where(Document.status == DocumentStatus.TRASHED) elif not include_trashed: statement = statement.where(Document.status != DocumentStatus.TRASHED) statement = _apply_discovery_filters( statement, path_filter=path_filter, tag_filter=tag_filter, type_filter=type_filter, processed_from=processed_from, processed_to=processed_to, ) statement = statement.order_by(rank.desc(), Document.created_at.desc()).offset(offset).limit(limit) items = session.execute(statement).scalars().all() count_statement = select(func.count(Document.id)).where(search_filter) count_statement = _scope_document_statement_for_auth_context(count_statement, auth_context) if only_trashed: count_statement = count_statement.where(Document.status == DocumentStatus.TRASHED) elif not include_trashed: count_statement = count_statement.where(Document.status != DocumentStatus.TRASHED) count_statement = _apply_discovery_filters( count_statement, path_filter=path_filter, tag_filter=tag_filter, type_filter=type_filter, processed_from=processed_from, processed_to=processed_to, ) total = session.execute(count_statement).scalar_one() return SearchResponse(total=total, items=[DocumentResponse.model_validate(item) for item in items])