Files
ledgerdock/backend/app/api/routes_search.py
2026-02-21 09:44:18 -03:00

85 lines
3.1 KiB
Python

"""Search endpoints for full-text and metadata document discovery."""
from fastapi import APIRouter, Depends, Query
from sqlalchemy import Text, cast, func, select
from sqlalchemy.orm import Session
from app.api.routes_documents import _apply_discovery_filters
from app.db.base import get_session
from app.models.document import Document, DocumentStatus
from app.schemas.documents import DocumentResponse, SearchResponse
router = APIRouter()


def _escape_like(term: str) -> str:
    """Escape SQL LIKE/ILIKE wildcards (``%``, ``_``, ``\\``) so user input matches literally.

    Without this, a search term containing ``%`` would act as a wildcard in the
    ILIKE fallback and match every document.
    """
    return term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


@router.get("", response_model=SearchResponse)
def search_documents(
    query: str = Query(min_length=2),
    offset: int = Query(default=0, ge=0),
    limit: int = Query(default=50, ge=1, le=200),
    include_trashed: bool = Query(default=False),
    only_trashed: bool = Query(default=False),
    path_filter: str | None = Query(default=None),
    tag_filter: str | None = Query(default=None),
    type_filter: str | None = Query(default=None),
    processed_from: str | None = Query(default=None),
    processed_to: str | None = Query(default=None),
    session: Session = Depends(get_session),
) -> SearchResponse:
    """Search documents using PostgreSQL full-text ranking plus metadata matching.

    Matches either the full-text vector (filename, logical path, extracted
    text, tags) against a ``plainto_tsquery`` of *query*, or — as a substring
    fallback — any of filename/path/tags via ILIKE with wildcards escaped.

    Returns:
        SearchResponse with the total match count and a ranked page of
        documents ordered by ts_rank_cd, then recency.
    """
    # Full-text vector over all searchable text columns; tags (JSON) are cast
    # to text so their contents participate in the vector.
    vector = func.to_tsvector(
        "simple",
        func.coalesce(Document.original_filename, "")
        + " "
        + func.coalesce(Document.logical_path, "")
        + " "
        + func.coalesce(Document.extracted_text, "")
        + " "
        + func.coalesce(cast(Document.tags, Text), ""),
    )
    ts_query = func.plainto_tsquery("simple", query)
    rank = func.ts_rank_cd(vector, ts_query)
    # Escape LIKE metacharacters so e.g. "100%" searches for the literal string.
    like_pattern = f"%{_escape_like(query)}%"
    search_filter = (
        vector.op("@@")(ts_query)
        | Document.original_filename.ilike(like_pattern, escape="\\")
        | Document.logical_path.ilike(like_pattern, escape="\\")
        | cast(Document.tags, Text).ilike(like_pattern, escape="\\")
    )

    def _filtered(stmt):
        """Apply the trash-status and discovery filters shared by both queries."""
        if only_trashed:
            stmt = stmt.where(Document.status == DocumentStatus.TRASHED)
        elif not include_trashed:
            stmt = stmt.where(Document.status != DocumentStatus.TRASHED)
        return _apply_discovery_filters(
            stmt,
            path_filter=path_filter,
            tag_filter=tag_filter,
            type_filter=type_filter,
            processed_from=processed_from,
            processed_to=processed_to,
        )

    statement = (
        _filtered(select(Document).where(search_filter))
        .order_by(rank.desc(), Document.created_at.desc())
        .offset(offset)
        .limit(limit)
    )
    items = session.execute(statement).scalars().all()
    # Count uses the exact same filter chain so total always agrees with the page.
    count_statement = _filtered(select(func.count(Document.id)).where(search_filter))
    total = session.execute(count_statement).scalar_one()
    return SearchResponse(total=total, items=[DocumentResponse.model_validate(item) for item in items])