85 lines
3.1 KiB
Python
85 lines
3.1 KiB
Python
"""Search endpoints for full-text and metadata document discovery."""
|
|
|
|
from fastapi import APIRouter, Depends, Query
|
|
from sqlalchemy import Text, cast, func, select
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.api.routes_documents import _apply_discovery_filters
|
|
from app.db.base import get_session
|
|
from app.models.document import Document, DocumentStatus
|
|
from app.schemas.documents import DocumentResponse, SearchResponse
|
|
|
|
|
|
# Router that the search endpoint in this module registers itself on via @router.get.
router = APIRouter()
|
|
|
|
|
|
def _escape_like(term: str) -> str:
    """Escapes LIKE metacharacters so *term* is matched as literal text.

    The backslash is handled first so the escapes added for ``%`` and ``_``
    are not doubled afterwards.
    """
    return term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


@router.get("", response_model=SearchResponse)
def search_documents(
    query: str = Query(min_length=2),
    offset: int = Query(default=0, ge=0),
    limit: int = Query(default=50, ge=1, le=200),
    include_trashed: bool = Query(default=False),
    only_trashed: bool = Query(default=False),
    path_filter: str | None = Query(default=None),
    tag_filter: str | None = Query(default=None),
    type_filter: str | None = Query(default=None),
    processed_from: str | None = Query(default=None),
    processed_to: str | None = Query(default=None),
    session: Session = Depends(get_session),
) -> SearchResponse:
    """Searches documents using PostgreSQL full-text ranking plus metadata matching.

    A document matches when the full-text query hits the combined filename,
    logical path, extracted text and tags vector, or when the raw search term
    occurs (case-insensitively) in the filename, logical path or tags.

    Args:
        query: Search term, at least two characters. It is treated as literal
            text in the substring match; ``%``, ``_`` and ``\\`` are not wildcards.
        offset: Number of ranked matches to skip.
        limit: Maximum number of matches to return (1-200).
        include_trashed: When true, trashed documents are not excluded.
        only_trashed: When true, only trashed documents are returned; this
            takes precedence over ``include_trashed``.
        path_filter: Forwarded to ``_apply_discovery_filters``.
        tag_filter: Forwarded to ``_apply_discovery_filters``.
        type_filter: Forwarded to ``_apply_discovery_filters``.
        processed_from: Forwarded to ``_apply_discovery_filters``.
        processed_to: Forwarded to ``_apply_discovery_filters``.
        session: Database session supplied by the ``get_session`` dependency.

    Returns:
        The requested page of matches, ordered by rank and then by newest
        ``created_at``, together with the total number of matches across all pages.
    """
    vector = func.to_tsvector(
        "simple",
        func.coalesce(Document.original_filename, "")
        + " "
        + func.coalesce(Document.logical_path, "")
        + " "
        + func.coalesce(Document.extracted_text, "")
        + " "
        + func.coalesce(cast(Document.tags, Text), ""),
    )
    ts_query = func.plainto_tsquery("simple", query)
    rank = func.ts_rank_cd(vector, ts_query)

    # Escape the user's term so wildcard characters in it are matched literally
    # instead of widening (or breaking) the ILIKE pattern.
    contains_pattern = f"%{_escape_like(query)}%"
    search_filter = (
        vector.op("@@")(ts_query)
        | Document.original_filename.ilike(contains_pattern, escape="\\")
        | Document.logical_path.ilike(contains_pattern, escape="\\")
        | cast(Document.tags, Text).ilike(contains_pattern, escape="\\")
    )

    def _scoped(statement):
        """Applies the text match, trash-state and discovery filters to *statement*."""
        statement = statement.where(search_filter)
        if only_trashed:
            statement = statement.where(Document.status == DocumentStatus.TRASHED)
        elif not include_trashed:
            statement = statement.where(Document.status != DocumentStatus.TRASHED)
        return _apply_discovery_filters(
            statement,
            path_filter=path_filter,
            tag_filter=tag_filter,
            type_filter=type_filter,
            processed_from=processed_from,
            processed_to=processed_to,
        )

    # Both queries go through the same helper so the reported total can never
    # drift from the filters used to build the page.
    page_statement = (
        _scoped(select(Document))
        .order_by(rank.desc(), Document.created_at.desc())
        .offset(offset)
        .limit(limit)
    )
    items = session.execute(page_statement).scalars().all()

    count_statement = _scoped(select(func.count(Document.id)))
    total = session.execute(count_statement).scalar_one()

    return SearchResponse(total=total, items=[DocumentResponse.model_validate(item) for item in items])
|