Harden auth, redaction, upload size checks, and compose token requirements
This commit is contained in:
87
backend/app/api/auth.py
Normal file
87
backend/app/api/auth.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""Token-based authentication and authorization dependencies for privileged API routes."""
|
||||
|
||||
import hmac
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import Depends, HTTPException, status
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
|
||||
from app.core.config import Settings, get_settings
|
||||
|
||||
|
||||
bearer_auth = HTTPBearer(auto_error=False)
|
||||
|
||||
|
||||
class AuthRole:
    """Groups the role identifiers that privileged API dependencies hand back to callers."""

    # Plain string constants; they are compared and returned as ordinary str values.
    ADMIN: str = "admin"
    USER: str = "user"
|
||||
|
||||
|
||||
def _raise_unauthorized() -> None:
    """Aborts the request with HTTP 401 and a ``WWW-Authenticate: Bearer`` challenge."""

    # This function never returns normally; callers rely on the raise to stop processing.
    challenge_headers = {"WWW-Authenticate": "Bearer"}
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Invalid or missing API token",
        headers=challenge_headers,
    )
|
||||
|
||||
|
||||
def _configured_admin_token(settings: Settings) -> str:
    """Provides the whitespace-trimmed admin token, answering HTTP 503 when it is empty."""

    configured = settings.admin_api_token.strip()
    if not configured:
        # An empty or whitespace-only token is treated as "not configured".
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Admin API token is not configured",
        )
    return configured
|
||||
|
||||
|
||||
def _resolve_token_role(token: str, settings: Settings) -> str:
    """Resolves role from a bearer token using constant-time comparisons.

    Args:
        token: Bearer token presented by the client.
        settings: Application settings providing ``admin_api_token`` and
            ``user_api_token``.

    Returns:
        ``AuthRole.ADMIN`` when the token matches the admin token, otherwise
        ``AuthRole.USER`` when a non-empty user token is configured and matches.

    Raises:
        HTTPException: Raised by ``_configured_admin_token`` when the admin
            token is unset, or by ``_raise_unauthorized`` when the presented
            token matches neither configured token.
    """

    # hmac.compare_digest() raises TypeError for str arguments that contain
    # non-ASCII characters. The presented token is client-controlled, so the
    # comparison is done on UTF-8 bytes: an odd token is then simply a
    # mismatch (401) instead of an unhandled exception.
    presented = token.encode("utf-8")

    admin_token = _configured_admin_token(settings)
    if hmac.compare_digest(presented, admin_token.encode("utf-8")):
        return AuthRole.ADMIN

    # The user token is optional; an empty value never authenticates anyone.
    user_token = settings.user_api_token.strip()
    if user_token and hmac.compare_digest(presented, user_token.encode("utf-8")):
        return AuthRole.USER

    _raise_unauthorized()
|
||||
|
||||
|
||||
def get_request_role(
    credentials: Annotated[HTTPAuthorizationCredentials | None, Depends(bearer_auth)],
    settings: Annotated[Settings, Depends(get_settings)],
) -> str:
    """Checks the request's bearer credentials and yields the role they map to."""

    # Absent credentials and a blank token are rejected the same way.
    presented = "" if credentials is None else credentials.credentials.strip()
    if presented == "":
        _raise_unauthorized()
    return _resolve_token_role(token=presented, settings=settings)
|
||||
|
||||
|
||||
def require_user_or_admin(role: Annotated[str, Depends(get_request_role)]) -> str:
    """Accepts any role resolved by ``get_request_role`` and hands it back unchanged."""

    # Authentication already happened in the dependency; no further filtering is applied here.
    return role
|
||||
|
||||
|
||||
def require_admin(role: Annotated[str, Depends(get_request_role)]) -> str:
    """Lets the admin role through and answers HTTP 403 for any other resolved role."""

    if role == AuthRole.ADMIN:
        return role

    # The caller is authenticated but lacks the admin role.
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Admin token required",
    )
|
||||
@@ -1,7 +1,8 @@
|
||||
"""API router registration for all HTTP route modules."""
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
from app.api.auth import require_admin, require_user_or_admin
|
||||
from app.api.routes_documents import router as documents_router
|
||||
from app.api.routes_health import router as health_router
|
||||
from app.api.routes_processing_logs import router as processing_logs_router
|
||||
@@ -11,7 +12,27 @@ from app.api.routes_settings import router as settings_router
|
||||
|
||||
api_router = APIRouter()
|
||||
api_router.include_router(health_router)
|
||||
api_router.include_router(documents_router, prefix="/documents", tags=["documents"])
|
||||
api_router.include_router(processing_logs_router, prefix="/processing/logs", tags=["processing-logs"])
|
||||
api_router.include_router(search_router, prefix="/search", tags=["search"])
|
||||
api_router.include_router(settings_router, prefix="/settings", tags=["settings"])
|
||||
api_router.include_router(
|
||||
documents_router,
|
||||
prefix="/documents",
|
||||
tags=["documents"],
|
||||
dependencies=[Depends(require_user_or_admin)],
|
||||
)
|
||||
api_router.include_router(
|
||||
processing_logs_router,
|
||||
prefix="/processing/logs",
|
||||
tags=["processing-logs"],
|
||||
dependencies=[Depends(require_admin)],
|
||||
)
|
||||
api_router.include_router(
|
||||
search_router,
|
||||
prefix="/search",
|
||||
tags=["search"],
|
||||
dependencies=[Depends(require_user_or_admin)],
|
||||
)
|
||||
api_router.include_router(
|
||||
settings_router,
|
||||
prefix="/settings",
|
||||
tags=["settings"],
|
||||
dependencies=[Depends(require_admin)],
|
||||
)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Document CRUD, lifecycle, metadata, file access, and content export endpoints."""
|
||||
"""Authenticated document CRUD, lifecycle, metadata, file access, and content export endpoints."""
|
||||
|
||||
import io
|
||||
import re
|
||||
@@ -14,7 +14,7 @@ from fastapi.responses import FileResponse, Response, StreamingResponse
|
||||
from sqlalchemy import or_, func, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.services.app_settings import read_predefined_paths_settings, read_predefined_tags_settings
|
||||
from app.core.config import get_settings
|
||||
from app.db.base import get_session
|
||||
from app.models.document import Document, DocumentStatus
|
||||
from app.schemas.documents import (
|
||||
@@ -26,6 +26,7 @@ from app.schemas.documents import (
|
||||
UploadConflict,
|
||||
UploadResponse,
|
||||
)
|
||||
from app.services.app_settings import read_predefined_paths_settings, read_predefined_tags_settings
|
||||
from app.services.extractor import sniff_mime
|
||||
from app.services.handwriting_style import delete_many_handwriting_style_documents
|
||||
from app.services.processing_logs import log_processing_event, set_processing_log_autocommit
|
||||
@@ -35,6 +36,7 @@ from app.worker.queue import get_processing_queue
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
settings = get_settings()
|
||||
|
||||
|
||||
def _parse_csv(value: str | None) -> list[str]:
|
||||
@@ -227,6 +229,33 @@ def _build_document_list_statement(
|
||||
return statement
|
||||
|
||||
|
||||
def _enforce_upload_shape(files: list[UploadFile]) -> None:
    """Rejects upload requests that carry no files or more files than the configured maximum."""

    if not files:
        raise HTTPException(status_code=400, detail="Upload request must include at least one file")

    # Within the configured bound: nothing more to check.
    if len(files) <= settings.max_upload_files_per_request:
        return

    limit_message = (
        "Upload request exceeds file count limit "
        f"({len(files)} > {settings.max_upload_files_per_request})"
    )
    raise HTTPException(status_code=413, detail=limit_message)
|
||||
|
||||
|
||||
async def _read_upload_bytes(file: UploadFile, max_bytes: int) -> bytes:
    """Reads a single uploaded file and answers HTTP 413 when it is larger than ``max_bytes``."""

    # Requesting one byte beyond the limit is enough to detect an oversized file.
    payload = await file.read(max_bytes + 1)
    if len(payload) <= max_bytes:
        return payload

    raise HTTPException(
        status_code=413,
        detail=f"File '{file.filename or 'upload'}' exceeds per-file limit of {max_bytes} bytes",
    )
|
||||
|
||||
|
||||
def _collect_document_tree(session: Session, root_document_id: UUID) -> list[tuple[int, Document]]:
|
||||
"""Collects a document and all descendants for recursive permanent deletion."""
|
||||
|
||||
@@ -472,18 +501,29 @@ async def upload_documents(
|
||||
) -> UploadResponse:
|
||||
"""Uploads files, records metadata, and enqueues asynchronous extraction tasks."""
|
||||
|
||||
_enforce_upload_shape(files)
|
||||
set_processing_log_autocommit(session, True)
|
||||
normalized_tags = _normalize_tags(tags)
|
||||
queue = get_processing_queue()
|
||||
uploaded: list[DocumentResponse] = []
|
||||
conflicts: list[UploadConflict] = []
|
||||
total_request_bytes = 0
|
||||
|
||||
indexed_relative_paths = relative_paths or []
|
||||
prepared_uploads: list[dict[str, object]] = []
|
||||
|
||||
for idx, file in enumerate(files):
|
||||
filename = file.filename or f"uploaded_{idx}"
|
||||
data = await file.read()
|
||||
data = await _read_upload_bytes(file, settings.max_upload_file_size_bytes)
|
||||
total_request_bytes += len(data)
|
||||
if total_request_bytes > settings.max_upload_request_size_bytes:
|
||||
raise HTTPException(
|
||||
status_code=413,
|
||||
detail=(
|
||||
"Upload request exceeds total size limit "
|
||||
f"({total_request_bytes} > {settings.max_upload_request_size_bytes} bytes)"
|
||||
),
|
||||
)
|
||||
sha256 = compute_sha256(data)
|
||||
source_relative_path = indexed_relative_paths[idx] if idx < len(indexed_relative_paths) else filename
|
||||
extension = Path(filename).suffix.lower()
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
"""Read-only API endpoints for processing pipeline event logs."""
|
||||
"""Admin-only API endpoints for processing pipeline event logs."""
|
||||
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.db.base import get_session
|
||||
from app.schemas.processing_logs import ProcessingLogEntryResponse, ProcessingLogListResponse
|
||||
from app.services.app_settings import read_processing_log_retention_settings
|
||||
@@ -17,12 +18,13 @@ from app.services.processing_logs import (
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
settings = get_settings()
|
||||
|
||||
|
||||
@router.get("", response_model=ProcessingLogListResponse)
|
||||
def get_processing_logs(
|
||||
offset: int = Query(default=0, ge=0),
|
||||
limit: int = Query(default=120, ge=1, le=400),
|
||||
limit: int = Query(default=120, ge=1, le=settings.processing_log_max_unbound_entries),
|
||||
document_id: UUID | None = Query(default=None),
|
||||
session: Session = Depends(get_session),
|
||||
) -> ProcessingLogListResponse:
|
||||
@@ -43,8 +45,8 @@ def get_processing_logs(
|
||||
|
||||
@router.post("/trim")
|
||||
def trim_processing_logs(
|
||||
keep_document_sessions: int | None = Query(default=None, ge=0, le=20),
|
||||
keep_unbound_entries: int | None = Query(default=None, ge=0, le=400),
|
||||
keep_document_sessions: int | None = Query(default=None, ge=0, le=settings.processing_log_max_document_sessions),
|
||||
keep_unbound_entries: int | None = Query(default=None, ge=0, le=settings.processing_log_max_unbound_entries),
|
||||
session: Session = Depends(get_session),
|
||||
) -> dict[str, int]:
|
||||
"""Deletes old processing logs using query values or persisted retention defaults."""
|
||||
@@ -61,10 +63,19 @@ def trim_processing_logs(
|
||||
else int(retention_defaults.get("keep_unbound_entries", 80))
|
||||
)
|
||||
|
||||
capped_keep_document_sessions = min(
|
||||
settings.processing_log_max_document_sessions,
|
||||
max(0, int(resolved_keep_document_sessions)),
|
||||
)
|
||||
capped_keep_unbound_entries = min(
|
||||
settings.processing_log_max_unbound_entries,
|
||||
max(0, int(resolved_keep_unbound_entries)),
|
||||
)
|
||||
|
||||
result = cleanup_processing_logs(
|
||||
session=session,
|
||||
keep_document_sessions=resolved_keep_document_sessions,
|
||||
keep_unbound_entries=resolved_keep_unbound_entries,
|
||||
keep_document_sessions=capped_keep_document_sessions,
|
||||
keep_unbound_entries=capped_keep_unbound_entries,
|
||||
)
|
||||
session.commit()
|
||||
return result
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""API routes for managing persistent single-user application settings."""
|
||||
"""Admin-only API routes for managing persistent single-user application settings."""
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from app.schemas.settings import (
|
||||
AppSettingsUpdateRequest,
|
||||
@@ -18,6 +18,7 @@ from app.schemas.settings import (
|
||||
UploadDefaultsResponse,
|
||||
)
|
||||
from app.services.app_settings import (
|
||||
AppSettingsValidationError,
|
||||
TASK_OCR_HANDWRITING,
|
||||
TASK_ROUTING_CLASSIFICATION,
|
||||
TASK_SUMMARY_GENERATION,
|
||||
@@ -179,16 +180,19 @@ def set_app_settings(payload: AppSettingsUpdateRequest) -> AppSettingsResponse:
|
||||
if payload.predefined_tags is not None:
|
||||
predefined_tags_payload = [item.model_dump(exclude_none=True) for item in payload.predefined_tags]
|
||||
|
||||
updated = update_app_settings(
|
||||
providers=providers_payload,
|
||||
tasks=tasks_payload,
|
||||
upload_defaults=upload_defaults_payload,
|
||||
display=display_payload,
|
||||
processing_log_retention=processing_log_retention_payload,
|
||||
handwriting_style=handwriting_style_payload,
|
||||
predefined_paths=predefined_paths_payload,
|
||||
predefined_tags=predefined_tags_payload,
|
||||
)
|
||||
try:
|
||||
updated = update_app_settings(
|
||||
providers=providers_payload,
|
||||
tasks=tasks_payload,
|
||||
upload_defaults=upload_defaults_payload,
|
||||
display=display_payload,
|
||||
processing_log_retention=processing_log_retention_payload,
|
||||
handwriting_style=handwriting_style_payload,
|
||||
predefined_paths=predefined_paths_payload,
|
||||
predefined_tags=predefined_tags_payload,
|
||||
)
|
||||
except AppSettingsValidationError as error:
|
||||
raise HTTPException(status_code=400, detail=str(error)) from error
|
||||
return _build_response(updated)
|
||||
|
||||
|
||||
@@ -203,14 +207,17 @@ def reset_settings_to_defaults() -> AppSettingsResponse:
|
||||
def set_handwriting_settings(payload: HandwritingSettingsUpdateRequest) -> AppSettingsResponse:
|
||||
"""Updates handwriting transcription settings and returns the resulting configuration."""
|
||||
|
||||
updated = update_handwriting_settings(
|
||||
enabled=payload.enabled,
|
||||
openai_base_url=payload.openai_base_url,
|
||||
openai_model=payload.openai_model,
|
||||
openai_timeout_seconds=payload.openai_timeout_seconds,
|
||||
openai_api_key=payload.openai_api_key,
|
||||
clear_openai_api_key=payload.clear_openai_api_key,
|
||||
)
|
||||
try:
|
||||
updated = update_handwriting_settings(
|
||||
enabled=payload.enabled,
|
||||
openai_base_url=payload.openai_base_url,
|
||||
openai_model=payload.openai_model,
|
||||
openai_timeout_seconds=payload.openai_timeout_seconds,
|
||||
openai_api_key=payload.openai_api_key,
|
||||
clear_openai_api_key=payload.clear_openai_api_key,
|
||||
)
|
||||
except AppSettingsValidationError as error:
|
||||
raise HTTPException(status_code=400, detail=str(error)) from error
|
||||
return _build_response(updated)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user