Compare commits
59 Commits
3b72919015
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
60ce69e115
|
|||
|
d6d0735ff8
|
|||
|
72088dba9a
|
|||
|
6f1fffd6e8
|
|||
|
490cbbb812
|
|||
|
4fe22e3539
|
|||
|
3f7cdee995
|
|||
|
1a04b23e89
|
|||
|
2a5dfc3713
|
|||
|
1cd7d6541d
|
|||
|
ec6a20ebd1
|
|||
|
83d6a4f367
|
|||
|
8cf3748015
|
|||
|
daa11cb768
|
|||
|
8f2c357bfc
|
|||
|
d50169b883
|
|||
|
b5b74845f2
|
|||
|
0acce2e260
|
|||
|
b86223f943
|
|||
|
8dc4013e76
|
|||
|
668c22f692
|
|||
|
89ec3584f9
|
|||
|
8dded6383e
|
|||
|
c47fc48533
|
|||
|
b6d470590e
|
|||
|
41bbe87b4c
|
|||
|
6fba581865
|
|||
|
4b34d6153c
|
|||
|
700f0d6d79
|
|||
|
3cccf2e0e8
|
|||
|
26eae1a09b
|
|||
|
a9333ec973
|
|||
|
8eaaa01186
|
|||
|
eae7afd36e
|
|||
|
874597e40b
|
|||
|
32b4589b28
|
|||
|
4c27fd6483
|
|||
|
9cbbd80f47
|
|||
|
aba320b617
|
|||
|
74d91eb4b1
|
|||
|
1c57084ebf
|
|||
|
bfc89fe5ce
|
|||
|
1b2e0cb8af
|
|||
|
0242e061c2
|
|||
|
7a19f22f41
|
|||
|
c5423fc9c3
|
|||
|
3d280396ae
|
|||
|
48cfc79b5f
|
|||
|
bdd97d1c62
|
|||
|
da5cbc2c01
|
|||
|
652d7e8f25
|
|||
|
c3f34b38b4
|
|||
|
1cb6bfee58
|
|||
|
a69702f099
|
|||
|
c1a7011d71
|
|||
|
b25e508a00
|
|||
|
74a6551237
|
|||
|
3cbad053cc
|
|||
|
5792586a90
|
72
.env.example
Normal file
72
.env.example
Normal file
@@ -0,0 +1,72 @@
|
||||
# LedgerDock environment template
|
||||
# Copy to .env and adjust all secret values before first run.
|
||||
|
||||
# Development defaults (HTTP local stack)
|
||||
APP_ENV=development
|
||||
HOST_BIND_IP=127.0.0.1
|
||||
# Optional host directory for persistent bind mounts in docker-compose.yml.
|
||||
# Defaults to ./data when unset.
|
||||
# DCM_DATA_DIR=./data
|
||||
|
||||
POSTGRES_USER=dcm
|
||||
POSTGRES_PASSWORD=ChangeMe-Postgres-Secret
|
||||
POSTGRES_DB=dcm
|
||||
DATABASE_URL=postgresql+psycopg://dcm:ChangeMe-Postgres-Secret@db:5432/dcm
|
||||
|
||||
REDIS_PASSWORD=ChangeMe-Redis-Secret
|
||||
REDIS_URL=redis://:ChangeMe-Redis-Secret@redis:6379/0
|
||||
REDIS_SECURITY_MODE=compat
|
||||
REDIS_TLS_MODE=allow_insecure
|
||||
|
||||
AUTH_BOOTSTRAP_ADMIN_USERNAME=admin
|
||||
AUTH_BOOTSTRAP_ADMIN_PASSWORD=ChangeMe-Admin-Password
|
||||
AUTH_BOOTSTRAP_USER_USERNAME=user
|
||||
AUTH_BOOTSTRAP_USER_PASSWORD=ChangeMe-User-Password
|
||||
AUTH_LOGIN_FAILURE_LIMIT=5
|
||||
AUTH_LOGIN_FAILURE_WINDOW_SECONDS=900
|
||||
AUTH_LOGIN_LOCKOUT_BASE_SECONDS=30
|
||||
AUTH_LOGIN_LOCKOUT_MAX_SECONDS=900
|
||||
# Optional cookie controls for split frontend/api hosts:
|
||||
# Leave AUTH_COOKIE_DOMAIN empty unless you explicitly need a parent-domain CSRF cookie mirror.
|
||||
# Host-only auth cookies are issued automatically for the API host.
|
||||
# AUTH_COOKIE_DOMAIN=docs.lan
|
||||
# AUTH_COOKIE_SAMESITE=auto
|
||||
|
||||
APP_SETTINGS_ENCRYPTION_KEY=ChangeMe-Settings-Encryption-Key
|
||||
TYPESENSE_API_KEY=ChangeMe-Typesense-Key
|
||||
|
||||
PROCESSING_LOG_STORE_MODEL_IO_TEXT=false
|
||||
PROCESSING_LOG_STORE_PAYLOAD_TEXT=false
|
||||
CONTENT_EXPORT_MAX_DOCUMENTS=250
|
||||
CONTENT_EXPORT_MAX_TOTAL_BYTES=52428800
|
||||
CONTENT_EXPORT_RATE_LIMIT_PER_MINUTE=6
|
||||
|
||||
PROVIDER_BASE_URL_ALLOW_HTTP=true
|
||||
PROVIDER_BASE_URL_ALLOW_PRIVATE_NETWORK=true
|
||||
PROVIDER_BASE_URL_ALLOWLIST=[]
|
||||
|
||||
PUBLIC_BASE_URL=http://localhost:8000
|
||||
CORS_ORIGINS=["http://localhost:5173","http://localhost:3000"]
|
||||
# Leave empty to use same-origin /api/v1 through the frontend proxy.
|
||||
# Set an absolute URL only when you intentionally want split-origin frontend/API traffic.
|
||||
VITE_API_BASE=
|
||||
# Development-only Vite proxy target. Docker compose sets this to http://api:8000 automatically.
|
||||
VITE_API_PROXY_TARGET=http://localhost:8000
|
||||
# Development-only Vite host allowlist override.
|
||||
VITE_ALLOWED_HOSTS=
|
||||
|
||||
# Production baseline overrides (set explicitly for live deployments):
|
||||
# APP_ENV=production
|
||||
# HOST_BIND_IP=127.0.0.1
|
||||
# REDIS_URL=rediss://:<strong-password>@redis.example.internal:6379/0
|
||||
# REDIS_SECURITY_MODE=strict
|
||||
# REDIS_TLS_MODE=required
|
||||
# AUTH_COOKIE_DOMAIN=example.com
|
||||
# AUTH_COOKIE_SAMESITE=none
|
||||
# PROVIDER_BASE_URL_ALLOW_HTTP=false
|
||||
# PROVIDER_BASE_URL_ALLOW_PRIVATE_NETWORK=false
|
||||
# PROVIDER_BASE_URL_ALLOWLIST=["api.openai.com"]
|
||||
# PUBLIC_BASE_URL=https://api.example.com
|
||||
# CORS_ORIGINS=["https://app.example.com"]
|
||||
# VITE_API_BASE=https://api.example.com/api/v1
|
||||
# VITE_ALLOWED_HOSTS=app.example.com
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -20,9 +20,8 @@ build/
|
||||
!.env.example
|
||||
|
||||
# Data and generated artifacts (runtime only)
|
||||
data/postgres/
|
||||
data/redis/
|
||||
data/storage/
|
||||
data/
|
||||
typesense-data/
|
||||
|
||||
# OS / IDE
|
||||
.DS_Store
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
## Stack Snapshot
|
||||
- DMS monorepo with FastAPI API + RQ worker (`backend/`) and React + Vite + TypeScript frontend (`frontend/`).
|
||||
- Services in `docker-compose.yml`: `api`, `worker`, `frontend`, `db` (Postgres), `redis`, `typesense`.
|
||||
- Runtime persistence uses Docker named volumes (`db-data`, `redis-data`, `dcm-storage`, `typesense-data`).
|
||||
- Runtime persistence uses host bind mounts under `${DCM_DATA_DIR:-./data}` (`db-data`, `redis-data`, `storage`, `typesense-data`).
|
||||
|
||||
## Project Layout
|
||||
- Backend app code: `backend/app/` (`api/`, `services/`, `db/`, `models/`, `schemas/`, `worker/`).
|
||||
@@ -25,7 +25,7 @@ If required to run the docker image, follow these steps:
|
||||
- Frontend dev only: `cd frontend && npm run dev`
|
||||
- Frontend production build: `cd frontend && npm run build`
|
||||
|
||||
## Validation
|
||||
## Validation
|
||||
- No automated test suite is currently committed.
|
||||
- Manual checks for code changes inside VM: `GET /api/v1/health`, upload + processing flow, search, document preview/download, and clean `docker compose logs -f` output for `api` and `worker`.
|
||||
|
||||
|
||||
15
CHANGELOG.md
15
CHANGELOG.md
@@ -3,18 +3,5 @@ All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- Initialized `CHANGELOG.md` with Keep a Changelog structure for ongoing release-note tracking.
|
||||
|
||||
### Changed
|
||||
- Refreshed `README.md` with current stack details, runtime services, setup commands, configuration notes, and manual validation guidance.
|
||||
|
||||
### Deprecated
|
||||
|
||||
### Removed
|
||||
|
||||
### Fixed
|
||||
|
||||
### Security
|
||||
- Initial release
|
||||
|
||||
192
README.md
192
README.md
@@ -1,60 +1,91 @@
|
||||
# DMS
|
||||
# LedgerDock
|
||||
|
||||
DMS is a self-hosted document management system for ingesting, processing, organizing, and searching files.
|
||||
LedgerDock is a private document workspace you can run on your own computer or server.
|
||||
It helps teams collect files, process text from documents, and find information quickly with search.
|
||||
|
||||
## Core Capabilities
|
||||
## What LedgerDock Is For
|
||||
|
||||
- Drag and drop upload from anywhere in the UI
|
||||
- File and folder upload with path preservation
|
||||
- Asynchronous extraction and OCR for PDF, images, DOCX, XLSX, TXT, and ZIP
|
||||
- Metadata and full-text search
|
||||
- Routing suggestions based on previous decisions
|
||||
- Original file download and extracted markdown export
|
||||
- Upload files and folders from one place
|
||||
- Keep documents organized and searchable
|
||||
- Extract text from scans and images (OCR)
|
||||
- Download originals or extracted text
|
||||
|
||||
## Technology Stack
|
||||
## Before You Start
|
||||
|
||||
- Backend: FastAPI, SQLAlchemy, RQ worker (`backend/`)
|
||||
- Frontend: React, Vite, TypeScript (`frontend/`)
|
||||
- Infrastructure: PostgreSQL, Redis, Typesense (`docker-compose.yml`)
|
||||
You need:
|
||||
|
||||
## Runtime Services
|
||||
- Docker Desktop (Windows or macOS) or Docker Engine + Docker Compose (Linux)
|
||||
- A terminal app
|
||||
- The project folder on your machine
|
||||
- Internet access the first time you build containers
|
||||
|
||||
The default `docker compose` stack includes:
|
||||
## Install With Docker Compose
|
||||
|
||||
- `frontend` - React UI (`http://localhost:5173`)
|
||||
- `api` - FastAPI backend (`http://localhost:8000`, docs at `/docs`)
|
||||
- `worker` - background processing jobs
|
||||
- `db` - PostgreSQL (`localhost:5432`)
|
||||
- `redis` - queue backend (`localhost:6379`)
|
||||
- `typesense` - search index (`localhost:8108`)
|
||||
Follow these steps from the project folder (where `docker-compose.yml` is located).
|
||||
|
||||
## Requirements
|
||||
1. Create your local settings file from the template.
|
||||
|
||||
- Docker Engine
|
||||
- Docker Compose plugin
|
||||
- Internet access for first-time image build
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
From repository root:
|
||||
2. Open `.env` in a text editor and set your own passwords and keys.
|
||||
3. Start LedgerDock.
|
||||
|
||||
```bash
|
||||
docker compose up --build -d
|
||||
```
|
||||
|
||||
Open:
|
||||
4. Wait until startup is complete, then open the app:
|
||||
- LedgerDock web app: `http://localhost:5173`
|
||||
- Health check: `http://localhost:8000/api/v1/health`
|
||||
5. Sign in with the admin username and password you set in `.env`.
|
||||
|
||||
- Frontend: `http://localhost:5173`
|
||||
- API docs: `http://localhost:8000/docs`
|
||||
- Health: `http://localhost:8000/api/v1/health`
|
||||
## `.env` Settings Explained In Plain Language
|
||||
|
||||
Stop the stack:
|
||||
LedgerDock reads settings from `.env`. Some values are required and some are optional.
|
||||
|
||||
```bash
|
||||
docker compose down
|
||||
```
|
||||
### Required: Change These Before First Use
|
||||
|
||||
## Common Operations
|
||||
- `POSTGRES_PASSWORD`: Password for the internal database.
|
||||
- `REDIS_PASSWORD`: Password for the internal queue service.
|
||||
- `AUTH_BOOTSTRAP_ADMIN_PASSWORD`: First admin login password.
|
||||
- `APP_SETTINGS_ENCRYPTION_KEY`: Secret used to protect saved app settings.
|
||||
- `TYPESENSE_API_KEY`: Secret key for the search engine.
|
||||
|
||||
Use long, unique values for each one. Do not reuse personal passwords.
|
||||
|
||||
### Required: Usually Keep Defaults Unless You Know You Need Changes
|
||||
|
||||
- `POSTGRES_USER`: Database username.
|
||||
- `POSTGRES_DB`: Database name.
|
||||
- `DATABASE_URL`: Connection string to the database service.
|
||||
- `REDIS_URL`: Connection string to the Redis service.
|
||||
- `AUTH_BOOTSTRAP_ADMIN_USERNAME`: First admin username (default `admin`).
|
||||
|
||||
If you change passwords, make sure matching URLs use the same new password.
|
||||
|
||||
### Optional User Account (Can Be Left Empty)
|
||||
|
||||
- `AUTH_BOOTSTRAP_USER_USERNAME`
|
||||
- `AUTH_BOOTSTRAP_USER_PASSWORD`
|
||||
|
||||
These create an extra non-admin account on first startup.
|
||||
|
||||
### Network and Access Settings
|
||||
|
||||
- `HOST_BIND_IP`: Where services listen. Keep `127.0.0.1` for local-only access.
|
||||
- `PUBLIC_BASE_URL`: Backend base URL. Local default is `http://localhost:8000`.
|
||||
- `CORS_ORIGINS`: Allowed frontend origins. Keep local defaults for single-machine use.
|
||||
- `VITE_API_BASE`: Frontend API URL override. Leave empty unless you know you need it.
|
||||
|
||||
### Environment Mode
|
||||
|
||||
- `APP_ENV=development`: Local mode (default).
|
||||
- `APP_ENV=production`: Use when running as a real shared deployment with HTTPS and tighter security settings.
|
||||
- Frontend runtime switches to a static build served by Nginx in this mode.
|
||||
|
||||
## Daily Use Commands
|
||||
|
||||
Start or rebuild:
|
||||
|
||||
@@ -68,87 +99,50 @@ Stop:
|
||||
docker compose down
|
||||
```
|
||||
|
||||
Tail logs:
|
||||
View logs:
|
||||
|
||||
```bash
|
||||
docker compose logs -f
|
||||
```
|
||||
|
||||
Tail API and worker logs only:
|
||||
View backend logs only:
|
||||
|
||||
```bash
|
||||
docker compose logs -f api worker
|
||||
```
|
||||
|
||||
Reset all runtime data (destructive):
|
||||
## Where Your Data Is Stored
|
||||
|
||||
LedgerDock stores persistent runtime data in host bind mounts. By default the host root is `./data`, or set `DCM_DATA_DIR` to move it:
|
||||
|
||||
- `${DCM_DATA_DIR:-./data}/db-data` for PostgreSQL data
|
||||
- `${DCM_DATA_DIR:-./data}/redis-data` for Redis data
|
||||
- `${DCM_DATA_DIR:-./data}/storage` for uploaded files and app storage
|
||||
- `${DCM_DATA_DIR:-./data}/typesense-data` for the search index
|
||||
|
||||
On startup, Compose runs a one-shot `storage-init` service that creates the storage tree and applies write access for the backend runtime user `uid=10001`. If you want to inspect or repair it manually, use:
|
||||
|
||||
```bash
|
||||
docker compose down -v
|
||||
mkdir -p ${DCM_DATA_DIR:-./data}/storage
|
||||
sudo chown -R 10001:10001 ${DCM_DATA_DIR:-./data}/storage
|
||||
sudo chmod -R u+rwX,g+rwX ${DCM_DATA_DIR:-./data}/storage
|
||||
```
|
||||
|
||||
## Frontend-Only Local Workflow
|
||||
|
||||
If backend services are already running, you can run frontend tooling locally:
|
||||
To remove everything, including data:
|
||||
|
||||
```bash
|
||||
cd frontend && npm run dev
|
||||
cd frontend && npm run build
|
||||
cd frontend && npm run preview
|
||||
docker compose down
|
||||
rm -rf ${DCM_DATA_DIR:-./data}
|
||||
```
|
||||
|
||||
`npm run preview` serves the built app on port `4173`.
|
||||
Warning: this permanently deletes your LedgerDock data on this machine.
|
||||
|
||||
## Configuration
|
||||
## First Checks After Install
|
||||
|
||||
Main runtime variables are defined in `docker-compose.yml`:
|
||||
- Open `http://localhost:5173` and confirm the login page appears.
|
||||
- Open `http://localhost:8000/api/v1/health` and confirm you get `{"status":"ok"}`.
|
||||
- Upload one sample file and confirm it appears in search.
|
||||
|
||||
- API and worker: `DATABASE_URL`, `REDIS_URL`, `STORAGE_ROOT`, `PUBLIC_BASE_URL`, `CORS_ORIGINS`, `TYPESENSE_*`
|
||||
- Frontend: `VITE_API_BASE`
|
||||
## Need Technical Documentation?
|
||||
|
||||
Application settings saved from the UI persist at:
|
||||
|
||||
- `<STORAGE_ROOT>/settings.json` (inside the storage volume)
|
||||
|
||||
Settings endpoints:
|
||||
|
||||
- `GET/PUT /api/v1/settings`
|
||||
- `POST /api/v1/settings/reset`
|
||||
- `POST /api/v1/settings/handwriting`
|
||||
- `POST /api/v1/processing/logs/trim`
|
||||
|
||||
Note: the compose file currently includes host-specific URL values (for example `PUBLIC_BASE_URL` and `VITE_API_BASE`). Adjust these for your environment when needed.
|
||||
|
||||
## Data Persistence
|
||||
|
||||
Docker named volumes used by the stack:
|
||||
|
||||
- `db-data`
|
||||
- `redis-data`
|
||||
- `dcm-storage`
|
||||
- `typesense-data`
|
||||
|
||||
## Validation Checklist
|
||||
|
||||
After setup or config changes, verify:
|
||||
|
||||
- `GET /api/v1/health` returns `{"status":"ok"}`
|
||||
- Upload and processing complete successfully
|
||||
- Search returns expected results
|
||||
- Preview and download work for uploaded documents
|
||||
- `docker compose logs -f api worker` has no failures
|
||||
|
||||
## Repository Layout
|
||||
|
||||
- `backend/` - FastAPI API, services, models, worker
|
||||
- `frontend/` - React application
|
||||
- `doc/` - technical documentation for architecture, API, data model, and operations
|
||||
- `docker-compose.yml` - local runtime topology
|
||||
|
||||
## Documentation Index
|
||||
|
||||
- `doc/README.md` - technical documentation entrypoint
|
||||
- `doc/architecture-overview.md` - service and runtime architecture
|
||||
- `doc/api-contract.md` - endpoint and payload contract
|
||||
- `doc/data-model-reference.md` - persistence model reference
|
||||
- `doc/operations-and-configuration.md` - runtime operations and configuration
|
||||
- `doc/frontend-design-foundation.md` - frontend design rules
|
||||
Developer and operator docs are in `doc/`, starting at `doc/README.md`.
|
||||
|
||||
@@ -1,7 +1,33 @@
|
||||
APP_ENV=development
|
||||
DATABASE_URL=postgresql+psycopg://dcm:dcm@db:5432/dcm
|
||||
REDIS_URL=redis://redis:6379/0
|
||||
REDIS_URL=redis://:replace-with-redis-password@redis:6379/0
|
||||
REDIS_SECURITY_MODE=auto
|
||||
REDIS_TLS_MODE=auto
|
||||
STORAGE_ROOT=/data/storage
|
||||
AUTH_BOOTSTRAP_ADMIN_USERNAME=admin
|
||||
AUTH_BOOTSTRAP_ADMIN_PASSWORD=replace-with-random-admin-password
|
||||
AUTH_BOOTSTRAP_USER_USERNAME=user
|
||||
AUTH_BOOTSTRAP_USER_PASSWORD=replace-with-random-user-password
|
||||
AUTH_LOGIN_FAILURE_LIMIT=5
|
||||
AUTH_LOGIN_FAILURE_WINDOW_SECONDS=900
|
||||
AUTH_LOGIN_LOCKOUT_BASE_SECONDS=30
|
||||
AUTH_LOGIN_LOCKOUT_MAX_SECONDS=900
|
||||
APP_SETTINGS_ENCRYPTION_KEY=replace-with-random-settings-encryption-key
|
||||
PROCESSING_LOG_STORE_MODEL_IO_TEXT=false
|
||||
PROCESSING_LOG_STORE_PAYLOAD_TEXT=false
|
||||
CONTENT_EXPORT_MAX_DOCUMENTS=250
|
||||
CONTENT_EXPORT_MAX_TOTAL_BYTES=52428800
|
||||
CONTENT_EXPORT_RATE_LIMIT_PER_MINUTE=6
|
||||
MAX_UPLOAD_FILES_PER_REQUEST=50
|
||||
MAX_UPLOAD_FILE_SIZE_BYTES=26214400
|
||||
MAX_UPLOAD_REQUEST_SIZE_BYTES=104857600
|
||||
MAX_ZIP_MEMBER_UNCOMPRESSED_BYTES=26214400
|
||||
MAX_ZIP_TOTAL_UNCOMPRESSED_BYTES=157286400
|
||||
MAX_ZIP_COMPRESSION_RATIO=120
|
||||
MAX_ZIP_DESCENDANTS_PER_ROOT=1000
|
||||
PROVIDER_BASE_URL_ALLOWLIST=["api.openai.com"]
|
||||
PROVIDER_BASE_URL_ALLOW_HTTP=false
|
||||
PROVIDER_BASE_URL_ALLOW_PRIVATE_NETWORK=false
|
||||
DEFAULT_OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
DEFAULT_OPENAI_MODEL=gpt-4.1-mini
|
||||
DEFAULT_OPENAI_TIMEOUT_SECONDS=45
|
||||
@@ -12,6 +38,6 @@ DEFAULT_ROUTING_MODEL=gpt-4.1-mini
|
||||
TYPESENSE_PROTOCOL=http
|
||||
TYPESENSE_HOST=typesense
|
||||
TYPESENSE_PORT=8108
|
||||
TYPESENSE_API_KEY=dcm-typesense-key
|
||||
TYPESENSE_API_KEY=replace-with-random-typesense-api-key
|
||||
TYPESENSE_COLLECTION_NAME=documents
|
||||
PUBLIC_BASE_URL=http://localhost:8000
|
||||
|
||||
@@ -12,6 +12,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
RUN pip install --no-cache-dir -r /app/requirements.txt
|
||||
|
||||
COPY app /app/app
|
||||
RUN addgroup --system appgroup && adduser --system --ingroup appgroup --uid 10001 appuser
|
||||
RUN mkdir -p /data/storage && chown -R appuser:appgroup /app /data
|
||||
|
||||
COPY --chown=appuser:appgroup app /app/app
|
||||
|
||||
USER appuser
|
||||
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
||||
169
backend/app/api/auth.py
Normal file
169
backend/app/api/auth.py
Normal file
@@ -0,0 +1,169 @@
|
||||
"""Authentication and authorization dependencies for protected API routes."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Annotated
|
||||
from uuid import UUID
|
||||
|
||||
import hmac
|
||||
from fastapi import Depends, HTTPException, Request, status
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.db.base import get_session
|
||||
from app.models.auth import UserRole
|
||||
from app.services.authentication import resolve_auth_session
|
||||
|
||||
|
||||
try:
|
||||
from fastapi import Cookie, Header
|
||||
except (ImportError, AttributeError):
|
||||
|
||||
def Cookie(_default=None, **_kwargs): # type: ignore[no-untyped-def]
|
||||
"""Compatibility fallback for environments that stub fastapi without request params."""
|
||||
|
||||
return None
|
||||
|
||||
def Header(_default=None, **_kwargs): # type: ignore[no-untyped-def]
|
||||
"""Compatibility fallback for environments that stub fastapi without request params."""
|
||||
|
||||
return None
|
||||
|
||||
|
||||
bearer_auth = HTTPBearer(auto_error=False)
|
||||
SESSION_COOKIE_NAME = "dcm_session"
|
||||
CSRF_COOKIE_NAME = "dcm_csrf"
|
||||
CSRF_HEADER_NAME = "x-csrf-token"
|
||||
CSRF_PROTECTED_METHODS = frozenset({"POST", "PATCH", "PUT", "DELETE"})
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class AuthContext:
|
||||
"""Carries authenticated identity and role details for one request."""
|
||||
|
||||
user_id: UUID
|
||||
username: str
|
||||
role: UserRole
|
||||
session_id: UUID
|
||||
expires_at: datetime
|
||||
|
||||
|
||||
def _requires_csrf_validation(method: str) -> bool:
|
||||
"""Returns whether an HTTP method should be protected by cookie CSRF validation."""
|
||||
|
||||
return method.upper() in CSRF_PROTECTED_METHODS
|
||||
|
||||
|
||||
def _extract_cookie_values(request: Request, cookie_name: str) -> tuple[str, ...]:
|
||||
"""Extracts all values for one cookie name from raw Cookie header order."""
|
||||
|
||||
request_headers = getattr(request, "headers", None)
|
||||
raw_cookie_header = request_headers.get("cookie", "") if request_headers is not None else ""
|
||||
if not raw_cookie_header:
|
||||
return ()
|
||||
|
||||
extracted_values: list[str] = []
|
||||
for cookie_pair in raw_cookie_header.split(";"):
|
||||
normalized_pair = cookie_pair.strip()
|
||||
if not normalized_pair or "=" not in normalized_pair:
|
||||
continue
|
||||
key, value = normalized_pair.split("=", 1)
|
||||
if key.strip() != cookie_name:
|
||||
continue
|
||||
normalized_value = value.strip()
|
||||
if normalized_value:
|
||||
extracted_values.append(normalized_value)
|
||||
return tuple(extracted_values)
|
||||
|
||||
|
||||
def _raise_unauthorized() -> None:
|
||||
"""Raises a 401 challenge response for missing or invalid auth sessions."""
|
||||
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid or expired authentication session",
|
||||
headers={"WWW-Authenticate": "Bearer"},
|
||||
)
|
||||
|
||||
|
||||
def _raise_csrf_rejected() -> None:
|
||||
"""Raises a forbidden response for CSRF validation failure."""
|
||||
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Invalid CSRF token",
|
||||
)
|
||||
|
||||
|
||||
def get_request_auth_context(
|
||||
request: Request,
|
||||
credentials: HTTPAuthorizationCredentials | None = Depends(bearer_auth),
|
||||
csrf_header: str | None = Header(None, alias=CSRF_HEADER_NAME),
|
||||
csrf_cookie: str | None = Cookie(None, alias=CSRF_COOKIE_NAME),
|
||||
session_cookie: str | None = Cookie(None, alias=SESSION_COOKIE_NAME),
|
||||
session: Session = Depends(get_session),
|
||||
) -> AuthContext:
|
||||
"""Authenticates auth session token and validates CSRF for cookie sessions."""
|
||||
|
||||
token = credentials.credentials.strip() if credentials is not None and credentials.credentials else ""
|
||||
using_cookie_session = False
|
||||
session_candidates: list[str] = []
|
||||
|
||||
if not token:
|
||||
using_cookie_session = True
|
||||
session_candidates = [candidate for candidate in _extract_cookie_values(request, SESSION_COOKIE_NAME) if candidate]
|
||||
normalized_session_cookie = (session_cookie or "").strip()
|
||||
if normalized_session_cookie and normalized_session_cookie not in session_candidates:
|
||||
session_candidates.append(normalized_session_cookie)
|
||||
if not session_candidates:
|
||||
_raise_unauthorized()
|
||||
|
||||
if _requires_csrf_validation(request.method) and using_cookie_session:
|
||||
normalized_csrf_header = (csrf_header or "").strip()
|
||||
csrf_candidates = [candidate for candidate in _extract_cookie_values(request, CSRF_COOKIE_NAME) if candidate]
|
||||
normalized_csrf_cookie = (csrf_cookie or "").strip()
|
||||
if normalized_csrf_cookie and normalized_csrf_cookie not in csrf_candidates:
|
||||
csrf_candidates.append(normalized_csrf_cookie)
|
||||
if (
|
||||
not csrf_candidates
|
||||
or not normalized_csrf_header
|
||||
or not any(hmac.compare_digest(candidate, normalized_csrf_header) for candidate in csrf_candidates)
|
||||
):
|
||||
_raise_csrf_rejected()
|
||||
|
||||
resolved_session = None
|
||||
if token:
|
||||
resolved_session = resolve_auth_session(session, token=token)
|
||||
else:
|
||||
for candidate in session_candidates:
|
||||
resolved_session = resolve_auth_session(session, token=candidate)
|
||||
if resolved_session is not None and resolved_session.user is not None:
|
||||
break
|
||||
|
||||
if resolved_session is None or resolved_session.user is None:
|
||||
_raise_unauthorized()
|
||||
|
||||
return AuthContext(
|
||||
user_id=resolved_session.user.id,
|
||||
username=resolved_session.user.username,
|
||||
role=resolved_session.user.role,
|
||||
session_id=resolved_session.id,
|
||||
expires_at=resolved_session.expires_at,
|
||||
)
|
||||
|
||||
|
||||
def require_user_or_admin(context: Annotated[AuthContext, Depends(get_request_auth_context)]) -> AuthContext:
|
||||
"""Requires any authenticated user session and returns its request identity context."""
|
||||
|
||||
return context
|
||||
|
||||
|
||||
def require_admin(context: Annotated[AuthContext, Depends(get_request_auth_context)]) -> AuthContext:
|
||||
"""Requires authenticated admin role and rejects standard user sessions."""
|
||||
|
||||
if context.role != UserRole.ADMIN:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Administrator role required",
|
||||
)
|
||||
return context
|
||||
@@ -1,7 +1,9 @@
|
||||
"""API router registration for all HTTP route modules."""
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
from app.api.auth import require_admin
|
||||
from app.api.routes_auth import router as auth_router
|
||||
from app.api.routes_documents import router as documents_router
|
||||
from app.api.routes_health import router as health_router
|
||||
from app.api.routes_processing_logs import router as processing_logs_router
|
||||
@@ -11,7 +13,26 @@ from app.api.routes_settings import router as settings_router
|
||||
|
||||
api_router = APIRouter()
|
||||
api_router.include_router(health_router)
|
||||
api_router.include_router(documents_router, prefix="/documents", tags=["documents"])
|
||||
api_router.include_router(processing_logs_router, prefix="/processing/logs", tags=["processing-logs"])
|
||||
api_router.include_router(search_router, prefix="/search", tags=["search"])
|
||||
api_router.include_router(settings_router, prefix="/settings", tags=["settings"])
|
||||
api_router.include_router(auth_router)
|
||||
api_router.include_router(
|
||||
documents_router,
|
||||
prefix="/documents",
|
||||
tags=["documents"],
|
||||
)
|
||||
api_router.include_router(
|
||||
processing_logs_router,
|
||||
prefix="/processing/logs",
|
||||
tags=["processing-logs"],
|
||||
dependencies=[Depends(require_admin)],
|
||||
)
|
||||
api_router.include_router(
|
||||
search_router,
|
||||
prefix="/search",
|
||||
tags=["search"],
|
||||
)
|
||||
api_router.include_router(
|
||||
settings_router,
|
||||
prefix="/settings",
|
||||
tags=["settings"],
|
||||
dependencies=[Depends(require_admin)],
|
||||
)
|
||||
|
||||
349
backend/app/api/routes_auth.py
Normal file
349
backend/app/api/routes_auth.py
Normal file
@@ -0,0 +1,349 @@
|
||||
"""Authentication endpoints for credential login, session introspection, and logout."""
|
||||
|
||||
import logging
|
||||
import secrets
|
||||
from datetime import UTC, datetime
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, status
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.auth import (
|
||||
AuthContext,
|
||||
SESSION_COOKIE_NAME,
|
||||
CSRF_COOKIE_NAME,
|
||||
require_user_or_admin,
|
||||
)
|
||||
from app.core.config import get_settings
|
||||
from app.db.base import get_session
|
||||
from app.schemas.auth import (
|
||||
AuthLoginRequest,
|
||||
AuthLoginResponse,
|
||||
AuthLogoutResponse,
|
||||
AuthSessionResponse,
|
||||
AuthUserResponse,
|
||||
)
|
||||
from app.services.auth_login_throttle import (
|
||||
check_login_throttle,
|
||||
clear_login_throttle,
|
||||
record_failed_login_attempt,
|
||||
)
|
||||
|
||||
try:
|
||||
from fastapi import Cookie, Response
|
||||
except (ImportError, AttributeError):
|
||||
from fastapi.responses import Response
|
||||
|
||||
def Cookie(_default=None, **_kwargs): # type: ignore[no-untyped-def]
|
||||
"""Compatibility fallback for environments that stub fastapi without request params."""
|
||||
|
||||
return None
|
||||
from app.services.authentication import authenticate_user, issue_user_session, revoke_auth_session
|
||||
|
||||
router = APIRouter(prefix="/auth", tags=["auth"])
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LOGIN_THROTTLED_DETAIL = "Too many login attempts. Try again later."
|
||||
LOGIN_RATE_LIMITER_UNAVAILABLE_DETAIL = "Login rate limiter backend unavailable"
|
||||
|
||||
|
||||
def _request_ip_address(request: Request) -> str | None:
|
||||
"""Returns best-effort client IP extracted from the request transport context."""
|
||||
|
||||
return request.client.host if request.client is not None else None
|
||||
|
||||
|
||||
def _request_user_agent(request: Request) -> str | None:
|
||||
"""Returns best-effort user-agent metadata for created auth sessions."""
|
||||
|
||||
user_agent = request.headers.get("user-agent", "").strip()
|
||||
return user_agent[:512] if user_agent else None
|
||||
|
||||
|
||||
def _retry_after_headers(retry_after_seconds: int) -> dict[str, str]:
|
||||
"""Returns a bounded Retry-After header payload for throttled authentication responses."""
|
||||
|
||||
return {"Retry-After": str(max(1, int(retry_after_seconds)))}
|
||||
|
||||
|
||||
def _is_https_request(request: Request) -> bool:
|
||||
"""Returns whether the incoming request should be treated as HTTPS for cookie flags."""
|
||||
|
||||
forwarded_protocol = request.headers.get("x-forwarded-proto", "").strip().lower().split(",")[0]
|
||||
if forwarded_protocol:
|
||||
return forwarded_protocol == "https"
|
||||
request_url = getattr(request, "url", None)
|
||||
request_scheme = str(getattr(request_url, "scheme", "")).lower() if request_url is not None else ""
|
||||
if request_scheme == "https":
|
||||
return True
|
||||
|
||||
parsed_public_base_url = urlparse(get_settings().public_base_url.strip())
|
||||
return parsed_public_base_url.scheme.lower() == "https"
|
||||
|
||||
|
||||
def _resolve_cookie_domain() -> str | None:
|
||||
"""Returns optional cookie domain override for multi-subdomain deployments."""
|
||||
|
||||
configured_domain = get_settings().auth_cookie_domain.strip().lower().lstrip(".")
|
||||
if not configured_domain or "." not in configured_domain:
|
||||
return None
|
||||
return configured_domain
|
||||
|
||||
|
||||
def _resolve_cookie_domains() -> tuple[str | None, ...]:
|
||||
"""Returns cookie domain variants with a host-only cookie first for browser compatibility."""
|
||||
|
||||
configured_domain = _resolve_cookie_domain()
|
||||
if configured_domain is None:
|
||||
return (None,)
|
||||
return (None, configured_domain)
|
||||
|
||||
|
||||
def _request_matches_cookie_domain(request: Request) -> bool:
    """Returns whether request and origin hosts both sit under the configured cookie domain.

    Used to decide whether a cross-origin-looking request is actually same-site
    (e.g. app.example.com calling api.example.com under cookie domain example.com).
    Returns False when no cookie domain is configured or either host is unknown.
    """

    configured_domain = _resolve_cookie_domain()
    if configured_domain is None:
        return False

    def _normalize_host(value: object) -> str:
        # urlparse(...).hostname and request.url.hostname may be None (e.g. for
        # "Origin: null" or schemeless values). The previous code crashed on
        # None.strip() for the origin and stringified a None request hostname
        # into the truthy bogus host "none", skipping the base-URL fallback.
        if value is None:
            return ""
        return str(value).strip().lower()

    origin_header = request.headers.get("origin", "").strip()
    origin_host = _normalize_host(urlparse(origin_header).hostname) if origin_header else ""
    if not origin_host:
        return False

    request_url = getattr(request, "url", None)
    request_host = _normalize_host(getattr(request_url, "hostname", None)) if request_url is not None else ""
    if not request_host:
        # Fall back to the deployment's configured public base URL.
        parsed_public_base_url = urlparse(get_settings().public_base_url.strip())
        request_host = _normalize_host(parsed_public_base_url.hostname)
    if not request_host:
        return False

    def _matches(candidate: str) -> bool:
        # Exact match or any subdomain of the configured cookie domain.
        return candidate == configured_domain or candidate.endswith(f".{configured_domain}")

    return _matches(origin_host) and _matches(request_host)
|
||||
|
||||
|
||||
def _resolve_cookie_samesite(request: Request, secure_cookie: bool) -> str:
    """Returns cookie SameSite mode with same-site subdomain compatibility defaults."""

    mode = get_settings().auth_cookie_samesite.strip().lower()
    # Explicit strict/lax settings are honored verbatim.
    if mode in ("strict", "lax"):
        return mode
    if mode == "none":
        # Traffic under the shared cookie domain can keep the stricter "lax".
        if _request_matches_cookie_domain(request):
            return "lax"
        return "none"
    # Auto mode: cross-site "none" only makes sense on a Secure cookie.
    return "none" if secure_cookie else "lax"
|
||||
|
||||
|
||||
def _session_cookie_ttl_seconds(expires_at: datetime) -> int:
|
||||
"""Converts session expiration datetime into cookie max-age seconds."""
|
||||
|
||||
now = datetime.now(UTC)
|
||||
ttl = int((expires_at - now).total_seconds())
|
||||
return max(1, ttl)
|
||||
|
||||
|
||||
def _set_session_cookie(
    response: Response,
    session_token: str,
    *,
    request: Request,
    expires_at: datetime,
    secure: bool,
) -> None:
    """Stores the issued session token in a browser HttpOnly auth cookie."""

    # Tolerate non-cookie-capable response objects (e.g. in tests).
    if response is None or not hasattr(response, "set_cookie"):
        return
    max_age = _session_cookie_ttl_seconds(expires_at)
    samesite = _resolve_cookie_samesite(request, secure)
    # Emit a host-only cookie plus an optional domain-wide variant.
    for domain in _resolve_cookie_domains():
        extra_kwargs = {} if domain is None else {"domain": domain}
        response.set_cookie(
            SESSION_COOKIE_NAME,
            value=session_token,
            max_age=max_age,
            httponly=True,
            secure=secure,
            samesite=samesite,
            path="/",
            **extra_kwargs,
        )
|
||||
|
||||
|
||||
def _set_csrf_cookie(
    response: Response,
    csrf_token: str,
    *,
    request: Request,
    expires_at: datetime,
    secure: bool,
) -> None:
    """Stores an anti-CSRF token in a browser cookie for JavaScript-safe extraction."""

    # Tolerate non-cookie-capable response objects (e.g. in tests).
    if response is None or not hasattr(response, "set_cookie"):
        return
    samesite = _resolve_cookie_samesite(request, secure)
    # Emit a host-only cookie plus an optional domain-wide variant.
    for domain in _resolve_cookie_domains():
        extra_kwargs = {} if domain is None else {"domain": domain}
        response.set_cookie(
            CSRF_COOKIE_NAME,
            value=csrf_token,
            max_age=_session_cookie_ttl_seconds(expires_at),
            # Intentionally readable by JS so the SPA can mirror it in a header.
            httponly=False,
            secure=secure,
            samesite=samesite,
            path="/",
            **extra_kwargs,
        )
|
||||
|
||||
|
||||
def _clear_session_cookies(response: Response) -> None:
    """Clears auth cookies returned by login responses."""

    if response is None or not hasattr(response, "delete_cookie"):
        return
    # Delete every variant we may have set: host-only and domain-wide.
    for domain in _resolve_cookie_domains():
        delete_kwargs = {"path": "/"} if domain is None else {"path": "/", "domain": domain}
        for cookie_name in (SESSION_COOKIE_NAME, CSRF_COOKIE_NAME):
            response.delete_cookie(cookie_name, **delete_kwargs)
|
||||
|
||||
|
||||
@router.post("/login", response_model=AuthLoginResponse)
|
||||
def login(
|
||||
payload: AuthLoginRequest,
|
||||
request: Request,
|
||||
response: Response,
|
||||
session: Session = Depends(get_session),
|
||||
) -> AuthLoginResponse:
|
||||
"""Authenticates credentials with throttle protection and returns issued session metadata."""
|
||||
|
||||
ip_address = _request_ip_address(request)
|
||||
try:
|
||||
throttle_status = check_login_throttle(
|
||||
username=payload.username,
|
||||
ip_address=ip_address,
|
||||
)
|
||||
except RuntimeError as error:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
||||
detail=LOGIN_RATE_LIMITER_UNAVAILABLE_DETAIL,
|
||||
) from error
|
||||
if throttle_status.is_throttled:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
|
||||
detail=LOGIN_THROTTLED_DETAIL,
|
||||
headers=_retry_after_headers(throttle_status.retry_after_seconds),
|
||||
)
|
||||
|
||||
user = authenticate_user(
|
||||
session,
|
||||
username=payload.username,
|
||||
password=payload.password,
|
||||
)
|
||||
if user is None:
|
||||
try:
|
||||
lockout_seconds = record_failed_login_attempt(
|
||||
username=payload.username,
|
||||
ip_address=ip_address,
|
||||
)
|
||||
except RuntimeError as error:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
||||
detail=LOGIN_RATE_LIMITER_UNAVAILABLE_DETAIL,
|
||||
) from error
|
||||
if lockout_seconds > 0:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
|
||||
detail=LOGIN_THROTTLED_DETAIL,
|
||||
headers=_retry_after_headers(lockout_seconds),
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid username or password",
|
||||
)
|
||||
|
||||
try:
|
||||
clear_login_throttle(
|
||||
username=payload.username,
|
||||
ip_address=ip_address,
|
||||
)
|
||||
except RuntimeError:
|
||||
logger.warning(
|
||||
"Failed to clear login throttle state after successful authentication: username=%s ip=%s",
|
||||
payload.username.strip().lower(),
|
||||
ip_address or "",
|
||||
)
|
||||
|
||||
issued_session = issue_user_session(
|
||||
session,
|
||||
user=user,
|
||||
user_agent=_request_user_agent(request),
|
||||
ip_address=ip_address,
|
||||
)
|
||||
session.commit()
|
||||
|
||||
csrf_token = secrets.token_urlsafe(32)
|
||||
secure_cookie = _is_https_request(request)
|
||||
_set_session_cookie(
|
||||
response,
|
||||
issued_session.token,
|
||||
request=request,
|
||||
expires_at=issued_session.expires_at,
|
||||
secure=secure_cookie,
|
||||
)
|
||||
_set_csrf_cookie(
|
||||
response,
|
||||
csrf_token,
|
||||
request=request,
|
||||
expires_at=issued_session.expires_at,
|
||||
secure=secure_cookie,
|
||||
)
|
||||
|
||||
return AuthLoginResponse(
|
||||
user=AuthUserResponse.model_validate(user),
|
||||
expires_at=issued_session.expires_at,
|
||||
access_token=issued_session.token,
|
||||
csrf_token=csrf_token,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/me", response_model=AuthSessionResponse)
|
||||
def me(
|
||||
context: AuthContext = Depends(require_user_or_admin),
|
||||
csrf_cookie: str | None = Cookie(None, alias=CSRF_COOKIE_NAME),
|
||||
) -> AuthSessionResponse:
|
||||
"""Returns current authenticated session identity and expiration metadata."""
|
||||
|
||||
normalized_csrf_cookie = (csrf_cookie or "").strip() or None
|
||||
return AuthSessionResponse(
|
||||
expires_at=context.expires_at,
|
||||
user=AuthUserResponse(
|
||||
id=context.user_id,
|
||||
username=context.username,
|
||||
role=context.role,
|
||||
),
|
||||
csrf_token=normalized_csrf_cookie,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/logout", response_model=AuthLogoutResponse)
|
||||
def logout(
|
||||
response: Response,
|
||||
context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> AuthLogoutResponse:
|
||||
"""Revokes current session token and clears client auth cookies."""
|
||||
|
||||
revoked = revoke_auth_session(
|
||||
session,
|
||||
session_id=context.session_id,
|
||||
)
|
||||
if revoked:
|
||||
session.commit()
|
||||
|
||||
_clear_session_cookies(response)
|
||||
return AuthLogoutResponse(revoked=revoked)
|
||||
@@ -1,12 +1,12 @@
|
||||
"""Document CRUD, lifecycle, metadata, file access, and content export endpoints."""
|
||||
"""Authenticated document CRUD, lifecycle, metadata, file access, and content export endpoints."""
|
||||
|
||||
import io
|
||||
import re
|
||||
import tempfile
|
||||
import unicodedata
|
||||
import zipfile
|
||||
from datetime import datetime, time
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Literal
|
||||
from typing import Annotated, BinaryIO, Iterator, Literal
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, File, Form, HTTPException, Query, UploadFile
|
||||
@@ -14,8 +14,10 @@ from fastapi.responses import FileResponse, Response, StreamingResponse
|
||||
from sqlalchemy import or_, func, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.services.app_settings import read_predefined_paths_settings, read_predefined_tags_settings
|
||||
from app.api.auth import AuthContext, require_user_or_admin
|
||||
from app.core.config import get_settings, is_inline_preview_mime_type_safe
|
||||
from app.db.base import get_session
|
||||
from app.models.auth import UserRole
|
||||
from app.models.document import Document, DocumentStatus
|
||||
from app.schemas.documents import (
|
||||
ContentExportRequest,
|
||||
@@ -26,15 +28,96 @@ from app.schemas.documents import (
|
||||
UploadConflict,
|
||||
UploadResponse,
|
||||
)
|
||||
from app.services.app_settings import read_predefined_paths_settings, read_predefined_tags_settings
|
||||
from app.services.extractor import sniff_mime
|
||||
from app.services.handwriting_style import delete_many_handwriting_style_documents
|
||||
from app.services.processing_logs import log_processing_event, set_processing_log_autocommit
|
||||
from app.services.rate_limiter import increment_rate_limit
|
||||
from app.services.storage import absolute_path, compute_sha256, store_bytes
|
||||
from app.services.typesense_index import delete_many_documents_index, upsert_document_index
|
||||
from app.worker.queue import get_processing_queue
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
settings = get_settings()
|
||||
|
||||
|
||||
def _scope_document_statement_for_auth_context(statement, auth_context: AuthContext):
    """Restricts document statements to caller-owned rows for non-admin users."""

    # Admins see every row; all other roles only their own documents.
    if auth_context.role != UserRole.ADMIN:
        statement = statement.where(Document.owner_user_id == auth_context.user_id)
    return statement
|
||||
|
||||
|
||||
def _is_predefined_entry_visible_to_auth_context(entry: dict[str, object], auth_context: AuthContext) -> bool:
    """Returns whether one predefined catalog entry is visible to the active caller role."""

    # Non-admin callers only see entries explicitly flagged as globally shared.
    return auth_context.role == UserRole.ADMIN or bool(entry.get("global_shared", False))
|
||||
|
||||
|
||||
def _collect_visible_predefined_values(
    entries: list[dict[str, object]],
    *,
    auth_context: AuthContext,
) -> set[str]:
    """Collects normalized predefined values visible for the active caller role."""

    # Normalize visible values first, then drop empties in one pass.
    candidates = (
        str(entry.get("value", "")).strip()
        for entry in entries
        if _is_predefined_entry_visible_to_auth_context(entry, auth_context)
    )
    return {value for value in candidates if value}
|
||||
|
||||
|
||||
def _ensure_document_access(document: Document, auth_context: AuthContext) -> None:
    """Enforces owner-level access for non-admin users and raises not-found on violations."""

    is_admin = auth_context.role == UserRole.ADMIN
    # Respond 404 (not 403) so non-owners cannot probe for document existence.
    if not is_admin and document.owner_user_id != auth_context.user_id:
        raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
|
||||
def _stream_binary_file_chunks(handle: BinaryIO, *, chunk_bytes: int) -> Iterator[bytes]:
|
||||
"""Streams binary file-like content in bounded chunks and closes handle after completion."""
|
||||
|
||||
try:
|
||||
while True:
|
||||
chunk = handle.read(chunk_bytes)
|
||||
if not chunk:
|
||||
break
|
||||
yield chunk
|
||||
finally:
|
||||
handle.close()
|
||||
|
||||
|
||||
def _enforce_content_export_rate_limit(auth_context: AuthContext) -> None:
    """Applies per-user fixed-window rate limiting for markdown export requests."""

    try:
        current_count, limit = increment_rate_limit(
            scope="content-md-export",
            subject=str(auth_context.user_id),
            limit=settings.content_export_rate_limit_per_minute,
            window_seconds=60,
        )
    except RuntimeError as error:
        # Surface backend outages as 503 rather than failing open.
        raise HTTPException(
            status_code=503,
            detail="Rate limiter backend unavailable",
        ) from error

    # A non-positive limit never throttles (limit > 0 is required to raise).
    if limit > 0 and current_count > limit:
        raise HTTPException(
            status_code=429,
            detail=f"Export rate limit exceeded ({limit} requests per minute)",
        )
|
||||
|
||||
|
||||
def _parse_csv(value: str | None) -> list[str]:
|
||||
@@ -227,6 +310,33 @@ def _build_document_list_statement(
|
||||
return statement
|
||||
|
||||
|
||||
def _enforce_upload_shape(files: list[UploadFile]) -> None:
    """Validates upload request shape against configured file-count bounds."""

    if not files:
        raise HTTPException(status_code=400, detail="Upload request must include at least one file")
    allowed_count = settings.max_upload_files_per_request
    if len(files) > allowed_count:
        raise HTTPException(
            status_code=413,
            detail=f"Upload request exceeds file count limit ({len(files)} > {allowed_count})",
        )
|
||||
|
||||
|
||||
async def _read_upload_bytes(file: UploadFile, max_bytes: int) -> bytes:
    """Reads one upload file while enforcing per-file byte limits."""

    # Read one byte past the limit so oversize files are detectable without
    # buffering arbitrarily large payloads.
    data = await file.read(max_bytes + 1)
    if len(data) <= max_bytes:
        return data
    raise HTTPException(
        status_code=413,
        detail=f"File '{file.filename or 'upload'}' exceeds per-file limit of {max_bytes} bytes",
    )
|
||||
|
||||
|
||||
def _collect_document_tree(session: Session, root_document_id: UUID) -> list[tuple[int, Document]]:
|
||||
"""Collects a document and all descendants for recursive permanent deletion."""
|
||||
|
||||
@@ -267,6 +377,7 @@ def list_documents(
|
||||
type_filter: str | None = Query(default=None),
|
||||
processed_from: str | None = Query(default=None),
|
||||
processed_to: str | None = Query(default=None),
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> DocumentsListResponse:
|
||||
"""Returns paginated documents ordered by newest upload timestamp."""
|
||||
@@ -276,6 +387,7 @@ def list_documents(
|
||||
include_trashed=include_trashed,
|
||||
path_prefix=path_prefix,
|
||||
)
|
||||
base_statement = _scope_document_statement_for_auth_context(base_statement, auth_context)
|
||||
base_statement = _apply_discovery_filters(
|
||||
base_statement,
|
||||
path_filter=path_filter,
|
||||
@@ -297,20 +409,23 @@ def list_documents(
|
||||
@router.get("/tags")
|
||||
def list_tags(
|
||||
include_trashed: bool = Query(default=False),
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> dict[str, list[str]]:
|
||||
"""Returns distinct tags currently assigned across all matching documents."""
|
||||
|
||||
statement = select(Document.tags)
|
||||
statement = _scope_document_statement_for_auth_context(statement, auth_context)
|
||||
if not include_trashed:
|
||||
statement = statement.where(Document.status != DocumentStatus.TRASHED)
|
||||
|
||||
rows = session.execute(statement).scalars().all()
|
||||
tags = {tag for row in rows for tag in row if tag}
|
||||
tags.update(
|
||||
str(item.get("value", "")).strip()
|
||||
for item in read_predefined_tags_settings()
|
||||
if str(item.get("value", "")).strip()
|
||||
_collect_visible_predefined_values(
|
||||
read_predefined_tags_settings(),
|
||||
auth_context=auth_context,
|
||||
)
|
||||
)
|
||||
tags = sorted(tags)
|
||||
return {"tags": tags}
|
||||
@@ -319,20 +434,23 @@ def list_tags(
|
||||
@router.get("/paths")
|
||||
def list_paths(
|
||||
include_trashed: bool = Query(default=False),
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> dict[str, list[str]]:
|
||||
"""Returns distinct logical paths currently assigned across all matching documents."""
|
||||
|
||||
statement = select(Document.logical_path)
|
||||
statement = _scope_document_statement_for_auth_context(statement, auth_context)
|
||||
if not include_trashed:
|
||||
statement = statement.where(Document.status != DocumentStatus.TRASHED)
|
||||
|
||||
rows = session.execute(statement).scalars().all()
|
||||
paths = {row for row in rows if row}
|
||||
paths.update(
|
||||
str(item.get("value", "")).strip()
|
||||
for item in read_predefined_paths_settings()
|
||||
if str(item.get("value", "")).strip()
|
||||
_collect_visible_predefined_values(
|
||||
read_predefined_paths_settings(),
|
||||
auth_context=auth_context,
|
||||
)
|
||||
)
|
||||
paths = sorted(paths)
|
||||
return {"paths": paths}
|
||||
@@ -341,11 +459,13 @@ def list_paths(
|
||||
@router.get("/types")
|
||||
def list_types(
|
||||
include_trashed: bool = Query(default=False),
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> dict[str, list[str]]:
|
||||
"""Returns distinct document type values from extension, MIME, and image text type."""
|
||||
|
||||
statement = select(Document.extension, Document.mime_type, Document.image_text_type)
|
||||
statement = _scope_document_statement_for_auth_context(statement, auth_context)
|
||||
if not include_trashed:
|
||||
statement = statement.where(Document.status != DocumentStatus.TRASHED)
|
||||
rows = session.execute(statement).all()
|
||||
@@ -361,16 +481,20 @@ def list_types(
|
||||
@router.post("/content-md/export")
|
||||
def export_contents_markdown(
|
||||
payload: ContentExportRequest,
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> StreamingResponse:
|
||||
"""Exports extracted contents for selected documents as individual markdown files in a ZIP archive."""
|
||||
|
||||
_enforce_content_export_rate_limit(auth_context)
|
||||
|
||||
has_document_ids = len(payload.document_ids) > 0
|
||||
has_path_prefix = bool(payload.path_prefix and payload.path_prefix.strip())
|
||||
if not has_document_ids and not has_path_prefix:
|
||||
raise HTTPException(status_code=400, detail="Provide document_ids or path_prefix for export")
|
||||
|
||||
statement = select(Document)
|
||||
statement = _scope_document_statement_for_auth_context(statement, auth_context)
|
||||
if has_document_ids:
|
||||
statement = statement.where(Document.id.in_(payload.document_ids))
|
||||
if has_path_prefix:
|
||||
@@ -380,37 +504,82 @@ def export_contents_markdown(
|
||||
elif not payload.include_trashed:
|
||||
statement = statement.where(Document.status != DocumentStatus.TRASHED)
|
||||
|
||||
documents = session.execute(statement.order_by(Document.logical_path.asc(), Document.created_at.asc())).scalars().all()
|
||||
max_documents = max(1, int(settings.content_export_max_documents))
|
||||
ordered_statement = statement.order_by(Document.logical_path.asc(), Document.created_at.asc()).limit(max_documents + 1)
|
||||
documents = session.execute(ordered_statement).scalars().all()
|
||||
if len(documents) > max_documents:
|
||||
raise HTTPException(
|
||||
status_code=413,
|
||||
detail=f"Export exceeds maximum document count ({len(documents)} > {max_documents})",
|
||||
)
|
||||
if not documents:
|
||||
raise HTTPException(status_code=404, detail="No matching documents found for export")
|
||||
|
||||
archive_buffer = io.BytesIO()
|
||||
max_total_bytes = max(1, int(settings.content_export_max_total_bytes))
|
||||
max_spool_memory = max(64 * 1024, int(settings.content_export_spool_max_memory_bytes))
|
||||
archive_file = tempfile.SpooledTemporaryFile(max_size=max_spool_memory, mode="w+b")
|
||||
total_export_bytes = 0
|
||||
used_entries: set[str] = set()
|
||||
with zipfile.ZipFile(archive_buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
|
||||
for document in documents:
|
||||
entry_name = _zip_entry_name(document, used_entries)
|
||||
archive.writestr(entry_name, _markdown_for_document(document))
|
||||
try:
|
||||
with zipfile.ZipFile(archive_file, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
|
||||
for document in documents:
|
||||
markdown_bytes = _markdown_for_document(document).encode("utf-8")
|
||||
total_export_bytes += len(markdown_bytes)
|
||||
if total_export_bytes > max_total_bytes:
|
||||
raise HTTPException(
|
||||
status_code=413,
|
||||
detail=(
|
||||
"Export exceeds total markdown size limit "
|
||||
f"({total_export_bytes} > {max_total_bytes} bytes)"
|
||||
),
|
||||
)
|
||||
entry_name = _zip_entry_name(document, used_entries)
|
||||
archive.writestr(entry_name, markdown_bytes)
|
||||
archive_file.seek(0)
|
||||
except Exception:
|
||||
archive_file.close()
|
||||
raise
|
||||
|
||||
archive_buffer.seek(0)
|
||||
chunk_bytes = max(4 * 1024, int(settings.content_export_stream_chunk_bytes))
|
||||
headers = {"Content-Disposition": 'attachment; filename="document-contents-md.zip"'}
|
||||
return StreamingResponse(archive_buffer, media_type="application/zip", headers=headers)
|
||||
return StreamingResponse(
|
||||
_stream_binary_file_chunks(archive_file, chunk_bytes=chunk_bytes),
|
||||
media_type="application/zip",
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{document_id}", response_model=DocumentDetailResponse)
|
||||
def get_document(document_id: UUID, session: Session = Depends(get_session)) -> DocumentDetailResponse:
|
||||
def get_document(
|
||||
document_id: UUID,
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> DocumentDetailResponse:
|
||||
"""Returns one document by unique identifier."""
|
||||
|
||||
document = session.execute(select(Document).where(Document.id == document_id)).scalar_one_or_none()
|
||||
statement = _scope_document_statement_for_auth_context(
|
||||
select(Document).where(Document.id == document_id),
|
||||
auth_context,
|
||||
)
|
||||
document = session.execute(statement).scalar_one_or_none()
|
||||
if document is None:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
return DocumentDetailResponse.model_validate(document)
|
||||
|
||||
|
||||
@router.get("/{document_id}/download")
|
||||
def download_document(document_id: UUID, session: Session = Depends(get_session)) -> FileResponse:
|
||||
def download_document(
|
||||
document_id: UUID,
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> FileResponse:
|
||||
"""Downloads original document bytes for the requested document identifier."""
|
||||
|
||||
document = session.execute(select(Document).where(Document.id == document_id)).scalar_one_or_none()
|
||||
statement = _scope_document_statement_for_auth_context(
|
||||
select(Document).where(Document.id == document_id),
|
||||
auth_context,
|
||||
)
|
||||
document = session.execute(statement).scalar_one_or_none()
|
||||
if document is None:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
file_path = absolute_path(document.stored_relative_path)
|
||||
@@ -418,22 +587,46 @@ def download_document(document_id: UUID, session: Session = Depends(get_session)
|
||||
|
||||
|
||||
@router.get("/{document_id}/preview")
|
||||
def preview_document(document_id: UUID, session: Session = Depends(get_session)) -> FileResponse:
|
||||
"""Streams the original document inline when browser rendering is supported."""
|
||||
def preview_document(
|
||||
document_id: UUID,
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> FileResponse:
|
||||
"""Streams trusted-safe MIME types inline and forces attachment for active script-capable types."""
|
||||
|
||||
document = session.execute(select(Document).where(Document.id == document_id)).scalar_one_or_none()
|
||||
statement = _scope_document_statement_for_auth_context(
|
||||
select(Document).where(Document.id == document_id),
|
||||
auth_context,
|
||||
)
|
||||
document = session.execute(statement).scalar_one_or_none()
|
||||
if document is None:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
original_path = absolute_path(document.stored_relative_path)
|
||||
return FileResponse(path=original_path, media_type=document.mime_type)
|
||||
common_headers = {"X-Content-Type-Options": "nosniff"}
|
||||
if not is_inline_preview_mime_type_safe(document.mime_type):
|
||||
return FileResponse(
|
||||
path=original_path,
|
||||
filename=document.original_filename,
|
||||
media_type="application/octet-stream",
|
||||
headers=common_headers,
|
||||
)
|
||||
return FileResponse(path=original_path, media_type=document.mime_type, headers=common_headers)
|
||||
|
||||
|
||||
@router.get("/{document_id}/thumbnail")
|
||||
def thumbnail_document(document_id: UUID, session: Session = Depends(get_session)) -> FileResponse:
|
||||
def thumbnail_document(
|
||||
document_id: UUID,
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> FileResponse:
|
||||
"""Returns a generated thumbnail image for dashboard card previews."""
|
||||
|
||||
document = session.execute(select(Document).where(Document.id == document_id)).scalar_one_or_none()
|
||||
statement = _scope_document_statement_for_auth_context(
|
||||
select(Document).where(Document.id == document_id),
|
||||
auth_context,
|
||||
)
|
||||
document = session.execute(statement).scalar_one_or_none()
|
||||
if document is None:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
@@ -448,10 +641,18 @@ def thumbnail_document(document_id: UUID, session: Session = Depends(get_session
|
||||
|
||||
|
||||
@router.get("/{document_id}/content-md")
|
||||
def download_document_content_markdown(document_id: UUID, session: Session = Depends(get_session)) -> Response:
|
||||
def download_document_content_markdown(
|
||||
document_id: UUID,
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> Response:
|
||||
"""Downloads extracted content for one document as a markdown file."""
|
||||
|
||||
document = session.execute(select(Document).where(Document.id == document_id)).scalar_one_or_none()
|
||||
statement = _scope_document_statement_for_auth_context(
|
||||
select(Document).where(Document.id == document_id),
|
||||
auth_context,
|
||||
)
|
||||
document = session.execute(statement).scalar_one_or_none()
|
||||
if document is None:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
@@ -468,22 +669,34 @@ async def upload_documents(
|
||||
logical_path: Annotated[str, Form()] = "Inbox",
|
||||
tags: Annotated[str | None, Form()] = None,
|
||||
conflict_mode: Annotated[Literal["ask", "replace", "duplicate"], Form()] = "ask",
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> UploadResponse:
|
||||
"""Uploads files, records metadata, and enqueues asynchronous extraction tasks."""
|
||||
|
||||
_enforce_upload_shape(files)
|
||||
set_processing_log_autocommit(session, True)
|
||||
normalized_tags = _normalize_tags(tags)
|
||||
queue = get_processing_queue()
|
||||
uploaded: list[DocumentResponse] = []
|
||||
conflicts: list[UploadConflict] = []
|
||||
total_request_bytes = 0
|
||||
|
||||
indexed_relative_paths = relative_paths or []
|
||||
prepared_uploads: list[dict[str, object]] = []
|
||||
|
||||
for idx, file in enumerate(files):
|
||||
filename = file.filename or f"uploaded_{idx}"
|
||||
data = await file.read()
|
||||
data = await _read_upload_bytes(file, settings.max_upload_file_size_bytes)
|
||||
total_request_bytes += len(data)
|
||||
if total_request_bytes > settings.max_upload_request_size_bytes:
|
||||
raise HTTPException(
|
||||
status_code=413,
|
||||
detail=(
|
||||
"Upload request exceeds total size limit "
|
||||
f"({total_request_bytes} > {settings.max_upload_request_size_bytes} bytes)"
|
||||
),
|
||||
)
|
||||
sha256 = compute_sha256(data)
|
||||
source_relative_path = indexed_relative_paths[idx] if idx < len(indexed_relative_paths) else filename
|
||||
extension = Path(filename).suffix.lower()
|
||||
@@ -514,7 +727,11 @@ async def upload_documents(
|
||||
}
|
||||
)
|
||||
|
||||
existing = session.execute(select(Document).where(Document.sha256 == sha256)).scalar_one_or_none()
|
||||
existing_statement = _scope_document_statement_for_auth_context(
|
||||
select(Document).where(Document.sha256 == sha256),
|
||||
auth_context,
|
||||
)
|
||||
existing = session.execute(existing_statement).scalar_one_or_none()
|
||||
if existing and conflict_mode == "ask":
|
||||
log_processing_event(
|
||||
session=session,
|
||||
@@ -541,9 +758,11 @@ async def upload_documents(
|
||||
return UploadResponse(uploaded=[], conflicts=conflicts)
|
||||
|
||||
for prepared in prepared_uploads:
|
||||
existing = session.execute(
|
||||
select(Document).where(Document.sha256 == str(prepared["sha256"]))
|
||||
).scalar_one_or_none()
|
||||
existing_statement = _scope_document_statement_for_auth_context(
|
||||
select(Document).where(Document.sha256 == str(prepared["sha256"])),
|
||||
auth_context,
|
||||
)
|
||||
existing = session.execute(existing_statement).scalar_one_or_none()
|
||||
replaces_document_id = existing.id if existing and conflict_mode == "replace" else None
|
||||
|
||||
stored_relative_path = store_bytes(str(prepared["filename"]), bytes(prepared["data"]))
|
||||
@@ -558,6 +777,7 @@ async def upload_documents(
|
||||
size_bytes=len(bytes(prepared["data"])),
|
||||
logical_path=logical_path,
|
||||
tags=list(normalized_tags),
|
||||
owner_user_id=auth_context.user_id,
|
||||
replaces_document_id=replaces_document_id,
|
||||
metadata_json={"upload": "web"},
|
||||
)
|
||||
@@ -589,11 +809,16 @@ async def upload_documents(
|
||||
def update_document(
|
||||
document_id: UUID,
|
||||
payload: DocumentUpdateRequest,
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> DocumentResponse:
|
||||
"""Updates document metadata and refreshes semantic index representation."""
|
||||
|
||||
document = session.execute(select(Document).where(Document.id == document_id)).scalar_one_or_none()
|
||||
statement = _scope_document_statement_for_auth_context(
|
||||
select(Document).where(Document.id == document_id),
|
||||
auth_context,
|
||||
)
|
||||
document = session.execute(statement).scalar_one_or_none()
|
||||
if document is None:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
@@ -615,10 +840,18 @@ def update_document(
|
||||
|
||||
|
||||
@router.post("/{document_id}/trash", response_model=DocumentResponse)
|
||||
def trash_document(document_id: UUID, session: Session = Depends(get_session)) -> DocumentResponse:
|
||||
def trash_document(
|
||||
document_id: UUID,
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> DocumentResponse:
|
||||
"""Marks a document as trashed without deleting files from storage."""
|
||||
|
||||
document = session.execute(select(Document).where(Document.id == document_id)).scalar_one_or_none()
|
||||
statement = _scope_document_statement_for_auth_context(
|
||||
select(Document).where(Document.id == document_id),
|
||||
auth_context,
|
||||
)
|
||||
document = session.execute(statement).scalar_one_or_none()
|
||||
if document is None:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
@@ -639,10 +872,18 @@ def trash_document(document_id: UUID, session: Session = Depends(get_session)) -
|
||||
|
||||
|
||||
@router.post("/{document_id}/restore", response_model=DocumentResponse)
|
||||
def restore_document(document_id: UUID, session: Session = Depends(get_session)) -> DocumentResponse:
|
||||
def restore_document(
|
||||
document_id: UUID,
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> DocumentResponse:
|
||||
"""Restores a trashed document to its previous lifecycle status."""
|
||||
|
||||
document = session.execute(select(Document).where(Document.id == document_id)).scalar_one_or_none()
|
||||
statement = _scope_document_statement_for_auth_context(
|
||||
select(Document).where(Document.id == document_id),
|
||||
auth_context,
|
||||
)
|
||||
document = session.execute(statement).scalar_one_or_none()
|
||||
if document is None:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
@@ -664,16 +905,27 @@ def restore_document(document_id: UUID, session: Session = Depends(get_session))
|
||||
|
||||
|
||||
@router.delete("/{document_id}")
|
||||
def delete_document(document_id: UUID, session: Session = Depends(get_session)) -> dict[str, int]:
|
||||
def delete_document(
|
||||
document_id: UUID,
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> dict[str, int]:
|
||||
"""Permanently deletes a document and all descendant archive members including stored files."""
|
||||
|
||||
root = session.execute(select(Document).where(Document.id == document_id)).scalar_one_or_none()
|
||||
root_statement = _scope_document_statement_for_auth_context(
|
||||
select(Document).where(Document.id == document_id),
|
||||
auth_context,
|
||||
)
|
||||
root = session.execute(root_statement).scalar_one_or_none()
|
||||
if root is None:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
if root.status != DocumentStatus.TRASHED:
|
||||
raise HTTPException(status_code=400, detail="Move document to trash before permanent deletion")
|
||||
|
||||
document_tree = _collect_document_tree(session=session, root_document_id=document_id)
|
||||
if auth_context.role != UserRole.ADMIN:
|
||||
for _, document in document_tree:
|
||||
_ensure_document_access(document, auth_context)
|
||||
document_ids = [document.id for _, document in document_tree]
|
||||
try:
|
||||
delete_many_documents_index([str(current_id) for current_id in document_ids])
|
||||
@@ -704,10 +956,18 @@ def delete_document(document_id: UUID, session: Session = Depends(get_session))
|
||||
|
||||
|
||||
@router.post("/{document_id}/reprocess", response_model=DocumentResponse)
|
||||
def reprocess_document(document_id: UUID, session: Session = Depends(get_session)) -> DocumentResponse:
|
||||
def reprocess_document(
|
||||
document_id: UUID,
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> DocumentResponse:
|
||||
"""Re-enqueues a document for extraction and suggestion processing."""
|
||||
|
||||
document = session.execute(select(Document).where(Document.id == document_id)).scalar_one_or_none()
|
||||
statement = _scope_document_statement_for_auth_context(
|
||||
select(Document).where(Document.id == document_id),
|
||||
auth_context,
|
||||
)
|
||||
document = session.execute(statement).scalar_one_or_none()
|
||||
if document is None:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
if document.status == DocumentStatus.TRASHED:
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
"""Read-only API endpoints for processing pipeline event logs."""
|
||||
"""Admin-only API endpoints for processing pipeline event logs."""
|
||||
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.db.base import get_session
|
||||
from app.schemas.processing_logs import ProcessingLogEntryResponse, ProcessingLogListResponse
|
||||
from app.services.app_settings import read_processing_log_retention_settings
|
||||
@@ -17,12 +18,13 @@ from app.services.processing_logs import (
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
settings = get_settings()
|
||||
|
||||
|
||||
@router.get("", response_model=ProcessingLogListResponse)
|
||||
def get_processing_logs(
|
||||
offset: int = Query(default=0, ge=0),
|
||||
limit: int = Query(default=120, ge=1, le=400),
|
||||
limit: int = Query(default=120, ge=1, le=settings.processing_log_max_unbound_entries),
|
||||
document_id: UUID | None = Query(default=None),
|
||||
session: Session = Depends(get_session),
|
||||
) -> ProcessingLogListResponse:
|
||||
@@ -43,8 +45,8 @@ def get_processing_logs(
|
||||
|
||||
@router.post("/trim")
|
||||
def trim_processing_logs(
|
||||
keep_document_sessions: int | None = Query(default=None, ge=0, le=20),
|
||||
keep_unbound_entries: int | None = Query(default=None, ge=0, le=400),
|
||||
keep_document_sessions: int | None = Query(default=None, ge=0, le=settings.processing_log_max_document_sessions),
|
||||
keep_unbound_entries: int | None = Query(default=None, ge=0, le=settings.processing_log_max_unbound_entries),
|
||||
session: Session = Depends(get_session),
|
||||
) -> dict[str, int]:
|
||||
"""Deletes old processing logs using query values or persisted retention defaults."""
|
||||
@@ -61,10 +63,19 @@ def trim_processing_logs(
|
||||
else int(retention_defaults.get("keep_unbound_entries", 80))
|
||||
)
|
||||
|
||||
capped_keep_document_sessions = min(
|
||||
settings.processing_log_max_document_sessions,
|
||||
max(0, int(resolved_keep_document_sessions)),
|
||||
)
|
||||
capped_keep_unbound_entries = min(
|
||||
settings.processing_log_max_unbound_entries,
|
||||
max(0, int(resolved_keep_unbound_entries)),
|
||||
)
|
||||
|
||||
result = cleanup_processing_logs(
|
||||
session=session,
|
||||
keep_document_sessions=resolved_keep_document_sessions,
|
||||
keep_unbound_entries=resolved_keep_unbound_entries,
|
||||
keep_document_sessions=capped_keep_document_sessions,
|
||||
keep_unbound_entries=capped_keep_unbound_entries,
|
||||
)
|
||||
session.commit()
|
||||
return result
|
||||
|
||||
@@ -4,7 +4,8 @@ from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy import Text, cast, func, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.routes_documents import _apply_discovery_filters
|
||||
from app.api.auth import AuthContext, require_user_or_admin
|
||||
from app.api.routes_documents import _apply_discovery_filters, _scope_document_statement_for_auth_context
|
||||
from app.db.base import get_session
|
||||
from app.models.document import Document, DocumentStatus
|
||||
from app.schemas.documents import DocumentResponse, SearchResponse
|
||||
@@ -25,6 +26,7 @@ def search_documents(
|
||||
type_filter: str | None = Query(default=None),
|
||||
processed_from: str | None = Query(default=None),
|
||||
processed_to: str | None = Query(default=None),
|
||||
auth_context: AuthContext = Depends(require_user_or_admin),
|
||||
session: Session = Depends(get_session),
|
||||
) -> SearchResponse:
|
||||
"""Searches documents using PostgreSQL full-text ranking plus metadata matching."""
|
||||
@@ -50,6 +52,7 @@ def search_documents(
|
||||
)
|
||||
|
||||
statement = select(Document).where(search_filter)
|
||||
statement = _scope_document_statement_for_auth_context(statement, auth_context)
|
||||
if only_trashed:
|
||||
statement = statement.where(Document.status == DocumentStatus.TRASHED)
|
||||
elif not include_trashed:
|
||||
@@ -67,6 +70,7 @@ def search_documents(
|
||||
items = session.execute(statement).scalars().all()
|
||||
|
||||
count_statement = select(func.count(Document.id)).where(search_filter)
|
||||
count_statement = _scope_document_statement_for_auth_context(count_statement, auth_context)
|
||||
if only_trashed:
|
||||
count_statement = count_statement.where(Document.status == DocumentStatus.TRASHED)
|
||||
elif not include_trashed:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""API routes for managing persistent single-user application settings."""
|
||||
"""Admin-only API routes for managing persistent single-user application settings."""
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from app.schemas.settings import (
|
||||
AppSettingsUpdateRequest,
|
||||
@@ -18,6 +18,7 @@ from app.schemas.settings import (
|
||||
UploadDefaultsResponse,
|
||||
)
|
||||
from app.services.app_settings import (
|
||||
AppSettingsValidationError,
|
||||
TASK_OCR_HANDWRITING,
|
||||
TASK_ROUTING_CLASSIFICATION,
|
||||
TASK_SUMMARY_GENERATION,
|
||||
@@ -179,16 +180,19 @@ def set_app_settings(payload: AppSettingsUpdateRequest) -> AppSettingsResponse:
|
||||
if payload.predefined_tags is not None:
|
||||
predefined_tags_payload = [item.model_dump(exclude_none=True) for item in payload.predefined_tags]
|
||||
|
||||
updated = update_app_settings(
|
||||
providers=providers_payload,
|
||||
tasks=tasks_payload,
|
||||
upload_defaults=upload_defaults_payload,
|
||||
display=display_payload,
|
||||
processing_log_retention=processing_log_retention_payload,
|
||||
handwriting_style=handwriting_style_payload,
|
||||
predefined_paths=predefined_paths_payload,
|
||||
predefined_tags=predefined_tags_payload,
|
||||
)
|
||||
try:
|
||||
updated = update_app_settings(
|
||||
providers=providers_payload,
|
||||
tasks=tasks_payload,
|
||||
upload_defaults=upload_defaults_payload,
|
||||
display=display_payload,
|
||||
processing_log_retention=processing_log_retention_payload,
|
||||
handwriting_style=handwriting_style_payload,
|
||||
predefined_paths=predefined_paths_payload,
|
||||
predefined_tags=predefined_tags_payload,
|
||||
)
|
||||
except AppSettingsValidationError as error:
|
||||
raise HTTPException(status_code=400, detail=str(error)) from error
|
||||
return _build_response(updated)
|
||||
|
||||
|
||||
@@ -203,14 +207,17 @@ def reset_settings_to_defaults() -> AppSettingsResponse:
|
||||
def set_handwriting_settings(payload: HandwritingSettingsUpdateRequest) -> AppSettingsResponse:
|
||||
"""Updates handwriting transcription settings and returns the resulting configuration."""
|
||||
|
||||
updated = update_handwriting_settings(
|
||||
enabled=payload.enabled,
|
||||
openai_base_url=payload.openai_base_url,
|
||||
openai_model=payload.openai_model,
|
||||
openai_timeout_seconds=payload.openai_timeout_seconds,
|
||||
openai_api_key=payload.openai_api_key,
|
||||
clear_openai_api_key=payload.clear_openai_api_key,
|
||||
)
|
||||
try:
|
||||
updated = update_handwriting_settings(
|
||||
enabled=payload.enabled,
|
||||
openai_base_url=payload.openai_base_url,
|
||||
openai_model=payload.openai_model,
|
||||
openai_timeout_seconds=payload.openai_timeout_seconds,
|
||||
openai_api_key=payload.openai_api_key,
|
||||
clear_openai_api_key=payload.clear_openai_api_key,
|
||||
)
|
||||
except AppSettingsValidationError as error:
|
||||
raise HTTPException(status_code=400, detail=str(error)) from error
|
||||
return _build_response(updated)
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
"""Application settings and environment configuration."""
|
||||
|
||||
from functools import lru_cache
|
||||
import ipaddress
|
||||
from pathlib import Path
|
||||
import socket
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
from pydantic import Field
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
@@ -16,22 +19,60 @@ class Settings(BaseSettings):
|
||||
app_env: str = "development"
|
||||
database_url: str = "postgresql+psycopg://dcm:dcm@db:5432/dcm"
|
||||
redis_url: str = "redis://redis:6379/0"
|
||||
redis_security_mode: str = "auto"
|
||||
redis_tls_mode: str = "auto"
|
||||
auth_bootstrap_admin_username: str = "admin"
|
||||
auth_bootstrap_admin_password: str = ""
|
||||
auth_bootstrap_user_username: str = ""
|
||||
auth_bootstrap_user_password: str = ""
|
||||
auth_session_ttl_minutes: int = 720
|
||||
auth_password_pbkdf2_iterations: int = 390000
|
||||
auth_session_token_bytes: int = 32
|
||||
auth_session_pepper: str = ""
|
||||
auth_login_failure_limit: int = 5
|
||||
auth_login_failure_window_seconds: int = 900
|
||||
auth_login_lockout_base_seconds: int = 30
|
||||
auth_login_lockout_max_seconds: int = 900
|
||||
auth_cookie_domain: str = ""
|
||||
auth_cookie_samesite: str = "auto"
|
||||
storage_root: Path = Path("/data/storage")
|
||||
upload_chunk_size: int = 4 * 1024 * 1024
|
||||
max_upload_files_per_request: int = 50
|
||||
max_upload_file_size_bytes: int = 25 * 1024 * 1024
|
||||
max_upload_request_size_bytes: int = 100 * 1024 * 1024
|
||||
content_export_max_documents: int = 250
|
||||
content_export_max_total_bytes: int = 50 * 1024 * 1024
|
||||
content_export_rate_limit_per_minute: int = 6
|
||||
content_export_stream_chunk_bytes: int = 256 * 1024
|
||||
content_export_spool_max_memory_bytes: int = 2 * 1024 * 1024
|
||||
max_zip_members: int = 250
|
||||
max_zip_depth: int = 2
|
||||
max_zip_descendants_per_root: int = 1000
|
||||
max_zip_member_uncompressed_bytes: int = 25 * 1024 * 1024
|
||||
max_zip_total_uncompressed_bytes: int = 150 * 1024 * 1024
|
||||
max_zip_compression_ratio: float = 120.0
|
||||
max_text_length: int = 500_000
|
||||
provider_base_url_allowlist: list[str] = Field(default_factory=lambda: ["api.openai.com"])
|
||||
provider_base_url_allow_http: bool = False
|
||||
provider_base_url_allow_private_network: bool = False
|
||||
processing_log_max_document_sessions: int = 20
|
||||
processing_log_max_unbound_entries: int = 400
|
||||
processing_log_max_payload_chars: int = 4096
|
||||
processing_log_max_text_chars: int = 12000
|
||||
processing_log_store_model_io_text: bool = False
|
||||
processing_log_store_payload_text: bool = False
|
||||
default_openai_base_url: str = "https://api.openai.com/v1"
|
||||
default_openai_model: str = "gpt-4.1-mini"
|
||||
default_openai_timeout_seconds: int = 45
|
||||
default_openai_handwriting_enabled: bool = True
|
||||
default_openai_api_key: str = ""
|
||||
app_settings_encryption_key: str = ""
|
||||
default_summary_model: str = "gpt-4.1-mini"
|
||||
default_routing_model: str = "gpt-4.1-mini"
|
||||
typesense_protocol: str = "http"
|
||||
typesense_host: str = "typesense"
|
||||
typesense_port: int = 8108
|
||||
typesense_api_key: str = "dcm-typesense-key"
|
||||
typesense_api_key: str = ""
|
||||
typesense_collection_name: str = "documents"
|
||||
typesense_timeout_seconds: int = 120
|
||||
typesense_num_retries: int = 0
|
||||
@@ -39,6 +80,292 @@ class Settings(BaseSettings):
|
||||
cors_origins: list[str] = Field(default_factory=lambda: ["http://localhost:5173", "http://localhost:3000"])
|
||||
|
||||
|
||||
LOCAL_HOSTNAME_SUFFIXES = (".local", ".internal", ".home.arpa")
|
||||
SCRIPT_CAPABLE_INLINE_MIME_TYPES = frozenset(
|
||||
{
|
||||
"application/ecmascript",
|
||||
"application/javascript",
|
||||
"application/x-javascript",
|
||||
"application/xhtml+xml",
|
||||
"image/svg+xml",
|
||||
"text/ecmascript",
|
||||
"text/html",
|
||||
"text/javascript",
|
||||
}
|
||||
)
|
||||
SCRIPT_CAPABLE_XML_MIME_TYPES = frozenset({"application/xml", "text/xml"})
|
||||
REDIS_SECURITY_MODES = frozenset({"auto", "strict", "compat"})
|
||||
REDIS_TLS_MODES = frozenset({"auto", "required", "allow_insecure"})
|
||||
|
||||
|
||||
def _is_production_environment(app_env: str) -> bool:
|
||||
"""Returns whether the runtime environment should enforce production-only security gates."""
|
||||
|
||||
normalized = app_env.strip().lower()
|
||||
return normalized in {"production", "prod"}
|
||||
|
||||
|
||||
def _normalize_redis_security_mode(raw_mode: str) -> str:
|
||||
"""Normalizes Redis security mode values into one supported mode."""
|
||||
|
||||
normalized = raw_mode.strip().lower()
|
||||
if normalized not in REDIS_SECURITY_MODES:
|
||||
return "auto"
|
||||
return normalized
|
||||
|
||||
|
||||
def _normalize_redis_tls_mode(raw_mode: str) -> str:
|
||||
"""Normalizes Redis TLS mode values into one supported mode."""
|
||||
|
||||
normalized = raw_mode.strip().lower()
|
||||
if normalized not in REDIS_TLS_MODES:
|
||||
return "auto"
|
||||
return normalized
|
||||
|
||||
|
||||
def validate_redis_url_security(
|
||||
redis_url: str,
|
||||
*,
|
||||
app_env: str | None = None,
|
||||
security_mode: str | None = None,
|
||||
tls_mode: str | None = None,
|
||||
) -> str:
|
||||
"""Validates Redis URL security posture with production fail-closed defaults."""
|
||||
|
||||
settings = get_settings()
|
||||
resolved_app_env = app_env if app_env is not None else settings.app_env
|
||||
resolved_security_mode = (
|
||||
_normalize_redis_security_mode(security_mode)
|
||||
if security_mode is not None
|
||||
else _normalize_redis_security_mode(settings.redis_security_mode)
|
||||
)
|
||||
resolved_tls_mode = (
|
||||
_normalize_redis_tls_mode(tls_mode)
|
||||
if tls_mode is not None
|
||||
else _normalize_redis_tls_mode(settings.redis_tls_mode)
|
||||
)
|
||||
|
||||
candidate = redis_url.strip()
|
||||
if not candidate:
|
||||
raise ValueError("Redis URL must not be empty")
|
||||
|
||||
parsed = urlparse(candidate)
|
||||
scheme = parsed.scheme.lower()
|
||||
if scheme not in {"redis", "rediss"}:
|
||||
raise ValueError("Redis URL must use redis:// or rediss://")
|
||||
if not parsed.hostname:
|
||||
raise ValueError("Redis URL must include a hostname")
|
||||
|
||||
strict_security = (
|
||||
resolved_security_mode == "strict"
|
||||
or (resolved_security_mode == "auto" and _is_production_environment(resolved_app_env))
|
||||
)
|
||||
require_tls = (
|
||||
resolved_tls_mode == "required"
|
||||
or (resolved_tls_mode == "auto" and strict_security)
|
||||
)
|
||||
has_password = bool(parsed.password and parsed.password.strip())
|
||||
uses_tls = scheme == "rediss"
|
||||
|
||||
if strict_security and not has_password:
|
||||
raise ValueError("Redis URL must include authentication when security mode is strict")
|
||||
if require_tls and not uses_tls:
|
||||
raise ValueError("Redis URL must use rediss:// when TLS is required")
|
||||
|
||||
return candidate
|
||||
|
||||
|
||||
def is_inline_preview_mime_type_safe(mime_type: str) -> bool:
|
||||
"""Returns whether a MIME type is safe to serve inline from untrusted document uploads."""
|
||||
|
||||
normalized = mime_type.split(";", 1)[0].strip().lower() if mime_type else ""
|
||||
if not normalized:
|
||||
return False
|
||||
if normalized in SCRIPT_CAPABLE_INLINE_MIME_TYPES:
|
||||
return False
|
||||
if normalized in SCRIPT_CAPABLE_XML_MIME_TYPES or normalized.endswith("+xml"):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _normalize_allowlist(allowlist: object) -> tuple[str, ...]:
|
||||
"""Normalizes host allowlist entries to lowercase DNS labels."""
|
||||
|
||||
if not isinstance(allowlist, (list, tuple, set)):
|
||||
return ()
|
||||
normalized = {
|
||||
candidate.strip().lower().rstrip(".")
|
||||
for candidate in allowlist
|
||||
if isinstance(candidate, str) and candidate.strip()
|
||||
}
|
||||
return tuple(sorted(normalized))
|
||||
|
||||
|
||||
def _host_matches_allowlist(hostname: str, allowlist: tuple[str, ...]) -> bool:
|
||||
"""Returns whether a hostname is included by an exact or subdomain allowlist rule."""
|
||||
|
||||
if not allowlist:
|
||||
return False
|
||||
candidate = hostname.lower().rstrip(".")
|
||||
for allowed_host in allowlist:
|
||||
if candidate == allowed_host or candidate.endswith(f".{allowed_host}"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _is_private_or_special_ip(value: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
|
||||
"""Returns whether an IP belongs to private, loopback, link-local, or reserved ranges."""
|
||||
|
||||
return (
|
||||
value.is_private
|
||||
or value.is_loopback
|
||||
or value.is_link_local
|
||||
or value.is_multicast
|
||||
or value.is_reserved
|
||||
or value.is_unspecified
|
||||
)
|
||||
|
||||
|
||||
def _validate_resolved_host_ips(hostname: str, port: int, allow_private_network: bool) -> None:
|
||||
"""Resolves hostnames and rejects private or special addresses when private network access is disabled."""
|
||||
|
||||
try:
|
||||
addresses = socket.getaddrinfo(hostname, port, type=socket.SOCK_STREAM)
|
||||
except socket.gaierror as error:
|
||||
raise ValueError(f"Provider base URL host cannot be resolved: {hostname}") from error
|
||||
|
||||
resolved_ips: set[ipaddress.IPv4Address | ipaddress.IPv6Address] = set()
|
||||
for entry in addresses:
|
||||
sockaddr = entry[4]
|
||||
if not sockaddr:
|
||||
continue
|
||||
ip_text = sockaddr[0]
|
||||
try:
|
||||
resolved_ips.add(ipaddress.ip_address(ip_text))
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
if not resolved_ips:
|
||||
raise ValueError(f"Provider base URL host resolved without usable IP addresses: {hostname}")
|
||||
|
||||
if allow_private_network:
|
||||
return
|
||||
|
||||
blocked = [ip for ip in resolved_ips if _is_private_or_special_ip(ip)]
|
||||
if blocked:
|
||||
blocked_text = ", ".join(str(ip) for ip in blocked)
|
||||
raise ValueError(f"Provider base URL resolves to private or special IP addresses: {blocked_text}")
|
||||
|
||||
|
||||
def _normalize_and_validate_provider_base_url(
|
||||
raw_value: str,
|
||||
allowlist: tuple[str, ...],
|
||||
allow_http: bool,
|
||||
allow_private_network: bool,
|
||||
resolve_dns: bool,
|
||||
) -> str:
|
||||
"""Normalizes and validates provider base URLs with SSRF-safe scheme and host checks."""
|
||||
|
||||
trimmed = raw_value.strip().rstrip("/")
|
||||
if not trimmed:
|
||||
raise ValueError("Provider base URL must not be empty")
|
||||
|
||||
parsed = urlparse(trimmed)
|
||||
scheme = parsed.scheme.lower()
|
||||
if scheme not in {"http", "https"}:
|
||||
raise ValueError("Provider base URL must use http or https")
|
||||
if scheme == "http" and not allow_http:
|
||||
raise ValueError("Provider base URL must use https")
|
||||
if parsed.query or parsed.fragment:
|
||||
raise ValueError("Provider base URL must not include query strings or fragments")
|
||||
if parsed.username or parsed.password:
|
||||
raise ValueError("Provider base URL must not include embedded credentials")
|
||||
|
||||
hostname = (parsed.hostname or "").lower().rstrip(".")
|
||||
if not hostname:
|
||||
raise ValueError("Provider base URL must include a hostname")
|
||||
if allowlist and not _host_matches_allowlist(hostname, allowlist):
|
||||
allowed_hosts = ", ".join(allowlist)
|
||||
raise ValueError(f"Provider base URL host is not in allowlist: {hostname}. Allowed hosts: {allowed_hosts}")
|
||||
|
||||
if hostname == "localhost" or hostname.endswith(LOCAL_HOSTNAME_SUFFIXES):
|
||||
if not allow_private_network:
|
||||
raise ValueError("Provider base URL must not target local or internal hostnames")
|
||||
|
||||
try:
|
||||
ip_host = ipaddress.ip_address(hostname)
|
||||
except ValueError:
|
||||
ip_host = None
|
||||
|
||||
if ip_host is not None:
|
||||
if not allow_private_network and _is_private_or_special_ip(ip_host):
|
||||
raise ValueError("Provider base URL must not target private or special IP addresses")
|
||||
elif resolve_dns:
|
||||
resolved_port = parsed.port
|
||||
if resolved_port is None:
|
||||
resolved_port = 443 if scheme == "https" else 80
|
||||
_validate_resolved_host_ips(
|
||||
hostname=hostname,
|
||||
port=resolved_port,
|
||||
allow_private_network=allow_private_network,
|
||||
)
|
||||
|
||||
path = (parsed.path or "").rstrip("/")
|
||||
if not path.endswith("/v1"):
|
||||
path = f"{path}/v1" if path else "/v1"
|
||||
|
||||
normalized_hostname = hostname
|
||||
if ":" in normalized_hostname and not normalized_hostname.startswith("["):
|
||||
normalized_hostname = f"[{normalized_hostname}]"
|
||||
netloc = f"{normalized_hostname}:{parsed.port}" if parsed.port is not None else normalized_hostname
|
||||
return urlunparse((scheme, netloc, path, "", "", ""))
|
||||
|
||||
|
||||
@lru_cache(maxsize=256)
|
||||
def _normalize_and_validate_provider_base_url_cached(
|
||||
raw_value: str,
|
||||
allowlist: tuple[str, ...],
|
||||
allow_http: bool,
|
||||
allow_private_network: bool,
|
||||
) -> str:
|
||||
"""Caches provider URL validation results for non-DNS-resolved checks."""
|
||||
|
||||
return _normalize_and_validate_provider_base_url(
|
||||
raw_value=raw_value,
|
||||
allowlist=allowlist,
|
||||
allow_http=allow_http,
|
||||
allow_private_network=allow_private_network,
|
||||
resolve_dns=False,
|
||||
)
|
||||
|
||||
|
||||
def normalize_and_validate_provider_base_url(raw_value: str, *, resolve_dns: bool = False) -> str:
|
||||
"""Validates and normalizes provider base URL values using configured SSRF protections."""
|
||||
|
||||
settings = get_settings()
|
||||
allowlist = _normalize_allowlist(settings.provider_base_url_allowlist)
|
||||
allow_http = settings.provider_base_url_allow_http if isinstance(settings.provider_base_url_allow_http, bool) else False
|
||||
allow_private_network = (
|
||||
settings.provider_base_url_allow_private_network
|
||||
if isinstance(settings.provider_base_url_allow_private_network, bool)
|
||||
else False
|
||||
)
|
||||
if resolve_dns:
|
||||
return _normalize_and_validate_provider_base_url(
|
||||
raw_value=raw_value,
|
||||
allowlist=allowlist,
|
||||
allow_http=allow_http,
|
||||
allow_private_network=allow_private_network,
|
||||
resolve_dns=True,
|
||||
)
|
||||
return _normalize_and_validate_provider_base_url_cached(
|
||||
raw_value=raw_value,
|
||||
allowlist=allowlist,
|
||||
allow_http=allow_http,
|
||||
allow_private_network=allow_private_network,
|
||||
)
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_settings() -> Settings:
|
||||
"""Returns a cached settings object for dependency injection and service access."""
|
||||
|
||||
@@ -1,40 +1,90 @@
|
||||
"""FastAPI entrypoint for the DMS backend service."""
|
||||
|
||||
from fastapi import FastAPI
|
||||
from typing import Awaitable, Callable
|
||||
|
||||
from fastapi import FastAPI, Request, Response
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from app.api.router import api_router
|
||||
from app.core.config import get_settings
|
||||
from app.db.base import init_db
|
||||
from app.services.app_settings import ensure_app_settings
|
||||
from app.services.authentication import ensure_bootstrap_users
|
||||
from app.services.handwriting_style import ensure_handwriting_style_collection
|
||||
from app.services.storage import ensure_storage
|
||||
from app.services.typesense_index import ensure_typesense_collection
|
||||
|
||||
|
||||
settings = get_settings()
|
||||
UPLOAD_ENDPOINT_PATH = "/api/v1/documents/upload"
|
||||
UPLOAD_ENDPOINT_METHOD = "POST"
|
||||
|
||||
|
||||
def _is_upload_size_guard_target(request: Request) -> bool:
|
||||
"""Returns whether upload request-size enforcement applies to this request.
|
||||
|
||||
Upload-size validation is intentionally scoped to the upload POST endpoint so CORS
|
||||
preflight OPTIONS requests can pass through CORSMiddleware.
|
||||
"""
|
||||
|
||||
return request.method.upper() == UPLOAD_ENDPOINT_METHOD and request.url.path == UPLOAD_ENDPOINT_PATH
|
||||
|
||||
|
||||
def create_app() -> FastAPI:
|
||||
"""Builds and configures the FastAPI application instance."""
|
||||
|
||||
app = FastAPI(title="DCM DMS API", version="0.1.0")
|
||||
allowed_origins = [origin.strip() for origin in settings.cors_origins if isinstance(origin, str) and origin.strip()]
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=settings.cors_origins,
|
||||
allow_origins=allowed_origins,
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
app.include_router(api_router, prefix="/api/v1")
|
||||
|
||||
@app.middleware("http")
|
||||
async def enforce_upload_request_size(
|
||||
request: Request,
|
||||
call_next: Callable[[Request], Awaitable[Response]],
|
||||
) -> Response:
|
||||
"""Rejects only POST upload bodies without deterministic length or with oversized request totals."""
|
||||
|
||||
if _is_upload_size_guard_target(request):
|
||||
content_length = request.headers.get("content-length", "").strip()
|
||||
if not content_length:
|
||||
return JSONResponse(
|
||||
status_code=411,
|
||||
content={"detail": "Content-Length header is required for document uploads"},
|
||||
)
|
||||
try:
|
||||
content_length_value = int(content_length)
|
||||
except ValueError:
|
||||
return JSONResponse(status_code=400, content={"detail": "Invalid Content-Length header"})
|
||||
if content_length_value <= 0:
|
||||
return JSONResponse(status_code=400, content={"detail": "Content-Length must be a positive integer"})
|
||||
if content_length_value > settings.max_upload_request_size_bytes:
|
||||
return JSONResponse(
|
||||
status_code=413,
|
||||
content={
|
||||
"detail": (
|
||||
"Upload request exceeds total size limit "
|
||||
f"({content_length_value} > {settings.max_upload_request_size_bytes} bytes)"
|
||||
)
|
||||
},
|
||||
)
|
||||
return await call_next(request)
|
||||
|
||||
@app.on_event("startup")
|
||||
def startup_event() -> None:
|
||||
"""Initializes storage directories and database schema on service startup."""
|
||||
|
||||
ensure_storage()
|
||||
ensure_app_settings()
|
||||
init_db()
|
||||
ensure_bootstrap_users()
|
||||
ensure_app_settings()
|
||||
try:
|
||||
ensure_typesense_collection()
|
||||
except Exception:
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""Model exports for ORM metadata discovery."""
|
||||
|
||||
from app.models.auth import AppUser, AuthSession, UserRole
|
||||
from app.models.document import Document, DocumentStatus
|
||||
from app.models.processing_log import ProcessingLogEntry
|
||||
|
||||
__all__ = ["Document", "DocumentStatus", "ProcessingLogEntry"]
|
||||
__all__ = ["AppUser", "AuthSession", "Document", "DocumentStatus", "ProcessingLogEntry", "UserRole"]
|
||||
|
||||
66
backend/app/models/auth.py
Normal file
66
backend/app/models/auth.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""Data models for authenticated users and issued API sessions."""
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
from enum import Enum
|
||||
|
||||
from sqlalchemy import Boolean, DateTime, Enum as SqlEnum, ForeignKey, String
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db.base import Base
|
||||
|
||||
|
||||
class UserRole(str, Enum):
|
||||
"""Declares authorization roles used for API route access control."""
|
||||
|
||||
ADMIN = "admin"
|
||||
USER = "user"
|
||||
|
||||
|
||||
class AppUser(Base):
|
||||
"""Stores one authenticatable user account with role-bound authorization."""
|
||||
|
||||
__tablename__ = "app_users"
|
||||
|
||||
id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
username: Mapped[str] = mapped_column(String(128), nullable=False, unique=True, index=True)
|
||||
password_hash: Mapped[str] = mapped_column(String(512), nullable=False)
|
||||
role: Mapped[UserRole] = mapped_column(SqlEnum(UserRole), nullable=False, default=UserRole.USER)
|
||||
is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, default=lambda: datetime.now(UTC))
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
nullable=False,
|
||||
default=lambda: datetime.now(UTC),
|
||||
onupdate=lambda: datetime.now(UTC),
|
||||
)
|
||||
|
||||
sessions: Mapped[list["AuthSession"]] = relationship(
|
||||
"AuthSession",
|
||||
back_populates="user",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
|
||||
|
||||
class AuthSession(Base):
    """Stores one issued bearer session token for a specific authenticated user."""

    __tablename__ = "auth_sessions"

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Owning user; DB-level ON DELETE CASCADE removes sessions with the user row.
    user_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), ForeignKey("app_users.id", ondelete="CASCADE"), nullable=False, index=True)
    # Hashed form of the issued bearer token; unique index supports token lookup.
    token_hash: Mapped[str] = mapped_column(String(128), nullable=False, unique=True, index=True)
    # Indexed to support expiry-based queries/cleanup.
    expires_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, index=True)
    # Null while the session has not been explicitly revoked.
    revoked_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    # Optional client metadata captured at login.
    user_agent: Mapped[str | None] = mapped_column(String(512), nullable=True)
    ip_address: Mapped[str | None] = mapped_column(String(64), nullable=True)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, default=lambda: datetime.now(UTC))
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
    )

    user: Mapped[AppUser] = relationship("AppUser", back_populates="sessions")
|
||||
@@ -38,6 +38,12 @@ class Document(Base):
|
||||
suggested_path: Mapped[str | None] = mapped_column(String(1024), nullable=True)
|
||||
tags: Mapped[list[str]] = mapped_column(ARRAY(String), nullable=False, default=list)
|
||||
suggested_tags: Mapped[list[str]] = mapped_column(ARRAY(String), nullable=False, default=list)
|
||||
owner_user_id: Mapped[uuid.UUID | None] = mapped_column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey("app_users.id", ondelete="SET NULL"),
|
||||
nullable=True,
|
||||
index=True,
|
||||
)
|
||||
metadata_json: Mapped[dict] = mapped_column(JSONB, nullable=False, default=dict)
|
||||
extracted_text: Mapped[str] = mapped_column(Text, nullable=False, default="")
|
||||
image_text_type: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||
@@ -63,3 +69,4 @@ class Document(Base):
|
||||
foreign_keys=[parent_document_id],
|
||||
post_update=True,
|
||||
)
|
||||
owner_user: Mapped["AppUser | None"] = relationship("AppUser", foreign_keys=[owner_user_id], post_update=True)
|
||||
|
||||
@@ -2,14 +2,121 @@
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import BigInteger, DateTime, ForeignKey, String, Text
|
||||
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
from sqlalchemy.orm import Mapped, mapped_column, validates
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.db.base import Base
|
||||
|
||||
|
||||
settings = get_settings()
|
||||
|
||||
|
||||
SENSITIVE_KEY_MARKERS = (
|
||||
"api_key",
|
||||
"apikey",
|
||||
"authorization",
|
||||
"bearer",
|
||||
"token",
|
||||
"secret",
|
||||
"password",
|
||||
"credential",
|
||||
"cookie",
|
||||
)
|
||||
SENSITIVE_TEXT_PATTERNS = (
|
||||
re.compile(r"(?i)[\"']authorization[\"']\s*:\s*[\"']bearer\s+[^\"']+[\"']"),
|
||||
re.compile(r"(?i)[\"']bearer[\"']\s*:\s*[\"'][^\"']+[\"']"),
|
||||
re.compile(r"(?i)[\"'](?:api[_-]?key|token|secret|password)[\"']\s*:\s*[\"'][^\"']+[\"']"),
|
||||
re.compile(r"(?i)\bauthorization\b\s*[:=]\s*bearer\s+[a-z0-9._~+/\-]+=*"),
|
||||
re.compile(r"(?i)\bbearer\s+[a-z0-9._~+/\-]+=*"),
|
||||
re.compile(r"\b[a-z0-9_-]{8,}\.[a-z0-9_-]{8,}\.[a-z0-9_-]{8,}\b", flags=re.IGNORECASE),
|
||||
re.compile(r"(?i)\bsk-[a-z0-9]{16,}\b"),
|
||||
re.compile(r"(?i)\b(api[_-]?key|token|secret|password)\b\s*[:=]\s*['\"]?[^\s,'\";]+['\"]?"),
|
||||
)
|
||||
REDACTED_TEXT = "[REDACTED]"
|
||||
MAX_PAYLOAD_KEYS = 80
|
||||
MAX_PAYLOAD_LIST_ITEMS = 80
|
||||
|
||||
|
||||
def _truncate(value: str, limit: int) -> str:
|
||||
"""Truncates long log fields to configured bounds with stable suffix marker."""
|
||||
|
||||
normalized = value.strip()
|
||||
if len(normalized) <= limit:
|
||||
return normalized
|
||||
return normalized[: max(0, limit - 3)] + "..."
|
||||
|
||||
|
||||
def _is_sensitive_key(key: str) -> bool:
    """Returns whether a payload key likely contains sensitive credential data."""

    lowered = key.strip().lower()
    for marker in SENSITIVE_KEY_MARKERS:
        if marker in lowered:
            return True
    return False
|
||||
|
||||
|
||||
def _redact_sensitive_text(value: str) -> str:
    """Redacts token-like segments from log text while retaining non-sensitive context."""

    result = value
    for compiled_pattern in SENSITIVE_TEXT_PATTERNS:
        # REDACTED_TEXT contains no backslashes, so a plain replacement
        # string behaves identically to a replacement callable here.
        result = compiled_pattern.sub(REDACTED_TEXT, result)
    return result
|
||||
|
||||
|
||||
def sanitize_processing_log_payload_value(value: Any, *, parent_key: str | None = None) -> Any:
    """Sanitizes payload structures by redacting sensitive fields and bounding size.

    Recursively walks dicts/lists/tuples, replaces values whose key looks
    credential-like with the redaction marker, redacts token-like substrings
    in strings, and clamps container sizes and string lengths.

    Args:
        value: Arbitrary payload value to sanitize.
        parent_key: Key under which ``value`` was found; drives key-based redaction.

    Returns:
        A sanitized structure of the same general shape (tuples become lists).
    """

    # Key-based redaction wins over any content inspection.
    if parent_key and _is_sensitive_key(parent_key):
        return REDACTED_TEXT

    if isinstance(value, dict):
        sanitized: dict[str, Any] = {}
        # Only the first MAX_PAYLOAD_KEYS entries are retained; the rest drop.
        for index, (raw_key, raw_value) in enumerate(value.items()):
            if index >= MAX_PAYLOAD_KEYS:
                break
            key = str(raw_key)
            sanitized[key] = sanitize_processing_log_payload_value(raw_value, parent_key=key)
        return sanitized

    if isinstance(value, list):
        # List items inherit the parent key so a sensitive list stays redacted.
        return [
            sanitize_processing_log_payload_value(item, parent_key=parent_key)
            for item in value[:MAX_PAYLOAD_LIST_ITEMS]
        ]

    if isinstance(value, tuple):
        # Tuples are normalized to JSON-friendly lists.
        return [
            sanitize_processing_log_payload_value(item, parent_key=parent_key)
            for item in list(value)[:MAX_PAYLOAD_LIST_ITEMS]
        ]

    if isinstance(value, str):
        redacted = _redact_sensitive_text(value)
        return _truncate(redacted, settings.processing_log_max_payload_chars)

    if isinstance(value, (int, float, bool)) or value is None:
        # JSON-native scalars pass through untouched.
        return value

    # Unknown object types: stringify, clamp, then redact.
    as_text = _truncate(str(value), settings.processing_log_max_payload_chars)
    return _redact_sensitive_text(as_text)
|
||||
|
||||
|
||||
def sanitize_processing_log_text(value: str | None) -> str | None:
    """Sanitizes prompt and response fields by redacting credentials and clamping length."""

    if value is None:
        return None
    stripped = value.strip()
    if not stripped:
        # Blank text is normalized to None rather than stored as "".
        return None
    redacted_text = _redact_sensitive_text(stripped)
    return _truncate(redacted_text, settings.processing_log_max_text_chars)
|
||||
|
||||
|
||||
class ProcessingLogEntry(Base):
|
||||
"""Stores a timestamped processing event with optional model prompt and response text."""
|
||||
|
||||
@@ -31,3 +138,17 @@ class ProcessingLogEntry(Base):
|
||||
prompt_text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
response_text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
payload_json: Mapped[dict] = mapped_column(JSONB, nullable=False, default=dict)
|
||||
|
||||
@validates("prompt_text", "response_text")
|
||||
def _validate_text_fields(self, key: str, value: str | None) -> str | None:
|
||||
"""Redacts and bounds free-text log fields before persistence."""
|
||||
|
||||
return sanitize_processing_log_text(value)
|
||||
|
||||
@validates("payload_json")
|
||||
def _validate_payload_json(self, key: str, value: dict[str, Any] | None) -> dict[str, Any]:
|
||||
"""Redacts and bounds structured payload fields before persistence."""
|
||||
|
||||
if not isinstance(value, dict):
|
||||
return {}
|
||||
return sanitize_processing_log_payload_value(value)
|
||||
|
||||
50
backend/app/schemas/auth.py
Normal file
50
backend/app/schemas/auth.py
Normal file
@@ -0,0 +1,50 @@
|
||||
"""Pydantic schemas for authentication and session API payloads."""
|
||||
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.models.auth import UserRole
|
||||
|
||||
|
||||
class AuthLoginRequest(BaseModel):
    """Represents credential input used to create one authenticated API session."""

    # Max length mirrors the app_users.username column width (128 chars).
    username: str = Field(min_length=1, max_length=128)
    password: str = Field(min_length=1, max_length=256)
|
||||
|
||||
|
||||
class AuthUserResponse(BaseModel):
    """Represents one authenticated user identity and authorization role."""

    id: UUID
    username: str
    role: UserRole

    # NOTE(review): pydantic v2 prefers `model_config = ConfigDict(from_attributes=True)`
    # over the v1-style inner Config class — confirm library version before changing.
    class Config:
        """Enables ORM object parsing for SQLAlchemy model instances."""

        from_attributes = True
|
||||
|
||||
|
||||
class AuthSessionResponse(BaseModel):
    """Represents active session metadata for one authenticated user."""

    user: AuthUserResponse
    expires_at: datetime
    # NOTE(review): presumably only populated for cookie-based sessions that
    # need CSRF protection — confirm against the auth routes.
    csrf_token: str | None = None
|
||||
|
||||
|
||||
class AuthLoginResponse(AuthSessionResponse):
    """Represents one newly issued bearer token and associated user context.

    Inherits ``user``, ``expires_at`` and ``csrf_token`` from
    ``AuthSessionResponse``. The previous duplicate ``csrf_token``
    redeclaration (same type and default as the inherited field) was removed.
    """

    # NOTE(review): presumably None when the session is delivered via cookie
    # rather than a bearer token — confirm with the login route.
    access_token: str | None = None
    token_type: str = "bearer"
|
||||
|
||||
|
||||
class AuthLogoutResponse(BaseModel):
    """Represents logout outcome after current session revocation attempt."""

    # Whether a session was actually revoked by this request.
    revoked: bool
|
||||
@@ -1,13 +1,16 @@
|
||||
"""Pydantic schemas for processing pipeline log API payloads."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
from app.models.processing_log import sanitize_processing_log_payload_value, sanitize_processing_log_text
|
||||
|
||||
|
||||
class ProcessingLogEntryResponse(BaseModel):
|
||||
"""Represents one persisted processing log event returned by API endpoints."""
|
||||
"""Represents one persisted processing log event with already-redacted sensitive fields."""
|
||||
|
||||
id: int
|
||||
created_at: datetime
|
||||
@@ -20,7 +23,26 @@ class ProcessingLogEntryResponse(BaseModel):
|
||||
model_name: str | None
|
||||
prompt_text: str | None
|
||||
response_text: str | None
|
||||
payload_json: dict
|
||||
payload_json: dict[str, Any]
|
||||
|
||||
@field_validator("prompt_text", "response_text", mode="before")
|
||||
@classmethod
|
||||
def _sanitize_text_fields(cls, value: Any) -> str | None:
|
||||
"""Ensures log text fields are redacted in API responses."""
|
||||
|
||||
if value is None:
|
||||
return None
|
||||
return sanitize_processing_log_text(str(value))
|
||||
|
||||
@field_validator("payload_json", mode="before")
|
||||
@classmethod
|
||||
def _sanitize_payload_field(cls, value: Any) -> dict[str, Any]:
|
||||
"""Ensures payload fields are redacted in API responses."""
|
||||
|
||||
if not isinstance(value, dict):
|
||||
return {}
|
||||
sanitized = sanitize_processing_log_payload_value(value)
|
||||
return sanitized if isinstance(sanitized, dict) else {}
|
||||
|
||||
class Config:
|
||||
"""Enables ORM object parsing for SQLAlchemy model instances."""
|
||||
|
||||
@@ -1,16 +1,34 @@
|
||||
"""Persistent single-user application settings service backed by host-mounted storage."""
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import secrets
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from app.core.config import get_settings
|
||||
try:
|
||||
from cryptography.fernet import Fernet, InvalidToken
|
||||
except Exception: # pragma: no cover - dependency failures are surfaced at runtime usage.
|
||||
Fernet = None # type: ignore[assignment]
|
||||
|
||||
class InvalidToken(Exception):
|
||||
"""Fallback InvalidToken type used when cryptography dependency import fails."""
|
||||
|
||||
from app.core.config import get_settings, normalize_and_validate_provider_base_url
|
||||
|
||||
|
||||
settings = get_settings()
|
||||
|
||||
|
||||
class AppSettingsValidationError(ValueError):
|
||||
"""Raised when user-provided settings values fail security or contract validation."""
|
||||
|
||||
|
||||
TASK_OCR_HANDWRITING = "ocr_handwriting"
|
||||
TASK_SUMMARY_GENERATION = "summary_generation"
|
||||
TASK_ROUTING_CLASSIFICATION = "routing_classification"
|
||||
@@ -53,6 +71,221 @@ DEFAULT_ROUTING_PROMPT = (
|
||||
"Confidence must be between 0 and 1."
|
||||
)
|
||||
|
||||
PROVIDER_API_KEY_CIPHERTEXT_PREFIX = "enc-v2"
|
||||
PROVIDER_API_KEY_LEGACY_CIPHERTEXT_PREFIX = "enc-v1"
|
||||
PROVIDER_API_KEY_KEYFILE_NAME = ".settings-api-key"
|
||||
PROVIDER_API_KEY_LEGACY_STREAM_CONTEXT = b"dcm-provider-api-key-stream"
|
||||
PROVIDER_API_KEY_LEGACY_AUTH_CONTEXT = b"dcm-provider-api-key-auth"
|
||||
PROVIDER_API_KEY_LEGACY_NONCE_BYTES = 16
|
||||
PROVIDER_API_KEY_LEGACY_TAG_BYTES = 32
|
||||
|
||||
|
||||
def _settings_api_key_path() -> Path:
    """Returns the storage path used for local symmetric encryption key persistence."""

    # Keyfile lives directly under the configured storage root as a dotfile.
    return settings.storage_root / PROVIDER_API_KEY_KEYFILE_NAME
|
||||
|
||||
|
||||
def _write_private_text_file(path: Path, content: str) -> None:
|
||||
"""Writes text files with restrictive owner-only permissions for local secret material."""
|
||||
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
file_descriptor = os.open(str(path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
||||
with os.fdopen(file_descriptor, "w", encoding="utf-8") as handle:
|
||||
handle.write(content)
|
||||
os.chmod(path, 0o600)
|
||||
|
||||
|
||||
def _urlsafe_b64encode_no_padding(data: bytes) -> str:
|
||||
"""Encodes bytes to URL-safe base64 without padding for compact JSON persistence."""
|
||||
|
||||
return base64.urlsafe_b64encode(data).decode("ascii").rstrip("=")
|
||||
|
||||
|
||||
def _urlsafe_b64decode_no_padding(data: str) -> bytes:
|
||||
"""Decodes URL-safe base64 values that may omit trailing padding characters."""
|
||||
|
||||
padded = data + "=" * (-len(data) % 4)
|
||||
return base64.urlsafe_b64decode(padded.encode("ascii"))
|
||||
|
||||
|
||||
def _derive_provider_api_key_key() -> bytes:
    """Resolves the master key used to encrypt provider API keys for settings storage.

    Precedence: explicit configured key, then the persisted keyfile, then a
    freshly generated key written to disk. Always returns exactly 32 bytes.
    """

    configured_key = settings.app_settings_encryption_key.strip()
    if configured_key:
        try:
            decoded = _urlsafe_b64decode_no_padding(configured_key)
            if len(decoded) >= 32:
                # Configured value is valid base64 with enough key material.
                return decoded[:32]
        except (binascii.Error, ValueError):
            pass
        # Non-base64 (or too-short) configured values are stretched via SHA-256.
        return hashlib.sha256(configured_key.encode("utf-8")).digest()

    key_path = _settings_api_key_path()
    if key_path.exists():
        try:
            persisted = key_path.read_text(encoding="utf-8").strip()
            decoded = _urlsafe_b64decode_no_padding(persisted)
            if len(decoded) >= 32:
                return decoded[:32]
        except (OSError, UnicodeDecodeError, binascii.Error, ValueError):
            # NOTE(review): an unreadable/corrupt keyfile falls through to key
            # regeneration below, which makes previously stored ciphertexts
            # undecryptable — confirm this is the intended recovery behavior.
            pass

    generated = secrets.token_bytes(32)
    _write_private_text_file(key_path, _urlsafe_b64encode_no_padding(generated))
    return generated
|
||||
|
||||
|
||||
def _legacy_xor_bytes(left: bytes, right: bytes) -> bytes:
|
||||
"""Applies byte-wise XOR for equal-length byte sequences used by legacy ciphertext migration."""
|
||||
|
||||
return bytes(first ^ second for first, second in zip(left, right))
|
||||
|
||||
|
||||
def _legacy_derive_stream_cipher_bytes(master_key: bytes, nonce: bytes, length: int) -> bytes:
    """Derives legacy deterministic stream bytes from HMAC-SHA256 blocks for migration reads."""

    # Counter-mode style construction: each 32-byte block is
    # HMAC(key, context || nonce || counter), concatenated then clipped.
    blocks: list[bytes] = []
    produced = 0
    block_index = 0
    while produced < length:
        message = PROVIDER_API_KEY_LEGACY_STREAM_CONTEXT + nonce + block_index.to_bytes(4, "big")
        digest = hmac.new(master_key, message, hashlib.sha256).digest()
        blocks.append(digest)
        produced += len(digest)
        block_index += 1
    return b"".join(blocks)[:length]
|
||||
|
||||
|
||||
def _provider_key_fernet(master_key: bytes) -> Fernet:
    """Builds Fernet instance from 32-byte symmetric key material.

    Raises:
        AppSettingsValidationError: when the cryptography import failed at module load.
    """

    if Fernet is None:
        raise AppSettingsValidationError("cryptography dependency is not available")
    # Fernet expects a urlsafe-base64-encoded 32-byte key.
    fernet_key = base64.urlsafe_b64encode(master_key[:32])
    return Fernet(fernet_key)
|
||||
|
||||
|
||||
def _encrypt_provider_api_key_fallback(value: str) -> str:
    """Encrypts provider keys with legacy HMAC stream construction when cryptography is unavailable.

    Payload layout: nonce (16 bytes) || XOR-stream ciphertext || HMAC-SHA256
    tag (32 bytes), urlsafe-base64 encoded without padding.
    """

    plaintext = value.encode("utf-8")
    master_key = _derive_provider_api_key_key()
    nonce = secrets.token_bytes(PROVIDER_API_KEY_LEGACY_NONCE_BYTES)
    keystream = _legacy_derive_stream_cipher_bytes(master_key, nonce, len(plaintext))
    ciphertext = _legacy_xor_bytes(plaintext, keystream)
    # Encrypt-then-MAC: the tag authenticates nonce + ciphertext.
    tag = hmac.new(
        master_key,
        PROVIDER_API_KEY_LEGACY_AUTH_CONTEXT + nonce + ciphertext,
        hashlib.sha256,
    ).digest()
    payload = nonce + ciphertext + tag
    encoded = _urlsafe_b64encode_no_padding(payload)
    # NOTE(review): this tags a legacy-format payload with the `enc-v2` prefix;
    # _decrypt_provider_api_key only falls back to the legacy reader while
    # Fernet is unavailable, so these values become unreadable once the
    # cryptography package installs — confirm that is acceptable.
    return f"{PROVIDER_API_KEY_CIPHERTEXT_PREFIX}:{encoded}"
|
||||
|
||||
|
||||
def _encrypt_provider_api_key(value: str) -> str:
    """Encrypts one provider API key for at-rest JSON persistence."""

    stripped = value.strip()
    if not stripped:
        # Empty keys are stored as empty strings, not encrypted.
        return ""
    if Fernet is None:
        # cryptography is unavailable; use the HMAC stream fallback.
        return _encrypt_provider_api_key_fallback(stripped)
    fernet = _provider_key_fernet(_derive_provider_api_key_key())
    token = fernet.encrypt(stripped.encode("utf-8")).decode("ascii")
    return f"{PROVIDER_API_KEY_CIPHERTEXT_PREFIX}:{token}"
|
||||
|
||||
|
||||
def _decrypt_provider_api_key_legacy_payload(encoded_payload: str) -> str:
    """Decrypts legacy stream-cipher payload bytes used for migration and fallback reads.

    Expected payload layout: nonce (16 bytes) || ciphertext || HMAC-SHA256 tag (32 bytes).

    Raises:
        AppSettingsValidationError: on empty, malformed, tampered, or non-UTF-8 payloads.
    """

    if not encoded_payload:
        raise AppSettingsValidationError("Provider API key ciphertext is missing payload bytes")
    try:
        payload = _urlsafe_b64decode_no_padding(encoded_payload)
    except (binascii.Error, ValueError) as error:
        raise AppSettingsValidationError("Provider API key ciphertext is not valid base64") from error

    # Must at least hold the nonce and the authentication tag.
    minimum_length = PROVIDER_API_KEY_LEGACY_NONCE_BYTES + PROVIDER_API_KEY_LEGACY_TAG_BYTES
    if len(payload) < minimum_length:
        raise AppSettingsValidationError("Provider API key ciphertext payload is truncated")

    nonce = payload[:PROVIDER_API_KEY_LEGACY_NONCE_BYTES]
    ciphertext = payload[PROVIDER_API_KEY_LEGACY_NONCE_BYTES:-PROVIDER_API_KEY_LEGACY_TAG_BYTES]
    received_tag = payload[-PROVIDER_API_KEY_LEGACY_TAG_BYTES:]
    master_key = _derive_provider_api_key_key()
    # Verify the encrypt-then-MAC tag before touching the ciphertext.
    expected_tag = hmac.new(
        master_key,
        PROVIDER_API_KEY_LEGACY_AUTH_CONTEXT + nonce + ciphertext,
        hashlib.sha256,
    ).digest()
    # compare_digest keeps the comparison constant-time.
    if not hmac.compare_digest(received_tag, expected_tag):
        raise AppSettingsValidationError("Provider API key ciphertext integrity check failed")

    keystream = _legacy_derive_stream_cipher_bytes(master_key, nonce, len(ciphertext))
    plaintext = _legacy_xor_bytes(ciphertext, keystream)
    try:
        return plaintext.decode("utf-8").strip()
    except UnicodeDecodeError as error:
        raise AppSettingsValidationError("Provider API key ciphertext is not valid UTF-8") from error
|
||||
|
||||
|
||||
def _decrypt_provider_api_key_legacy(value: str) -> str:
    """Decrypts legacy `enc-v1` payloads to support non-breaking key migration."""

    # Strip the "enc-v1:" prefix; everything after the first colon is payload.
    _, _, encoded_payload = value.partition(":")
    return _decrypt_provider_api_key_legacy_payload(encoded_payload)
|
||||
|
||||
|
||||
def _decrypt_provider_api_key(value: str) -> str:
    """Decrypts provider API key ciphertext while rejecting tampered payloads.

    Accepts `enc-v2` (Fernet) and legacy `enc-v1` (HMAC stream) prefixes; any
    other value is treated as already-plaintext and returned stripped.

    Raises:
        AppSettingsValidationError: on empty, tampered, or non-UTF-8 ciphertext payloads.
    """

    normalized = value.strip()
    if not normalized:
        return ""
    if not normalized.startswith(f"{PROVIDER_API_KEY_CIPHERTEXT_PREFIX}:") and not normalized.startswith(
        f"{PROVIDER_API_KEY_LEGACY_CIPHERTEXT_PREFIX}:"
    ):
        # No recognized ciphertext prefix: legacy plaintext passthrough.
        return normalized

    if normalized.startswith(f"{PROVIDER_API_KEY_LEGACY_CIPHERTEXT_PREFIX}:"):
        return _decrypt_provider_api_key_legacy(normalized)

    token = normalized.split(":", 1)[1].strip()
    if not token:
        raise AppSettingsValidationError("Provider API key ciphertext is missing payload bytes")
    if Fernet is None:
        # Without cryptography, enc-v2 values written by the fallback
        # encryptor use the legacy payload layout.
        return _decrypt_provider_api_key_legacy_payload(token)
    try:
        plaintext = _provider_key_fernet(_derive_provider_api_key_key()).decrypt(token.encode("ascii"))
    except (InvalidToken, ValueError, UnicodeEncodeError) as error:
        raise AppSettingsValidationError("Provider API key ciphertext integrity check failed") from error
    try:
        return plaintext.decode("utf-8").strip()
    except UnicodeDecodeError as error:
        raise AppSettingsValidationError("Provider API key ciphertext is not valid UTF-8") from error
|
||||
|
||||
|
||||
def _read_provider_api_key(provider_payload: dict[str, Any]) -> str:
    """Reads provider API key values from encrypted or legacy plaintext settings payloads."""

    encrypted_value = provider_payload.get("api_key_encrypted")
    if isinstance(encrypted_value, str) and encrypted_value.strip():
        try:
            return _decrypt_provider_api_key(encrypted_value)
        except AppSettingsValidationError:
            # Tampered or undecryptable ciphertext degrades to an empty key.
            return ""

    plaintext_value = provider_payload.get("api_key")
    return "" if plaintext_value is None else str(plaintext_value).strip()
|
||||
|
||||
|
||||
def _default_settings() -> dict[str, Any]:
|
||||
"""Builds default settings including providers and model task bindings."""
|
||||
@@ -156,13 +389,13 @@ def _clamp_cards_per_page(value: int) -> int:
|
||||
def _clamp_processing_log_document_sessions(value: int) -> int:
|
||||
"""Clamps the number of recent document log sessions kept during cleanup."""
|
||||
|
||||
return max(0, min(20, value))
|
||||
return max(0, min(settings.processing_log_max_document_sessions, value))
|
||||
|
||||
|
||||
def _clamp_processing_log_unbound_entries(value: int) -> int:
|
||||
"""Clamps retained unbound processing log events kept during cleanup."""
|
||||
|
||||
return max(0, min(400, value))
|
||||
return max(0, min(settings.processing_log_max_unbound_entries, value))
|
||||
|
||||
|
||||
def _clamp_predefined_entries_limit(value: int) -> int:
|
||||
@@ -239,15 +472,31 @@ def _normalize_provider(
|
||||
if provider_type != "openai_compatible":
|
||||
provider_type = "openai_compatible"
|
||||
|
||||
api_key_value = payload.get("api_key", fallback_values.get("api_key", defaults["api_key"]))
|
||||
api_key = str(api_key_value).strip() if api_key_value is not None else ""
|
||||
payload_api_key = _read_provider_api_key(payload)
|
||||
fallback_api_key = _read_provider_api_key(fallback_values)
|
||||
default_api_key = _read_provider_api_key(defaults)
|
||||
if "api_key" in payload and payload.get("api_key") is not None:
|
||||
api_key = str(payload.get("api_key")).strip()
|
||||
elif payload_api_key:
|
||||
api_key = payload_api_key
|
||||
elif fallback_api_key:
|
||||
api_key = fallback_api_key
|
||||
else:
|
||||
api_key = default_api_key
|
||||
|
||||
raw_base_url = str(payload.get("base_url", fallback_values.get("base_url", defaults["base_url"]))).strip()
|
||||
if not raw_base_url:
|
||||
raw_base_url = str(defaults["base_url"]).strip()
|
||||
try:
|
||||
normalized_base_url = normalize_and_validate_provider_base_url(raw_base_url)
|
||||
except ValueError as error:
|
||||
raise AppSettingsValidationError(str(error)) from error
|
||||
|
||||
return {
|
||||
"id": provider_id,
|
||||
"label": str(payload.get("label", fallback_values.get("label", provider_id))).strip() or provider_id,
|
||||
"provider_type": provider_type,
|
||||
"base_url": str(payload.get("base_url", fallback_values.get("base_url", defaults["base_url"]))).strip()
|
||||
or defaults["base_url"],
|
||||
"base_url": normalized_base_url,
|
||||
"timeout_seconds": _clamp_timeout(
|
||||
_safe_int(
|
||||
payload.get("timeout_seconds", fallback_values.get("timeout_seconds", defaults["timeout_seconds"])),
|
||||
@@ -255,6 +504,7 @@ def _normalize_provider(
|
||||
)
|
||||
),
|
||||
"api_key": api_key,
|
||||
"api_key_encrypted": _encrypt_provider_api_key(api_key),
|
||||
}
|
||||
|
||||
|
||||
@@ -576,7 +826,7 @@ def _normalize_handwriting_style_settings(payload: dict[str, Any], defaults: dic
|
||||
|
||||
|
||||
def _sanitize_settings(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Sanitizes all persisted settings into a stable normalized structure."""
|
||||
"""Sanitizes persisted settings into a stable structure while tolerating corrupt provider rows."""
|
||||
|
||||
if not isinstance(payload, dict):
|
||||
payload = {}
|
||||
@@ -592,7 +842,14 @@ def _sanitize_settings(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
if not isinstance(provider_payload, dict):
|
||||
continue
|
||||
fallback = defaults["providers"][0]
|
||||
candidate = _normalize_provider(provider_payload, fallback_id=f"provider-{index + 1}", fallback_values=fallback)
|
||||
try:
|
||||
candidate = _normalize_provider(
|
||||
provider_payload,
|
||||
fallback_id=f"provider-{index + 1}",
|
||||
fallback_values=fallback,
|
||||
)
|
||||
except AppSettingsValidationError:
|
||||
continue
|
||||
if candidate["id"] in seen_provider_ids:
|
||||
continue
|
||||
seen_provider_ids.add(candidate["id"])
|
||||
@@ -635,6 +892,26 @@ def _sanitize_settings(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
def _serialize_settings_for_storage(payload: dict[str, Any]) -> dict[str, Any]:
    """Converts sanitized runtime payload into storage-safe form without plaintext provider keys."""

    storage_payload = dict(payload)
    stored_providers: list[dict[str, Any]] = []
    for provider in payload.get("providers", []):
        if not isinstance(provider, dict):
            continue
        entry = dict(provider)
        # Drop the plaintext key from the stored copy.
        plaintext = str(entry.pop("api_key", "")).strip()
        ciphertext = str(entry.get("api_key_encrypted", "")).strip()
        if plaintext:
            # Re-encrypt from plaintext so storage reflects the latest key.
            ciphertext = _encrypt_provider_api_key(plaintext)
        entry["api_key_encrypted"] = ciphertext
        stored_providers.append(entry)

    storage_payload["providers"] = stored_providers
    return storage_payload
|
||||
|
||||
|
||||
def ensure_app_settings() -> None:
|
||||
"""Creates a settings file with defaults when no persisted settings are present."""
|
||||
|
||||
@@ -644,7 +921,7 @@ def ensure_app_settings() -> None:
|
||||
return
|
||||
|
||||
defaults = _sanitize_settings(_default_settings())
|
||||
path.write_text(json.dumps(defaults, indent=2), encoding="utf-8")
|
||||
_write_private_text_file(path, json.dumps(_serialize_settings_for_storage(defaults), indent=2))
|
||||
|
||||
|
||||
def _read_raw_settings() -> dict[str, Any]:
|
||||
@@ -664,7 +941,8 @@ def _write_settings(payload: dict[str, Any]) -> None:
|
||||
|
||||
path = _settings_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
|
||||
storage_payload = _serialize_settings_for_storage(payload)
|
||||
_write_private_text_file(path, json.dumps(storage_payload, indent=2))
|
||||
|
||||
|
||||
def read_app_settings() -> dict[str, Any]:
|
||||
@@ -861,16 +1139,21 @@ def update_app_settings(
|
||||
|
||||
|
||||
def read_handwriting_provider_settings() -> dict[str, Any]:
|
||||
"""Returns OCR settings in legacy shape for the handwriting transcription service."""
|
||||
"""Returns OCR settings in legacy shape with DNS-revalidated provider base URL safety checks."""
|
||||
|
||||
runtime = read_task_runtime_settings(TASK_OCR_HANDWRITING)
|
||||
provider = runtime["provider"]
|
||||
task = runtime["task"]
|
||||
raw_base_url = str(provider.get("base_url", settings.default_openai_base_url))
|
||||
try:
|
||||
normalized_base_url = normalize_and_validate_provider_base_url(raw_base_url, resolve_dns=True)
|
||||
except ValueError as error:
|
||||
raise AppSettingsValidationError(str(error)) from error
|
||||
|
||||
return {
|
||||
"provider": provider["provider_type"],
|
||||
"enabled": bool(task.get("enabled", True)),
|
||||
"openai_base_url": str(provider.get("base_url", settings.default_openai_base_url)),
|
||||
"openai_base_url": normalized_base_url,
|
||||
"openai_model": str(task.get("model", settings.default_openai_model)),
|
||||
"openai_timeout_seconds": int(provider.get("timeout_seconds", settings.default_openai_timeout_seconds)),
|
||||
"openai_api_key": str(provider.get("api_key", "")),
|
||||
|
||||
187
backend/app/services/auth_login_throttle.py
Normal file
187
backend/app/services/auth_login_throttle.py
Normal file
@@ -0,0 +1,187 @@
|
||||
"""Redis-backed brute-force protections for authentication login requests."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import logging
|
||||
|
||||
from redis.exceptions import RedisError
|
||||
|
||||
from app.core.config import Settings, get_settings
|
||||
from app.services.authentication import normalize_username
|
||||
from app.worker.queue import get_redis
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
USERNAME_SUBJECT_KIND = "username"
|
||||
IP_SUBJECT_KIND = "ip"
|
||||
UNKNOWN_USERNAME_SUBJECT = "unknown-username"
|
||||
UNKNOWN_IP_SUBJECT = "unknown-ip"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LoginThrottlePolicy:
    """Captures login throttle policy values resolved from runtime settings."""

    # Failed attempts tolerated before lockouts begin.
    failure_limit: int
    # Window (seconds) over which failures are counted.
    failure_window_seconds: int
    # First lockout duration (seconds); doubled per additional over-limit failure.
    lockout_base_seconds: int
    # Upper bound (seconds) for the exponential lockout.
    lockout_max_seconds: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LoginThrottleStatus:
    """Represents whether login attempts are currently throttled and retry metadata."""

    is_throttled: bool
    # Seconds until the caller may retry; 0 when not throttled.
    retry_after_seconds: int = 0
|
||||
|
||||
|
||||
def _bounded_int(value: int, *, minimum: int, maximum: int) -> int:
|
||||
"""Clamps one integer value to an inclusive minimum and maximum range."""
|
||||
|
||||
return max(minimum, min(maximum, int(value)))
|
||||
|
||||
|
||||
def _resolve_policy(settings: Settings) -> LoginThrottlePolicy:
    """Resolves login throttle policy from settings with defensive value bounds."""

    # Clamp every configured value so a bad setting cannot disable throttling
    # (limit >= 1) or create pathological windows/lockouts.
    failure_limit = _bounded_int(settings.auth_login_failure_limit, minimum=1, maximum=1000)
    failure_window_seconds = _bounded_int(settings.auth_login_failure_window_seconds, minimum=30, maximum=86400)
    lockout_base_seconds = _bounded_int(settings.auth_login_lockout_base_seconds, minimum=1, maximum=3600)
    lockout_max_seconds = _bounded_int(settings.auth_login_lockout_max_seconds, minimum=1, maximum=86400)
    if lockout_max_seconds < lockout_base_seconds:
        # Keep max >= base so the exponential backoff range is well-formed.
        lockout_max_seconds = lockout_base_seconds
    return LoginThrottlePolicy(
        failure_limit=failure_limit,
        failure_window_seconds=failure_window_seconds,
        lockout_base_seconds=lockout_base_seconds,
        lockout_max_seconds=lockout_max_seconds,
    )
|
||||
|
||||
|
||||
def _normalize_login_identity(username: str, ip_address: str | None) -> tuple[str, str]:
    """Normalizes username and source IP identity values used by throttle storage keys."""

    subject_username = normalize_username(username) or UNKNOWN_USERNAME_SUBJECT
    # Trim and cap the IP at 64 chars (matches the session column width).
    trimmed_ip = (ip_address or "").strip()[:64]
    subject_ip = trimmed_ip or UNKNOWN_IP_SUBJECT
    return subject_username, subject_ip
|
||||
|
||||
|
||||
def _identity_subjects(username: str, ip_address: str | None) -> tuple[tuple[str, str], tuple[str, str]]:
    """Returns the (kind, value) throttle subject tuples for one login attempt."""

    subject_username, subject_ip = _normalize_login_identity(username, ip_address)
    username_subject = (USERNAME_SUBJECT_KIND, subject_username)
    ip_subject = (IP_SUBJECT_KIND, subject_ip)
    return (username_subject, ip_subject)
|
||||
|
||||
|
||||
def _failure_key(*, subject_kind: str, subject_value: str) -> str:
|
||||
"""Builds the Redis key used to track failed login counts for one subject."""
|
||||
|
||||
return f"dcm:auth-login:fail:{subject_kind}:{subject_value}"
|
||||
|
||||
|
||||
def _lock_key(*, subject_kind: str, subject_value: str) -> str:
|
||||
"""Builds the Redis key used to store active lockout state for one subject."""
|
||||
|
||||
return f"dcm:auth-login:lock:{subject_kind}:{subject_value}"
|
||||
|
||||
|
||||
def _next_lockout_seconds(*, failure_count: int, policy: LoginThrottlePolicy) -> int:
|
||||
"""Computes exponential lockout duration when failed attempts exceed configured limit."""
|
||||
|
||||
if failure_count <= policy.failure_limit:
|
||||
return 0
|
||||
|
||||
additional_failures = failure_count - policy.failure_limit - 1
|
||||
lockout_seconds = policy.lockout_base_seconds
|
||||
while additional_failures > 0 and lockout_seconds < policy.lockout_max_seconds:
|
||||
lockout_seconds = min(policy.lockout_max_seconds, lockout_seconds * 2)
|
||||
additional_failures -= 1
|
||||
return lockout_seconds
|
||||
|
||||
|
||||
def check_login_throttle(*, username: str, ip_address: str | None) -> LoginThrottleStatus:
    """Reports whether the username/IP identity pair is currently locked out.

    Raises:
        RuntimeError: when the Redis backend is unreachable.
    """

    client = get_redis()
    longest_wait = 0
    try:
        for kind, value in _identity_subjects(username, ip_address):
            ttl_seconds = int(client.ttl(_lock_key(subject_kind=kind, subject_value=value)))
            if ttl_seconds == -1:
                # Lock key exists without expiry: report throttled with a minimal wait.
                longest_wait = max(longest_wait, 1)
            elif ttl_seconds > 0:
                longest_wait = max(longest_wait, ttl_seconds)
    except RedisError as error:
        raise RuntimeError("Login throttle backend unavailable") from error

    return LoginThrottleStatus(
        is_throttled=longest_wait > 0,
        retry_after_seconds=longest_wait,
    )
|
||||
|
||||
|
||||
def record_failed_login_attempt(*, username: str, ip_address: str | None) -> int:
    """Records one failed login attempt and returns active lockout seconds, if any.

    Raises:
        RuntimeError: when the Redis backend is unreachable.
    """

    settings = get_settings()
    policy = _resolve_policy(settings)
    normalized_username, normalized_ip = _normalize_login_identity(username, ip_address)
    redis_client = get_redis()

    try:
        highest_failure_count = 0
        active_lockout_seconds = 0
        # Failures are counted per username AND per source IP so either
        # dimension can trigger a lockout independently.
        for subject_kind, subject_value in (
            (USERNAME_SUBJECT_KIND, normalized_username),
            (IP_SUBJECT_KIND, normalized_ip),
        ):
            failure_key = _failure_key(subject_kind=subject_kind, subject_value=subject_value)
            # INCR + EXPIRE run atomically in one transaction; the +5s slack keeps
            # the counter alive slightly past the window boundary.
            pipeline = redis_client.pipeline(transaction=True)
            pipeline.incr(failure_key, 1)
            pipeline.expire(failure_key, policy.failure_window_seconds + 5)
            count_value, _ = pipeline.execute()
            failure_count = int(count_value)
            highest_failure_count = max(highest_failure_count, failure_count)

            lockout_seconds = _next_lockout_seconds(failure_count=failure_count, policy=policy)
            if lockout_seconds > 0:
                # The lock key's TTL doubles as the retry-after countdown.
                redis_client.set(
                    _lock_key(subject_kind=subject_kind, subject_value=subject_value),
                    "1",
                    ex=lockout_seconds,
                )
            active_lockout_seconds = max(active_lockout_seconds, lockout_seconds)
    except RedisError as error:
        raise RuntimeError("Login throttle backend unavailable") from error

    # Warn-level audit log for every failed attempt; identity values are normalized.
    logger.warning(
        "Authentication login failure: username=%s ip=%s failed_attempts=%s lockout_seconds=%s",
        normalized_username,
        normalized_ip,
        highest_failure_count,
        active_lockout_seconds,
    )
    return active_lockout_seconds
|
||||
|
||||
|
||||
def clear_login_throttle(*, username: str, ip_address: str | None) -> None:
    """Drops all throttle state for the identity after a successful login.

    Raises:
        RuntimeError: when the Redis backend is unreachable.
    """

    identity_username, identity_ip = _normalize_login_identity(username, ip_address)
    client = get_redis()
    stale_keys: list[str] = []
    for kind, value in (
        (USERNAME_SUBJECT_KIND, identity_username),
        (IP_SUBJECT_KIND, identity_ip),
    ):
        stale_keys.append(_failure_key(subject_kind=kind, subject_value=value))
        stale_keys.append(_lock_key(subject_kind=kind, subject_value=value))
    try:
        client.delete(*stale_keys)
    except RedisError as error:
        raise RuntimeError("Login throttle backend unavailable") from error
|
||||
289
backend/app/services/authentication.py
Normal file
289
backend/app/services/authentication.py
Normal file
@@ -0,0 +1,289 @@
|
||||
"""Authentication services for user credential validation and session issuance."""
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime, timedelta
|
||||
import hashlib
|
||||
import hmac
|
||||
import secrets
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import delete, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import Settings, get_settings
|
||||
from app.db.base import SessionLocal
|
||||
from app.models.auth import AppUser, AuthSession, UserRole
|
||||
|
||||
|
||||
PASSWORD_HASH_SCHEME = "pbkdf2_sha256"
|
||||
DEFAULT_AUTH_FALLBACK_SECRET = "dcm-session-secret"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class IssuedSession:
    """One freshly issued bearer session: plaintext token plus its expiry instant."""

    # Raw token handed back to the client; the database stores only a hash.
    token: str
    expires_at: datetime
|
||||
|
||||
|
||||
def normalize_username(username: str) -> str:
    """Canonicalizes a username: surrounding whitespace removed, then lowercased."""

    trimmed = username.strip()
    return trimmed.lower()
|
||||
|
||||
|
||||
def _urlsafe_b64encode_no_padding(data: bytes) -> str:
|
||||
"""Encodes bytes to compact URL-safe base64 without padding."""
|
||||
|
||||
return base64.urlsafe_b64encode(data).decode("ascii").rstrip("=")
|
||||
|
||||
|
||||
def _urlsafe_b64decode_no_padding(data: str) -> bytes:
|
||||
"""Decodes URL-safe base64 values that may omit trailing padding characters."""
|
||||
|
||||
padded = data + "=" * (-len(data) % 4)
|
||||
return base64.urlsafe_b64decode(padded.encode("ascii"))
|
||||
|
||||
|
||||
def _password_iterations(settings: Settings) -> int:
|
||||
"""Returns PBKDF2 iteration count clamped to a secure operational range."""
|
||||
|
||||
return max(200_000, min(1_200_000, int(settings.auth_password_pbkdf2_iterations)))
|
||||
|
||||
|
||||
def hash_password(password: str, settings: Settings | None = None) -> str:
    """Hashes a password with PBKDF2-SHA256 into the persisted '$'-separated format.

    Raises:
        ValueError: when the password is empty after stripping whitespace.
    """

    active_settings = settings or get_settings()
    candidate = password.strip()
    if not candidate:
        raise ValueError("Password must not be empty")

    iterations = _password_iterations(active_settings)
    # Fresh 16-byte random salt per hash.
    salt = secrets.token_bytes(16)
    digest = hashlib.pbkdf2_hmac(
        "sha256",
        candidate.encode("utf-8"),
        salt,
        iterations,
        dklen=32,
    )
    encoded_salt = _urlsafe_b64encode_no_padding(salt)
    encoded_digest = _urlsafe_b64encode_no_padding(digest)
    return f"{PASSWORD_HASH_SCHEME}${iterations}${encoded_salt}${encoded_digest}"
|
||||
|
||||
|
||||
def verify_password(password: str, stored_hash: str, settings: Settings | None = None) -> bool:
    """Verifies one plaintext password against persisted PBKDF2-SHA256 hash material.

    Returns False for empty passwords, malformed hashes, unknown schemes,
    out-of-band iteration counts, digest mismatches, or hashes produced with
    fewer iterations than the currently configured minimum.
    """

    resolved_settings = settings or get_settings()
    normalized_password = password.strip()
    if not normalized_password:
        return False

    # Persisted format: scheme$iterations$salt_b64$digest_b64 (see hash_password).
    parts = stored_hash.strip().split("$")
    if len(parts) != 4:
        return False
    scheme, iterations_text, salt_text, digest_text = parts
    if scheme != PASSWORD_HASH_SCHEME:
        return False
    try:
        iterations = int(iterations_text)
    except ValueError:
        return False
    # Reject recorded iteration counts outside the supported operational band.
    if iterations < 200_000 or iterations > 2_000_000:
        return False
    try:
        salt = _urlsafe_b64decode_no_padding(sal_text) if False else _urlsafe_b64decode_no_padding(salt_text)
        expected_digest = _urlsafe_b64decode_no_padding(digest_text)
    except (binascii.Error, ValueError):
        return False
    derived_digest = hashlib.pbkdf2_hmac(
        "sha256",
        normalized_password.encode("utf-8"),
        salt,
        iterations,
        dklen=len(expected_digest),
    )
    # Constant-time comparison to avoid timing side channels.
    if not hmac.compare_digest(expected_digest, derived_digest):
        return False
    # NOTE(review): a correct password hashed with fewer iterations than the
    # current setting is treated as invalid (no transparent rehash) — confirm
    # this is intended, since raising the configured iteration count will
    # invalidate existing hashes.
    return iterations >= _password_iterations(resolved_settings)
|
||||
|
||||
|
||||
def _auth_session_secret(settings: Settings) -> bytes:
|
||||
"""Resolves a stable secret used to hash issued bearer session tokens."""
|
||||
|
||||
candidate = settings.auth_session_pepper.strip() or settings.app_settings_encryption_key.strip()
|
||||
if not candidate:
|
||||
candidate = DEFAULT_AUTH_FALLBACK_SECRET
|
||||
return hashlib.sha256(candidate.encode("utf-8")).digest()
|
||||
|
||||
|
||||
def _hash_session_token(token: str, settings: Settings | None = None) -> str:
    """HMAC-SHA256 hex digest of a bearer token, keyed by the session secret."""

    active_settings = settings or get_settings()
    pepper = _auth_session_secret(active_settings)
    return hmac.new(pepper, token.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||
|
||||
|
||||
def _new_session_token(settings: Settings) -> str:
|
||||
"""Creates a random URL-safe bearer token for one API session."""
|
||||
|
||||
token_bytes = max(24, min(128, int(settings.auth_session_token_bytes)))
|
||||
return secrets.token_urlsafe(token_bytes)
|
||||
|
||||
|
||||
def _resolve_optional_user_credentials(username: str, password: str) -> tuple[str, str] | None:
    """Validates the optional bootstrap user pair: both configured, or both absent.

    Raises:
        ValueError: when exactly one of username/password is configured.
    """

    cleaned_username = normalize_username(username)
    cleaned_password = password.strip()
    has_username = bool(cleaned_username)
    has_password = bool(cleaned_password)
    if not has_username and not has_password:
        return None
    if has_username != has_password:
        raise ValueError("Optional bootstrap user requires both username and password")
    return cleaned_username, cleaned_password
|
||||
|
||||
|
||||
def _upsert_bootstrap_user(session: Session, *, username: str, password: str, role: UserRole) -> AppUser:
    """Inserts or refreshes one bootstrap account, forcing hash, role, and active flag."""

    password_hash = hash_password(password)
    found = session.execute(select(AppUser).where(AppUser.username == username)).scalar_one_or_none()
    if found is not None:
        # Existing account: rotate credentials and re-assert role/active status.
        found.password_hash = password_hash
        found.role = role
        found.is_active = True
        return found

    created = AppUser(
        username=username,
        password_hash=password_hash,
        role=role,
        is_active=True,
    )
    session.add(created)
    return created
|
||||
|
||||
|
||||
def ensure_bootstrap_users() -> None:
    """Creates or refreshes bootstrap user accounts from runtime environment credentials.

    Raises:
        RuntimeError: when the mandatory admin credentials are missing, or the
            optional user shares the admin username.
        ValueError: when only one of the optional user's username/password is set.
    """

    settings = get_settings()
    admin_username = normalize_username(settings.auth_bootstrap_admin_username)
    admin_password = settings.auth_bootstrap_admin_password.strip()
    if not admin_username:
        raise RuntimeError("AUTH_BOOTSTRAP_ADMIN_USERNAME must not be empty")
    if not admin_password:
        raise RuntimeError("AUTH_BOOTSTRAP_ADMIN_PASSWORD must not be empty")

    # The secondary non-admin account is optional: both fields set, or both empty.
    optional_user_credentials = _resolve_optional_user_credentials(
        username=settings.auth_bootstrap_user_username,
        password=settings.auth_bootstrap_user_password,
    )

    with SessionLocal() as session:
        _upsert_bootstrap_user(
            session,
            username=admin_username,
            password=admin_password,
            role=UserRole.ADMIN,
        )
        if optional_user_credentials is not None:
            user_username, user_password = optional_user_credentials
            if user_username == admin_username:
                raise RuntimeError("AUTH_BOOTSTRAP_USER_USERNAME must differ from admin username")
            _upsert_bootstrap_user(
                session,
                username=user_username,
                password=user_password,
                role=UserRole.USER,
            )
        # One commit covers both upserts.
        session.commit()
|
||||
|
||||
|
||||
def authenticate_user(session: Session, *, username: str, password: str) -> AppUser | None:
    """Returns the active account matching the credentials, or None on any failure."""

    login_name = normalize_username(username)
    if not login_name:
        return None
    account = session.execute(select(AppUser).where(AppUser.username == login_name)).scalar_one_or_none()
    if account is None:
        return None
    if not account.is_active:
        return None
    if not verify_password(password, account.password_hash):
        return None
    return account
|
||||
|
||||
|
||||
def issue_user_session(
    session: Session,
    *,
    user: AppUser,
    user_agent: str | None = None,
    ip_address: str | None = None,
) -> IssuedSession:
    """Issues one new bearer token session for a validated user account.

    Returns the plaintext token (only its HMAC hash is persisted) together
    with the expiry timestamp. Also garbage-collects this user's already
    expired sessions as a side effect. Does not commit the session.
    """

    settings = get_settings()
    now = datetime.now(UTC)
    # Session lifetime is clamped between 5 minutes and 7 days.
    ttl_minutes = max(5, min(7 * 24 * 60, int(settings.auth_session_ttl_minutes)))
    expires_at = now + timedelta(minutes=ttl_minutes)
    token = _new_session_token(settings)
    token_hash = _hash_session_token(token, settings)

    # Opportunistic cleanup: drop this user's expired session rows.
    session.execute(
        delete(AuthSession).where(
            AuthSession.user_id == user.id,
            AuthSession.expires_at <= now,
        )
    )
    session_entry = AuthSession(
        user_id=user.id,
        token_hash=token_hash,
        expires_at=expires_at,
        # Client metadata is trimmed, length-capped, and stored as NULL when empty.
        user_agent=(user_agent or "").strip()[:512] or None,
        ip_address=(ip_address or "").strip()[:64] or None,
    )
    session.add(session_entry)
    return IssuedSession(token=token, expires_at=expires_at)
|
||||
|
||||
|
||||
def resolve_auth_session(session: Session, *, token: str) -> AuthSession | None:
    """Looks up a live (unrevoked, unexpired, active-user) session for a bearer token."""

    candidate = token.strip()
    if not candidate:
        return None
    hashed = _hash_session_token(candidate)
    current_time = datetime.now(UTC)
    query = select(AuthSession).where(
        AuthSession.token_hash == hashed,
        AuthSession.revoked_at.is_(None),
        AuthSession.expires_at > current_time,
    )
    found = session.execute(query).scalar_one_or_none()
    if found is None:
        return None
    # A session without a user, or owned by a deactivated user, is unusable.
    if found.user is None or not found.user.is_active:
        return None
    return found
|
||||
|
||||
|
||||
def revoke_auth_session(session: Session, *, session_id: uuid.UUID) -> bool:
    """Marks one session revoked; returns False when absent or already revoked."""

    target = session.execute(select(AuthSession).where(AuthSession.id == session_id)).scalar_one_or_none()
    if target is None:
        return False
    if target.revoked_at is not None:
        return False
    target.revoked_at = datetime.now(UTC)
    return True
|
||||
@@ -299,17 +299,47 @@ def extract_text_content(filename: str, data: bytes, mime_type: str) -> Extracti
|
||||
)
|
||||
|
||||
|
||||
def extract_archive_members(data: bytes, depth: int = 0) -> list[ArchiveMember]:
|
||||
"""Extracts processable members from zip archives with configurable depth limits."""
|
||||
def extract_archive_members(data: bytes, depth: int = 0, max_members: int | None = None) -> list[ArchiveMember]:
|
||||
"""Extracts processable ZIP members with depth-aware and decompression safety guardrails."""
|
||||
|
||||
members: list[ArchiveMember] = []
|
||||
if depth > settings.max_zip_depth:
|
||||
normalized_depth = max(0, depth)
|
||||
if normalized_depth >= settings.max_zip_depth:
|
||||
return members
|
||||
|
||||
with zipfile.ZipFile(io.BytesIO(data)) as archive:
|
||||
infos = [info for info in archive.infolist() if not info.is_dir()][: settings.max_zip_members]
|
||||
for info in infos:
|
||||
member_data = archive.read(info.filename)
|
||||
members.append(ArchiveMember(name=info.filename, data=member_data))
|
||||
member_limit = settings.max_zip_members
|
||||
if max_members is not None:
|
||||
member_limit = max(0, min(settings.max_zip_members, int(max_members)))
|
||||
if member_limit <= 0:
|
||||
return members
|
||||
|
||||
total_uncompressed_bytes = 0
|
||||
try:
|
||||
with zipfile.ZipFile(io.BytesIO(data)) as archive:
|
||||
infos = [info for info in archive.infolist() if not info.is_dir()][:member_limit]
|
||||
for info in infos:
|
||||
if info.file_size <= 0:
|
||||
continue
|
||||
if info.file_size > settings.max_zip_member_uncompressed_bytes:
|
||||
continue
|
||||
if total_uncompressed_bytes + info.file_size > settings.max_zip_total_uncompressed_bytes:
|
||||
continue
|
||||
|
||||
compressed_size = max(1, int(info.compress_size))
|
||||
compression_ratio = float(info.file_size) / float(compressed_size)
|
||||
if compression_ratio > settings.max_zip_compression_ratio:
|
||||
continue
|
||||
|
||||
with archive.open(info, mode="r") as archive_member:
|
||||
member_data = archive_member.read(settings.max_zip_member_uncompressed_bytes + 1)
|
||||
if len(member_data) > settings.max_zip_member_uncompressed_bytes:
|
||||
continue
|
||||
if total_uncompressed_bytes + len(member_data) > settings.max_zip_total_uncompressed_bytes:
|
||||
continue
|
||||
|
||||
total_uncompressed_bytes += len(member_data)
|
||||
members.append(ArchiveMember(name=info.filename, data=member_data))
|
||||
except zipfile.BadZipFile:
|
||||
return []
|
||||
|
||||
return members
|
||||
|
||||
@@ -10,6 +10,7 @@ from typing import Any
|
||||
from openai import APIConnectionError, APIError, APITimeoutError, OpenAI
|
||||
from PIL import Image, ImageOps
|
||||
|
||||
from app.core.config import normalize_and_validate_provider_base_url
|
||||
from app.services.app_settings import DEFAULT_OCR_PROMPT, read_handwriting_provider_settings
|
||||
|
||||
MAX_IMAGE_SIDE = 2000
|
||||
@@ -151,12 +152,17 @@ def _normalize_image_bytes(image_data: bytes) -> tuple[bytes, str]:
|
||||
|
||||
|
||||
def _create_client(provider_settings: dict[str, Any]) -> OpenAI:
|
||||
"""Creates an OpenAI client configured for compatible endpoints and timeouts."""
|
||||
"""Creates an OpenAI client configured with DNS-revalidated endpoint and request timeout controls."""
|
||||
|
||||
api_key = str(provider_settings.get("openai_api_key", "")).strip() or "no-key-required"
|
||||
raw_base_url = str(provider_settings.get("openai_base_url", "")).strip()
|
||||
try:
|
||||
normalized_base_url = normalize_and_validate_provider_base_url(raw_base_url, resolve_dns=True)
|
||||
except ValueError as error:
|
||||
raise HandwritingTranscriptionError(f"invalid_provider_base_url:{error}") from error
|
||||
return OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=str(provider_settings["openai_base_url"]),
|
||||
base_url=normalized_base_url,
|
||||
timeout=int(provider_settings["openai_timeout_seconds"]),
|
||||
)
|
||||
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
from openai import APIConnectionError, APIError, APITimeoutError, OpenAI
|
||||
|
||||
from app.core.config import normalize_and_validate_provider_base_url
|
||||
from app.services.app_settings import read_task_runtime_settings
|
||||
|
||||
|
||||
@@ -36,18 +36,9 @@ class ModelTaskRuntime:
|
||||
|
||||
|
||||
def _normalize_base_url(raw_value: str) -> str:
|
||||
"""Normalizes provider base URL and appends /v1 for OpenAI-compatible servers."""
|
||||
"""Normalizes provider base URL and enforces SSRF protections before outbound calls."""
|
||||
|
||||
trimmed = raw_value.strip().rstrip("/")
|
||||
if not trimmed:
|
||||
return "https://api.openai.com/v1"
|
||||
|
||||
parsed = urlparse(trimmed)
|
||||
path = parsed.path or ""
|
||||
if not path.endswith("/v1"):
|
||||
path = f"{path}/v1" if path else "/v1"
|
||||
|
||||
return urlunparse(parsed._replace(path=path))
|
||||
return normalize_and_validate_provider_base_url(raw_value, resolve_dns=True)
|
||||
|
||||
|
||||
def _should_fallback_to_chat(error: Exception) -> bool:
|
||||
@@ -137,11 +128,16 @@ def resolve_task_runtime(task_name: str) -> ModelTaskRuntime:
|
||||
if provider_type != "openai_compatible":
|
||||
raise ModelTaskError(f"unsupported_provider_type:{provider_type}")
|
||||
|
||||
try:
|
||||
normalized_base_url = _normalize_base_url(str(provider_payload.get("base_url", "https://api.openai.com/v1")))
|
||||
except ValueError as error:
|
||||
raise ModelTaskError(f"invalid_provider_base_url:{error}") from error
|
||||
|
||||
return ModelTaskRuntime(
|
||||
task_name=task_name,
|
||||
provider_id=str(provider_payload.get("id", "")),
|
||||
provider_type=provider_type,
|
||||
base_url=_normalize_base_url(str(provider_payload.get("base_url", "https://api.openai.com/v1"))),
|
||||
base_url=normalized_base_url,
|
||||
timeout_seconds=int(provider_payload.get("timeout_seconds", 45)),
|
||||
api_key=str(provider_payload.get("api_key", "")).strip() or "no-key-required",
|
||||
model=str(task_payload.get("model", "")).strip(),
|
||||
|
||||
@@ -6,10 +6,13 @@ from uuid import UUID
|
||||
from sqlalchemy import delete, func, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.models.document import Document
|
||||
from app.models.processing_log import ProcessingLogEntry
|
||||
|
||||
|
||||
settings = get_settings()
|
||||
|
||||
MAX_STAGE_LENGTH = 64
|
||||
MAX_EVENT_LENGTH = 256
|
||||
MAX_LEVEL_LENGTH = 16
|
||||
@@ -37,9 +40,49 @@ def _trim(value: str | None, max_length: int) -> str | None:
|
||||
|
||||
|
||||
def _safe_payload(payload_json: dict[str, Any] | None) -> dict[str, Any]:
|
||||
"""Ensures payload values are persisted as dictionaries."""
|
||||
"""Normalizes payload persistence mode using metadata-only defaults for sensitive content."""
|
||||
|
||||
return payload_json if isinstance(payload_json, dict) else {}
|
||||
if not isinstance(payload_json, dict):
|
||||
return {}
|
||||
if settings.processing_log_store_payload_text:
|
||||
return payload_json
|
||||
return _metadata_only_payload(payload_json)
|
||||
|
||||
|
||||
def _metadata_only_payload(payload_json: dict[str, Any]) -> dict[str, Any]:
    """Rewrites a payload dict as metadata descriptors, never raw text values."""

    descriptors: dict[str, Any] = {}
    # Cap at 80 keys to bound the stored metadata size.
    for position, (raw_key, raw_value) in enumerate(payload_json.items()):
        if position >= 80:
            break
        descriptors[str(raw_key)] = _metadata_only_payload_value(raw_value)
    return descriptors
|
||||
|
||||
|
||||
def _metadata_only_payload_value(value: Any) -> Any:
|
||||
"""Converts one payload value into non-sensitive metadata representation."""
|
||||
|
||||
if isinstance(value, dict):
|
||||
return _metadata_only_payload(value)
|
||||
if isinstance(value, (list, tuple)):
|
||||
items = list(value)
|
||||
return {
|
||||
"item_count": len(items),
|
||||
"items_preview": [_metadata_only_payload_value(item) for item in items[:20]],
|
||||
}
|
||||
if isinstance(value, str):
|
||||
normalized = value.strip()
|
||||
return {
|
||||
"text_chars": len(normalized),
|
||||
"text_omitted": bool(normalized),
|
||||
}
|
||||
if isinstance(value, bytes):
|
||||
return {"binary_bytes": len(value)}
|
||||
if isinstance(value, (int, float, bool)) or value is None:
|
||||
return value
|
||||
return {"value_type": type(value).__name__}
|
||||
|
||||
|
||||
def set_processing_log_autocommit(session: Session, enabled: bool) -> None:
|
||||
@@ -82,8 +125,8 @@ def log_processing_event(
|
||||
document_filename=_trim(resolved_document_filename, MAX_DOCUMENT_FILENAME_LENGTH),
|
||||
provider_id=_trim(provider_id, MAX_PROVIDER_LENGTH),
|
||||
model_name=_trim(model_name, MAX_MODEL_LENGTH),
|
||||
prompt_text=_trim(prompt_text, MAX_PROMPT_LENGTH),
|
||||
response_text=_trim(response_text, MAX_RESPONSE_LENGTH),
|
||||
prompt_text=_trim(prompt_text, MAX_PROMPT_LENGTH) if settings.processing_log_store_model_io_text else None,
|
||||
response_text=_trim(response_text, MAX_RESPONSE_LENGTH) if settings.processing_log_store_model_io_text else None,
|
||||
payload_json=_safe_payload(payload_json),
|
||||
)
|
||||
session.add(entry)
|
||||
|
||||
42
backend/app/services/rate_limiter.py
Normal file
42
backend/app/services/rate_limiter.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""Redis-backed fixed-window rate limiter helpers for sensitive API operations."""
|
||||
|
||||
import time
|
||||
|
||||
from redis.exceptions import RedisError
|
||||
|
||||
from app.worker.queue import get_redis
|
||||
|
||||
|
||||
def _rate_limit_key(*, scope: str, subject: str, window_id: int) -> str:
|
||||
"""Builds a stable Redis key for one scope, subject, and fixed time window."""
|
||||
|
||||
return f"dcm:rate-limit:{scope}:{subject}:{window_id}"
|
||||
|
||||
|
||||
def increment_rate_limit(
    *,
    scope: str,
    subject: str,
    limit: int,
    window_seconds: int = 60,
) -> tuple[int, int]:
    """Bumps the fixed-window counter and returns (current_count, effective_limit).

    A non-positive limit short-circuits to (0, 0) without touching Redis.

    Raises:
        RuntimeError: when the Redis backend is unreachable.
    """

    effective_limit = max(0, int(limit))
    if effective_limit == 0:
        return (0, 0)

    effective_window = max(1, int(window_seconds))
    window_id = int(time.time() // effective_window)
    bucket_key = _rate_limit_key(scope=scope, subject=subject, window_id=window_id)

    client = get_redis()
    try:
        # Atomic INCR + EXPIRE; small slack keeps the key alive past the window edge.
        batch = client.pipeline(transaction=True)
        batch.incr(bucket_key, 1)
        batch.expire(bucket_key, effective_window + 5)
        current_count, _ = batch.execute()
    except RedisError as error:
        raise RuntimeError("Rate limiter backend unavailable") from error

    return (int(current_count), effective_limit)
|
||||
@@ -3,16 +3,17 @@
|
||||
from redis import Redis
|
||||
from rq import Queue
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.core.config import get_settings, validate_redis_url_security
|
||||
|
||||
|
||||
settings = get_settings()
|
||||
|
||||
|
||||
def get_redis() -> Redis:
|
||||
"""Creates a Redis connection from configured URL."""
|
||||
"""Creates a Redis connection after enforcing URL security policy checks."""
|
||||
|
||||
return Redis.from_url(settings.redis_url)
|
||||
secure_redis_url = validate_redis_url_security(settings.redis_url)
|
||||
return Redis.from_url(secure_redis_url)
|
||||
|
||||
|
||||
def get_processing_queue() -> Queue:
|
||||
|
||||
26
backend/app/worker/run_worker.py
Normal file
26
backend/app/worker/run_worker.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Worker entrypoint that enforces Redis URL security checks before queue consumption."""
|
||||
|
||||
from redis import Redis
|
||||
from rq import Worker
|
||||
|
||||
from app.core.config import get_settings, validate_redis_url_security
|
||||
|
||||
|
||||
def _build_worker_connection() -> Redis:
    """Returns a Redis connection whose URL passed the security policy check."""

    runtime_settings = get_settings()
    validated_url = validate_redis_url_security(runtime_settings.redis_url)
    return Redis.from_url(validated_url)
|
||||
|
||||
|
||||
def run_worker() -> None:
    """Starts the blocking RQ worker loop on the 'dcm' processing queue."""

    worker = Worker(["dcm"], connection=_build_worker_connection())
    worker.work()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_worker()
|
||||
@@ -7,6 +7,7 @@ from pathlib import Path
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.db.base import SessionLocal
|
||||
from app.models.document import Document, DocumentStatus
|
||||
from app.services.app_settings import (
|
||||
@@ -37,6 +38,13 @@ from app.services.storage import absolute_path, compute_sha256, store_bytes, wri
|
||||
from app.worker.queue import get_processing_queue
|
||||
|
||||
|
||||
settings = get_settings()
|
||||
|
||||
ARCHIVE_ROOT_ID_METADATA_KEY = "archive_root_document_id"
|
||||
ARCHIVE_DEPTH_METADATA_KEY = "archive_depth"
|
||||
ARCHIVE_DESCENDANT_COUNT_METADATA_KEY = "archive_descendant_count"
|
||||
|
||||
|
||||
def _cleanup_processing_logs_with_settings(session: Session) -> None:
|
||||
"""Applies configured processing log retention while trimming old log entries."""
|
||||
|
||||
@@ -48,13 +56,80 @@ def _cleanup_processing_logs_with_settings(session: Session) -> None:
|
||||
)
|
||||
|
||||
|
||||
def _metadata_non_negative_int(value: object, fallback: int = 0) -> int:
|
||||
"""Parses metadata values as non-negative integers with safe fallback behavior."""
|
||||
|
||||
try:
|
||||
parsed = int(value)
|
||||
except (TypeError, ValueError):
|
||||
return fallback
|
||||
return max(0, parsed)
|
||||
|
||||
|
||||
def _metadata_uuid(value: object) -> uuid.UUID | None:
|
||||
"""Parses metadata values as UUIDs while tolerating malformed legacy values."""
|
||||
|
||||
if not isinstance(value, str) or not value.strip():
|
||||
return None
|
||||
try:
|
||||
return uuid.UUID(value.strip())
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_archive_lineage(session: Session, document: Document) -> tuple[uuid.UUID, int]:
    """Resolves archive root document id and depth for metadata propagation compatibility.

    Returns a (root_document_id, depth) pair. Prefers lineage already recorded in
    the document's metadata; otherwise walks parent links for legacy rows.
    """

    # Fast path: trust lineage already persisted in the document's metadata.
    metadata_json = dict(document.metadata_json)
    metadata_root = _metadata_uuid(metadata_json.get(ARCHIVE_ROOT_ID_METADATA_KEY))
    metadata_depth = _metadata_non_negative_int(metadata_json.get(ARCHIVE_DEPTH_METADATA_KEY), fallback=0)
    if metadata_root is not None:
        return metadata_root, metadata_depth

    # Non-member documents are their own root at depth zero.
    if not document.is_archive_member:
        return document.id, 0

    # Legacy rows without metadata: walk parent links upward; the visited set
    # guards against reference cycles.
    depth = 0
    root_document_id = document.id
    parent_document_id = document.parent_document_id
    visited: set[uuid.UUID] = {document.id}
    while parent_document_id is not None and parent_document_id not in visited:
        visited.add(parent_document_id)
        parent_document = session.execute(select(Document).where(Document.id == parent_document_id)).scalar_one_or_none()
        if parent_document is None:
            # Dangling parent reference: stop and use the highest ancestor found.
            break
        depth += 1
        root_document_id = parent_document.id
        parent_document_id = parent_document.parent_document_id

    return root_document_id, depth
|
||||
|
||||
|
||||
def _merge_archive_metadata(document: Document, **updates: object) -> None:
|
||||
"""Applies archive metadata updates while preserving unrelated document metadata keys."""
|
||||
|
||||
metadata_json = dict(document.metadata_json)
|
||||
metadata_json.update(updates)
|
||||
document.metadata_json = metadata_json
|
||||
|
||||
|
||||
def _load_archive_root_for_update(session: Session, root_document_id: uuid.UUID) -> Document | None:
|
||||
"""Loads archive root row with write lock to serialize descendant-count budget updates."""
|
||||
|
||||
return session.execute(
|
||||
select(Document).where(Document.id == root_document_id).with_for_update()
|
||||
).scalar_one_or_none()
|
||||
|
||||
|
||||
def _create_archive_member_document(
|
||||
parent: Document,
|
||||
member_name: str,
|
||||
member_data: bytes,
|
||||
mime_type: str,
|
||||
archive_root_document_id: uuid.UUID,
|
||||
archive_depth: int,
|
||||
) -> Document:
|
||||
"""Creates a child document entity for a file extracted from an uploaded archive."""
|
||||
"""Creates child document entities with lineage metadata for recursive archive processing."""
|
||||
|
||||
extension = Path(member_name).suffix.lower()
|
||||
stored_relative_path = store_bytes(member_name, member_data)
|
||||
@@ -68,7 +143,13 @@ def _create_archive_member_document(
|
||||
size_bytes=len(member_data),
|
||||
logical_path=parent.logical_path,
|
||||
tags=list(parent.tags),
|
||||
metadata_json={"origin": "archive", "parent": str(parent.id)},
|
||||
owner_user_id=parent.owner_user_id,
|
||||
metadata_json={
|
||||
"origin": "archive",
|
||||
"parent": str(parent.id),
|
||||
ARCHIVE_ROOT_ID_METADATA_KEY: str(archive_root_document_id),
|
||||
ARCHIVE_DEPTH_METADATA_KEY: archive_depth,
|
||||
},
|
||||
is_archive_member=True,
|
||||
archived_member_path=member_name,
|
||||
parent_document_id=parent.id,
|
||||
@@ -110,16 +191,46 @@ def process_document_task(document_id: str) -> None:
|
||||
|
||||
if document.extension == ".zip":
|
||||
child_ids: list[str] = []
|
||||
archive_root_document_id, archive_depth = _resolve_archive_lineage(session=session, document=document)
|
||||
_merge_archive_metadata(
|
||||
document,
|
||||
**{
|
||||
ARCHIVE_ROOT_ID_METADATA_KEY: str(archive_root_document_id),
|
||||
ARCHIVE_DEPTH_METADATA_KEY: archive_depth,
|
||||
},
|
||||
)
|
||||
root_document = _load_archive_root_for_update(session=session, root_document_id=archive_root_document_id)
|
||||
if root_document is None:
|
||||
root_document = document
|
||||
|
||||
root_metadata_json = dict(root_document.metadata_json)
|
||||
existing_descendant_count = _metadata_non_negative_int(
|
||||
root_metadata_json.get(ARCHIVE_DESCENDANT_COUNT_METADATA_KEY),
|
||||
fallback=0,
|
||||
)
|
||||
max_descendants_per_root = max(0, int(settings.max_zip_descendants_per_root))
|
||||
remaining_descendant_budget = max(0, max_descendants_per_root - existing_descendant_count)
|
||||
extraction_member_cap = remaining_descendant_budget
|
||||
|
||||
log_processing_event(
|
||||
session=session,
|
||||
stage="archive",
|
||||
event="Archive extraction started",
|
||||
level="info",
|
||||
document=document,
|
||||
payload_json={"size_bytes": len(data)},
|
||||
payload_json={
|
||||
"size_bytes": len(data),
|
||||
"archive_root_document_id": str(archive_root_document_id),
|
||||
"archive_depth": archive_depth,
|
||||
"remaining_descendant_budget": remaining_descendant_budget,
|
||||
},
|
||||
)
|
||||
try:
|
||||
members = extract_archive_members(data)
|
||||
members = extract_archive_members(
|
||||
data,
|
||||
depth=archive_depth,
|
||||
max_members=extraction_member_cap,
|
||||
)
|
||||
for member in members:
|
||||
mime_type = sniff_mime(member.data)
|
||||
child = _create_archive_member_document(
|
||||
@@ -127,6 +238,8 @@ def process_document_task(document_id: str) -> None:
|
||||
member_name=member.name,
|
||||
member_data=member.data,
|
||||
mime_type=mime_type,
|
||||
archive_root_document_id=archive_root_document_id,
|
||||
archive_depth=archive_depth + 1,
|
||||
)
|
||||
session.add(child)
|
||||
session.flush()
|
||||
@@ -142,8 +255,27 @@ def process_document_task(document_id: str) -> None:
|
||||
"member_name": member.name,
|
||||
"member_size_bytes": len(member.data),
|
||||
"mime_type": mime_type,
|
||||
"archive_root_document_id": str(archive_root_document_id),
|
||||
"archive_depth": archive_depth + 1,
|
||||
},
|
||||
)
|
||||
|
||||
updated_root_metadata = dict(root_document.metadata_json)
|
||||
updated_root_metadata[ARCHIVE_ROOT_ID_METADATA_KEY] = str(archive_root_document_id)
|
||||
updated_root_metadata[ARCHIVE_DEPTH_METADATA_KEY] = 0
|
||||
updated_root_metadata[ARCHIVE_DESCENDANT_COUNT_METADATA_KEY] = existing_descendant_count + len(child_ids)
|
||||
root_document.metadata_json = updated_root_metadata
|
||||
|
||||
limit_flags: dict[str, object] = {}
|
||||
if archive_depth >= settings.max_zip_depth:
|
||||
limit_flags["max_depth_reached"] = True
|
||||
if remaining_descendant_budget <= 0:
|
||||
limit_flags["max_descendants_reached"] = True
|
||||
elif len(child_ids) >= remaining_descendant_budget:
|
||||
limit_flags["max_descendants_reached"] = True
|
||||
if limit_flags:
|
||||
_merge_archive_metadata(document, **limit_flags)
|
||||
|
||||
document.status = DocumentStatus.PROCESSED
|
||||
document.extracted_text = f"archive with {len(members)} files"
|
||||
log_processing_event(
|
||||
@@ -152,7 +284,13 @@ def process_document_task(document_id: str) -> None:
|
||||
event="Archive extraction completed",
|
||||
level="info",
|
||||
document=document,
|
||||
payload_json={"member_count": len(members)},
|
||||
payload_json={
|
||||
"member_count": len(members),
|
||||
"archive_root_document_id": str(archive_root_document_id),
|
||||
"archive_depth": archive_depth,
|
||||
"descendant_count": existing_descendant_count + len(child_ids),
|
||||
"remaining_descendant_budget": max(0, remaining_descendant_budget - len(child_ids)),
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
document.status = DocumentStatus.ERROR
|
||||
@@ -231,7 +369,10 @@ def process_document_task(document_id: str) -> None:
|
||||
event="Archive child job enqueued",
|
||||
level="info",
|
||||
document_id=uuid.UUID(child_id),
|
||||
payload_json={"parent_document_id": str(document.id)},
|
||||
payload_json={
|
||||
"parent_document_id": str(document.id),
|
||||
"archive_root_document_id": str(archive_root_document_id),
|
||||
},
|
||||
)
|
||||
session.commit()
|
||||
return
|
||||
|
||||
@@ -16,3 +16,4 @@ orjson==3.11.3
|
||||
openai==1.107.2
|
||||
typesense==1.1.1
|
||||
tiktoken==0.11.0
|
||||
cryptography==46.0.1
|
||||
|
||||
230
backend/tests/test_app_settings_provider_resilience.py
Normal file
230
backend/tests/test_app_settings_provider_resilience.py
Normal file
@@ -0,0 +1,230 @@
|
||||
"""Unit coverage for resilient provider sanitization in persisted app settings."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from types import ModuleType
|
||||
from typing import Any
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
BACKEND_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(BACKEND_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(BACKEND_ROOT))
|
||||
|
||||
if "pydantic_settings" not in sys.modules:
|
||||
pydantic_settings_stub = ModuleType("pydantic_settings")
|
||||
|
||||
class _BaseSettings:
|
||||
"""Minimal BaseSettings replacement for dependency-light unit test execution."""
|
||||
|
||||
def __init__(self, **kwargs: object) -> None:
|
||||
for key, value in kwargs.items():
|
||||
setattr(self, key, value)
|
||||
|
||||
def _settings_config_dict(**kwargs: object) -> dict[str, object]:
|
||||
"""Returns configuration values using dict semantics expected by settings module."""
|
||||
|
||||
return kwargs
|
||||
|
||||
pydantic_settings_stub.BaseSettings = _BaseSettings
|
||||
pydantic_settings_stub.SettingsConfigDict = _settings_config_dict
|
||||
sys.modules["pydantic_settings"] = pydantic_settings_stub
|
||||
|
||||
from app.services import app_settings
|
||||
|
||||
|
||||
def _sample_current_payload() -> dict[str, Any]:
|
||||
"""Builds a sanitized payload used as in-memory persistence fixture for update tests."""
|
||||
|
||||
return app_settings._sanitize_settings(app_settings._default_settings())
|
||||
|
||||
|
||||
class AppSettingsProviderResilienceTests(unittest.TestCase):
|
||||
"""Verifies read-path resilience for corrupt persisted providers without weakening writes."""
|
||||
|
||||
def test_sanitize_settings_skips_invalid_persisted_provider_entries(self) -> None:
|
||||
"""Invalid persisted providers are skipped and tasks rebind to remaining valid providers."""
|
||||
|
||||
payload = {
|
||||
"providers": [
|
||||
{
|
||||
"id": "insecure-provider",
|
||||
"label": "Insecure Provider",
|
||||
"provider_type": "openai_compatible",
|
||||
"base_url": "http://api.openai.com/v1",
|
||||
"timeout_seconds": 45,
|
||||
"api_key": "",
|
||||
},
|
||||
{
|
||||
"id": "secure-provider",
|
||||
"label": "Secure Provider",
|
||||
"provider_type": "openai_compatible",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"timeout_seconds": 45,
|
||||
"api_key": "",
|
||||
},
|
||||
],
|
||||
"tasks": {
|
||||
app_settings.TASK_OCR_HANDWRITING: {"provider_id": "insecure-provider"},
|
||||
app_settings.TASK_SUMMARY_GENERATION: {"provider_id": "insecure-provider"},
|
||||
app_settings.TASK_ROUTING_CLASSIFICATION: {"provider_id": "insecure-provider"},
|
||||
},
|
||||
}
|
||||
|
||||
sanitized = app_settings._sanitize_settings(payload)
|
||||
self.assertEqual([provider["id"] for provider in sanitized["providers"]], ["secure-provider"])
|
||||
self.assertEqual(
|
||||
sanitized["tasks"][app_settings.TASK_OCR_HANDWRITING]["provider_id"],
|
||||
"secure-provider",
|
||||
)
|
||||
self.assertEqual(
|
||||
sanitized["tasks"][app_settings.TASK_SUMMARY_GENERATION]["provider_id"],
|
||||
"secure-provider",
|
||||
)
|
||||
self.assertEqual(
|
||||
sanitized["tasks"][app_settings.TASK_ROUTING_CLASSIFICATION]["provider_id"],
|
||||
"secure-provider",
|
||||
)
|
||||
|
||||
def test_sanitize_settings_uses_default_provider_when_all_persisted_entries_are_invalid(self) -> None:
|
||||
"""Default provider is restored when all persisted provider rows are invalid."""
|
||||
|
||||
payload = {
|
||||
"providers": [
|
||||
{
|
||||
"id": "insecure-provider",
|
||||
"label": "Insecure Provider",
|
||||
"provider_type": "openai_compatible",
|
||||
"base_url": "http://api.openai.com/v1",
|
||||
"timeout_seconds": 45,
|
||||
"api_key": "",
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
sanitized = app_settings._sanitize_settings(payload)
|
||||
defaults = app_settings._default_settings()
|
||||
default_provider_id = defaults["providers"][0]["id"]
|
||||
self.assertEqual(sanitized["providers"][0]["id"], default_provider_id)
|
||||
self.assertEqual(sanitized["providers"][0]["base_url"], defaults["providers"][0]["base_url"])
|
||||
self.assertEqual(
|
||||
sanitized["tasks"][app_settings.TASK_OCR_HANDWRITING]["provider_id"],
|
||||
default_provider_id,
|
||||
)
|
||||
self.assertEqual(
|
||||
sanitized["tasks"][app_settings.TASK_SUMMARY_GENERATION]["provider_id"],
|
||||
default_provider_id,
|
||||
)
|
||||
self.assertEqual(
|
||||
sanitized["tasks"][app_settings.TASK_ROUTING_CLASSIFICATION]["provider_id"],
|
||||
default_provider_id,
|
||||
)
|
||||
|
||||
def test_update_app_settings_keeps_provider_base_url_validation_strict(self) -> None:
|
||||
"""Provider write updates still reject invalid base URLs instead of silently sanitizing."""
|
||||
|
||||
current_payload = _sample_current_payload()
|
||||
current_provider = current_payload["providers"][0]
|
||||
provider_update = {
|
||||
"id": current_provider["id"],
|
||||
"label": current_provider["label"],
|
||||
"provider_type": current_provider["provider_type"],
|
||||
"base_url": "http://api.openai.com/v1",
|
||||
"timeout_seconds": current_provider["timeout_seconds"],
|
||||
}
|
||||
|
||||
with (
|
||||
patch.object(app_settings, "_read_raw_settings", return_value=current_payload),
|
||||
patch.object(app_settings, "_write_settings") as write_settings_mock,
|
||||
):
|
||||
with self.assertRaises(app_settings.AppSettingsValidationError):
|
||||
app_settings.update_app_settings(providers=[provider_update])
|
||||
write_settings_mock.assert_not_called()
|
||||
|
||||
def test_sanitize_settings_migrates_legacy_plaintext_api_key_to_encrypted_field(self) -> None:
|
||||
"""Legacy plaintext API keys are still readable and emitted with encrypted storage representation."""
|
||||
|
||||
payload = {
|
||||
"providers": [
|
||||
{
|
||||
"id": "secure-provider",
|
||||
"label": "Secure Provider",
|
||||
"provider_type": "openai_compatible",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"timeout_seconds": 45,
|
||||
"api_key": "legacy-plaintext-secret",
|
||||
}
|
||||
],
|
||||
"tasks": {
|
||||
app_settings.TASK_OCR_HANDWRITING: {"provider_id": "secure-provider"},
|
||||
app_settings.TASK_SUMMARY_GENERATION: {"provider_id": "secure-provider"},
|
||||
app_settings.TASK_ROUTING_CLASSIFICATION: {"provider_id": "secure-provider"},
|
||||
},
|
||||
}
|
||||
|
||||
with patch.object(app_settings, "_derive_provider_api_key_key", return_value=b"k" * 32):
|
||||
sanitized = app_settings._sanitize_settings(payload)
|
||||
|
||||
provider = sanitized["providers"][0]
|
||||
self.assertEqual(provider["api_key"], "legacy-plaintext-secret")
|
||||
self.assertTrue(
|
||||
str(provider.get("api_key_encrypted", "")).startswith(
|
||||
f"{app_settings.PROVIDER_API_KEY_CIPHERTEXT_PREFIX}:"
|
||||
)
|
||||
)
|
||||
|
||||
def test_serialize_settings_for_storage_excludes_plaintext_api_key(self) -> None:
|
||||
"""Storage payload serialization persists encrypted provider API keys only."""
|
||||
|
||||
payload = _sample_current_payload()
|
||||
payload["providers"][0]["api_key"] = "storage-secret"
|
||||
payload["providers"][0]["api_key_encrypted"] = ""
|
||||
|
||||
with patch.object(app_settings, "_derive_provider_api_key_key", return_value=b"s" * 32):
|
||||
storage_payload = app_settings._serialize_settings_for_storage(payload)
|
||||
|
||||
provider_storage = storage_payload["providers"][0]
|
||||
self.assertNotIn("api_key", provider_storage)
|
||||
self.assertTrue(
|
||||
str(provider_storage.get("api_key_encrypted", "")).startswith(
|
||||
f"{app_settings.PROVIDER_API_KEY_CIPHERTEXT_PREFIX}:"
|
||||
)
|
||||
)
|
||||
|
||||
def test_read_handwriting_provider_settings_revalidates_dns(self) -> None:
|
||||
"""OCR runtime provider settings enforce DNS revalidation before creating outbound clients."""
|
||||
|
||||
runtime_payload = {
|
||||
"provider": {
|
||||
"id": "openai-default",
|
||||
"provider_type": "openai_compatible",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"timeout_seconds": 45,
|
||||
"api_key": "runtime-secret",
|
||||
},
|
||||
"task": {
|
||||
"enabled": True,
|
||||
"model": "gpt-4.1-mini",
|
||||
"prompt": "prompt",
|
||||
},
|
||||
}
|
||||
with (
|
||||
patch.object(app_settings, "read_task_runtime_settings", return_value=runtime_payload),
|
||||
patch.object(
|
||||
app_settings,
|
||||
"normalize_and_validate_provider_base_url",
|
||||
return_value="https://api.openai.com/v1",
|
||||
) as normalize_mock,
|
||||
):
|
||||
runtime_settings = app_settings.read_handwriting_provider_settings()
|
||||
|
||||
normalize_mock.assert_called_once_with("https://api.openai.com/v1", resolve_dns=True)
|
||||
self.assertEqual(runtime_settings["openai_base_url"], "https://api.openai.com/v1")
|
||||
self.assertEqual(runtime_settings["openai_api_key"], "runtime-secret")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
1519
backend/tests/test_security_controls.py
Normal file
1519
backend/tests/test_security_controls.py
Normal file
File diff suppressed because it is too large
Load Diff
280
backend/tests/test_upload_request_size_middleware.py
Normal file
280
backend/tests/test_upload_request_size_middleware.py
Normal file
@@ -0,0 +1,280 @@
|
||||
"""Regression tests for upload request-size middleware scope and preflight handling."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import sys
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from types import ModuleType, SimpleNamespace
|
||||
from typing import Any, Awaitable, Callable
|
||||
|
||||
|
||||
BACKEND_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(BACKEND_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(BACKEND_ROOT))
|
||||
|
||||
|
||||
def _install_main_import_stubs() -> dict[str, ModuleType | None]:
|
||||
"""Installs lightweight module stubs required for importing app.main in isolation."""
|
||||
|
||||
previous_modules: dict[str, ModuleType | None] = {
|
||||
name: sys.modules.get(name)
|
||||
for name in [
|
||||
"fastapi",
|
||||
"fastapi.middleware",
|
||||
"fastapi.middleware.cors",
|
||||
"fastapi.responses",
|
||||
"app.api.router",
|
||||
"app.core.config",
|
||||
"app.db.base",
|
||||
"app.services.app_settings",
|
||||
"app.services.authentication",
|
||||
"app.services.handwriting_style",
|
||||
"app.services.storage",
|
||||
"app.services.typesense_index",
|
||||
]
|
||||
}
|
||||
|
||||
fastapi_stub = ModuleType("fastapi")
|
||||
|
||||
class _Response:
|
||||
"""Minimal response base class for middleware typing compatibility."""
|
||||
|
||||
class _FastAPI:
|
||||
"""Captures middleware registration behavior used by app.main tests."""
|
||||
|
||||
def __init__(self, *_args: object, **_kwargs: object) -> None:
|
||||
self.http_middlewares: list[Any] = []
|
||||
|
||||
def add_middleware(self, *_args: object, **_kwargs: object) -> None:
|
||||
"""Accepts middleware registrations without side effects."""
|
||||
|
||||
def include_router(self, *_args: object, **_kwargs: object) -> None:
|
||||
"""Accepts router registration without side effects."""
|
||||
|
||||
def middleware(
|
||||
self,
|
||||
middleware_type: str,
|
||||
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
|
||||
"""Registers request middleware functions for later invocation in tests."""
|
||||
|
||||
def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
|
||||
if middleware_type == "http":
|
||||
self.http_middlewares.append(func)
|
||||
return func
|
||||
|
||||
return decorator
|
||||
|
||||
def on_event(
|
||||
self,
|
||||
*_args: object,
|
||||
**_kwargs: object,
|
||||
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
|
||||
"""Returns no-op startup and shutdown decorators."""
|
||||
|
||||
def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
|
||||
return func
|
||||
|
||||
return decorator
|
||||
|
||||
fastapi_stub.FastAPI = _FastAPI
|
||||
fastapi_stub.Request = object
|
||||
fastapi_stub.Response = _Response
|
||||
sys.modules["fastapi"] = fastapi_stub
|
||||
|
||||
fastapi_middleware_stub = ModuleType("fastapi.middleware")
|
||||
sys.modules["fastapi.middleware"] = fastapi_middleware_stub
|
||||
|
||||
fastapi_middleware_cors_stub = ModuleType("fastapi.middleware.cors")
|
||||
|
||||
class _CORSMiddleware:
|
||||
"""Placeholder CORS middleware class accepted by FastAPI.add_middleware."""
|
||||
|
||||
fastapi_middleware_cors_stub.CORSMiddleware = _CORSMiddleware
|
||||
sys.modules["fastapi.middleware.cors"] = fastapi_middleware_cors_stub
|
||||
|
||||
fastapi_responses_stub = ModuleType("fastapi.responses")
|
||||
|
||||
class _JSONResponse:
|
||||
"""Simple JSONResponse stand-in exposing status code and payload fields."""
|
||||
|
||||
def __init__(self, *, status_code: int, content: dict[str, Any]) -> None:
|
||||
self.status_code = status_code
|
||||
self.content = content
|
||||
|
||||
fastapi_responses_stub.JSONResponse = _JSONResponse
|
||||
sys.modules["fastapi.responses"] = fastapi_responses_stub
|
||||
|
||||
api_router_stub = ModuleType("app.api.router")
|
||||
api_router_stub.api_router = object()
|
||||
sys.modules["app.api.router"] = api_router_stub
|
||||
|
||||
config_stub = ModuleType("app.core.config")
|
||||
|
||||
def get_settings() -> SimpleNamespace:
|
||||
"""Returns minimal settings consumed by app.main during test import."""
|
||||
|
||||
return SimpleNamespace(
|
||||
app_env="development",
|
||||
cors_origins=["http://localhost:5173"],
|
||||
max_upload_request_size_bytes=1024,
|
||||
)
|
||||
|
||||
config_stub.get_settings = get_settings
|
||||
sys.modules["app.core.config"] = config_stub
|
||||
|
||||
db_base_stub = ModuleType("app.db.base")
|
||||
|
||||
def init_db() -> None:
|
||||
"""No-op database initializer for middleware scope tests."""
|
||||
|
||||
db_base_stub.init_db = init_db
|
||||
sys.modules["app.db.base"] = db_base_stub
|
||||
|
||||
app_settings_stub = ModuleType("app.services.app_settings")
|
||||
|
||||
def ensure_app_settings() -> None:
|
||||
"""No-op settings initializer for middleware scope tests."""
|
||||
|
||||
app_settings_stub.ensure_app_settings = ensure_app_settings
|
||||
sys.modules["app.services.app_settings"] = app_settings_stub
|
||||
|
||||
authentication_stub = ModuleType("app.services.authentication")
|
||||
|
||||
def ensure_bootstrap_users() -> None:
|
||||
"""No-op bootstrap user initializer for middleware scope tests."""
|
||||
|
||||
authentication_stub.ensure_bootstrap_users = ensure_bootstrap_users
|
||||
sys.modules["app.services.authentication"] = authentication_stub
|
||||
|
||||
handwriting_style_stub = ModuleType("app.services.handwriting_style")
|
||||
|
||||
def ensure_handwriting_style_collection() -> None:
|
||||
"""No-op handwriting collection initializer for middleware scope tests."""
|
||||
|
||||
handwriting_style_stub.ensure_handwriting_style_collection = ensure_handwriting_style_collection
|
||||
sys.modules["app.services.handwriting_style"] = handwriting_style_stub
|
||||
|
||||
storage_stub = ModuleType("app.services.storage")
|
||||
|
||||
def ensure_storage() -> None:
|
||||
"""No-op storage initializer for middleware scope tests."""
|
||||
|
||||
storage_stub.ensure_storage = ensure_storage
|
||||
sys.modules["app.services.storage"] = storage_stub
|
||||
|
||||
typesense_stub = ModuleType("app.services.typesense_index")
|
||||
|
||||
def ensure_typesense_collection() -> None:
|
||||
"""No-op Typesense collection initializer for middleware scope tests."""
|
||||
|
||||
typesense_stub.ensure_typesense_collection = ensure_typesense_collection
|
||||
sys.modules["app.services.typesense_index"] = typesense_stub
|
||||
|
||||
return previous_modules
|
||||
|
||||
|
||||
def _restore_main_import_stubs(previous_modules: dict[str, ModuleType | None]) -> None:
|
||||
"""Restores module table entries captured before installing app.main test stubs."""
|
||||
|
||||
for module_name, previous in previous_modules.items():
|
||||
if previous is None:
|
||||
sys.modules.pop(module_name, None)
|
||||
else:
|
||||
sys.modules[module_name] = previous
|
||||
|
||||
|
||||
class UploadRequestSizeMiddlewareTests(unittest.IsolatedAsyncioTestCase):
|
||||
"""Verifies upload request-size middleware ignores preflight and guards only upload POST."""
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls) -> None:
|
||||
"""Installs import stubs and imports app.main once for middleware extraction."""
|
||||
|
||||
cls._previous_modules = _install_main_import_stubs()
|
||||
cls.main_module = importlib.import_module("app.main")
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls) -> None:
|
||||
"""Removes imported module and restores pre-existing module table entries."""
|
||||
|
||||
sys.modules.pop("app.main", None)
|
||||
_restore_main_import_stubs(cls._previous_modules)
|
||||
|
||||
def _http_middleware(
|
||||
self,
|
||||
) -> Callable[[object, Callable[[object], Awaitable[object]]], Awaitable[object]]:
|
||||
"""Returns the registered HTTP middleware callable from the stubbed FastAPI app."""
|
||||
|
||||
return self.main_module.app.http_middlewares[0]
|
||||
|
||||
async def test_options_preflight_skips_upload_content_length_guard(self) -> None:
|
||||
"""OPTIONS preflight requests for upload endpoint continue without Content-Length enforcement."""
|
||||
|
||||
request = SimpleNamespace(
|
||||
method="OPTIONS",
|
||||
url=SimpleNamespace(path="/api/v1/documents/upload"),
|
||||
headers={},
|
||||
)
|
||||
expected_response = object()
|
||||
call_next_count = 0
|
||||
|
||||
async def call_next(_request: object) -> object:
|
||||
nonlocal call_next_count
|
||||
call_next_count += 1
|
||||
return expected_response
|
||||
|
||||
response = await self._http_middleware()(request, call_next)
|
||||
|
||||
self.assertIs(response, expected_response)
|
||||
self.assertEqual(call_next_count, 1)
|
||||
|
||||
async def test_post_upload_without_content_length_is_rejected(self) -> None:
|
||||
"""Upload POST requests remain blocked when Content-Length is absent."""
|
||||
|
||||
request = SimpleNamespace(
|
||||
method="POST",
|
||||
url=SimpleNamespace(path="/api/v1/documents/upload"),
|
||||
headers={},
|
||||
)
|
||||
call_next_count = 0
|
||||
|
||||
async def call_next(_request: object) -> object:
|
||||
nonlocal call_next_count
|
||||
call_next_count += 1
|
||||
return object()
|
||||
|
||||
response = await self._http_middleware()(request, call_next)
|
||||
|
||||
self.assertEqual(response.status_code, 411)
|
||||
self.assertEqual(
|
||||
response.content,
|
||||
{"detail": "Content-Length header is required for document uploads"},
|
||||
)
|
||||
self.assertEqual(call_next_count, 0)
|
||||
|
||||
async def test_post_non_upload_path_skips_upload_content_length_guard(self) -> None:
|
||||
"""Content-Length enforcement does not run for non-upload POST requests."""
|
||||
|
||||
request = SimpleNamespace(
|
||||
method="POST",
|
||||
url=SimpleNamespace(path="/api/v1/documents"),
|
||||
headers={},
|
||||
)
|
||||
expected_response = object()
|
||||
call_next_count = 0
|
||||
|
||||
async def call_next(_request: object) -> object:
|
||||
nonlocal call_next_count
|
||||
call_next_count += 1
|
||||
return expected_response
|
||||
|
||||
response = await self._http_middleware()(request, call_next)
|
||||
|
||||
self.assertIs(response, expected_response)
|
||||
self.assertEqual(call_next_count, 1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -6,7 +6,8 @@ This directory contains technical documentation for DMS.
|
||||
|
||||
- `../README.md` - project overview, setup, and quick operations
|
||||
- `architecture-overview.md` - backend, frontend, and infrastructure architecture
|
||||
- `api-contract.md` - API endpoint contract grouped by route module, including settings and processing-log trim defaults
|
||||
- `api-contract.md` - API endpoint contract grouped by route module, including session auth, login throttle responses, role and ownership scope, upload limits, and settings or processing-log security constraints
|
||||
- `data-model-reference.md` - database entity definitions and lifecycle states
|
||||
- `operations-and-configuration.md` - runtime operations, ports, volumes, and persisted settings configuration
|
||||
- `frontend-design-foundation.md` - frontend visual system, tokens, UI implementation rules, processing-log timeline behavior, and settings helper-copy guidance
|
||||
- `operations-and-configuration.md` - runtime operations, hardened compose defaults, DEV and LIVE security values, persisted settings configuration behavior, and frontend Vite host allowlist controls
|
||||
- `frontend-design-foundation.md` - frontend visual system, tokens, UI implementation rules, authenticated media delivery under session auth, processing-log timeline behavior, and settings helper-copy guidance
|
||||
- `../.env.example` - repository-level environment template with local defaults and production override guidance
|
||||
|
||||
@@ -4,12 +4,48 @@ Base URL prefix: `/api/v1`
|
||||
|
||||
Primary implementation modules:
|
||||
- `backend/app/api/router.py`
|
||||
- `backend/app/api/routes_auth.py`
|
||||
- `backend/app/api/routes_health.py`
|
||||
- `backend/app/api/routes_documents.py`
|
||||
- `backend/app/api/routes_search.py`
|
||||
- `backend/app/api/routes_processing_logs.py`
|
||||
- `backend/app/api/routes_settings.py`
|
||||
|
||||
## Authentication And Authorization
|
||||
|
||||
- Authentication is cookie-based session auth with a server-issued hashed session token.
|
||||
- Clients authenticate with `POST /auth/login` using username and password.
|
||||
- Backend issues a server-stored session token and sets `HttpOnly` `dcm_session` and readable `dcm_csrf` cookies.
|
||||
- Login brute-force protection enforces Redis-backed throttle checks keyed by username and source IP.
|
||||
- State-changing requests from browser clients must send `x-csrf-token: <dcm_csrf>` in request headers (double-submit pattern).
|
||||
- For non-browser API clients, the optional `Authorization: Bearer <token>` path remains supported when the token is sent explicitly.
|
||||
- `GET /auth/me` returns current identity, role, and current CSRF token.
|
||||
- `POST /auth/logout` revokes current session token.
|
||||
|
||||
Role matrix:
|
||||
- `documents/*`: `admin` or `user`
|
||||
- `search/*`: `admin` or `user`
|
||||
- `settings/*`: `admin` only
|
||||
- `processing/logs/*`: `admin` only
|
||||
|
||||
Ownership rules:
|
||||
- `user` role is restricted to its own documents.
|
||||
- `admin` role can access all documents.
|
||||
|
||||
## Auth
|
||||
|
||||
- `POST /auth/login`
|
||||
- Body model: `AuthLoginRequest`
|
||||
- Response model: `AuthLoginResponse`
|
||||
- Additional responses:
|
||||
- `401` for invalid credentials
|
||||
- `429` for throttled login attempts, with stable message and `Retry-After` header
|
||||
- `503` when the login rate-limiter backend is unavailable
|
||||
- `GET /auth/me`
|
||||
- Response model: `AuthSessionResponse`
|
||||
- `POST /auth/logout`
|
||||
- Response model: `AuthLogoutResponse`
|
||||
|
||||
## Health
|
||||
|
||||
- `GET /health`
|
||||
@@ -26,15 +62,26 @@ Primary implementation modules:
|
||||
- `GET /documents/tags`
|
||||
- Query: `include_trashed`
|
||||
- Response: `{ "tags": string[] }`
|
||||
- Behavior:
|
||||
- all document-assigned tags visible to caller scope are included
|
||||
- predefined tags are role-filtered: `admin` receives full catalog, `user` receives only entries with `global_shared=true`
|
||||
- `GET /documents/paths`
|
||||
- Query: `include_trashed`
|
||||
- Response: `{ "paths": string[] }`
|
||||
- Behavior:
|
||||
- all document-assigned logical paths visible to caller scope are included
|
||||
- predefined paths are role-filtered: `admin` receives full catalog, `user` receives only entries with `global_shared=true`
|
||||
- `GET /documents/types`
|
||||
- Query: `include_trashed`
|
||||
- Response: `{ "types": string[] }`
|
||||
- `POST /documents/content-md/export`
|
||||
- Body model: `ContentExportRequest`
|
||||
- Response: ZIP stream containing one markdown file per matched document
|
||||
- Limits:
|
||||
- hard cap on matched document count (`CONTENT_EXPORT_MAX_DOCUMENTS`)
|
||||
- hard cap on cumulative markdown bytes (`CONTENT_EXPORT_MAX_TOTAL_BYTES`)
|
||||
- per-user rate limit (`CONTENT_EXPORT_RATE_LIMIT_PER_MINUTE`)
|
||||
- Behavior: archive is streamed from spool file instead of unbounded in-memory buffer
|
||||
|
||||
### Per-document operations
|
||||
|
||||
@@ -43,7 +90,8 @@ Primary implementation modules:
|
||||
- `GET /documents/{document_id}/download`
|
||||
- Response: original file bytes
|
||||
- `GET /documents/{document_id}/preview`
|
||||
- Response: inline preview stream where browser-supported
|
||||
- Response: inline preview stream only for safe MIME types
|
||||
- Behavior: script-capable MIME types are forced to attachment responses with `X-Content-Type-Options: nosniff`
|
||||
- `GET /documents/{document_id}/thumbnail`
|
||||
- Response: generated thumbnail image when available
|
||||
- `GET /documents/{document_id}/content-md`
|
||||
@@ -73,36 +121,54 @@ Primary implementation modules:
|
||||
- `conflict_mode` (`ask`, `replace`, `duplicate`)
|
||||
- Response model: `UploadResponse`
|
||||
- Behavior:
|
||||
- `ask`: returns `conflicts` if duplicate checksum is detected
|
||||
- `ask`: returns `conflicts` if duplicate checksum is detected for caller-visible documents
|
||||
- `replace`: creates new document linked to replaced document id
|
||||
- `duplicate`: creates additional document record
|
||||
- upload `POST` request rejected with `411` when `Content-Length` is missing
|
||||
- `OPTIONS /documents/upload` CORS preflight bypasses upload `Content-Length` enforcement
|
||||
- request rejected with `413` when file count, per-file size, or total request size exceeds configured limits
|
||||
|
||||
## Search
|
||||
|
||||
- `GET /search`
|
||||
- Query: `query` (min length 2), `offset`, `limit`, `include_trashed`, `only_trashed`, `path_filter`, `tag_filter`, `type_filter`, `processed_from`, `processed_to`
|
||||
- Response model: `SearchResponse`
|
||||
- Behavior: PostgreSQL full-text and metadata ranking
|
||||
- Behavior: PostgreSQL full-text and metadata ranking with role-based ownership scope
|
||||
|
||||
## Processing Logs
|
||||
|
||||
- Access: admin only
|
||||
|
||||
- `GET /processing/logs`
|
||||
- Query: `offset`, `limit`, `document_id`
|
||||
- Response model: `ProcessingLogListResponse`
|
||||
- `limit` is capped by runtime configuration
|
||||
- sensitive fields are redacted in API responses
|
||||
- `POST /processing/logs/trim`
|
||||
- Query: optional `keep_document_sessions`, `keep_unbound_entries`
|
||||
- Behavior: omitted query values fall back to persisted `/settings.processing_log_retention`
|
||||
- query values are capped by runtime retention limits
|
||||
- Response: trim counters
|
||||
- `POST /processing/logs/clear`
|
||||
- Response: clear counters
|
||||
|
||||
Persistence mode:
|
||||
- default is metadata-only logging (`PROCESSING_LOG_STORE_MODEL_IO_TEXT=false`, `PROCESSING_LOG_STORE_PAYLOAD_TEXT=false`)
|
||||
- full prompt/response or payload content storage requires explicit operator opt-in
|
||||
|
||||
## Settings
|
||||
|
||||
- Access: admin only
|
||||
|
||||
- `GET /settings`
|
||||
- Response model: `AppSettingsResponse`
|
||||
- persisted providers with invalid base URLs are ignored during read sanitization; response falls back to remaining valid providers or secure defaults
|
||||
- provider API keys are exposed only as `api_key_set` and `api_key_masked`
|
||||
- `PATCH /settings`
|
||||
- Body model: `AppSettingsUpdateRequest`
|
||||
- Response model: `AppSettingsResponse`
|
||||
- rejects invalid provider base URLs with `400` when scheme, allowlist, or network safety checks fail
|
||||
- provider API keys are persisted encrypted at rest (`api_key_encrypted`) and plaintext keys are not written to storage
|
||||
- `POST /settings/reset`
|
||||
- Response model: `AppSettingsResponse`
|
||||
- `PATCH /settings/handwriting`
|
||||
@@ -113,6 +179,13 @@ Primary implementation modules:
|
||||
|
||||
## Schema Families
|
||||
|
||||
Auth schemas in `backend/app/schemas/auth.py`:
|
||||
- `AuthLoginRequest`
|
||||
- `AuthUserResponse`
|
||||
- `AuthSessionResponse`
|
||||
- `AuthLoginResponse`
|
||||
- `AuthLogoutResponse`
|
||||
|
||||
Document schemas in `backend/app/schemas/documents.py`:
|
||||
- `DocumentResponse`
|
||||
- `DocumentDetailResponse`
|
||||
@@ -128,4 +201,4 @@ Processing log schemas in `backend/app/schemas/processing_logs.py`:
|
||||
- `ProcessingLogListResponse`
|
||||
|
||||
Settings schemas in `backend/app/schemas/settings.py`:
|
||||
- Provider, task, upload-default, display, processing-log retention, predefined paths or tags, handwriting-style, and legacy handwriting models grouped under `AppSettingsResponse` and `AppSettingsUpdateRequest`.
|
||||
- provider, task, upload-default, display, processing-log retention, predefined paths or tags, handwriting-style, and legacy handwriting models grouped under `AppSettingsResponse` and `AppSettingsUpdateRequest`.
|
||||
|
||||
@@ -6,9 +6,9 @@ DMS runs as a multi-service application defined in `docker-compose.yml`:
|
||||
- `frontend` serves the React UI on port `5173`
|
||||
- `api` serves FastAPI on port `8000`
|
||||
- `worker` executes asynchronous extraction and indexing jobs
|
||||
- `db` provides PostgreSQL persistence on port `5432`
|
||||
- `redis` backs queueing on port `6379`
|
||||
- `typesense` stores search index and vector-adjacent metadata on port `8108`
|
||||
- `db` provides PostgreSQL persistence on the internal compose network
|
||||
- `redis` backs queueing on the internal compose network
|
||||
- `typesense` stores search index and vector-adjacent metadata on the internal compose network
|
||||
|
||||
## Backend Architecture
|
||||
|
||||
@@ -16,16 +16,16 @@ Backend source root: `backend/app/`
|
||||
|
||||
Main boundaries:
|
||||
- `api/` route handlers and HTTP contract
|
||||
- `services/` domain logic (storage, extraction, routing, settings, processing logs, Typesense)
|
||||
- `services/` domain logic (authentication, storage, extraction, routing, settings, processing logs, Typesense)
|
||||
- `db/` SQLAlchemy base, engine, and session lifecycle
|
||||
- `models/` persistence entities (`Document`, `ProcessingLogEntry`)
|
||||
- `models/` persistence entities (`AppUser`, `AuthSession`, `Document`, `ProcessingLogEntry`)
|
||||
- `schemas/` Pydantic response and request schemas
|
||||
- `worker/` RQ queue integration and background processing tasks
|
||||
|
||||
Application bootstrap in `backend/app/main.py`:
|
||||
- mounts routers under `/api/v1`
|
||||
- configures CORS from settings
|
||||
- initializes storage, settings, database schema, and Typesense collection on startup
|
||||
- initializes storage, database schema, bootstrap users, settings, and Typesense collection on startup
|
||||
|
||||
## Processing Lifecycle
|
||||
|
||||
@@ -48,11 +48,12 @@ Core structure:
|
||||
- `design-foundation.css` and `styles.css` define design tokens and global/component styling
|
||||
|
||||
Main user flows:
|
||||
- Login and role-gated navigation (`admin` and `user`)
|
||||
- Upload and conflict resolution
|
||||
- Search and filtered document browsing
|
||||
- Metadata editing and lifecycle actions (trash, restore, delete, reprocess)
|
||||
- Settings management for providers, tasks, and UI defaults
|
||||
- Processing log review
|
||||
- Settings management for providers, tasks, and UI defaults (admin only)
|
||||
- Processing log review (admin only)
|
||||
|
||||
## Persistence and State
|
||||
|
||||
@@ -64,3 +65,13 @@ Persistent data:
|
||||
Transient runtime state:
|
||||
- Redis queues processing tasks and worker execution state
|
||||
- frontend local component state drives active filters, selection, and modal flows
|
||||
|
||||
Security-sensitive runtime behavior:
|
||||
- API access is session-based with per-user server-issued bearer tokens and role checks.
|
||||
- Document and search reads for `user` role are owner-scoped via `owner_user_id`; `admin` can access global scope.
|
||||
- Redis connection URLs are validated by backend queue helpers with environment-aware auth and TLS policy enforcement.
|
||||
- Worker startup runs through `python -m app.worker.run_worker`, which validates Redis URL policy before queue consumption.
|
||||
- Inline preview is limited to safe MIME types and script-capable content is served as attachment-only.
|
||||
- Archive fan-out processing propagates root and depth lineage metadata and enforces depth and per-root descendant caps.
|
||||
- Markdown export applies per-user rate limits, hard document-count and total-byte caps, and spool-file streaming.
|
||||
- Processing logs default to metadata-only persistence, with explicit operator toggles required to store model IO text.
|
||||
|
||||
@@ -2,6 +2,38 @@
|
||||
|
||||
Primary SQLAlchemy models are defined in `backend/app/models/`.
|
||||
|
||||
## app_users
|
||||
|
||||
Model: `AppUser` in `backend/app/models/auth.py`
|
||||
|
||||
Purpose:
|
||||
- Stores authenticatable user identities for session-based API access.
|
||||
|
||||
Core fields:
|
||||
- Identity and credentials: `id`, `username`, `password_hash`
|
||||
- Authorization and lifecycle: `role`, `is_active`
|
||||
- Audit timestamps: `created_at`, `updated_at`
|
||||
|
||||
Enum `UserRole`:
|
||||
- `admin`
|
||||
- `user`
|
||||
|
||||
## auth_sessions
|
||||
|
||||
Model: `AuthSession` in `backend/app/models/auth.py`
|
||||
|
||||
Purpose:
|
||||
- Stores issued bearer sessions linked to user identities.
|
||||
|
||||
Core fields:
|
||||
- Identity and linkage: `id`, `user_id`, `token_hash`
|
||||
- Session lifecycle: `expires_at`, `revoked_at`
|
||||
- Request context: `user_agent`, `ip_address`
|
||||
- Audit timestamps: `created_at`, `updated_at`
|
||||
|
||||
Foreign keys:
|
||||
- `user_id` references `app_users.id` with `ON DELETE CASCADE`.
|
||||
|
||||
## documents
|
||||
|
||||
Model: `Document` in `backend/app/models/document.py`
|
||||
@@ -12,7 +44,7 @@ Purpose:
|
||||
Core fields:
|
||||
- Identity and source: `id`, `original_filename`, `source_relative_path`, `stored_relative_path`
|
||||
- File attributes: `mime_type`, `extension`, `sha256`, `size_bytes`
|
||||
- Organization: `logical_path`, `suggested_path`, `tags`, `suggested_tags`
|
||||
- Ownership and organization: `owner_user_id`, `logical_path`, `suggested_path`, `tags`, `suggested_tags`
|
||||
- Processing outputs: `extracted_text`, `image_text_type`, `handwriting_style_id`, `preview_available`
|
||||
- Lifecycle and relations: `status`, `is_archive_member`, `archived_member_path`, `parent_document_id`, `replaces_document_id`
|
||||
- Metadata and timestamps: `metadata_json`, `created_at`, `processed_at`, `updated_at`
|
||||
@@ -24,8 +56,12 @@ Enum `DocumentStatus`:
|
||||
- `error`
|
||||
- `trashed`
|
||||
|
||||
Foreign keys:
|
||||
- `owner_user_id` references `app_users.id` with `ON DELETE SET NULL`.
|
||||
|
||||
Relationships:
|
||||
- Self-referential `parent_document` relationship for archive extraction trees.
|
||||
- `owner_user` relationship to `AppUser`.
|
||||
|
||||
## processing_logs
|
||||
|
||||
@@ -47,7 +83,10 @@ Foreign keys:
|
||||
|
||||
## Model Lifecycle Notes
|
||||
|
||||
- Upload inserts a `Document` row in `queued` state and enqueues background processing.
|
||||
- Worker updates extraction results and final status (`processed`, `unsupported`, or `error`).
|
||||
- API startup initializes schema and creates or refreshes bootstrap users from auth environment variables.
|
||||
- `POST /auth/login` validates `AppUser` credentials, creates `AuthSession` with hashed token, and returns bearer token once.
|
||||
- Upload inserts `Document` row in `queued` state, assigns `owner_user_id`, and enqueues background processing.
|
||||
- Worker updates extraction results and final status (`processed`, `unsupported`, or `error`), preserving ownership on archive descendants.
|
||||
- User-role queries are owner-scoped; admin-role queries can access all documents.
|
||||
- Trash and restore operations toggle `status` while preserving source files until permanent delete.
|
||||
- Permanent delete removes the document tree (including archive descendants) and associated stored files.
|
||||
|
||||
@@ -49,6 +49,16 @@ Do not hardcode new palette or spacing values in component styles when a token a
|
||||
- Do not render queued headers before their animation starts, even when polling returns batched updates.
|
||||
- Preserve existing header content format and fold/unfold detail behavior as lines are revealed.
|
||||
|
||||
## Authenticated Media Delivery
|
||||
|
||||
- Document previews and thumbnails must load through authenticated fetch flows in `frontend/src/lib/api.ts`, then render via temporary object URLs.
|
||||
- Runtime auth is cookie-backed; valid sessions are reused by browser reload and tab reuse while the `dcm_session` cookie remains valid.
|
||||
- Static build-time token distribution is not supported.
|
||||
- Direct `window.open` calls for protected media endpoints are not allowed because browser navigation requests do not include the API token header.
|
||||
- Download actions for original files and markdown exports must use authenticated blob fetches plus controlled browser download triggers.
|
||||
- Revoke all temporary object URLs after replacement, unmount, or completion to prevent browser memory leaks.
|
||||
- `DocumentViewer` iframe previews must be restricted to safe MIME types and rendered with `sandbox`, restrictive `allow`, and `referrerPolicy="no-referrer"` attributes. Active or script-capable formats must not be embedded inline.
|
||||
|
||||
## Extension Checklist
|
||||
|
||||
When adding or redesigning a UI area:
|
||||
|
||||
@@ -2,37 +2,36 @@
|
||||
|
||||
## Runtime Services
|
||||
|
||||
`docker-compose.yml` defines the runtime stack:
|
||||
- `db` (Postgres 16, port `5432`)
|
||||
- `redis` (Redis 7, port `6379`)
|
||||
- `typesense` (Typesense 29, port `8108`)
|
||||
- `api` (FastAPI backend, port `8000`)
|
||||
- `worker` (RQ background worker)
|
||||
- `frontend` (Vite UI, port `5173`)
|
||||
`docker-compose.yml` defines:
|
||||
- `db` (Postgres 16)
|
||||
- `redis` (Redis 7)
|
||||
- `typesense` (Typesense 29)
|
||||
- `api` (FastAPI backend)
|
||||
- `worker` (RQ worker via `python -m app.worker.run_worker`)
|
||||
- `frontend` (Vite React UI)
|
||||
|
||||
## Named Volumes
|
||||
|
||||
Persistent volumes:
|
||||
- `db-data`
|
||||
- `redis-data`
|
||||
- `dcm-storage`
|
||||
- `typesense-data`
|
||||
Persistent host bind mounts (default root `./data`, overridable with `DCM_DATA_DIR`):
|
||||
- `${DCM_DATA_DIR:-./data}/db-data`
|
||||
- `${DCM_DATA_DIR:-./data}/redis-data`
|
||||
- `${DCM_DATA_DIR:-./data}/storage`
|
||||
- `${DCM_DATA_DIR:-./data}/typesense-data`
|
||||
|
||||
Reset all persisted runtime data:
|
||||
|
||||
```bash
|
||||
docker compose down -v
|
||||
docker compose down
|
||||
rm -rf ${DCM_DATA_DIR:-./data}
|
||||
```
|
||||
|
||||
## Operational Commands
|
||||
## Core Commands
|
||||
|
||||
Start or rebuild stack:
|
||||
Start or rebuild:
|
||||
|
||||
```bash
|
||||
docker compose up --build -d
|
||||
```
|
||||
|
||||
Stop stack:
|
||||
Stop:
|
||||
|
||||
```bash
|
||||
docker compose down
|
||||
@@ -44,71 +43,127 @@ Tail logs:
|
||||
docker compose logs -f
|
||||
```
|
||||
|
||||
## Backend Configuration
|
||||
## Host Bind Mounts
|
||||
|
||||
Settings source:
|
||||
- Runtime settings class: `backend/app/core/config.py`
|
||||
- API settings persistence: `backend/app/services/app_settings.py`
|
||||
Compose is configured with host bind mounts for persistent data. Ensure host directories exist and are writable by the backend runtime user.
|
||||
|
||||
Key environment variables used by `api` and `worker` in compose:
|
||||
- `APP_ENV`
|
||||
- `DATABASE_URL`
|
||||
- `REDIS_URL`
|
||||
- `STORAGE_ROOT`
|
||||
- `PUBLIC_BASE_URL`
|
||||
- `CORS_ORIGINS` (API service)
|
||||
- `TYPESENSE_PROTOCOL`
|
||||
- `TYPESENSE_HOST`
|
||||
- `TYPESENSE_PORT`
|
||||
- `TYPESENSE_API_KEY`
|
||||
- `TYPESENSE_COLLECTION_NAME`
|
||||
|
||||
Selected defaults from `Settings` (`backend/app/core/config.py`):
|
||||
- `upload_chunk_size = 4194304`
|
||||
- `max_zip_members = 250`
|
||||
- `max_zip_depth = 2`
|
||||
- `max_text_length = 500000`
|
||||
- `default_openai_model = "gpt-4.1-mini"`
|
||||
- `default_openai_timeout_seconds = 45`
|
||||
- `default_summary_model = "gpt-4.1-mini"`
|
||||
- `default_routing_model = "gpt-4.1-mini"`
|
||||
- `typesense_timeout_seconds = 120`
|
||||
- `typesense_num_retries = 0`
|
||||
|
||||
## Frontend Configuration
|
||||
|
||||
Frontend runtime API target:
|
||||
- `VITE_API_BASE` in `docker-compose.yml` frontend service
|
||||
|
||||
Frontend local commands:
|
||||
Backend and worker run as non-root user `uid=10001` inside containers. Compose bootstraps the storage bind mount through the one-shot `storage-init` service before either process starts. For manual inspection or repair of host-mounted storage paths:
|
||||
|
||||
```bash
|
||||
cd frontend && npm run dev
|
||||
cd frontend && npm run build
|
||||
cd frontend && npm run preview
|
||||
mkdir -p ${DCM_DATA_DIR:-./data}/storage
|
||||
sudo chown -R 10001:10001 ${DCM_DATA_DIR:-./data}/storage
|
||||
sudo chmod -R u+rwX,g+rwX ${DCM_DATA_DIR:-./data}/storage
|
||||
```
|
||||
|
||||
## Settings Persistence
|
||||
If permissions are incorrect, API startup fails with errors similar to:
|
||||
- `PermissionError: [Errno 13] Permission denied: '/data/storage'`
|
||||
- `FileNotFoundError` for `/data/storage/originals`
|
||||
|
||||
Application-level settings managed from the UI are persisted by backend settings service:
|
||||
- file path: `<STORAGE_ROOT>/settings.json`
|
||||
- endpoints: `/api/v1/settings`, `/api/v1/settings/reset`, `/api/v1/settings/handwriting`
|
||||
## Frontend Build Baseline
|
||||
|
||||
Settings include:
|
||||
- upload defaults
|
||||
- display options
|
||||
- processing-log retention options (`keep_document_sessions`, `keep_unbound_entries`)
|
||||
- provider configuration
|
||||
- OCR, summary, and routing task settings
|
||||
- predefined paths and tags
|
||||
- handwriting-style clustering settings
|
||||
The frontend Dockerfile uses `node:22-slim` with a standard `npm ci --no-audit` install step and no npm-specific build tuning flags.
|
||||
|
||||
Retention settings are used by worker cleanup and by `POST /api/v1/processing/logs/trim` when trim query values are not provided.
|
||||
## Authentication Model
|
||||
|
||||
- Legacy shared build-time frontend token behavior was removed.
|
||||
- API now uses server-issued sessions that are stored in HttpOnly cookies (`dcm_session`) with a separate CSRF cookie (`dcm_csrf`).
|
||||
- Bootstrap users are provisioned from environment:
|
||||
- `AUTH_BOOTSTRAP_ADMIN_USERNAME`
|
||||
- `AUTH_BOOTSTRAP_ADMIN_PASSWORD`
|
||||
- optional `AUTH_BOOTSTRAP_USER_USERNAME`
|
||||
- optional `AUTH_BOOTSTRAP_USER_PASSWORD`
|
||||
- Login brute-force protection is enabled by default and keyed by username and source IP:
|
||||
- `AUTH_LOGIN_FAILURE_LIMIT`
|
||||
- `AUTH_LOGIN_FAILURE_WINDOW_SECONDS`
|
||||
- `AUTH_LOGIN_LOCKOUT_BASE_SECONDS`
|
||||
- `AUTH_LOGIN_LOCKOUT_MAX_SECONDS`
|
||||
- Frontend signs in through `/api/v1/auth/login` and relies on browser session persistence for valid cookie-backed sessions.
|
||||
|
||||
## DEV And LIVE Configuration Matrix
|
||||
|
||||
Use `.env.example` as baseline. The table below documents user-managed settings and recommended values.
|
||||
|
||||
| Variable | Local DEV (HTTP, docker-only) | LIVE (HTTPS behind reverse proxy) |
|
||||
| --- | --- | --- |
|
||||
| `APP_ENV` | `development` | `production` |
|
||||
| `HOST_BIND_IP` | `127.0.0.1` or local LAN bind if needed | `127.0.0.1` (publish behind proxy only) |
|
||||
| `PUBLIC_BASE_URL` | `http://localhost:8000` or same-origin frontend host when proxying API through frontend | `https://app.example.com` when frontend proxies `/api`, or dedicated API origin if you intentionally keep split-origin routing |
|
||||
| `VITE_API_BASE` | empty to use same-origin `/api/v1` through frontend proxy, or explicit local URL when bypassing proxy | empty or `/api/v1` for same-origin production routing; only use `https://api.example.com/api/v1` when you intentionally keep split-origin frontend/API traffic |
|
||||
| `VITE_ALLOWED_HOSTS` | optional comma-separated hostnames, for example `localhost,docs.lan` | optional comma-separated public frontend hostnames, for example `app.example.com` |
|
||||
| `CORS_ORIGINS` | `["http://localhost:5173","http://localhost:3000"]` | exact frontend origins only, for example `["https://app.example.com"]` |
|
||||
| `REDIS_URL` | `redis://:<password>@redis:6379/0` in isolated local network | `rediss://:<password>@redis.internal:6379/0` |
|
||||
| `REDIS_SECURITY_MODE` | `compat` or `auto` | `strict` |
|
||||
| `REDIS_TLS_MODE` | `allow_insecure` or `auto` | `required` |
|
||||
| `AUTH_LOGIN_FAILURE_LIMIT` | default `5` | tune to identity-protection policy and support requirements |
|
||||
| `AUTH_LOGIN_FAILURE_WINDOW_SECONDS` | default `900` | tune to identity-protection policy and support requirements |
|
||||
| `AUTH_LOGIN_LOCKOUT_BASE_SECONDS` | default `30` | tune to identity-protection policy and support requirements |
|
||||
| `AUTH_LOGIN_LOCKOUT_MAX_SECONDS` | default `900` | tune to identity-protection policy and support requirements |
|
||||
| `AUTH_COOKIE_DOMAIN` | empty (recommended; API always issues a host-only auth cookie) | optional parent domain only when you explicitly need a mirrored domain cookie, for example `docs.lan` |
|
||||
| `AUTH_COOKIE_SAMESITE` | `auto` | `none` only for truly cross-site frontend/API deployments; keep `auto` for same-site subdomains such as `docs.lan` and `api.docs.lan` |
|
||||
| `PROVIDER_BASE_URL_ALLOW_HTTP` | `true` only when intentionally testing local HTTP provider endpoints | `false` |
|
||||
| `PROVIDER_BASE_URL_ALLOW_PRIVATE_NETWORK` | `true` only for trusted local development targets | `false` |
|
||||
| `PROVIDER_BASE_URL_ALLOWLIST` | allow needed test hosts | explicit production allowlist, for example `["api.openai.com"]` |
|
||||
| `PROCESSING_LOG_STORE_MODEL_IO_TEXT` | `false` by default; temporary `true` only for controlled debugging | `false` |
|
||||
| `PROCESSING_LOG_STORE_PAYLOAD_TEXT` | `false` by default; temporary `true` only for controlled debugging | `false` |
|
||||
| `CONTENT_EXPORT_MAX_DOCUMENTS` | default `250` or lower based on host memory | tuned to production capacity |
|
||||
| `CONTENT_EXPORT_MAX_TOTAL_BYTES` | default `52428800` (50 MiB) or lower | tuned to production capacity |
|
||||
| `CONTENT_EXPORT_RATE_LIMIT_PER_MINUTE` | default `6` | tuned to API throughput and abuse model |
|
||||
|
||||
`PUBLIC_BASE_URL` must point to the backend API public URL, not the frontend URL.
|
||||
|
||||
## HTTPS Proxy Deployment Notes
|
||||
|
||||
This application supports both:
|
||||
- local HTTP-only operation (no TLS termination in containers)
|
||||
- HTTPS deployment behind a reverse proxy that handles TLS
|
||||
|
||||
Recommended LIVE pattern:
|
||||
1. Proxy terminates TLS and forwards to `api` and `frontend` internal HTTP endpoints.
|
||||
2. Keep container published ports bound to localhost or internal network.
|
||||
3. Set `PUBLIC_BASE_URL` and `VITE_API_BASE` to final HTTPS URLs.
|
||||
4. Set `CORS_ORIGINS` to exact HTTPS frontend origins.
|
||||
5. Credentialed CORS is enabled and constrained for cookie-based sessions with strict origin allowlists.
|
||||
|
||||
## Security Controls
|
||||
|
||||
- CORS uses explicit origin allowlist only; broad origin regex matching is removed.
|
||||
- Worker Redis startup validates URL auth and TLS policy before consuming jobs.
|
||||
- Provider API keys are encrypted at rest with standard AEAD (`cryptography` Fernet).
|
||||
- legacy `enc-v1` payloads are read for backward compatibility
|
||||
- new writes use `enc-v2`
|
||||
- Processing logs default to metadata-only persistence.
|
||||
- Login endpoint applies escalating temporary lockout on repeated failed credentials using Redis-backed subject keys for username and source IP.
|
||||
- Markdown export enforces:
|
||||
- max document count
|
||||
- max total markdown bytes
|
||||
- per-user Redis-backed rate limit
|
||||
- spool-file streaming to avoid unbounded memory archives
|
||||
- User-role document access is owner-scoped for non-admin accounts.
|
||||
|
||||
## Frontend Runtime
|
||||
|
||||
- Frontend no longer consumes `VITE_API_TOKEN`.
|
||||
- Frontend image target is environment-driven:
|
||||
- `APP_ENV=development` builds the `development` target and runs Vite dev server
|
||||
- `APP_ENV=production` builds the `production` target and serves static assets through unprivileged Nginx
|
||||
- Frontend Docker targets are selected from `APP_ENV`, so use `development` or `production` values.
|
||||
- Production frontend Nginx uses non-root runtime plus `/tmp` temp-path configuration so it can run with container capability dropping enabled.
|
||||
- Vite dev server host allowlist uses the union of:
|
||||
- hostnames extracted from `CORS_ORIGINS`
|
||||
- optional explicit hostnames from `VITE_ALLOWED_HOSTS`
|
||||
- `VITE_ALLOWED_HOSTS` only affects development mode where Vite is running.
|
||||
- API auth cookies support optional domain and SameSite configuration through `AUTH_COOKIE_DOMAIN` and `AUTH_COOKIE_SAMESITE`.
|
||||
- HTTPS cookie security detection falls back to `PUBLIC_BASE_URL` scheme when proxy headers are missing.
|
||||
- CSRF validation accepts header matches against any `dcm_csrf` cookie value in the request, covering stale plus fresh duplicate-cookie transitions.
|
||||
- Session authentication is cookie-based; browser reloads and new tabs can reuse an active session until it expires or is revoked.
|
||||
- Protected media and file download flows still use authenticated fetch plus blob/object URL handling.
|
||||
|
||||
## Validation Checklist
|
||||
|
||||
After operational or configuration changes, verify:
|
||||
- `GET /api/v1/health` is healthy
|
||||
- frontend can list, upload, and search documents
|
||||
- processing worker logs show successful task execution
|
||||
- settings save or reset works and persists after restart
|
||||
After configuration changes:
|
||||
- `GET /api/v1/health` returns healthy response
|
||||
- login succeeds for bootstrap admin user
|
||||
- admin can upload, search, open preview, download, and export markdown
|
||||
- user account can only access its own documents
|
||||
- admin-only settings and processing logs are not accessible by user role
|
||||
- `docker compose logs -f api worker` shows no startup validation failures
|
||||
|
||||
@@ -1,101 +1,177 @@
|
||||
services:
|
||||
storage-init:
|
||||
build:
|
||||
context: ./backend
|
||||
user: "0:0"
|
||||
command:
|
||||
- "sh"
|
||||
- "-c"
|
||||
- >
|
||||
mkdir -p /data/storage/originals /data/storage/derived/previews /data/storage/tmp &&
|
||||
chown -R 10001:10001 /data/storage &&
|
||||
chmod -R u+rwX,g+rwX /data/storage
|
||||
volumes:
|
||||
- ${DCM_DATA_DIR:-./data}/storage:/data/storage
|
||||
restart: "no"
|
||||
|
||||
db:
|
||||
image: postgres:16-alpine
|
||||
environment:
|
||||
POSTGRES_USER: dcm
|
||||
POSTGRES_PASSWORD: dcm
|
||||
POSTGRES_DB: dcm
|
||||
ports:
|
||||
- "5432:5432"
|
||||
POSTGRES_USER: ${POSTGRES_USER:?POSTGRES_USER must be set}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}
|
||||
POSTGRES_DB: ${POSTGRES_DB:?POSTGRES_DB must be set}
|
||||
volumes:
|
||||
- db-data:/var/lib/postgresql/data
|
||||
- ${DCM_DATA_DIR:-./data}/db-data:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U dcm -d dcm"]
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:?POSTGRES_USER must be set} -d ${POSTGRES_DB:?POSTGRES_DB must be set}"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- internal
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
ports:
|
||||
- "6379:6379"
|
||||
command:
|
||||
- "redis-server"
|
||||
- "--appendonly"
|
||||
- "yes"
|
||||
- "--requirepass"
|
||||
- "${REDIS_PASSWORD:?REDIS_PASSWORD must be set}"
|
||||
volumes:
|
||||
- redis-data:/data
|
||||
- ${DCM_DATA_DIR:-./data}/redis-data:/data
|
||||
networks:
|
||||
- internal
|
||||
|
||||
typesense:
|
||||
image: typesense/typesense:29.0
|
||||
image: typesense/typesense:30.2.rc6
|
||||
command:
|
||||
- "--data-dir=/data"
|
||||
- "--api-key=dcm-typesense-key"
|
||||
- "--api-key=${TYPESENSE_API_KEY:?TYPESENSE_API_KEY must be set}"
|
||||
- "--enable-cors"
|
||||
ports:
|
||||
- "8108:8108"
|
||||
volumes:
|
||||
- typesense-data:/data
|
||||
- ${DCM_DATA_DIR:-./data}/typesense-data:/data
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- internal
|
||||
|
||||
api:
|
||||
build:
|
||||
context: ./backend
|
||||
environment:
|
||||
APP_ENV: development
|
||||
DATABASE_URL: postgresql+psycopg://dcm:dcm@db:5432/dcm
|
||||
REDIS_URL: redis://redis:6379/0
|
||||
APP_ENV: ${APP_ENV:-development}
|
||||
DATABASE_URL: ${DATABASE_URL:?DATABASE_URL must be set}
|
||||
REDIS_URL: ${REDIS_URL:?REDIS_URL must be set}
|
||||
REDIS_SECURITY_MODE: ${REDIS_SECURITY_MODE:-auto}
|
||||
REDIS_TLS_MODE: ${REDIS_TLS_MODE:-auto}
|
||||
STORAGE_ROOT: /data/storage
|
||||
AUTH_BOOTSTRAP_ADMIN_USERNAME: ${AUTH_BOOTSTRAP_ADMIN_USERNAME:?AUTH_BOOTSTRAP_ADMIN_USERNAME must be set}
|
||||
AUTH_BOOTSTRAP_ADMIN_PASSWORD: ${AUTH_BOOTSTRAP_ADMIN_PASSWORD:?AUTH_BOOTSTRAP_ADMIN_PASSWORD must be set}
|
||||
AUTH_BOOTSTRAP_USER_USERNAME: ${AUTH_BOOTSTRAP_USER_USERNAME:-}
|
||||
AUTH_BOOTSTRAP_USER_PASSWORD: ${AUTH_BOOTSTRAP_USER_PASSWORD:-}
|
||||
AUTH_LOGIN_FAILURE_LIMIT: ${AUTH_LOGIN_FAILURE_LIMIT:-5}
|
||||
AUTH_LOGIN_FAILURE_WINDOW_SECONDS: ${AUTH_LOGIN_FAILURE_WINDOW_SECONDS:-900}
|
||||
AUTH_LOGIN_LOCKOUT_BASE_SECONDS: ${AUTH_LOGIN_LOCKOUT_BASE_SECONDS:-30}
|
||||
AUTH_LOGIN_LOCKOUT_MAX_SECONDS: ${AUTH_LOGIN_LOCKOUT_MAX_SECONDS:-900}
|
||||
APP_SETTINGS_ENCRYPTION_KEY: ${APP_SETTINGS_ENCRYPTION_KEY:?APP_SETTINGS_ENCRYPTION_KEY must be set}
|
||||
PROVIDER_BASE_URL_ALLOWLIST: '${PROVIDER_BASE_URL_ALLOWLIST:-[]}'
|
||||
PROVIDER_BASE_URL_ALLOW_HTTP: ${PROVIDER_BASE_URL_ALLOW_HTTP:-true}
|
||||
PROVIDER_BASE_URL_ALLOW_PRIVATE_NETWORK: ${PROVIDER_BASE_URL_ALLOW_PRIVATE_NETWORK:-true}
|
||||
PROCESSING_LOG_STORE_MODEL_IO_TEXT: ${PROCESSING_LOG_STORE_MODEL_IO_TEXT:-false}
|
||||
PROCESSING_LOG_STORE_PAYLOAD_TEXT: ${PROCESSING_LOG_STORE_PAYLOAD_TEXT:-false}
|
||||
CONTENT_EXPORT_MAX_DOCUMENTS: ${CONTENT_EXPORT_MAX_DOCUMENTS:-250}
|
||||
CONTENT_EXPORT_MAX_TOTAL_BYTES: ${CONTENT_EXPORT_MAX_TOTAL_BYTES:-52428800}
|
||||
CONTENT_EXPORT_RATE_LIMIT_PER_MINUTE: ${CONTENT_EXPORT_RATE_LIMIT_PER_MINUTE:-6}
|
||||
OCR_LANGUAGES: eng,deu
|
||||
PUBLIC_BASE_URL: http://192.168.2.5:8000
|
||||
CORS_ORIGINS: '["http://localhost:5173","http://localhost:3000","http://192.168.2.5:5173"]'
|
||||
PUBLIC_BASE_URL: ${PUBLIC_BASE_URL:-http://localhost:8000}
|
||||
CORS_ORIGINS: '${CORS_ORIGINS:-["http://localhost:5173","http://localhost:3000"]}'
|
||||
TYPESENSE_PROTOCOL: http
|
||||
TYPESENSE_HOST: typesense
|
||||
TYPESENSE_PORT: 8108
|
||||
TYPESENSE_API_KEY: dcm-typesense-key
|
||||
TYPESENSE_API_KEY: ${TYPESENSE_API_KEY:?TYPESENSE_API_KEY must be set}
|
||||
TYPESENSE_COLLECTION_NAME: documents
|
||||
ports:
|
||||
- "8000:8000"
|
||||
# ports:
|
||||
# - "${HOST_BIND_IP:-127.0.0.1}:8000:8000"
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
cap_drop:
|
||||
- ALL
|
||||
volumes:
|
||||
- ./backend/app:/app/app
|
||||
- dcm-storage:/data
|
||||
- ${DCM_DATA_DIR:-./data}/storage:/data/storage
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_started
|
||||
storage-init:
|
||||
condition: service_completed_successfully
|
||||
typesense:
|
||||
condition: service_started
|
||||
networks:
|
||||
npm_proxy:
|
||||
ipv4_address: 192.168.98.41
|
||||
internal:
|
||||
restart: unless-stopped
|
||||
|
||||
worker:
|
||||
build:
|
||||
context: ./backend
|
||||
command: ["rq", "worker", "dcm", "--url", "redis://redis:6379/0"]
|
||||
command: ["python", "-m", "app.worker.run_worker"]
|
||||
environment:
|
||||
APP_ENV: development
|
||||
DATABASE_URL: postgresql+psycopg://dcm:dcm@db:5432/dcm
|
||||
REDIS_URL: redis://redis:6379/0
|
||||
APP_ENV: ${APP_ENV:-development}
|
||||
DATABASE_URL: ${DATABASE_URL:?DATABASE_URL must be set}
|
||||
REDIS_URL: ${REDIS_URL:?REDIS_URL must be set}
|
||||
REDIS_SECURITY_MODE: ${REDIS_SECURITY_MODE:-auto}
|
||||
REDIS_TLS_MODE: ${REDIS_TLS_MODE:-auto}
|
||||
STORAGE_ROOT: /data/storage
|
||||
APP_SETTINGS_ENCRYPTION_KEY: ${APP_SETTINGS_ENCRYPTION_KEY:?APP_SETTINGS_ENCRYPTION_KEY must be set}
|
||||
PROVIDER_BASE_URL_ALLOWLIST: '${PROVIDER_BASE_URL_ALLOWLIST:-[]}'
|
||||
PROVIDER_BASE_URL_ALLOW_HTTP: ${PROVIDER_BASE_URL_ALLOW_HTTP:-true}
|
||||
PROVIDER_BASE_URL_ALLOW_PRIVATE_NETWORK: ${PROVIDER_BASE_URL_ALLOW_PRIVATE_NETWORK:-true}
|
||||
PROCESSING_LOG_STORE_MODEL_IO_TEXT: ${PROCESSING_LOG_STORE_MODEL_IO_TEXT:-false}
|
||||
PROCESSING_LOG_STORE_PAYLOAD_TEXT: ${PROCESSING_LOG_STORE_PAYLOAD_TEXT:-false}
|
||||
OCR_LANGUAGES: eng,deu
|
||||
PUBLIC_BASE_URL: http://localhost:8000
|
||||
PUBLIC_BASE_URL: ${PUBLIC_BASE_URL:-http://localhost:8000}
|
||||
TYPESENSE_PROTOCOL: http
|
||||
TYPESENSE_HOST: typesense
|
||||
TYPESENSE_PORT: 8108
|
||||
TYPESENSE_API_KEY: dcm-typesense-key
|
||||
TYPESENSE_API_KEY: ${TYPESENSE_API_KEY:?TYPESENSE_API_KEY must be set}
|
||||
TYPESENSE_COLLECTION_NAME: documents
|
||||
volumes:
|
||||
- ./backend/app:/app/app
|
||||
- dcm-storage:/data
|
||||
- ${DCM_DATA_DIR:-./data}/storage:/data/storage
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
cap_drop:
|
||||
- ALL
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_started
|
||||
storage-init:
|
||||
condition: service_completed_successfully
|
||||
typesense:
|
||||
condition: service_started
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- internal
|
||||
|
||||
frontend:
|
||||
build:
|
||||
context: ./frontend
|
||||
target: ${APP_ENV:-development}
|
||||
args:
|
||||
VITE_API_BASE: ${VITE_API_BASE:-}
|
||||
environment:
|
||||
VITE_API_BASE: http://192.168.2.5:8000/api/v1
|
||||
ports:
|
||||
- "5173:5173"
|
||||
VITE_API_BASE: ${VITE_API_BASE:-}
|
||||
VITE_API_PROXY_TARGET: ${VITE_API_PROXY_TARGET:-http://api:8000}
|
||||
CORS_ORIGINS: '${CORS_ORIGINS:-["http://localhost:5173","http://localhost:3000"]}'
|
||||
VITE_ALLOWED_HOSTS: ${VITE_ALLOWED_HOSTS:-}
|
||||
# ports:
|
||||
# - "${HOST_BIND_IP:-127.0.0.1}:5173:5173"
|
||||
volumes:
|
||||
- ./frontend/src:/app/src
|
||||
- ./frontend/index.html:/app/index.html
|
||||
@@ -103,9 +179,18 @@ services:
|
||||
depends_on:
|
||||
api:
|
||||
condition: service_started
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
cap_drop:
|
||||
- ALL
|
||||
networks:
|
||||
npm_proxy:
|
||||
ipv4_address: 192.168.98.40
|
||||
internal:
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
db-data:
|
||||
redis-data:
|
||||
dcm-storage:
|
||||
typesense-data:
|
||||
networks:
|
||||
internal:
|
||||
driver: bridge
|
||||
npm_proxy:
|
||||
external: true
|
||||
|
||||
@@ -1,16 +1,44 @@
|
||||
FROM node:22-alpine
|
||||
FROM node:20-slim AS base
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY package.json /app/package.json
|
||||
RUN npm install
|
||||
COPY package-lock.json /app/package-lock.json
|
||||
RUN npm ci --no-audit \
|
||||
&& chown -R node:node /app
|
||||
|
||||
COPY tsconfig.json /app/tsconfig.json
|
||||
COPY tsconfig.node.json /app/tsconfig.node.json
|
||||
COPY vite.config.ts /app/vite.config.ts
|
||||
COPY index.html /app/index.html
|
||||
COPY src /app/src
|
||||
COPY --chown=node:node tsconfig.json /app/tsconfig.json
|
||||
COPY --chown=node:node tsconfig.node.json /app/tsconfig.node.json
|
||||
COPY --chown=node:node vite.config.ts /app/vite.config.ts
|
||||
COPY --chown=node:node index.html /app/index.html
|
||||
COPY --chown=node:node src /app/src
|
||||
|
||||
FROM base AS development
|
||||
|
||||
EXPOSE 5173
|
||||
|
||||
USER node
|
||||
|
||||
CMD ["npm", "run", "dev", "--", "--host", "0.0.0.0", "--port", "5173"]
|
||||
|
||||
FROM base AS build
|
||||
|
||||
ARG VITE_API_BASE=
|
||||
ENV VITE_API_BASE=${VITE_API_BASE}
|
||||
|
||||
RUN npm run build
|
||||
|
||||
FROM nginx:1.27-alpine AS production
|
||||
|
||||
COPY nginx-main.conf /etc/nginx/nginx.conf
|
||||
COPY nginx.conf /etc/nginx/conf.d/default.conf
|
||||
COPY --from=build /app/dist /usr/share/nginx/html
|
||||
RUN mkdir -p /tmp/client_temp /tmp/proxy_temp /tmp/fastcgi_temp /tmp/uwsgi_temp /tmp/scgi_temp \
|
||||
&& chown -R 101:101 /tmp /var/log/nginx /usr/share/nginx/html
|
||||
|
||||
EXPOSE 5173
|
||||
|
||||
USER 101:101
|
||||
|
||||
ENTRYPOINT ["nginx"]
|
||||
CMD ["-g", "daemon off;"]
|
||||
|
||||
22
frontend/nginx-main.conf
Normal file
22
frontend/nginx-main.conf
Normal file
@@ -0,0 +1,22 @@
|
||||
worker_processes auto;
|
||||
pid /tmp/nginx.pid;
|
||||
|
||||
events {
|
||||
worker_connections 1024;
|
||||
}
|
||||
|
||||
http {
|
||||
include /etc/nginx/mime.types;
|
||||
default_type application/octet-stream;
|
||||
|
||||
sendfile on;
|
||||
keepalive_timeout 65;
|
||||
|
||||
client_body_temp_path /tmp/client_temp;
|
||||
proxy_temp_path /tmp/proxy_temp;
|
||||
fastcgi_temp_path /tmp/fastcgi_temp;
|
||||
uwsgi_temp_path /tmp/uwsgi_temp;
|
||||
scgi_temp_path /tmp/scgi_temp;
|
||||
|
||||
include /etc/nginx/conf.d/*.conf;
|
||||
}
|
||||
22
frontend/nginx.conf
Normal file
22
frontend/nginx.conf
Normal file
@@ -0,0 +1,22 @@
|
||||
server {
|
||||
listen 5173;
|
||||
listen [::]:5173;
|
||||
server_name _;
|
||||
client_max_body_size 100m;
|
||||
|
||||
root /usr/share/nginx/html;
|
||||
index index.html;
|
||||
|
||||
location /api/ {
|
||||
proxy_pass http://api:8000;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
}
|
||||
|
||||
location / {
|
||||
try_files $uri $uri/ /index.html;
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,7 @@
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"test": "node --experimental-strip-types src/lib/api.test.ts",
|
||||
"build": "tsc -b && vite build",
|
||||
"preview": "vite preview --host 0.0.0.0 --port 4173"
|
||||
},
|
||||
|
||||
@@ -3,9 +3,11 @@
|
||||
*/
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||
import type { JSX } from 'react';
|
||||
import { LogOut, User } from 'lucide-react';
|
||||
|
||||
import ActionModal from './components/ActionModal';
|
||||
import DocumentGrid from './components/DocumentGrid';
|
||||
import LoginScreen from './components/LoginScreen';
|
||||
import DocumentViewer from './components/DocumentViewer';
|
||||
import PathInput from './components/PathInput';
|
||||
import ProcessingLogPanel from './components/ProcessingLogPanel';
|
||||
@@ -14,24 +16,29 @@ import SettingsScreen from './components/SettingsScreen';
|
||||
import UploadSurface from './components/UploadSurface';
|
||||
import {
|
||||
clearProcessingLogs,
|
||||
downloadBlobFile,
|
||||
deleteDocument,
|
||||
exportContentsMarkdown,
|
||||
getCurrentAuthSession,
|
||||
getAppSettings,
|
||||
listDocuments,
|
||||
listPaths,
|
||||
listProcessingLogs,
|
||||
listTags,
|
||||
listTypes,
|
||||
loginWithPassword,
|
||||
logoutCurrentSession,
|
||||
resetAppSettings,
|
||||
searchDocuments,
|
||||
trashDocument,
|
||||
updateAppSettings,
|
||||
uploadDocuments,
|
||||
} from './lib/api';
|
||||
import type { AppSettings, AppSettingsUpdate, DmsDocument, ProcessingLogEntry } from './types';
|
||||
import type { AppSettings, AppSettingsUpdate, AuthUser, DmsDocument, ProcessingLogEntry } from './types';
|
||||
|
||||
type AppScreen = 'documents' | 'settings';
|
||||
type DocumentView = 'active' | 'trash';
|
||||
type AuthPhase = 'checking' | 'unauthenticated' | 'authenticated';
|
||||
|
||||
interface DialogOption {
|
||||
key: string;
|
||||
@@ -50,6 +57,10 @@ interface DialogState {
|
||||
*/
|
||||
export default function App(): JSX.Element {
|
||||
const DEFAULT_PAGE_SIZE = 12;
|
||||
const [authPhase, setAuthPhase] = useState<AuthPhase>('checking');
|
||||
const [authUser, setAuthUser] = useState<AuthUser | null>(null);
|
||||
const [authError, setAuthError] = useState<string | null>(null);
|
||||
const [isAuthenticating, setIsAuthenticating] = useState<boolean>(false);
|
||||
const [screen, setScreen] = useState<AppScreen>('documents');
|
||||
const [documentView, setDocumentView] = useState<DocumentView>('active');
|
||||
const [documents, setDocuments] = useState<DmsDocument[]>([]);
|
||||
@@ -81,6 +92,7 @@ export default function App(): JSX.Element {
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [dialogState, setDialogState] = useState<DialogState | null>(null);
|
||||
const dialogResolverRef = useRef<((value: string) => void) | null>(null);
|
||||
const isAdmin = authUser?.role === 'admin';
|
||||
|
||||
const pageSize = useMemo(() => {
|
||||
const configured = appSettings?.display?.cards_per_page;
|
||||
@@ -117,15 +129,71 @@ export default function App(): JSX.Element {
|
||||
}
|
||||
}, []);
|
||||
|
||||
const downloadBlob = useCallback((blob: Blob, filename: string): void => {
|
||||
const objectUrl = URL.createObjectURL(blob);
|
||||
const anchor = document.createElement('a');
|
||||
anchor.href = objectUrl;
|
||||
anchor.download = filename;
|
||||
anchor.click();
|
||||
URL.revokeObjectURL(objectUrl);
|
||||
/**
|
||||
* Clears workspace state when authentication context changes or session is revoked.
|
||||
*/
|
||||
const resetApplicationState = useCallback((): void => {
|
||||
setScreen('documents');
|
||||
setDocumentView('active');
|
||||
setDocuments([]);
|
||||
setTotalDocuments(0);
|
||||
setCurrentPage(1);
|
||||
setSearchText('');
|
||||
setActiveSearchQuery('');
|
||||
setSelectedDocumentId(null);
|
||||
setSelectedDocumentIds([]);
|
||||
setExportPathInput('');
|
||||
setTagFilter('');
|
||||
setTypeFilter('');
|
||||
setPathFilter('');
|
||||
setProcessedFrom('');
|
||||
setProcessedTo('');
|
||||
setKnownTags([]);
|
||||
setKnownPaths([]);
|
||||
setKnownTypes([]);
|
||||
setAppSettings(null);
|
||||
setSettingsSaveAction(null);
|
||||
setProcessingLogs([]);
|
||||
setProcessingLogError(null);
|
||||
setError(null);
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Exchanges submitted credentials for a server-issued session and activates the app shell.
|
||||
*/
|
||||
const handleLogin = useCallback(async (username: string, password: string): Promise<void> => {
|
||||
setIsAuthenticating(true);
|
||||
setAuthError(null);
|
||||
try {
|
||||
const payload = await loginWithPassword(username, password);
|
||||
setAuthUser(payload.user);
|
||||
setAuthPhase('authenticated');
|
||||
setError(null);
|
||||
} catch (caughtError) {
|
||||
const message = caughtError instanceof Error ? caughtError.message : 'Login failed';
|
||||
setAuthError(message);
|
||||
setAuthUser(null);
|
||||
setAuthPhase('unauthenticated');
|
||||
resetApplicationState();
|
||||
} finally {
|
||||
setIsAuthenticating(false);
|
||||
}
|
||||
}, [resetApplicationState]);
|
||||
|
||||
/**
|
||||
* Revokes current session server-side when possible and always clears local auth state.
|
||||
*/
|
||||
const handleLogout = useCallback(async (): Promise<void> => {
|
||||
setError(null);
|
||||
try {
|
||||
await logoutCurrentSession();
|
||||
} catch {}
|
||||
setAuthUser(null);
|
||||
setAuthError(null);
|
||||
setAuthPhase('unauthenticated');
|
||||
resetApplicationState();
|
||||
}, [resetApplicationState]);
|
||||
|
||||
const loadCatalogs = useCallback(async (): Promise<void> => {
|
||||
const [tags, paths, types] = await Promise.all([listTags(true), listPaths(true), listTypes(true)]);
|
||||
setKnownTags(tags);
|
||||
@@ -193,6 +261,10 @@ export default function App(): JSX.Element {
|
||||
]);
|
||||
|
||||
const loadSettings = useCallback(async (): Promise<void> => {
|
||||
if (!isAdmin) {
|
||||
setAppSettings(null);
|
||||
return;
|
||||
}
|
||||
setError(null);
|
||||
try {
|
||||
const payload = await getAppSettings();
|
||||
@@ -200,9 +272,14 @@ export default function App(): JSX.Element {
|
||||
} catch (caughtError) {
|
||||
setError(caughtError instanceof Error ? caughtError.message : 'Failed to load settings');
|
||||
}
|
||||
}, []);
|
||||
}, [isAdmin]);
|
||||
|
||||
const loadProcessingTimeline = useCallback(async (options?: { silent?: boolean }): Promise<void> => {
|
||||
if (!isAdmin) {
|
||||
setProcessingLogs([]);
|
||||
setProcessingLogError(null);
|
||||
return;
|
||||
}
|
||||
const silent = options?.silent ?? false;
|
||||
if (!silent) {
|
||||
setIsLoadingLogs(true);
|
||||
@@ -218,18 +295,44 @@ export default function App(): JSX.Element {
|
||||
setIsLoadingLogs(false);
|
||||
}
|
||||
}
|
||||
}, []);
|
||||
}, [isAdmin]);
|
||||
|
||||
useEffect(() => {
|
||||
const resolveSession = async (): Promise<void> => {
|
||||
try {
|
||||
const sessionPayload = await getCurrentAuthSession();
|
||||
setAuthUser(sessionPayload.user);
|
||||
setAuthError(null);
|
||||
setAuthPhase('authenticated');
|
||||
} catch {
|
||||
setAuthUser(null);
|
||||
setAuthPhase('unauthenticated');
|
||||
resetApplicationState();
|
||||
}
|
||||
};
|
||||
void resolveSession();
|
||||
}, [resetApplicationState]);
|
||||
|
||||
useEffect(() => {
|
||||
if (authPhase !== 'authenticated') {
|
||||
return;
|
||||
}
|
||||
const bootstrap = async (): Promise<void> => {
|
||||
try {
|
||||
await Promise.all([loadDocuments(), loadCatalogs(), loadSettings(), loadProcessingTimeline()]);
|
||||
if (isAdmin) {
|
||||
await Promise.all([loadDocuments(), loadCatalogs(), loadSettings(), loadProcessingTimeline()]);
|
||||
return;
|
||||
}
|
||||
await Promise.all([loadDocuments(), loadCatalogs()]);
|
||||
setAppSettings(null);
|
||||
setProcessingLogs([]);
|
||||
setProcessingLogError(null);
|
||||
} catch (caughtError) {
|
||||
setError(caughtError instanceof Error ? caughtError.message : 'Failed to initialize application');
|
||||
}
|
||||
};
|
||||
void bootstrap();
|
||||
}, [loadCatalogs, loadDocuments, loadProcessingTimeline, loadSettings]);
|
||||
}, [authPhase, isAdmin, loadCatalogs, loadDocuments, loadProcessingTimeline, loadSettings]);
|
||||
|
||||
useEffect(() => {
|
||||
setSelectedDocumentIds([]);
|
||||
@@ -237,13 +340,25 @@ export default function App(): JSX.Element {
|
||||
}, [documentView, pageSize]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!isAdmin && screen === 'settings') {
|
||||
setScreen('documents');
|
||||
}
|
||||
}, [isAdmin, screen]);
|
||||
|
||||
useEffect(() => {
|
||||
if (authPhase !== 'authenticated') {
|
||||
return;
|
||||
}
|
||||
if (screen !== 'documents') {
|
||||
return;
|
||||
}
|
||||
void loadDocuments();
|
||||
}, [loadDocuments, screen]);
|
||||
}, [authPhase, loadDocuments, screen]);
|
||||
|
||||
useEffect(() => {
|
||||
if (authPhase !== 'authenticated') {
|
||||
return;
|
||||
}
|
||||
if (screen !== 'documents') {
|
||||
return;
|
||||
}
|
||||
@@ -251,9 +366,12 @@ export default function App(): JSX.Element {
|
||||
void loadDocuments({ silent: true });
|
||||
}, 3000);
|
||||
return () => window.clearInterval(pollInterval);
|
||||
}, [loadDocuments, screen]);
|
||||
}, [authPhase, loadDocuments, screen]);
|
||||
|
||||
useEffect(() => {
|
||||
if (authPhase !== 'authenticated' || !isAdmin) {
|
||||
return;
|
||||
}
|
||||
if (screen !== 'documents') {
|
||||
return;
|
||||
}
|
||||
@@ -262,7 +380,7 @@ export default function App(): JSX.Element {
|
||||
void loadProcessingTimeline({ silent: true });
|
||||
}, 1500);
|
||||
return () => window.clearInterval(pollInterval);
|
||||
}, [loadProcessingTimeline, screen]);
|
||||
}, [authPhase, isAdmin, loadProcessingTimeline, screen]);
|
||||
|
||||
const selectedDocument = useMemo(
|
||||
() => documents.find((document) => document.id === selectedDocumentId) ?? null,
|
||||
@@ -307,13 +425,17 @@ export default function App(): JSX.Element {
|
||||
});
|
||||
}
|
||||
|
||||
await Promise.all([loadDocuments(), loadCatalogs(), loadProcessingTimeline()]);
|
||||
if (isAdmin) {
|
||||
await Promise.all([loadDocuments(), loadCatalogs(), loadProcessingTimeline()]);
|
||||
} else {
|
||||
await Promise.all([loadDocuments(), loadCatalogs()]);
|
||||
}
|
||||
} catch (caughtError) {
|
||||
setError(caughtError instanceof Error ? caughtError.message : 'Upload failed');
|
||||
} finally {
|
||||
setIsUploading(false);
|
||||
}
|
||||
}, [appSettings, loadCatalogs, loadDocuments, loadProcessingTimeline, presentDialog]);
|
||||
}, [appSettings, isAdmin, loadCatalogs, loadDocuments, loadProcessingTimeline, presentDialog]);
|
||||
|
||||
const handleSearch = useCallback(async (): Promise<void> => {
|
||||
setSelectedDocumentIds([]);
|
||||
@@ -465,13 +587,13 @@ export default function App(): JSX.Element {
|
||||
only_trashed: documentView === 'trash',
|
||||
include_trashed: false,
|
||||
});
|
||||
downloadBlob(result.blob, result.filename);
|
||||
downloadBlobFile(result.blob, result.filename);
|
||||
} catch (caughtError) {
|
||||
setError(caughtError instanceof Error ? caughtError.message : 'Failed to export selected markdown files');
|
||||
} finally {
|
||||
setIsRunningBulkAction(false);
|
||||
}
|
||||
}, [documentView, downloadBlob, selectedDocumentIds]);
|
||||
}, [documentView, selectedDocumentIds]);
|
||||
|
||||
const handleExportPath = useCallback(async (): Promise<void> => {
|
||||
const trimmedPrefix = exportPathInput.trim();
|
||||
@@ -487,13 +609,13 @@ export default function App(): JSX.Element {
|
||||
only_trashed: documentView === 'trash',
|
||||
include_trashed: false,
|
||||
});
|
||||
downloadBlob(result.blob, result.filename);
|
||||
downloadBlobFile(result.blob, result.filename);
|
||||
} catch (caughtError) {
|
||||
setError(caughtError instanceof Error ? caughtError.message : 'Failed to export path markdown files');
|
||||
} finally {
|
||||
setIsRunningBulkAction(false);
|
||||
}
|
||||
}, [documentView, downloadBlob, exportPathInput]);
|
||||
}, [documentView, exportPathInput]);
|
||||
|
||||
const handleSaveSettings = useCallback(async (payload: AppSettingsUpdate): Promise<void> => {
|
||||
setIsSavingSettings(true);
|
||||
@@ -587,66 +709,99 @@ export default function App(): JSX.Element {
|
||||
setCurrentPage(1);
|
||||
}, []);
|
||||
|
||||
if (authPhase === 'checking') {
|
||||
return (
|
||||
<main className="auth-shell">
|
||||
<section className="auth-card">
|
||||
<h1>LedgerDock</h1>
|
||||
<p>Checking current session...</p>
|
||||
</section>
|
||||
</main>
|
||||
);
|
||||
}
|
||||
|
||||
if (authPhase !== 'authenticated') {
|
||||
return <LoginScreen error={authError} isSubmitting={isAuthenticating} onSubmit={handleLogin} />;
|
||||
}
|
||||
|
||||
return (
|
||||
<main className="app-shell">
|
||||
<header className="topbar">
|
||||
<div>
|
||||
<h1>LedgerDock</h1>
|
||||
<p>Document command deck for OCR, routing intelligence, and controlled metadata ops.</p>
|
||||
</div>
|
||||
<div className="topbar-controls">
|
||||
<div className="topbar-nav-group">
|
||||
<button
|
||||
type="button"
|
||||
className={screen === 'documents' && documentView === 'active' ? 'active-view-button' : 'secondary-action'}
|
||||
onClick={() => {
|
||||
setScreen('documents');
|
||||
setDocumentView('active');
|
||||
}}
|
||||
>
|
||||
Documents
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
className={screen === 'documents' && documentView === 'trash' ? 'active-view-button' : 'secondary-action'}
|
||||
onClick={() => {
|
||||
setScreen('documents');
|
||||
setDocumentView('trash');
|
||||
}}
|
||||
>
|
||||
Trash
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
className={screen === 'settings' ? 'active-view-button' : 'secondary-action'}
|
||||
onClick={() => setScreen('settings')}
|
||||
>
|
||||
Settings
|
||||
</button>
|
||||
<div className="topbar-inner">
|
||||
<div className="topbar-brand">
|
||||
<h1>LedgerDock</h1>
|
||||
<p>Document command deck for OCR, routing intelligence, and controlled metadata ops.</p>
|
||||
<p className="topbar-auth-status">
|
||||
<User className="topbar-user-icon" aria-hidden="true" />
|
||||
You are currently signed in as <span className="topbar-current-username">{authUser?.username}</span>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{screen === 'documents' && (
|
||||
<div className="topbar-document-group">
|
||||
<UploadSurface onUploadRequested={handleUpload} isUploading={isUploading} variant="inline" />
|
||||
</div>
|
||||
)}
|
||||
|
||||
{screen === 'settings' && (
|
||||
<div className="topbar-settings-group">
|
||||
<button type="button" className="secondary-action" onClick={() => void handleResetSettings()} disabled={isSavingSettings}>
|
||||
Reset To Defaults
|
||||
</button>
|
||||
<button type="button" onClick={() => void handleSaveSettingsFromHeader()} disabled={isSavingSettings || !settingsSaveAction}>
|
||||
{isSavingSettings ? 'Saving Settings...' : 'Save Settings'}
|
||||
<div className="topbar-controls">
|
||||
<div className="topbar-primary-row">
|
||||
<div className="topbar-nav-group">
|
||||
<button
|
||||
type="button"
|
||||
className={screen === 'documents' && documentView === 'active' ? 'active-view-button' : 'secondary-action'}
|
||||
onClick={() => {
|
||||
setScreen('documents');
|
||||
setDocumentView('active');
|
||||
}}
|
||||
>
|
||||
Documents
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
className={screen === 'documents' && documentView === 'trash' ? 'active-view-button' : 'secondary-action'}
|
||||
onClick={() => {
|
||||
setScreen('documents');
|
||||
setDocumentView('trash');
|
||||
}}
|
||||
>
|
||||
Trash
|
||||
</button>
|
||||
{isAdmin && (
|
||||
<button
|
||||
type="button"
|
||||
className={screen === 'settings' ? 'active-view-button' : 'secondary-action'}
|
||||
onClick={() => setScreen('settings')}
|
||||
>
|
||||
Settings
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
<button
|
||||
type="button"
|
||||
className="secondary-action topbar-icon-action"
|
||||
onClick={() => void handleLogout()}
|
||||
aria-label="Sign out"
|
||||
>
|
||||
<LogOut className="topbar-signout-icon" aria-hidden="true" />
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{screen === 'documents' && (
|
||||
<div className="topbar-document-group">
|
||||
<UploadSurface onUploadRequested={handleUpload} isUploading={isUploading} variant="inline" />
|
||||
</div>
|
||||
)}
|
||||
|
||||
{screen === 'settings' && isAdmin && (
|
||||
<div className="topbar-settings-group">
|
||||
<button type="button" className="secondary-action" onClick={() => void handleResetSettings()} disabled={isSavingSettings}>
|
||||
Reset To Defaults
|
||||
</button>
|
||||
<button type="button" onClick={() => void handleSaveSettingsFromHeader()} disabled={isSavingSettings || !settingsSaveAction}>
|
||||
{isSavingSettings ? 'Saving Settings...' : 'Save Settings'}
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
{error && <p className="error-banner">{error}</p>}
|
||||
|
||||
{screen === 'settings' && (
|
||||
{screen === 'settings' && isAdmin && (
|
||||
<SettingsScreen
|
||||
settings={appSettings}
|
||||
isSaving={isSavingSettings}
|
||||
@@ -770,16 +925,18 @@ export default function App(): JSX.Element {
|
||||
requestConfirmation={requestConfirmation}
|
||||
/>
|
||||
</section>
|
||||
{processingLogError && <p className="error-banner">{processingLogError}</p>}
|
||||
<ProcessingLogPanel
|
||||
entries={processingLogs}
|
||||
isLoading={isLoadingLogs}
|
||||
isClearing={isClearingLogs}
|
||||
selectedDocumentId={selectedDocumentId}
|
||||
isProcessingActive={isProcessingActive}
|
||||
typingAnimationEnabled={typingAnimationEnabled}
|
||||
onClear={() => void handleClearProcessingLogs()}
|
||||
/>
|
||||
{isAdmin && processingLogError && <p className="error-banner">{processingLogError}</p>}
|
||||
{isAdmin && (
|
||||
<ProcessingLogPanel
|
||||
entries={processingLogs}
|
||||
isLoading={isLoadingLogs}
|
||||
isClearing={isClearingLogs}
|
||||
selectedDocumentId={selectedDocumentId}
|
||||
isProcessingActive={isProcessingActive}
|
||||
typingAnimationEnabled={typingAnimationEnabled}
|
||||
onClear={() => void handleClearProcessingLogs()}
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
|
||||
|
||||
@@ -1,12 +1,17 @@
|
||||
/**
|
||||
* Card view for displaying document summary, preview, and metadata.
|
||||
*/
|
||||
import { useState } from 'react';
|
||||
import { useEffect, useRef, useState } from 'react';
|
||||
import type { JSX } from 'react';
|
||||
import { Download, FileText, Trash2 } from 'lucide-react';
|
||||
|
||||
import type { DmsDocument } from '../types';
|
||||
import { contentMarkdownUrl, downloadUrl, thumbnailUrl } from '../lib/api';
|
||||
import {
|
||||
downloadBlobFile,
|
||||
downloadDocumentContentMarkdown,
|
||||
downloadDocumentFile,
|
||||
getDocumentThumbnailBlob,
|
||||
} from '../lib/api';
|
||||
|
||||
/**
|
||||
* Defines properties accepted by the document card component.
|
||||
@@ -79,12 +84,59 @@ export default function DocumentCard({
|
||||
onFilterTag,
|
||||
}: DocumentCardProps): JSX.Element {
|
||||
const [isTrashing, setIsTrashing] = useState<boolean>(false);
|
||||
const [thumbnailObjectUrl, setThumbnailObjectUrl] = useState<string | null>(null);
|
||||
const thumbnailObjectUrlRef = useRef<string | null>(null);
|
||||
const createdDate = new Date(document.created_at).toLocaleString();
|
||||
const status = statusPresentation(document.status);
|
||||
const compactPath = compactLogicalPath(document.logical_path, 180);
|
||||
const trashDisabled = isTrashView || document.status === 'trashed' || isTrashing;
|
||||
const trashTitle = trashDisabled ? 'Already in trash' : 'Move to trash';
|
||||
|
||||
/**
|
||||
* Loads thumbnail preview through authenticated fetch and revokes replaced object URLs.
|
||||
*/
|
||||
useEffect(() => {
|
||||
const revokeThumbnailObjectUrl = (): void => {
|
||||
if (!thumbnailObjectUrlRef.current) {
|
||||
return;
|
||||
}
|
||||
URL.revokeObjectURL(thumbnailObjectUrlRef.current);
|
||||
thumbnailObjectUrlRef.current = null;
|
||||
};
|
||||
|
||||
if (!document.preview_available) {
|
||||
revokeThumbnailObjectUrl();
|
||||
setThumbnailObjectUrl(null);
|
||||
return;
|
||||
}
|
||||
|
||||
let cancelled = false;
|
||||
const loadThumbnail = async (): Promise<void> => {
|
||||
try {
|
||||
const blob = await getDocumentThumbnailBlob(document.id);
|
||||
if (cancelled) {
|
||||
return;
|
||||
}
|
||||
revokeThumbnailObjectUrl();
|
||||
const objectUrl = URL.createObjectURL(blob);
|
||||
thumbnailObjectUrlRef.current = objectUrl;
|
||||
setThumbnailObjectUrl(objectUrl);
|
||||
} catch {
|
||||
if (cancelled) {
|
||||
return;
|
||||
}
|
||||
revokeThumbnailObjectUrl();
|
||||
setThumbnailObjectUrl(null);
|
||||
}
|
||||
};
|
||||
|
||||
void loadThumbnail();
|
||||
return () => {
|
||||
cancelled = true;
|
||||
revokeThumbnailObjectUrl();
|
||||
};
|
||||
}, [document.id, document.preview_available]);
|
||||
|
||||
return (
|
||||
<article
|
||||
className={`document-card ${isSelected ? 'selected' : ''}`}
|
||||
@@ -119,8 +171,8 @@ export default function DocumentCard({
|
||||
</label>
|
||||
</header>
|
||||
<div className="document-preview">
|
||||
{document.preview_available ? (
|
||||
<img src={thumbnailUrl(document.id)} alt={document.original_filename} loading="lazy" />
|
||||
{document.preview_available && thumbnailObjectUrl ? (
|
||||
<img src={thumbnailObjectUrl} alt={document.original_filename} loading="lazy" />
|
||||
) : (
|
||||
<div className="document-preview-fallback">{document.extension || 'file'}</div>
|
||||
)}
|
||||
@@ -173,7 +225,13 @@ export default function DocumentCard({
|
||||
onClick={(event) => {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
window.open(downloadUrl(document.id), '_blank', 'noopener,noreferrer');
|
||||
void (async (): Promise<void> => {
|
||||
try {
|
||||
const payload = await downloadDocumentFile(document.id);
|
||||
downloadBlobFile(payload.blob, payload.filename);
|
||||
} catch {
|
||||
}
|
||||
})();
|
||||
}}
|
||||
>
|
||||
<Download aria-hidden="true" />
|
||||
@@ -186,7 +244,13 @@ export default function DocumentCard({
|
||||
onClick={(event) => {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
window.open(contentMarkdownUrl(document.id), '_blank', 'noopener,noreferrer');
|
||||
void (async (): Promise<void> => {
|
||||
try {
|
||||
const payload = await downloadDocumentContentMarkdown(document.id);
|
||||
downloadBlobFile(payload.blob, payload.filename);
|
||||
} catch {
|
||||
}
|
||||
})();
|
||||
}}
|
||||
>
|
||||
<FileText aria-hidden="true" />
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
/**
|
||||
* Embedded document viewer panel for preview, metadata updates, and lifecycle actions.
|
||||
*/
|
||||
import { useEffect, useMemo, useState } from 'react';
|
||||
import { useEffect, useMemo, useRef, useState } from 'react';
|
||||
import type { JSX } from 'react';
|
||||
|
||||
import {
|
||||
contentMarkdownUrl,
|
||||
downloadBlobFile,
|
||||
downloadDocumentContentMarkdown,
|
||||
deleteDocument,
|
||||
getDocumentDetails,
|
||||
previewUrl,
|
||||
getDocumentPreviewBlob,
|
||||
reprocessDocument,
|
||||
restoreDocument,
|
||||
trashDocument,
|
||||
@@ -18,6 +19,47 @@ import type { DmsDocument, DmsDocumentDetail } from '../types';
|
||||
import PathInput from './PathInput';
|
||||
import TagInput from './TagInput';
|
||||
|
||||
const SAFE_IMAGE_PREVIEW_MIME_TYPES = new Set<string>([
|
||||
'image/bmp',
|
||||
'image/gif',
|
||||
'image/jpeg',
|
||||
'image/jpg',
|
||||
'image/png',
|
||||
'image/webp',
|
||||
]);
|
||||
|
||||
const SAFE_IFRAME_PREVIEW_MIME_TYPES = new Set<string>([
|
||||
'application/json',
|
||||
'application/pdf',
|
||||
'text/csv',
|
||||
'text/markdown',
|
||||
'text/plain',
|
||||
]);
|
||||
|
||||
/**
|
||||
* Normalizes MIME values by stripping parameters and lowercasing for stable comparison.
|
||||
*/
|
||||
function normalizeMimeType(mimeType: string | null | undefined): string {
|
||||
if (!mimeType) {
|
||||
return '';
|
||||
}
|
||||
return mimeType.split(';')[0]?.trim().toLowerCase() ?? '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves whether a MIME type is safe to render as an image preview.
|
||||
*/
|
||||
function isSafeImagePreviewMimeType(mimeType: string): boolean {
|
||||
return SAFE_IMAGE_PREVIEW_MIME_TYPES.has(mimeType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves whether a MIME type is safe to render inside a sandboxed iframe preview.
|
||||
*/
|
||||
function isSafeIframePreviewMimeType(mimeType: string): boolean {
|
||||
return SAFE_IFRAME_PREVIEW_MIME_TYPES.has(mimeType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Defines props for the selected document viewer panel.
|
||||
*/
|
||||
@@ -44,6 +86,8 @@ export default function DocumentViewer({
|
||||
requestConfirmation,
|
||||
}: DocumentViewerProps): JSX.Element {
|
||||
const [documentDetail, setDocumentDetail] = useState<DmsDocumentDetail | null>(null);
|
||||
const [previewObjectUrl, setPreviewObjectUrl] = useState<string | null>(null);
|
||||
const [isLoadingPreview, setIsLoadingPreview] = useState<boolean>(false);
|
||||
const [isLoadingDetails, setIsLoadingDetails] = useState<boolean>(false);
|
||||
const [originalFilename, setOriginalFilename] = useState<string>('');
|
||||
const [logicalPath, setLogicalPath] = useState<string>('');
|
||||
@@ -55,6 +99,31 @@ export default function DocumentViewer({
|
||||
const [isDeleting, setIsDeleting] = useState<boolean>(false);
|
||||
const [isMetadataDirty, setIsMetadataDirty] = useState<boolean>(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const previewObjectUrlRef = useRef<string | null>(null);
|
||||
|
||||
/**
|
||||
* Resolves normalized MIME type used by preview safety checks.
|
||||
*/
|
||||
const previewMimeType = useMemo(() => normalizeMimeType(document?.mime_type), [document?.mime_type]);
|
||||
|
||||
/**
|
||||
* Resolves whether selected document should render as a safe image element in preview.
|
||||
*/
|
||||
const isImageDocument = useMemo(() => {
|
||||
return isSafeImagePreviewMimeType(previewMimeType);
|
||||
}, [previewMimeType]);
|
||||
|
||||
/**
|
||||
* Resolves whether selected document should render in sandboxed iframe preview.
|
||||
*/
|
||||
const canRenderIframePreview = useMemo(() => {
|
||||
return isSafeIframePreviewMimeType(previewMimeType);
|
||||
}, [previewMimeType]);
|
||||
|
||||
/**
|
||||
* Resolves whether selected document supports any inline preview mode.
|
||||
*/
|
||||
const canRenderInlinePreview = isImageDocument || canRenderIframePreview;
|
||||
|
||||
/**
|
||||
* Syncs editable metadata fields whenever selection changes.
|
||||
@@ -62,6 +131,12 @@ export default function DocumentViewer({
|
||||
useEffect(() => {
|
||||
if (!document) {
|
||||
setDocumentDetail(null);
|
||||
if (previewObjectUrlRef.current) {
|
||||
URL.revokeObjectURL(previewObjectUrlRef.current);
|
||||
previewObjectUrlRef.current = null;
|
||||
}
|
||||
setPreviewObjectUrl(null);
|
||||
setIsLoadingPreview(false);
|
||||
setIsMetadataDirty(false);
|
||||
return;
|
||||
}
|
||||
@@ -72,6 +147,63 @@ export default function DocumentViewer({
|
||||
setError(null);
|
||||
}, [document?.id]);
|
||||
|
||||
/**
|
||||
* Loads authenticated preview bytes and exposes a temporary object URL for iframe or image rendering.
|
||||
*/
|
||||
useEffect(() => {
|
||||
const revokePreviewObjectUrl = (): void => {
|
||||
if (!previewObjectUrlRef.current) {
|
||||
return;
|
||||
}
|
||||
URL.revokeObjectURL(previewObjectUrlRef.current);
|
||||
previewObjectUrlRef.current = null;
|
||||
};
|
||||
|
||||
if (!document) {
|
||||
revokePreviewObjectUrl();
|
||||
setPreviewObjectUrl(null);
|
||||
setIsLoadingPreview(false);
|
||||
return;
|
||||
}
|
||||
if (!canRenderInlinePreview) {
|
||||
revokePreviewObjectUrl();
|
||||
setPreviewObjectUrl(null);
|
||||
setIsLoadingPreview(false);
|
||||
return;
|
||||
}
|
||||
|
||||
let cancelled = false;
|
||||
setIsLoadingPreview(true);
|
||||
const loadPreview = async (): Promise<void> => {
|
||||
try {
|
||||
const blob = await getDocumentPreviewBlob(document.id);
|
||||
if (cancelled) {
|
||||
return;
|
||||
}
|
||||
revokePreviewObjectUrl();
|
||||
const objectUrl = URL.createObjectURL(blob);
|
||||
previewObjectUrlRef.current = objectUrl;
|
||||
setPreviewObjectUrl(objectUrl);
|
||||
} catch {
|
||||
if (cancelled) {
|
||||
return;
|
||||
}
|
||||
revokePreviewObjectUrl();
|
||||
setPreviewObjectUrl(null);
|
||||
} finally {
|
||||
if (!cancelled) {
|
||||
setIsLoadingPreview(false);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
void loadPreview();
|
||||
return () => {
|
||||
cancelled = true;
|
||||
revokePreviewObjectUrl();
|
||||
};
|
||||
}, [document?.id, canRenderInlinePreview]);
|
||||
|
||||
/**
|
||||
* Refreshes editable metadata from list updates only while form is clean.
|
||||
*/
|
||||
@@ -122,16 +254,6 @@ export default function DocumentViewer({
|
||||
};
|
||||
}, [document?.id]);
|
||||
|
||||
/**
|
||||
* Resolves whether selected document should render as an image element in preview.
|
||||
*/
|
||||
const isImageDocument = useMemo(() => {
|
||||
if (!document) {
|
||||
return false;
|
||||
}
|
||||
return document.mime_type.startsWith('image/');
|
||||
}, [document]);
|
||||
|
||||
/**
|
||||
* Extracts provider/transcription errors from document metadata for user visibility.
|
||||
*/
|
||||
@@ -418,10 +540,27 @@ export default function DocumentViewer({
|
||||
<h2>{document.original_filename}</h2>
|
||||
<p className="small">Status: {document.status}</p>
|
||||
<div className="viewer-preview">
|
||||
{isImageDocument ? (
|
||||
<img src={previewUrl(document.id)} alt={document.original_filename} />
|
||||
{previewObjectUrl ? (
|
||||
isImageDocument ? (
|
||||
<img src={previewObjectUrl} alt={document.original_filename} />
|
||||
) : canRenderIframePreview ? (
|
||||
<iframe
|
||||
src={previewObjectUrl}
|
||||
title={document.original_filename}
|
||||
sandbox=""
|
||||
referrerPolicy="no-referrer"
|
||||
allow="clipboard-read 'none'; clipboard-write 'none'; geolocation 'none'; microphone 'none'; camera 'none'; payment 'none'; usb 'none'; fullscreen 'none'"
|
||||
loading="lazy"
|
||||
/>
|
||||
) : (
|
||||
<p className="small">Preview blocked for this file type. Download to inspect safely.</p>
|
||||
)
|
||||
) : isLoadingPreview ? (
|
||||
<p className="small">Loading preview...</p>
|
||||
) : !canRenderInlinePreview ? (
|
||||
<p className="small">Preview blocked for this file type. Download to inspect safely.</p>
|
||||
) : (
|
||||
<iframe src={previewUrl(document.id)} title={document.original_filename} />
|
||||
<p className="small">Preview unavailable for this document.</p>
|
||||
)}
|
||||
</div>
|
||||
<label>
|
||||
@@ -561,7 +700,16 @@ export default function DocumentViewer({
|
||||
<button
|
||||
type="button"
|
||||
className="secondary-action"
|
||||
onClick={() => window.open(contentMarkdownUrl(document.id), '_blank', 'noopener,noreferrer')}
|
||||
onClick={() => {
|
||||
void (async (): Promise<void> => {
|
||||
try {
|
||||
const payload = await downloadDocumentContentMarkdown(document.id);
|
||||
downloadBlobFile(payload.blob, payload.filename);
|
||||
} catch (caughtError) {
|
||||
setError(caughtError instanceof Error ? caughtError.message : 'Failed to download markdown');
|
||||
}
|
||||
})();
|
||||
}}
|
||||
disabled={isDeleting}
|
||||
title="Downloads recognized/extracted content as markdown for this document."
|
||||
>
|
||||
|
||||
71
frontend/src/components/LoginScreen.tsx
Normal file
71
frontend/src/components/LoginScreen.tsx
Normal file
@@ -0,0 +1,71 @@
|
||||
/**
|
||||
* Login screen for session-based authentication before loading protected application views.
|
||||
*/
|
||||
import { FormEvent, useState } from 'react';
|
||||
import type { JSX } from 'react';
|
||||
|
||||
interface LoginScreenProps {
|
||||
error: string | null;
|
||||
isSubmitting: boolean;
|
||||
onSubmit: (username: string, password: string) => Promise<void>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Renders credential form used to issue per-user API sessions.
|
||||
*/
|
||||
export default function LoginScreen({
|
||||
error,
|
||||
isSubmitting,
|
||||
onSubmit,
|
||||
}: LoginScreenProps): JSX.Element {
|
||||
const [username, setUsername] = useState<string>('');
|
||||
const [password, setPassword] = useState<string>('');
|
||||
|
||||
/**
|
||||
* Submits credentials and leaves result handling to parent application orchestration.
|
||||
*/
|
||||
const handleSubmit = (event: FormEvent<HTMLFormElement>): void => {
|
||||
event.preventDefault();
|
||||
if (isSubmitting) {
|
||||
return;
|
||||
}
|
||||
void onSubmit(username, password);
|
||||
};
|
||||
|
||||
return (
|
||||
<main className="auth-shell">
|
||||
<section className="auth-card">
|
||||
<h1>LedgerDock</h1>
|
||||
<p>Sign in with your account to access documents and role-scoped controls.</p>
|
||||
<form onSubmit={handleSubmit} className="auth-form">
|
||||
<label>
|
||||
Username
|
||||
<input
|
||||
type="text"
|
||||
value={username}
|
||||
onChange={(event) => setUsername(event.target.value)}
|
||||
autoComplete="username"
|
||||
required
|
||||
disabled={isSubmitting}
|
||||
/>
|
||||
</label>
|
||||
<label>
|
||||
Password
|
||||
<input
|
||||
type="password"
|
||||
value={password}
|
||||
onChange={(event) => setPassword(event.target.value)}
|
||||
autoComplete="current-password"
|
||||
required
|
||||
disabled={isSubmitting}
|
||||
/>
|
||||
</label>
|
||||
<button type="submit" disabled={isSubmitting}>
|
||||
{isSubmitting ? 'Signing In...' : 'Sign In'}
|
||||
</button>
|
||||
</form>
|
||||
{error && <p className="error-banner">{error}</p>}
|
||||
</section>
|
||||
</main>
|
||||
);
|
||||
}
|
||||
181
frontend/src/lib/api.test.ts
Normal file
181
frontend/src/lib/api.test.ts
Normal file
@@ -0,0 +1,181 @@
|
||||
// @ts-ignore Node strip-types runtime requires explicit .ts extension in ESM imports.
|
||||
import {
|
||||
downloadDocumentContentMarkdown,
|
||||
downloadDocumentFile,
|
||||
getCurrentAuthSession,
|
||||
getDocumentPreviewBlob,
|
||||
getDocumentThumbnailBlob,
|
||||
loginWithPassword,
|
||||
logoutCurrentSession,
|
||||
updateDocumentMetadata,
|
||||
} from './api.ts';
|
||||
|
||||
/**
|
||||
* Throws when a test condition is false.
|
||||
*/
|
||||
function assert(condition: boolean, message: string): void {
|
||||
if (!condition) {
|
||||
throw new Error(message);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that async functions reject with an expected message fragment.
|
||||
*/
|
||||
async function assertRejects(action: () => Promise<unknown>, expectedMessage: string): Promise<void> {
|
||||
try {
|
||||
await action();
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
assert(message.includes(expectedMessage), `Expected error containing "${expectedMessage}" but received "${message}"`);
|
||||
return;
|
||||
}
|
||||
throw new Error(`Expected rejection containing "${expectedMessage}"`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts fetch inputs into a URL string for assertions.
|
||||
*/
|
||||
function toRequestUrl(input: RequestInfo | URL): string {
|
||||
if (typeof input === 'string') {
|
||||
return input;
|
||||
}
|
||||
if (input instanceof URL) {
|
||||
return input.toString();
|
||||
}
|
||||
return input.url;
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs API helper tests for authenticated media and auth session workflows.
|
||||
*/
|
||||
async function runApiTests(): Promise<void> {
|
||||
const originalFetch = globalThis.fetch;
|
||||
const globalWithDocument = globalThis as typeof globalThis & { document?: { cookie?: string } };
|
||||
const originalDocument = globalWithDocument.document;
|
||||
|
||||
try {
|
||||
const requestUrls: string[] = [];
|
||||
const requestAuthHeaders: Array<string | null> = [];
|
||||
const requestCsrfHeaders: Array<string | null> = [];
|
||||
globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
|
||||
requestUrls.push(toRequestUrl(input));
|
||||
const normalizedHeaders = new Headers(init?.headers);
|
||||
requestAuthHeaders.push(normalizedHeaders.get('Authorization'));
|
||||
requestCsrfHeaders.push(normalizedHeaders.get('x-csrf-token'));
|
||||
return new Response('preview-bytes', { status: 200 });
|
||||
}) as typeof fetch;
|
||||
|
||||
const thumbnail = await getDocumentThumbnailBlob('doc-1');
|
||||
const preview = await getDocumentPreviewBlob('doc-1');
|
||||
|
||||
assert(await thumbnail.text() === 'preview-bytes', 'Thumbnail blob bytes mismatch');
|
||||
assert(await preview.text() === 'preview-bytes', 'Preview blob bytes mismatch');
|
||||
assert(
|
||||
requestUrls[0] === 'http://localhost:8000/api/v1/documents/doc-1/thumbnail',
|
||||
`Unexpected thumbnail URL ${requestUrls[0]}`,
|
||||
);
|
||||
assert(
|
||||
requestUrls[1] === 'http://localhost:8000/api/v1/documents/doc-1/preview',
|
||||
`Unexpected preview URL ${requestUrls[1]}`,
|
||||
);
|
||||
assert(requestAuthHeaders[0] === null, `Expected no auth header for thumbnail request, got "${requestAuthHeaders[0]}"`);
|
||||
assert(requestAuthHeaders[1] === null, `Expected no auth header for preview request, got "${requestAuthHeaders[1]}"`);
|
||||
assert(requestCsrfHeaders[0] === null, `Expected no CSRF header for thumbnail request, got "${requestCsrfHeaders[0]}"`);
|
||||
assert(requestCsrfHeaders[1] === null, `Expected no CSRF header for preview request, got "${requestCsrfHeaders[1]}"`);
|
||||
|
||||
globalWithDocument.document = {
|
||||
cookie: 'dcm_csrf=csrf-session-token',
|
||||
};
|
||||
let metadataCsrfHeader: string | null = null;
|
||||
let metadataContentType: string | null = null;
|
||||
let metadataAuthHeader: string | null = null;
|
||||
globalThis.fetch = (async (_input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
|
||||
const headers = new Headers(init?.headers);
|
||||
metadataCsrfHeader = headers.get('x-csrf-token');
|
||||
metadataAuthHeader = headers.get('Authorization');
|
||||
metadataContentType = headers.get('Content-Type');
|
||||
return new Response('{}', { status: 200 });
|
||||
}) as typeof fetch;
|
||||
await updateDocumentMetadata('doc-headers', { original_filename: 'renamed.pdf' });
|
||||
assert(metadataContentType === 'application/json', `Expected JSON content type to be preserved, got "${metadataContentType}"`);
|
||||
assert(metadataAuthHeader === null, `Expected no auth header, got "${metadataAuthHeader}"`);
|
||||
assert(metadataCsrfHeader === 'csrf-session-token', `Expected CSRF header, got "${metadataCsrfHeader}"`);
|
||||
|
||||
globalThis.fetch = (async (): Promise<Response> => {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
access_token: 'issued-session-token',
|
||||
token_type: 'bearer',
|
||||
expires_at: '2026-03-01T10:30:00Z',
|
||||
user: {
|
||||
id: '3a42f5e0-b1ad-4f68-b2f4-3fa8c2fb31c9',
|
||||
username: 'admin',
|
||||
role: 'admin',
|
||||
},
|
||||
}),
|
||||
{ status: 200, headers: { 'Content-Type': 'application/json' } },
|
||||
);
|
||||
}) as typeof fetch;
|
||||
const loginPayload = await loginWithPassword('admin', 'password');
|
||||
assert(loginPayload.access_token === 'issued-session-token', 'Unexpected issued session token in login payload');
|
||||
assert(loginPayload.user.username === 'admin', 'Unexpected login user payload');
|
||||
|
||||
globalThis.fetch = (async (): Promise<Response> => {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
expires_at: '2026-03-01T10:30:00Z',
|
||||
user: {
|
||||
id: '3a42f5e0-b1ad-4f68-b2f4-3fa8c2fb31c9',
|
||||
username: 'admin',
|
||||
role: 'admin',
|
||||
},
|
||||
}),
|
||||
{ status: 200, headers: { 'Content-Type': 'application/json' } },
|
||||
);
|
||||
}) as typeof fetch;
|
||||
const sessionPayload = await getCurrentAuthSession();
|
||||
assert(sessionPayload.user.role === 'admin', 'Expected admin role from auth session payload');
|
||||
|
||||
globalThis.fetch = (async (): Promise<Response> => {
|
||||
return new Response('{}', { status: 200, headers: { 'Content-Type': 'application/json' } });
|
||||
}) as typeof fetch;
|
||||
await logoutCurrentSession();
|
||||
|
||||
globalThis.fetch = (async (): Promise<Response> => {
|
||||
return new Response('file-bytes', {
|
||||
status: 200,
|
||||
headers: {
|
||||
'content-disposition': 'attachment; filename="invoice.pdf"',
|
||||
},
|
||||
});
|
||||
}) as typeof fetch;
|
||||
|
||||
const fileResult = await downloadDocumentFile('doc-2');
|
||||
assert(fileResult.filename === 'invoice.pdf', `Unexpected download filename ${fileResult.filename}`);
|
||||
assert((await fileResult.blob.text()) === 'file-bytes', 'Original download bytes mismatch');
|
||||
|
||||
globalThis.fetch = (async (): Promise<Response> => {
|
||||
return new Response('# markdown', { status: 200 });
|
||||
}) as typeof fetch;
|
||||
|
||||
const markdownResult = await downloadDocumentContentMarkdown('doc-3');
|
||||
assert(markdownResult.filename === 'document-content.md', `Unexpected markdown filename ${markdownResult.filename}`);
|
||||
assert((await markdownResult.blob.text()) === '# markdown', 'Markdown bytes mismatch');
|
||||
|
||||
globalThis.fetch = (async (): Promise<Response> => {
|
||||
return new Response('forbidden', { status: 401 });
|
||||
}) as typeof fetch;
|
||||
|
||||
await assertRejects(async () => downloadDocumentContentMarkdown('doc-4'), 'Failed to download document markdown');
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
if (originalDocument !== undefined) {
|
||||
globalWithDocument.document = originalDocument;
|
||||
} else {
|
||||
delete globalWithDocument.document;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await runApiTests();
|
||||
@@ -4,6 +4,8 @@
|
||||
import type {
|
||||
AppSettings,
|
||||
AppSettingsUpdate,
|
||||
AuthLoginResponse,
|
||||
AuthSessionInfo,
|
||||
DocumentListResponse,
|
||||
DmsDocument,
|
||||
DmsDocumentDetail,
|
||||
@@ -14,9 +16,139 @@ import type {
|
||||
} from '../types';
|
||||
|
||||
/**
|
||||
* Resolves backend base URL from environment with localhost fallback.
|
||||
* Resolves backend base URL from environment with same-origin proxy fallback.
|
||||
*/
|
||||
const API_BASE = import.meta.env.VITE_API_BASE ?? 'http://localhost:8000/api/v1';
|
||||
function resolveApiBase(): string {
|
||||
const envValue = import.meta.env?.VITE_API_BASE;
|
||||
if (typeof envValue === 'string') {
|
||||
const trimmed = envValue.trim().replace(/\/+$/, '');
|
||||
if (trimmed) {
|
||||
return trimmed;
|
||||
}
|
||||
}
|
||||
|
||||
if (typeof window !== 'undefined' && window.location?.origin) {
|
||||
return '/api/v1';
|
||||
}
|
||||
return 'http://localhost:8000/api/v1';
|
||||
}
|
||||
|
||||
const API_BASE = resolveApiBase();
|
||||
|
||||
/**
|
||||
* CSRF cookie contract used by authenticated requests.
|
||||
*/
|
||||
const CSRF_COOKIE_NAME = "dcm_csrf";
|
||||
const CSRF_HEADER_NAME = "x-csrf-token";
|
||||
const CSRF_SAFE_METHODS = new Set(["GET", "HEAD", "OPTIONS"]);
|
||||
const CSRF_SESSION_STORAGE_KEY = "dcm_csrf_token";
|
||||
|
||||
type ApiRequestInit = Omit<RequestInit, 'headers'> & { headers?: HeadersInit };
|
||||
|
||||
type ApiErrorPayload = { detail?: string } | null;
|
||||
|
||||
/**
|
||||
* Returns a cookie value by name for the active browser runtime.
|
||||
*/
|
||||
function getCookieValue(name: string): string | undefined {
|
||||
if (typeof document === "undefined") {
|
||||
return undefined;
|
||||
}
|
||||
const rawCookie = document.cookie ?? "";
|
||||
return rawCookie
|
||||
.split(";")
|
||||
.map((entry) => entry.trim())
|
||||
.find((entry) => entry.startsWith(`${name}=`))
|
||||
?.slice(name.length + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves the runtime CSRF token from browser cookie storage for API requests.
|
||||
*/
|
||||
function resolveCsrfToken(): string | undefined {
|
||||
const cookieToken = getCookieValue(CSRF_COOKIE_NAME);
|
||||
if (cookieToken) {
|
||||
return cookieToken;
|
||||
}
|
||||
return loadStoredCsrfToken();
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the runtime CSRF token from browser session storage.
|
||||
*/
|
||||
function loadStoredCsrfToken(): string | undefined {
|
||||
if (typeof window === "undefined") {
|
||||
return undefined;
|
||||
}
|
||||
const rawValue = window.sessionStorage.getItem(CSRF_SESSION_STORAGE_KEY);
|
||||
const normalizedValue = rawValue?.trim();
|
||||
return normalizedValue ? normalizedValue : undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Persists or clears a runtime CSRF token in browser session storage.
|
||||
*/
|
||||
function persistCsrfToken(token: string | undefined | null): void {
|
||||
if (typeof window === "undefined") {
|
||||
return;
|
||||
}
|
||||
const normalizedValue = typeof token === "string" ? token.trim() : "";
|
||||
if (!normalizedValue) {
|
||||
window.sessionStorage.removeItem(CSRF_SESSION_STORAGE_KEY);
|
||||
return;
|
||||
}
|
||||
window.sessionStorage.setItem(CSRF_SESSION_STORAGE_KEY, normalizedValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether a method should include CSRF metadata.
|
||||
*/
|
||||
function requiresCsrfHeader(method: string): boolean {
|
||||
const normalizedMethod = method.toUpperCase();
|
||||
return !CSRF_SAFE_METHODS.has(normalizedMethod);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges request headers and appends CSRF metadata for state-changing requests.
|
||||
*/
|
||||
function buildRequestHeaders(method: string, headers?: HeadersInit): Headers | undefined {
|
||||
const requestHeaders = new Headers(headers);
|
||||
if (method && requiresCsrfHeader(method)) {
|
||||
const csrfToken = resolveCsrfToken();
|
||||
if (csrfToken) {
|
||||
requestHeaders.set(CSRF_HEADER_NAME, csrfToken);
|
||||
}
|
||||
}
|
||||
return requestHeaders;
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes an API request with shared fetch options and CSRF handling.
|
||||
*/
|
||||
function apiRequest(input: string, init: ApiRequestInit = {}): Promise<Response> {
|
||||
const method = init.method ?? "GET";
|
||||
const headers = buildRequestHeaders(method, init.headers);
|
||||
return fetch(input, {
|
||||
...init,
|
||||
credentials: 'include',
|
||||
...(headers ? { headers } : {}),
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts backend error detail text from JSON payloads when available.
|
||||
*/
|
||||
async function responseErrorDetail(response: Response): Promise<string> {
|
||||
try {
|
||||
const payload = (await response.json()) as ApiErrorPayload;
|
||||
if (payload && typeof payload.detail === 'string' && payload.detail.trim()) {
|
||||
return payload.detail.trim();
|
||||
}
|
||||
} catch {
|
||||
return '';
|
||||
}
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes query parameters while skipping undefined and null values.
|
||||
@@ -45,6 +177,81 @@ function responseFilename(response: Response, fallback: string): string {
|
||||
return match[1];
|
||||
}
|
||||
|
||||
/**
|
||||
* Triggers a browser file download for blob payloads and releases temporary object URLs.
|
||||
*/
|
||||
export function downloadBlobFile(blob: Blob, filename: string): void {
|
||||
const objectUrl = URL.createObjectURL(blob);
|
||||
const anchor = document.createElement('a');
|
||||
anchor.href = objectUrl;
|
||||
anchor.download = filename;
|
||||
document.body.appendChild(anchor);
|
||||
anchor.click();
|
||||
anchor.remove();
|
||||
window.setTimeout(() => {
|
||||
URL.revokeObjectURL(objectUrl);
|
||||
}, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Authenticates one user and returns authenticated session metadata.
|
||||
*/
|
||||
export async function loginWithPassword(username: string, password: string): Promise<AuthLoginResponse> {
|
||||
const response = await fetch(`${API_BASE}/auth/login`, {
|
||||
method: 'POST',
|
||||
credentials: 'include',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
username: username.trim(),
|
||||
password,
|
||||
}),
|
||||
});
|
||||
if (!response.ok) {
|
||||
const detail = await responseErrorDetail(response);
|
||||
if (detail) {
|
||||
throw new Error(detail);
|
||||
}
|
||||
throw new Error('Login failed');
|
||||
}
|
||||
const payload = await (response.json() as Promise<AuthLoginResponse>);
|
||||
persistCsrfToken(payload.csrf_token);
|
||||
return payload;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads currently authenticated user session metadata.
|
||||
*/
|
||||
export async function getCurrentAuthSession(): Promise<AuthSessionInfo> {
|
||||
const response = await apiRequest(`${API_BASE}/auth/me`);
|
||||
if (!response.ok) {
|
||||
const detail = await responseErrorDetail(response);
|
||||
if (detail) {
|
||||
throw new Error(detail);
|
||||
}
|
||||
throw new Error('Failed to load authentication session');
|
||||
}
|
||||
const payload = await (response.json() as Promise<AuthSessionInfo>);
|
||||
persistCsrfToken(payload.csrf_token);
|
||||
return payload;
|
||||
}
|
||||
|
||||
/**
|
||||
* Revokes the current authenticated session.
|
||||
*/
|
||||
export async function logoutCurrentSession(): Promise<void> {
|
||||
const response = await apiRequest(`${API_BASE}/auth/logout`, {
|
||||
method: 'POST',
|
||||
});
|
||||
persistCsrfToken(undefined);
|
||||
if (!response.ok && response.status !== 401) {
|
||||
const detail = await responseErrorDetail(response);
|
||||
if (detail) {
|
||||
throw new Error(detail);
|
||||
}
|
||||
throw new Error('Failed to logout');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads documents from the backend list endpoint.
|
||||
*/
|
||||
@@ -72,7 +279,7 @@ export async function listDocuments(options?: {
|
||||
processed_from: options?.processedFrom,
|
||||
processed_to: options?.processedTo,
|
||||
});
|
||||
const response = await fetch(`${API_BASE}/documents${query}`);
|
||||
const response = await apiRequest(`${API_BASE}/documents${query}`);
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to load documents');
|
||||
}
|
||||
@@ -108,7 +315,7 @@ export async function searchDocuments(
|
||||
processed_from: options?.processedFrom,
|
||||
processed_to: options?.processedTo,
|
||||
});
|
||||
const response = await fetch(`${API_BASE}/search${query}`);
|
||||
const response = await apiRequest(`${API_BASE}/search${query}`);
|
||||
if (!response.ok) {
|
||||
throw new Error('Search failed');
|
||||
}
|
||||
@@ -128,7 +335,7 @@ export async function listProcessingLogs(options?: {
|
||||
offset: options?.offset ?? 0,
|
||||
document_id: options?.documentId,
|
||||
});
|
||||
const response = await fetch(`${API_BASE}/processing/logs${query}`);
|
||||
const response = await apiRequest(`${API_BASE}/processing/logs${query}`);
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to load processing logs');
|
||||
}
|
||||
@@ -146,7 +353,7 @@ export async function trimProcessingLogs(options?: {
|
||||
keep_document_sessions: options?.keepDocumentSessions ?? 2,
|
||||
keep_unbound_entries: options?.keepUnboundEntries ?? 80,
|
||||
});
|
||||
const response = await fetch(`${API_BASE}/processing/logs/trim${query}`, {
|
||||
const response = await apiRequest(`${API_BASE}/processing/logs/trim${query}`, {
|
||||
method: 'POST',
|
||||
});
|
||||
if (!response.ok) {
|
||||
@@ -159,7 +366,7 @@ export async function trimProcessingLogs(options?: {
|
||||
* Clears all persisted processing logs.
|
||||
*/
|
||||
export async function clearProcessingLogs(): Promise<{ deleted_entries: number }> {
|
||||
const response = await fetch(`${API_BASE}/processing/logs/clear`, {
|
||||
const response = await apiRequest(`${API_BASE}/processing/logs/clear`, {
|
||||
method: 'POST',
|
||||
});
|
||||
if (!response.ok) {
|
||||
@@ -173,7 +380,7 @@ export async function clearProcessingLogs(): Promise<{ deleted_entries: number }
|
||||
*/
|
||||
export async function listTags(includeTrashed = false): Promise<string[]> {
|
||||
const query = buildQuery({ include_trashed: includeTrashed });
|
||||
const response = await fetch(`${API_BASE}/documents/tags${query}`);
|
||||
const response = await apiRequest(`${API_BASE}/documents/tags${query}`);
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to load tags');
|
||||
}
|
||||
@@ -186,7 +393,7 @@ export async function listTags(includeTrashed = false): Promise<string[]> {
|
||||
*/
|
||||
export async function listPaths(includeTrashed = false): Promise<string[]> {
|
||||
const query = buildQuery({ include_trashed: includeTrashed });
|
||||
const response = await fetch(`${API_BASE}/documents/paths${query}`);
|
||||
const response = await apiRequest(`${API_BASE}/documents/paths${query}`);
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to load paths');
|
||||
}
|
||||
@@ -199,7 +406,7 @@ export async function listPaths(includeTrashed = false): Promise<string[]> {
|
||||
*/
|
||||
export async function listTypes(includeTrashed = false): Promise<string[]> {
|
||||
const query = buildQuery({ include_trashed: includeTrashed });
|
||||
const response = await fetch(`${API_BASE}/documents/types${query}`);
|
||||
const response = await apiRequest(`${API_BASE}/documents/types${query}`);
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to load document types');
|
||||
}
|
||||
@@ -228,7 +435,7 @@ export async function uploadDocuments(
|
||||
formData.append('tags', options.tags);
|
||||
formData.append('conflict_mode', options.conflictMode);
|
||||
|
||||
const response = await fetch(`${API_BASE}/documents/upload`, {
|
||||
const response = await apiRequest(`${API_BASE}/documents/upload`, {
|
||||
method: 'POST',
|
||||
body: formData,
|
||||
});
|
||||
@@ -245,7 +452,7 @@ export async function updateDocumentMetadata(
|
||||
documentId: string,
|
||||
payload: { original_filename?: string; logical_path?: string; tags?: string[] },
|
||||
): Promise<DmsDocument> {
|
||||
const response = await fetch(`${API_BASE}/documents/${documentId}`, {
|
||||
const response = await apiRequest(`${API_BASE}/documents/${documentId}`, {
|
||||
method: 'PATCH',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
@@ -262,7 +469,7 @@ export async function updateDocumentMetadata(
|
||||
* Moves a document to trash state without removing stored files.
|
||||
*/
|
||||
export async function trashDocument(documentId: string): Promise<DmsDocument> {
|
||||
const response = await fetch(`${API_BASE}/documents/${documentId}/trash`, { method: 'POST' });
|
||||
const response = await apiRequest(`${API_BASE}/documents/${documentId}/trash`, { method: 'POST' });
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to trash document');
|
||||
}
|
||||
@@ -273,7 +480,7 @@ export async function trashDocument(documentId: string): Promise<DmsDocument> {
|
||||
* Restores a document from trash to active state.
|
||||
*/
|
||||
export async function restoreDocument(documentId: string): Promise<DmsDocument> {
|
||||
const response = await fetch(`${API_BASE}/documents/${documentId}/restore`, { method: 'POST' });
|
||||
const response = await apiRequest(`${API_BASE}/documents/${documentId}/restore`, { method: 'POST' });
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to restore document');
|
||||
}
|
||||
@@ -284,7 +491,7 @@ export async function restoreDocument(documentId: string): Promise<DmsDocument>
|
||||
* Permanently deletes a document record and associated stored files.
|
||||
*/
|
||||
export async function deleteDocument(documentId: string): Promise<{ deleted_documents: number; deleted_files: number }> {
|
||||
const response = await fetch(`${API_BASE}/documents/${documentId}`, { method: 'DELETE' });
|
||||
const response = await apiRequest(`${API_BASE}/documents/${documentId}`, { method: 'DELETE' });
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to delete document');
|
||||
}
|
||||
@@ -295,7 +502,7 @@ export async function deleteDocument(documentId: string): Promise<{ deleted_docu
|
||||
* Loads full details for one document, including extracted text content.
|
||||
*/
|
||||
export async function getDocumentDetails(documentId: string): Promise<DmsDocumentDetail> {
|
||||
const response = await fetch(`${API_BASE}/documents/${documentId}`);
|
||||
const response = await apiRequest(`${API_BASE}/documents/${documentId}`);
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to load document details');
|
||||
}
|
||||
@@ -306,7 +513,7 @@ export async function getDocumentDetails(documentId: string): Promise<DmsDocumen
|
||||
* Re-enqueues one document for extraction and classification processing.
|
||||
*/
|
||||
export async function reprocessDocument(documentId: string): Promise<DmsDocument> {
|
||||
const response = await fetch(`${API_BASE}/documents/${documentId}/reprocess`, {
|
||||
const response = await apiRequest(`${API_BASE}/documents/${documentId}/reprocess`, {
|
||||
method: 'POST',
|
||||
});
|
||||
if (!response.ok) {
|
||||
@@ -343,6 +550,60 @@ export function contentMarkdownUrl(documentId: string): string {
|
||||
return `${API_BASE}/documents/${documentId}/content-md`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Downloads preview bytes for one document using centralized auth headers.
|
||||
*/
|
||||
export async function getDocumentPreviewBlob(documentId: string): Promise<Blob> {
|
||||
const response = await apiRequest(previewUrl(documentId));
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to load document preview');
|
||||
}
|
||||
return response.blob();
|
||||
}
|
||||
|
||||
/**
|
||||
* Downloads thumbnail bytes for one document using centralized auth headers.
|
||||
*/
|
||||
export async function getDocumentThumbnailBlob(documentId: string): Promise<Blob> {
|
||||
const response = await apiRequest(thumbnailUrl(documentId));
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to load document thumbnail');
|
||||
}
|
||||
return response.blob();
|
||||
}
|
||||
|
||||
/**
|
||||
* Downloads the original document payload with backend-provided filename fallback.
|
||||
*/
|
||||
export async function downloadDocumentFile(documentId: string): Promise<{ blob: Blob; filename: string }> {
|
||||
const response = await apiRequest(downloadUrl(documentId));
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to download document');
|
||||
}
|
||||
const blob = await response.blob();
|
||||
return {
|
||||
blob,
|
||||
filename: responseFilename(response, 'document-download'),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Downloads extracted markdown content for one document with backend-provided filename fallback.
|
||||
*/
|
||||
export async function downloadDocumentContentMarkdown(
|
||||
documentId: string,
|
||||
): Promise<{ blob: Blob; filename: string }> {
|
||||
const response = await apiRequest(contentMarkdownUrl(documentId));
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to download document markdown');
|
||||
}
|
||||
const blob = await response.blob();
|
||||
return {
|
||||
blob,
|
||||
filename: responseFilename(response, 'document-content.md'),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Exports extracted content markdown files for selected documents or path filters.
|
||||
*/
|
||||
@@ -352,7 +613,7 @@ export async function exportContentsMarkdown(payload: {
|
||||
include_trashed?: boolean;
|
||||
only_trashed?: boolean;
|
||||
}): Promise<{ blob: Blob; filename: string }> {
|
||||
const response = await fetch(`${API_BASE}/documents/content-md/export`, {
|
||||
const response = await apiRequest(`${API_BASE}/documents/content-md/export`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
@@ -373,7 +634,7 @@ export async function exportContentsMarkdown(payload: {
|
||||
* Retrieves persisted application settings from backend.
|
||||
*/
|
||||
export async function getAppSettings(): Promise<AppSettings> {
|
||||
const response = await fetch(`${API_BASE}/settings`);
|
||||
const response = await apiRequest(`${API_BASE}/settings`);
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to load application settings');
|
||||
}
|
||||
@@ -384,7 +645,7 @@ export async function getAppSettings(): Promise<AppSettings> {
|
||||
* Updates provider and task settings for OpenAI-compatible model execution.
|
||||
*/
|
||||
export async function updateAppSettings(payload: AppSettingsUpdate): Promise<AppSettings> {
|
||||
const response = await fetch(`${API_BASE}/settings`, {
|
||||
const response = await apiRequest(`${API_BASE}/settings`, {
|
||||
method: 'PATCH',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
@@ -392,7 +653,8 @@ export async function updateAppSettings(payload: AppSettingsUpdate): Promise<App
|
||||
body: JSON.stringify(payload),
|
||||
});
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to update settings');
|
||||
const detail = await responseErrorDetail(response);
|
||||
throw new Error(detail ? `Failed to update settings: ${detail}` : 'Failed to update settings');
|
||||
}
|
||||
return response.json() as Promise<AppSettings>;
|
||||
}
|
||||
@@ -401,7 +663,7 @@ export async function updateAppSettings(payload: AppSettingsUpdate): Promise<App
|
||||
* Resets persisted provider and task settings to backend defaults.
|
||||
*/
|
||||
export async function resetAppSettings(): Promise<AppSettings> {
|
||||
const response = await fetch(`${API_BASE}/settings/reset`, {
|
||||
const response = await apiRequest(`${API_BASE}/settings/reset`, {
|
||||
method: 'POST',
|
||||
});
|
||||
if (!response.ok) {
|
||||
|
||||
@@ -4,11 +4,58 @@
|
||||
.app-shell {
|
||||
width: min(1820px, 100% - 2rem);
|
||||
margin: 0 auto;
|
||||
padding: var(--space-3) 0 var(--space-4);
|
||||
padding: 0 0 var(--space-4);
|
||||
display: grid;
|
||||
gap: var(--space-3);
|
||||
}
|
||||
|
||||
.auth-shell {
|
||||
min-height: 100vh;
|
||||
display: grid;
|
||||
place-items: center;
|
||||
padding: var(--space-4) var(--space-2);
|
||||
}
|
||||
|
||||
.auth-card {
|
||||
width: min(430px, 100%);
|
||||
display: grid;
|
||||
gap: var(--space-2);
|
||||
padding: var(--space-3);
|
||||
border: 1px solid var(--color-border-strong);
|
||||
border-radius: var(--radius-lg);
|
||||
background: linear-gradient(180deg, rgba(28, 42, 63, 0.95) 0%, rgba(20, 30, 47, 0.95) 100%);
|
||||
box-shadow: var(--shadow-soft);
|
||||
}
|
||||
|
||||
.auth-card h1 {
|
||||
margin: 0;
|
||||
font-family: var(--font-display);
|
||||
font-size: clamp(1.4rem, 2.1vw, 2rem);
|
||||
}
|
||||
|
||||
.auth-card p {
|
||||
margin: 0;
|
||||
color: var(--color-text-muted);
|
||||
font-size: 0.88rem;
|
||||
}
|
||||
|
||||
.auth-form {
|
||||
display: grid;
|
||||
gap: var(--space-2);
|
||||
}
|
||||
|
||||
.auth-form label {
|
||||
display: grid;
|
||||
gap: 0.35rem;
|
||||
font-size: 0.8rem;
|
||||
color: var(--color-text-muted);
|
||||
}
|
||||
|
||||
.auth-form button {
|
||||
margin-top: 0.25rem;
|
||||
min-height: 2.1rem;
|
||||
}
|
||||
|
||||
.app-shell > * {
|
||||
animation: rise-in 220ms ease both;
|
||||
}
|
||||
@@ -23,18 +70,33 @@
|
||||
|
||||
.topbar {
|
||||
position: sticky;
|
||||
top: var(--space-2);
|
||||
top: 0;
|
||||
z-index: 50;
|
||||
left: 0;
|
||||
width: 100vw;
|
||||
margin-left: calc(50% - 50vw);
|
||||
margin-right: calc(50% - 50vw);
|
||||
padding: 0;
|
||||
border: 1px solid var(--color-border-strong);
|
||||
border-radius: 0;
|
||||
background: linear-gradient(180deg, rgba(28, 42, 63, 0.96) 0%, rgba(20, 30, 47, 0.96) 100%);
|
||||
box-shadow: var(--shadow-soft);
|
||||
backdrop-filter: blur(10px);
|
||||
}
|
||||
|
||||
.topbar-inner {
|
||||
width: min(1820px, 100% - 2rem);
|
||||
margin: 0 auto;
|
||||
display: grid;
|
||||
grid-template-columns: minmax(260px, 1fr) auto;
|
||||
gap: var(--space-3);
|
||||
align-items: start;
|
||||
padding: var(--space-3);
|
||||
border: 1px solid var(--color-border-strong);
|
||||
border-radius: var(--radius-lg);
|
||||
background: linear-gradient(180deg, rgba(28, 42, 63, 0.96) 0%, rgba(20, 30, 47, 0.96) 100%);
|
||||
box-shadow: var(--shadow-soft);
|
||||
backdrop-filter: blur(10px);
|
||||
}
|
||||
|
||||
.topbar-brand {
|
||||
display: grid;
|
||||
gap: 0;
|
||||
}
|
||||
|
||||
.topbar h1 {
|
||||
@@ -50,12 +112,39 @@
|
||||
font-size: 0.85rem;
|
||||
}
|
||||
|
||||
.topbar-auth-status {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 0.35rem;
|
||||
margin-top: 0.45rem;
|
||||
color: var(--color-text-muted);
|
||||
font-size: 0.76rem;
|
||||
}
|
||||
|
||||
.topbar-user-icon {
|
||||
width: 0.85rem;
|
||||
height: 0.85rem;
|
||||
}
|
||||
|
||||
.topbar-current-username {
|
||||
color: var(--color-text);
|
||||
font-family: var(--font-mono);
|
||||
font-size: 0.76rem;
|
||||
}
|
||||
|
||||
.topbar-controls {
|
||||
display: grid;
|
||||
gap: var(--space-2);
|
||||
justify-items: end;
|
||||
}
|
||||
|
||||
.topbar-primary-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: flex-end;
|
||||
gap: var(--space-2);
|
||||
}
|
||||
|
||||
.topbar-nav-group,
|
||||
.topbar-document-group,
|
||||
.topbar-settings-group {
|
||||
@@ -65,6 +154,21 @@
|
||||
gap: var(--space-2);
|
||||
}
|
||||
|
||||
.topbar-icon-action {
|
||||
width: 2.05rem;
|
||||
min-height: 2.05rem;
|
||||
padding: 0;
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
border-radius: var(--radius-xs);
|
||||
}
|
||||
|
||||
.topbar-signout-icon {
|
||||
width: 0.92rem;
|
||||
height: 0.92rem;
|
||||
}
|
||||
|
||||
.topbar-document-group .upload-actions-inline {
|
||||
display: flex;
|
||||
gap: var(--space-2);
|
||||
@@ -1244,6 +1348,12 @@ button:disabled {
|
||||
}
|
||||
|
||||
.topbar {
|
||||
width: 100%;
|
||||
margin-left: 0;
|
||||
margin-right: 0;
|
||||
}
|
||||
|
||||
.topbar-inner {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
@@ -1252,10 +1362,16 @@ button:disabled {
|
||||
}
|
||||
|
||||
.topbar-nav-group,
|
||||
.topbar-primary-row,
|
||||
.topbar-document-group,
|
||||
.topbar-settings-group {
|
||||
justify-content: flex-start;
|
||||
}
|
||||
|
||||
.topbar-primary-row {
|
||||
justify-content: space-between;
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 1040px) {
|
||||
@@ -1340,12 +1456,14 @@ button:disabled {
|
||||
|
||||
@media (max-width: 560px) {
|
||||
.topbar-nav-group,
|
||||
.topbar-primary-row,
|
||||
.topbar-document-group,
|
||||
.topbar-settings-group {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.topbar-nav-group button,
|
||||
.topbar-primary-row button,
|
||||
.topbar-document-group button,
|
||||
.topbar-settings-group button {
|
||||
flex: 1;
|
||||
|
||||
@@ -58,6 +58,33 @@ export interface SearchResponse {
|
||||
items: DmsDocument[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents one authenticated user identity returned by backend auth endpoints.
|
||||
*/
|
||||
export interface AuthUser {
|
||||
id: string;
|
||||
username: string;
|
||||
role: 'admin' | 'user';
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents active authentication session metadata.
|
||||
*/
|
||||
export interface AuthSessionInfo {
|
||||
user: AuthUser;
|
||||
expires_at: string;
|
||||
csrf_token?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents login response payload with issued session metadata.
|
||||
*/
|
||||
export interface AuthLoginResponse extends AuthSessionInfo {
|
||||
access_token?: string;
|
||||
token_type: 'bearer';
|
||||
csrf_token?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents distinct document type values available for filter controls.
|
||||
*/
|
||||
|
||||
@@ -15,5 +15,6 @@
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"types": ["vite/client", "react", "react-dom"]
|
||||
},
|
||||
"include": ["src"]
|
||||
"include": ["src"],
|
||||
"exclude": ["src/**/*.test.ts", "src/**/*.test.tsx"]
|
||||
}
|
||||
|
||||
@@ -1,14 +1,93 @@
|
||||
/**
|
||||
* Vite configuration for the DMS frontend application.
|
||||
*/
|
||||
import { defineConfig } from 'vite';
|
||||
import { defineConfig, loadEnv } from 'vite';
|
||||
|
||||
/**
|
||||
* Parses a comma-separated environment value into normalized entries.
|
||||
*
|
||||
* @param rawValue Raw comma-separated value.
|
||||
* @returns List of non-empty normalized entries.
|
||||
*/
|
||||
function parseCsvList(rawValue: string | undefined): string[] {
|
||||
if (!rawValue) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return rawValue
|
||||
.split(',')
|
||||
.map((entry) => entry.trim())
|
||||
.filter((entry) => entry.length > 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts hostnames from CORS origin values.
|
||||
*
|
||||
* @param rawValue JSON array string or comma-separated origin list.
|
||||
* @returns Hostnames parsed from valid origins.
|
||||
*/
|
||||
function parseCorsOriginHosts(rawValue: string | undefined): string[] {
|
||||
if (!rawValue) {
|
||||
return [];
|
||||
}
|
||||
|
||||
let origins: string[] = [];
|
||||
|
||||
try {
|
||||
const parsedOrigins = JSON.parse(rawValue);
|
||||
if (Array.isArray(parsedOrigins)) {
|
||||
origins = parsedOrigins.filter((entry): entry is string => typeof entry === 'string');
|
||||
} else if (typeof parsedOrigins === 'string') {
|
||||
origins = [parsedOrigins];
|
||||
}
|
||||
} catch {
|
||||
origins = parseCsvList(rawValue);
|
||||
}
|
||||
|
||||
return origins.flatMap((origin) => {
|
||||
try {
|
||||
const parsedUrl = new URL(origin);
|
||||
return parsedUrl.hostname ? [parsedUrl.hostname] : [];
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the Vite allowed host list from environment-driven inputs.
|
||||
*
|
||||
* @param env Environment variable key-value map.
|
||||
* @returns De-duplicated hostnames, or undefined to keep Vite defaults.
|
||||
*/
|
||||
function buildAllowedHosts(env: Record<string, string>): string[] | undefined {
|
||||
const explicitHosts = parseCsvList(env.VITE_ALLOWED_HOSTS);
|
||||
const corsOriginHosts = parseCorsOriginHosts(env.CORS_ORIGINS);
|
||||
const mergedHosts = Array.from(new Set([...explicitHosts, ...corsOriginHosts]));
|
||||
|
||||
return mergedHosts.length > 0 ? mergedHosts : undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Exports frontend build and dev-server settings.
|
||||
*/
|
||||
export default defineConfig({
|
||||
server: {
|
||||
host: '0.0.0.0',
|
||||
port: 5173,
|
||||
},
|
||||
export default defineConfig(({ mode }) => {
|
||||
const env = loadEnv(mode, process.cwd(), '');
|
||||
const allowedHosts = buildAllowedHosts(env);
|
||||
const apiProxyTarget = env.VITE_API_PROXY_TARGET?.trim() || 'http://localhost:8000';
|
||||
|
||||
return {
|
||||
server: {
|
||||
host: '0.0.0.0',
|
||||
port: 5173,
|
||||
proxy: {
|
||||
'/api': {
|
||||
target: apiProxyTarget,
|
||||
changeOrigin: false,
|
||||
secure: false,
|
||||
},
|
||||
},
|
||||
...(allowedHosts ? { allowedHosts } : {}),
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user