Initial commit

This commit is contained in:
2026-02-21 09:44:18 -03:00
commit 5dfc2cbd85
65 changed files with 11989 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""Pydantic schema package for API request and response models."""

View File

@@ -0,0 +1,92 @@
"""Pydantic schema definitions for document API payloads."""
from datetime import datetime
from uuid import UUID
from pydantic import BaseModel, Field
from app.models.document import DocumentStatus
class DocumentResponse(BaseModel):
    """Represents a document record returned by API endpoints.

    Attribute-based parsing is enabled, so instances can be validated from
    objects exposing these names as attributes (such as SQLAlchemy model
    instances) in addition to plain mappings.
    """

    # Pydantic v2 style configuration. The nested ``class Config`` form is
    # deprecated in v2 and emits a deprecation warning at class creation;
    # ``model_config`` accepts a plain dict, so no additional import is needed.
    model_config = {"from_attributes": True}

    # Identity and file-level attributes.
    id: UUID
    original_filename: str
    source_relative_path: str
    mime_type: str
    extension: str
    size_bytes: int
    sha256: str

    # Placement and classification attributes. Fields typed ``... | None``
    # carry no default: they must be supplied, but the value may be null.
    logical_path: str
    suggested_path: str | None
    image_text_type: str | None
    handwriting_style_id: str | None
    tags: list[str] = Field(default_factory=list)
    suggested_tags: list[str] = Field(default_factory=list)

    # Processing state and relationships to other documents.
    status: DocumentStatus
    preview_available: bool
    is_archive_member: bool
    archived_member_path: str | None
    parent_document_id: UUID | None
    replaces_document_id: UUID | None

    # Timestamps.
    created_at: datetime
    processed_at: datetime | None
class DocumentDetailResponse(DocumentResponse):
    """Full document payload: every ``DocumentResponse`` field plus extracted text.

    Adds two required fields on top of the inherited ones.
    """

    # Text content extracted from the document.
    extracted_text: str
    # Free-form metadata mapping; key and value types are not constrained here.
    metadata_json: dict
class DocumentsListResponse(BaseModel):
    """Paginated document list payload: a total count plus a list of items."""

    # Count reported alongside the returned page of items.
    total: int
    # Document records included in this response.
    items: list[DocumentResponse]
class UploadConflict(BaseModel):
    """Upload conflict record for a file whose checksum matches an existing document."""

    # Name of the uploaded file that triggered the conflict.
    original_filename: str
    # Checksum value shared with the existing document.
    sha256: str
    # Identifier of the document that already exists.
    existing_document_id: UUID
class UploadResponse(BaseModel):
    """Outcome of a batch upload request.

    Both lists default to empty when not provided.
    """

    # Documents reported as uploaded.
    uploaded: list[DocumentResponse] = Field(default_factory=list)
    # Conflicts reported for files matching an existing checksum.
    conflicts: list[UploadConflict] = Field(default_factory=list)
class DocumentUpdateRequest(BaseModel):
    """Request body carrying document metadata changes.

    Every field is optional and defaults to ``None``.
    """

    original_filename: str | None = None
    logical_path: str | None = None
    tags: list[str] | None = None
class SearchResponse(BaseModel):
    """Search query result payload: a total count plus the matching documents."""

    # Count reported alongside the returned items.
    total: int
    # Document records returned for the query.
    items: list[DocumentResponse]
class ContentExportRequest(BaseModel):
    """Filter criteria for exporting extracted document contents as Markdown files."""

    # Explicit document identifiers; defaults to an empty list.
    document_ids: list[UUID] = Field(default_factory=list)
    # Optional path prefix filter; ``None`` when omitted.
    path_prefix: str | None = None
    # Trash-related flags; both default to ``False``.
    include_trashed: bool = False
    only_trashed: bool = False

View File

@@ -0,0 +1,35 @@
"""Pydantic schemas for processing pipeline log API payloads."""
from datetime import datetime
from uuid import UUID
from pydantic import BaseModel, Field
class ProcessingLogEntryResponse(BaseModel):
    """Represents one persisted processing log event returned by API endpoints.

    Attribute-based parsing is enabled, so instances can be validated from
    objects exposing these names as attributes (such as SQLAlchemy model
    instances) in addition to plain mappings.
    """

    # Pydantic v2 style configuration. The nested ``class Config`` form is
    # deprecated in v2 and emits a deprecation warning at class creation;
    # ``model_config`` accepts a plain dict, so no additional import is needed.
    model_config = {"from_attributes": True}

    # Event identity and classification.
    id: int
    created_at: datetime
    level: str
    stage: str
    event: str

    # Optional context. These fields carry no default: they must be supplied,
    # but the value may be null.
    document_id: UUID | None
    document_filename: str | None
    provider_id: str | None
    model_name: str | None
    prompt_text: str | None
    response_text: str | None

    # Free-form payload mapping; key and value types are not constrained here.
    payload_json: dict
class ProcessingLogListResponse(BaseModel):
    """Paginated collection of processing log records."""

    # Count reported alongside the returned page of entries.
    total: int
    # Log entries in this response; defaults to an empty list.
    items: list[ProcessingLogEntryResponse] = Field(default_factory=list)

View File

@@ -0,0 +1,242 @@
"""Pydantic schemas for application-level runtime settings."""
from pydantic import BaseModel, Field
class ProviderSettingsResponse(BaseModel):
    """Persisted model provider described by non-secret connection metadata.

    The API key itself is not a field of this model; only a presence flag and
    a masked string are carried.
    """

    id: str
    label: str
    # Defaults to "openai_compatible" when not supplied.
    provider_type: str = "openai_compatible"
    base_url: str
    timeout_seconds: int
    # Presence flag and masked form of the key; the mask defaults to "".
    api_key_set: bool
    api_key_masked: str = ""
class ProviderSettingsUpdateRequest(BaseModel):
    """Create-or-update request for a model provider."""

    id: str
    label: str
    # Defaults to "openai_compatible" when not supplied.
    provider_type: str = "openai_compatible"
    base_url: str
    # Defaults to 45; validated to lie within 5..180 inclusive.
    timeout_seconds: int = Field(45, ge=5, le=180)
    # Optional key value and a separate flag for clearing it.
    api_key: str | None = None
    clear_api_key: bool = False
class OcrTaskSettingsResponse(BaseModel):
    """OCR task runtime settings together with its prompt configuration.

    All four fields are required.
    """

    enabled: bool
    provider_id: str
    model: str
    prompt: str
class OcrTaskSettingsUpdateRequest(BaseModel):
    """Update request for OCR task settings.

    Every field is optional and defaults to ``None``.
    """

    enabled: bool | None = None
    provider_id: str | None = None
    model: str | None = None
    prompt: str | None = None
class SummaryTaskSettingsResponse(BaseModel):
    """Summarization task runtime settings.

    All fields are required.
    """

    enabled: bool
    provider_id: str
    model: str
    prompt: str
    max_input_tokens: int
class SummaryTaskSettingsUpdateRequest(BaseModel):
    """Update request for summarization task settings.

    Every field is optional and defaults to ``None``.
    """

    enabled: bool | None = None
    provider_id: str | None = None
    model: str | None = None
    prompt: str | None = None
    # When supplied, validated to lie within 512..64000 inclusive.
    max_input_tokens: int | None = Field(None, ge=512, le=64000)
class RoutingTaskSettingsResponse(BaseModel):
    """Routing task runtime settings for path and tag classification.

    All fields are required.
    """

    # Task binding and prompt.
    enabled: bool
    provider_id: str
    model: str
    prompt: str

    # Neighbor lookup parameters.
    neighbor_count: int
    neighbor_min_similarity: float

    # Auto-apply thresholds.
    auto_apply_confidence_threshold: float
    auto_apply_neighbor_similarity_threshold: float

    # Neighbor path override parameters.
    neighbor_path_override_enabled: bool
    neighbor_path_override_min_similarity: float
    neighbor_path_override_min_gap: float
    neighbor_path_override_max_confidence: float
class RoutingTaskSettingsUpdateRequest(BaseModel):
    """Update request for routing task settings.

    Every field is optional and defaults to ``None``. Supplied float values
    are validated to lie within 0.0..1.0 inclusive.
    """

    # Task binding and prompt.
    enabled: bool | None = None
    provider_id: str | None = None
    model: str | None = None
    prompt: str | None = None

    # Neighbor lookup parameters; the count is validated to 1..40 inclusive.
    neighbor_count: int | None = Field(None, ge=1, le=40)
    neighbor_min_similarity: float | None = Field(None, ge=0.0, le=1.0)

    # Auto-apply thresholds.
    auto_apply_confidence_threshold: float | None = Field(None, ge=0.0, le=1.0)
    auto_apply_neighbor_similarity_threshold: float | None = Field(None, ge=0.0, le=1.0)

    # Neighbor path override parameters.
    neighbor_path_override_enabled: bool | None = None
    neighbor_path_override_min_similarity: float | None = Field(None, ge=0.0, le=1.0)
    neighbor_path_override_min_gap: float | None = Field(None, ge=0.0, le=1.0)
    neighbor_path_override_max_confidence: float | None = Field(None, ge=0.0, le=1.0)
class UploadDefaultsResponse(BaseModel):
    """Default upload destination and default tags."""

    # Default destination path for uploads.
    logical_path: str
    # Default tags; an empty list when not provided.
    tags: list[str] = Field(default_factory=list)
class UploadDefaultsUpdateRequest(BaseModel):
    """Update request for the default upload destination and default tags.

    Both fields are optional and default to ``None``.
    """

    logical_path: str | None = None
    tags: list[str] | None = None
class DisplaySettingsResponse(BaseModel):
    """Document-list display preferences."""

    # Defaults to 12; validated to lie within 1..200 inclusive.
    cards_per_page: int = Field(12, ge=1, le=200)
    # Defaults to enabled.
    log_typing_animation_enabled: bool = True
class DisplaySettingsUpdateRequest(BaseModel):
    """Update request for document-list display preferences.

    Both fields are optional and default to ``None``.
    """

    # When supplied, validated to lie within 1..200 inclusive.
    cards_per_page: int | None = Field(None, ge=1, le=200)
    log_typing_animation_enabled: bool | None = None
class PredefinedPathEntryResponse(BaseModel):
    """One predefined logical path along with its global discoverability scope."""

    # The predefined path value.
    value: str
    # Scope flag; required in responses.
    global_shared: bool
class PredefinedPathEntryUpdateRequest(BaseModel):
    """Create-or-update request for one predefined logical path."""

    # The predefined path value.
    value: str
    # Scope flag; defaults to ``False`` when omitted.
    global_shared: bool = False
class PredefinedTagEntryResponse(BaseModel):
    """One predefined tag along with its global discoverability scope."""

    # The predefined tag value.
    value: str
    # Scope flag; required in responses.
    global_shared: bool
class PredefinedTagEntryUpdateRequest(BaseModel):
    """Create-or-update request for one predefined tag."""

    # The predefined tag value.
    value: str
    # Scope flag; defaults to ``False`` when omitted.
    global_shared: bool = False
class HandwritingStyleSettingsResponse(BaseModel):
    """Handwriting-style clustering settings used by Typesense image embeddings.

    All fields are required.
    """

    enabled: bool
    embed_model: str
    neighbor_limit: int

    # Similarity thresholds.
    match_min_similarity: float
    bootstrap_match_min_similarity: float

    # Sizing parameters.
    bootstrap_sample_size: int
    image_max_side: int
class HandwritingStyleSettingsUpdateRequest(BaseModel):
    """Update request for handwriting-style clustering and match thresholds.

    Every field is optional and defaults to ``None``; numeric values, when
    supplied, are range-validated as noted per field.
    """

    enabled: bool | None = None
    embed_model: str | None = None
    # Allowed range: 1..32 inclusive.
    neighbor_limit: int | None = Field(None, ge=1, le=32)

    # Similarity thresholds; allowed range: 0.0..1.0 inclusive.
    match_min_similarity: float | None = Field(None, ge=0.0, le=1.0)
    bootstrap_match_min_similarity: float | None = Field(None, ge=0.0, le=1.0)

    # Allowed range: 1..30 inclusive.
    bootstrap_sample_size: int | None = Field(None, ge=1, le=30)
    # Allowed range: 256..4096 inclusive.
    image_max_side: int | None = Field(None, ge=256, le=4096)
class TaskSettingsResponse(BaseModel):
    """Aggregate of all task-level model bindings and prompt settings.

    Each of the three task sections is required.
    """

    ocr_handwriting: OcrTaskSettingsResponse
    summary_generation: SummaryTaskSettingsResponse
    routing_classification: RoutingTaskSettingsResponse
class TaskSettingsUpdateRequest(BaseModel):
    """Partial update request for task-level settings.

    Each task section is optional and defaults to ``None``.
    """

    ocr_handwriting: OcrTaskSettingsUpdateRequest | None = None
    summary_generation: SummaryTaskSettingsUpdateRequest | None = None
    routing_classification: RoutingTaskSettingsUpdateRequest | None = None
class AppSettingsResponse(BaseModel):
    """Complete set of application settings exposed by the API."""

    # Required settings sections.
    upload_defaults: UploadDefaultsResponse
    display: DisplaySettingsResponse
    handwriting_style_clustering: HandwritingStyleSettingsResponse

    # Predefined entries; each list defaults to empty when not provided.
    predefined_paths: list[PredefinedPathEntryResponse] = Field(default_factory=list)
    predefined_tags: list[PredefinedTagEntryResponse] = Field(default_factory=list)

    # Providers and task bindings; both required.
    providers: list[ProviderSettingsResponse]
    tasks: TaskSettingsResponse
class AppSettingsUpdateRequest(BaseModel):
    """Settings update input covering providers, task bindings and other sections.

    Every section is optional and defaults to ``None``.
    """

    # Single-object sections.
    upload_defaults: UploadDefaultsUpdateRequest | None = None
    display: DisplaySettingsUpdateRequest | None = None
    handwriting_style_clustering: HandwritingStyleSettingsUpdateRequest | None = None

    # List-valued sections.
    predefined_paths: list[PredefinedPathEntryUpdateRequest] | None = None
    predefined_tags: list[PredefinedTagEntryUpdateRequest] | None = None
    providers: list[ProviderSettingsUpdateRequest] | None = None

    # Task-level bindings.
    tasks: TaskSettingsUpdateRequest | None = None
class HandwritingSettingsResponse(BaseModel):
    """Legacy handwriting response shape, retained for backward compatibility."""

    # Defaults to "openai_compatible" when not supplied.
    provider: str = "openai_compatible"
    enabled: bool

    # Connection fields using the legacy ``openai_`` prefix.
    openai_base_url: str
    openai_model: str
    openai_timeout_seconds: int

    # Presence flag and masked form of the key; the mask defaults to "".
    openai_api_key_set: bool
    openai_api_key_masked: str = ""
class HandwritingSettingsUpdateRequest(BaseModel):
    """Legacy handwriting update shape, retained for backward compatibility.

    Every field is optional; the clear flag defaults to ``False`` and the
    rest default to ``None``.
    """

    enabled: bool | None = None
    openai_base_url: str | None = None
    openai_model: str | None = None
    # When supplied, validated to lie within 5..180 inclusive.
    openai_timeout_seconds: int | None = Field(None, ge=5, le=180)
    # Optional key value and a separate flag for clearing it.
    openai_api_key: str | None = None
    clear_openai_api_key: bool = False