Initial commit

This commit is contained in:
2026-02-21 09:44:18 -03:00
commit 5dfc2cbd85
65 changed files with 11989 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
"""Model exports for ORM metadata discovery."""
# Importing the model modules here executes their class definitions; per the
# docstring above this is presumably what makes the models visible to the ORM
# metadata when this package is imported -- confirm against app.db.base.
from app.models.document import Document, DocumentStatus
from app.models.processing_log import ProcessingLogEntry
# Names re-exported as this package's public API.
__all__ = ["Document", "DocumentStatus", "ProcessingLogEntry"]

View File

@@ -0,0 +1,65 @@
"""Data model representing a stored and processed document."""
import uuid
from datetime import UTC, datetime
from enum import Enum

from sqlalchemy import BigInteger, Boolean, DateTime, Enum as SqlEnum, ForeignKey, Integer, String, Text
from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.db.base import Base
class DocumentStatus(str, Enum):
    """Processing states an uploaded document can be in.

    Members subclass ``str``, so each one compares equal to its lowercase
    value (for example ``DocumentStatus.QUEUED == "queued"``).
    """

    # Declaration order is preserved by Enum iteration; keep it stable.
    QUEUED = "queued"
    PROCESSED = "processed"
    UNSUPPORTED = "unsupported"
    ERROR = "error"
    TRASHED = "trashed"
class Document(Base):
    """Stores file identity, storage paths, extracted content, and classification metadata.

    Mapped to the ``documents`` table. Every non-nullable column except
    ``original_filename``, ``stored_relative_path``, ``sha256`` and
    ``size_bytes`` has a Python-side default, so those four must be supplied
    when creating a row.
    """

    __tablename__ = "documents"

    # Primary key generated in Python via uuid.uuid4 at insert time.
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # File identity and storage location.
    original_filename: Mapped[str] = mapped_column(String(512), nullable=False)
    source_relative_path: Mapped[str] = mapped_column(String(1024), nullable=False, default="")
    stored_relative_path: Mapped[str] = mapped_column(String(1024), nullable=False)
    mime_type: Mapped[str] = mapped_column(String(255), nullable=False, default="application/octet-stream")
    extension: Mapped[str] = mapped_column(String(32), nullable=False, default="")
    sha256: Mapped[str] = mapped_column(String(128), nullable=False)
    # 64-bit column: PostgreSQL's 32-bit INTEGER tops out at 2_147_483_647, so
    # a file of 2 GiB or more could not be recorded with a plain Integer.
    # NOTE: databases created with the narrower type need a migration to
    # widen this column.
    size_bytes: Mapped[int] = mapped_column(BigInteger, nullable=False)

    # Current and suggested placement/tags; new rows start in "Inbox" with
    # empty tag lists.
    logical_path: Mapped[str] = mapped_column(String(1024), nullable=False, default="Inbox")
    suggested_path: Mapped[str | None] = mapped_column(String(1024), nullable=True)
    tags: Mapped[list[str]] = mapped_column(ARRAY(String), nullable=False, default=list)
    suggested_tags: Mapped[list[str]] = mapped_column(ARRAY(String), nullable=False, default=list)

    # Extracted content and free-form metadata (JSONB, defaults to an empty dict).
    metadata_json: Mapped[dict] = mapped_column(JSONB, nullable=False, default=dict)
    extracted_text: Mapped[str] = mapped_column(Text, nullable=False, default="")
    image_text_type: Mapped[str | None] = mapped_column(String(64), nullable=True)
    handwriting_style_id: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True)

    # NOTE: SQLAlchemy's Enum type persists the member *names* (e.g. "QUEUED")
    # by default, not the lowercase values, unless values_callable is given.
    status: Mapped[DocumentStatus] = mapped_column(SqlEnum(DocumentStatus), nullable=False, default=DocumentStatus.QUEUED)
    preview_available: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    # Archive membership and self-referential links; both foreign keys point
    # back at documents.id. No relationship() is declared for
    # replaces_document_id in this class.
    archived_member_path: Mapped[str | None] = mapped_column(String(1024), nullable=True)
    is_archive_member: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    parent_document_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), ForeignKey("documents.id"), nullable=True)
    replaces_document_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), ForeignKey("documents.id"), nullable=True)

    # Timestamps are timezone-aware and filled in Python with the current UTC
    # time; updated_at is additionally refreshed when the ORM emits an UPDATE.
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, default=lambda: datetime.now(UTC))
    processed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
    )

    # Many-to-one link to the parent row in the same table. remote_side marks
    # Document.id as the "one" side; foreign_keys disambiguates between the
    # two self-referential FKs; post_update makes the ORM set the FK with a
    # separate UPDATE, which breaks insert-order cycles between rows.
    parent_document: Mapped["Document | None"] = relationship(
        "Document",
        remote_side="Document.id",
        foreign_keys=[parent_document_id],
        post_update=True,
    )

View File

@@ -0,0 +1,33 @@
"""Data model representing one persisted processing pipeline log entry."""
import uuid
from datetime import UTC, datetime
from sqlalchemy import BigInteger, DateTime, ForeignKey, String, Text
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column
from app.db.base import Base
def _utc_now() -> datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    return datetime.now(tz=UTC)


class ProcessingLogEntry(Base):
    """One persisted processing event, optionally carrying model prompt and response text.

    Mapped to the ``processing_logs`` table. ``stage`` and ``event`` are the
    only required columns without a default.
    """

    __tablename__ = "processing_logs"

    # Auto-incrementing 64-bit surrogate key.
    id: Mapped[int] = mapped_column(
        BigInteger,
        autoincrement=True,
        primary_key=True,
    )
    # Timezone-aware creation time, filled in Python at insert.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_utc_now,
        nullable=False,
    )
    level: Mapped[str] = mapped_column(
        String(16),
        default="info",
        nullable=False,
    )
    stage: Mapped[str] = mapped_column(String(64), nullable=False)
    event: Mapped[str] = mapped_column(String(256), nullable=False)
    # Optional link to documents.id; the database sets it to NULL when the
    # referenced document row is deleted.
    document_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("documents.id", ondelete="SET NULL"),
        nullable=True,
    )
    document_filename: Mapped[str | None] = mapped_column(
        String(512),
        nullable=True,
    )
    provider_id: Mapped[str | None] = mapped_column(
        String(128),
        nullable=True,
    )
    model_name: Mapped[str | None] = mapped_column(
        String(256),
        nullable=True,
    )
    # Unbounded text columns for the optional prompt/response pair.
    prompt_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    response_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Free-form JSONB payload; defaults to an empty dict.
    payload_json: Mapped[dict] = mapped_column(
        JSONB,
        default=dict,
        nullable=False,
    )