Initial commit

This commit is contained in:
2026-02-21 09:44:18 -03:00
commit 5dfc2cbd85
65 changed files with 11989 additions and 0 deletions

View File

@@ -0,0 +1,65 @@
"""Data model representing a stored and processed document."""
import uuid
from datetime import UTC, datetime
from enum import Enum
from sqlalchemy import Boolean, DateTime, Enum as SqlEnum, ForeignKey, Integer, String, Text
from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.db.base import Base
class DocumentStatus(str, Enum):
    """Processing states an uploaded document can be in.

    The enum mixes in ``str``, so every member is itself a string that
    compares equal to its lowercase value (``DocumentStatus.QUEUED == "queued"``).
    """

    QUEUED = "queued"
    PROCESSED = "processed"
    UNSUPPORTED = "unsupported"
    ERROR = "error"
    TRASHED = "trashed"
class Document(Base):
    """ORM record for one stored file and the results of processing it.

    A row holds the file's identity (names, hash, size), its storage paths,
    the extracted content and metadata, classification data (logical and
    suggested paths, tags), processing status, and optional references to
    other rows of the same table.

    All ``default=`` values below are applied by SQLAlchemy on the Python
    side when a row is inserted; no server-side defaults are declared here.
    """

    __tablename__ = "documents"

    # --- Identity and storage -------------------------------------------
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,  # callable: a fresh UUID per inserted row
    )
    original_filename: Mapped[str] = mapped_column(String(512), nullable=False)
    source_relative_path: Mapped[str] = mapped_column(
        String(1024),
        nullable=False,
        default="",
    )
    stored_relative_path: Mapped[str] = mapped_column(String(1024), nullable=False)
    mime_type: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
        default="application/octet-stream",
    )
    extension: Mapped[str] = mapped_column(String(32), nullable=False, default="")
    sha256: Mapped[str] = mapped_column(String(128), nullable=False)
    # NOTE(review): Integer is a 32-bit INTEGER on PostgreSQL, which tops out
    # just under 2 GiB -- confirm larger files are not expected here.
    size_bytes: Mapped[int] = mapped_column(Integer, nullable=False)

    # --- Classification -------------------------------------------------
    logical_path: Mapped[str] = mapped_column(
        String(1024),
        nullable=False,
        default="Inbox",
    )
    suggested_path: Mapped[str | None] = mapped_column(String(1024), nullable=True)
    # ``list`` / ``dict`` are passed as callables so each row gets its own
    # empty container rather than a shared one.
    tags: Mapped[list[str]] = mapped_column(
        ARRAY(String),
        nullable=False,
        default=list,
    )
    suggested_tags: Mapped[list[str]] = mapped_column(
        ARRAY(String),
        nullable=False,
        default=list,
    )
    metadata_json: Mapped[dict] = mapped_column(JSONB, nullable=False, default=dict)

    # --- Extracted content ----------------------------------------------
    extracted_text: Mapped[str] = mapped_column(Text, nullable=False, default="")
    image_text_type: Mapped[str | None] = mapped_column(String(64), nullable=True)
    handwriting_style_id: Mapped[str | None] = mapped_column(
        String(64),
        nullable=True,
        index=True,
    )

    # --- Processing state -----------------------------------------------
    # NOTE(review): SQLAlchemy's Enum type persists member *names* by default
    # (e.g. "QUEUED"), not the lowercase values -- confirm that is intended.
    status: Mapped[DocumentStatus] = mapped_column(
        SqlEnum(DocumentStatus),
        nullable=False,
        default=DocumentStatus.QUEUED,
    )
    preview_available: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
    )

    # --- Archive membership and document lineage ------------------------
    archived_member_path: Mapped[str | None] = mapped_column(
        String(1024),
        nullable=True,
    )
    is_archive_member: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
    )
    # Both columns below are self-referential foreign keys to documents.id.
    parent_document_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("documents.id"),
        nullable=True,
    )
    replaces_document_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("documents.id"),
        nullable=True,
    )

    # --- Timestamps (timezone-aware, generated in UTC) ------------------
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
    )
    processed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),  # re-stamped on UPDATE statements
    )

    # --- Relationships --------------------------------------------------
    # Many-to-one link to the parent row of the same table.  ``foreign_keys``
    # is required because two columns reference documents.id, and
    # ``remote_side`` marks ``id`` as the "one" side of the self-reference.
    parent_document: Mapped["Document | None"] = relationship(
        "Document",
        remote_side="Document.id",
        foreign_keys=[parent_document_id],
        post_update=True,
    )