Files
ledgerdock/backend/app/models/document.py
2026-02-21 09:44:18 -03:00

66 lines
3.4 KiB
Python

"""Data model representing a stored and processed document."""
import uuid
from datetime import UTC, datetime
from enum import Enum
from sqlalchemy import Boolean, DateTime, Enum as SqlEnum, ForeignKey, Integer, String, Text
from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.db.base import Base
class DocumentStatus(str, Enum):
    """Enumerates processing states for uploaded documents.

    The ``str`` mix-in makes every member a real string, so a member compares
    equal to its raw value (``DocumentStatus.QUEUED == "queued"``) and can be
    looked up from that value (``DocumentStatus("queued")``).
    """

    QUEUED = "queued"
    PROCESSED = "processed"
    UNSUPPORTED = "unsupported"
    ERROR = "error"
    TRASHED = "trashed"
class Document(Base):
    """Stores file identity, storage paths, extracted content, and classification metadata.

    Each instance maps to one row of the ``documents`` table. A row may point
    at other rows of the same table through ``parent_document_id`` and
    ``replaces_document_id``; only the former has an ORM relationship
    (``parent_document``) declared here.
    """

    __tablename__ = "documents"

    # --- Identity -----------------------------------------------------------
    # Primary key; the UUID is produced by the Python-side ``uuid.uuid4`` default.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # --- File identity and storage locations --------------------------------
    original_filename: Mapped[str] = mapped_column(String(512), nullable=False)
    source_relative_path: Mapped[str] = mapped_column(String(1024), nullable=False, default="")
    stored_relative_path: Mapped[str] = mapped_column(String(1024), nullable=False)
    mime_type: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
        default="application/octet-stream",
    )
    extension: Mapped[str] = mapped_column(String(32), nullable=False, default="")
    # presumably a hex digest of the file contents — confirm against the writer.
    sha256: Mapped[str] = mapped_column(String(128), nullable=False)
    # NOTE(review): ``Integer`` is a 32-bit column on PostgreSQL, so sizes above
    # 2_147_483_647 bytes would not fit — confirm whether larger files can occur.
    size_bytes: Mapped[int] = mapped_column(Integer, nullable=False)

    # --- Classification: paths and tags -------------------------------------
    logical_path: Mapped[str] = mapped_column(String(1024), nullable=False, default="Inbox")
    suggested_path: Mapped[str | None] = mapped_column(String(1024), nullable=True)
    # ``default=list`` / ``default=dict`` pass the callable, so each new row
    # receives its own fresh empty container rather than a shared instance.
    tags: Mapped[list[str]] = mapped_column(ARRAY(String), nullable=False, default=list)
    suggested_tags: Mapped[list[str]] = mapped_column(
        ARRAY(String),
        nullable=False,
        default=list,
    )
    metadata_json: Mapped[dict] = mapped_column(JSONB, nullable=False, default=dict)

    # --- Extracted content --------------------------------------------------
    extracted_text: Mapped[str] = mapped_column(Text, nullable=False, default="")
    image_text_type: Mapped[str | None] = mapped_column(String(64), nullable=True)
    handwriting_style_id: Mapped[str | None] = mapped_column(
        String(64),
        nullable=True,
        index=True,
    )

    # --- Processing state ---------------------------------------------------
    # NOTE(review): SQLAlchemy's ``Enum`` type persists member *names* (e.g.
    # "QUEUED") unless ``values_callable`` is given — confirm this matches the
    # database enum definition.
    status: Mapped[DocumentStatus] = mapped_column(
        SqlEnum(DocumentStatus),
        nullable=False,
        default=DocumentStatus.QUEUED,
    )
    preview_available: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    # --- Archive membership and document lineage ----------------------------
    archived_member_path: Mapped[str | None] = mapped_column(String(1024), nullable=True)
    is_archive_member: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    # Both columns below are optional self-referential foreign keys to
    # ``documents.id``.
    parent_document_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("documents.id"),
        nullable=True,
    )
    replaces_document_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("documents.id"),
        nullable=True,
    )

    # --- Timestamps (timezone-aware, defaulting to the current UTC time) ----
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
    )
    processed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    # ``onupdate`` re-evaluates the lambda whenever the ORM emits an UPDATE for
    # the row, so this column tracks the last modification time.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
    )

    # --- Relationships ------------------------------------------------------
    # Many-to-one link to the parent row. ``remote_side`` marks ``id`` as the
    # "one" side of this self-referential join; ``foreign_keys`` selects
    # ``parent_document_id`` because two columns reference ``documents.id``.
    # ``post_update=True`` has SQLAlchemy set the foreign key with a separate
    # UPDATE statement instead of as part of the INSERT.
    parent_document: Mapped["Document | None"] = relationship(
        "Document",
        remote_side="Document.id",
        foreign_keys=[parent_document_id],
        post_update=True,
    )