73 lines
3.7 KiB
Python
73 lines
3.7 KiB
Python
"""Data model representing a stored and processed document."""
|
|
|
|
import uuid
|
|
from datetime import UTC, datetime
|
|
from enum import Enum
|
|
|
|
from sqlalchemy import Boolean, DateTime, Enum as SqlEnum, ForeignKey, Integer, String, Text
|
|
from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
|
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
|
|
from app.db.base import Base
|
|
|
|
|
|
class DocumentStatus(str, Enum):
    """Processing states an uploaded document can be in.

    The ``str`` mixin makes every member a real string, so a member compares
    equal to its lowercase value (``DocumentStatus.QUEUED == "queued"``).
    """

    # Declaration order is part of the enum's iteration order; keep it stable.
    QUEUED = "queued"  # used as the default for ``Document.status``
    PROCESSED = "processed"
    UNSUPPORTED = "unsupported"
    ERROR = "error"
    TRASHED = "trashed"
|
|
|
|
|
|
class Document(Base):
    """ORM row of the ``documents`` table.

    Holds a document's file identity, storage paths, extracted content,
    classification metadata, ownership, archive lineage and timestamps.
    """

    __tablename__ = "documents"

    # ------------------------------------------------------------------
    # Identity and file facts
    # ------------------------------------------------------------------
    # Primary key is generated client-side via ``uuid.uuid4``.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    original_filename: Mapped[str] = mapped_column(
        String(512),
        nullable=False,
    )
    source_relative_path: Mapped[str] = mapped_column(
        String(1024),
        nullable=False,
        default="",
    )
    stored_relative_path: Mapped[str] = mapped_column(
        String(1024),
        nullable=False,
    )
    mime_type: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
        default="application/octet-stream",
    )
    extension: Mapped[str] = mapped_column(
        String(32),
        nullable=False,
        default="",
    )
    # Presumably the SHA-256 digest of the file content -- verify against the writer.
    sha256: Mapped[str] = mapped_column(
        String(128),
        nullable=False,
    )
    # NOTE(review): ``Integer`` is a 32-bit column on PostgreSQL, so sizes above
    # ~2 GiB would not fit -- confirm whether ``BigInteger`` is needed.
    size_bytes: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
    )

    # ------------------------------------------------------------------
    # Classification: paths and tags (current and suggested)
    # ------------------------------------------------------------------
    logical_path: Mapped[str] = mapped_column(
        String(1024),
        nullable=False,
        default="Inbox",
    )
    suggested_path: Mapped[str | None] = mapped_column(
        String(1024),
        nullable=True,
    )
    # ``default=list`` passes the callable, so each new row gets its own list.
    tags: Mapped[list[str]] = mapped_column(
        ARRAY(String),
        nullable=False,
        default=list,
    )
    suggested_tags: Mapped[list[str]] = mapped_column(
        ARRAY(String),
        nullable=False,
        default=list,
    )

    # ------------------------------------------------------------------
    # Ownership
    # ------------------------------------------------------------------
    # ``ondelete="SET NULL"``: the database clears this column when the
    # referenced ``app_users`` row is deleted, hence the nullable column.
    owner_user_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("app_users.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )

    # ------------------------------------------------------------------
    # Extracted content and processing state
    # ------------------------------------------------------------------
    # ``default=dict`` passes the callable, so each new row gets its own dict.
    metadata_json: Mapped[dict] = mapped_column(
        JSONB,
        nullable=False,
        default=dict,
    )
    extracted_text: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        default="",
    )
    image_text_type: Mapped[str | None] = mapped_column(
        String(64),
        nullable=True,
    )
    handwriting_style_id: Mapped[str | None] = mapped_column(
        String(64),
        nullable=True,
        index=True,
    )
    status: Mapped[DocumentStatus] = mapped_column(
        SqlEnum(DocumentStatus),
        nullable=False,
        default=DocumentStatus.QUEUED,
    )
    preview_available: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
    )

    # ------------------------------------------------------------------
    # Archive membership and document lineage
    # ------------------------------------------------------------------
    archived_member_path: Mapped[str | None] = mapped_column(
        String(1024),
        nullable=True,
    )
    is_archive_member: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
    )
    # Two self-referential foreign keys point at ``documents.id``.
    parent_document_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("documents.id"),
        nullable=True,
    )
    replaces_document_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("documents.id"),
        nullable=True,
    )

    # ------------------------------------------------------------------
    # Timestamps (timezone-aware, produced in Python as UTC)
    # ------------------------------------------------------------------
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
    )
    processed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    # ``onupdate`` re-evaluates the callable whenever the ORM emits an UPDATE.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
    )

    # ------------------------------------------------------------------
    # Relationships
    # ------------------------------------------------------------------
    # ``foreign_keys`` names the join column explicitly because two columns on
    # this table reference ``documents.id``; ``remote_side`` marks ``id`` as the
    # parent end of the self-referential link.
    parent_document: Mapped["Document | None"] = relationship(
        "Document",
        remote_side="Document.id",
        foreign_keys=[parent_document_id],
        post_update=True,
    )
    # "AppUser" is referenced by name only; it is not imported in this module.
    owner_user: Mapped["AppUser | None"] = relationship(
        "AppUser",
        foreign_keys=[owner_user_id],
        post_update=True,
    )
|