Initial commit

2026-05-16 12:05:36 -03:00
parent 0ce972a361
commit e82cee97a7
65 changed files with 9051 additions and 5 deletions
@@ -0,0 +1,8 @@
+import os
+
+os.environ.setdefault("DMARC_SENTINEL_ALLOW_NO_LLM_FOR_TESTS", "true")
+os.environ.setdefault("OPENAI_API_KEY", "test")
+os.environ.setdefault("DASHBOARD_USERNAME", "admin")
+os.environ.setdefault("DASHBOARD_PASSWORD", "test")
+os.environ.setdefault("HOMEPAGE_API_TOKEN", "test")
+os.environ.setdefault("DMARC_SENTINEL_CONFIG", "tests/fixtures/config_test.yml")
@@ -0,0 +1,36 @@
+app:
+  name: "DMARC Sentinel"
+  base_url: "https://sentinel.tukutoi.com"
+  timezone: "Europe/Zurich"
+  poll_interval_minutes: 30
+  database_url: "sqlite:///data/test-main.sqlite3"
+  log_level: "INFO"
+  max_attachment_decompressed_mb: 20
+  max_reports_per_poll: 200
+
+security:
+  dashboard_auth_enabled: false
+  api_token_required: false
+
+llm:
+  provider: "openai"
+  api_key_env: "OPENAI_API_KEY"
+  model: "gpt-4.1-mini"
+
+inboxes:
+  - id: "tukutoi"
+    label: "Tukutoi"
+    domain: "tukutoi.com"
+    imap_host: "mail.tukutoi.com"
+    username_env: "TUKUTOI_IMAP_USER"
+    password_env: "TUKUTOI_IMAP_PASSWORD"
+    folder: "DMARC"
+    recipient: "dmarcreports@tukutoi.com"
+    enabled: true
+
+known_senders:
+  tukutoi.com: []
+
+alerts:
+  email:
+    enabled: false
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<feedback>
+  <report_metadata>
+    <org_name>google.com</org_name>
+    <email>noreply-dmarc-support@google.com</email>
+    <extra_contact_info>https://support.google.com/a/answer/2466580</extra_contact_info>
+    <report_id>sample-report-1</report_id>
+    <date_range>
+      <begin>1778716800</begin>
+      <end>1778803200</end>
+    </date_range>
+  </report_metadata>
+  <policy_published>
+    <domain>tukutoi.com</domain>
+    <adkim>r</adkim>
+    <aspf>r</aspf>
+    <p>none</p>
+    <sp>none</sp>
+    <pct>100</pct>
+    <fo>1</fo>
+  </policy_published>
+  <record>
+    <row>
+      <source_ip>203.0.113.10</source_ip>
+      <count>25</count>
+      <policy_evaluated>
+        <disposition>none</disposition>
+        <dkim>fail</dkim>
+        <spf>fail</spf>
+        <reason>
+          <type>local_policy</type>
+          <comment>sample</comment>
+        </reason>
+      </policy_evaluated>
+    </row>
+    <identifiers>
+      <header_from>tukutoi.com</header_from>
+    </identifiers>
+    <auth_results>
+      <dkim>
+        <domain>bad.example</domain>
+        <selector>x</selector>
+        <result>fail</result>
+        <human_result>body hash did not verify</human_result>
+      </dkim>
+      <spf>
+        <domain>bad.example</domain>
+        <scope>mfrom</scope>
+        <result>fail</result>
+      </spf>
+    </auth_results>
+  </record>
+</feedback>
@@ -0,0 +1,16 @@
+import pytest
+from fastapi import HTTPException
+
+from app.validation import parse_positive_int_ids
+
+
+def test_parse_alert_ids_accepts_positive_ints_and_decimal_strings():
+    assert parse_positive_int_ids([1, "2"]) == [1, 2]
+
+
+@pytest.mark.parametrize("value", [["abc"], [0], [-1], [True], "1"])
+def test_parse_alert_ids_rejects_malformed_values(value):
+    with pytest.raises(HTTPException) as exc:
+        parse_positive_int_ids(value)
+
+    assert exc.value.status_code == 400
@@ -0,0 +1,177 @@
+import json
+from datetime import datetime, timedelta, timezone
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session
+
+from app.analyzer import analyze_report
+from app.config import Settings
+from app.db import Base
+from app.models import Record, Report
+
+
+def _session():
+    engine = create_engine("sqlite:///:memory:", future=True)
+    Base.metadata.create_all(engine)
+    return Session(engine)
+
+
+def _settings() -> Settings:
+    return Settings.model_validate(
+        {
+            "inboxes": [],
+            "known_senders": {
+                "tukutoi.com": [
+                    {"id": "mailcow", "name": "mailcow outbound", "ip_allowlist": ["198.51.100.5/32"], "dkim_domains": [], "spf_domains": []}
+                ]
+            },
+            "alerts": {"email": {"enabled": False}},
+        }
+    )
+
+
+def _report(
+    session: Session,
+    *,
+    source_ip: str,
+    count: int,
+    known: bool,
+    dmarc_pass: bool,
+    spf_aligned: bool = False,
+    dkim_aligned: bool | None = None,
+    report_time: datetime | None = None,
+    org_name: str = "google.com",
+) -> Report:
+    dkim_aligned = dmarc_pass if dkim_aligned is None else dkim_aligned
+    report_time = report_time or datetime.now(timezone.utc)
+    report = Report(
+        inbox_id="tukutoi",
+        raw_xml_sha256=f"sha-{source_ip}-{count}-{known}-{dmarc_pass}-{spf_aligned}-{dkim_aligned}-{report_time.isoformat()}-{org_name}",
+        report_id=f"r-{source_ip}-{report_time.isoformat()}",
+        org_name=org_name,
+        domain="tukutoi.com",
+        date_begin=report_time - timedelta(hours=1),
+        date_end=report_time,
+    )
+    session.add(report)
+    session.flush()
+    session.add(
+        Record(
+            report=report,
+            source_ip=source_ip,
+            count=count,
+            disposition="none",
+            policy_dkim="pass" if dkim_aligned else "fail",
+            policy_spf="pass" if spf_aligned else "fail",
+            dkim_aligned=dkim_aligned,
+            spf_aligned=spf_aligned,
+            dmarc_pass=dmarc_pass,
+            header_from="tukutoi.com",
+            known_sender_id="mailcow" if known else None,
+            known_sender_name="mailcow outbound" if known else None,
+            is_known_sender=known,
+        )
+    )
+    session.commit()
+    return report
+
+
+def test_unknown_source_failed_both_alert():
+    session = _session()
+    report = _report(session, source_ip="203.0.113.10", count=25, known=False, dmarc_pass=False)
+
+    alerts = analyze_report(session, _settings(), report)
+
+    assert any(alert.type == "unknown_source_failed_both" and alert.severity == "critical" for alert, _, _ in alerts)
+
+
+def test_known_sender_failure_alert():
+    session = _session()
+    report = _report(session, source_ip="198.51.100.5", count=25, known=True, dmarc_pass=False)
+
+    alerts = analyze_report(session, _settings(), report)
+
+    assert any(alert.type == "known_sender_dmarc_failure" and alert.severity == "critical" for alert, _, _ in alerts)
+
+
+def test_dkim_authenticated_relay_is_info_not_sender_warning():
+    session = _session()
+    report = _report(
+        session,
+        source_ip="209.85.220.69",
+        count=1,
+        known=False,
+        dmarc_pass=True,
+        spf_aligned=False,
+        dkim_aligned=True,
+    )
+
+    alerts = analyze_report(session, _settings(), report)
+
+    relay = next(alert for alert, _, _ in alerts if alert.type == "dkim_authenticated_relay")
+    assert relay.severity == "info"
+    assert "intermediary" in relay.summary
+    assert "add to SPF" in relay.summary
+    assert not any(alert.type == "new_passing_source" for alert, _, _ in alerts)
+
+
+def test_alert_fingerprint_prevents_duplicate_open_alerts():
+    session = _session()
+    settings = _settings()
+    report = _report(session, source_ip="203.0.113.10", count=25, known=False, dmarc_pass=False)
+    first = analyze_report(session, settings, report)
+    second = analyze_report(session, settings, report)
+
+    created = [is_new for _, is_new, _ in first + second]
+    assert created.count(True) >= 1
+    assert created.count(False) >= 1
+
+
+def test_unknown_failure_spike_uses_trailing_reports_outside_current_period():
+    session = _session()
+    settings = _settings()
+    now = datetime(2026, 5, 16, 12, tzinfo=timezone.utc)
+    for offset in range(2, 9):
+        _report(session, source_ip=f"203.0.113.{offset}", count=10, known=False, dmarc_pass=False, report_time=now - timedelta(days=offset))
+    report = _report(session, source_ip="203.0.113.200", count=40, known=False, dmarc_pass=False, report_time=now)
+
+    alerts = analyze_report(session, settings, report)
+
+    spike = next(alert for alert, _, _ in alerts if alert.type == "sudden_unknown_failure_spike")
+    details = json.loads(spike.details_json)
+    assert details["current_24h"] == 40
+    assert details["trailing_7d_avg"] > 0
+
+
+def test_configured_rate_thresholds_create_alerts():
+    session = _session()
+    settings = _settings()
+    report = _report(session, source_ip="203.0.113.55", count=25, known=False, dmarc_pass=False)
+
+    alerts = analyze_report(session, settings, report)
+
+    assert any(alert.type == "high_unknown_source_failure_rate" for alert, _, _ in alerts)
+
+
+def test_repeated_failure_days_threshold_creates_alert():
+    session = _session()
+    settings = _settings()
+    now = datetime(2026, 5, 16, 12, tzinfo=timezone.utc)
+    _report(session, source_ip="203.0.113.77", count=8, known=False, dmarc_pass=False, report_time=now - timedelta(days=1))
+    report = _report(session, source_ip="203.0.113.77", count=8, known=False, dmarc_pass=False, report_time=now)
+
+    alerts = analyze_report(session, settings, report)
+
+    assert any(alert.type == "repeated_dmarc_failure" for alert, _, _ in alerts)
+
+
+def test_missing_reporter_threshold_creates_alert():
+    session = _session()
+    settings = _settings()
+    now = datetime(2026, 5, 16, 12, tzinfo=timezone.utc)
+    _report(session, source_ip="203.0.113.88", count=1, known=False, dmarc_pass=True, report_time=now - timedelta(days=5), org_name="old-reporter")
+    report = _report(session, source_ip="203.0.113.89", count=1, known=False, dmarc_pass=True, report_time=now, org_name="current-reporter")
+
+    alerts = analyze_report(session, settings, report)
+
+    assert any(alert.type == "missing_reporter" for alert, _, _ in alerts)
@@ -0,0 +1,9 @@
+from app.main import app
+
+
+def test_generated_api_documentation_is_disabled():
+    paths = {route.path for route in app.routes}
+
+    assert "/docs" not in paths
+    assert "/redoc" not in paths
+    assert "/openapi.json" not in paths
@@ -0,0 +1,62 @@
+import gzip
+import io
+import zipfile
+from email.message import EmailMessage
+from pathlib import Path
+
+import pytest
+
+from app.attachment_extractor import AttachmentExtractionError, extract_dmarc_attachments, extract_payload
+
+
+def _xml() -> bytes:
+    return Path("tests/fixtures/sample_dmarc.xml").read_bytes()
+
+
+def test_gzip_attachment_extraction():
+    gz = gzip.compress(_xml())
+    reports = extract_payload("report.xml.gz", "application/octet-stream", gz, 20)
+
+    assert len(reports) == 1
+    assert reports[0].payload.startswith(b"<?xml")
+    assert len(reports[0].sha256) == 64
+
+
+def test_zip_attachment_extraction_rejects_traversal():
+    buf = io.BytesIO()
+    with zipfile.ZipFile(buf, "w") as archive:
+        archive.writestr("report.xml", _xml())
+        archive.writestr("../evil.xml", _xml())
+
+    with pytest.raises(AttachmentExtractionError, match="unsafe zip path"):
+        extract_payload("reports.zip", "application/zip", buf.getvalue(), 20)
+
+
+def test_zip_attachment_extraction_rejects_nested_archives():
+    buf = io.BytesIO()
+    with zipfile.ZipFile(buf, "w") as archive:
+        archive.writestr("nested.zip", b"not allowed")
+
+    with pytest.raises(AttachmentExtractionError, match="nested archive"):
+        extract_payload("reports.zip", "application/zip", buf.getvalue(), 20)
+
+
+def test_zip_attachment_extraction_caps_reports_per_archive():
+    buf = io.BytesIO()
+    with zipfile.ZipFile(buf, "w") as archive:
+        archive.writestr("one.xml", _xml())
+        archive.writestr("two.xml", _xml())
+
+    with pytest.raises(AttachmentExtractionError, match="archive XML report limit"):
+        extract_payload("reports.zip", "application/zip", buf.getvalue(), 20, max_reports_per_archive=1)
+
+
+def test_message_attachment_detection_with_octet_stream_valid_filename():
+    msg = EmailMessage()
+    msg["Subject"] = "Report domain tukutoi.com"
+    msg.set_content("attached")
+    msg.add_attachment(gzip.compress(_xml()), maintype="application", subtype="octet-stream", filename="report.gz")
+
+    reports = extract_dmarc_attachments(msg, 20)
+
+    assert len(reports) == 1
@@ -0,0 +1,25 @@
+import pytest
+from fastapi import HTTPException
+from fastapi.security import HTTPBasicCredentials
+
+from app.auth import require_dashboard_auth
+from app.config import Settings
+
+
+def test_dashboard_auth_fails_closed_when_credentials_are_missing(monkeypatch):
+    monkeypatch.delenv("DASHBOARD_USERNAME", raising=False)
+    monkeypatch.delenv("DASHBOARD_PASSWORD", raising=False)
+    settings = Settings.model_validate({"inboxes": [], "alerts": {"email": {"enabled": False}}})
+
+    with pytest.raises(HTTPException) as exc:
+        require_dashboard_auth(HTTPBasicCredentials(username="", password=""), settings)
+
+    assert exc.value.status_code == 500
+
+
+def test_dashboard_auth_accepts_configured_credentials(monkeypatch):
+    monkeypatch.setenv("DASHBOARD_USERNAME", "admin")
+    monkeypatch.setenv("DASHBOARD_PASSWORD", "secret")
+    settings = Settings.model_validate({"inboxes": [], "alerts": {"email": {"enabled": False}}})
+
+    require_dashboard_auth(HTTPBasicCredentials(username="admin", password="secret"), settings)
@@ -0,0 +1,22 @@
+from pathlib import Path
+
+import pytest
+
+from app.config import load_settings
+
+
+def test_default_config_requires_real_runtime_config(monkeypatch, tmp_path):
+    monkeypatch.delenv("DMARC_SENTINEL_CONFIG", raising=False)
+    monkeypatch.chdir(tmp_path)
+
+    with pytest.raises(FileNotFoundError, match="config/config.yml"):
+        load_settings()
+
+
+def test_explicit_config_path_is_loaded(monkeypatch):
+    path = Path("tests/fixtures/config_test.yml")
+    monkeypatch.setenv("DMARC_SENTINEL_CONFIG", str(path))
+
+    settings = load_settings()
+
+    assert settings.inboxes[0].id == "tukutoi"
@@ -0,0 +1,79 @@
+from datetime import datetime, timezone
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session
+
+from app.db import Base
+from app.homepage import homepage_summary, latest_summary
+from app.models import Alert, LLMReport, Record, Report
+
+
+def test_homepage_api_status_calculation():
+    engine = create_engine("sqlite:///:memory:", future=True)
+    Base.metadata.create_all(engine)
+    session = Session(engine)
+    report = Report(
+        inbox_id="tukutoi",
+        raw_xml_sha256="sha-homepage",
+        report_id="r1",
+        org_name="google.com",
+        domain="tukutoi.com",
+        date_begin=datetime.now(timezone.utc),
+        date_end=datetime.now(timezone.utc),
+    )
+    session.add(report)
+    session.flush()
+    session.add(Record(report=report, source_ip="198.51.100.1", count=99, dmarc_pass=True, spf_aligned=True, dkim_aligned=True))
+    session.add(Record(report=report, source_ip="203.0.113.10", count=1, dmarc_pass=False, spf_aligned=False, dkim_aligned=False))
+    session.add(
+        Alert(
+            fingerprint="tukutoi.com:new_unknown_source:203.0.113.10",
+            inbox_id="tukutoi",
+            domain="tukutoi.com",
+            severity="warning",
+            type="new_unknown_source",
+            title="New unknown source",
+            summary="summary",
+            details_json="{}",
+        )
+    )
+    session.commit()
+
+    data = homepage_summary(session)
+
+    assert data["status"] == "warning"
+    assert data["dmarc_pass_rate"] == "99.0%"
+    assert data["warnings"] == 1
+
+
+def test_overview_summary_prefers_portfolio_report():
+    engine = create_engine("sqlite:///:memory:", future=True)
+    Base.metadata.create_all(engine)
+    session = Session(engine)
+    now = datetime.now(timezone.utc)
+    session.add_all(
+        [
+            LLMReport(
+                domain="tukutoi.com",
+                period_start=now,
+                period_end=now,
+                report_type="posture",
+                input_json="{}",
+                output_json="{}",
+                plain_text="single domain",
+            ),
+            LLMReport(
+                domain="__all__",
+                period_start=now,
+                period_end=now,
+                report_type="posture",
+                input_json="{}",
+                output_json="{}",
+                plain_text="portfolio",
+            ),
+        ]
+    )
+    session.commit()
+
+    assert latest_summary(session) == "portfolio"
+    assert latest_summary(session, "tukutoi.com") == "single domain"
@@ -0,0 +1,104 @@
+from app.config import Settings
+from app.dmarc_parser import ParsedAuthResult, ParsedRecord
+from app.known_senders import classify_record
+
+
+def _record(source_ip: str, *, dkim_domain: str = "tukutoi.com", spf_domain: str = "tukutoi.com") -> ParsedRecord:
+    return ParsedRecord(
+        source_ip=source_ip,
+        count=1,
+        disposition="none",
+        policy_dkim="pass",
+        policy_spf="pass",
+        dkim_aligned=True,
+        spf_aligned=True,
+        dmarc_pass=True,
+        header_from="tukutoi.com",
+        reason_type=None,
+        reason_comment=None,
+        auth_results=[
+            ParsedAuthResult(auth_type="dkim", domain=dkim_domain, result="pass"),
+            ParsedAuthResult(auth_type="spf", domain=spf_domain, result="pass"),
+        ],
+    )
+
+
+def test_ip_allowlisted_sender_requires_ip_match_even_when_auth_domain_matches():
+    settings = Settings.model_validate(
+        {
+            "known_senders": {
+                "tukutoi.com": [
+                    {
+                        "id": "mailcow",
+                        "name": "mailcow outbound",
+                        "ip_allowlist": ["45.148.30.200/32"],
+                        "dkim_domains": ["tukutoi.com"],
+                        "spf_domains": ["tukutoi.com"],
+                    }
+                ]
+            },
+            "alerts": {"email": {"enabled": False}},
+        }
+    )
+
+    match = classify_record(settings, "tukutoi.com", _record("50.31.205.203"))
+
+    assert match.is_known is False
+    assert match.id is None
+    assert match.name is None
+
+
+def test_ip_allowlisted_sender_matches_configured_ip():
+    settings = Settings.model_validate(
+        {
+            "known_senders": {
+                "tukutoi.com": [
+                    {
+                        "id": "mailcow",
+                        "name": "mailcow outbound",
+                        "ip_allowlist": ["45.148.30.200/32"],
+                        "dkim_domains": ["tukutoi.com"],
+                        "spf_domains": ["tukutoi.com"],
+                    }
+                ]
+            },
+            "alerts": {"email": {"enabled": False}},
+        }
+    )
+
+    match = classify_record(settings, "tukutoi.com", _record("45.148.30.200"))
+
+    assert match.is_known is True
+    assert match.id == "mailcow"
+
+
+def test_domain_only_sender_still_matches_auth_domain_when_no_ip_allowlist_exists():
+    settings = Settings.model_validate(
+        {
+            "known_senders": {
+                "tukutoi.com": [
+                    {
+                        "id": "domain-only",
+                        "name": "domain-only sender",
+                        "ip_allowlist": [],
+                        "dkim_domains": ["tukutoi.com"],
+                        "spf_domains": [],
+                    }
+                ]
+            },
+            "alerts": {"email": {"enabled": False}},
+        }
+    )
+
+    match = classify_record(settings, "tukutoi.com", _record("50.31.205.203"))
+
+    assert match.is_known is True
+    assert match.id == "domain-only"
+
+
+def test_aligned_dkim_without_configured_sender_is_not_known_sender():
+    settings = Settings.model_validate({"known_senders": {}, "alerts": {"email": {"enabled": False}}})
+
+    match = classify_record(settings, "tukutoi.com", _record("50.31.205.203"))
+
+    assert match.is_known is False
@@ -0,0 +1,49 @@
+from app.config import Settings
+from app.llm import LLMClient, normalize_alert_explanation
+from app.models import Alert
+
+
+def test_llm_json_validation_fallback():
+    client = LLMClient(Settings.model_validate({"alerts": {"email": {"enabled": False}}}))
+    alert = Alert(
+        fingerprint="x",
+        inbox_id="tukutoi",
+        domain="tukutoi.com",
+        severity="critical",
+        type="unknown_source_failed_both",
+        title="Unknown source failed SPF and DKIM for tukutoi.com",
+        summary="Deterministic summary",
+        details_json="{}",
+    )
+
+    explanation = client.explain_alert(alert)
+
+    assert explanation.confidence == "fallback"
+    assert "DMARC aggregate data alone" in explanation.risk
+
+
+def test_alert_explanation_accepts_explanation_action_items_shape():
+    alert = Alert(
+        fingerprint="x",
+        inbox_id="tukutoi",
+        domain="tukutoi.com",
+        severity="warning",
+        type="new_authenticated_source",
+        title="New authenticated source observed for tukutoi.com",
+        summary="Deterministic summary",
+        details_json="{}",
+    )
+
+    explanation = normalize_alert_explanation(
+        {
+            "explanation": "A new authenticated source was observed for tukutoi.com.",
+            "action_items": ["Confirm whether this source is authorized.", "Add it to known senders if approved."],
+            "confidence": "high",
+        },
+        alert,
+    )
+
+    assert explanation.summary == "A new authenticated source was observed for tukutoi.com."
+    assert "aggregate data alone" in explanation.risk
+    assert "Confirm whether this source is authorized" in explanation.recommended_action
+    assert explanation.confidence == "high"
@@ -0,0 +1,44 @@
+from pathlib import Path
+
+import pytest
+
+from app.dmarc_parser import DMARCParseError, parse_dmarc_xml
+
+
+def test_parser_valid_dmarc_report():
+    payload = Path("tests/fixtures/sample_dmarc.xml").read_bytes()
+    report = parse_dmarc_xml(payload)
+
+    assert report.org_name == "google.com"
+    assert report.domain == "tukutoi.com"
+    assert report.policy_p == "none"
+    assert report.date_begin is not None
+    assert len(report.records) == 1
+    record = report.records[0]
+    assert record.source_ip == "203.0.113.10"
+    assert record.count == 25
+    assert record.dkim_aligned is False
+    assert record.spf_aligned is False
+    assert record.dmarc_pass is False
+    assert {auth.auth_type for auth in record.auth_results} == {"dkim", "spf"}
+
+
+def test_parser_rejects_record_limit():
+    payload = Path("tests/fixtures/sample_dmarc.xml").read_bytes()
+
+    with pytest.raises(DMARCParseError, match="record limit"):
+        parse_dmarc_xml(payload, max_records=0)
+
+
+def test_parser_rejects_invalid_source_ip():
+    payload = Path("tests/fixtures/sample_dmarc.xml").read_text().replace("203.0.113.10", "not-an-ip").encode()
+
+    with pytest.raises(DMARCParseError, match="Invalid source IP"):
+        parse_dmarc_xml(payload)
+
+
+def test_parser_rejects_absurd_record_count():
+    payload = Path("tests/fixtures/sample_dmarc.xml").read_text().replace("<count>25</count>", "<count>10000001</count>").encode()
+
+    with pytest.raises(DMARCParseError, match="exceeds limit"):
+        parse_dmarc_xml(payload, max_record_count=10000000)
@@ -0,0 +1,41 @@
+from app.config import Settings
+from app.scheduler import generate_daily_summaries, generate_weekly_summaries, start_scheduler
+
+
+def _settings(**llm):
+    return Settings.model_validate({"alerts": {"email": {"enabled": False}}, "llm": llm})
+
+
+def test_disabled_digest_jobs_do_not_instantiate_llm(monkeypatch):
+    def fail_llm(*args, **kwargs):
+        raise AssertionError("LLM should not be constructed when summaries are disabled")
+
+    monkeypatch.setattr("app.scheduler.LLMClient", fail_llm)
+    settings = _settings(generate_daily_summary=False, generate_weekly_summary=False)
+
+    assert generate_daily_summaries(settings) == []
+    assert generate_weekly_summaries(settings) == []
+
+
+def test_scheduler_only_registers_enabled_digest_jobs(monkeypatch):
+    """With the daily summary off and the weekly one on, only ``poll`` and ``weekly`` jobs are added."""
+    # Collects every FakeScheduler that gets constructed; not asserted on below.
+    created = []
+
+    class FakeScheduler:
+        """Stand-in for ``app.scheduler.BackgroundScheduler`` that records job ids."""
+
+        running = True
+
+        def __init__(self, timezone):
+            self.timezone = timezone
+            self.jobs = []
+            created.append(self)
+
+        def add_job(self, func, trigger, **kwargs):
+            # Only the job id matters for this test; the callable and trigger are ignored.
+            self.jobs.append(kwargs["id"])
+
+        def start(self):
+            # Nothing to start in the fake.
+            pass
+
+    monkeypatch.setattr("app.scheduler.BackgroundScheduler", FakeScheduler)
+
+    scheduler = start_scheduler(_settings(generate_daily_summary=False, generate_weekly_summary=True))
+
+    assert scheduler.jobs == ["poll", "weekly"]
@@ -0,0 +1,20 @@
+from datetime import date
+
+import pytest
+from pydantic import ValidationError
+
+from app.schemas import BacklogRequest
+
+
+def test_backlog_request_parses_iso_dates():
+    request = BacklogRequest.model_validate(
+        {"inbox_id": "tukutoi", "since": "2026-05-01", "before": "2026-05-16"}
+    )
+
+    assert request.since == date(2026, 5, 1)
+    assert request.before == date(2026, 5, 16)
+
+
+def test_backlog_request_rejects_malformed_dates():
+    with pytest.raises(ValidationError):
+        BacklogRequest.model_validate({"inbox_id": "tukutoi", "since": "not-a-date"})
@@ -0,0 +1,37 @@
+from email.message import EmailMessage
+from pathlib import Path
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session
+
+from app.config import Settings
+from app.db import Base
+from app.attachment_extractor import ExtractedReport
+from app.message_processor import _store_report
+from app.models import MailMessage
+
+
+def test_duplicate_sha_detection():
+    engine = create_engine("sqlite:///:memory:", future=True)
+    Base.metadata.create_all(engine)
+    session = Session(engine)
+    mail = MailMessage(inbox_id="tukutoi", imap_uid="1", folder="DMARC", status="skipped")
+    session.add(mail)
+    session.commit()
+    payload = Path("tests/fixtures/sample_dmarc.xml").read_bytes()
+    extracted = ExtractedReport("report.xml", payload, "0" * 64)
+    settings = Settings.model_validate({"alerts": {"email": {"enabled": False}}})
+
+    report, duplicate = _store_report(session, settings, settings.inboxes[0] if settings.inboxes else _Inbox(), mail, extracted)
+    session.commit()
+    second, second_duplicate = _store_report(session, settings, settings.inboxes[0] if settings.inboxes else _Inbox(), mail, extracted)
+
+    assert report is not None
+    assert duplicate is None
+    assert second is None
+    assert second_duplicate == report
+
+
+class _Inbox:
+    """Minimal inbox stand-in handed to ``_store_report`` when Settings defines no inboxes."""
+
+    id = "tukutoi"
+    domain = "tukutoi.com"