Remove useless reports, avoid DB-Races

This commit is contained in:
2026-05-16 13:25:10 -03:00
parent b54375dbb3
commit 026efec79b
9 changed files with 88 additions and 74 deletions
-25
View File
@@ -486,29 +486,4 @@ def _reporter_alerts(session: Session, settings: Settings, report: Report) -> li
details={**_report_evidence(report), "policy_p": report.policy_p, "policy_sp": report.policy_sp, "policy_pct": report.policy_pct}, details={**_report_evidence(report), "policy_p": report.policy_p, "policy_sp": report.policy_sp, "policy_pct": report.policy_pct},
) )
) )
missing_after = max(1, settings.alerts.thresholds.missing_reporter_days)
cutoff = _report_time(report) - timedelta(days=missing_after)
reporter_rows = session.execute(
select(Report.org_name, func.max(func.coalesce(Report.date_end, Report.date_begin, Report.created_at)))
.where(Report.domain == report.domain, Report.org_name.is_not(None))
.group_by(Report.org_name)
).all()
for org_name, last_seen in reporter_rows:
last_seen_at = _as_utc(last_seen)
if not org_name or not last_seen_at or last_seen_at >= cutoff:
continue
missed_days = (_report_time(report) - last_seen_at).days
alerts.append(
create_or_update_alert(
session,
inbox_id=report.inbox_id,
domain=report.domain,
severity="warning",
alert_type="missing_reporter",
key=org_name,
title=f"DMARC reporter missing for {report.domain}",
summary=f"{org_name} has not sent a DMARC aggregate report for {missed_days} days.",
details={**_report_evidence(report, link_report=False), "reporter": org_name, "last_seen_at": last_seen_at.isoformat()},
)
)
return alerts return alerts
-1
View File
@@ -110,7 +110,6 @@ class AlertThresholds(BaseModel):
total_volume_drop_percent: float = 80 total_volume_drop_percent: float = 80
min_messages_for_rate_alert: int = 20 min_messages_for_rate_alert: int = 20
repeated_failure_days: int = 2 repeated_failure_days: int = 2
missing_reporter_days: int = 3
class AlertsConfig(BaseModel): class AlertsConfig(BaseModel):
+19 -2
View File
@@ -4,7 +4,7 @@ from contextlib import contextmanager
from pathlib import Path from pathlib import Path
from typing import Iterator from typing import Iterator
from sqlalchemy import create_engine, text from sqlalchemy import create_engine, event, text
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
from app.config import get_settings from app.config import get_settings
@@ -23,7 +23,24 @@ def _engine_url() -> str:
return url return url
engine = create_engine(_engine_url(), future=True, pool_pre_ping=True) def _connect_args(url: str) -> dict:
if url.startswith("sqlite:"):
return {"timeout": 60}
return {}
engine_url = _engine_url()
engine = create_engine(engine_url, future=True, pool_pre_ping=True, connect_args=_connect_args(engine_url))
@event.listens_for(engine, "connect")
def _configure_sqlite(dbapi_connection, connection_record) -> None:
if engine.url.get_backend_name() != "sqlite":
return
cursor = dbapi_connection.cursor()
cursor.execute("PRAGMA busy_timeout=60000")
cursor.execute("PRAGMA journal_mode=WAL")
cursor.close()
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False, future=True) SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False, future=True)
+8 -3
View File
@@ -7,13 +7,14 @@ from dataclasses import dataclass
@dataclass @dataclass
class InboxRunLease: class InboxRunLease:
inbox_id: str inbox_id: str
_lock: threading.Lock _locks: list[threading.Lock]
_released: bool = False _released: bool = False
def release(self) -> None: def release(self) -> None:
if not self._released: if not self._released:
self._released = True self._released = True
self._lock.release() for lock in reversed(self._locks):
lock.release()
def __enter__(self) -> "InboxRunLease": def __enter__(self) -> "InboxRunLease":
return self return self
@@ -25,14 +26,18 @@ class InboxRunLease:
class InboxRunLocks: class InboxRunLocks:
def __init__(self) -> None: def __init__(self) -> None:
self._guard = threading.Lock() self._guard = threading.Lock()
self._global_lock = threading.Lock()
self._locks: dict[str, threading.Lock] = {} self._locks: dict[str, threading.Lock] = {}
def acquire(self, inbox_id: str, *, blocking: bool = False) -> InboxRunLease | None: def acquire(self, inbox_id: str, *, blocking: bool = False) -> InboxRunLease | None:
if not self._global_lock.acquire(blocking=blocking):
return None
with self._guard: with self._guard:
lock = self._locks.setdefault(inbox_id, threading.Lock()) lock = self._locks.setdefault(inbox_id, threading.Lock())
if not lock.acquire(blocking=blocking): if not lock.acquire(blocking=blocking):
self._global_lock.release()
return None return None
return InboxRunLease(inbox_id=inbox_id, _lock=lock) return InboxRunLease(inbox_id=inbox_id, _locks=[self._global_lock, lock])
def active(self, inbox_id: str) -> bool: def active(self, inbox_id: str) -> bool:
lease = self.acquire(inbox_id, blocking=False) lease = self.acquire(inbox_id, blocking=False)
+11 -7
View File
@@ -773,13 +773,17 @@ def _copy_summary_to_job(job_id: str, summary) -> None:
import_jobs.update(job_id, mutate) import_jobs.update(job_id, mutate)
def _run_import_job(job_id: str, action: str, body: ProcessNowRequest | BacklogRequest, lease: InboxRunLease) -> None: def _run_import_job(job_id: str, action: str, body: ProcessNowRequest | BacklogRequest) -> None:
lease: InboxRunLease | None = None
try:
inbox = settings.get_inbox(body.inbox_id)
lease = inbox_run_locks.acquire(inbox.id, blocking=True)
def mark_running(job): def mark_running(job):
job.status = "running" job.status = "running"
import_jobs.update(job_id, mark_running) import_jobs.update(job_id, mark_running)
try: try:
inbox = settings.get_inbox(body.inbox_id)
with session_scope() as session: with session_scope() as session:
if action == "backlog": if action == "backlog":
assert isinstance(body, BacklogRequest) assert isinstance(body, BacklogRequest)
@@ -808,6 +812,9 @@ def _run_import_job(job_id: str, action: str, body: ProcessNowRequest | BacklogR
progress_callback=lambda item: _copy_summary_to_job(job_id, item), progress_callback=lambda item: _copy_summary_to_job(job_id, item),
) )
_copy_summary_to_job(job_id, summary) _copy_summary_to_job(job_id, summary)
finally:
lease.release()
lease = None
def mark_done(job): def mark_done(job):
job.status = "succeeded" job.status = "succeeded"
@@ -824,6 +831,7 @@ def _run_import_job(job_id: str, action: str, body: ProcessNowRequest | BacklogR
import_jobs.update(job_id, mark_failed) import_jobs.update(job_id, mark_failed)
finally: finally:
if lease:
lease.release() lease.release()
@@ -835,16 +843,12 @@ def _start_import_job(action: str, body: ProcessNowRequest | BacklogRequest) ->
active = import_jobs.active_for_inbox(body.inbox_id) active = import_jobs.active_for_inbox(body.inbox_id)
if active: if active:
return active.to_dict() return active.to_dict()
lease = inbox_run_locks.acquire(body.inbox_id, blocking=False)
if not lease:
raise HTTPException(status_code=409, detail=f"Inbox {body.inbox_id} is already processing.")
try: try:
job = import_jobs.create(body.inbox_id, action) job = import_jobs.create(body.inbox_id, action)
thread = threading.Thread(target=_run_import_job, args=(job.id, action, body, lease), daemon=True) thread = threading.Thread(target=_run_import_job, args=(job.id, action, body), daemon=True)
thread.start() thread.start()
return job.to_dict() return job.to_dict()
except Exception: except Exception:
lease.release()
raise raise
-1
View File
@@ -100,4 +100,3 @@ alerts:
total_volume_drop_percent: 80 total_volume_drop_percent: 80
min_messages_for_rate_alert: 20 min_messages_for_rate_alert: 20
repeated_failure_days: 2 repeated_failure_days: 2
missing_reporter_days: 3
-1
View File
@@ -124,4 +124,3 @@ alerts:
total_volume_drop_percent: 80 total_volume_drop_percent: 80
min_messages_for_rate_alert: 20 min_messages_for_rate_alert: 20
repeated_failure_days: 2 repeated_failure_days: 2
missing_reporter_days: 3
+2 -2
View File
@@ -165,7 +165,7 @@ def test_repeated_failure_days_threshold_creates_alert():
assert any(alert.type == "repeated_dmarc_failure" for alert, _, _ in alerts) assert any(alert.type == "repeated_dmarc_failure" for alert, _, _ in alerts)
def test_missing_reporter_threshold_creates_alert(): def test_missing_reporter_gap_does_not_create_alert():
session = _session() session = _session()
settings = _settings() settings = _settings()
now = datetime(2026, 5, 16, 12, tzinfo=timezone.utc) now = datetime(2026, 5, 16, 12, tzinfo=timezone.utc)
@@ -174,4 +174,4 @@ def test_missing_reporter_threshold_creates_alert():
alerts = analyze_report(session, settings, report) alerts = analyze_report(session, settings, report)
assert any(alert.type == "missing_reporter" for alert, _, _ in alerts) assert not any(alert.type == "missing_reporter" for alert, _, _ in alerts)
+16
View File
@@ -0,0 +1,16 @@
from app.inbox_locks import InboxRunLocks
def test_inbox_run_locks_serialize_different_inboxes():
locks = InboxRunLocks()
first = locks.acquire("first", blocking=False)
assert first is not None
try:
assert locks.acquire("second", blocking=False) is None
finally:
first.release()
second = locks.acquire("second", blocking=False)
assert second is not None
second.release()