Initial commit

This commit is contained in:
2026-05-16 12:05:36 -03:00
parent 0ce972a361
commit e82cee97a7
65 changed files with 9051 additions and 5 deletions
+8
View File
@@ -0,0 +1,8 @@
import os
# Seed the process environment with defaults for the test run.
# ``setdefault`` only fills in variables that are not already present, so any
# value exported by the caller is left untouched.
_TEST_ENV_DEFAULTS = {
    "DMARC_SENTINEL_ALLOW_NO_LLM_FOR_TESTS": "true",
    "OPENAI_API_KEY": "test",
    "DASHBOARD_USERNAME": "admin",
    "DASHBOARD_PASSWORD": "test",
    "HOMEPAGE_API_TOKEN": "test",
    "DMARC_SENTINEL_CONFIG": "tests/fixtures/config_test.yml",
}
for _variable, _fallback in _TEST_ENV_DEFAULTS.items():
    os.environ.setdefault(_variable, _fallback)
+36
View File
@@ -0,0 +1,36 @@
# Configuration fixture used by the test suite.
# NOTE(review): presumably this is tests/fixtures/config_test.yml, the path the
# tests export through DMARC_SENTINEL_CONFIG — confirm.
app:
name: "DMARC Sentinel"
base_url: "https://sentinel.tukutoi.com"
timezone: "Europe/Zurich"
poll_interval_minutes: 30
database_url: "sqlite:///data/test-main.sqlite3"
log_level: "INFO"
max_attachment_decompressed_mb: 20
max_reports_per_poll: 200
# Dashboard authentication and the API token requirement are both switched off here.
security:
dashboard_auth_enabled: false
api_token_required: false
# The API key is referenced by environment-variable name, not stored in this file.
llm:
provider: "openai"
api_key_env: "OPENAI_API_KEY"
model: "gpt-4.1-mini"
# A single inbox; its IMAP credentials are likewise referenced by environment-variable name.
inboxes:
- id: "tukutoi"
label: "Tukutoi"
domain: "tukutoi.com"
imap_host: "mail.tukutoi.com"
username_env: "TUKUTOI_IMAP_USER"
password_env: "TUKUTOI_IMAP_PASSWORD"
folder: "DMARC"
recipient: "dmarcreports@tukutoi.com"
enabled: true
# No known senders are configured for tukutoi.com.
known_senders:
tukutoi.com: []
# Email alerting is disabled.
alerts:
email:
enabled: false
+53
View File
@@ -0,0 +1,53 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Sample DMARC aggregate report fixture: a single record from 203.0.113.10
     (count 25) for tukutoi.com in which both DKIM and SPF evaluate to "fail". -->
<feedback>
<report_metadata>
<org_name>google.com</org_name>
<email>noreply-dmarc-support@google.com</email>
<extra_contact_info>https://support.google.com/a/answer/2466580</extra_contact_info>
<report_id>sample-report-1</report_id>
<date_range>
<begin>1778716800</begin>
<end>1778803200</end>
</date_range>
</report_metadata>
<policy_published>
<domain>tukutoi.com</domain>
<adkim>r</adkim>
<aspf>r</aspf>
<p>none</p>
<sp>none</sp>
<pct>100</pct>
<fo>1</fo>
</policy_published>
<record>
<row>
<source_ip>203.0.113.10</source_ip>
<count>25</count>
<policy_evaluated>
<disposition>none</disposition>
<dkim>fail</dkim>
<spf>fail</spf>
<reason>
<type>local_policy</type>
<comment>sample</comment>
</reason>
</policy_evaluated>
</row>
<identifiers>
<header_from>tukutoi.com</header_from>
</identifiers>
<auth_results>
<dkim>
<domain>bad.example</domain>
<selector>x</selector>
<result>fail</result>
<human_result>body hash did not verify</human_result>
</dkim>
<spf>
<domain>bad.example</domain>
<scope>mfrom</scope>
<result>fail</result>
</spf>
</auth_results>
</record>
</feedback>
+16
View File
@@ -0,0 +1,16 @@
import pytest
from fastapi import HTTPException
from app.validation import parse_positive_int_ids
def test_parse_alert_ids_accepts_positive_ints_and_decimal_strings():
    """A native int and a decimal string both come back as ints, in order."""
    raw_ids = [1, "2"]
    parsed = parse_positive_int_ids(raw_ids)
    assert parsed == [1, 2]
@pytest.mark.parametrize(
    "value",
    [
        ["abc"],
        [0],
        [-1],
        [True],
        "1",
    ],
)
def test_parse_alert_ids_rejects_malformed_values(value):
    """Every malformed input is refused with an HTTPException carrying status 400."""
    with pytest.raises(HTTPException) as raised:
        parse_positive_int_ids(value)
    rejection = raised.value
    assert rejection.status_code == 400
+177
View File
@@ -0,0 +1,177 @@
import json
from datetime import datetime, timedelta, timezone
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from app.analyzer import analyze_report
from app.config import Settings
from app.db import Base
from app.models import Record, Report
def _session():
    """Return a Session on a fresh in-memory SQLite database with all tables created."""
    memory_engine = create_engine("sqlite:///:memory:", future=True)
    schema = Base.metadata
    schema.create_all(memory_engine)
    return Session(memory_engine)
def _settings() -> Settings:
    """Build Settings with no inboxes, one known sender for tukutoi.com and email alerts off."""
    mailcow_sender = {
        "id": "mailcow",
        "name": "mailcow outbound",
        "ip_allowlist": ["198.51.100.5/32"],
        "dkim_domains": [],
        "spf_domains": [],
    }
    raw_config = {
        "inboxes": [],
        "known_senders": {"tukutoi.com": [mailcow_sender]},
        "alerts": {"email": {"enabled": False}},
    }
    return Settings.model_validate(raw_config)
def _report(
    session: Session,
    *,
    source_ip: str,
    count: int,
    known: bool,
    dmarc_pass: bool,
    spf_aligned: bool = False,
    dkim_aligned: bool | None = None,
    report_time: datetime | None = None,
    org_name: str = "google.com",
) -> Report:
    """Persist one Report holding a single Record and return the Report.

    ``dkim_aligned`` falls back to ``dmarc_pass`` when left as ``None`` and
    ``report_time`` falls back to the current UTC time. The report window is
    the hour that ends at ``report_time``. The session is committed before
    the report is returned.
    """
    if dkim_aligned is None:
        dkim_aligned = dmarc_pass
    if not report_time:
        report_time = datetime.now(timezone.utc)

    window_start = report_time - timedelta(hours=1)
    stored = Report(
        inbox_id="tukutoi",
        raw_xml_sha256=f"sha-{source_ip}-{count}-{known}-{dmarc_pass}-{spf_aligned}-{dkim_aligned}-{report_time.isoformat()}-{org_name}",
        report_id=f"r-{source_ip}-{report_time.isoformat()}",
        org_name=org_name,
        domain="tukutoi.com",
        date_begin=window_start,
        date_end=report_time,
    )
    session.add(stored)
    # Push the pending report to the database before the record is attached.
    session.flush()

    dkim_verdict = "pass" if dkim_aligned else "fail"
    spf_verdict = "pass" if spf_aligned else "fail"
    # Known records are attributed to the "mailcow" sender; unknown ones carry no sender.
    if known:
        sender_id, sender_name = "mailcow", "mailcow outbound"
    else:
        sender_id, sender_name = None, None

    row = Record(
        report=stored,
        source_ip=source_ip,
        count=count,
        disposition="none",
        policy_dkim=dkim_verdict,
        policy_spf=spf_verdict,
        dkim_aligned=dkim_aligned,
        spf_aligned=spf_aligned,
        dmarc_pass=dmarc_pass,
        header_from="tukutoi.com",
        known_sender_id=sender_id,
        known_sender_name=sender_name,
        is_known_sender=known,
    )
    session.add(row)
    session.commit()
    return stored
def test_unknown_source_failed_both_alert():
    """An unknown source failing DMARC yields a critical 'unknown_source_failed_both' alert."""
    db = _session()
    failing_report = _report(
        db,
        source_ip="203.0.113.10",
        count=25,
        known=False,
        dmarc_pass=False,
    )
    results = analyze_report(db, _settings(), failing_report)
    observed = {(alert.type, alert.severity) for alert, _, _ in results}
    assert ("unknown_source_failed_both", "critical") in observed
def test_known_sender_failure_alert():
    """A known sender failing DMARC yields a critical 'known_sender_dmarc_failure' alert."""
    db = _session()
    failing_report = _report(
        db,
        source_ip="198.51.100.5",
        count=25,
        known=True,
        dmarc_pass=False,
    )
    results = analyze_report(db, _settings(), failing_report)
    observed = {(alert.type, alert.severity) for alert, _, _ in results}
    assert ("known_sender_dmarc_failure", "critical") in observed
def test_dkim_authenticated_relay_is_info_not_sender_warning():
    """A DKIM-aligned, SPF-unaligned pass from an unknown source is reported as an info-level relay.

    The relay alert's summary must contain "intermediary" and "add to SPF",
    and no 'new_passing_source' alert may be produced alongside it.
    """
    db = _session()
    relayed = _report(
        db,
        source_ip="209.85.220.69",
        count=1,
        known=False,
        dmarc_pass=True,
        spf_aligned=False,
        dkim_aligned=True,
    )
    results = analyze_report(db, _settings(), relayed)

    relay_candidates = (
        alert for alert, _, _ in results if alert.type == "dkim_authenticated_relay"
    )
    relay = next(relay_candidates)

    assert relay.severity == "info"
    for expected_phrase in ("intermediary", "add to SPF"):
        assert expected_phrase in relay.summary
    assert all(alert.type != "new_passing_source" for alert, _, _ in results)
def test_alert_fingerprint_prevents_duplicate_open_alerts():
    """Analysing the same report twice yields at least one new and one non-new alert result."""
    db = _session()
    config = _settings()
    failing_report = _report(
        db,
        source_ip="203.0.113.10",
        count=25,
        known=False,
        dmarc_pass=False,
    )

    first_pass = analyze_report(db, config, failing_report)
    second_pass = analyze_report(db, config, failing_report)

    combined = first_pass + second_pass
    created_flags = [is_new for _, is_new, _ in combined]
    assert True in created_flags
    assert False in created_flags
def test_unknown_failure_spike_uses_trailing_reports_outside_current_period():
    """A spike alert reports the current count and a positive trailing average.

    Seven earlier failing reports (two to eight days back, 10 messages each)
    are stored before a current failing report of 40 messages is analysed.
    """
    db = _session()
    config = _settings()
    now = datetime(year=2026, month=5, day=16, hour=12, tzinfo=timezone.utc)

    for offset in range(2, 9):
        _report(
            db,
            source_ip=f"203.0.113.{offset}",
            count=10,
            known=False,
            dmarc_pass=False,
            report_time=now - timedelta(days=offset),
        )
    current = _report(
        db,
        source_ip="203.0.113.200",
        count=40,
        known=False,
        dmarc_pass=False,
        report_time=now,
    )

    results = analyze_report(db, config, current)
    spike_candidates = (
        alert for alert, _, _ in results if alert.type == "sudden_unknown_failure_spike"
    )
    spike = next(spike_candidates)
    payload = json.loads(spike.details_json)

    assert payload["current_24h"] == 40
    assert payload["trailing_7d_avg"] > 0
def test_configured_rate_thresholds_create_alerts():
    """A lone unknown failing source triggers a 'high_unknown_source_failure_rate' alert."""
    db = _session()
    config = _settings()
    failing_report = _report(
        db,
        source_ip="203.0.113.55",
        count=25,
        known=False,
        dmarc_pass=False,
    )
    results = analyze_report(db, config, failing_report)
    alert_types = [alert.type for alert, _, _ in results]
    assert "high_unknown_source_failure_rate" in alert_types
def test_repeated_failure_days_threshold_creates_alert():
    """The same source failing on two consecutive days triggers 'repeated_dmarc_failure'."""
    db = _session()
    config = _settings()
    now = datetime(year=2026, month=5, day=16, hour=12, tzinfo=timezone.utc)
    yesterday = now - timedelta(days=1)

    _report(
        db,
        source_ip="203.0.113.77",
        count=8,
        known=False,
        dmarc_pass=False,
        report_time=yesterday,
    )
    current = _report(
        db,
        source_ip="203.0.113.77",
        count=8,
        known=False,
        dmarc_pass=False,
        report_time=now,
    )

    results = analyze_report(db, config, current)
    alert_types = [alert.type for alert, _, _ in results]
    assert "repeated_dmarc_failure" in alert_types
def test_missing_reporter_threshold_creates_alert():
    """A reporter last seen five days ago triggers a 'missing_reporter' alert.

    One passing report from "old-reporter" is stored five days back, then a
    passing report from "current-reporter" is analysed at the current time.
    """
    db = _session()
    config = _settings()
    now = datetime(year=2026, month=5, day=16, hour=12, tzinfo=timezone.utc)
    five_days_back = now - timedelta(days=5)

    _report(
        db,
        source_ip="203.0.113.88",
        count=1,
        known=False,
        dmarc_pass=True,
        report_time=five_days_back,
        org_name="old-reporter",
    )
    current = _report(
        db,
        source_ip="203.0.113.89",
        count=1,
        known=False,
        dmarc_pass=True,
        report_time=now,
        org_name="current-reporter",
    )

    results = analyze_report(db, config, current)
    alert_types = [alert.type for alert, _, _ in results]
    assert "missing_reporter" in alert_types
+9
View File
@@ -0,0 +1,9 @@
from app.main import app
def test_generated_api_documentation_is_disabled():
    """None of the generated documentation endpoints is registered on the app."""
    registered_paths = {route.path for route in app.routes}
    for documentation_path in ("/docs", "/redoc", "/openapi.json"):
        assert documentation_path not in registered_paths
+62
View File
@@ -0,0 +1,62 @@
import gzip
import io
import zipfile
from email.message import EmailMessage
from pathlib import Path
import pytest
from app.attachment_extractor import AttachmentExtractionError, extract_dmarc_attachments, extract_payload
def _xml() -> bytes:
    """Return the raw bytes of the sample DMARC report fixture."""
    fixture = Path("tests/fixtures/sample_dmarc.xml")
    return fixture.read_bytes()
def test_gzip_attachment_extraction():
    """A gzip-compressed report is extracted as one XML payload with a 64-character digest."""
    compressed = gzip.compress(_xml())
    extracted = extract_payload("report.xml.gz", "application/octet-stream", compressed, 20)

    assert len(extracted) == 1
    only_report = extracted[0]
    assert only_report.payload.startswith(b"<?xml")
    assert len(only_report.sha256) == 64
def test_zip_attachment_extraction_rejects_traversal():
    """A zip containing a '../' member name is rejected as an unsafe zip path."""
    container = io.BytesIO()
    with zipfile.ZipFile(container, "w") as bundle:
        for member_name in ("report.xml", "../evil.xml"):
            bundle.writestr(member_name, _xml())

    with pytest.raises(AttachmentExtractionError, match="unsafe zip path"):
        extract_payload("reports.zip", "application/zip", container.getvalue(), 20)
def test_zip_attachment_extraction_rejects_nested_archives():
    """A zip whose member is itself named like a zip is rejected as a nested archive."""
    container = io.BytesIO()
    with zipfile.ZipFile(container, "w") as bundle:
        bundle.writestr("nested.zip", b"not allowed")

    with pytest.raises(AttachmentExtractionError, match="nested archive"):
        extract_payload("reports.zip", "application/zip", container.getvalue(), 20)
def test_zip_attachment_extraction_caps_reports_per_archive():
    """Two XML members exceed a per-archive limit of one and are rejected."""
    container = io.BytesIO()
    with zipfile.ZipFile(container, "w") as bundle:
        for member_name in ("one.xml", "two.xml"):
            bundle.writestr(member_name, _xml())

    with pytest.raises(AttachmentExtractionError, match="archive XML report limit"):
        extract_payload(
            "reports.zip",
            "application/zip",
            container.getvalue(),
            20,
            max_reports_per_archive=1,
        )
def test_message_attachment_detection_with_octet_stream_valid_filename():
    """An application/octet-stream attachment named 'report.gz' is picked up as one report."""
    message = EmailMessage()
    message["Subject"] = "Report domain tukutoi.com"
    message.set_content("attached")

    gzipped_report = gzip.compress(_xml())
    message.add_attachment(
        gzipped_report,
        maintype="application",
        subtype="octet-stream",
        filename="report.gz",
    )

    extracted = extract_dmarc_attachments(message, 20)
    assert len(extracted) == 1
+25
View File
@@ -0,0 +1,25 @@
import pytest
from fastapi import HTTPException
from fastapi.security import HTTPBasicCredentials
from app.auth import require_dashboard_auth
from app.config import Settings
def test_dashboard_auth_fails_closed_when_credentials_are_missing(monkeypatch):
    """With both dashboard credential variables unset, authentication raises HTTP 500."""
    for variable in ("DASHBOARD_USERNAME", "DASHBOARD_PASSWORD"):
        monkeypatch.delenv(variable, raising=False)

    raw_config = {"inboxes": [], "alerts": {"email": {"enabled": False}}}
    settings = Settings.model_validate(raw_config)

    with pytest.raises(HTTPException) as raised:
        require_dashboard_auth(HTTPBasicCredentials(username="", password=""), settings)
    failure = raised.value
    assert failure.status_code == 500
def test_dashboard_auth_accepts_configured_credentials(monkeypatch):
    """Credentials matching the environment pass without raising."""
    expected_environment = {
        "DASHBOARD_USERNAME": "admin",
        "DASHBOARD_PASSWORD": "secret",
    }
    for variable, value in expected_environment.items():
        monkeypatch.setenv(variable, value)

    raw_config = {"inboxes": [], "alerts": {"email": {"enabled": False}}}
    settings = Settings.model_validate(raw_config)
    supplied = HTTPBasicCredentials(username="admin", password="secret")
    require_dashboard_auth(supplied, settings)
+22
View File
@@ -0,0 +1,22 @@
from pathlib import Path
import pytest
from app.config import load_settings
def test_default_config_requires_real_runtime_config(monkeypatch, tmp_path):
    """Without DMARC_SENTINEL_CONFIG, loading from an empty directory fails.

    The expected FileNotFoundError message mentions "config/config.yml".
    """
    monkeypatch.delenv("DMARC_SENTINEL_CONFIG", raising=False)
    # Work from an empty temporary directory.
    monkeypatch.chdir(tmp_path)

    expected_failure = pytest.raises(FileNotFoundError, match="config/config.yml")
    with expected_failure:
        load_settings()
def test_explicit_config_path_is_loaded(monkeypatch):
    """The file named by DMARC_SENTINEL_CONFIG is loaded; its first inbox is 'tukutoi'."""
    fixture_path = Path("tests/fixtures/config_test.yml")
    monkeypatch.setenv("DMARC_SENTINEL_CONFIG", str(fixture_path))

    loaded = load_settings()
    first_inbox = loaded.inboxes[0]
    assert first_inbox.id == "tukutoi"
+79
View File
@@ -0,0 +1,79 @@
from datetime import datetime, timezone
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from app.db import Base
from app.homepage import homepage_summary, latest_summary
from app.models import Alert, LLMReport, Record, Report
def test_homepage_api_status_calculation():
    """The homepage summary reflects pass rate and open warnings.

    With 99 passing messages, 1 failing message and one warning alert, the
    summary must report status "warning", a pass rate of "99.0%" and one
    warning.
    """
    memory_engine = create_engine("sqlite:///:memory:", future=True)
    Base.metadata.create_all(memory_engine)
    db = Session(memory_engine)

    stored_report = Report(
        inbox_id="tukutoi",
        raw_xml_sha256="sha-homepage",
        report_id="r1",
        org_name="google.com",
        domain="tukutoi.com",
        date_begin=datetime.now(timezone.utc),
        date_end=datetime.now(timezone.utc),
    )
    db.add(stored_report)
    db.flush()

    passing_row = Record(
        report=stored_report,
        source_ip="198.51.100.1",
        count=99,
        dmarc_pass=True,
        spf_aligned=True,
        dkim_aligned=True,
    )
    failing_row = Record(
        report=stored_report,
        source_ip="203.0.113.10",
        count=1,
        dmarc_pass=False,
        spf_aligned=False,
        dkim_aligned=False,
    )
    open_warning = Alert(
        fingerprint="tukutoi.com:new_unknown_source:203.0.113.10",
        inbox_id="tukutoi",
        domain="tukutoi.com",
        severity="warning",
        type="new_unknown_source",
        title="New unknown source",
        summary="summary",
        details_json="{}",
    )
    for pending in (passing_row, failing_row, open_warning):
        db.add(pending)
    db.commit()

    summary = homepage_summary(db)
    assert summary["status"] == "warning"
    assert summary["dmarc_pass_rate"] == "99.0%"
    assert summary["warnings"] == 1
def test_overview_summary_prefers_portfolio_report():
    """Without a domain the '__all__' report text is returned; with a domain, that domain's text."""
    memory_engine = create_engine("sqlite:///:memory:", future=True)
    Base.metadata.create_all(memory_engine)
    db = Session(memory_engine)
    now = datetime.now(timezone.utc)

    single_domain_report = LLMReport(
        domain="tukutoi.com",
        period_start=now,
        period_end=now,
        report_type="posture",
        input_json="{}",
        output_json="{}",
        plain_text="single domain",
    )
    portfolio_report = LLMReport(
        domain="__all__",
        period_start=now,
        period_end=now,
        report_type="posture",
        input_json="{}",
        output_json="{}",
        plain_text="portfolio",
    )
    db.add_all([single_domain_report, portfolio_report])
    db.commit()

    overall_text = latest_summary(db)
    assert overall_text == "portfolio"
    domain_text = latest_summary(db, "tukutoi.com")
    assert domain_text == "single domain"
+104
View File
@@ -0,0 +1,104 @@
from app.config import Settings
from app.dmarc_parser import ParsedAuthResult, ParsedRecord
from app.known_senders import classify_record
def _record(source_ip: str, *, dkim_domain: str = "tukutoi.com", spf_domain: str = "tukutoi.com") -> ParsedRecord:
    """Build a fully passing ParsedRecord for ``source_ip``.

    The record carries one passing DKIM result for ``dkim_domain`` and one
    passing SPF result for ``spf_domain``; its header-from is tukutoi.com.
    """
    dkim_result = ParsedAuthResult(auth_type="dkim", domain=dkim_domain, result="pass")
    spf_result = ParsedAuthResult(auth_type="spf", domain=spf_domain, result="pass")
    return ParsedRecord(
        source_ip=source_ip,
        count=1,
        disposition="none",
        policy_dkim="pass",
        policy_spf="pass",
        dkim_aligned=True,
        spf_aligned=True,
        dmarc_pass=True,
        header_from="tukutoi.com",
        reason_type=None,
        reason_comment=None,
        auth_results=[dkim_result, spf_result],
    )
def test_ip_allowlisted_sender_requires_ip_match_even_when_auth_domain_matches():
    """A sender with an IP allowlist does not match a record from another IP.

    The DKIM and SPF domains match the sender, yet the result must be
    unknown with no id and no name.
    """
    allowlisted_sender = {
        "id": "mailcow",
        "name": "mailcow outbound",
        "ip_allowlist": ["45.148.30.200/32"],
        "dkim_domains": ["tukutoi.com"],
        "spf_domains": ["tukutoi.com"],
    }
    raw_config = {
        "known_senders": {"tukutoi.com": [allowlisted_sender]},
        "alerts": {"email": {"enabled": False}},
    }
    settings = Settings.model_validate(raw_config)

    outcome = classify_record(settings, "tukutoi.com", _record("50.31.205.203"))

    assert outcome.is_known is False
    assert outcome.id is None
    assert outcome.name is None
def test_ip_allowlisted_sender_matches_configured_ip():
    """A record from the allowlisted IP is classified as the 'mailcow' known sender."""
    allowlisted_sender = {
        "id": "mailcow",
        "name": "mailcow outbound",
        "ip_allowlist": ["45.148.30.200/32"],
        "dkim_domains": ["tukutoi.com"],
        "spf_domains": ["tukutoi.com"],
    }
    raw_config = {
        "known_senders": {"tukutoi.com": [allowlisted_sender]},
        "alerts": {"email": {"enabled": False}},
    }
    settings = Settings.model_validate(raw_config)

    outcome = classify_record(settings, "tukutoi.com", _record("45.148.30.200"))

    assert outcome.is_known is True
    assert outcome.id == "mailcow"
def test_domain_only_sender_still_matches_auth_domain_when_no_ip_allowlist_exists():
    """A sender with an empty IP allowlist matches on its DKIM domain alone."""
    domain_only_sender = {
        "id": "domain-only",
        "name": "domain-only sender",
        "ip_allowlist": [],
        "dkim_domains": ["tukutoi.com"],
        "spf_domains": [],
    }
    raw_config = {
        "known_senders": {"tukutoi.com": [domain_only_sender]},
        "alerts": {"email": {"enabled": False}},
    }
    settings = Settings.model_validate(raw_config)

    outcome = classify_record(settings, "tukutoi.com", _record("50.31.205.203"))

    assert outcome.is_known is True
    assert outcome.id == "domain-only"
def test_aligned_dkim_without_configured_sender_is_not_known_sender():
    """With no known senders configured, a fully passing record is still unknown."""
    raw_config = {"known_senders": {}, "alerts": {"email": {"enabled": False}}}
    settings = Settings.model_validate(raw_config)

    outcome = classify_record(settings, "tukutoi.com", _record("50.31.205.203"))

    assert outcome.is_known is False
+49
View File
@@ -0,0 +1,49 @@
from app.config import Settings
from app.llm import LLMClient, normalize_alert_explanation
from app.models import Alert
def test_llm_json_validation_fallback():
    """Explaining an alert with minimal settings yields a 'fallback' explanation.

    The fallback's risk text must mention "DMARC aggregate data alone".
    """
    minimal_settings = Settings.model_validate({"alerts": {"email": {"enabled": False}}})
    llm_client = LLMClient(minimal_settings)

    critical_alert = Alert(
        fingerprint="x",
        inbox_id="tukutoi",
        domain="tukutoi.com",
        severity="critical",
        type="unknown_source_failed_both",
        title="Unknown source failed SPF and DKIM for tukutoi.com",
        summary="Deterministic summary",
        details_json="{}",
    )

    result = llm_client.explain_alert(critical_alert)

    assert result.confidence == "fallback"
    assert "DMARC aggregate data alone" in result.risk
def test_alert_explanation_accepts_explanation_action_items_shape():
    """A payload using 'explanation' / 'action_items' keys is normalised.

    The explanation text becomes the summary, the first action item appears
    in the recommended action, the supplied confidence is kept, and the risk
    text mentions "aggregate data alone".
    """
    warning_alert = Alert(
        fingerprint="x",
        inbox_id="tukutoi",
        domain="tukutoi.com",
        severity="warning",
        type="new_authenticated_source",
        title="New authenticated source observed for tukutoi.com",
        summary="Deterministic summary",
        details_json="{}",
    )
    llm_payload = {
        "explanation": "A new authenticated source was observed for tukutoi.com.",
        "action_items": ["Confirm whether this source is authorized.", "Add it to known senders if approved."],
        "confidence": "high",
    }

    result = normalize_alert_explanation(llm_payload, warning_alert)

    assert result.summary == "A new authenticated source was observed for tukutoi.com."
    assert "aggregate data alone" in result.risk
    assert "Confirm whether this source is authorized" in result.recommended_action
    assert result.confidence == "high"
+44
View File
@@ -0,0 +1,44 @@
from pathlib import Path
import pytest
from app.dmarc_parser import DMARCParseError, parse_dmarc_xml
def test_parser_valid_dmarc_report():
    """The sample fixture parses into the expected report and single failing record."""
    fixture_bytes = Path("tests/fixtures/sample_dmarc.xml").read_bytes()
    parsed = parse_dmarc_xml(fixture_bytes)

    # Report-level fields.
    assert parsed.org_name == "google.com"
    assert parsed.domain == "tukutoi.com"
    assert parsed.policy_p == "none"
    assert parsed.date_begin is not None
    assert len(parsed.records) == 1

    # The only record fails both alignment checks and DMARC.
    row = parsed.records[0]
    assert row.source_ip == "203.0.113.10"
    assert row.count == 25
    assert row.dkim_aligned is False
    assert row.spf_aligned is False
    assert row.dmarc_pass is False

    seen_auth_types = {auth.auth_type for auth in row.auth_results}
    assert seen_auth_types == {"dkim", "spf"}
def test_parser_rejects_record_limit():
    """With ``max_records=0`` the one-record fixture exceeds the record limit."""
    fixture_bytes = Path("tests/fixtures/sample_dmarc.xml").read_bytes()
    expected_failure = pytest.raises(DMARCParseError, match="record limit")
    with expected_failure:
        parse_dmarc_xml(fixture_bytes, max_records=0)
def test_parser_rejects_invalid_source_ip():
    """A report whose source IP is not an IP address is rejected.

    The fixture is patched as bytes, so it is never decoded with the
    platform's default text encoding and re-encoded as UTF-8.
    """
    pristine = Path("tests/fixtures/sample_dmarc.xml").read_bytes()
    tampered = pristine.replace(b"203.0.113.10", b"not-an-ip")
    # Guard against the fixture changing so that the substitution becomes a no-op.
    assert tampered != pristine
    with pytest.raises(DMARCParseError, match="Invalid source IP"):
        parse_dmarc_xml(tampered)
def test_parser_rejects_absurd_record_count():
    """A record count one above ``max_record_count`` is rejected.

    The fixture is patched as bytes, so it is never decoded with the
    platform's default text encoding and re-encoded as UTF-8.
    """
    pristine = Path("tests/fixtures/sample_dmarc.xml").read_bytes()
    tampered = pristine.replace(b"<count>25</count>", b"<count>10000001</count>")
    # Guard against the fixture changing so that the substitution becomes a no-op.
    assert tampered != pristine
    with pytest.raises(DMARCParseError, match="exceeds limit"):
        parse_dmarc_xml(tampered, max_record_count=10000000)
+41
View File
@@ -0,0 +1,41 @@
from app.config import Settings
from app.scheduler import generate_daily_summaries, generate_weekly_summaries, start_scheduler
def _settings(**llm):
    """Build Settings with email alerts off and the keyword arguments as the ``llm`` section."""
    raw_config = {
        "alerts": {"email": {"enabled": False}},
        "llm": llm,
    }
    return Settings.model_validate(raw_config)
def test_disabled_digest_jobs_do_not_instantiate_llm(monkeypatch):
    """With both summaries disabled, the digest jobs return [] without building an LLM client."""

    def fail_llm(*args, **kwargs):
        # Any attempt to construct the client fails the test.
        raise AssertionError("LLM should not be constructed when summaries are disabled")

    monkeypatch.setattr("app.scheduler.LLMClient", fail_llm)
    settings = _settings(generate_daily_summary=False, generate_weekly_summary=False)

    daily_result = generate_daily_summaries(settings)
    assert daily_result == []
    weekly_result = generate_weekly_summaries(settings)
    assert weekly_result == []
def test_scheduler_only_registers_enabled_digest_jobs(monkeypatch):
    """With the daily summary off and the weekly on, only 'poll' and 'weekly' are registered."""
    created = []

    class FakeScheduler:
        """Stand-in for BackgroundScheduler that records the ``id`` of each added job."""

        running = True

        def __init__(self, timezone):
            self.jobs = []
            self.timezone = timezone
            created.append(self)

        def start(self):
            pass

        def add_job(self, func, trigger, **kwargs):
            job_id = kwargs["id"]
            self.jobs.append(job_id)

    monkeypatch.setattr("app.scheduler.BackgroundScheduler", FakeScheduler)
    settings = _settings(generate_daily_summary=False, generate_weekly_summary=True)

    scheduler = start_scheduler(settings)

    assert scheduler.jobs == ["poll", "weekly"]
+20
View File
@@ -0,0 +1,20 @@
from datetime import date
import pytest
from pydantic import ValidationError
from app.schemas import BacklogRequest
def test_backlog_request_parses_iso_dates():
    """ISO date strings in 'since' and 'before' are parsed into ``date`` objects."""
    raw_request = {
        "inbox_id": "tukutoi",
        "since": "2026-05-01",
        "before": "2026-05-16",
    }
    parsed = BacklogRequest.model_validate(raw_request)

    assert parsed.since == date(2026, 5, 1)
    assert parsed.before == date(2026, 5, 16)
def test_backlog_request_rejects_malformed_dates():
    """A 'since' value that is not a date fails validation."""
    raw_request = {"inbox_id": "tukutoi", "since": "not-a-date"}
    expected_failure = pytest.raises(ValidationError)
    with expected_failure:
        BacklogRequest.model_validate(raw_request)
+37
View File
@@ -0,0 +1,37 @@
from email.message import EmailMessage
from pathlib import Path
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from app.config import Settings
from app.db import Base
from app.attachment_extractor import ExtractedReport
from app.message_processor import _store_report
from app.models import MailMessage
def test_duplicate_sha_detection():
    """Storing the same extracted report twice reports the second as a duplicate.

    The first call returns the stored report and no duplicate; the second
    returns no new report and points at the first one as the duplicate.
    """
    memory_engine = create_engine("sqlite:///:memory:", future=True)
    Base.metadata.create_all(memory_engine)
    db = Session(memory_engine)

    message_row = MailMessage(inbox_id="tukutoi", imap_uid="1", folder="DMARC", status="skipped")
    db.add(message_row)
    db.commit()

    fixture_bytes = Path("tests/fixtures/sample_dmarc.xml").read_bytes()
    extracted = ExtractedReport("report.xml", fixture_bytes, "0" * 64)
    settings = Settings.model_validate({"alerts": {"email": {"enabled": False}}})

    def pick_inbox():
        # Use the first configured inbox, or the stub inbox when none is configured.
        if settings.inboxes:
            return settings.inboxes[0]
        return _Inbox()

    first_report, first_duplicate = _store_report(db, settings, pick_inbox(), message_row, extracted)
    db.commit()
    second_report, second_duplicate = _store_report(db, settings, pick_inbox(), message_row, extracted)

    assert first_report is not None
    assert first_duplicate is None
    assert second_report is None
    assert second_duplicate == first_report
class _Inbox:
    """Minimal stand-in inbox exposing only ``id`` and ``domain``."""

    # Both values refer to the tukutoi.com test inbox.
    id: str = "tukutoi"
    domain: str = "tukutoi.com"