Initial commit

This commit is contained in:
2026-05-16 12:05:36 -03:00
parent 0ce972a361
commit e82cee97a7
65 changed files with 9051 additions and 5 deletions
+402
View File
@@ -0,0 +1,402 @@
from __future__ import annotations
import json
from datetime import date, datetime, time, timedelta, timezone
from sqlalchemy import delete, select
from sqlalchemy.orm import Session
from app.db import init_db, session_scope
from app.models import Alert, AuthResult, DailyStat, InboxStatus, LLMReport, MailMessage, Record, Report, utcnow
# Domain that every seeded report, daily stat, alert and LLM report is filed under.
DOMAIN = "tukutoi.com"
# Inbox identifier stored as ``inbox_id`` on the seeded rows that carry one.
INBOX = "tukutoi"
def _dt(days_ago: int, hour: int = 0) -> datetime:
    """Return a UTC-aware datetime ``days_ago`` days before today at ``hour``:00.

    "Today" is the current *UTC* calendar date. Using the host's local date
    here would stamp a local day as UTC, which on hosts east of UTC can
    produce timestamps that lie in the future relative to a UTC "now".

    Args:
        days_ago: Number of whole days to go back from today's UTC date.
        hour: Hour of the day (0-23) for the returned timestamp.

    Returns:
        A timezone-aware datetime in UTC with minutes and seconds at zero.
    """
    target = datetime.now(timezone.utc).date() - timedelta(days=days_ago)
    return datetime.combine(target, time(hour=hour), tzinfo=timezone.utc)
def _purge_smoke(session: Session) -> None:
    """Delete previously seeded smoke rows and commit.

    Reports are matched by a ``smoke-`` report id prefix, mail messages by a
    ``<smoke-`` message id prefix, and alerts / LLM reports by the
    ``"smoke": true`` marker inside their JSON text columns. Records and
    auth results are removed before the reports they hang off.

    Note that DailyStat rows are deleted for the whole of ``DOMAIN``: they
    are matched by domain only, not by any smoke marker.
    """

    def ids(statement):
        # Materialise a single-column SELECT into a plain list of ids.
        return session.execute(statement).scalars().all()

    report_ids = ids(select(Report.id).where(Report.report_id.like("smoke-%")))
    if report_ids:
        record_ids = ids(select(Record.id).where(Record.report_id.in_(report_ids)))
        if record_ids:
            # Children first: auth results, then the records themselves.
            session.execute(delete(AuthResult).where(AuthResult.record_id.in_(record_ids)))
            session.execute(delete(Record).where(Record.id.in_(record_ids)))
        session.execute(delete(Report).where(Report.id.in_(report_ids)))

    message_ids = ids(select(MailMessage.id).where(MailMessage.message_id.like("<smoke-%")))
    if message_ids:
        session.execute(delete(MailMessage).where(MailMessage.id.in_(message_ids)))

    trailing_deletes = (
        delete(Alert).where(Alert.details_json.like('%"smoke": true%')),
        delete(LLMReport).where(LLMReport.input_json.like('%"smoke": true%')),
        delete(DailyStat).where(DailyStat.domain == DOMAIN),
    )
    for statement in trailing_deletes:
        session.execute(statement)

    session.commit()
def _mail(session: Session, uid: str, subject: str, days_ago: int) -> MailMessage:
    """Add one synthetic, already-processed mail message and return it.

    Args:
        session: Session the row is added to and flushed on.
        uid: Stored as the IMAP uid and embedded in the ``<smoke-...>``
            message id.
        subject: Subject line of the message.
        days_ago: Age of the message in days; it is dated at 06:00 that day.

    Returns:
        The flushed MailMessage instance.
    """
    fields = dict(
        inbox_id=INBOX,
        imap_uid=uid,
        message_id=f"<smoke-{uid}@dmarc-sentinel.local>",
        folder="DMARC",
        subject=subject,
        sender="reports@example.net",
        recipient="dmarcreports@tukutoi.com",
        message_date=_dt(days_ago, 6),
        seen=True,
        status="success",
        processed_at=utcnow(),
    )
    message = MailMessage(**fields)
    session.add(message)
    # Flush before returning: the caller reads ``.id`` from the result
    # (presumably database-generated — confirm against the model).
    session.flush()
    return message
def _report(
    session: Session,
    *,
    mail: MailMessage,
    days_ago: int,
    org: str,
    report_id: str,
    sha: str,
    policy: str = "none",
) -> Report:
    """Add one synthetic aggregate report linked to ``mail`` and return it.

    The reporting window starts at midnight ``days_ago`` days back and ends
    at the following midnight. For ``days_ago == 0`` the window ends at the
    current time instead.

    Args:
        session: Session the row is added to and flushed on.
        mail: Mail message the report is linked to via its ``id``.
        days_ago: Age in days of the start of the reporting window.
        org: Reporter name; also used to build the contact email and URL.
        report_id: Report identifier stored on the row.
        sha: Value stored as the raw XML SHA-256.
        policy: Value stored for both the ``p`` and ``sp`` policy fields.

    Returns:
        The flushed Report instance.
    """
    window_start = _dt(days_ago, 0)
    if days_ago:
        window_end = _dt(days_ago - 1, 0)
    else:
        window_end = utcnow()

    entry = Report(
        inbox_id=INBOX,
        mail_message_id=mail.id,
        raw_xml_sha256=sha,
        report_id=report_id,
        org_name=org,
        org_email=f"dmarc@{org}",
        extra_contact_info=f"https://{org}/dmarc",
        domain=DOMAIN,
        date_begin=window_start,
        date_end=window_end,
        policy_p=policy,
        policy_sp=policy,
        policy_pct=100,
        adkim="r",
        aspf="r",
        fo="1",
    )
    session.add(entry)
    session.flush()
    return entry
def _record(
    session: Session,
    *,
    report: Report,
    source_ip: str,
    count: int,
    disposition: str,
    spf: bool,
    dkim: bool,
    known_id: str | None,
    known_name: str | None,
    header_from: str = DOMAIN,
    reason_type: str | None = None,
    reason_comment: str | None = None,
    dkim_domain: str | None = DOMAIN,
    spf_domain: str | None = DOMAIN,
) -> Record:
    """Add one synthetic record plus its DKIM and SPF auth results.

    A single boolean per mechanism drives the policy result ("pass" or
    "fail"), the aligned flag and the auth-result outcome. The record counts
    as a DMARC pass when either mechanism is true, and as a known sender
    when ``known_id`` is not None.

    Args:
        session: Session the rows are added to; flushed after the record.
        report: Parent report; its ``id`` is stored on the record.
        source_ip: Sending IP address.
        count: Message count for the record.
        disposition: Disposition stored on the record.
        spf: Whether SPF passes and aligns.
        dkim: Whether DKIM passes and aligns.
        known_id: Known-sender id, or None for an unknown source.
        known_name: Known-sender display name, or None.
        header_from: Header-from domain.
        reason_type: Optional policy override reason type.
        reason_comment: Optional policy override comment.
        dkim_domain: Domain stored on the DKIM auth result.
        spf_domain: Domain stored on the SPF auth result.

    Returns:
        The flushed Record instance; the auth results are added to the
        session but not flushed here.
    """
    dkim_verdict = "pass" if dkim else "fail"
    spf_verdict = "pass" if spf else "fail"

    row = Record(
        report_id=report.id,
        source_ip=source_ip,
        count=count,
        disposition=disposition,
        policy_dkim=dkim_verdict,
        policy_spf=spf_verdict,
        dkim_aligned=dkim,
        spf_aligned=spf,
        dmarc_pass=dkim or spf,
        header_from=header_from,
        reason_type=reason_type,
        reason_comment=reason_comment,
        known_sender_id=known_id,
        known_sender_name=known_name,
        is_known_sender=known_id is not None,
    )
    session.add(row)
    # The auth results below reference row.id, so flush first.
    session.flush()

    dkim_auth = AuthResult(
        record_id=row.id,
        auth_type="dkim",
        domain=dkim_domain,
        selector="default",
        result=dkim_verdict,
        human_result="synthetic smoke data",
    )
    spf_auth = AuthResult(
        record_id=row.id,
        auth_type="spf",
        domain=spf_domain,
        scope="mfrom",
        result=spf_verdict,
    )
    for auth in (dkim_auth, spf_auth):
        session.add(auth)
    return row
def _alert(
    session: Session,
    *,
    fingerprint: str,
    severity: str,
    alert_type: str,
    title: str,
    summary: str,
    details: dict,
    llm_summary: str | None,
    llm_risk: str | None,
    llm_action: str | None,
    days_ago: int,
) -> None:
    """Add one open synthetic alert for the smoke inbox and domain.

    Args:
        session: Session the alert is added to (not flushed here).
        fingerprint: Fingerprint stored on the alert.
        severity: Severity label.
        alert_type: Stored in the alert's ``type`` column.
        title: Short alert title.
        summary: One-line description.
        details: Extra fields serialised into ``details_json``.
        llm_summary: LLM-written summary, or None when there is none.
        llm_risk: LLM-written risk note, or None.
        llm_action: LLM-recommended action, or None.
        days_ago: Age of the first sighting in days; it is dated at 08:00
            that day. The last sighting is the current time.
    """
    # The smoke marker is merged last so a "smoke" key inside ``details``
    # can never override it; _purge_smoke finds these rows by that marker.
    tagged_details = {**details, "smoke": True}
    session.add(
        Alert(
            fingerprint=fingerprint,
            inbox_id=INBOX,
            domain=DOMAIN,
            severity=severity,
            type=alert_type,
            title=title,
            summary=summary,
            details_json=json.dumps(tagged_details, sort_keys=True),
            llm_summary=llm_summary,
            llm_risk=llm_risk,
            llm_recommended_action=llm_action,
            status="open",
            first_seen_at=_dt(days_ago, 8),
            last_seen_at=utcnow(),
        )
    )
def _seed_inbox_status(session: Session) -> None:
    """Create the InboxStatus row for the smoke inbox if missing, then refresh it."""
    status = session.scalar(select(InboxStatus).where(InboxStatus.inbox_id == INBOX))
    if status is None:
        status = InboxStatus(
            inbox_id=INBOX,
            label="Tukutoi",
            domain=DOMAIN,
            folder="DMARC",
            recipient="dmarcreports@tukutoi.com",
            enabled=True,
        )
        session.add(status)
    status.last_check_at = utcnow()
    status.last_success_at = utcnow()
    status.last_new_messages = 18
    status.last_reports_imported = 15
    status.last_error = None


def _seed_reports(session: Session) -> None:
    """Add 14 daily mails and reports (oldest first) with their records."""
    reporters = ["google.com", "yahoo.com", "outlook.com", "proton.me"]
    # i runs 1..14 while days_ago runs 13..0, so days_ago == 14 - i.
    for i, days_ago in enumerate(range(13, -1, -1), start=1):
        mail = _mail(session, str(9000 + i), f"DMARC aggregate report for {DOMAIN}", days_ago)
        report = _report(
            session,
            mail=mail,
            days_ago=days_ago,
            org=reporters[i % len(reporters)],
            report_id=f"smoke-report-{i}",
            sha=f"{i:064x}",
            policy="none" if days_ago > 3 else "quarantine",
        )
        # Main outbound sender: passes both mechanisms, volume grows daily.
        _record(
            session,
            report=report,
            source_ip="198.51.100.20",
            count=2800 + i * 110,
            disposition="none",
            spf=True,
            dkim=True,
            known_id="mailcow",
            known_name="mailcow outbound",
        )
        # Known relay: the SPF domain is Google's and so cannot align with
        # DOMAIN, while DKIM is signed by DOMAIN. The flags therefore read
        # SPF fail / DKIM pass, matching the dkim_authenticated_relay alert
        # seeded for this same IP.
        _record(
            session,
            report=report,
            source_ip="203.0.113.40",
            count=420 + i * 12,
            disposition="none",
            spf=False,
            dkim=True,
            known_id="google_workspace",
            known_name="Google Workspace",
            dkim_domain="tukutoi.com",
            spf_domain="_spf.google.com",
        )
        if i >= 9:
            # Unknown source failing both mechanisms, from days_ago 5 onwards.
            _record(
                session,
                report=report,
                source_ip="203.0.113.99",
                count=18 + i * 4,
                disposition="none",
                spf=False,
                dkim=False,
                known_id=None,
                known_name=None,
                header_from=DOMAIN,
                reason_type="local_policy",
                reason_comment="Unrecognized source failed both aligned SPF and DKIM.",
                dkim_domain="bad-sender.example",
                spf_domain="bad-sender.example",
            )
        # NOTE(review): i in {12, 13} is days_ago 2 and 1, whereas the daily
        # stats report quarantines on days_ago 1 and 0 — confirm which is meant.
        if i in {12, 13}:
            _record(
                session,
                report=report,
                source_ip="192.0.2.77",
                count=9 + i,
                disposition="quarantine",
                spf=False,
                dkim=False,
                known_id=None,
                known_name=None,
                header_from=DOMAIN,
                reason_type="sampled_out",
                reason_comment="Receiver applied quarantine to a small unauthorized sample.",
                dkim_domain="newsletter.invalid",
                spf_domain="newsletter.invalid",
            )


def _seed_daily_stats(session: Session, today: date) -> None:
    """Add one DailyStat row per day for the 14 days ending on ``today``."""
    # Identical for every day, so serialise it once.
    top_reporters_json = json.dumps(
        [
            {"org": "google.com", "reports": 5},
            {"org": "yahoo.com", "reports": 4},
            {"org": "outlook.com", "reports": 3},
        ]
    )
    for days_ago in range(13, -1, -1):
        total = 3600 + (13 - days_ago) * 160
        # Failures stay flat at 12 until days_ago 5, then grow by 11 per day.
        fail = 12 + max(0, 6 - days_ago) * 11
        session.add(
            DailyStat(
                domain=DOMAIN,
                date=today - timedelta(days=days_ago),
                total_messages=total,
                dmarc_pass_count=total - fail,
                dmarc_fail_count=fail,
                spf_aligned_count=total - fail - 18,
                spf_failed_count=fail + 18,
                dkim_aligned_count=total - fail - 35,
                dkim_failed_count=fail + 35,
                unknown_source_count=1 if days_ago < 6 else 0,
                known_source_count=2,
                quarantine_count=22 if days_ago in {0, 1} else 0,
                reject_count=0,
                top_reporters_json=top_reporters_json,
                top_sources_json=json.dumps(
                    [
                        {"source_ip": "198.51.100.20", "count": total - 600},
                        {"source_ip": "203.0.113.40", "count": 520},
                        {"source_ip": "203.0.113.99", "count": fail},
                    ]
                ),
            )
        )


def _seed_alerts(session: Session) -> None:
    """Add the three synthetic alerts: two critical and one informational."""
    _alert(
        session,
        fingerprint=f"{DOMAIN}:unknown_source_failed_both:203.0.113.99:smoke",
        severity="critical",
        alert_type="unknown_source_failed_both",
        title=f"Unknown source failed SPF and DKIM for {DOMAIN}",
        summary="203.0.113.99 sent a growing volume of mail that failed both SPF and DKIM alignment.",
        details={"source_ip": "203.0.113.99", "count": 74, "spf_aligned": False, "dkim_aligned": False, "dmarc_pass": False},
        llm_summary="Unknown infrastructure is sending mail that claims to be from tukutoi.com and fails both aligned SPF and DKIM.",
        llm_risk="This is likely spoofing or unauthorized sending. It does not by itself prove a mailbox compromise.",
        llm_action="Confirm whether the IP belongs to an approved sender. If not, monitor volume and keep legitimate senders passing before considering stricter policy.",
        days_ago=4,
    )
    _alert(
        session,
        fingerprint=f"{DOMAIN}:quarantine_or_reject_seen:192.0.2.77:smoke",
        severity="critical",
        alert_type="quarantine_or_reject_seen",
        title=f"Quarantine disposition seen for {DOMAIN}",
        summary="Receivers quarantined a small number of messages from an unknown source.",
        details={"source_ip": "192.0.2.77", "count": 22, "disposition": "quarantine"},
        llm_summary="Some mail claiming to be from tukutoi.com is now being quarantined by receivers.",
        llm_risk="The impacted traffic appears unauthorized in this sample, but verify whether any legitimate sender is missing from known senders.",
        llm_action="Review the quarantined source and classify it only if it is an approved sender.",
        days_ago=1,
    )
    # Informational alert seeded without any LLM commentary.
    _alert(
        session,
        fingerprint=f"{DOMAIN}:dkim_authenticated_relay:203.0.113.40:smoke",
        severity="info",
        alert_type="dkim_authenticated_relay",
        title=f"DKIM-authenticated relay observed for {DOMAIN}",
        summary="A receiver observed 203.0.113.40 transmitting mail for tukutoi.com. SPF failed for that hop, DKIM aligned, and DMARC passed.",
        details={"source_ip": "203.0.113.40", "count": 612, "spf_aligned": False, "dkim_aligned": True, "dmarc_pass": True},
        llm_summary=None,
        llm_risk=None,
        llm_action=None,
        days_ago=2,
    )


def _seed_llm_reports(session: Session, today: date) -> None:
    """Add one daily and one weekly synthetic LLM report ending around ``today``."""

    def midnight(day: date) -> datetime:
        # Start of the given day as a UTC-aware timestamp.
        return datetime.combine(day, time.min, tzinfo=timezone.utc)

    daily_input = {"smoke": True, "task": "daily_dmarc_summary", "domain": DOMAIN, "period": today.isoformat()}
    session.add(
        LLMReport(
            domain=DOMAIN,
            period_start=midnight(today),
            period_end=midnight(today + timedelta(days=1)),
            report_type="daily",
            input_json=json.dumps(daily_input),
            output_json=json.dumps(
                {
                    "headline": "DMARC is mostly healthy, with one unauthorized source to review.",
                    "summary": "Legitimate mail is passing consistently. A small but increasing unknown source is failing both SPF and DKIM, and receivers quarantined a small sample.",
                    "action_items": [
                        "Review 203.0.113.99 and confirm it is not an approved sender.",
                        "Classify 203.0.113.40 if it belongs to an approved platform.",
                    ],
                    "business_risk": "Medium",
                }
            ),
            plain_text=(
                "DMARC is mostly healthy, with one unauthorized source to review.\n\n"
                "Legitimate mail is passing consistently. A small but increasing unknown source is failing both SPF and DKIM, "
                "and receivers quarantined a small sample.\n\n"
                "Actions: Review 203.0.113.99; classify 203.0.113.40 if approved."
            ),
        )
    )
    session.add(
        LLMReport(
            domain=DOMAIN,
            period_start=midnight(today - timedelta(days=7)),
            period_end=midnight(today),
            report_type="weekly",
            input_json=json.dumps({"smoke": True, "task": "weekly_dmarc_summary", "domain": DOMAIN}),
            output_json=json.dumps(
                {
                    "headline": "Weekly posture is stable with one spoofing pattern.",
                    "summary": "Known senders continue to pass. Unknown failures appeared late in the week and should be watched before any policy change.",
                    "action_items": ["Verify new sources.", "Keep policy at quarantine until known sender coverage is confirmed."],
                    "business_risk": "Medium",
                }
            ),
            plain_text="Weekly posture is stable with one spoofing pattern.\n\nKnown senders continue to pass. Unknown failures appeared late in the week.",
        )
    )


def seed_smoke_data() -> None:
    """Initialise the database and replace all smoke data with a fresh set.

    Existing smoke rows are purged first, so the function can be run
    repeatedly. It then seeds, in order: the inbox status, 14 days of mails,
    reports and records, 14 daily stats, three alerts, and a daily plus a
    weekly LLM report.
    """
    init_db()
    with session_scope() as session:
        _purge_smoke(session)
        # Take "today" once, from the same helper that dates the reports, so
        # stats and LLM report periods share the reports' calendar.
        today = _dt(0).date()
        _seed_inbox_status(session)
        _seed_reports(session)
        _seed_daily_stats(session, today)
        _seed_alerts(session)
        _seed_llm_reports(session, today)
# Script entry point: seed the smoke data when this file is run directly,
# then print a confirmation line.
if __name__ == "__main__":
    seed_smoke_data()
    print("Smoke data seeded")