from __future__ import annotations import json from datetime import date, datetime, time, timedelta, timezone from sqlalchemy import delete, select from sqlalchemy.orm import Session from app.db import init_db, session_scope from app.models import Alert, AuthResult, DailyStat, InboxStatus, LLMReport, MailMessage, Record, Report, utcnow DOMAIN = "tukutoi.com" INBOX = "tukutoi" def _dt(days_ago: int, hour: int = 0) -> datetime: target = date.today() - timedelta(days=days_ago) return datetime.combine(target, time(hour=hour), tzinfo=timezone.utc) def _purge_smoke(session: Session) -> None: smoke_reports = session.execute(select(Report.id).where(Report.report_id.like("smoke-%"))).scalars().all() if smoke_reports: smoke_records = session.execute(select(Record.id).where(Record.report_id.in_(smoke_reports))).scalars().all() if smoke_records: session.execute(delete(AuthResult).where(AuthResult.record_id.in_(smoke_records))) session.execute(delete(Record).where(Record.id.in_(smoke_records))) session.execute(delete(Report).where(Report.id.in_(smoke_reports))) smoke_messages = session.execute(select(MailMessage.id).where(MailMessage.message_id.like(" MailMessage: mail = MailMessage( inbox_id=INBOX, imap_uid=uid, message_id=f"", folder="DMARC", subject=subject, sender="reports@example.net", recipient="dmarcreports@tukutoi.com", message_date=_dt(days_ago, 6), seen=True, status="success", processed_at=utcnow(), ) session.add(mail) session.flush() return mail def _report( session: Session, *, mail: MailMessage, days_ago: int, org: str, report_id: str, sha: str, policy: str = "none", ) -> Report: report = Report( inbox_id=INBOX, mail_message_id=mail.id, raw_xml_sha256=sha, report_id=report_id, org_name=org, org_email=f"dmarc@{org}", extra_contact_info=f"https://{org}/dmarc", domain=DOMAIN, date_begin=_dt(days_ago, 0), date_end=_dt(days_ago - 1, 0) if days_ago else utcnow(), policy_p=policy, policy_sp=policy, policy_pct=100, adkim="r", aspf="r", fo="1", ) session.add(report) session.flush() return report def _record( session: Session, *, report: Report, source_ip: str, count: int, disposition: str, spf: bool, dkim: bool, known_id: str | None, known_name: str | None, header_from: str = DOMAIN, reason_type: str | None = None, reason_comment: str | None = None, dkim_domain: str | None = DOMAIN, spf_domain: str | None = DOMAIN, ) -> Record: record = Record( report_id=report.id, source_ip=source_ip, count=count, disposition=disposition, policy_dkim="pass" if dkim else "fail", policy_spf="pass" if spf else "fail", dkim_aligned=dkim, spf_aligned=spf, dmarc_pass=dkim or spf, header_from=header_from, reason_type=reason_type, reason_comment=reason_comment, known_sender_id=known_id, known_sender_name=known_name, is_known_sender=known_id is not None, ) session.add(record) session.flush() session.add( AuthResult( record_id=record.id, auth_type="dkim", domain=dkim_domain, selector="default", result="pass" if dkim else "fail", human_result="synthetic smoke data", ) ) session.add( AuthResult( record_id=record.id, auth_type="spf", domain=spf_domain, scope="mfrom", result="pass" if spf else "fail", ) ) return record def _alert( session: Session, *, fingerprint: str, severity: str, alert_type: str, title: str, summary: str, details: dict, llm_summary: str, llm_risk: str, llm_action: str, days_ago: int, ) -> None: now = utcnow() session.add( Alert( fingerprint=fingerprint, inbox_id=INBOX, domain=DOMAIN, severity=severity, type=alert_type, title=title, summary=summary, details_json=json.dumps({"smoke": True, **details}, sort_keys=True), llm_summary=llm_summary, llm_risk=llm_risk, llm_recommended_action=llm_action, status="open", first_seen_at=_dt(days_ago, 8), last_seen_at=now, ) ) def seed_smoke_data() -> None: init_db() with session_scope() as session: _purge_smoke(session) status = session.scalar(select(InboxStatus).where(InboxStatus.inbox_id == INBOX)) if not status: status = InboxStatus( inbox_id=INBOX, label="Tukutoi", domain=DOMAIN, folder="DMARC", recipient="dmarcreports@tukutoi.com", enabled=True, ) session.add(status) status.last_check_at = utcnow() status.last_success_at = utcnow() status.last_new_messages = 18 status.last_reports_imported = 15 status.last_error = None reporters = ["google.com", "yahoo.com", "outlook.com", "proton.me"] for i, days_ago in enumerate(range(13, -1, -1), start=1): mail = _mail(session, str(9000 + i), f"DMARC aggregate report for {DOMAIN}", days_ago) report = _report( session, mail=mail, days_ago=days_ago, org=reporters[i % len(reporters)], report_id=f"smoke-report-{i}", sha=f"{i:064x}", policy="none" if days_ago > 3 else "quarantine", ) base = 2800 + i * 110 _record( session, report=report, source_ip="198.51.100.20", count=base, disposition="none", spf=True, dkim=True, known_id="mailcow", known_name="mailcow outbound", ) _record( session, report=report, source_ip="203.0.113.40", count=420 + i * 12, disposition="none", spf=True, dkim=False, known_id="google_workspace", known_name="Google Workspace", dkim_domain="tukutoi.com", spf_domain="_spf.google.com", ) if i >= 9: _record( session, report=report, source_ip="203.0.113.99", count=18 + i * 4, disposition="none", spf=False, dkim=False, known_id=None, known_name=None, header_from=DOMAIN, reason_type="local_policy", reason_comment="Unrecognized source failed both aligned SPF and DKIM.", dkim_domain="bad-sender.example", spf_domain="bad-sender.example", ) if i in {12, 13}: _record( session, report=report, source_ip="192.0.2.77", count=9 + i, disposition="quarantine", spf=False, dkim=False, known_id=None, known_name=None, header_from=DOMAIN, reason_type="sampled_out", reason_comment="Receiver applied quarantine to a small unauthorized sample.", dkim_domain="newsletter.invalid", spf_domain="newsletter.invalid", ) for days_ago in range(13, -1, -1): day = date.today() - timedelta(days=days_ago) total = 3600 + (13 - days_ago) * 160 fail = 12 + max(0, 6 - days_ago) * 11 stat = DailyStat( domain=DOMAIN, date=day, total_messages=total, dmarc_pass_count=total - fail, dmarc_fail_count=fail, spf_aligned_count=total - fail - 18, spf_failed_count=fail + 18, dkim_aligned_count=total - fail - 35, dkim_failed_count=fail + 35, unknown_source_count=1 if days_ago < 6 else 0, known_source_count=2, quarantine_count=22 if days_ago in {0, 1} else 0, reject_count=0, top_reporters_json=json.dumps( [ {"org": "google.com", "reports": 5}, {"org": "yahoo.com", "reports": 4}, {"org": "outlook.com", "reports": 3}, ] ), top_sources_json=json.dumps( [ {"source_ip": "198.51.100.20", "count": total - 600}, {"source_ip": "203.0.113.40", "count": 520}, {"source_ip": "203.0.113.99", "count": fail}, ] ), ) session.add(stat) _alert( session, fingerprint=f"{DOMAIN}:unknown_source_failed_both:203.0.113.99:smoke", severity="critical", alert_type="unknown_source_failed_both", title=f"Unknown source failed SPF and DKIM for {DOMAIN}", summary="203.0.113.99 sent a growing volume of mail that failed both SPF and DKIM alignment.", details={"source_ip": "203.0.113.99", "count": 74, "spf_aligned": False, "dkim_aligned": False, "dmarc_pass": False}, llm_summary="Unknown infrastructure is sending mail that claims to be from tukutoi.com and fails both aligned SPF and DKIM.", llm_risk="This is likely spoofing or unauthorized sending. It does not by itself prove a mailbox compromise.", llm_action="Confirm whether the IP belongs to an approved sender. If not, monitor volume and keep legitimate senders passing before considering stricter policy.", days_ago=4, ) _alert( session, fingerprint=f"{DOMAIN}:quarantine_or_reject_seen:192.0.2.77:smoke", severity="critical", alert_type="quarantine_or_reject_seen", title=f"Quarantine disposition seen for {DOMAIN}", summary="Receivers quarantined a small number of messages from an unknown source.", details={"source_ip": "192.0.2.77", "count": 22, "disposition": "quarantine"}, llm_summary="Some mail claiming to be from tukutoi.com is now being quarantined by receivers.", llm_risk="The impacted traffic appears unauthorized in this sample, but verify whether any legitimate sender is missing from known senders.", llm_action="Review the quarantined source and classify it only if it is an approved sender.", days_ago=1, ) _alert( session, fingerprint=f"{DOMAIN}:dkim_authenticated_relay:203.0.113.40:smoke", severity="info", alert_type="dkim_authenticated_relay", title=f"DKIM-authenticated relay observed for {DOMAIN}", summary="A receiver observed 203.0.113.40 transmitting mail for tukutoi.com. SPF failed for that hop, DKIM aligned, and DMARC passed.", details={"source_ip": "203.0.113.40", "count": 612, "spf_aligned": False, "dkim_aligned": True, "dmarc_pass": True}, llm_summary=None, llm_risk=None, llm_action=None, days_ago=2, ) today = date.today() daily_input = {"smoke": True, "task": "daily_dmarc_summary", "domain": DOMAIN, "period": today.isoformat()} session.add( LLMReport( domain=DOMAIN, period_start=datetime.combine(today, time.min, tzinfo=timezone.utc), period_end=datetime.combine(today + timedelta(days=1), time.min, tzinfo=timezone.utc), report_type="daily", input_json=json.dumps(daily_input), output_json=json.dumps( { "headline": "DMARC is mostly healthy, with one unauthorized source to review.", "summary": "Legitimate mail is passing consistently. A small but increasing unknown source is failing both SPF and DKIM, and receivers quarantined a small sample.", "action_items": [ "Review 203.0.113.99 and confirm it is not an approved sender.", "Classify 203.0.113.40 if it belongs to an approved platform.", ], "business_risk": "Medium", } ), plain_text=( "DMARC is mostly healthy, with one unauthorized source to review.\n\n" "Legitimate mail is passing consistently. A small but increasing unknown source is failing both SPF and DKIM, " "and receivers quarantined a small sample.\n\n" "Actions: Review 203.0.113.99; classify 203.0.113.40 if approved." ), ) ) week_start = today - timedelta(days=7) session.add( LLMReport( domain=DOMAIN, period_start=datetime.combine(week_start, time.min, tzinfo=timezone.utc), period_end=datetime.combine(today, time.min, tzinfo=timezone.utc), report_type="weekly", input_json=json.dumps({"smoke": True, "task": "weekly_dmarc_summary", "domain": DOMAIN}), output_json=json.dumps( { "headline": "Weekly posture is stable with one spoofing pattern.", "summary": "Known senders continue to pass. Unknown failures appeared late in the week and should be watched before any policy change.", "action_items": ["Verify new sources.", "Keep policy at quarantine until known sender coverage is confirmed."], "business_risk": "Medium", } ), plain_text="Weekly posture is stable with one spoofing pattern.\n\nKnown senders continue to pass. Unknown failures appeared late in the week.", ) ) if __name__ == "__main__": seed_smoke_data() print("Smoke data seeded")