#!/usr/bin/env python3 """ Fetch inbox emails and route each message to the project whose tickets.authorized_emails.to matches the message To. Project is resolved per message: To/Delivered-To/X-Original-To are compared to tickets.authorized_emails.to in each projects//conf.json; the first matching project id is used. Only messages from authorized_emails.from are kept. Messages on or after MAIL_SINCE_DATE are considered. Does not use UNSEEN; does not mark as read (BODY.PEEK[]). Writes to projects//data/issues/ as JSON ...pending. One file per message. State: we skip creating .pending if .pending exists or .response exists for that base. Usage: run with GITEA_ISSUES_DIR set (e.g. via tickets-fetch-inbox.sh). MAIL_SINCE_DATE overrides date (DD-Mon-YYYY). """ from __future__ import annotations import email import hashlib import imaplib import json import os import re import sys from datetime import datetime, timezone from email.header import decode_header from email.utils import parsedate_to_datetime from pathlib import Path sys.path.insert(0, str(Path(__file__).resolve().parent)) from mail_common import imap_search_criterion_all, load_imap_config, imap_ssl_context from project_config import ( data_issues_dir_for_project, ia_dev_root, load_project_config_by_id, project_root, resolve_project_id_by_email_to, ) def decode_header_value(header: str | None) -> str: if not header: return "" parts = decode_header(header) result = [] for part, charset in parts: if isinstance(part, bytes): result.append(part.decode(charset or "utf-8", errors="replace")) else: result.append(part) return "".join(result) def parse_from_address(from_header: str) -> str: """Extract email address from From header (e.g. 'Name ' -> user@host).""" if not from_header: return "" match = re.search(r"<([^>]+)>", from_header) if match: return match.group(1).strip().lower() return from_header.strip().lower() def get_text_body(msg: email.message.Message) -> str: if msg.is_multipart(): for part in msg.walk(): if part.get_content_type() == "text/plain": payload = part.get_payload(decode=True) if payload: return payload.decode( part.get_content_charset() or "utf-8", errors="replace" ) return "" payload = msg.get_payload(decode=True) if not payload: return "" return payload.decode( msg.get_content_charset() or "utf-8", errors="replace" ) def _extract_addresses(header_value: str) -> set[str]: """Extract email addresses from a header value (e.g. 'Name , other@host').""" if not header_value or not header_value.strip(): return set() decoded = decode_header_value(header_value).strip() # Angle-bracket: <...@...> in_angle = re.findall(r"<([^>]+)>", decoded) # Standalone addr-spec (simplified: local@domain) plain = re.findall(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9]", decoded) out: set[str] = set() for a in in_angle: out.add(a.strip().lower()) for a in plain: out.add(a.strip().lower()) return out def get_message_to_addresses(msg: email.message.Message) -> list[str]: """Return ordered list of recipient addresses (To, Delivered-To, X-Original-To, etc.) for project resolution.""" order = ("To", "Delivered-To", "X-Original-To", "X-Delivered-To", "X-Envelope-To", "Envelope-To") seen: set[str] = set() result: list[str] = [] for name in order: value = msg.get(name) if not value: continue addrs = _extract_addresses(decode_header_value(value)) for a in addrs: if a not in seen: seen.add(a) result.append(a) return result def sanitize_from_for_filename(email_addr: str) -> str: """Filesystem-safe string from email (e.g. user@example.com -> user_example.com).""" return re.sub(r"[^a-zA-Z0-9._-]", "_", email_addr.replace("@", "_")) def generate_message_id(mid: str | None, uid_s: str, date_str: str, from_addr: str) -> str: """Deterministic 8-char id so the same message always gets the same base filename.""" raw = mid or f"{uid_s}_{date_str}_{from_addr}" return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:8] def sanitize_attachment_filename(name: str) -> str: """Safe filename for attachment (no path, no dangerous chars).""" if not name or not name.strip(): return "attachment" base = Path(name).name return re.sub(r"[^a-zA-Z0-9._-]", "_", base)[:200] or "attachment" def get_attachments(msg: email.message.Message) -> list[tuple[str, bytes, str]]: """Return list of (filename, payload_bytes, content_type) for each attachment.""" result: list[tuple[str, bytes, str]] = [] for part in msg.walk(): content_type = (part.get_content_type() or "").lower() if content_type.startswith("multipart/"): continue filename = part.get_filename() if not filename: # Optional: treat inline images etc. with Content-Disposition attachment disp = part.get("Content-Disposition") or "" if "attachment" in disp.lower(): ext = "" if "image" in content_type: ext = ".bin" if "octet-stream" in content_type else ".img" filename = f"attachment{ext}" else: continue filename = decode_header_value(filename).strip() if not filename: continue payload = part.get_payload(decode=True) if payload is None: continue result.append((filename, payload, content_type)) return result def parse_references(refs: str | None) -> list[str]: if not refs: return [] return [x.strip() for x in re.split(r"\s+", refs) if x.strip()] def main() -> int: cfg = load_imap_config() if not cfg["user"] or not cfg["password"]: print("[tickets-fetch-inbox] IMAP_USER and IMAP_PASSWORD required.", file=sys.stderr) return 1 # Spool is per-project; each message is routed by its To address to projects//data/issues/ print("[tickets-fetch-inbox] Project resolved per message from To/Delivered-To/X-Original-To (tickets.authorized_emails.to).") mail = imaplib.IMAP4(cfg["host"], int(cfg["port"])) if cfg["use_starttls"]: mail.starttls(imap_ssl_context(cfg.get("ssl_verify", True))) mail.login(cfg["user"], cfg["password"]) mail.select("INBOX") # Do not use UNSEEN; fetch messages on or after MAIL_SINCE_DATE (default 10-Mar-2026). Filter by authorized senders only. # Use BODY.PEEK[] instead of RFC822 so the server does not set \Seen (emails stay "unread"). since_criterion = imap_search_criterion_all() _, nums = mail.search(None, since_criterion) ids = nums[0].split() written = 0 skipped_fetch = 0 skipped_no_project = 0 skipped_from = 0 skipped_pending = 0 skipped_response = 0 for uid in ids: uid_s = uid.decode("ascii") _, data = mail.fetch(uid, "(BODY.PEEK[])") if not data or not data[0]: skipped_fetch += 1 continue raw = data[0] raw_bytes = None if isinstance(raw, tuple): if len(raw) >= 2 and isinstance(raw[1], bytes): raw_bytes = raw[1] elif len(raw) >= 2 and isinstance(raw[1], str): raw_bytes = raw[1].encode("utf-8", errors="replace") elif isinstance(raw, bytes): raw_bytes = raw if not raw_bytes: skipped_fetch += 1 continue try: msg = email.message_from_bytes(raw_bytes) except Exception: skipped_fetch += 1 continue to_addresses = get_message_to_addresses(msg) project_id: str | None = None for addr in to_addresses: project_id = resolve_project_id_by_email_to(addr) if project_id: break if not project_id: skipped_no_project += 1 continue conf = load_project_config_by_id(project_id) if not conf: skipped_no_project += 1 continue auth = (conf.get("tickets") or {}).get("authorized_emails") or {} from_list = auth.get("from") if isinstance(from_list, list): allowed_from = {str(a).strip().lower() for a in from_list if a} elif isinstance(from_list, str): allowed_from = {a.strip().lower() for a in re.split(r"[,;]", from_list) if a.strip()} else: allowed_from = set() from_raw = decode_header_value(msg.get("From")) from_addr = parse_from_address(from_raw) if from_addr not in allowed_from: skipped_from += 1 continue spool = data_issues_dir_for_project(project_id) spool.mkdir(parents=True, exist_ok=True) mid = (msg.get("Message-ID") or "").strip() to_raw = decode_header_value(msg.get("To")) to_addrs = [a.strip() for a in re.split(r"[,;]", to_raw) if a.strip()] subj = decode_header_value(msg.get("Subject")) date_h = decode_header_value(msg.get("Date")) refs = parse_references(msg.get("References")) in_reply_to = (msg.get("In-Reply-To") or "").strip() or None body = get_text_body(msg) try: if date_h: dt = parsedate_to_datetime(date_h) date_str = dt.strftime("%Y-%m-%dT%H%M%S") else: date_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%S") except Exception: date_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%S") from_safe = sanitize_from_for_filename(from_addr) msg_id_short = generate_message_id(mid, uid_s, date_str, from_addr) base = f"{date_str}.{msg_id_short}.{from_safe}" path = spool / f"{base}.pending" if path.exists(): skipped_pending += 1 continue # Already treated: .response exists (we don't keep .pending after replying). if (spool / f"{base}.response").exists(): skipped_response += 1 continue created_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") attachments_meta: list[dict[str, str | int]] = [] attachment_parts = get_attachments(msg) if attachment_parts: att_dir = spool / f"{base}.d" att_dir.mkdir(parents=True, exist_ok=True) for idx, (orig_name, payload_bytes, content_type) in enumerate(attachment_parts): safe_name = sanitize_attachment_filename(orig_name) stored_name = f"{idx}_{safe_name}" stored_path = att_dir / stored_name stored_path.write_bytes(payload_bytes) rel_path = f"{base}.d/{stored_name}" attachments_meta.append({ "filename": orig_name, "path": rel_path, "content_type": content_type, "size": len(payload_bytes), }) payload = { "version": 1, "type": "incoming", "id": msg_id_short, "message_id": mid or "", "from": from_addr, "to": to_addrs, "subject": subj, "date": date_h or "", "body": body or "", "references": refs, "in_reply_to": in_reply_to, "uid": uid_s, "created_at": created_at, "issue_number": None, "status": "pending", "attachments": attachments_meta, } path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8") written += 1 print(f"[tickets-fetch-inbox] Wrote {path.name}") mail.logout() print(f"[tickets-fetch-inbox] Done. Wrote {written} new message(s).") if skipped_fetch or skipped_no_project or skipped_from or skipped_pending or skipped_response: print( f"[tickets-fetch-inbox] Skipped: fetch/parse={skipped_fetch}, no_project_for_to={skipped_no_project}, " f"from_not_allowed={skipped_from}, pending_exists={skipped_pending}, response_exists={skipped_response}." ) return 0 if __name__ == "__main__": sys.exit(main())