ia_dev/gitea-issues/tickets-fetch-inbox.py
Nicolas Cantu 61cec6f430 Sync ia_dev: token resolution via .secrets/<env>/ia_token, doc updates
**Motivations:**
- Align master with current codebase (token from projects/<id>/.secrets/<env>/ia_token)
- Id resolution by mail To or by API token; no slug

**Root causes:**
- Token moved from conf.json to .secrets/<env>/ia_token; env from directory name

**Fixes:**
- Server and scripts resolve project+env by scanning all projects and envs

**Evolutions:**
- tickets-fetch-inbox routes by To address; notary-ai agents and API doc updated

**Affected files:**
- ai_working_help/server.js, docs, project_config.py, lib/project_config.sh
- projects/README.md, lecoffreio/docs/API.md, gitea-issues/tickets-fetch-inbox.py
2026-03-16 15:00:23 +01:00

322 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Fetch inbox emails and route each message to the project whose tickets.authorized_emails.to matches the message To.
Project is resolved per message: To/Delivered-To/X-Original-To are compared to tickets.authorized_emails.to in each
projects/<id>/conf.json; the first matching project id is used. Only messages from authorized_emails.from are kept.
Messages on or after MAIL_SINCE_DATE are considered. Does not use UNSEEN; does not mark as read (BODY.PEEK[]).
Writes to projects/<id>/data/issues/ as JSON <date>.<msg_id>.<from>.pending. One file per message.
State: we skip creating .pending if .pending exists or .response exists for that base.
Usage: run with GITEA_ISSUES_DIR set (e.g. via tickets-fetch-inbox.sh). MAIL_SINCE_DATE overrides date (DD-Mon-YYYY).
"""
from __future__ import annotations
import email
import hashlib
import imaplib
import json
import os
import re
import sys
from datetime import datetime, timezone
from email.header import decode_header
from email.utils import parsedate_to_datetime
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent))
from mail_common import imap_search_criterion_all, load_imap_config, imap_ssl_context
from project_config import (
data_issues_dir_for_project,
ia_dev_root,
load_project_config_by_id,
project_root,
resolve_project_id_by_email_to,
)
def decode_header_value(header: str | None) -> str:
    """Decode a MIME-encoded header into a plain unicode string.

    Returns "" for a missing/empty header; undecodable bytes are replaced
    rather than raising.
    """
    if not header:
        return ""
    pieces: list[str] = []
    for fragment, encoding in decode_header(header):
        if isinstance(fragment, bytes):
            fragment = fragment.decode(encoding or "utf-8", errors="replace")
        pieces.append(fragment)
    return "".join(pieces)
def parse_from_address(from_header: str) -> str:
    """Extract email address from From header (e.g. 'Name <user@host>' -> user@host)."""
    if not from_header:
        return ""
    angle = re.search(r"<([^>]+)>", from_header)
    addr = angle.group(1) if angle else from_header
    return addr.strip().lower()
def get_text_body(msg: email.message.Message) -> str:
    """Return the first text/plain body of *msg*, decoded to str; "" if none."""
    if msg.is_multipart():
        # Walk all parts and take the first non-empty text/plain payload.
        for part in msg.walk():
            if part.get_content_type() != "text/plain":
                continue
            data = part.get_payload(decode=True)
            if data:
                return data.decode(part.get_content_charset() or "utf-8", errors="replace")
        return ""
    data = msg.get_payload(decode=True)
    if not data:
        return ""
    return data.decode(msg.get_content_charset() or "utf-8", errors="replace")
def _extract_addresses(header_value: str) -> set[str]:
    """Extract email addresses from a header value (e.g. 'Name <user@host>, other@host')."""
    if not header_value or not header_value.strip():
        return set()
    decoded = decode_header_value(header_value).strip()
    # Addresses inside angle brackets plus bare addr-specs (simplified local@domain).
    candidates = re.findall(r"<([^>]+)>", decoded)
    candidates += re.findall(
        r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9]", decoded
    )
    return {candidate.strip().lower() for candidate in candidates}
def get_message_to_addresses(msg: email.message.Message) -> list[str]:
    """Return ordered, de-duplicated recipient addresses for project resolution.

    Headers are scanned in a fixed priority order (To first, then the delivery
    headers MTAs add) so routing is deterministic across runs.
    """
    order = ("To", "Delivered-To", "X-Original-To", "X-Delivered-To", "X-Envelope-To", "Envelope-To")
    seen: set[str] = set()
    result: list[str] = []
    for name in order:
        value = msg.get(name)
        if not value:
            continue
        # _extract_addresses already MIME-decodes its input; decoding here too
        # (as before) was redundant and could double-decode pathological
        # encoded-words, so pass the raw header value once.
        for addr in _extract_addresses(value):
            if addr not in seen:
                seen.add(addr)
                result.append(addr)
    return result
def sanitize_from_for_filename(email_addr: str) -> str:
    """Filesystem-safe string from email (e.g. user@example.com -> user_example.com)."""
    underscored = email_addr.replace("@", "_")
    return re.sub(r"[^a-zA-Z0-9._-]", "_", underscored)
def generate_message_id(mid: str | None, uid_s: str, date_str: str, from_addr: str) -> str:
    """Deterministic 8-char id so the same message always gets the same base filename."""
    # Prefer the RFC Message-ID; fall back to a composite key when absent.
    seed = mid if mid else f"{uid_s}_{date_str}_{from_addr}"
    return hashlib.sha256(seed.encode("utf-8")).hexdigest()[:8]
def sanitize_attachment_filename(name: str) -> str:
    """Safe filename for attachment (no path, no dangerous chars)."""
    if not name or not name.strip():
        return "attachment"
    leaf = Path(name).name  # strip any directory components
    cleaned = re.sub(r"[^a-zA-Z0-9._-]", "_", leaf)[:200]
    return cleaned or "attachment"
def get_attachments(msg: email.message.Message) -> list[tuple[str, bytes, str]]:
    """Return list of (filename, payload_bytes, content_type) for each attachment."""
    out: list[tuple[str, bytes, str]] = []
    for part in msg.walk():
        ctype = (part.get_content_type() or "").lower()
        if ctype.startswith("multipart/"):
            continue
        fname = part.get_filename()
        if not fname:
            # Unnamed part: keep it only when explicitly marked as an attachment.
            disp = part.get("Content-Disposition") or ""
            if "attachment" not in disp.lower():
                continue
            suffix = ""
            if "image" in ctype:
                suffix = ".bin" if "octet-stream" in ctype else ".img"
            fname = f"attachment{suffix}"
        fname = decode_header_value(fname).strip()
        if not fname:
            continue
        data = part.get_payload(decode=True)
        if data is None:
            continue
        out.append((fname, data, ctype))
    return out
def parse_references(refs: str | None) -> list[str]:
    """Split a References header into individual message-id tokens."""
    # str.split() with no argument splits on runs of whitespace and drops
    # empty tokens — exactly the re.split + strip/filter it replaces.
    return refs.split() if refs else []
def main() -> int:
    """Fetch INBOX messages and route each to its project's issue spool.

    Returns 0 on success, 1 when IMAP credentials are missing. For each new
    message it writes one JSON <date>.<id>.<from>.pending file (plus a
    <base>.d/ directory for attachments) under the resolved project's
    data/issues/ directory.
    """
    cfg = load_imap_config()
    if not cfg["user"] or not cfg["password"]:
        print("[tickets-fetch-inbox] IMAP_USER and IMAP_PASSWORD required.", file=sys.stderr)
        return 1
    # Spool is per-project; each message is routed by its To address to projects/<id>/data/issues/
    print("[tickets-fetch-inbox] Project resolved per message from To/Delivered-To/X-Original-To (tickets.authorized_emails.to).")
    mail = imaplib.IMAP4(cfg["host"], int(cfg["port"]))
    if cfg["use_starttls"]:
        mail.starttls(imap_ssl_context(cfg.get("ssl_verify", True)))
    mail.login(cfg["user"], cfg["password"])
    mail.select("INBOX")
    # Do not use UNSEEN; fetch messages on or after MAIL_SINCE_DATE (default 10-Mar-2026). Filter by authorized senders only.
    # Use BODY.PEEK[] instead of RFC822 so the server does not set \Seen (emails stay "unread").
    since_criterion = imap_search_criterion_all()
    _, nums = mail.search(None, since_criterion)
    ids = nums[0].split()
    # Per-outcome counters, reported in the summary at the end.
    written = 0
    skipped_fetch = 0
    skipped_no_project = 0
    skipped_from = 0
    skipped_pending = 0
    skipped_response = 0
    for uid in ids:
        uid_s = uid.decode("ascii")
        _, data = mail.fetch(uid, "(BODY.PEEK[])")
        if not data or not data[0]:
            skipped_fetch += 1
            continue
        raw = data[0]
        # imaplib fetch items arrive as (envelope, bytes) tuples or bare bytes;
        # normalize either shape into raw message bytes.
        raw_bytes = None
        if isinstance(raw, tuple):
            if len(raw) >= 2 and isinstance(raw[1], bytes):
                raw_bytes = raw[1]
            elif len(raw) >= 2 and isinstance(raw[1], str):
                raw_bytes = raw[1].encode("utf-8", errors="replace")
        elif isinstance(raw, bytes):
            raw_bytes = raw
        if not raw_bytes:
            skipped_fetch += 1
            continue
        try:
            msg = email.message_from_bytes(raw_bytes)
        except Exception:
            skipped_fetch += 1
            continue
        # Resolve the target project: first recipient address that matches some
        # project's tickets.authorized_emails.to wins.
        to_addresses = get_message_to_addresses(msg)
        project_id: str | None = None
        for addr in to_addresses:
            project_id = resolve_project_id_by_email_to(addr)
            if project_id:
                break
        if not project_id:
            skipped_no_project += 1
            continue
        conf = load_project_config_by_id(project_id)
        if not conf:
            skipped_no_project += 1
            continue
        # Sender allow-list: tickets.authorized_emails.from may be a list or a
        # comma/semicolon-separated string; normalize to a lowercase set.
        auth = (conf.get("tickets") or {}).get("authorized_emails") or {}
        from_list = auth.get("from")
        if isinstance(from_list, list):
            allowed_from = {str(a).strip().lower() for a in from_list if a}
        elif isinstance(from_list, str):
            allowed_from = {a.strip().lower() for a in re.split(r"[,;]", from_list) if a.strip()}
        else:
            allowed_from = set()
        from_raw = decode_header_value(msg.get("From"))
        from_addr = parse_from_address(from_raw)
        if from_addr not in allowed_from:
            skipped_from += 1
            continue
        spool = data_issues_dir_for_project(project_id)
        spool.mkdir(parents=True, exist_ok=True)
        mid = (msg.get("Message-ID") or "").strip()
        to_raw = decode_header_value(msg.get("To"))
        to_addrs = [a.strip() for a in re.split(r"[,;]", to_raw) if a.strip()]
        subj = decode_header_value(msg.get("Subject"))
        date_h = decode_header_value(msg.get("Date"))
        refs = parse_references(msg.get("References"))
        in_reply_to = (msg.get("In-Reply-To") or "").strip() or None
        body = get_text_body(msg)
        # Filename-safe timestamp from the Date header; fall back to "now"
        # (UTC) when the header is absent or unparseable.
        try:
            if date_h:
                dt = parsedate_to_datetime(date_h)
                date_str = dt.strftime("%Y-%m-%dT%H%M%S")
            else:
                date_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%S")
        except Exception:
            date_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%S")
        from_safe = sanitize_from_for_filename(from_addr)
        msg_id_short = generate_message_id(mid, uid_s, date_str, from_addr)
        base = f"{date_str}.{msg_id_short}.{from_safe}"
        path = spool / f"{base}.pending"
        # Idempotency: the deterministic base name lets us skip messages we
        # already spooled on a previous run.
        if path.exists():
            skipped_pending += 1
            continue
        # Already treated: .response exists (we don't keep .pending after replying).
        if (spool / f"{base}.response").exists():
            skipped_response += 1
            continue
        created_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        # Attachments are written next to the JSON in a <base>.d/ directory;
        # the JSON only records their metadata and relative paths.
        attachments_meta: list[dict[str, str | int]] = []
        attachment_parts = get_attachments(msg)
        if attachment_parts:
            att_dir = spool / f"{base}.d"
            att_dir.mkdir(parents=True, exist_ok=True)
            for idx, (orig_name, payload_bytes, content_type) in enumerate(attachment_parts):
                safe_name = sanitize_attachment_filename(orig_name)
                # Index prefix avoids collisions between same-named attachments.
                stored_name = f"{idx}_{safe_name}"
                stored_path = att_dir / stored_name
                stored_path.write_bytes(payload_bytes)
                rel_path = f"{base}.d/{stored_name}"
                attachments_meta.append({
                    "filename": orig_name,
                    "path": rel_path,
                    "content_type": content_type,
                    "size": len(payload_bytes),
                })
        payload = {
            "version": 1,
            "type": "incoming",
            "id": msg_id_short,
            "message_id": mid or "",
            "from": from_addr,
            "to": to_addrs,
            "subject": subj,
            "date": date_h or "",
            "body": body or "",
            "references": refs,
            "in_reply_to": in_reply_to,
            "uid": uid_s,
            "created_at": created_at,
            "issue_number": None,
            "status": "pending",
            "attachments": attachments_meta,
        }
        path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
        written += 1
        print(f"[tickets-fetch-inbox] Wrote {path.name}")
    mail.logout()
    print(f"[tickets-fetch-inbox] Done. Wrote {written} new message(s).")
    if skipped_fetch or skipped_no_project or skipped_from or skipped_pending or skipped_response:
        print(
            f"[tickets-fetch-inbox] Skipped: fetch/parse={skipped_fetch}, no_project_for_to={skipped_no_project}, "
            f"from_not_allowed={skipped_from}, pending_exists={skipped_pending}, response_exists={skipped_response}."
        )
    return 0
if __name__ == "__main__":
    # Propagate main()'s return code as the process exit status for shell callers.
    sys.exit(main())