#!/usr/bin/env node
/**
 * Upload files changed between ORIG_HEAD and HEAD to AnythingLLM (post-merge / after pull).
 * Requires: ANYTHINGLLM_BASE_URL, ANYTHINGLLM_API_KEY, workspace slug via
 * ANYTHINGLLM_WORKSPACE_SLUG or .anythingllm.json
 *
 * Modes:
 *   default        — upload files changed between ORIG_HEAD and HEAD (post-pull hook).
 *   --upload-all   — walk the repo and upload every non-ignored file (initial seeding).
 * Flags: --repo-root <dir>, --upload-prefix <p> (prefix for uploaded names in --upload-all).
 * Exit code: 0 on skip/success, 1 when any upload failed.
 */
import { execFileSync } from "node:child_process";
import * as fs from "node:fs";
import * as fsPromises from "node:fs/promises";
import * as path from "node:path";
import { fileURLToPath } from "node:url";
import ignore from "ignore";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Built-in ignore rules applied on top of any user-provided .4nkaiignore file.
const ALWAYS_IGNORE = [".git/", "node_modules/", "**/node_modules/"].join("\n");

/** Read and parse a JSON file synchronously; throws if missing or invalid. */
const readJson = (p) => JSON.parse(fs.readFileSync(p, "utf8"));

/** Run a git command in repoRoot and return trimmed stdout; throws on non-zero exit. */
const git = (repoRoot, args) =>
  execFileSync("git", args, {
    cwd: repoRoot,
    encoding: "utf8",
    stdio: ["ignore", "pipe", "pipe"],
  }).trim();

/**
 * Parse CLI flags.
 * @returns {{repoRoot: string, uploadAll: boolean, uploadPrefix: string}}
 */
const parseArgs = () => {
  const out = { repoRoot: process.cwd(), uploadAll: false, uploadPrefix: "" };
  const argv = process.argv.slice(2);
  for (let i = 0; i < argv.length; i += 1) {
    if (argv[i] === "--repo-root" && argv[i + 1]) {
      out.repoRoot = path.resolve(argv[i + 1]);
      i += 1;
      continue;
    }
    if (argv[i] === "--upload-all") {
      out.uploadAll = true;
      continue;
    }
    if (argv[i] === "--upload-prefix" && argv[i + 1]) {
      out.uploadPrefix = String(argv[i + 1] ?? "").trim();
      i += 1;
      continue;
    }
  }
  return out;
};

/**
 * Recursively list regular files under dir. Symlinks are skipped (not followed).
 * FIX: an unreadable subdirectory (e.g. EACCES) is warned about and skipped
 * instead of aborting the whole walk with an unhandled rejection.
 * @param {string} dir absolute directory to walk
 * @returns {Promise<string[]>} absolute file paths
 */
const walkFiles = async (dir) => {
  const out = [];
  const scan = async (d) => {
    let entries;
    try {
      entries = await fsPromises.readdir(d, { withFileTypes: true });
    } catch (e) {
      console.error(
        `anythingllm-pull-sync: cannot read directory ${d}: ${e instanceof Error ? e.message : String(e)}`,
      );
      return;
    }
    for (const e of entries) {
      const p = path.join(d, e.name);
      if (e.isSymbolicLink()) {
        continue;
      }
      if (e.isDirectory()) {
        await scan(p);
        continue;
      }
      if (e.isFile()) {
        out.push(p);
      }
    }
  };
  await scan(dir);
  return out;
};

/** Relative path from root to abs, always with forward slashes. */
const toPosixRel = (root, abs) => path.relative(root, abs).split(path.sep).join("/");

/**
 * Resolve the AnythingLLM workspace slug for repoRoot, in priority order:
 * 1. ANYTHINGLLM_WORKSPACE_SLUG env var;
 * 2. .anythingllm.json { "workspaceSlug": "…" } in the repo;
 * 3. smart_ide projects/<name>/conf.json whose project_path resolves (realpath)
 *    to repoRoot — slug may be a string or a per-environment map keyed by
 *    SMART_IDE_ENV (test/pprod/prod, default "test").
 * Returns "" when no slug can be found (caller treats that as a skip).
 */
const loadWorkspaceSlug = (repoRoot) => {
  const env = process.env.ANYTHINGLLM_WORKSPACE_SLUG?.trim();
  if (env) {
    return env;
  }
  const cfgPath = path.join(repoRoot, ".anythingllm.json");
  try {
    const j = readJson(cfgPath);
    if (typeof j.workspaceSlug === "string" && j.workspaceSlug.trim().length > 0) {
      return j.workspaceSlug.trim();
    }
  } catch {
    /* missing */
  }
  // smart_ide integration: resolve slug from projects/<name>/conf.json when available.
  // This avoids having to write per-repo config files into the target clones.
  try {
    const smartIdeRoot = path.resolve(__dirname, "..", "..");
    const projectsDir = path.join(smartIdeRoot, "projects");
    if (!fs.existsSync(projectsDir)) {
      return "";
    }
    const repoReal = fs.realpathSync(repoRoot);
    const envNameRaw = process.env.SMART_IDE_ENV?.trim() ?? "";
    const envName =
      envNameRaw === "test" || envNameRaw === "pprod" || envNameRaw === "prod"
        ? envNameRaw
        : "test";
    for (const ent of fs.readdirSync(projectsDir, { withFileTypes: true })) {
      if (!ent.isDirectory()) {
        continue;
      }
      const confPath = path.join(projectsDir, ent.name, "conf.json");
      if (!fs.existsSync(confPath)) {
        continue;
      }
      let conf;
      try {
        conf = readJson(confPath);
      } catch {
        continue;
      }
      const projectPath =
        typeof conf?.project_path === "string" ? conf.project_path.trim() : "";
      if (!projectPath) {
        continue;
      }
      const absProjectPath = path.isAbsolute(projectPath)
        ? projectPath
        : path.resolve(smartIdeRoot, projectPath);
      let projectReal;
      try {
        projectReal = fs.realpathSync(absProjectPath);
      } catch {
        continue;
      }
      if (projectReal !== repoReal) {
        continue;
      }
      const slugCfg = conf?.smart_ide?.anythingllm_workspace_slug;
      if (typeof slugCfg === "string" && slugCfg.trim().length > 0) {
        return slugCfg.trim();
      }
      if (slugCfg && typeof slugCfg === "object") {
        const slug = slugCfg?.[envName];
        if (typeof slug === "string" && slug.trim().length > 0) {
          return slug.trim();
        }
      }
    }
  } catch {
    // ignore and fall back to empty (explicit skip handled by caller)
  }
  return "";
};

/** Strip an optional leading "Bearer " so keys pasted with the scheme still work. */
const normalizeApiKey = (raw) => {
  const t = raw.trim();
  const m = /^Bearer\s+/i.exec(t);
  return m ? t.slice(m[0].length).trim() : t;
};

/**
 * Read a strictly-positive integer from the environment.
 * Warns and returns fallback on empty, non-numeric, zero, or non-finite values.
 */
const readPositiveIntEnv = (name, fallback) => {
  const raw = process.env[name];
  if (!raw || raw.trim().length === 0) {
    return fallback;
  }
  const s = raw.trim();
  const warn = () =>
    console.error(
      `anythingllm-pull-sync: invalid ${name}=${JSON.stringify(raw)}; using default ${fallback}`,
    );
  if (!/^\d+$/.test(s)) {
    warn();
    return fallback;
  }
  const n = Number(s);
  // A very long digit string parses to Infinity; also reject "0".
  if (!Number.isFinite(n) || n <= 0) {
    warn();
    return fallback;
  }
  return n;
};

/**
 * Upload one file to the AnythingLLM document endpoint and attach it to the
 * workspace identified by slug. Throws on HTTP errors, non-JSON responses,
 * or an API-level { success: false } payload.
 */
const uploadOne = async (baseUrl, apiKey, slug, absPath, uploadName) => {
  const root = baseUrl.replace(/\/+$/, "");
  const buf = await fsPromises.readFile(absPath);
  const body = new FormData();
  body.append("file", new Blob([buf]), uploadName);
  body.append("addToWorkspaces", slug);
  const res = await fetch(`${root}/api/v1/document/upload`, {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}` },
    body,
  });
  const text = await res.text();
  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    throw new Error(`non-JSON ${res.status}: ${text.slice(0, 200)}`);
  }
  if (!res.ok || parsed.success !== true) {
    throw new Error(`${res.status}: ${text.slice(0, 400)}`);
  }
};

/** Build the ignore matcher: built-in rules plus optional .4nkaiignore in the repo. */
const buildIgnore = async (repoRoot) => {
  const ignorePath = path.join(repoRoot, ".4nkaiignore");
  let userRules = "";
  try {
    userRules = await fsPromises.readFile(ignorePath, "utf8");
  } catch {
    userRules = "";
  }
  const ig = ignore();
  ig.add(ALWAYS_IGNORE);
  ig.add(userRules);
  return ig;
};

/**
 * Shared per-file pipeline used by both modes: validate → ignore-filter →
 * stat/size checks → cap check → upload. `prepare(item)` maps a raw item to
 * { abs, relPosix, uploadName } or null to skip it. Files after the cap is
 * reached are neither uploaded nor counted as skipped (loop breaks), matching
 * the original per-mode loops.
 * @returns {Promise<{uploaded: number, skipped: number, errors: string[]}>}
 */
const runUploads = async (items, prepare, ctx) => {
  const { ig, maxBytes, maxFiles, baseUrl, apiKey, slug } = ctx;
  let uploaded = 0;
  let skipped = 0;
  const errors = [];
  for (const item of items) {
    const prepared = prepare(item);
    if (prepared === null) {
      skipped += 1;
      continue;
    }
    const { abs, relPosix, uploadName } = prepared;
    if (ig.ignores(relPosix)) {
      skipped += 1;
      continue;
    }
    let st;
    try {
      st = await fsPromises.stat(abs);
    } catch {
      skipped += 1;
      continue;
    }
    if (!st.isFile()) {
      skipped += 1;
      continue;
    }
    if (st.size > maxBytes) {
      skipped += 1;
      continue;
    }
    if (uploaded >= maxFiles) {
      console.error("anythingllm-pull-sync: cap reached (ANYTHINGLLM_SYNC_MAX_FILES).");
      break;
    }
    try {
      await uploadOne(baseUrl, apiKey, slug, abs, uploadName);
      uploaded += 1;
    } catch (e) {
      errors.push(`${relPosix}: ${e instanceof Error ? e.message : String(e)}`);
    }
  }
  return { uploaded, skipped, errors };
};

/** Print the summary line plus up to 20 error details, then exit (1 if any errors). */
const finish = (label, uploaded, skipped, errors) => {
  console.error(
    `anythingllm-pull-sync: ${label}uploaded=${uploaded} skipped=${skipped} errors=${errors.length}`,
  );
  for (const line of errors.slice(0, 20)) {
    console.error(line);
  }
  if (errors.length > 20) {
    console.error(`… ${errors.length - 20} more`);
  }
  process.exit(errors.length > 0 ? 1 : 0);
};

const main = async () => {
  const { repoRoot, uploadAll, uploadPrefix } = parseArgs();
  const baseUrl = process.env.ANYTHINGLLM_BASE_URL?.trim() ?? "";
  const apiKeyRaw = process.env.ANYTHINGLLM_API_KEY?.trim() ?? "";
  const maxBytes = readPositiveIntEnv("ANYTHINGLLM_SYNC_MAX_FILE_BYTES", 5_242_880);
  const maxFiles = readPositiveIntEnv("ANYTHINGLLM_SYNC_MAX_FILES", 200);
  if (!baseUrl || !apiKeyRaw) {
    console.error(
      "anythingllm-pull-sync: missing ANYTHINGLLM_BASE_URL or ANYTHINGLLM_API_KEY — skip.",
    );
    process.exit(0);
  }
  const apiKey = normalizeApiKey(apiKeyRaw);
  const slug = loadWorkspaceSlug(repoRoot);
  if (!slug) {
    console.error(
      "anythingllm-pull-sync: set ANYTHINGLLM_WORKSPACE_SLUG or .anythingllm.json { \"workspaceSlug\": \"…\" } — skip.",
    );
    process.exit(0);
  }

  if (uploadAll === true) {
    // Seed mode: walk the whole tree, name uploads by flattened relative path
    // (slashes → "__"), optionally prefixed with --upload-prefix.
    const ig = await buildIgnore(repoRoot);
    const absFiles = await walkFiles(repoRoot);
    const prepare = (abs) => {
      const rel = toPosixRel(repoRoot, abs);
      if (rel.length === 0 || rel.startsWith("..")) {
        return null;
      }
      const baseName = rel.split("/").join("__");
      const uploadName =
        uploadPrefix && uploadPrefix.length > 0 ? `${uploadPrefix}__${baseName}` : baseName;
      return { abs, relPosix: rel, uploadName };
    };
    const { uploaded, skipped, errors } = await runUploads(absFiles, prepare, {
      ig,
      maxBytes,
      maxFiles,
      baseUrl,
      apiKey,
      slug,
    });
    finish("mode=upload-all ", uploaded, skipped, errors);
    return;
  }

  // Diff mode: only meaningful right after a merge/pull (ORIG_HEAD must exist).
  try {
    git(repoRoot, ["rev-parse", "-q", "--verify", "ORIG_HEAD"]);
  } catch {
    console.error("anythingllm-pull-sync: no ORIG_HEAD (not a merge/pull) — skip.");
    process.exit(0);
  }
  let names;
  try {
    const out = git(repoRoot, [
      "diff",
      "--name-only",
      "--diff-filter=ACMRT",
      "ORIG_HEAD",
      "HEAD",
    ]);
    names = out.length > 0 ? out.split("\n").filter(Boolean) : [];
  } catch (e) {
    console.error(
      "anythingllm-pull-sync: git diff failed — skip.",
      e instanceof Error ? e.message : String(e),
    );
    process.exit(0);
  }
  if (names.length === 0) {
    console.error("anythingllm-pull-sync: no file changes between ORIG_HEAD and HEAD.");
    process.exit(0);
  }
  const ig = await buildIgnore(repoRoot);
  const prepare = (rel) => {
    // Defensive: refuse path traversal or absolute paths coming out of git.
    if (rel.includes("..") || path.isAbsolute(rel)) {
      return null;
    }
    const posix = rel.split(path.sep).join("/");
    return {
      abs: path.join(repoRoot, rel),
      relPosix: posix,
      uploadName: posix.split("/").join("__"),
    };
  };
  const { uploaded, skipped, errors } = await runUploads(names, prepare, {
    ig,
    maxBytes,
    maxFiles,
    baseUrl,
    apiKey,
    slug,
  });
  finish("", uploaded, skipped, errors);
};

main().catch((e) => {
  console.error("anythingllm-pull-sync:", e);
  process.exit(1);
});