Initial state: - ia_dev was historically referenced as ./ia_dev in docs and integrations, while the vendored module lives under services/ia_dev. - AnythingLLM sync and hook installation had error masking / weak exit signaling. - Proxy layers did not validate proxy path segments, allowing path normalization tricks. Motivation: - Make the IDE-oriented workflow usable (sync -> act -> deploy/preview) with explicit errors. - Reduce security footguns in proxying and script automation. Resolution: - Standardize IA_DEV_ROOT usage and documentation to services/ia_dev. - Add SSH remote data mirroring + optional AnythingLLM ingestion. - Extend AnythingLLM pull sync to support upload-all/prefix and fail on upload errors. - Harden smart-ide-sso-gateway and smart-ide-global-api proxying with safe-path checks and non-leaking error responses. - Improve ia-dev-gateway runner validation and reduce sensitive path leakage. - Add site scaffold tool (Vite/React) with OIDC + chat via sso-gateway -> orchestrator. Root cause: - Historical layout changes (submodule -> vendored tree) and missing central contracts for path resolution. - Missing validation for proxy path traversal patterns. - Overuse of silent fallbacks (|| true, exit 0 on partial failures) in automation scripts. Impacted features: - Project sync: git pull + AnythingLLM sync + remote data mirror ingestion. - Site frontends: SSO gateway proxy and orchestrator intents (rag.query, chat.local). - Agent execution: ia-dev-gateway script runner and SSE output. 
Code modified: - scripts/remote-data-ssh-sync.sh - scripts/anythingllm-pull-sync/sync.mjs - scripts/install-anythingllm-post-merge-hook.sh - cron/git-pull-project-clones.sh - services/smart-ide-sso-gateway/src/server.ts - services/smart-ide-global-api/src/server.ts - services/smart-ide-orchestrator/src/server.ts - services/ia-dev-gateway/src/server.ts - services/ia_dev/tools/site-generate.sh Documentation modified: - docs/** (architecture, API docs, ia_dev module + integration, scripts) Configurations modified: - config/services.local.env.example - services/*/.env.example Files in deploy modified: - services/ia_dev/deploy/* Files in logs impacted: - logs/ia_dev.log (runtime only) - .logs/* (runtime only) Databases and other sources modified: - None Off-project modifications: - None Files in .smartIde modified: - .smartIde/agents/*.md - services/ia_dev/.smartIde/** Files in .secrets modified: - None New patch version in VERSION: - 0.0.5 CHANGELOG.md updated: - yes
396 lines
11 KiB
JavaScript
Executable File
396 lines
11 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
/**
|
|
* Upload files changed between ORIG_HEAD and HEAD to AnythingLLM (post-merge / after pull).
|
|
* Requires: ANYTHINGLLM_BASE_URL, ANYTHINGLLM_API_KEY, workspace slug via ANYTHINGLLM_WORKSPACE_SLUG or .anythingllm.json
|
|
*/
|
|
import { execFileSync } from "node:child_process";
|
|
import * as fs from "node:fs";
|
|
import * as fsPromises from "node:fs/promises";
|
|
import * as path from "node:path";
|
|
import { fileURLToPath } from "node:url";
|
|
import ignore from "ignore";
|
|
|
|
// Absolute directory of this script (ES modules have no built-in __dirname).
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Rules always excluded from upload regardless of the user's .4nkaiignore,
// newline-joined so the block can be fed directly to ignore().add().
const ALWAYS_IGNORE = [".git/", "node_modules/", "**/node_modules/"].join("\n");
|
|
|
|
/**
 * Read a file and parse its contents as JSON.
 * @param {string} p - Path to the JSON file.
 * @returns {unknown} The parsed JSON value.
 * @throws If the file cannot be read or is not valid JSON.
 */
const readJson = (p) => JSON.parse(fs.readFileSync(p, "utf8"));
|
|
|
|
/**
 * Run a git command inside repoRoot and return its trimmed stdout.
 * stdin is ignored; stdout and stderr are captured as UTF-8 text.
 * @param {string} repoRoot - Working directory for the git invocation.
 * @param {string[]} args - Arguments passed to the git binary.
 * @returns {string} Trimmed stdout of the command.
 * @throws If git is missing or exits non-zero.
 */
const git = (repoRoot, args) => {
  const options = {
    cwd: repoRoot,
    encoding: "utf8",
    stdio: ["ignore", "pipe", "pipe"],
  };
  const stdout = execFileSync("git", args, options);
  return stdout.trim();
};
|
|
|
|
/**
 * Parse CLI flags for this script.
 * Supported: --repo-root <dir> (resolved to absolute, default cwd),
 * --upload-all (boolean), --upload-prefix <prefix> (trimmed).
 * Unknown arguments are silently ignored.
 * @returns {{repoRoot: string, uploadAll: boolean, uploadPrefix: string}}
 */
const parseArgs = () => {
  const parsed = { repoRoot: process.cwd(), uploadAll: false, uploadPrefix: "" };
  const argv = process.argv.slice(2);
  let i = 0;
  while (i < argv.length) {
    const flag = argv[i];
    if (flag === "--repo-root" && argv[i + 1]) {
      parsed.repoRoot = path.resolve(argv[i + 1]);
      i += 2;
    } else if (flag === "--upload-all") {
      parsed.uploadAll = true;
      i += 1;
    } else if (flag === "--upload-prefix" && argv[i + 1]) {
      parsed.uploadPrefix = String(argv[i + 1] ?? "").trim();
      i += 2;
    } else {
      i += 1;
    }
  }
  return parsed;
};
|
|
|
|
/**
 * Recursively collect the absolute paths of all regular files under dir.
 * Symbolic links are skipped entirely (never followed), so the walk cannot
 * escape dir or loop. Directory entries are visited depth-first, in
 * readdir order.
 * @param {string} dir - Root directory to walk.
 * @returns {Promise<string[]>} Absolute paths of every regular file found.
 */
const walkFiles = async (dir) => {
  const collected = [];
  const visit = async (current) => {
    const entries = await fsPromises.readdir(current, { withFileTypes: true });
    for (const entry of entries) {
      if (entry.isSymbolicLink()) {
        continue;
      }
      const full = path.join(current, entry.name);
      if (entry.isDirectory()) {
        await visit(full);
      } else if (entry.isFile()) {
        collected.push(full);
      }
    }
  };
  await visit(dir);
  return collected;
};
|
|
|
|
/**
 * Relative path from root to abs, normalized to forward slashes so the
 * result is stable across operating systems.
 * @param {string} root - Base directory.
 * @param {string} abs - Path to relativize.
 * @returns {string} POSIX-style relative path.
 */
const toPosixRel = (root, abs) => path.relative(root, abs).split(path.sep).join("/");
|
|
|
|
// Resolve the AnythingLLM workspace slug for repoRoot, trying in order:
//   1. the ANYTHINGLLM_WORKSPACE_SLUG environment variable,
//   2. .anythingllm.json { "workspaceSlug": "…" } at the repo root,
//   3. the smart_ide projects/<id>/conf.json whose project_path resolves to
//      repoRoot (the slug there may be a plain string or a per-environment
//      object keyed by SMART_IDE_ENV).
// Returns "" when no slug can be resolved; the caller treats "" as a skip.
const loadWorkspaceSlug = (repoRoot) => {
  const env = process.env.ANYTHINGLLM_WORKSPACE_SLUG?.trim();
  if (env) {
    return env;
  }
  const cfgPath = path.join(repoRoot, ".anythingllm.json");
  try {
    const j = readJson(cfgPath);
    if (typeof j.workspaceSlug === "string" && j.workspaceSlug.trim().length > 0) {
      return j.workspaceSlug.trim();
    }
  } catch {
    /* missing */
  }
  // smart_ide integration: resolve slug from projects/<id>/conf.json when available.
  // This avoids having to write per-repo config files into the target clones.
  try {
    // Script lives two levels below the smart_ide root (scripts/<name>/).
    const smartIdeRoot = path.resolve(__dirname, "..", "..");
    const projectsDir = path.join(smartIdeRoot, "projects");
    if (!projectsDir || !fs.existsSync(projectsDir)) {
      return "";
    }
    // Compare realpaths so symlinked clones still match their project entry.
    const repoReal = fs.realpathSync(repoRoot);
    const envNameRaw = process.env.SMART_IDE_ENV?.trim() ?? "";
    // Only the three known environments are honored; anything else maps to "test".
    const envName = envNameRaw === "test" || envNameRaw === "pprod" || envNameRaw === "prod" ? envNameRaw : "test";

    for (const ent of fs.readdirSync(projectsDir, { withFileTypes: true })) {
      if (!ent.isDirectory()) {
        continue;
      }
      const confPath = path.join(projectsDir, ent.name, "conf.json");
      if (!fs.existsSync(confPath)) {
        continue;
      }
      let conf;
      try {
        conf = readJson(confPath);
      } catch {
        // Unreadable or invalid conf.json: try the next project entry.
        continue;
      }
      const projectPath = typeof conf?.project_path === "string" ? conf.project_path.trim() : "";
      if (!projectPath) {
        continue;
      }
      // project_path may be absolute or relative to the smart_ide root.
      const absProjectPath = path.isAbsolute(projectPath)
        ? projectPath
        : path.resolve(smartIdeRoot, projectPath);
      let projectReal;
      try {
        projectReal = fs.realpathSync(absProjectPath);
      } catch {
        // project_path does not exist on disk; skip this entry.
        continue;
      }
      if (projectReal !== repoReal) {
        continue;
      }

      // Matched project: accept either a plain string slug or a
      // per-environment map ({ test | pprod | prod }).
      const slugCfg = conf?.smart_ide?.anythingllm_workspace_slug;
      if (typeof slugCfg === "string" && slugCfg.trim().length > 0) {
        return slugCfg.trim();
      }
      if (slugCfg && typeof slugCfg === "object") {
        const slug = slugCfg?.[envName];
        if (typeof slug === "string" && slug.trim().length > 0) {
          return slug.trim();
        }
      }
    }
  } catch {
    // ignore and fall back to empty (explicit skip handled by caller)
  }
  return "";
};
|
|
|
|
/**
 * Strip an optional leading "Bearer " scheme (case-insensitive) from an API
 * key value, trimming surrounding whitespace either way.
 * @param {string} raw - Raw key, possibly prefixed with "Bearer ".
 * @returns {string} The bare token.
 */
const normalizeApiKey = (raw) => {
  const trimmed = raw.trim();
  const match = /^Bearer\s+/i.exec(trimmed);
  if (match) {
    return trimmed.slice(match[0].length).trim();
  }
  return trimmed;
};
|
|
|
|
/**
 * Read a positive integer from the environment.
 * Unset or blank variables silently yield the fallback; anything present but
 * not a strictly positive base-10 integer logs a warning and also yields the
 * fallback (the two previous invalid branches were duplicates and are merged).
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset or invalid.
 * @returns {number} The parsed positive integer, or fallback.
 */
const readPositiveIntEnv = (name, fallback) => {
  const raw = process.env[name];
  if (!raw || raw.trim().length === 0) {
    // Unset/blank is a deliberate default, not an error: stay quiet.
    return fallback;
  }
  const s = raw.trim();
  // Digits-only guard rejects signs, decimals, exponents and whitespace;
  // Number() then cannot produce NaN, but keep the finite/positive check to
  // reject "0" and any value that overflows to Infinity.
  const n = /^\d+$/.test(s) ? Number(s) : Number.NaN;
  if (!Number.isFinite(n) || n <= 0) {
    console.error(
      `anythingllm-pull-sync: invalid ${name}=${JSON.stringify(raw)}; using default ${fallback}`,
    );
    return fallback;
  }
  return n;
};
|
|
|
|
/**
 * Upload one file to the AnythingLLM document endpoint and attach it to the
 * given workspace slug.
 * @param {string} baseUrl - AnythingLLM base URL (trailing slashes stripped).
 * @param {string} apiKey - Bare bearer token.
 * @param {string} slug - Workspace slug passed as addToWorkspaces.
 * @param {string} absPath - Absolute path of the file to read and send.
 * @param {string} uploadName - Filename presented to the API.
 * @throws On network failure, a non-JSON reply, a non-2xx status, or a JSON
 *   reply whose success field is not exactly true.
 */
const uploadOne = async (baseUrl, apiKey, slug, absPath, uploadName) => {
  const root = baseUrl.replace(/\/+$/, "");
  const contents = await fsPromises.readFile(absPath);

  const form = new FormData();
  form.append("file", new Blob([contents]), uploadName);
  form.append("addToWorkspaces", slug);

  const res = await fetch(`${root}/api/v1/document/upload`, {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}` },
    body: form,
  });
  const text = await res.text();

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    throw new Error(`non-JSON ${res.status}: ${text.slice(0, 200)}`);
  }
  if (!res.ok || parsed.success !== true) {
    throw new Error(`${res.status}: ${text.slice(0, 400)}`);
  }
};
|
|
|
|
// Entry point. Two modes:
//   --upload-all : walk the whole repo and upload every non-ignored file,
//                  optionally prefixing upload names with --upload-prefix.
//   default      : upload only files changed between ORIG_HEAD and HEAD
//                  (intended for a post-merge / post-pull git hook).
// Exit codes: 0 on success or deliberate skip (missing config, no ORIG_HEAD,
// no changes); 1 when at least one upload failed, so callers can detect
// partial failures. All progress/diagnostics go to stderr.
const main = async () => {
  const { repoRoot, uploadAll, uploadPrefix } = parseArgs();
  const baseUrl = process.env.ANYTHINGLLM_BASE_URL?.trim() ?? "";
  const apiKeyRaw = process.env.ANYTHINGLLM_API_KEY?.trim() ?? "";
  // Per-file size cap (default 5 MiB) and per-run upload-count cap (default 200).
  const maxBytes = readPositiveIntEnv("ANYTHINGLLM_SYNC_MAX_FILE_BYTES", 5_242_880);
  const maxFiles = readPositiveIntEnv("ANYTHINGLLM_SYNC_MAX_FILES", 200);

  // Missing endpoint or credential is a skip, not a failure (exit 0):
  // the hook must not break git pulls on unconfigured clones.
  if (!baseUrl || !apiKeyRaw) {
    console.error(
      "anythingllm-pull-sync: missing ANYTHINGLLM_BASE_URL or ANYTHINGLLM_API_KEY — skip.",
    );
    process.exit(0);
  }
  const apiKey = normalizeApiKey(apiKeyRaw);
  const slug = loadWorkspaceSlug(repoRoot);
  if (!slug) {
    console.error(
      "anythingllm-pull-sync: set ANYTHINGLLM_WORKSPACE_SLUG or .anythingllm.json { \"workspaceSlug\": \"…\" } — skip.",
    );
    process.exit(0);
  }

  if (uploadAll === true) {
    // ---- Mode 1: full-repo upload -------------------------------------
    // User ignore rules are optional; a missing .4nkaiignore means "no rules".
    const ignorePath = path.join(repoRoot, ".4nkaiignore");
    let userRules = "";
    try {
      userRules = await fsPromises.readFile(ignorePath, "utf8");
    } catch {
      userRules = "";
    }
    const ig = ignore();
    ig.add(ALWAYS_IGNORE);
    ig.add(userRules);

    let uploaded = 0;
    let skipped = 0;
    const errors = [];

    const absFiles = await walkFiles(repoRoot);
    for (const abs of absFiles) {
      const rel = toPosixRel(repoRoot, abs);
      // Defensive: never upload anything that resolves outside repoRoot.
      if (rel.length === 0 || rel.startsWith("..")) {
        skipped += 1;
        continue;
      }
      if (ig.ignores(rel)) {
        skipped += 1;
        continue;
      }
      // Re-stat: the file may have vanished or changed since the walk.
      let st;
      try {
        st = await fsPromises.stat(abs);
      } catch {
        skipped += 1;
        continue;
      }
      if (!st.isFile()) {
        skipped += 1;
        continue;
      }
      if (st.size > maxBytes) {
        skipped += 1;
        continue;
      }
      // Cap counts successful uploads only; hitting it stops the run.
      if (uploaded >= maxFiles) {
        console.error("anythingllm-pull-sync: cap reached (ANYTHINGLLM_SYNC_MAX_FILES).");
        break;
      }
      // Flatten the path into a single upload name: dir/file -> dir__file,
      // optionally prefixed (e.g. per-repo prefix) with another "__".
      const relPosix = rel.split(path.sep).join("/");
      const baseName = relPosix.split("/").join("__");
      const uploadName =
        uploadPrefix && uploadPrefix.length > 0 ? `${uploadPrefix}__${baseName}` : baseName;
      try {
        await uploadOne(baseUrl, apiKey, slug, abs, uploadName);
        uploaded += 1;
      } catch (e) {
        // Collect per-file errors; the run continues and fails at the end.
        errors.push(`${relPosix}: ${e instanceof Error ? e.message : String(e)}`);
      }
    }

    console.error(
      `anythingllm-pull-sync: mode=upload-all uploaded=${uploaded} skipped=${skipped} errors=${errors.length}`,
    );
    // Print at most 20 individual errors to keep hook output bounded.
    for (const line of errors.slice(0, 20)) {
      console.error(line);
    }
    if (errors.length > 20) {
      console.error(`… ${errors.length - 20} more`);
    }
    // Any upload error makes the whole run fail (explicit exit signaling).
    process.exit(errors.length > 0 ? 1 : 0);
  }

  // ---- Mode 2: changed-files upload (ORIG_HEAD..HEAD) -----------------
  // ORIG_HEAD only exists after a merge/pull; its absence is a skip.
  try {
    git(repoRoot, ["rev-parse", "-q", "--verify", "ORIG_HEAD"]);
  } catch {
    console.error("anythingllm-pull-sync: no ORIG_HEAD (not a merge/pull) — skip.");
    process.exit(0);
  }

  // Added/Copied/Modified/Renamed/Type-changed files; deletions are excluded
  // since there is nothing to upload for them.
  let names;
  try {
    const out = git(repoRoot, [
      "diff",
      "--name-only",
      "--diff-filter=ACMRT",
      "ORIG_HEAD",
      "HEAD",
    ]);
    names = out.length > 0 ? out.split("\n").filter(Boolean) : [];
  } catch (e) {
    console.error(
      "anythingllm-pull-sync: git diff failed — skip.",
      e instanceof Error ? e.message : String(e),
    );
    process.exit(0);
  }

  if (names.length === 0) {
    console.error("anythingllm-pull-sync: no file changes between ORIG_HEAD and HEAD.");
    process.exit(0);
  }

  // Same optional user ignore rules as upload-all mode.
  const ignorePath = path.join(repoRoot, ".4nkaiignore");
  let userRules = "";
  try {
    userRules = await fsPromises.readFile(ignorePath, "utf8");
  } catch {
    userRules = "";
  }
  const ig = ignore();
  ig.add(ALWAYS_IGNORE);
  ig.add(userRules);

  let uploaded = 0;
  let skipped = 0;
  const errors = [];

  for (const rel of names) {
    // git emits repo-relative paths; reject traversal or absolute entries
    // so a crafted path cannot read outside the repository.
    if (rel.includes("..") || path.isAbsolute(rel)) {
      skipped += 1;
      continue;
    }
    const posix = rel.split(path.sep).join("/");
    if (ig.ignores(posix)) {
      skipped += 1;
      continue;
    }
    const abs = path.join(repoRoot, rel);
    // The diff may reference files no longer present in the worktree.
    let st;
    try {
      st = await fsPromises.stat(abs);
    } catch {
      skipped += 1;
      continue;
    }
    if (!st.isFile()) {
      skipped += 1;
      continue;
    }
    if (st.size > maxBytes) {
      skipped += 1;
      continue;
    }
    if (uploaded >= maxFiles) {
      console.error("anythingllm-pull-sync: cap reached (ANYTHINGLLM_SYNC_MAX_FILES).");
      break;
    }
    // Flatten dir/file into dir__file for the upload name (no prefix here).
    const uploadName = posix.split("/").join("__");
    try {
      await uploadOne(baseUrl, apiKey, slug, abs, uploadName);
      uploaded += 1;
    } catch (e) {
      errors.push(`${posix}: ${e instanceof Error ? e.message : String(e)}`);
    }
  }

  console.error(
    `anythingllm-pull-sync: uploaded=${uploaded} skipped=${skipped} errors=${errors.length}`,
  );
  for (const line of errors.slice(0, 20)) {
    console.error(line);
  }
  if (errors.length > 20) {
    console.error(`… ${errors.length - 20} more`);
  }
  // Fail the hook loudly when any upload failed.
  process.exit(errors.length > 0 ? 1 : 0);
};
|
|
|
|
// Top-level runner: any unexpected rejection is logged and mapped to exit 1.
const onFatal = (err) => {
  console.error("anythingllm-pull-sync:", err);
  process.exit(1);
};
main().catch(onFatal);
|