Nicolas Cantu 58cc2493e5 chore: consolidate ia_dev module, sync tooling, and harden gateways (0.0.5)
Initial state:
- ia_dev was historically referenced as ./ia_dev in docs and integrations, while the vendored module lives under services/ia_dev.
- AnythingLLM sync and hook installation had error masking / weak exit signaling.
- Proxy layers did not validate proxy path segments, allowing path normalization tricks.

Motivation:
- Make the IDE-oriented workflow usable (sync -> act -> deploy/preview) with explicit errors.
- Reduce security footguns in proxying and script automation.

Resolution:
- Standardize IA_DEV_ROOT usage and documentation to services/ia_dev.
- Add SSH remote data mirroring + optional AnythingLLM ingestion.
- Extend AnythingLLM pull sync to support upload-all/prefix and fail on upload errors.
- Harden smart-ide-sso-gateway and smart-ide-global-api proxying with safe-path checks and non-leaking error responses.
- Improve ia-dev-gateway runner validation and reduce sensitive path leakage.
- Add site scaffold tool (Vite/React) with OIDC + chat via sso-gateway -> orchestrator.

Root cause:
- Historical layout changes (submodule -> vendored tree) and missing central contracts for path resolution.
- Missing validation for proxy path traversal patterns.
- Overuse of silent fallbacks (|| true, exit 0 on partial failures) in automation scripts.

Impacted features:
- Project sync: git pull + AnythingLLM sync + remote data mirror ingestion.
- Site frontends: SSO gateway proxy and orchestrator intents (rag.query, chat.local).
- Agent execution: ia-dev-gateway script runner and SSE output.

Code modified:
- scripts/remote-data-ssh-sync.sh
- scripts/anythingllm-pull-sync/sync.mjs
- scripts/install-anythingllm-post-merge-hook.sh
- cron/git-pull-project-clones.sh
- services/smart-ide-sso-gateway/src/server.ts
- services/smart-ide-global-api/src/server.ts
- services/smart-ide-orchestrator/src/server.ts
- services/ia-dev-gateway/src/server.ts
- services/ia_dev/tools/site-generate.sh

Documentation modified:
- docs/** (architecture, API docs, ia_dev module + integration, scripts)

Configurations modified:
- config/services.local.env.example
- services/*/.env.example

Files in deploy modified:
- services/ia_dev/deploy/*

Files in logs impacted:
- logs/ia_dev.log (runtime only)
- .logs/* (runtime only)

Databases and other sources modified:
- None

Off-project modifications:
- None

Files in .smartIde modified:
- .smartIde/agents/*.md
- services/ia_dev/.smartIde/**

Files in .secrets modified:
- None

New patch version in VERSION:
- 0.0.5

CHANGELOG.md updated:
- yes
2026-04-04 18:36:43 +02:00

396 lines
11 KiB
JavaScript
Executable File

#!/usr/bin/env node
/**
* Upload files changed between ORIG_HEAD and HEAD to AnythingLLM (post-merge / after pull).
* Requires: ANYTHINGLLM_BASE_URL, ANYTHINGLLM_API_KEY, workspace slug via ANYTHINGLLM_WORKSPACE_SLUG or .anythingllm.json
*/
import { execFileSync } from "node:child_process";
import * as fs from "node:fs";
import * as fsPromises from "node:fs/promises";
import * as path from "node:path";
import { fileURLToPath } from "node:url";
import ignore from "ignore";
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ALWAYS_IGNORE = [".git/", "node_modules/", "**/node_modules/"].join("\n");
// Read and parse the JSON document at path `p`.
// Throws on I/O failure or malformed JSON (callers catch where optional).
const readJson = (p) => JSON.parse(fs.readFileSync(p, "utf8"));
// Run a git subcommand inside `repoRoot` and return its trimmed stdout.
// Propagates execFileSync's error when git exits non-zero or is missing.
const git = (repoRoot, args) => {
  const options = {
    cwd: repoRoot,
    encoding: "utf8",
    stdio: ["ignore", "pipe", "pipe"],
  };
  const stdout = execFileSync("git", args, options);
  return stdout.trim();
};
// Parse CLI flags from process.argv:
//   --repo-root <path>      repository to sync (default: cwd, resolved absolute)
//   --upload-all            walk and upload the whole tree instead of the git diff
//   --upload-prefix <str>   prefix prepended to every upload name (trimmed)
// Unknown flags are silently ignored.
const parseArgs = () => {
  const parsed = { repoRoot: process.cwd(), uploadAll: false, uploadPrefix: "" };
  const args = process.argv.slice(2);
  let idx = 0;
  while (idx < args.length) {
    const flag = args[idx];
    const next = args[idx + 1];
    if (flag === "--repo-root" && next) {
      parsed.repoRoot = path.resolve(next);
      idx += 2;
    } else if (flag === "--upload-all") {
      parsed.uploadAll = true;
      idx += 1;
    } else if (flag === "--upload-prefix" && next) {
      parsed.uploadPrefix = String(next ?? "").trim();
      idx += 2;
    } else {
      idx += 1;
    }
  }
  return parsed;
};
// Recursively collect absolute paths of regular files under `dir`.
// Symlinks are never followed (skipped entirely); directories are
// descended depth-first in readdir order.
const walkFiles = async (dir) => {
  const collected = [];
  const visit = async (current) => {
    const entries = await fsPromises.readdir(current, { withFileTypes: true });
    for (const entry of entries) {
      const full = path.join(current, entry.name);
      if (entry.isSymbolicLink()) {
        continue;
      }
      if (entry.isDirectory()) {
        await visit(full);
      } else if (entry.isFile()) {
        collected.push(full);
      }
    }
  };
  await visit(dir);
  return collected;
};
// Relative path from `root` to `abs`, normalized to forward slashes
// so it can be matched against POSIX-style ignore rules.
const toPosixRel = (root, abs) => path.relative(root, abs).replaceAll(path.sep, "/");
// Resolve the AnythingLLM workspace slug for the repository at `repoRoot`.
// Resolution order:
//   1. ANYTHINGLLM_WORKSPACE_SLUG environment variable;
//   2. `.anythingllm.json` ({ "workspaceSlug": "…" }) inside the repo;
//   3. smart_ide projects/<id>/conf.json whose project_path resolves to repoRoot.
// Returns "" when nothing matches; the caller treats "" as an explicit skip.
const loadWorkspaceSlug = (repoRoot) => {
  const env = process.env.ANYTHINGLLM_WORKSPACE_SLUG?.trim();
  if (env) {
    return env;
  }
  const cfgPath = path.join(repoRoot, ".anythingllm.json");
  try {
    const j = readJson(cfgPath);
    if (typeof j.workspaceSlug === "string" && j.workspaceSlug.trim().length > 0) {
      return j.workspaceSlug.trim();
    }
  } catch {
    /* missing */
  }
  // smart_ide integration: resolve slug from projects/<id>/conf.json when available.
  // This avoids having to write per-repo config files into the target clones.
  try {
    // NOTE(review): assumes this script lives two directories below the smart_ide
    // root (e.g. scripts/anythingllm-pull-sync/) — confirm if the file ever moves.
    const smartIdeRoot = path.resolve(__dirname, "..", "..");
    const projectsDir = path.join(smartIdeRoot, "projects");
    if (!fs.existsSync(projectsDir)) {
      return "";
    }
    // Compare realpaths so symlinked clones still match their project entry.
    const repoReal = fs.realpathSync(repoRoot);
    const envNameRaw = process.env.SMART_IDE_ENV?.trim() ?? "";
    // Only the known environment names are honored; anything else falls back to "test".
    const envName = envNameRaw === "test" || envNameRaw === "pprod" || envNameRaw === "prod" ? envNameRaw : "test";
    for (const ent of fs.readdirSync(projectsDir, { withFileTypes: true })) {
      if (!ent.isDirectory()) {
        continue;
      }
      const confPath = path.join(projectsDir, ent.name, "conf.json");
      if (!fs.existsSync(confPath)) {
        continue;
      }
      let conf;
      try {
        conf = readJson(confPath);
      } catch {
        continue; // unreadable or invalid conf.json: try the next project
      }
      const projectPath = typeof conf?.project_path === "string" ? conf.project_path.trim() : "";
      if (!projectPath) {
        continue;
      }
      // project_path may be absolute or relative to the smart_ide root.
      const absProjectPath = path.isAbsolute(projectPath)
        ? projectPath
        : path.resolve(smartIdeRoot, projectPath);
      let projectReal;
      try {
        projectReal = fs.realpathSync(absProjectPath);
      } catch {
        continue; // project_path does not exist on disk: not this project
      }
      if (projectReal !== repoReal) {
        continue;
      }
      // The slug may be a plain string or a per-environment map keyed by envName.
      const slugCfg = conf?.smart_ide?.anythingllm_workspace_slug;
      if (typeof slugCfg === "string" && slugCfg.trim().length > 0) {
        return slugCfg.trim();
      }
      if (slugCfg && typeof slugCfg === "object") {
        const slug = slugCfg?.[envName];
        if (typeof slug === "string" && slug.trim().length > 0) {
          return slug.trim();
        }
      }
    }
  } catch {
    // ignore and fall back to empty (explicit skip handled by caller)
  }
  return "";
};
// Normalize an API key: trim whitespace and strip an optional leading
// "Bearer " scheme (case-insensitive) so the value can be re-wrapped later.
const normalizeApiKey = (raw) => {
  const trimmed = raw.trim();
  if (/^Bearer\s+/i.test(trimmed)) {
    return trimmed.replace(/^Bearer\s+/i, "").trim();
  }
  return trimmed;
};
/**
 * Read a positive integer from the environment variable `name`.
 *
 * Returns `fallback` silently when the variable is unset or blank, and with
 * a diagnostic on stderr when it is set but invalid: non-digit characters,
 * zero, or a digit string so long that Number() overflows to Infinity.
 * (Previously the non-digit and non-finite cases duplicated the same error
 * branch; they are consolidated into a single validation path.)
 */
const readPositiveIntEnv = (name, fallback) => {
  const raw = process.env[name];
  if (!raw || raw.trim().length === 0) {
    return fallback;
  }
  const s = raw.trim();
  // Digits-only guard first; Number("") / Number("1e3") etc. must not sneak in.
  const n = /^\d+$/.test(s) ? Number(s) : NaN;
  // NaN (non-digits), Infinity (overflowing digit string), and 0 all fall here.
  if (!Number.isFinite(n) || n <= 0) {
    console.error(
      `anythingllm-pull-sync: invalid ${name}=${JSON.stringify(raw)}; using default ${fallback}`,
    );
    return fallback;
  }
  return n;
};
// POST one file to AnythingLLM's document upload endpoint and attach it to
// the workspace `slug` via the addToWorkspaces form field.
// Throws when the response body is not JSON, on non-2xx status, or when the
// API reports success !== true. Error text is truncated to keep logs sane.
const uploadOne = async (baseUrl, apiKey, slug, absPath, uploadName) => {
  const endpoint = `${baseUrl.replace(/\/+$/, "")}/api/v1/document/upload`;
  const contents = await fsPromises.readFile(absPath);
  const form = new FormData();
  form.append("file", new Blob([contents]), uploadName);
  form.append("addToWorkspaces", slug);
  const res = await fetch(endpoint, {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}` },
    body: form,
  });
  const text = await res.text();
  let payload;
  try {
    payload = JSON.parse(text);
  } catch {
    throw new Error(`non-JSON ${res.status}: ${text.slice(0, 200)}`);
  }
  if (!res.ok || payload.success !== true) {
    throw new Error(`${res.status}: ${text.slice(0, 400)}`);
  }
};
// ---- helpers for main() ---------------------------------------------------

// Build the ignore matcher: ALWAYS_IGNORE plus optional `.4nkaiignore` rules
// read from the repo root. A missing ignore file contributes no rules.
// (Previously this logic was duplicated in both upload modes.)
const buildIgnoreMatcher = async (repoRoot) => {
  let userRules = "";
  try {
    userRules = await fsPromises.readFile(path.join(repoRoot, ".4nkaiignore"), "utf8");
  } catch {
    userRules = "";
  }
  const ig = ignore();
  ig.add(ALWAYS_IGNORE);
  ig.add(userRules);
  return ig;
};

// Stat `abs` and return its stats when it is a regular file no larger than
// `maxBytes`; return null otherwise (including stat failures) so the caller
// counts exactly one skip for any of those cases.
const statUploadableFile = async (abs, maxBytes) => {
  try {
    const st = await fsPromises.stat(abs);
    return st.isFile() && st.size <= maxBytes ? st : null;
  } catch {
    return null;
  }
};

// Print the run summary plus up to 20 error lines, then exit the process:
// 1 when any upload failed, 0 otherwise. `mode` is "" or "mode=upload-all ".
const reportAndExit = (mode, uploaded, skipped, errors) => {
  console.error(
    `anythingllm-pull-sync: ${mode}uploaded=${uploaded} skipped=${skipped} errors=${errors.length}`,
  );
  for (const line of errors.slice(0, 20)) {
    console.error(line);
  }
  if (errors.length > 20) {
    console.error(`${errors.length - 20} more`);
  }
  process.exit(errors.length > 0 ? 1 : 0);
};

// --upload-all mode: walk the whole tree and upload every non-ignored,
// size-bounded regular file, up to maxFiles successful uploads.
const runUploadAll = async ({ repoRoot, baseUrl, apiKey, slug, uploadPrefix, maxBytes, maxFiles }) => {
  const ig = await buildIgnoreMatcher(repoRoot);
  let uploaded = 0;
  let skipped = 0;
  const errors = [];
  for (const abs of await walkFiles(repoRoot)) {
    const rel = toPosixRel(repoRoot, abs);
    // Defensive: never upload anything that resolves outside the repo root.
    if (rel.length === 0 || rel.startsWith("..") || ig.ignores(rel)) {
      skipped += 1;
      continue;
    }
    if ((await statUploadableFile(abs, maxBytes)) === null) {
      skipped += 1;
      continue;
    }
    if (uploaded >= maxFiles) {
      console.error("anythingllm-pull-sync: cap reached (ANYTHINGLLM_SYNC_MAX_FILES).");
      break;
    }
    // Flatten the relative path into one upload name (a/b.txt -> a__b.txt),
    // optionally namespaced by --upload-prefix.
    const relPosix = rel.split(path.sep).join("/");
    const baseName = relPosix.split("/").join("__");
    const uploadName =
      uploadPrefix && uploadPrefix.length > 0 ? `${uploadPrefix}__${baseName}` : baseName;
    try {
      await uploadOne(baseUrl, apiKey, slug, abs, uploadName);
      uploaded += 1;
    } catch (e) {
      errors.push(`${relPosix}: ${e instanceof Error ? e.message : String(e)}`);
    }
  }
  reportAndExit("mode=upload-all ", uploaded, skipped, errors);
};

// Default mode: upload only the files changed between ORIG_HEAD and HEAD
// (i.e. what the last merge/pull brought in). Exits 0 on all skip paths.
const runDiffUpload = async ({ repoRoot, baseUrl, apiKey, slug, maxBytes, maxFiles }) => {
  try {
    git(repoRoot, ["rev-parse", "-q", "--verify", "ORIG_HEAD"]);
  } catch {
    console.error("anythingllm-pull-sync: no ORIG_HEAD (not a merge/pull) — skip.");
    process.exit(0);
  }
  let names;
  try {
    const out = git(repoRoot, [
      "diff",
      "--name-only",
      "--diff-filter=ACMRT",
      "ORIG_HEAD",
      "HEAD",
    ]);
    names = out.length > 0 ? out.split("\n").filter(Boolean) : [];
  } catch (e) {
    console.error(
      "anythingllm-pull-sync: git diff failed — skip.",
      e instanceof Error ? e.message : String(e),
    );
    process.exit(0);
  }
  if (names.length === 0) {
    console.error("anythingllm-pull-sync: no file changes between ORIG_HEAD and HEAD.");
    process.exit(0);
  }
  const ig = await buildIgnoreMatcher(repoRoot);
  let uploaded = 0;
  let skipped = 0;
  const errors = [];
  for (const rel of names) {
    // Reject traversal or absolute paths coming back from git output.
    if (rel.includes("..") || path.isAbsolute(rel)) {
      skipped += 1;
      continue;
    }
    const posix = rel.split(path.sep).join("/");
    if (ig.ignores(posix)) {
      skipped += 1;
      continue;
    }
    const abs = path.join(repoRoot, rel);
    if ((await statUploadableFile(abs, maxBytes)) === null) {
      skipped += 1;
      continue;
    }
    if (uploaded >= maxFiles) {
      console.error("anythingllm-pull-sync: cap reached (ANYTHINGLLM_SYNC_MAX_FILES).");
      break;
    }
    const uploadName = posix.split("/").join("__");
    try {
      await uploadOne(baseUrl, apiKey, slug, abs, uploadName);
      uploaded += 1;
    } catch (e) {
      errors.push(`${posix}: ${e instanceof Error ? e.message : String(e)}`);
    }
  }
  reportAndExit("", uploaded, skipped, errors);
};

/**
 * Entry point. Validates the environment (base URL, API key, workspace slug)
 * and dispatches to the requested upload mode.
 *
 * Exit codes: 0 when configuration is incomplete (explicit skip) or all
 * uploads succeeded; 1 when at least one upload failed.
 */
const main = async () => {
  const { repoRoot, uploadAll, uploadPrefix } = parseArgs();
  const baseUrl = process.env.ANYTHINGLLM_BASE_URL?.trim() ?? "";
  const apiKeyRaw = process.env.ANYTHINGLLM_API_KEY?.trim() ?? "";
  const maxBytes = readPositiveIntEnv("ANYTHINGLLM_SYNC_MAX_FILE_BYTES", 5_242_880);
  const maxFiles = readPositiveIntEnv("ANYTHINGLLM_SYNC_MAX_FILES", 200);
  if (!baseUrl || !apiKeyRaw) {
    console.error(
      "anythingllm-pull-sync: missing ANYTHINGLLM_BASE_URL or ANYTHINGLLM_API_KEY — skip.",
    );
    process.exit(0);
  }
  const apiKey = normalizeApiKey(apiKeyRaw);
  const slug = loadWorkspaceSlug(repoRoot);
  if (!slug) {
    console.error(
      "anythingllm-pull-sync: set ANYTHINGLLM_WORKSPACE_SLUG or .anythingllm.json { \"workspaceSlug\": \"…\" } — skip.",
    );
    process.exit(0);
  }
  if (uploadAll === true) {
    await runUploadAll({ repoRoot, baseUrl, apiKey, slug, uploadPrefix, maxBytes, maxFiles });
    return;
  }
  await runDiffUpload({ repoRoot, baseUrl, apiKey, slug, maxBytes, maxFiles });
};
// Kick off the sync; any unhandled rejection is logged and flips the exit
// code to 1 so git hooks / cron callers see the failure.
main().catch((e) => {
  console.error("anythingllm-pull-sync:", e);
  process.exit(1);
});