From 69ab265560867f8b8ced9c909113adf07137d4f4 Mon Sep 17 00:00:00 2001 From: Nicolas Cantu Date: Tue, 24 Mar 2026 22:36:37 +0100 Subject: [PATCH] feat: initial RAG sync with .4nkaiignore (extension 0.3, server 0.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Motivations:** - Seed AnythingLLM workspace from cloned repo using gitignore-style filters **Root causes:** - N/A **Correctifs:** - N/A **Evolutions:** - Template 4nkaiignore.default; server copies after clone; extension uploads via POST /api/v1/document/upload - New commands /workspace-sync; settings initialSync*; dependency ignore **Pages affectées:** - extensions/anythingllm-workspaces/* - services/repos-devtools-server/* - docs/features/initial-rag-sync-4nkaiignore.md --- docs/features/initial-rag-sync-4nkaiignore.md | 35 +++++ extensions/anythingllm-workspaces/README.md | 113 +++++---------- .../anythingllm-workspaces/package-lock.json | 16 ++- .../anythingllm-workspaces/package.json | 34 ++++- .../src/anythingllmClient.ts | 30 ++-- .../src/anythingllmDocumentApi.ts | 56 ++++++++ .../src/commandParser.ts | 19 ++- .../anythingllm-workspaces/src/config.ts | 17 +++ .../src/devToolsExecutor.ts | 59 +++++++- .../src/devToolsPanel.ts | 11 +- .../src/initialRagSync.ts | 136 ++++++++++++++++++ .../src/reposApiClient.ts | 10 +- .../anythingllm-workspaces/src/types.ts | 7 - .../templates/4nkaiignore.default | 54 +++++++ services/repos-devtools-server/README.md | 38 ++--- services/repos-devtools-server/package.json | 4 +- .../repos-devtools-server/src/handlers.ts | 16 +++ services/repos-devtools-server/src/paths.ts | 3 +- services/repos-devtools-server/src/server.ts | 2 +- .../src/write4nkaiignore.ts | 23 +++ .../templates/4nkaiignore.default | 54 +++++++ 21 files changed, 580 insertions(+), 157 deletions(-) create mode 100644 docs/features/initial-rag-sync-4nkaiignore.md create mode 100644 extensions/anythingllm-workspaces/src/anythingllmDocumentApi.ts create mode 
100644 extensions/anythingllm-workspaces/src/initialRagSync.ts create mode 100644 extensions/anythingllm-workspaces/templates/4nkaiignore.default create mode 100644 services/repos-devtools-server/src/write4nkaiignore.ts create mode 100644 services/repos-devtools-server/templates/4nkaiignore.default diff --git a/docs/features/initial-rag-sync-4nkaiignore.md b/docs/features/initial-rag-sync-4nkaiignore.md new file mode 100644 index 0000000..455766e --- /dev/null +++ b/docs/features/initial-rag-sync-4nkaiignore.md @@ -0,0 +1,35 @@ +# Synchronisation RAG initiale et `.4nkaiignore` + +**Author:** 4NK + +## Objectif + +À la **création du clone** (ou chargement sync), disposer d’un **workspace AnythingLLM** aligné sur le dépôt et importer une **première vague de fichiers** utiles au RAG, en excluant le bruit via un fichier **`.4nkaiignore`** (syntaxe **identique à `.gitignore`**). + +## Comportement + +1. **Serveur `repos-devtools-server`** : après `git clone` réussi, copie **`templates/4nkaiignore.default`** vers **`.4nkaiignore`** à la racine du clone si absent. +2. **Extension 0.3.0** : après `/repos-clone-sync`, `/repos-load-sync`, ou sur **`/workspace-sync`**, si l’option **`anythingllm.initialSyncAfterClone`** n’est pas à `false` : + - assure **`.4nkaiignore`** depuis le template bundlé si toujours absent ; + - parcourt le dépôt, applique les règles de base + `.4nkaiignore` ; + - envoie chaque fichier accepté via **`POST /api/v1/document/upload`** avec **`addToWorkspaces`** = slug du workspace. + +## Fichier type + +- **`extensions/anythingllm-workspaces/templates/4nkaiignore.default`** +- **`services/repos-devtools-server/templates/4nkaiignore.default`** (même contenu ; à maintenir en parité). + +L’utilisateur renomme / copie en **`.4nkaiignore`** à la racine du projet et adapte les règles. + +## Prérequis AnythingLLM + +Le **collecteur / processeur de documents** doit être joignable par l’instance AnythingLLM ; sinon l’upload API échoue avec le message renvoyé par le serveur. 
+ +## Modalités d’analyse + +- Compter les champs **`uploaded`**, **`skipped`**, **`errors`**, **`capped`**, **`dotfileCreated`** dans la section **Initial RAG sync** du panneau Dev tools. +- Vérifier les logs AnythingLLM / collector en cas d’échec systématique des uploads. + +## Modalités de déploiement + +- Rebuild et redémarrage de **repos-devtools-server** ; repackaging / réinstallation de l’extension **0.3.0+**. diff --git a/extensions/anythingllm-workspaces/README.md b/extensions/anythingllm-workspaces/README.md index d89121e..2d6401c 100644 --- a/extensions/anythingllm-workspaces/README.md +++ b/extensions/anythingllm-workspaces/README.md @@ -1,102 +1,57 @@ # AnythingLLM Workspaces (VS Code / Cursor) -Extension that talks to the **AnythingLLM developer HTTP API** (list/create workspaces, open the UI in a browser). Optionally uses a **local repos HTTP service** (`repos-devtools-server`) to clone or open Git folders under a configured root, from a **Dev tools** webview panel. - -## Features - -| Area | What it does | -|------|----------------| -| AnythingLLM | List workspaces, open one in the browser, open the web UI. | -| Workspace ensure | If no workspace matches a repo folder name, create it via `POST /api/v1/workspace/new`. | -| Local repos API | Clone (`branch` **test** by default), list git folders, resolve paths — requires `repos-devtools-server`. | +AnythingLLM **developer API** (workspaces, documents), optional **repos-devtools-server**, **Dev tools** webview, and **initial RAG upload** after clone/load using **`.4nkaiignore`** (same syntax as `.gitignore`). ## Requirements -1. **AnythingLLM** reachable at a public base URL (example: `https://ia.enso.4nkweb.com/anythingllm`). -2. An **API key** from AnythingLLM: **Settings → API Keys**. - -**Important:** Do **not** put the **nginx Bearer secret** used for `/ollama/` here (see `deploy/nginx/README-ia-enso.md`). 
AnythingLLM only accepts keys stored in its own app; a wrong value returns `403` and `{"error":"No valid api key found."}`. - -3. For **clone / repos-list / repos-load** commands: run **`repos-devtools-server`** on the machine that owns the clone directory (see `../../services/repos-devtools-server/README.md`). Default URL from the editor: `http://127.0.0.1:37140`. If Cursor connects over **SSH** to that host, `127.0.0.1` is the remote machine — no port forward needed. If the editor runs on another PC, set `anythingllm.reposApiBaseUrl` to a tunnel or the server’s reachable address. - -## Installation - -- **From source:** open `extensions/anythingllm-workspaces` in VS Code / Cursor, **Run → Start Debugging** (Extension Development Host). -- **From VSIX:** - `npm install && npm run compile && npx @vscode/vsce package` - then **Extensions → … → Install from VSIX…** and pick `anythingllm-workspaces-*.vsix`. - -After install or upgrade, run **Developer: Reload Window** if commands are missing. +- AnythingLLM with **API key** (**Settings → API Keys**). Do **not** use the nginx Bearer for `/ollama/` here. +- **`repos-devtools-server`** on the host that owns clones (default `http://127.0.0.1:37140`). +- For **document upload**, AnythingLLM’s **document processor (collector)** must be online; otherwise `POST /api/v1/document/upload` returns an error. ## Configuration -Open **Settings**, search for **AnythingLLM**, or edit **User** `settings.json`: - | Key | Description | |-----|-------------| -| `anythingllm.baseUrl` | AnythingLLM public base URL, **no** trailing slash. | -| `anythingllm.apiKey` | API key from AnythingLLM (a leading `Bearer ` prefix is stripped if present). | -| `anythingllm.reposApiBaseUrl` | `repos-devtools-server` base URL, no trailing slash (default `http://127.0.0.1:37140`). | -| `anythingllm.reposApiToken` | Same secret as `REPOS_DEVTOOLS_TOKEN` on the server. | +| `anythingllm.baseUrl` | AnythingLLM public URL (no trailing `/`). 
| +| `anythingllm.apiKey` | API key. **User** settings. | +| `anythingllm.reposApiBaseUrl` | repos-devtools-server URL. | +| `anythingllm.reposApiToken` | Same as `REPOS_DEVTOOLS_TOKEN`. | +| `anythingllm.initialSyncAfterClone` | Default **on**: after `/repos-clone-sync`, `/repos-load-sync`, and `/workspace-sync`, upload filtered files. Set to `false` to disable. | +| `anythingllm.initialSyncMaxFiles` | Max files per run (default `400`). | +| `anythingllm.initialSyncMaxFileBytes` | Max bytes per file (default `5242880`). | -Use **User** settings so secrets are not committed with a workspace. +## Commands (palette) -## Commands (Command Palette) +- **AnythingLLM: List workspaces** — `GET /api/v1/workspaces`, open one in the browser. +- **AnythingLLM: Open web UI** +- **AnythingLLM: Dev tools panel** — webview for scripted commands. -Open the palette: **Ctrl+Shift+P** (Windows / Linux) or **Cmd+Shift+P** (macOS). +## `.4nkaiignore` -| Title in palette | Command ID | Action | -|------------------|------------|--------| -| **AnythingLLM: List workspaces** | `anythingllm.listWorkspaces` | Calls `GET /api/v1/workspaces`, pick a workspace, open it in the browser. | -| **AnythingLLM: Open web UI** | `anythingllm.openWebUi` | Opens `anythingllm.baseUrl` in the browser. | -| **AnythingLLM: Dev tools panel** | `anythingllm.openDevToolsPanel` | Opens the Dev tools webview (see below). | +- **Template (reference):** `templates/4nkaiignore.default` in this extension (and the same file under `services/repos-devtools-server/templates/` for the clone server). +- **At repo root:** the file must be named **`.4nkaiignore`**. +- **After `git clone` via the server:** if `.4nkaiignore` is missing, the server copies the template into the new repo (`fourNkAiIgnoreTemplateWrote` in the JSON response). +- **Before upload:** the extension creates `.4nkaiignore` from the bundled template only if it is still missing (e.g. repo cloned outside the server). 
-## Dev tools panel +Filtering uses the **`ignore`** package (gitignore semantics). The extension always applies baseline rules (e.g. `.git/`, `node_modules/`) in addition to `.4nkaiignore`. -### How to open it - -1. **Ctrl+Shift+P** / **Cmd+Shift+P** -2. Type **AnythingLLM: Dev tools panel** (or `dev tools`, `anythingllm`). -3. **Enter** - -A side editor tab opens with: - -- A **Commands** text area (one command per line) -- **Run** — execute all non-empty lines in order -- **Clear output** -- **Response** — JSON or text from the server / API, or `ERROR: …` - -Settings are read **when you click Run**, so you can change `baseUrl` or tokens without reopening the panel. - -### Command lines +## Dev tools — command lines | Line | Behaviour | |------|-----------| -| `/repos-clone ` | `POST /repos-clone` — clone into `REPOS_DEVTOOLS_ROOT`, branch **`test`**. | -| `/repos-clone-sync ` | Same as clone, then ensure an AnythingLLM workspace with the **same name as the repo folder**, **Open folder** in the editor, open that workspace in the browser. | -| `repos-list` or `/repos-list` | `GET /repos-list` — git repositories under the server root. | -| `/repos-load ` | `POST /repos-load` — verify folder + **Open folder**. | -| `/repos-load-sync ` | Same as load + ensure workspace + browser. | -| `/workspace-load ` | List workspaces; if none matches by **name** or **slug**, create via API; then open browser. | -| `help` or `/help` | Print built-in help in **Response**. | +| `/repos-clone-sync ` | Clone (branch `test`) → ensure workspace → **initial RAG upload** (if enabled) → open folder → browser. | +| `/repos-load-sync ` | Open folder → ensure workspace → **initial RAG upload** → browser. | +| `/workspace-sync ` | Resolve repo under `REPOS_DEVTOOLS_ROOT` → ensure workspace → **initial RAG upload** (no folder open). | +| `/workspace-load ` | Ensure workspace → browser only (no file upload). | +| Other lines | See `help` in the panel. 
| -**Workspace name:** Matching uses **exact** equality on AnythingLLM `name` or `slug` and the repo folder name you pass. Creation body: `{ "name": "" }`. +Upload uses **`POST /api/v1/document/upload`** with multipart field **`file`** and **`addToWorkspaces`** set to the workspace **slug**. Relative paths are flattened to a safe single-segment filename (`dir__file.ts`) to reduce name collisions. -**API key vs browser user:** The extension only uses the **developer API key**. Multi-user behaviour is defined by AnythingLLM for that key. +**JSON field `workspaceCreatedByApi`:** `true` only if this run called `POST /api/v1/workspace/new`; `false` if the workspace already existed. ## Ollama -This extension does **not** call Ollama. For OpenAI-compatible URLs such as `https://ia.enso.4nkweb.com/ollama/v1`, configure the editor’s model provider and use the nginx Bearer as documented in `deploy/nginx/README-ia-enso.md`. - -## Troubleshooting - -| Symptom | Check | -|---------|--------| -| `403` / `No valid api key found` | Use an AnythingLLM **Settings → API Keys** value, not the Ollama nginx Bearer. | -| `401` on clone/list/load | `anythingllm.reposApiToken` must equal `REPOS_DEVTOOLS_TOKEN` on `repos-devtools-server`. | -| `ECONNREFUSED` / fetch failed | Server running? Correct `anythingllm.reposApiBaseUrl`? | -| Command palette has no AnythingLLM entries | Extension enabled? **Developer: Reload Window**. | -| Clone fails | Remote must expose branch **`test`** (or change branch via the HTTP API body, not the one-line panel command). | +Not used by this extension. Configure Cursor’s model URL for `/ollama/v1` separately. ## Build @@ -106,12 +61,10 @@ npm install npm run compile ``` -Package: `npx @vscode/vsce package --allow-missing-repository` (Node 20+ recommended for current `vsce`). +## References -## API reference - -- AnythingLLM (upstream): Mintplex-Labs **anything-llm** — `server/endpoints/api/workspace/index.js` (routes under `/api`, e.g. 
`GET /v1/workspaces`, `POST /v1/workspace/new`). -- Local repos: `services/repos-devtools-server/README.md`. +- AnythingLLM document API: `POST /v1/document/upload` under `/api` (Mintplex-Labs anything-llm `server/endpoints/api/document/index.js`). +- Local server: `services/repos-devtools-server/README.md`. ## License diff --git a/extensions/anythingllm-workspaces/package-lock.json b/extensions/anythingllm-workspaces/package-lock.json index a5f49c2..99f2743 100644 --- a/extensions/anythingllm-workspaces/package-lock.json +++ b/extensions/anythingllm-workspaces/package-lock.json @@ -1,13 +1,16 @@ { "name": "anythingllm-workspaces", - "version": "0.1.0", + "version": "0.3.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "anythingllm-workspaces", - "version": "0.1.0", + "version": "0.3.0", "license": "MIT", + "dependencies": { + "ignore": "^5.3.2" + }, "devDependencies": { "@types/node": "^20.11.0", "@types/vscode": "^1.85.0", @@ -32,6 +35,15 @@ "integrity": "sha512-AGuxUEpU4F4mfuQjxPPaQVyuOMhs+VT/xRok1jiHVBubHK7lBRvCuOMZG0LKUwxncrPorJ5qq/uil3IdZBd5lA==", "dev": true }, + "node_modules/ignore": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", + "integrity": "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==", + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, "node_modules/typescript": { "version": "5.9.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", diff --git a/extensions/anythingllm-workspaces/package.json b/extensions/anythingllm-workspaces/package.json index 9aee538..a934ad5 100644 --- a/extensions/anythingllm-workspaces/package.json +++ b/extensions/anythingllm-workspaces/package.json @@ -1,8 +1,8 @@ { "name": "anythingllm-workspaces", "displayName": "AnythingLLM Workspaces (ia.enso)", - "description": "List AnythingLLM workspaces via your proxied instance (e.g. 
ia.enso.4nkweb.com/anythingllm).", - "version": "0.2.0", + "description": "AnythingLLM API, repos devtools, initial RAG sync via .4nkaiignore.", + "version": "0.3.0", "publisher": "4nk", "license": "MIT", "engines": { @@ -22,22 +22,41 @@ "anythingllm.baseUrl": { "type": "string", "default": "https://ia.enso.4nkweb.com/anythingllm", - "markdownDescription": "Public base URL of AnythingLLM (nginx path `/anythingllm/`, no trailing slash required)." + "markdownDescription": "Public base URL of AnythingLLM (no trailing slash)." }, "anythingllm.apiKey": { "type": "string", "default": "", - "markdownDescription": "AnythingLLM API key (UI: **Settings → API Keys**). Prefer **User** settings to avoid committing secrets." + "markdownDescription": "AnythingLLM API key (**Settings → API Keys**). **User** settings." }, "anythingllm.reposApiBaseUrl": { "type": "string", "default": "http://127.0.0.1:37140", - "markdownDescription": "Base URL of **repos-devtools-server** (no trailing slash). Must match the machine where `/home/ncantu/code` (or `REPOS_DEVTOOLS_ROOT`) lives." + "markdownDescription": "repos-devtools-server base URL (no trailing slash)." }, "anythingllm.reposApiToken": { "type": "string", "default": "", - "markdownDescription": "Bearer token shared with `REPOS_DEVTOOLS_TOKEN` on the repos-devtools-server. **User** settings only." + "markdownDescription": "Same as `REPOS_DEVTOOLS_TOKEN` on the server." }, + "anythingllm.initialSyncAfterClone": { + "type": "boolean", + "default": true, + "markdownDescription": "After `/repos-clone-sync`, `/repos-load-sync`, or `/workspace-sync`, upload repo files to the workspace (filtered by `.4nkaiignore`). Requires AnythingLLM document processor (collector) online." + }, + "anythingllm.initialSyncMaxFiles": { + "type": "number", + "default": 400, + "minimum": 1, + "maximum": 10000, + "markdownDescription": "Max files to upload per initial sync." 
+ }, + "anythingllm.initialSyncMaxFileBytes": { + "type": "number", + "default": 5242880, + "minimum": 1024, + "maximum": 104857600, + "markdownDescription": "Max size per file (bytes) for initial sync." } } }, @@ -61,6 +80,9 @@ "watch": "tsc -watch -p ./", "vscode:prepublish": "npm run compile" }, + "dependencies": { + "ignore": "^5.3.2" + }, "devDependencies": { "@types/node": "^20.11.0", "@types/vscode": "^1.85.0", diff --git a/extensions/anythingllm-workspaces/src/anythingllmClient.ts b/extensions/anythingllm-workspaces/src/anythingllmClient.ts index 08070db..fd7b793 100644 --- a/extensions/anythingllm-workspaces/src/anythingllmClient.ts +++ b/extensions/anythingllm-workspaces/src/anythingllmClient.ts @@ -55,6 +55,17 @@ const normalizeApiSecret = (raw: string): string => { return bearerPrefix.test(trimmed) ? trimmed.replace(bearerPrefix, "").trim() : trimmed; }; +const parseWorkspaceEnvelope = (payload: unknown): AnythingWorkspace => { + if (!isRecord(payload)) { + throw new Error("AnythingLLM API: expected object body"); + } + const ws = payload.workspace; + if (!isWorkspace(ws)) { + throw new Error("AnythingLLM API: missing workspace in response"); + } + return ws; +}; + export const listWorkspaces = async ( baseUrl: string, apiKey: string, @@ -74,24 +85,11 @@ export const listWorkspaces = async ( }); const text = await response.text(); if (!response.ok) { - throw new Error( - `AnythingLLM API ${response.status}: ${text.slice(0, 500)}`, - ); + throw new Error(`AnythingLLM API ${response.status}: ${text.slice(0, 500)}`); } return parseListWorkspaces(parseJson(text)); }; -const parseWorkspaceEnvelope = (payload: unknown): AnythingWorkspace => { - if (!isRecord(payload)) { - throw new Error("AnythingLLM API: expected object body"); - } - const ws = payload.workspace; - if (!isWorkspace(ws)) { - throw new Error("AnythingLLM API: missing workspace in response"); - } - return ws; -}; - export const createWorkspace = async ( baseUrl: string, apiKey: string, @@ 
-118,9 +116,7 @@ export const createWorkspace = async ( }); const text = await response.text(); if (!response.ok) { - throw new Error( - `AnythingLLM API ${response.status}: ${text.slice(0, 500)}`, - ); + throw new Error(`AnythingLLM API ${response.status}: ${text.slice(0, 500)}`); } return parseWorkspaceEnvelope(parseJson(text)); }; diff --git a/extensions/anythingllm-workspaces/src/anythingllmDocumentApi.ts b/extensions/anythingllm-workspaces/src/anythingllmDocumentApi.ts new file mode 100644 index 0000000..a895cf1 --- /dev/null +++ b/extensions/anythingllm-workspaces/src/anythingllmDocumentApi.ts @@ -0,0 +1,56 @@ +import * as fs from "node:fs/promises"; +import { normalizeAnythingLlmBaseUrl } from "./anythingllmClient"; + +const normalizeApiSecret = (raw: string): string => { + const trimmed = raw.trim(); + const bearerPrefix = /^Bearer\s+/i; + return bearerPrefix.test(trimmed) ? trimmed.replace(bearerPrefix, "").trim() : trimmed; +}; + +const isRecord = (value: unknown): value is Record => + typeof value === "object" && value !== null && !Array.isArray(value); + +export const uploadLocalFileToWorkspace = async ( + baseUrl: string, + apiKey: string, + workspaceSlug: string, + absoluteFilePath: string, + uploadFileName: string, +): Promise => { + const normalized = normalizeAnythingLlmBaseUrl(baseUrl); + const key = normalizeApiSecret(apiKey); + if (key.length === 0) { + throw new Error("anythingllm.apiKey is empty"); + } + const buf = await fs.readFile(absoluteFilePath); + const body = new FormData(); + body.append("file", new Blob([buf]), uploadFileName); + body.append("addToWorkspaces", workspaceSlug); + const url = `${normalized}/api/v1/document/upload`; + const response = await fetch(url, { + method: "POST", + headers: { + Authorization: `Bearer ${key}`, + }, + body, + }); + const text = await response.text(); + let parsed: unknown; + try { + parsed = JSON.parse(text) as unknown; + } catch { + throw new Error(`document upload: non-JSON response 
${response.status}: ${text.slice(0, 300)}`); + } + if (!response.ok) { + throw new Error(`document upload ${response.status}: ${text.slice(0, 500)}`); + } + if (!isRecord(parsed)) { + throw new Error("document upload: invalid JSON body"); + } + const success = parsed.success; + const err = parsed.error; + if (success !== true) { + const msg = typeof err === "string" ? err : JSON.stringify(err); + throw new Error(`document upload failed: ${msg}`); + } +}; diff --git a/extensions/anythingllm-workspaces/src/commandParser.ts b/extensions/anythingllm-workspaces/src/commandParser.ts index f5bbe67..55d1c24 100644 --- a/extensions/anythingllm-workspaces/src/commandParser.ts +++ b/extensions/anythingllm-workspaces/src/commandParser.ts @@ -3,6 +3,7 @@ export type ParsedDevCommand = | { readonly kind: "repos-list" } | { readonly kind: "repos-load"; readonly name: string; readonly sync: boolean } | { readonly kind: "workspace-load"; readonly name: string } + | { readonly kind: "workspace-sync-repo"; readonly name: string } | { readonly kind: "help" } | { readonly kind: "unknown"; readonly raw: string }; @@ -32,6 +33,9 @@ export const parseDevCommandLine = (line: string): ParsedDevCommand => { if (cmd === "/workspace-load") { return { kind: "workspace-load", name: argRest }; } + if (cmd === "/workspace-sync") { + return { kind: "workspace-sync-repo", name: argRest }; + } if (cmd === "help" || cmd === "/help") { return { kind: "help" }; } @@ -41,12 +45,13 @@ export const parseDevCommandLine = (line: string): ParsedDevCommand => { export const devCommandsHelpText = (): string => { return [ "Commands (one per line):", - " /repos-clone — clone into REPOS_DEVTOOLS_ROOT, branch test (default)", - " /repos-clone-sync — clone + ensure AnythingLLM workspace (same name) + open folder + browser", - " repos-list — list cloned git repos under root", - " /repos-load — verify repo; open folder in editor", - " /repos-load-sync — open folder + ensure workspace + browser", - " /workspace-load — 
ensure workspace by name (create via API if missing) + browser", - " help — this list", + " /repos-clone — clone (branch test)", + " /repos-clone-sync — clone + workspace + open folder + optional initial RAG upload (.4nkaiignore)", + " repos-list — list git repos under REPOS_DEVTOOLS_ROOT", + " /repos-load — verify repo + open folder", + " /repos-load-sync — open folder + workspace + optional initial RAG upload", + " /workspace-load — ensure workspace + browser", + " /workspace-sync — ensure workspace + initial RAG upload (repo must exist under root)", + " help — this list", ].join("\n"); }; diff --git a/extensions/anythingllm-workspaces/src/config.ts b/extensions/anythingllm-workspaces/src/config.ts index ffe34a8..e8ba8fa 100644 --- a/extensions/anythingllm-workspaces/src/config.ts +++ b/extensions/anythingllm-workspaces/src/config.ts @@ -7,6 +7,9 @@ export interface DevToolsConfigSnapshot { readonly anythingApiKey: string; readonly reposApiBaseUrl: string; readonly reposApiToken: string; + readonly initialSyncAfterClone: boolean; + readonly initialSyncMaxFiles: number; + readonly initialSyncMaxFileBytes: number; } export const readAnythingConfig = (): { baseUrl: string; apiKey: string } => { @@ -16,6 +19,14 @@ export const readAnythingConfig = (): { baseUrl: string; apiKey: string } => { return { baseUrl, apiKey }; }; +const readPositiveInt = (cfg: vscode.WorkspaceConfiguration, key: string, fallback: number): number => { + const v = cfg.get(key); + if (typeof v === "number" && Number.isFinite(v) && v > 0) { + return Math.floor(v); + } + return fallback; +}; + export const readDevToolsConfig = (): DevToolsConfigSnapshot => { const cfg = vscode.workspace.getConfiguration(CONFIG_SECTION); const { baseUrl, apiKey } = readAnythingConfig(); @@ -27,10 +38,16 @@ export const readDevToolsConfig = (): DevToolsConfigSnapshot => { typeof cfg.get("reposApiToken") === "string" ? 
(cfg.get("reposApiToken") as string) : ""; + const initialSyncAfterClone = cfg.get("initialSyncAfterClone") !== false; + const initialSyncMaxFiles = readPositiveInt(cfg, "initialSyncMaxFiles", 400); + const initialSyncMaxFileBytes = readPositiveInt(cfg, "initialSyncMaxFileBytes", 5_242_880); return { anythingBaseUrl: baseUrl, anythingApiKey: apiKey, reposApiBaseUrl, reposApiToken, + initialSyncAfterClone, + initialSyncMaxFiles, + initialSyncMaxFileBytes, }; }; diff --git a/extensions/anythingllm-workspaces/src/devToolsExecutor.ts b/extensions/anythingllm-workspaces/src/devToolsExecutor.ts index 4578214..161aaa2 100644 --- a/extensions/anythingllm-workspaces/src/devToolsExecutor.ts +++ b/extensions/anythingllm-workspaces/src/devToolsExecutor.ts @@ -7,6 +7,7 @@ import { import { normalizeAnythingLlmBaseUrl } from "./anythingllmClient"; import { reposApiClone, reposApiList, reposApiLoad } from "./reposApiClient"; import { ensureWorkspaceForRepoName } from "./workspaceEnsure"; +import { runInitialRagImportFromRepo } from "./initialRagSync"; const DEFAULT_BRANCH = "test"; @@ -26,6 +27,10 @@ export interface DevToolsRunnerContext { readonly anythingApiKey: string; readonly reposApiBaseUrl: string; readonly reposApiToken: string; + readonly initialSyncAfterClone: boolean; + readonly initialSyncMaxFiles: number; + readonly initialSyncMaxFileBytes: number; + readonly default4nkaiignoreTemplateFsPath: string; readonly openFolder: (fsPath: string) => Thenable; readonly openAnythingWorkspaceInBrowser: (slug: string) => Thenable; } @@ -45,6 +50,27 @@ const assertAnythingConfig = (ctx: DevToolsRunnerContext): void => { } }; +const appendInitialRag = async ( + ctx: DevToolsRunnerContext, + repoRoot: string, + workspaceSlug: string, +): Promise => { + if (!ctx.initialSyncAfterClone) { + return ""; + } + assertAnythingConfig(ctx); + const res = await runInitialRagImportFromRepo({ + baseUrl: ctx.anythingBaseUrl, + apiKey: ctx.anythingApiKey, + workspaceSlug, + repoRoot, + 
templateFsPath: ctx.default4nkaiignoreTemplateFsPath, + maxFiles: ctx.initialSyncMaxFiles, + maxFileBytes: ctx.initialSyncMaxFileBytes, + }); + return `\n---\nInitial RAG sync: ${fmt(res)}`; +}; + const runOne = async ( cmd: ParsedDevCommand, ctx: DevToolsRunnerContext, @@ -91,8 +117,9 @@ const runOne = async ( out += `\n---\nAnythingLLM workspace: ${fmt({ slug: ensured.workspace.slug, name: ensured.workspace.name, - created: ensured.created, + workspaceCreatedByApi: ensured.created, })}`; + out += await appendInitialRag(ctx, fsPath, ensured.workspace.slug); await ctx.openFolder(fsPath); await ctx.openAnythingWorkspaceInBrowser(ensured.workspace.slug); } @@ -120,8 +147,9 @@ const runOne = async ( out += `\n---\nAnythingLLM workspace: ${fmt({ slug: ensured.workspace.slug, name: ensured.workspace.name, - created: ensured.created, + workspaceCreatedByApi: ensured.created, })}`; + out += await appendInitialRag(ctx, loaded.path, ensured.workspace.slug); await ctx.openAnythingWorkspaceInBrowser(ensured.workspace.slug); } return out; @@ -140,9 +168,34 @@ const runOne = async ( return fmt({ slug: ensured.workspace.slug, name: ensured.workspace.name, - created: ensured.created, + workspaceCreatedByApi: ensured.created, }); } + if (cmd.kind === "workspace-sync-repo") { + assertReposConfig(ctx); + assertAnythingConfig(ctx); + if (cmd.name.length === 0) { + throw new Error("/workspace-sync requires a repository folder name."); + } + const loaded = await reposApiLoad( + ctx.reposApiBaseUrl, + ctx.reposApiToken, + cmd.name, + ); + const ensured = await ensureWorkspaceForRepoName( + ctx.anythingBaseUrl, + ctx.anythingApiKey, + loaded.name, + ); + let out = fmt({ + repoPath: loaded.path, + slug: ensured.workspace.slug, + name: ensured.workspace.name, + workspaceCreatedByApi: ensured.created, + }); + out += await appendInitialRag(ctx, loaded.path, ensured.workspace.slug); + return out; + } return `Unhandled: ${JSON.stringify(cmd)}`; }; diff --git 
a/extensions/anythingllm-workspaces/src/devToolsPanel.ts b/extensions/anythingllm-workspaces/src/devToolsPanel.ts index 538f681..13be113 100644 --- a/extensions/anythingllm-workspaces/src/devToolsPanel.ts +++ b/extensions/anythingllm-workspaces/src/devToolsPanel.ts @@ -33,7 +33,7 @@ const buildHtml = (
- +
@@ -67,6 +67,11 @@ export const showDevToolsPanel = ( panel.webview.html = buildHtml(panel.webview, context.extensionUri); const openFolder = registerDevToolsOpenFolder(vscode); + const templateFsPath = vscode.Uri.joinPath( + context.extensionUri, + "templates", + "4nkaiignore.default", + ).fsPath; panel.webview.onDidReceiveMessage( (msg: unknown) => { @@ -87,6 +92,10 @@ export const showDevToolsPanel = ( anythingApiKey: use.anythingApiKey, reposApiBaseUrl: use.reposApiBaseUrl, reposApiToken: use.reposApiToken, + initialSyncAfterClone: use.initialSyncAfterClone, + initialSyncMaxFiles: use.initialSyncMaxFiles, + initialSyncMaxFileBytes: use.initialSyncMaxFileBytes, + default4nkaiignoreTemplateFsPath: templateFsPath, openFolder, openAnythingWorkspaceInBrowser: openBrowser, }); diff --git a/extensions/anythingllm-workspaces/src/initialRagSync.ts b/extensions/anythingllm-workspaces/src/initialRagSync.ts new file mode 100644 index 0000000..675b6cc --- /dev/null +++ b/extensions/anythingllm-workspaces/src/initialRagSync.ts @@ -0,0 +1,136 @@ +import ignore from "ignore"; +import * as fs from "node:fs/promises"; +import * as path from "node:path"; +import { uploadLocalFileToWorkspace } from "./anythingllmDocumentApi"; + +const ALWAYS_IGNORE = [".git/", "node_modules/", "**/node_modules/"].join("\n"); + +export interface InitialRagImportResult { + readonly uploaded: number; + readonly skipped: number; + readonly errors: readonly string[]; + readonly dotfileCreated: boolean; + readonly capped: boolean; +} + +export const ensureDot4nkaiignoreFromTemplate = async ( + repoRoot: string, + templateFsPath: string, +): Promise<{ created: boolean }> => { + const target = path.join(repoRoot, ".4nkaiignore"); + try { + await fs.access(target); + return { created: false }; + } catch { + const tmpl = await fs.readFile(templateFsPath, "utf8"); + await fs.writeFile(target, tmpl, "utf8"); + return { created: true }; + } +}; + +const walkFiles = async (dir: string): Promise => { + const 
out: string[] = []; + const scan = async (d: string): Promise => { + const entries = await fs.readdir(d, { withFileTypes: true }); + for (const e of entries) { + const p = path.join(d, e.name); + if (e.isSymbolicLink()) { + continue; + } + if (e.isDirectory()) { + await scan(p); + continue; + } + if (e.isFile()) { + out.push(p); + } + } + }; + await scan(dir); + return out; +}; + +const toPosixRel = (root: string, abs: string): string => { + const rel = path.relative(root, abs); + return rel.split(path.sep).join("/"); +}; + +const uploadNameForRel = (rel: string): string => { + return rel.split("/").join("__"); +}; + +export const runInitialRagImportFromRepo = async (opts: { + readonly baseUrl: string; + readonly apiKey: string; + readonly workspaceSlug: string; + readonly repoRoot: string; + readonly templateFsPath: string; + readonly maxFiles: number; + readonly maxFileBytes: number; +}): Promise => { + const dot = await ensureDot4nkaiignoreFromTemplate(opts.repoRoot, opts.templateFsPath); + const ignorePath = path.join(opts.repoRoot, ".4nkaiignore"); + let userRules = ""; + try { + userRules = await fs.readFile(ignorePath, "utf8"); + } catch { + userRules = ""; + } + const ig = ignore(); + ig.add(ALWAYS_IGNORE); + ig.add(userRules); + + const absFiles = await walkFiles(opts.repoRoot); + const candidates: string[] = []; + for (const abs of absFiles) { + const rel = toPosixRel(opts.repoRoot, abs); + if (rel.length === 0 || rel.startsWith("..")) { + continue; + } + if (ig.ignores(rel)) { + continue; + } + candidates.push(abs); + } + + let uploaded = 0; + let skipped = 0; + const errors: string[] = []; + let capped = false; + + for (const abs of candidates) { + if (uploaded >= opts.maxFiles) { + capped = true; + skipped += 1; + continue; + } + const st = await fs.stat(abs); + if (st.size > opts.maxFileBytes) { + skipped += 1; + continue; + } + const rel = toPosixRel(opts.repoRoot, abs); + const uploadName = uploadNameForRel(rel); + try { + await 
uploadLocalFileToWorkspace( + opts.baseUrl, + opts.apiKey, + opts.workspaceSlug, + abs, + uploadName, + ); + uploaded += 1; + } catch (e) { + const m = e instanceof Error ? e.message : String(e); + errors.push(`${rel}: ${m}`); + } + } + + return { + uploaded, + skipped, + errors, + dotfileCreated: dot.created, + capped, + }; +}; diff --git a/extensions/anythingllm-workspaces/src/reposApiClient.ts b/extensions/anythingllm-workspaces/src/reposApiClient.ts index e9805f0..fa6e78e 100644 --- a/extensions/anythingllm-workspaces/src/reposApiClient.ts +++ b/extensions/anythingllm-workspaces/src/reposApiClient.ts @@ -38,7 +38,9 @@ export const reposApiClone = async ( /* keep text */ } if (!res.ok) { - throw new Error(`repos API ${res.status}: ${typeof body === "string" ? body : JSON.stringify(body)}`); + throw new Error( + `repos API ${res.status}: ${typeof body === "string" ? body : JSON.stringify(body)}`, + ); } return body; }; @@ -85,10 +87,10 @@ export const reposApiLoad = async ( throw new Error("repos-load: invalid response"); } const rec = body as Record; - const path = rec.path; + const p = rec.path; const n = rec.name; - if (typeof path !== "string" || typeof n !== "string") { + if (typeof p !== "string" || typeof n !== "string") { throw new Error("repos-load: missing path or name"); } - return { path, name: n }; + return { path: p, name: n }; }; diff --git a/extensions/anythingllm-workspaces/src/types.ts b/extensions/anythingllm-workspaces/src/types.ts index 7db47ba..08ffa7d 100644 --- a/extensions/anythingllm-workspaces/src/types.ts +++ b/extensions/anythingllm-workspaces/src/types.ts @@ -1,14 +1,7 @@ -export interface AnythingThreadSummary { - readonly user_id: number | null; - readonly slug: string; - readonly name: string | null; -} - export interface AnythingWorkspace { readonly id: number; readonly name: string; readonly slug: string; readonly createdAt?: string; readonly lastUpdatedAt?: string; - readonly threads?: readonly AnythingThreadSummary[]; } diff 
--git a/extensions/anythingllm-workspaces/templates/4nkaiignore.default b/extensions/anythingllm-workspaces/templates/4nkaiignore.default new file mode 100644 index 0000000..87a678a --- /dev/null +++ b/extensions/anythingllm-workspaces/templates/4nkaiignore.default @@ -0,0 +1,54 @@ +# .4nkaiignore — same rules as .gitignore (see gitignore(5)) +# Used by the AnythingLLM Workspaces extension to filter the initial document upload +# after clone or /repos-load-sync. Copy or rename to `.4nkaiignore` at the repo root. + +# VCS +.git/ + +# Dependencies & build outputs +node_modules/ +**/node_modules/ +dist/ +out/ +build/ +.next/ +.turbo/ +coverage/ +.nyc_output/ +target/ + +# IDE / OS +.idea/ +.vscode/ +.DS_Store +Thumbs.db + +# Secrets & local env (never embed) +.env +.env.* +!.env.example + +# Large or binary artifacts (remove a line if your project should embed that type) +*.png +*.jpg +*.jpeg +*.gif +*.webp +*.ico +*.pdf +*.zip +*.tar +*.gz +*.7z +*.wasm +*.so +*.dylib +*.dll +*.exe +*.mp4 +*.mp3 + +# Minified bundles (often redundant with sources) +*.min.js +*.min.css +*.map diff --git a/services/repos-devtools-server/README.md b/services/repos-devtools-server/README.md index 5502cf9..8c063c4 100644 --- a/services/repos-devtools-server/README.md +++ b/services/repos-devtools-server/README.md @@ -1,47 +1,35 @@ # repos-devtools-server -Local HTTP API bound to **`127.0.0.1`** for git operations under **`REPOS_DEVTOOLS_ROOT`** (default `/home/ncantu/code`). +Local HTTP API on **`127.0.0.1`** for git operations under **`REPOS_DEVTOOLS_ROOT`** (default `/home/ncantu/code`). + +After a successful **`POST /repos-clone`**, if the new repo has no **`.4nkaiignore`**, the server copies **`templates/4nkaiignore.default`** into the repository root. The response includes **`fourNkAiIgnoreTemplateWrote`: boolean**. 
## Environment | Variable | Required | Description | |----------|----------|-------------| -| `REPOS_DEVTOOLS_TOKEN` | yes | Shared secret; clients send `Authorization: Bearer `. | -| `REPOS_DEVTOOLS_ROOT` | no | Absolute root for clones (default `/home/ncantu/code`). | +| `REPOS_DEVTOOLS_TOKEN` | yes | `Authorization: Bearer ` on every request. | +| `REPOS_DEVTOOLS_ROOT` | no | Clone root (default `/home/ncantu/code`). | | `REPOS_DEVTOOLS_HOST` | no | Bind address (default `127.0.0.1`). | | `REPOS_DEVTOOLS_PORT` | no | Port (default `37140`). | ## Endpoints -- `POST /repos-clone` — JSON `{ "url": "", "branch": "test" }` (`branch` optional, default `test`). -- `GET /repos-list` — Lists immediate subdirectories of the root that contain `.git`. -- `POST /repos-load` — JSON `{ "name": "" }` — Verifies the repo exists; returns absolute `path`. - -All endpoints require `Authorization: Bearer `. +- `POST /repos-clone` — JSON `{ "url": "", "branch": "test" }` (`branch` optional). +- `GET /repos-list` +- `POST /repos-load` — JSON `{ "name": "" }` ## Run -### One-off (foreground) - ```bash -cd services/repos-devtools-server npm install npm run build -# Create .env (gitignored) with REPOS_DEVTOOLS_TOKEN=... and REPOS_DEVTOOLS_ROOT=/home/ncantu/code -set -a && source .env && set +a && node dist/server.js +export REPOS_DEVTOOLS_TOKEN='…' +npm start ``` -### systemd (user) +After upgrading, reload **systemd** if used: `systemctl --user daemon-reload && systemctl --user restart repos-devtools-server.service`. -Copy `systemd/user/repos-devtools-server.service` to `~/.config/systemd/user/`, create `.env` beside this README, then: +## Templates -```bash -systemctl --user daemon-reload -systemctl --user enable --now repos-devtools-server.service -``` - -Use the same token in the VS Code / Cursor setting **`anythingllm.reposApiToken`**. 
- -## Integration - -The **AnythingLLM Workspaces** extension command **AnythingLLM: Dev tools panel** calls this API and the AnythingLLM HTTP API for workspace create/list. +Keep **`templates/4nkaiignore.default`** aligned with `extensions/anythingllm-workspaces/templates/4nkaiignore.default` in the monorepo when you change defaults. diff --git a/services/repos-devtools-server/package.json b/services/repos-devtools-server/package.json index 6e34e2f..d1a4ff4 100644 --- a/services/repos-devtools-server/package.json +++ b/services/repos-devtools-server/package.json @@ -1,8 +1,8 @@ { "name": "@4nk/repos-devtools-server", - "version": "0.1.0", + "version": "0.2.0", "private": true, - "description": "Local HTTP API: git clone/list under REPOS_DEVTOOLS_ROOT (e.g. /home/ncantu/code).", + "description": "Local HTTP API: git clone/list under REPOS_DEVTOOLS_ROOT; writes default .4nkaiignore after clone.", "license": "MIT", "type": "module", "main": "dist/server.js", diff --git a/services/repos-devtools-server/src/handlers.ts b/services/repos-devtools-server/src/handlers.ts index cda06e6..2c11a84 100644 --- a/services/repos-devtools-server/src/handlers.ts +++ b/services/repos-devtools-server/src/handlers.ts @@ -9,6 +9,7 @@ import { repoDirForName, repoNameFromGitUrl, } from "./paths.js"; +import { copyDefault4nkaiignoreIfMissing } from "./write4nkaiignore.js"; const json = (res: ServerResponse, status: number, body: unknown): void => { res.writeHead(status, { "Content-Type": "application/json; charset=utf-8" }); @@ -82,12 +83,27 @@ export const handleReposClone = async ( }); return; } + let fourNkAiIgnoreTemplateWrote = false; + try { + const c = await copyDefault4nkaiignoreIfMissing(dest); + fourNkAiIgnoreTemplateWrote = c.wrote; + } catch (e) { + const msg = e instanceof Error ? 
e.message : String(e); + json(res, 500, { + error: "clone ok but failed to write default .4nkaiignore template", + detail: msg, + name, + path: dest, + }); + return; + } json(res, 200, { ok: true, name, path: dest, branch, url, + fourNkAiIgnoreTemplateWrote, }); }; diff --git a/services/repos-devtools-server/src/paths.ts b/services/repos-devtools-server/src/paths.ts index 0a20313..1d3d363 100644 --- a/services/repos-devtools-server/src/paths.ts +++ b/services/repos-devtools-server/src/paths.ts @@ -16,8 +16,7 @@ export const assertSafeRepoName = (name: string): string => { export const getCodeRoot = (): string => { const raw = process.env.REPOS_DEVTOOLS_ROOT ?? "/home/ncantu/code"; - const resolved = path.resolve(raw); - return resolved; + return path.resolve(raw); }; export const repoDirForName = (codeRoot: string, name: string): string => { diff --git a/services/repos-devtools-server/src/server.ts b/services/repos-devtools-server/src/server.ts index 4aeda7f..cc8d005 100644 --- a/services/repos-devtools-server/src/server.ts +++ b/services/repos-devtools-server/src/server.ts @@ -1,5 +1,5 @@ import * as http from "node:http"; -import { requireBearer, readExpectedToken } from "./auth.js"; +import { readExpectedToken, requireBearer } from "./auth.js"; import { handleReposClone, handleReposList, diff --git a/services/repos-devtools-server/src/write4nkaiignore.ts b/services/repos-devtools-server/src/write4nkaiignore.ts new file mode 100644 index 0000000..9e47604 --- /dev/null +++ b/services/repos-devtools-server/src/write4nkaiignore.ts @@ -0,0 +1,23 @@ +import * as fs from "node:fs/promises"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + +const templateFsPath = (): string => { + const here = path.dirname(fileURLToPath(import.meta.url)); + return path.join(here, "..", "templates", "4nkaiignore.default"); +}; + +export const copyDefault4nkaiignoreIfMissing = async ( + repoRoot: string, +): Promise<{ wrote: boolean }> => { + const target = 
path.join(repoRoot, ".4nkaiignore"); + try { + await fs.access(target); + return { wrote: false }; + } catch { + const src = templateFsPath(); + const content = await fs.readFile(src, "utf8"); + await fs.writeFile(target, content, "utf8"); + return { wrote: true }; + } +}; diff --git a/services/repos-devtools-server/templates/4nkaiignore.default b/services/repos-devtools-server/templates/4nkaiignore.default new file mode 100644 index 0000000..1a33234 --- /dev/null +++ b/services/repos-devtools-server/templates/4nkaiignore.default @@ -0,0 +1,54 @@ +# .4nkaiignore — same rules as .gitignore (see gitignore(5)) +# Used to filter the initial document upload to AnythingLLM (extension). +# Copy or rename to `.4nkaiignore` at the repo root and adjust per project. + +# VCS +.git/ + +# Dependencies & build outputs +node_modules/ +**/node_modules/ +dist/ +out/ +build/ +.next/ +.turbo/ +coverage/ +.nyc_output/ +target/ + +# IDE / OS +.idea/ +.vscode/ +.DS_Store +Thumbs.db + +# Secrets & local env (never embed) +.env +.env.* +!.env.example + +# Large or binary artifacts (remove a line if your project should embed that type) +*.png +*.jpg +*.jpeg +*.gif +*.webp +*.ico +*.pdf +*.zip +*.tar +*.gz +*.7z +*.wasm +*.so +*.dylib +*.dll +*.exe +*.mp4 +*.mp3 + +# Minified bundles (often redundant with sources) +*.min.js +*.min.css +*.map