- Add install-local-hf.sh (uv sync --extra hf or pip install -e .[hf])
- Add run-chandra-hf.sh defaulting to --method hf
- Expand .env.example for upstream/local.env (MODEL_CHECKPOINT, TORCH_*)
# Copy to services/chandra/upstream/local.env (loaded by pydantic-settings via find_dotenv).
# https://github.com/datalab-to/chandra — local Hugging Face inference

# Hugging Face model id (weights downloaded on first run)
MODEL_CHECKPOINT=datalab-to/chandra-ocr-2

# Optional: force device, e.g. cuda:0, cpu
# TORCH_DEVICE=cuda:0

# Optional: flash attention — requires compatible GPU + flash-attn installed
# TORCH_ATTN=flash_attention_2

MAX_OUTPUT_TOKENS=12384

# If the checkpoint were gated (not the case for chandra-ocr-2 by default), use:
# HF_TOKEN=

# --- vLLM only (not used with --method hf) ---
# VLLM_API_BASE=http://localhost:8000/v1
# VLLM_MODEL_NAME=chandra
# VLLM_GPUS=0