name: framework
version: 0.0.1
schema: v1

# Continue.dev config for the framework-backed local LLM stack. Drop at
# ~/.continue/config.yaml (per-machine) or ./.continue/config.yaml
# (repo-scoped). One-time prep on the box: `ollama pull nomic-embed-text`.
#
# Two chat models, manually switched via Continue's model dropdown.
# Qwen3-Coder is the daily driver; Kimi-Linear is the long-context play
# and only works once P0 verification passes on the box.

models:
  - name: Qwen3-Coder 30B (Ollama)
    provider: ollama
    model: qwen3-coder:30b            # Ollama tag; the "A3B" in the full model name is the MoE active-param count, descriptive only
    apiBase: http://framework:11434   # no /v1 here; the Ollama provider speaks /api/* directly
    roles: [chat, edit, apply]
    defaultCompletionOptions:
      contextLength: 65536            # matches OLLAMA_CONTEXT_LENGTH in ollama.yml

  - name: Kimi-Linear 48B-A3B (vLLM)
    provider: openai                  # vLLM exposes an OpenAI-compatible /v1
    model: kimi-linear                # served-model-name in compose/kimi-linear.yml
    apiBase: http://framework:8000/v1
    apiKey: dummy                     # vLLM doesn't enforce auth; Continue requires a non-empty key
    roles: [chat, edit, apply]
    defaultCompletionOptions:
      contextLength: 32768            # P0 narrow start; bump as --max-model-len ramps

  - name: nomic-embed-text
    provider: ollama
    model: nomic-embed-text
    apiBase: http://framework:11434
    roles: [embed]

context:
  - provider: code
  - provider: diff
  - provider: terminal
  - provider: problems
  - provider: folder
  - provider: codebase                # uses the embed-role model above
  - provider: file
  - provider: open

# Tab autocomplete is intentionally omitted; Qwen3-Coder-30B is too slow
# for inline FIM. Add a small FIM-trained model later (qwen2.5-coder:1.5b
# or starcoder2:3b) and wire it in with roles: [autocomplete] (commented
# sketch below).
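#
# A minimal sketch of that wiring, reusing this file's own model-entry shape.
# Assumes the tag has been pulled on the box first
# (`ollama pull qwen2.5-coder:1.5b`); untested here. Move it under `models:`
# and uncomment to try it:
#
#   - name: Qwen2.5-Coder 1.5B (FIM)
#     provider: ollama
#     model: qwen2.5-coder:1.5b
#     apiBase: http://framework:11434
#     roles: [autocomplete]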
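#
# Quick reachability checks from a client machine before pointing Continue at
# the box, using Ollama's /api/tags and vLLM's OpenAI-compatible /v1/models
# (standard list endpoints for both servers):
#
#   curl http://framework:11434/api/tags    # Ollama up; lists pulled models
#   curl http://framework:8000/v1/models    # vLLM up; should list kimi-linear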