System/Nixinator: Enable open-webui web search + document extraction
@@ -80,13 +80,27 @@
      "kdeconnect-cert"
      "kdeconnect-privatekey"
      "kdeconnect-devices"
      "kagi-api-key"
      "google-pse-id"
      "google-pse-key"
    ];
  };

  sops.templates."open-webui-secrets.env".content = ''
    KAGI_SEARCH_API_KEY=${config.sops.placeholder.kagi-api-key}
    GOOGLE_PSE_ENGINE_ID=${config.sops.placeholder.google-pse-id}
    GOOGLE_PSE_API_KEY=${config.sops.placeholder.google-pse-key}
  '';

  boot = {
    kernelPackages = pkgs.linuxPackages_zen;
  };

  environment.systemPackages = with pkgs; [
    # TODO: Not found by docling
    tesseract # For services.docling-serve
  ];

  programs = {
    ausweisapp = {
      enable = true;
@@ -94,17 +108,43 @@
    };
  };

  # TODO: To AI module
  services = {
    # TODO: Docling doesn't find the tesseract OCR engine... Probably use docker?
    docling-serve = {
      enable = true;
      stateDir = "/var/lib/docling-serve";

      host = "127.0.0.1";
      port = 11111;
      openFirewall = false;
    };

    ollama = {
      enable = true;
      acceleration = "cuda";
      home = "/var/lib/ollama";

      loadModels = [
        "deepseek-r1:8b"
        "deepseek-r1:8b" # Default
        "deepseek-r1:14b"
      ];

      # https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-configure-ollama-server
      environmentVariables = {
        # Flash Attention is a feature of most modern models
        # that can significantly reduce memory usage as the context size grows.
        OLLAMA_FLASH_ATTENTION = "1";

        # The K/V context cache can be quantized to significantly
        # reduce memory usage when Flash Attention is enabled.
        OLLAMA_KV_CACHE_TYPE = "q8_0"; # f16, q8_0, q4_0

        # To improve Retrieval-Augmented Generation (RAG) performance, you should increase
        # the context length to 8192+ tokens in your Ollama model settings.
        OLLAMA_CONTEXT_LENGTH = "8192";
      };

      host = "127.0.0.1";
      port = 11434;
      openFirewall = false;
@@ -116,18 +156,36 @@

      # https://docs.openwebui.com/getting-started/env-configuration
      environment = {
        WEBUI_AUTH = "False";
        DEFAULT_MODELS = builtins.head config.services.ollama.loadModels;
        TASK_MODEL = builtins.head config.services.ollama.loadModels;

        ENABLE_OPENAI_API = "False";
        ENABLE_OLLAMA_API = "True";
        OLLAMA_BASE_URL = "http://${config.services.ollama.host}:${builtins.toString config.services.ollama.port}";

        ENABLE_OPENAI_API = "False";
        ENABLE_EVALUATION_ARENA_MODELS = "False";
        ENABLE_COMMUNITY_SHARING = "False";

        CONTENT_EXTRACTION_ENGINE = "docling";
        DOCLING_SERVER_URL = "http://${config.services.docling-serve.host}:${builtins.toString config.services.docling-serve.port}";

        ENABLE_RAG_HYBRID_SEARCH = "False";
        ENABLE_RAG_LOCAL_WEB_FETCH = "True";

        ENABLE_WEB_SEARCH = "True";
        WEB_SEARCH_ENGINE = "google_pse";
        # GOOGLE_PSE_ENGINE_ID = ""; # Use environmentFile
        # GOOGLE_PSE_API_KEY = ""; # Use environmentFile
        # KAGI_SEARCH_API_KEY = ""; # Use environmentFile

        WEBUI_AUTH = "False";
        ANONYMIZED_TELEMETRY = "False";
        DO_NOT_TRACK = "True";
        SCARF_NO_ANALYTICS = "True";
      };

      environmentFile = config.sops.templates."open-webui-secrets.env".path;

      host = "127.0.0.1";
      port = 11435;
      openFirewall = false;
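
The "Probably use docker?" TODO above points at running docling-serve as a container instead of the native NixOS service, so the OCR stack ships with the image. A minimal sketch of that route using the NixOS oci-containers module, assuming the upstream quay.io/docling-project/docling-serve image and container-internal port 5001 (both assumptions, not taken from this commit):

  {
    virtualisation.oci-containers = {
      backend = "podman";
      containers.docling-serve = {
        image = "quay.io/docling-project/docling-serve:latest"; # assumed image reference
        autoStart = true;
        # Publish only on loopback, mirroring the native service above.
        ports = [ "127.0.0.1:11111:5001" ]; # 5001 is an assumed internal port
      };
    };
  }

If the native services.docling-serve were dropped in favour of this, DOCLING_SERVER_URL in the open-webui environment would need to point at the published address directly instead of interpolating config.services.docling-serve.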