1

System/Nixinator: Enable open-webui web search + document extraction

This commit is contained in:
2025-07-14 15:57:54 +02:00
parent b0d3c15786
commit e431c3ad25
3 changed files with 67 additions and 6 deletions

View File

@ -80,13 +80,27 @@
"kdeconnect-cert"
"kdeconnect-privatekey"
"kdeconnect-devices"
"kagi-api-key"
"google-pse-id"
"google-pse-key"
];
};
# Render the web-search API credentials into an env file; sops-nix substitutes
# each ${config.sops.placeholder.*} with the decrypted secret at activation
# time. The resulting file is consumed via open-webui's `environmentFile`
# (see the open-webui service definition below).
# NOTE: no comments may be added inside the ''…'' string — they would become
# part of the generated file's content.
sops.templates."open-webui-secrets.env".content = ''
KAGI_SEARCH_API_KEY=${config.sops.placeholder.kagi-api-key}
GOOGLE_PSE_ENGINE_ID=${config.sops.placeholder.google-pse-id}
GOOGLE_PSE_API_KEY=${config.sops.placeholder.google-pse-key}
'';
# Use the Zen kernel flavour.
boot.kernelPackages = pkgs.linuxPackages_zen;
environment.systemPackages = [
  # TODO: Not found by docling
  pkgs.tesseract # OCR engine intended for services.docling-serve
];
programs = {
ausweisapp = {
enable = true;
@ -94,17 +108,43 @@
};
};
# TODO: To AI module
services = {
# TODO: Docling doesn't find tesseract OCR engine... Probably use docker?
docling-serve = {
  enable = true;
  # Loopback only — consumed by open-webui through DOCLING_SERVER_URL;
  # not exposed to the network.
  host = "127.0.0.1";
  port = 11111;
  openFirewall = false;
  # Persistent state location for the service.
  stateDir = "/var/lib/docling-serve";
};
ollama = {
enable = true;
acceleration = "cuda";
home = "/var/lib/ollama";
# Models pre-pulled at service start. The 8b entry was listed twice,
# which declares the same model redundantly — deduplicated here.
loadModels = [
  "deepseek-r1:8b" # Default
  "deepseek-r1:14b"
];
# Server tuning knobs, per the upstream FAQ:
# https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-configure-ollama-server
environmentVariables = {
  # A longer context window (8192+ tokens) improves Retrieval-Augmented
  # Generation (RAG) performance.
  OLLAMA_CONTEXT_LENGTH = "8192";
  # Flash Attention keeps memory usage from growing steeply with
  # context size on most modern models.
  OLLAMA_FLASH_ATTENTION = "1";
  # With Flash Attention enabled, the K/V cache can be quantized
  # (options: f16, q8_0, q4_0) to cut memory usage further.
  OLLAMA_KV_CACHE_TYPE = "q8_0";
};
host = "127.0.0.1";
port = 11434;
openFirewall = false;
@ -116,18 +156,36 @@
# https://docs.openwebui.com/getting-started/env-configuration
# open-webui runtime configuration.
# Fix: WEBUI_AUTH and ENABLE_OPENAI_API were each defined twice in this
# attrset; duplicate attribute definitions are a Nix evaluation error
# ("attribute '…' already defined"). One definition of each is kept.
environment = {
  # No login screen; the UI is bound to loopback only (see host below).
  WEBUI_AUTH = "False";
  DEFAULT_MODELS = builtins.head config.services.ollama.loadModels;
  TASK_MODEL = builtins.head config.services.ollama.loadModels;
  # Talk only to the local Ollama instance, never the OpenAI API.
  ENABLE_OPENAI_API = "False";
  ENABLE_OLLAMA_API = "True";
  OLLAMA_BASE_URL = "http://${config.services.ollama.host}:${builtins.toString config.services.ollama.port}";
  ENABLE_EVALUATION_ARENA_MODELS = "False";
  ENABLE_COMMUNITY_SHARING = "False";
  # Document extraction is delegated to the local docling-serve instance.
  CONTENT_EXTRACTION_ENGINE = "docling";
  DOCLING_SERVER_URL = "http://${config.services.docling-serve.host}:${builtins.toString config.services.docling-serve.port}";
  ENABLE_RAG_HYBRID_SEARCH = "False";
  ENABLE_RAG_LOCAL_WEB_FETCH = "True";
  ENABLE_WEB_SEARCH = "True";
  WEB_SEARCH_ENGINE = "google_pse";
  # GOOGLE_PSE_ENGINE_ID = ""; # Use environmentFile
  # GOOGLE_PSE_API_KEY = ""; # Use environmentFile
  # KAGI_SEARCH_API_KEY = ""; # Use environmentFile
  ANONYMIZED_TELEMETRY = "False";
  DO_NOT_TRACK = "True";
  SCARF_NO_ANALYTICS = "True";
};
environmentFile = config.sops.templates."open-webui-secrets.env".path;
host = "127.0.0.1";
port = 11435;
openFirewall = false;