1

System/Nixinator: Enable open-webui web search + document extraction

This commit is contained in:
2025-07-14 15:57:54 +02:00
parent b0d3c15786
commit e431c3ad25
3 changed files with 67 additions and 6 deletions

View File

@ -80,13 +80,27 @@
"kdeconnect-cert"
"kdeconnect-privatekey"
"kdeconnect-devices"
"kagi-api-key"
"google-pse-id"
"google-pse-key"
];
};
# Render the web-search API credentials into an env file; sops-nix substitutes
# each ${config.sops.placeholder.*} with the decrypted secret at activation
# time. The resulting file is consumed via open-webui's `environmentFile`
# (see the open-webui service definition below).
# NOTE: no comments may be added inside the ''…'' string — they would become
# part of the generated file's content.
sops.templates."open-webui-secrets.env".content = ''
KAGI_SEARCH_API_KEY=${config.sops.placeholder.kagi-api-key}
GOOGLE_PSE_ENGINE_ID=${config.sops.placeholder.google-pse-id}
GOOGLE_PSE_API_KEY=${config.sops.placeholder.google-pse-key}
'';
# Use the Zen kernel flavour.
boot.kernelPackages = pkgs.linuxPackages_zen;
environment.systemPackages = [
  # TODO: Not found by docling
  pkgs.tesseract # OCR engine intended for services.docling-serve
];
programs = {
ausweisapp = {
enable = true;
@ -94,17 +108,43 @@
};
};
# TODO: To AI module
services = {
# TODO: Docling doesn't find tesseract OCR engine... Probably use docker?
docling-serve = {
  enable = true;
  # Loopback only — consumed by open-webui through DOCLING_SERVER_URL;
  # not exposed to the network.
  host = "127.0.0.1";
  port = 11111;
  openFirewall = false;
  # Persistent state location for the service.
  stateDir = "/var/lib/docling-serve";
};
ollama = {
enable = true;
acceleration = "cuda";
home = "/var/lib/ollama";
# Models pre-pulled at service start. The 8b entry was listed twice,
# which declares the same model redundantly — deduplicated here.
loadModels = [
  "deepseek-r1:8b" # Default
  "deepseek-r1:14b"
];
# Server tuning knobs, per the upstream FAQ:
# https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-configure-ollama-server
environmentVariables = {
  # A longer context window (8192+ tokens) improves Retrieval-Augmented
  # Generation (RAG) performance.
  OLLAMA_CONTEXT_LENGTH = "8192";
  # Flash Attention keeps memory usage from growing steeply with
  # context size on most modern models.
  OLLAMA_FLASH_ATTENTION = "1";
  # With Flash Attention enabled, the K/V cache can be quantized
  # (options: f16, q8_0, q4_0) to cut memory usage further.
  OLLAMA_KV_CACHE_TYPE = "q8_0";
};
host = "127.0.0.1";
port = 11434;
openFirewall = false;
@ -116,18 +156,36 @@
# https://docs.openwebui.com/getting-started/env-configuration
# open-webui runtime configuration.
# Fix: WEBUI_AUTH and ENABLE_OPENAI_API were each defined twice in this
# attrset; duplicate attribute definitions are a Nix evaluation error
# ("attribute '…' already defined"). One definition of each is kept.
environment = {
  # No login screen; the UI is bound to loopback only (see host below).
  WEBUI_AUTH = "False";
  DEFAULT_MODELS = builtins.head config.services.ollama.loadModels;
  TASK_MODEL = builtins.head config.services.ollama.loadModels;
  # Talk only to the local Ollama instance, never the OpenAI API.
  ENABLE_OPENAI_API = "False";
  ENABLE_OLLAMA_API = "True";
  OLLAMA_BASE_URL = "http://${config.services.ollama.host}:${builtins.toString config.services.ollama.port}";
  ENABLE_EVALUATION_ARENA_MODELS = "False";
  ENABLE_COMMUNITY_SHARING = "False";
  # Document extraction is delegated to the local docling-serve instance.
  CONTENT_EXTRACTION_ENGINE = "docling";
  DOCLING_SERVER_URL = "http://${config.services.docling-serve.host}:${builtins.toString config.services.docling-serve.port}";
  ENABLE_RAG_HYBRID_SEARCH = "False";
  ENABLE_RAG_LOCAL_WEB_FETCH = "True";
  ENABLE_WEB_SEARCH = "True";
  WEB_SEARCH_ENGINE = "google_pse";
  # GOOGLE_PSE_ENGINE_ID = ""; # Use environmentFile
  # GOOGLE_PSE_API_KEY = ""; # Use environmentFile
  # KAGI_SEARCH_API_KEY = ""; # Use environmentFile
  ANONYMIZED_TELEMETRY = "False";
  DO_NOT_TRACK = "True";
  SCARF_NO_ANALYTICS = "True";
};
environmentFile = config.sops.templates."open-webui-secrets.env".path;
host = "127.0.0.1";
port = 11435;
openFirewall = false;