Dual model templates (#263)

* Dual-mode templates * Fixed generate-all so that it works * Fix ability to specify Claude model
2026-07-24 12:41:02 +02:00 · 2025-01-11 11:40:42 +00:00 · 2025-01-11 11:40:42 +00:00 · cec9e29222
commit cec9e29222
parent 1bb1112569
24 changed files with 725 additions and 360 deletions
--- a/templates/components/llamafile.jsonnet
+++ b/templates/components/llamafile.jsonnet
@@ -5,6 +5,11 @@ local prompts = import "prompts/slm.jsonnet";

 {

+    with:: function(key, value)
+        self + {
+            ["llamafile-" + key]:: value,
+        },
+
    "llamafile-model":: "LLaMA_CPP",

    "text-completion" +: {
@@ -28,46 +33,18 @@ local prompts = import "prompts/slm.jsonnet";
                    .with_limits("0.5", "128M")
                    .with_reservations("0.1", "128M");

-            local containerRag =
-                engine.container("text-completion-rag")
-                    .with_image(images.trustgraph)
-                    .with_command([
-                        "text-completion-llamafile",
-                        "-p",
-                        url.pulsar,
-                        "-m",
-                        $["llamafile-model"],
-                        "-i",
-                        "non-persistent://tg/request/text-completion-rag",
-                        "-o",
-                        "non-persistent://tg/response/text-completion-rag",
-                    ])
-                    .with_env_var_secrets(envSecrets)
-                    .with_limits("0.5", "128M")
-                    .with_reservations("0.1", "128M");
-
            local containerSet = engine.containers(
                "text-completion", [ container ]
            );

-            local containerSetRag = engine.containers(
-                "text-completion-rag", [ containerRag ]
-            );
-
            local service =
                engine.internalService(containerSet)
                .with_port(8080, 8080, "metrics");

-            local serviceRag =
-                engine.internalService(containerSetRag)
-                .with_port(8080, 8080, "metrics");
-
            engine.resources([
                envSecrets,
                containerSet,
-                containerSetRag,
                service,
-                serviceRag,
            ])

    },