Added templates which produce K8s resources. With the provided GCP wrapper, this works on a GCP K8s cluster. It isn't yet stable enough for other folks to use, so it will need more piloting before it can be documented and released.
This commit is contained in:
cybermaggedon 2024-09-07 18:59:38 +01:00 committed by GitHub
parent 7af32b0eef
commit f661791bbf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
50 changed files with 1037 additions and 345 deletions

View file

@ -15,7 +15,13 @@ local prompts = import "prompts/mixtral.jsonnet";
create:: function(engine)
local cfgVol = engine.configVolume("./vertexai");
local cfgVol = engine.secretVolume(
"vertexai-creds",
"./vertexai",
{
"private.json": importstr "vertexai/private.json",
}
);
local container =
engine.container("text-completion")
@ -35,17 +41,22 @@ local prompts = import "prompts/mixtral.jsonnet";
"-m",
$["vertexai-model"],
])
.with_limits("0.5", "128M")
.with_reservations("0.1", "128M")
.with_limits("0.5", "256M")
.with_reservations("0.1", "256M")
.with_volume_mount(cfgVol, "/vertexai");
local containerSet = engine.containers(
"text-completion", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
cfgVol,
containerSet,
service,
])
},
@ -54,7 +65,13 @@ local prompts = import "prompts/mixtral.jsonnet";
create:: function(engine)
local cfgVol = engine.configVolume("./vertexai");
local cfgVol = engine.secretVolume(
"vertexai-creds",
"./vertexai",
{
"private.json": importstr "vertexai/private.json",
}
);
local container =
engine.container("text-completion-rag")
@ -78,20 +95,24 @@ local prompts = import "prompts/mixtral.jsonnet";
"-o",
"non-persistent://tg/response/text-completion-rag-response",
])
.with_limits("0.5", "128M")
.with_reservations("0.1", "128M")
.with_limits("0.5", "256M")
.with_reservations("0.1", "256M")
.with_volume_mount(cfgVol, "/vertexai");
local containerSet = engine.containers(
"text-completion-rag", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
cfgVol,
containerSet,
service,
])
}
} + prompts