Added templates which produce K8s resources.  With the provided GCP wrapper, it works on GCP K8s cluster.  This isn't stable enough for other folks to use so will need more piloting before it can be documented and released.
This commit is contained in:
cybermaggedon 2024-09-07 18:59:38 +01:00 committed by GitHub
parent 7af32b0eef
commit f661791bbf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
50 changed files with 1037 additions and 345 deletions

View file

@ -37,8 +37,13 @@ local prompts = import "prompts/mixtral.jsonnet";
"text-completion", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -74,8 +79,13 @@ local prompts = import "prompts/mixtral.jsonnet";
"text-completion-rag", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])

View file

@ -44,8 +44,13 @@ local chunker = import "chunker-recursive.jsonnet";
"text-completion", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -85,8 +90,13 @@ local chunker = import "chunker-recursive.jsonnet";
"text-completion-rag", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])

View file

@ -27,8 +27,13 @@ cassandra + {
"stop-triples", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -54,8 +59,13 @@ cassandra + {
"query-triples", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])
}

View file

@ -31,8 +31,13 @@ local prompts = import "prompts/mixtral.jsonnet";
"chunker", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},

View file

@ -34,8 +34,13 @@ local prompts = import "prompts/mixtral.jsonnet";
"text-completion", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -69,8 +74,13 @@ local prompts = import "prompts/mixtral.jsonnet";
"text-completion-rag", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])

View file

@ -35,8 +35,13 @@ local prompts = import "prompts/mixtral.jsonnet";
"text-completion", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -68,8 +73,13 @@ local prompts = import "prompts/mixtral.jsonnet";
"text-completion-rag", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])

View file

@ -28,8 +28,13 @@ local prompts = import "prompts/mixtral.jsonnet";
"document-rag", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},

View file

@ -21,15 +21,20 @@ local prompts = import "prompts/mixtral.jsonnet";
"-m",
$["embeddings-model"],
])
.with_limits("1.0", "256M")
.with_reservations("0.5", "256M");
.with_limits("1.0", "400M")
.with_reservations("0.5", "400M");
local containerSet = engine.containers(
"embeddings", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},

View file

@ -30,8 +30,13 @@ local url = import "values/url.jsonnet";
"embeddings", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},

View file

@ -8,26 +8,37 @@ local images = import "values/images.jsonnet";
create:: function(engine)
local vol = engine.volume("prometheus-data").with_size("20G");
local cfgVol = engine.configVolume("./prometheus")
.with_size("20G");
local cfgVol = engine.configVolume(
"prometheus-cfg", "./prometheus",
{
"prometheus.yml": importstr "prometheus/prometheus.yml",
}
);
local container =
engine.container("prometheus")
.with_image(images.prometheus)
.with_limits("0.5", "128M")
.with_reservations("0.1", "128M")
// .with_command(["/bin/sh", "-c", "sleep 9999999"])
.with_port(9090, 9090, "http")
.with_volume_mount(cfgVol, "/etc/prometheus")
.with_volume_mount(cfgVol, "/etc/prometheus/")
.with_volume_mount(vol, "/prometheus");
local containerSet = engine.containers(
"prometheus", [ container ]
);
local service =
engine.service(containerSet)
.with_port(9090, 9090, "http");
engine.resources([
cfgVol,
vol,
containerSet,
service,
])
},
@ -37,12 +48,33 @@ local images = import "values/images.jsonnet";
create:: function(engine)
local vol = engine.volume("grafana-storage").with_size("20G");
local cv1 = engine.configVolume("./grafana/dashboard.yml")
.with_size("20G");
local cv2 = engine.configVolume("./grafana/datasource.yml")
.with_size("20G");
local cv3 = engine.configVolume("./grafana/dashboard.json")
.with_size("20G");
local provDashVol = engine.configVolume(
"prov-dash", "./grafana/provisioning/",
{
"dashboard.yml":
importstr "grafana/provisioning/dashboard.yml",
}
);
local provDataVol = engine.configVolume(
"prov-data", "./grafana/provisioning/",
{
"datasource.yml":
importstr "grafana/provisioning/datasource.yml",
}
);
local dashVol = engine.configVolume(
"dashboards", "./grafana/dashboards/",
{
"dashboard.json":
importstr "grafana/dashboards/dashboard.json",
}
);
local container =
engine.container("grafana")
@ -58,20 +90,31 @@ local images = import "values/images.jsonnet";
.with_reservations("0.5", "256M")
.with_port(3000, 3000, "cassandra")
.with_volume_mount(vol, "/var/lib/grafana")
.with_volume_mount(cv1, "/etc/grafana/provisioning/dashboards/dashboard.yml")
.with_volume_mount(cv2, "/etc/grafana/provisioning/datasources/datasource.yml")
.with_volume_mount(cv3, "/var/lib/grafana/dashboards/dashboard.json");
.with_volume_mount(
provDashVol, "/etc/grafana/provisioning/dashboards/"
)
.with_volume_mount(
provDataVol, "/etc/grafana/provisioning/datasources/"
)
.with_volume_mount(
dashVol, "/var/lib/grafana/dashboards/"
);
local containerSet = engine.containers(
"grafana", [ container ]
);
local service =
engine.service(containerSet)
.with_port(3000, 3000, "http");
engine.resources([
vol,
cv1,
cv2,
cv3,
provDashVol,
provDataVol,
dashVol,
containerSet,
service,
])
},

View file

@ -27,8 +27,13 @@ local url = import "values/url.jsonnet";
"kg-extract-definitions", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -52,8 +57,13 @@ local url = import "values/url.jsonnet";
"kg-extract-relationships", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -87,8 +97,13 @@ local url = import "values/url.jsonnet";
"graph-rag", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},

View file

@ -27,8 +27,13 @@ milvus + {
"store-graph-embeddings", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -54,8 +59,13 @@ milvus + {
"query-graph-embeddings", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -81,8 +91,13 @@ milvus + {
"store-doc-embeddings", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -108,8 +123,13 @@ milvus + {
"query-doc-embeddings", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])

View file

@ -28,8 +28,13 @@ neo4j + {
"store-triples", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -55,8 +60,13 @@ neo4j + {
"query-triples", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])

View file

@ -31,8 +31,13 @@ local prompts = import "prompts/slm.jsonnet";
"text-completion", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -64,8 +69,13 @@ local prompts = import "prompts/slm.jsonnet";
"text-completion-rag", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])

View file

@ -37,8 +37,13 @@ local prompts = import "prompts/mixtral.jsonnet";
"text-completion", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -74,8 +79,13 @@ local prompts = import "prompts/mixtral.jsonnet";
"text-completion-rag", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])

View file

@ -28,8 +28,13 @@ local prompts = import "prompts/mixtral.jsonnet";
"prompt", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -61,8 +66,13 @@ local prompts = import "prompts/mixtral.jsonnet";
"prompt-rag", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])
},

View file

@ -39,8 +39,13 @@ local default_prompts = import "prompts/default-prompts.jsonnet";
"prompt", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -82,8 +87,13 @@ local default_prompts = import "prompts/default-prompts.jsonnet";
"prompt-rag", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])
},

View file

@ -27,8 +27,8 @@ local images = import "values/images.jsonnet";
local service =
engine.service(containerSet)
.with_port(9527, 9527)
.with_port(7750, 7750);
.with_port(9527, 9527, "api")
.with_port(7750, 7750, "api2");
engine.resources([
containerSet,

View file

@ -7,19 +7,20 @@ local images = import "values/images.jsonnet";
create:: function(engine)
local confVolume = engine.volume("pulsar-conf").with_size("2G");
// local confVolume = engine.volume("pulsar-conf").with_size("2G");
local dataVolume = engine.volume("pulsar-data").with_size("20G");
local container =
engine.container("pulsar")
.with_image(images.pulsar)
.with_command("bin/pulsar standalone")
.with_command(["bin/pulsar", "standalone"])
// .with_command(["/bin/sh", "-c", "sleep 9999999"])
.with_environment({
"PULSAR_MEM": "-Xms700M -Xmx700M"
"PULSAR_MEM": "-Xms600M -Xmx600M"
})
.with_limits("1.0", "900M")
.with_reservations("0.5", "900M")
.with_volume_mount(confVolume, "/pulsar/conf")
.with_limits("2.0", "1500M")
.with_reservations("1.0", "1500M")
// .with_volume_mount(confVolume, "/pulsar/conf")
.with_volume_mount(dataVolume, "/pulsar/data")
.with_port(6650, 6650, "bookie")
.with_port(8080, 8080, "http");
@ -30,27 +31,35 @@ local images = import "values/images.jsonnet";
.with_command([
"sh",
"-c",
"pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response",
"while true; do pulsar-admin --admin-url http://pulsar:8080 tenants create tg ; pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow ; pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request ; pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response ; pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response; sleep 20; done",
])
.with_limits("0.5", "128M")
.with_reservations("0.1", "128M");
.with_limits("1", "400M")
.with_reservations("0.1", "400M");
local containerSet = engine.containers(
"pulsar",
[
container, adminContainer
container
]
);
local adminContainerSet = engine.containers(
"init-pulsar",
[
adminContainer
]
);
local service =
engine.service(containerSet)
.with_port(6650, 6650)
.with_port(8080, 8080);
.with_port(6650, 6650, "bookie")
.with_port(8080, 8080, "http");
engine.resources([
confVolume,
// confVolume,
dataVolume,
containerSet,
adminContainerSet,
service,
])
@ -58,5 +67,3 @@ local images = import "values/images.jsonnet";
}

View file

@ -27,8 +27,13 @@ qdrant + {
"store-graph-embeddings", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -54,8 +59,13 @@ qdrant + {
"query-graph-embeddings", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -81,8 +91,13 @@ qdrant + {
"store-doc-embeddings", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -108,8 +123,13 @@ qdrant + {
"query-doc-embeddings", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8080, 8080, "metrics");
engine.resources([
containerSet,
service,
])

View file

@ -31,8 +31,13 @@ local prompt = import "prompt-template.jsonnet";
"chunker", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -56,8 +61,13 @@ local prompt = import "prompt-template.jsonnet";
"pdf-decoder", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},
@ -81,8 +91,13 @@ local prompt = import "prompt-template.jsonnet";
"vectorize", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
containerSet,
service,
])
},

View file

@ -15,7 +15,13 @@ local prompts = import "prompts/mixtral.jsonnet";
create:: function(engine)
local cfgVol = engine.configVolume("./vertexai");
local cfgVol = engine.secretVolume(
"vertexai-creds",
"./vertexai",
{
"private.json": importstr "vertexai/private.json",
}
);
local container =
engine.container("text-completion")
@ -35,17 +41,22 @@ local prompts = import "prompts/mixtral.jsonnet";
"-m",
$["vertexai-model"],
])
.with_limits("0.5", "128M")
.with_reservations("0.1", "128M")
.with_limits("0.5", "256M")
.with_reservations("0.1", "256M")
.with_volume_mount(cfgVol, "/vertexai");
local containerSet = engine.containers(
"text-completion", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
cfgVol,
containerSet,
service,
])
},
@ -54,7 +65,13 @@ local prompts = import "prompts/mixtral.jsonnet";
create:: function(engine)
local cfgVol = engine.configVolume("./vertexai");
local cfgVol = engine.secretVolume(
"vertexai-creds",
"./vertexai",
{
"private.json": importstr "vertexai/private.json",
}
);
local container =
engine.container("text-completion-rag")
@ -78,20 +95,24 @@ local prompts = import "prompts/mixtral.jsonnet";
"-o",
"non-persistent://tg/response/text-completion-rag-response",
])
.with_limits("0.5", "128M")
.with_reservations("0.1", "128M")
.with_limits("0.5", "256M")
.with_reservations("0.1", "256M")
.with_volume_mount(cfgVol, "/vertexai");
local containerSet = engine.containers(
"text-completion-rag", [ container ]
);
local service =
engine.internalService(containerSet)
.with_port(8000, 8000, "metrics");
engine.resources([
cfgVol,
containerSet,
service,
])
}
} + prompts