mirror of
https://github.com/katanemo/plano.git
synced 2026-06-26 15:39:40 +02:00
update getting started guide and add llm gateway and prompt gateway samples (#330)
This commit is contained in:
parent
9d8fe02729
commit
a54db1a098
24 changed files with 1203 additions and 778 deletions
|
|
@ -28,6 +28,11 @@ properties:
|
|||
type: string
|
||||
connect_timeout:
|
||||
type: string
|
||||
protocol:
|
||||
type: string
|
||||
enum:
|
||||
- http
|
||||
- https
|
||||
additionalProperties: false
|
||||
required:
|
||||
- endpoint
|
||||
|
|
@ -92,6 +97,8 @@ properties:
|
|||
type: array
|
||||
items:
|
||||
type: string
|
||||
in_path:
|
||||
type: boolean
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
|
|
@ -104,6 +111,11 @@ properties:
|
|||
type: string
|
||||
path:
|
||||
type: string
|
||||
http_method:
|
||||
type: string
|
||||
enum:
|
||||
- GET
|
||||
- POST
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
|
|
|
|||
|
|
@ -500,7 +500,18 @@ static_resources:
|
|||
socket_address:
|
||||
address: {{ cluster.endpoint }}
|
||||
port_value: {{ cluster.port }}
|
||||
hostname: {{ cluster.name }}
|
||||
hostname: {{ cluster.endpoint }}
|
||||
{% if cluster.protocol == "https" %}
|
||||
transport_socket:
|
||||
name: envoy.transport_sockets.tls
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
|
||||
sni: {{ cluster.endpoint }}
|
||||
common_tls_context:
|
||||
tls_params:
|
||||
tls_minimum_protocol_version: TLSv1_2
|
||||
tls_maximum_protocol_version: TLSv1_3
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
- name: arch_internal
|
||||
connect_timeout: 5s
|
||||
|
|
|
|||
|
|
@ -49,7 +49,9 @@ def validate_and_render_schema():
|
|||
|
||||
if "prompt_targets" in config_yaml:
|
||||
for prompt_target in config_yaml["prompt_targets"]:
|
||||
name = prompt_target.get("endpoint", {}).get("name", "")
|
||||
name = prompt_target.get("endpoint", {}).get("name", None)
|
||||
if not name:
|
||||
continue
|
||||
if name not in inferred_clusters:
|
||||
inferred_clusters[name] = {
|
||||
"name": name,
|
||||
|
|
|
|||
|
|
@ -196,7 +196,7 @@ def start_arch_modelserver():
|
|||
subprocess.run(
|
||||
["archgw_modelserver", "restart"], check=True, start_new_session=True
|
||||
)
|
||||
log.info("Successfull ran model_server")
|
||||
log.info("Successfully ran model_server")
|
||||
except subprocess.CalledProcessError as e:
|
||||
log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
|
||||
sys.exit(1)
|
||||
|
|
@ -212,7 +212,7 @@ def stop_arch_modelserver():
|
|||
["archgw_modelserver", "stop"],
|
||||
check=True,
|
||||
)
|
||||
log.info("Successfull stopped the archgw model_server")
|
||||
log.info("Successfully stopped the archgw model_server")
|
||||
except subprocess.CalledProcessError as e:
|
||||
log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
|
||||
sys.exit(1)
|
||||
|
|
|
|||
|
|
@ -171,7 +171,7 @@ def up(file, path, service):
|
|||
log.info(f"Error: {str(e)}")
|
||||
sys.exit(1)
|
||||
|
||||
log.info("Starging arch model server and arch gateway")
|
||||
log.info("Starting arch model server and arch gateway")
|
||||
|
||||
# Set the ARCH_CONFIG_FILE environment variable
|
||||
env_stage = {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue