removing model_server python module to brightstaff (function calling) (#615)

* adding function_calling functionality via rust

* fixed rendered YAML file

* removed model_server from envoy.template and forwarding traffic to bright_staff

* fixed bugs in function_calling.rs that were breaking tests. All good now

* updating e2e test to clean up disk usage

* removing Arch* models to be used as a default model if one is not specified

* if the user sets arch-function base_url we should honor it

* fixing demos as we needed to pin to a particular version of huggingface_hub else the chatbot ui wouldn't build

* adding a constant for Arch-Function model name

* fixing some edge cases with calls made to Arch-Function

* fixed JSON parsing issues in function_calling.rs

* fixed bug where the raw response from Arch-Function was re-encoded

* removed debug from supervisord.conf

* commenting out disk cleanup

* adding back disk space

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
Salman Paracha 2025-11-22 12:55:00 -08:00 committed by GitHub
parent 126b029345
commit 88c2bd1851
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
40 changed files with 2517 additions and 1356 deletions

View file

@ -262,19 +262,16 @@ static_resources:
domains:
- "*"
routes:
{% for internal_cluster in ["arch_fc", "model_server"] %}
- match:
prefix: "/"
headers:
- name: "x-arch-upstream"
string_match:
exact: {{ internal_cluster }}
exact: bright_staff
route:
auto_host_rewrite: true
cluster: {{ internal_cluster }}
cluster: bright_staff
timeout: 300s
{% endfor %}
{% for cluster_name, cluster in arch_clusters.items() %}
- match:
prefix: "/"
@ -599,7 +596,7 @@ static_resources:
clusters:
- name: arch
connect_timeout: 0.5s
connect_timeout: 5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -868,24 +865,6 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
{% for internal_cluster in ["arch_fc", "model_server"] %}
- name: {{ internal_cluster }}
connect_timeout: 0.5s
type: STRICT_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: {{ internal_cluster }}
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: host.docker.internal
port_value: 51000
hostname: {{ internal_cluster }}
{% endfor %}
- name: mistral_7b_instruct
connect_timeout: 0.5s
type: STRICT_DNS