NOTE(review): this patch was recovered from a copy whose newlines and
indentation had been collapsed. The indentation of the router.py context
lines, and whether the first two added call sites belong inside
`if messages:`, is assumed rather than recovered -- confirm against router.py
or apply with `git apply --ignore-whitespace`. The router.py blob hash on the
`index` line predates the hardened _strip_assistant_prefill below.

diff --git a/requirements.txt b/requirements.txt
index a3befbe..222dfc8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,6 @@
 aiohappyeyeballs==2.6.1
 aiohttp==3.13.3
 aiosignal==1.4.0
-annotated-doc==0.0.3
 annotated-types==0.7.0
 anyio==4.10.0
 async-timeout==5.0.1
@@ -22,6 +21,7 @@ multidict==6.6.4
 ollama==0.6.1
 openai==1.102.0
 orjson>=3.11.5
+numpy>=1.26
 pillow==12.1.1
 propcache==0.3.2
 pydantic==2.11.7
diff --git a/router.py b/router.py
index a7f6a75..047bc57 100644
--- a/router.py
+++ b/router.py
@@ -1064,6 +1064,7 @@ async def _make_chat_request(model: str, messages: list, tools=None, stream: boo
         if messages:
             messages = transform_images_to_data_urls(messages)
             messages = transform_tool_calls_to_openai(messages)
+            messages = _strip_assistant_prefill(messages)
         params = {
             "messages": messages,
             "model": model,
@@ -1295,6 +1296,22 @@ def resize_image_if_needed(image_data):
         print(f"Error processing image: {e}")
         return None
 
+def _strip_assistant_prefill(messages: list) -> list:
+    """Remove a trailing assistant message used as prefill.
+
+    OpenAI-compatible endpoints (including Claude) do not support prefill and
+    will reject requests where the last message has role 'assistant'.
+
+    Input that is not a non-empty list ending in a dict is returned unchanged,
+    so a malformed payload is left for the upstream endpoint to validate.
+    """
+    if not isinstance(messages, list) or not messages:
+        return messages
+    last = messages[-1]
+    if isinstance(last, dict) and last.get("role") == "assistant":
+        return messages[:-1]
+    return messages
+
 def transform_tool_calls_to_openai(message_list):
     """
     Ensure tool_calls in assistant messages conform to the OpenAI format:
@@ -1961,6 +1978,7 @@ async def chat_proxy(request: Request):
         if messages:
             messages = transform_images_to_data_urls(messages)
             messages = transform_tool_calls_to_openai(messages)
+            messages = _strip_assistant_prefill(messages)
         params = {
             "messages": messages,
             "model": model,
@@ -3027,6 +3045,7 @@ async def openai_chat_completions_proxy(request: Request):
         model = model.split(":latest")
         model = model[0]
 
+    messages = _strip_assistant_prefill(messages)
     params = {
         "messages": messages,
         "model": model,