diff --git a/router.py b/router.py
index 6243dee..d67a93b 100644
--- a/router.py
+++ b/router.py
@@ -100,6 +100,11 @@ app.add_middleware(
     allow_methods=["GET", "POST", "DELETE"],
     allow_headers=["Authorization", "Content-Type"],
 )
+default_headers={
+    "HTTP-Referer": "https://nomyo.ai",
+    "X-Title": "NOMYO Router",
+    }
+
 # -------------------------------------------------------------
 # 3. Global state: per‑endpoint per‑model active connection counters
 # -------------------------------------------------------------
@@ -271,64 +276,75 @@ def iso8601_ns():
 class rechunk:
     def openai_chat_completion2ollama(chunk: dict, stream: bool, start_ts: float):
-        rechunk = { "model": chunk.model,
-                    "created_at": iso8601_ns() ,
-                    "done_reason": chunk.choices[0].finish_reason,
-                    "load_duration": None,
-                    "prompt_eval_count": None,
-                    "prompt_eval_duration": None,
-                    "eval_count": None,
-                    "eval_duration": None,
-                    "eval_count": (chunk.usage.completion_tokens if chunk.usage is not None else None),
-                    "prompt_eval_count": (chunk.usage.prompt_tokens if chunk.usage is not None else None),
-                    "eval_duration": (int((time.perf_counter() - start_ts) * 1000) if chunk.usage is not None else None),
-                    "response_token/s": (round(chunk.usage.total_tokens / (time.perf_counter() - start_ts), 2) if chunk.usage is not None else None)
-                  }
         if stream == True:
-            rechunk["message"] = {"role": chunk.choices[0].delta.role or "assistant", "content": chunk.choices[0].delta.content, "thinking": None, "images": None, "tool_name": None, "tool_calls": None}
+            assistant_msg = ollama.Message(
+                role=chunk.choices[0].delta.role or "assistant",
+                content=chunk.choices[0].delta.content,
+                thinking=None,
+                images=None,
+                tool_name=None,
+                tool_calls=None
+            )
         else:
-            rechunk["message"] = {"role": chunk.choices[0].message.role or "assistant", "content": chunk.choices[0].message.content, "thinking": None, "images": None, "tool_name": None, "tool_calls": None}
+            assistant_msg = ollama.Message(
+                role=chunk.choices[0].message.role or "assistant",
+                content=chunk.choices[0].message.content,
+                thinking=None,
+                images=None,
+                tool_name=None,
+                tool_calls=None
+            )
+        rechunk = ollama.ChatResponse(model=chunk.model,
+                                      created_at=iso8601_ns(),
+                                      done_reason=chunk.choices[0].finish_reason,
+                                      load_duration=100000,
+                                      prompt_eval_duration=(int((time.perf_counter() - start_ts) * 1_000_000_000 * (chunk.usage.prompt_tokens / chunk.usage.completion_tokens / 100)) if chunk.usage is not None else None),
+                                      eval_count= (chunk.usage.completion_tokens if chunk.usage is not None else None),
+                                      prompt_eval_count=(chunk.usage.prompt_tokens if chunk.usage is not None else None),
+                                      eval_duration=(int((time.perf_counter() - start_ts) * 1_000_000_000) if chunk.usage is not None else None),
+                                      total_duration=(int((time.perf_counter() - start_ts) * 1_000_000_000) if chunk.usage is not None else None),
+                                      message=assistant_msg)
         return rechunk

     def openai_completion2ollama(chunk: dict, stream: bool, start_ts: float):
         with_thinking = chunk.choices[0] if chunk.choices[0] else None
         thinking = getattr(with_thinking, "reasoning", None) if with_thinking else None
-        rechunk = { "model": chunk.model,
-                    "created_at": iso8601_ns(),
-                    "load_duration": None,
-                    "done_reason": chunk.choices[0].finish_reason,
-                    "total_duration": None,
-                    "eval_duration": (int((time.perf_counter() - start_ts) * 1000) if chunk.usage is not None else None),
-                    "thinking": thinking,
-                    "context": None,
-                    "response": chunk.choices[0].text
-                  }
+        rechunk = ollama.GenerateResponse(model=chunk.model,
+                                          created_at=iso8601_ns(),
+                                          load_duration=10000,
+                                          done_reason=chunk.choices[0].finish_reason,
+                                          done=None, #True if chunk.choices[0].finish_reason is not None else False,
+                                          total_duration=(int((time.perf_counter() - start_ts) * 1000) if chunk.usage is not None else None),
+                                          eval_duration=(int((time.perf_counter() - start_ts) * 1000) if chunk.usage is not None else None),
+                                          thinking=thinking,
+                                          response=chunk.choices[0].text
+                                          )
         return rechunk

     def openai_embeddings2ollama(chunk: dict):
-        rechunk = {"embedding": chunk.data[0].embedding}
+        rechunk = ollama.EmbeddingsResponse(embedding=chunk.data[0].embedding)
         return rechunk

     def openai_embed2ollama(chunk: dict, model: str):
-        rechunk = { "model": model,
-                    "created_at": iso8601_ns(),
-                    "done": None,
-                    "done_reason": None,
-                    "total_duration": None,
-                    "load_duration": None,
-                    "prompt_eval_count": None,
-                    "prompt_eval_duration": None,
-                    "eval_count": None,
-                    "eval_duration": None,
-                    "embeddings": [chunk.data[0].embedding]
-                  }
+        rechunk = ollama.EmbedResponse(model=model,
+                                       created_at=iso8601_ns(),
+                                       done=None,
+                                       done_reason=None,
+                                       total_duration=None,
+                                       load_duration=None,
+                                       prompt_eval_count=None,
+                                       prompt_eval_duration=None,
+                                       eval_count=None,
+                                       eval_duration=None,
+                                       embeddings=[chunk.data[0].embedding]
+                                       )
         return rechunk

 # ------------------------------------------------------------------
 # SSE Helpser
 # ------------------------------------------------------------------
 async def publish_snapshot():
     async with usage_lock:
-        snapshot = json.dumps({"usage_counts": usage_counts})
+        snapshot = json.dumps({"usage_counts": usage_counts}, sort_keys=True)
     async with _subscribers_lock:
         for q in _subscribers:
             # If the queue is full, drop the message to avoid back‑pressure.
@@ -501,7 +517,7 @@ async def proxy(request: Request):
             }
             params.update({k: v for k, v in optional_params.items() if v is not None})

-            oclient = openai.AsyncOpenAI(base_url=endpoint, api_key=config.api_keys[endpoint])
+            oclient = openai.AsyncOpenAI(base_url=endpoint, default_headers=default_headers, api_key=config.api_keys[endpoint])
         else:
             client = ollama.AsyncClient(host=endpoint)
     await increment_usage(endpoint, model)
@@ -564,9 +580,10 @@ async def chat_proxy(request: Request):
         tools = payload.get("tools")
         stream = payload.get("stream")
         think = payload.get("think")
-        format = payload.get("format")
+        _format = payload.get("format")
         options = payload.get("options")
         keep_alive = payload.get("keep_alive")
+        options = payload.get("options")

         if not model:
             raise HTTPException(
@@ -576,6 +593,10 @@
             raise HTTPException(
                 status_code=400, detail="Missing or invalid 'messages' field (must be a list)"
             )
+        if options is not None and not isinstance(options, dict):
+            raise HTTPException(
+                status_code=400, detail="`options` must be a JSON object"
+            )

     except json.JSONDecodeError as e:
         raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e
@@ -595,9 +616,8 @@
                 "tools": tools,
                 "stream": stream,
             }
-            params.update({k: v for k, v in optional_params.items() if v is not None})

-            oclient = openai.AsyncOpenAI(base_url=endpoint, api_key=config.api_keys[endpoint])
+            oclient = openai.AsyncOpenAI(base_url=endpoint, default_headers=default_headers, api_key=config.api_keys[endpoint])
         else:
             client = ollama.AsyncClient(host=endpoint)
     await increment_usage(endpoint, model)
@@ -609,7 +629,7 @@
     try:
         # The chat method returns a generator of dicts (or GenerateResponse)
         if is_openai_endpoint:
            start_ts = time.perf_counter()
            async_gen = await oclient.chat.completions.create(**params)
        else:
-            async_gen = await client.chat(model=model, messages=messages, tools=tools, stream=stream, think=think, format=format, options=options, keep_alive=keep_alive)
+            async_gen = await client.chat(model=model, messages=messages, tools=tools, stream=stream, think=think, format=_format, options=options, keep_alive=keep_alive)
         if stream == True:
             async for chunk in async_gen:
                 if is_openai_endpoint:
@@ -689,7 +709,7 @@
     try:
         # The chat method returns a generator of dicts (or GenerateResponse)
         if is_openai_endpoint:
-            async_gen = await client.embeddings.create(input=[prompt], model=model)
+            async_gen = await client.embeddings.create(input=prompt, model=model)
             async_gen = rechunk.openai_embeddings2ollama(async_gen)
         else:
             async_gen = await client.embeddings(model=model, prompt=prompt, options=options, keep_alive=keep_alive)
@@ -755,7 +775,7 @@
     try:
         # The chat method returns a generator of dicts (or GenerateResponse)
         if is_openai_endpoint:
-            async_gen = await client.embeddings.create(input=[_input], model=model)
+            async_gen = await client.embeddings.create(input=_input, model=model)
             async_gen = rechunk.openai_embed2ollama(async_gen, model)
         else:
             async_gen = await client.embed(model=model, input=_input, truncate=truncate, options=options, keep_alive=keep_alive)
@@ -1180,10 +1200,10 @@ async def openai_embedding_proxy(request: Request):
     else:
         api_key = "ollama"
     base_url = ep2base(endpoint)
-    oclient = openai.AsyncOpenAI(base_url=base_url, api_key=api_key)
+    oclient = openai.AsyncOpenAI(base_url=base_url, default_headers=default_headers, api_key=api_key)

     # 3. Async generator that streams embedding data and decrements the counter
-    async_gen = await oclient.embeddings.create(input=[doc], model=model)
+    async_gen = await oclient.embeddings.create(input=doc, model=model)

     await decrement_usage(endpoint, model)

@@ -1260,7 +1280,7 @@ async def openai_chat_completions_proxy(request: Request):
     endpoint = await choose_endpoint(model)
     await increment_usage(endpoint, model)
     base_url = ep2base(endpoint)
-    oclient = openai.AsyncOpenAI(base_url=base_url, api_key=config.api_keys[endpoint])
+    oclient = openai.AsyncOpenAI(base_url=base_url, default_headers=default_headers, api_key=config.api_keys[endpoint])

     # 3. Async generator that streams completions data and decrements the counter
     async def stream_ochat_response():
@@ -1363,7 +1383,7 @@ async def openai_completions_proxy(request: Request):
     endpoint = await choose_endpoint(model)
     await increment_usage(endpoint, model)
     base_url = ep2base(endpoint)
-    oclient = openai.AsyncOpenAI(base_url=base_url, api_key=config.api_keys[endpoint])
+    oclient = openai.AsyncOpenAI(base_url=base_url, default_headers=default_headers, api_key=config.api_keys[endpoint])

     # 3. Async generator that streams completions data and decrements the counter
     async def stream_ocompletions_response():
diff --git a/static/index.html b/static/index.html
index 0bd5399..dd0fcd1 100644
--- a/static/index.html
+++ b/static/index.html
@@ -346,7 +346,7 @@
       const body = document.getElementById("tags-body");
       body.innerHTML = data.models
         .map((m) => {
-          let modelCell = `${m.id || m.name}`;
+          let modelCell = `${m.model}`;
           if (m.digest) {
             modelCell += `delete`;
             modelCell += `copy`;
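
A minimal sketch (not part of the patch) of how the module-level default_headers introduced above are handed to an OpenAI-compatible upstream, assuming the openai v1 Python SDK; the base URL and API key below are placeholders, not values from the router's config:

    import openai

    # Attribution headers attached to every upstream request (mirrors the default_headers hunk above).
    default_headers = {
        "HTTP-Referer": "https://nomyo.ai",
        "X-Title": "NOMYO Router",
    }

    # Placeholder endpoint and key, purely for illustration.
    oclient = openai.AsyncOpenAI(
        base_url="https://example-endpoint/v1",
        api_key="sk-placeholder",
        default_headers=default_headers,
    )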