mirror of
https://github.com/katanemo/plano.git
synced 2026-05-18 13:45:15 +02:00
bidirectional streaming for output filter chains
Replace per-chunk HTTP requests to output filters with a single bidirectional streaming connection per filter. This eliminates the 50-200+ round-trips per streaming LLM response. Filters opt in via streaming: true in config. When all output filters support streaming, brightstaff opens one POST per filter with a streaming request body (Body::wrap_stream) and reads the streaming response. Filters that don't opt in fall back to the existing per-chunk behavior. Updates the PII deanonymizer demo as the reference implementation with request.stream() + StreamingResponse support. Made-with: Cursor
This commit is contained in:
parent
1f23c573bf
commit
42d3de8906
10 changed files with 613 additions and 133 deletions
|
|
@ -7,6 +7,7 @@ filters:
|
|||
- id: pii_deanonymizer
|
||||
url: http://localhost:10501/deanonymize
|
||||
type: http
|
||||
streaming: true
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
|
|
|
|||
|
|
@ -21,10 +21,16 @@ import logging
|
|||
from typing import Any, Dict
|
||||
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.responses import Response
|
||||
from fastapi.responses import Response, StreamingResponse
|
||||
|
||||
from pii import anonymize_text, anonymize_message_content
|
||||
from store import get_mapping, store_mapping, deanonymize_sse, deanonymize_json
|
||||
from store import (
|
||||
get_mapping,
|
||||
store_mapping,
|
||||
deanonymize_sse,
|
||||
deanonymize_sse_stream,
|
||||
deanonymize_json,
|
||||
)
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
|
|
@ -105,11 +111,36 @@ async def deanonymize(path: str, request: Request) -> Response:
|
|||
/deanonymize/v1/chat/completions — OpenAI chat completions
|
||||
/deanonymize/v1/messages — Anthropic messages
|
||||
/deanonymize/v1/responses — OpenAI responses API
|
||||
|
||||
Supports two modes:
|
||||
- Bidirectional streaming: request body is streamed (Content-Type: application/octet-stream).
|
||||
Reads via request.stream(), processes SSE events incrementally, returns StreamingResponse.
|
||||
- Per-chunk / full body: reads entire body, processes, returns complete Response.
|
||||
"""
|
||||
endpoint = f"/{path}"
|
||||
is_anthropic = endpoint == "/v1/messages"
|
||||
request_id = request.headers.get("x-request-id", "unknown")
|
||||
mapping = get_mapping(request_id)
|
||||
|
||||
content_type = request.headers.get("content-type", "")
|
||||
is_streaming = "application/octet-stream" in content_type
|
||||
|
||||
if is_streaming:
|
||||
if not mapping:
|
||||
logger.info("request_id=%s streaming, no mapping — passthrough", request_id)
|
||||
|
||||
async def passthrough():
|
||||
async for chunk in request.stream():
|
||||
yield chunk
|
||||
|
||||
return StreamingResponse(passthrough(), media_type="text/event-stream")
|
||||
|
||||
logger.info("request_id=%s streaming deanonymize", request_id)
|
||||
return StreamingResponse(
|
||||
deanonymize_sse_stream(request_id, request.stream(), mapping, is_anthropic),
|
||||
media_type="text/event-stream",
|
||||
)
|
||||
|
||||
raw_body = await request.body()
|
||||
|
||||
if not mapping:
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import json
|
|||
import logging
|
||||
import threading
|
||||
import time
|
||||
from typing import Dict, Optional, Tuple
|
||||
from typing import AsyncIterator, Dict, Optional, Tuple
|
||||
|
||||
from fastapi.responses import Response
|
||||
|
||||
|
|
@ -59,36 +59,71 @@ def restore_streaming(request_id: str, content: str, mapping: Dict[str, str]) ->
|
|||
def deanonymize_sse(
    request_id: str, body_str: str, mapping: Dict[str, str], is_anthropic: bool
) -> Response:
    """Restore PII in a complete SSE body and return it as one response.

    The body is split on newlines and every line is handed to
    ``_process_sse_line`` exactly once; the processed lines are re-joined
    with the same separator, so the line layout of the body is preserved.

    Args:
        request_id: Identifier forwarded to the per-line processor.
        body_str: The full SSE payload as text.
        mapping: Placeholder-to-original mapping forwarded to the per-line
            processor.
        is_anthropic: Selects the Anthropic event shape instead of the
            OpenAI ``choices[].delta`` shape when rewriting payloads.

    Returns:
        A ``Response`` with media type ``text/plain`` carrying the
        processed body.
    """
    # Single pass only: running a second, hand-written loop over the same
    # lines would restore each delta twice and then discard the result.
    result_lines = [
        _process_sse_line(request_id, line, mapping, is_anthropic)
        for line in body_str.split("\n")
    ]
    return Response(content="\n".join(result_lines), media_type="text/plain")
|
||||
|
||||
|
||||
def _process_sse_line(
|
||||
request_id: str, line: str, mapping: Dict[str, str], is_anthropic: bool
|
||||
) -> str:
|
||||
"""Process a single SSE line, restoring PII in data payloads."""
|
||||
stripped = line.strip()
|
||||
if not (stripped.startswith("data: ") and stripped[6:] != "[DONE]"):
|
||||
return line
|
||||
try:
|
||||
chunk = json.loads(stripped[6:])
|
||||
if is_anthropic:
|
||||
if chunk.get("type") == "content_block_delta":
|
||||
delta = chunk.get("delta", {})
|
||||
if delta.get("type") == "text_delta" and delta.get("text"):
|
||||
delta["text"] = restore_streaming(
|
||||
request_id, delta["text"], mapping
|
||||
)
|
||||
else:
|
||||
for choice in chunk.get("choices", []):
|
||||
delta = choice.get("delta", {})
|
||||
if delta.get("content"):
|
||||
delta["content"] = restore_streaming(
|
||||
request_id, delta["content"], mapping
|
||||
)
|
||||
return "data: " + json.dumps(chunk)
|
||||
except json.JSONDecodeError:
|
||||
return line
|
||||
|
||||
|
||||
async def deanonymize_sse_stream(
    request_id: str,
    byte_stream: AsyncIterator[bytes],
    mapping: Dict[str, str],
    is_anthropic: bool,
):
    """Async generator that reads SSE events from a streaming request body,
    de-anonymizes them, and yields processed events as they become complete.

    Buffers partial data and splits on SSE event boundaries (blank lines,
    i.e. ``"\\n\\n"``). Each complete event is processed line by line with
    ``_process_sse_line`` and yielded as text terminated by ``"\\n\\n"``.
    Whatever remains in the buffer when the input ends is processed the same
    way and yielded without a terminator, unless it is only whitespace.

    Args:
        request_id: Identifier forwarded to ``_process_sse_line``.
        byte_stream: Async iterator of raw body chunks; chunk boundaries may
            fall anywhere, including inside a multi-byte character.
        mapping: Placeholder-to-original mapping forwarded to
            ``_process_sse_line``.
        is_anthropic: Forwarded to ``_process_sse_line`` to select the
            payload shape.

    Yields:
        Processed SSE events as ``str``.
    """
    # Function-scope import keeps this block self-contained.
    import codecs

    # Decode incrementally: a UTF-8 sequence split across two chunks must be
    # reassembled, not turned into replacement characters chunk by chunk.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    buffer = ""

    async for raw_chunk in byte_stream:
        buffer += decoder.decode(raw_chunk)
        # Yield each complete SSE event (delimited by double newline).
        # NOTE: events separated by "\r\n\r\n" are not split here; they stay
        # buffered until the end-of-stream flush below.
        while "\n\n" in buffer:
            event, buffer = buffer.split("\n\n", 1)
            processed_lines = [
                _process_sse_line(request_id, line, mapping, is_anthropic)
                for line in event.split("\n")
            ]
            yield "\n".join(processed_lines) + "\n\n"

    # Flush the decoder so a truncated trailing sequence is surfaced (as a
    # replacement character) instead of being silently dropped.
    buffer += decoder.decode(b"", final=True)

    # Flush any trailing data
    if buffer.strip():
        processed_lines = [
            _process_sse_line(request_id, line, mapping, is_anthropic)
            for line in buffer.split("\n")
        ]
        yield "\n".join(processed_lines)
|
||||
|
||||
|
||||
def deanonymize_json(
|
||||
request_id: str,
|
||||
raw_body: bytes,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue