mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 08:26:21 +02:00
Streaming rag responses (#568)
* Tech spec for streaming RAG * Support for streaming Graph/Doc RAG
This commit is contained in:
parent
b1cc724f7d
commit
1948edaa50
20 changed files with 3087 additions and 94 deletions
|
|
@ -4,6 +4,10 @@ Uses the DocumentRAG service to answer a question
|
|||
|
||||
import argparse
|
||||
import os
|
||||
import asyncio
|
||||
import json
|
||||
import uuid
|
||||
from websockets.asyncio.client import connect
|
||||
from trustgraph.api import Api
|
||||
|
||||
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
|
||||
|
|
@ -11,7 +15,69 @@ default_user = 'trustgraph'
|
|||
default_collection = 'default'
|
||||
default_doc_limit = 10
|
||||
|
||||
def question(url, flow_id, question, user, collection, doc_limit):
|
||||
async def question_streaming(url, flow_id, question, user, collection, doc_limit):
|
||||
"""Streaming version using websockets"""
|
||||
|
||||
# Convert http:// to ws://
|
||||
if url.startswith('http://'):
|
||||
url = 'ws://' + url[7:]
|
||||
elif url.startswith('https://'):
|
||||
url = 'wss://' + url[8:]
|
||||
|
||||
if not url.endswith("/"):
|
||||
url += "/"
|
||||
|
||||
url = url + "api/v1/socket"
|
||||
|
||||
mid = str(uuid.uuid4())
|
||||
|
||||
async with connect(url) as ws:
|
||||
req = {
|
||||
"id": mid,
|
||||
"service": "document-rag",
|
||||
"flow": flow_id,
|
||||
"request": {
|
||||
"query": question,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"doc-limit": doc_limit,
|
||||
"streaming": True
|
||||
}
|
||||
}
|
||||
|
||||
req = json.dumps(req)
|
||||
await ws.send(req)
|
||||
|
||||
while True:
|
||||
msg = await ws.recv()
|
||||
obj = json.loads(msg)
|
||||
|
||||
if "error" in obj:
|
||||
raise RuntimeError(obj["error"])
|
||||
|
||||
if obj["id"] != mid:
|
||||
print("Ignore message")
|
||||
continue
|
||||
|
||||
response = obj["response"]
|
||||
|
||||
# Handle streaming format (chunk)
|
||||
if "chunk" in response:
|
||||
chunk = response["chunk"]
|
||||
print(chunk, end="", flush=True)
|
||||
elif "response" in response:
|
||||
# Final response with complete text
|
||||
# Already printed via chunks, just add newline
|
||||
pass
|
||||
|
||||
if obj["complete"]:
|
||||
print() # Final newline
|
||||
break
|
||||
|
||||
await ws.close()
|
||||
|
||||
def question_non_streaming(url, flow_id, question, user, collection, doc_limit):
|
||||
"""Non-streaming version using HTTP API"""
|
||||
|
||||
api = Api(url).flow().id(flow_id)
|
||||
|
||||
|
|
@ -65,18 +131,36 @@ def main():
|
|||
help=f'Document limit (default: {default_doc_limit})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--no-streaming',
|
||||
action='store_true',
|
||||
help='Disable streaming (use non-streaming mode)'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
|
||||
question(
|
||||
url=args.url,
|
||||
flow_id = args.flow_id,
|
||||
question=args.question,
|
||||
user=args.user,
|
||||
collection=args.collection,
|
||||
doc_limit=args.doc_limit,
|
||||
)
|
||||
if not args.no_streaming:
|
||||
asyncio.run(
|
||||
question_streaming(
|
||||
url=args.url,
|
||||
flow_id=args.flow_id,
|
||||
question=args.question,
|
||||
user=args.user,
|
||||
collection=args.collection,
|
||||
doc_limit=args.doc_limit,
|
||||
)
|
||||
)
|
||||
else:
|
||||
question_non_streaming(
|
||||
url=args.url,
|
||||
flow_id=args.flow_id,
|
||||
question=args.question,
|
||||
user=args.user,
|
||||
collection=args.collection,
|
||||
doc_limit=args.doc_limit,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,10 @@ Uses the GraphRAG service to answer a question
|
|||
|
||||
import argparse
|
||||
import os
|
||||
import asyncio
|
||||
import json
|
||||
import uuid
|
||||
from websockets.asyncio.client import connect
|
||||
from trustgraph.api import Api
|
||||
|
||||
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
|
||||
|
|
@ -14,10 +18,78 @@ default_triple_limit = 30
|
|||
default_max_subgraph_size = 150
|
||||
default_max_path_length = 2
|
||||
|
||||
def question(
|
||||
async def question_streaming(
|
||||
url, flow_id, question, user, collection, entity_limit, triple_limit,
|
||||
max_subgraph_size, max_path_length
|
||||
):
|
||||
"""Streaming version using websockets"""
|
||||
|
||||
# Convert http:// to ws://
|
||||
if url.startswith('http://'):
|
||||
url = 'ws://' + url[7:]
|
||||
elif url.startswith('https://'):
|
||||
url = 'wss://' + url[8:]
|
||||
|
||||
if not url.endswith("/"):
|
||||
url += "/"
|
||||
|
||||
url = url + "api/v1/socket"
|
||||
|
||||
mid = str(uuid.uuid4())
|
||||
|
||||
async with connect(url) as ws:
|
||||
req = {
|
||||
"id": mid,
|
||||
"service": "graph-rag",
|
||||
"flow": flow_id,
|
||||
"request": {
|
||||
"query": question,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"entity-limit": entity_limit,
|
||||
"triple-limit": triple_limit,
|
||||
"max-subgraph-size": max_subgraph_size,
|
||||
"max-path-length": max_path_length,
|
||||
"streaming": True
|
||||
}
|
||||
}
|
||||
|
||||
req = json.dumps(req)
|
||||
await ws.send(req)
|
||||
|
||||
while True:
|
||||
msg = await ws.recv()
|
||||
obj = json.loads(msg)
|
||||
|
||||
if "error" in obj:
|
||||
raise RuntimeError(obj["error"])
|
||||
|
||||
if obj["id"] != mid:
|
||||
print("Ignore message")
|
||||
continue
|
||||
|
||||
response = obj["response"]
|
||||
|
||||
# Handle streaming format (chunk)
|
||||
if "chunk" in response:
|
||||
chunk = response["chunk"]
|
||||
print(chunk, end="", flush=True)
|
||||
elif "response" in response:
|
||||
# Final response with complete text
|
||||
# Already printed via chunks, just add newline
|
||||
pass
|
||||
|
||||
if obj["complete"]:
|
||||
print() # Final newline
|
||||
break
|
||||
|
||||
await ws.close()
|
||||
|
||||
def question_non_streaming(
|
||||
url, flow_id, question, user, collection, entity_limit, triple_limit,
|
||||
max_subgraph_size, max_path_length
|
||||
):
|
||||
"""Non-streaming version using HTTP API"""
|
||||
|
||||
api = Api(url).flow().id(flow_id)
|
||||
|
||||
|
|
@ -91,21 +163,42 @@ def main():
|
|||
help=f'Max path length (default: {default_max_path_length})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--no-streaming',
|
||||
action='store_true',
|
||||
help='Disable streaming (use non-streaming mode)'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
|
||||
question(
|
||||
url=args.url,
|
||||
flow_id = args.flow_id,
|
||||
question=args.question,
|
||||
user=args.user,
|
||||
collection=args.collection,
|
||||
entity_limit=args.entity_limit,
|
||||
triple_limit=args.triple_limit,
|
||||
max_subgraph_size=args.max_subgraph_size,
|
||||
max_path_length=args.max_path_length,
|
||||
)
|
||||
if not args.no_streaming:
|
||||
asyncio.run(
|
||||
question_streaming(
|
||||
url=args.url,
|
||||
flow_id=args.flow_id,
|
||||
question=args.question,
|
||||
user=args.user,
|
||||
collection=args.collection,
|
||||
entity_limit=args.entity_limit,
|
||||
triple_limit=args.triple_limit,
|
||||
max_subgraph_size=args.max_subgraph_size,
|
||||
max_path_length=args.max_path_length,
|
||||
)
|
||||
)
|
||||
else:
|
||||
question_non_streaming(
|
||||
url=args.url,
|
||||
flow_id=args.flow_id,
|
||||
question=args.question,
|
||||
user=args.user,
|
||||
collection=args.collection,
|
||||
entity_limit=args.entity_limit,
|
||||
triple_limit=args.triple_limit,
|
||||
max_subgraph_size=args.max_subgraph_size,
|
||||
max_path_length=args.max_path_length,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue