Feature/streaming llm phase 1 (#566)

* Tidy up duplicate tech specs in doc directory

* Streaming LLM text-completion service tech spec.

* text-completion and prompt interfaces

* Streaming change applied to all LLMs; tested so far with VertexAI

* Skip Pinecone unit tests: an upstream module issue is affecting them. With those skipped, the tests are passing again

* Added agent streaming (not yet working; tests are currently broken)
This commit is contained in:
cybermaggedon 2025-11-26 09:59:10 +00:00 committed by GitHub
parent 943a9d83b0
commit 310a2deb06
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
44 changed files with 2684 additions and 937 deletions

View file

@ -1,5 +1,5 @@
from pulsar.schema import Record, String, Array, Map
from pulsar.schema import Record, String, Array, Map, Boolean
from ..core.topic import topic
from ..core.primitives import Error
@ -21,8 +21,16 @@ class AgentRequest(Record):
group = Array(String())
history = Array(AgentStep())
user = String() # User context for multi-tenancy
streaming = Boolean() # NEW: Enable streaming response delivery (default false)
class AgentResponse(Record):
# Streaming-first design
chunk_type = String() # "thought", "action", "observation", "answer", "error"
content = String() # The actual content (interpretation depends on chunk_type)
end_of_message = Boolean() # Current chunk type (thought/action/etc.) is complete
end_of_dialog = Boolean() # Entire agent dialog is complete
# Legacy fields (deprecated but kept for backward compatibility)
answer = String()
error = Error()
thought = String()

View file

@ -1,5 +1,5 @@
from pulsar.schema import Record, String, Array, Double, Integer
from pulsar.schema import Record, String, Array, Double, Integer, Boolean
from ..core.topic import topic
from ..core.primitives import Error
@ -11,6 +11,7 @@ from ..core.primitives import Error
# Request record for the text-completion service: two string fields and a
# boolean streaming flag.
class TextCompletionRequest(Record):
# NOTE(review): presumably the system prompt text -- confirm against the service.
system = String()
# NOTE(review): presumably the prompt to be completed -- confirm against the service.
prompt = String()
# Streaming flag; the matching response record carries an end_of_stream
# marker. NOTE(review): the false default is the stated intent, confirm that
# an unset Boolean is read as false by consumers.
streaming = Boolean() # Default false for backward compatibility
class TextCompletionResponse(Record):
error = Error()
@ -18,6 +19,7 @@ class TextCompletionResponse(Record):
in_token = Integer()
out_token = Integer()
model = String()
end_of_stream = Boolean() # Indicates final message in stream
############################################################################

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Map
from pulsar.schema import Record, String, Map, Boolean
from ..core.primitives import Error
from ..core.topic import topic
@ -24,6 +24,9 @@ class PromptRequest(Record):
# JSON encoded values
terms = Map(String())
# Streaming support (default false for backward compatibility)
streaming = Boolean()
class PromptResponse(Record):
# Error case
@ -35,4 +38,7 @@ class PromptResponse(Record):
# JSON encoded
object = String()
# Indicates final message in stream
end_of_stream = Boolean()
############################################################################