Prompt refactor (#125)

* Prompt manager integrated and working with 6 tests
* Updated templates for the prompt-template update
This commit is contained in:
cybermaggedon 2024-10-26 22:17:43 +01:00 committed by GitHub
parent 51aef6c730
commit 1e137768ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 649 additions and 479 deletions

View file

@ -13,7 +13,7 @@ local prompts = import "prompts/mixtral.jsonnet";
create:: function(engine)
local envSecrets = engine.envSecrets("bedrock-credentials")
.with_env_var("GOOGLEAISTUDIO_KEY", "googleaistudio-key");
.with_env_var("GOOGLE_AI_STUDIO_KEY", "googleaistudio-key");
local container =
engine.container("text-completion")

View file

@ -17,22 +17,38 @@ local default_prompts = import "prompts/default-prompts.jsonnet";
"prompt-template",
"-p",
url.pulsar,
"--text-completion-request-queue",
"non-persistent://tg/request/text-completion",
"--text-completion-response-queue",
"non-persistent://tg/response/text-completion-response",
"--definition-template",
"--system-prompt",
$["system-template"],
"--prompt",
"question={{question}}",
"extract-definitions=" +
$["prompt-definition-template"],
"--relationship-template",
"extract-relationships=" +
$["prompt-relationship-template"],
"--topic-template",
"extract-topics=" +
$["prompt-topic-template"],
"--knowledge-query-template",
"kg-prompt=" +
$["prompt-knowledge-query-template"],
"--document-query-template",
"document-prompt=" +
$["prompt-document-query-template"],
"--rows-template",
"extract-rows=" +
$["prompt-rows-template"],
"--prompt-response-type",
"extract-definitions=json",
"extract-relationships=json",
"extract-topics=json",
"kg-prompt=text",
"document-prompt=text",
"extract-rows=json",
])
.with_limits("0.5", "128M")
.with_reservations("0.1", "128M");
@ -71,18 +87,33 @@ local default_prompts = import "prompts/default-prompts.jsonnet";
"non-persistent://tg/request/text-completion-rag",
"--text-completion-response-queue",
"non-persistent://tg/response/text-completion-rag-response",
"--definition-template",
"--system-prompt",
$["system-template"],
"--prompt",
"question={{question}}",
"extract-definitions=" +
$["prompt-definition-template"],
"--relationship-template",
"extract-relationships=" +
$["prompt-relationship-template"],
"--topic-template",
"extract-topics=" +
$["prompt-topic-template"],
"--knowledge-query-template",
"kg-prompt=" +
$["prompt-knowledge-query-template"],
"--document-query-template",
"document-prompt=" +
$["prompt-document-query-template"],
"--rows-template",
"extract-rows=" +
$["prompt-rows-template"],
"--prompt-response-type",
"extract-definitions=json",
"extract-relationships=json",
"extract-topics=json",
"kg-prompt=text",
"document-prompt=text",
"extract-rows=json",
])
.with_limits("0.5", "128M")
.with_reservations("0.1", "128M");

View file

@ -4,16 +4,18 @@
{
"prompt-definition-template":: "<instructions>\nStudy the following text and derive definitions for any discovered entities.\nDo not provide definitions for entities whose definitions are incomplete\nor unknown.\nOutput relationships in JSON format as an arary of objects with fields:\n- entity: the name of the entity\n- definition: English text which defines the entity\n</instructions>\n\n<text>\n{text}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract will be written as plain text. Do not add markdown formatting\nor headers or prefixes. Do not include null or unknown definitions.\n</requirements>",
"system-template":: "You are a helpful assistant.",
"prompt-relationship-template":: "<instructions>\nStudy the following text and derive entity relationships. For each\nrelationship, derive the subject, predicate and object of the relationship.\nOutput relationships in JSON format as an arary of objects with fields:\n- subject: the subject of the relationship\n- predicate: the predicate\n- object: the object of the relationship\n- object-entity: false if the object is a simple data type: name, value or date. true if it is an entity.\n</instructions>\n\n<text>\n{text}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract must be written as plain text. Do not add markdown formatting\nor headers or prefixes.\n</requirements>",
"prompt-definition-template":: "<instructions>\nStudy the following text and derive definitions for any discovered entities.\nDo not provide definitions for entities whose definitions are incomplete\nor unknown.\nOutput relationships in JSON format as an arary of objects with fields:\n- entity: the name of the entity\n- definition: English text which defines the entity\n</instructions>\n\n<text>\n{{text}}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract will be written as plain text. Do not add markdown formatting\nor headers or prefixes. Do not include null or unknown definitions.\n</requirements>",
"prompt-topic-template":: "You are a helpful assistant that performs information extraction tasks for a provided text.\nRead the provided text. You will identify topics and their definitions in JSON.\n\nReading Instructions:\n- Ignore document formatting in the provided text.\n- Study the provided text carefully.\n\nHere is the text:\n{text}\n\nResponse Instructions: \n- Do not respond with special characters.\n- Return only topics that are concepts and unique to the provided text.\n- Respond only with well-formed JSON.\n- The JSON response shall be an array of objects with keys \"topic\" and \"definition\". \n- The JSON response shall use the following structure:\n\n```json\n[{{\"topic\": string, \"definition\": string}}]\n```\n\n- Do not write any additional text or explanations.",
"prompt-relationship-template":: "<instructions>\nStudy the following text and derive entity relationships. For each\nrelationship, derive the subject, predicate and object of the relationship.\nOutput relationships in JSON format as an arary of objects with fields:\n- subject: the subject of the relationship\n- predicate: the predicate\n- object: the object of the relationship\n- object-entity: false if the object is a simple data type: name, value or date. true if it is an entity.\n</instructions>\n\n<text>\n{{text}}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract must be written as plain text. Do not add markdown formatting\nor headers or prefixes.\n</requirements>",
"prompt-knowledge-query-template":: "Study the following set of knowledge statements. The statements are written in Cypher format that has been extracted from a knowledge graph. Use only the provided set of knowledge statements in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.\n\nHere's the knowledge statements:\n{graph}\n\nUse only the provided knowledge statements to respond to the following:\n{query}\n",
"prompt-topic-template":: "You are a helpful assistant that performs information extraction tasks for a provided text.\nRead the provided text. You will identify topics and their definitions in JSON.\n\nReading Instructions:\n- Ignore document formatting in the provided text.\n- Study the provided text carefully.\n\nHere is the text:\n{{text}}\n\nResponse Instructions: \n- Do not respond with special characters.\n- Return only topics that are concepts and unique to the provided text.\n- Respond only with well-formed JSON.\n- The JSON response shall be an array of objects with keys \"topic\" and \"definition\". \n- The JSON response shall use the following structure:\n\n```json\n[{\"topic\": string, \"definition\": string}]\n```\n\n- Do not write any additional text or explanations.",
"prompt-document-query-template":: "Study the following context. Use only the information provided in the context in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.\n\nHere is the context:\n{documents}\n\nUse only the provided knowledge statements to respond to the following:\n{query}\n",
"prompt-knowledge-query-template":: "Study the following set of knowledge statements. The statements are written in Cypher format that has been extracted from a knowledge graph. Use only the provided set of knowledge statements in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.\n\nHere's the knowledge statements:\n{% for edge in knowledge %}({{edge.s}})-[{{edge.p}}]->({{edge.o}})\n{%endfor%}\n\nUse only the provided knowledge statements to respond to the following:\n{{query}}\n",
"prompt-rows-template":: "<instructions>\nStudy the following text and derive objects which match the schema provided.\n\nYou must output an array of JSON objects for each object you discover\nwhich matches the schema. For each object, output a JSON object whose fields\ncarry the name field specified in the schema.\n</instructions>\n\n<schema>\n{schema}\n</schema>\n\n<text>\n{text}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not add markdown formatting or headers or prefixes.\n</requirements>",
"prompt-document-query-template":: "Study the following context. Use only the information provided in the context in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.\n\nHere is the context:\n{{documents}}\n\nUse only the provided knowledge statements to respond to the following:\n{{query}}\n",
"prompt-rows-template":: "<instructions>\nStudy the following text and derive objects which match the schema provided.\n\nYou must output an array of JSON objects for each object you discover\nwhich matches the schema. For each object, output a JSON object whose fields\ncarry the name field specified in the schema.\n</instructions>\n\n<schema>\n{{schema}}\n</schema>\n\n<text>\n{{text}}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not add markdown formatting or headers or prefixes.\n</requirements>",
}

27
tests/README.prompts Normal file
View file

@ -0,0 +1,27 @@
The test-prompt-... scripts in this directory are run against the following prompt set:
# Invocation of the prompt-template service used when running the
# test-prompt-* scripts. Defines a system prompt, global terms, named
# prompts, per-prompt response types, term overrides, and JSON schemas.
prompt-template \
-p pulsar://localhost:6650 \
--system-prompt 'You are a {{attitude}}, you are called {{name}}' \
--global-term \
'name=Craig' \
'attitude=LOUD, SHOUTY ANNOYING BOT' \
--prompt \
'question={{question}}' \
'french-question={{question}}' \
"analyze=Find the name and age in this text, and output a JSON structure containing just the name and age fields: {{description}}. Don't add markup, just output the raw JSON object." \
"graph-query=Study the following knowledge graph, and then answer the question.\\n\nGraph:\\n{% for edge in knowledge %}({{edge.0}})-[{{edge.1}}]->({{edge.2}})\\n{%endfor%}\\nQuestion:\\n{{question}}" \
"extract-definition=Analyse the text provided, and then return a list of terms and definitions. The output should be a JSON array, each item in the array is an object with fields 'term' and 'definition'.Don't add markup, just output the raw JSON object. Here is the text:\\n{{text}}" \
--prompt-response-type \
'question=text' \
'analyze=json' \
'graph-query=text' \
'extract-definition=json' \
--prompt-term \
'question=name:Bonny' \
'french-question=attitude:French-speaking bot' \
--prompt-schema \
'analyze={ "type" : "object", "properties" : { "age": { "type" : "number" }, "name": { "type" : "string" } } }' \
'extract-definition={ "type": "array", "items": { "type": "object", "properties": { "term": { "type": "string" }, "definition": { "type": "string" } }, "required": [ "term", "definition" ] } }'

View file

@ -7,7 +7,13 @@ p = PromptClient(pulsar_host="pulsar://localhost:6650")
chunk = """I noticed a cat in my garden. It is a four-legged animal
which is a mammal and can be tame or wild. I wonder if it will be friends
with me. I think the cat's name is Fred and it has 4 legs"""
with me. I think the cat's name is Fred and it has 4 legs.
A cat is a small mammal.
A grapefruit is a citrus fruit.
"""
resp = p.request_definitions(
chunk=chunk,

19
tests/test-lang-topics Executable file
View file

@ -0,0 +1,19 @@
#!/usr/bin/env python3

"""Exercise the prompt service's topic-extraction endpoint against a
locally running Pulsar broker, printing each topic and its definition."""

import pulsar
from trustgraph.clients.prompt_client import PromptClient

client = PromptClient(pulsar_host="pulsar://localhost:6650")

chunk = """I noticed a cat in my garden. It is a four-legged animal
which is a mammal and can be tame or wild. I wonder if it will be friends
with me. I think the cat's name is Fred and it has 4 legs"""

topics = client.request_topics(chunk=chunk)

for item in topics:
    print(item.topic)
    print(" ", item.definition)

18
tests/test-prompt-analyze Executable file
View file

@ -0,0 +1,18 @@
#!/usr/bin/env python3

"""Invoke the generic 'analyze' prompt on a short description and
pretty-print the JSON object returned by the prompt service."""

import json
from trustgraph.clients.prompt_client import PromptClient

client = PromptClient(pulsar_host="pulsar://localhost:6650")

description = """Fred is a 4-legged cat who is 12 years old"""

result = client.request(id="analyze", terms={"description": description})

print(json.dumps(result, indent=4))

46
tests/test-prompt-extraction Executable file
View file

@ -0,0 +1,46 @@
#!/usr/bin/env python3

"""Run the 'extract-definition' prompt over a chunk of Space Shuttle
text and print each extracted term/definition pair.

The prompt (see tests/README.prompts) is configured to return a JSON
array of objects with 'term' and 'definition' fields.
"""

import json
from trustgraph.clients.prompt_client import PromptClient

p = PromptClient(pulsar_host="pulsar://localhost:6650")

chunk="""
The Space Shuttle was a reusable spacecraft that transported astronauts and cargo to and from Earth's orbit. It was designed to launch like a rocket, maneuver in orbit like a spacecraft, and land like an airplane. The Space Shuttle was NASA's space transportation system and was used for many purposes, including:
Carrying astronauts
The Space Shuttle could carry up to seven astronauts at a time.
Launching, recovering, and repairing satellites
The Space Shuttle could launch satellites into orbit, recover them, and repair them.
Building the International Space Station
The Space Shuttle carried large parts into space to build the International Space Station.
Conducting research
Astronauts conducted experiments in the Space Shuttle, which was like a science lab in space.
The Space Shuttle was retired in 2011 after the Columbia accident in 2003. The Columbia Accident Investigation Board report found that the Space Shuttle was unsafe and expensive to make safe.
Here are some other facts about the Space Shuttle:
The Space Shuttle was 184 ft tall and had a diameter of 29 ft.
The Space Shuttle had a mass of 4,480,000 lb.
The Space Shuttle's first flight was on April 12, 1981.
The Space Shuttle's last mission was in 2011.
"""

# NOTE: a stray unused variable q ("Tell me some facts...") copied from
# test-prompt-knowledge was removed here; this test only extracts
# definitions from the chunk above.

resp = p.request(
    id="extract-definition",
    terms = {
        "text": chunk,
    }
)

# Raw response first, then one term/definition pair per block.
print(resp)

for fact in resp:
    print(fact["term"], "::")
    print(fact["definition"])
    print()

View file

@ -0,0 +1,18 @@
#!/usr/bin/env python3

"""Ask the 'french-question' prompt a question; the service's per-prompt
term overrides shape how the bot answers."""

import pulsar
from trustgraph.clients.prompt_client import PromptClient

client = PromptClient(pulsar_host="pulsar://localhost:6650")

question = """What is the square root of 16?"""

answer = client.request(id="french-question", terms={"question": question})

print(answer)

44
tests/test-prompt-knowledge Executable file
View file

@ -0,0 +1,44 @@
#!/usr/bin/env python3

"""Feed a set of (subject, predicate, object) triples to the
'graph-query' prompt and print the answer to a question posed over
the resulting knowledge graph."""

import json
from trustgraph.clients.prompt_client import PromptClient

client = PromptClient(pulsar_host="pulsar://localhost:6650")

# Knowledge-graph edges as (subject, predicate, object) tuples.
knowledge = [
    ("accident", "evoked", "a wide range of deeply felt public responses"),
    ("Space Shuttle concept", "had", "genesis"),
    ("Commission", "had", "a mandate to develop recommendations for corrective or other action based upon the Commission's findings and determinations"),
    ("Commission", "established", "teams of persons"),
    ("Space Shuttle Challenger", "http://www.w3.org/2004/02/skos/core#definition", "A space shuttle that was destroyed in an accident during mission 51-L."),
    ("The mid fuselage", "contains", "the payload bay"),
    ("Volume I", "contains", "Chapter IX"),
    ("accident", "resulted in", "firm national resolve that those men and women be forever enshrined in the annals of American heroes"),
    ("Volume I", "contains", "Chapter VII"),
    ("Volume I", "contains", "Chapter II"),
    ("Volume I", "contains", "Chapter V"),
    ("Commission", "believes", "its investigation and report have been responsive to the request of the President and hopes that they will serve the best interests of the nation in restoring the United States space program to its preeminent position in the world"),
    ("Commission", "construe", "mandate"),
    ("accident", "became", "a milestone on the way to achieving the full potential that space offers to mankind"),
    ("Volume I", "contains", "The Commission"),
    ("Commission", "http://www.w3.org/2004/02/skos/core#definition", "A group established to investigate the space shuttle accident"),
    ("Volume I", "contains", "Appendix D"),
    ("Commission", "had", "a mandate to review the circumstances surrounding the accident to establish the probable cause or causes of the accident"),
    ("Volume I", "contains", "Recommendations")
]

q = "Tell me some facts in the knowledge graph"

# Term key order kept as in the prompt template's expectations.
query_terms = {
    "name": "Jayney",
    "knowledge": knowledge,
    "question": q
}

answer = client.request(id="graph-query", terms=query_terms)

print(answer)

18
tests/test-prompt-question Executable file
View file

@ -0,0 +1,18 @@
#!/usr/bin/env python3

"""Smoke-test the plain 'question' prompt through the prompt service."""

import pulsar
from trustgraph.clients.prompt_client import PromptClient

service = PromptClient(pulsar_host="pulsar://localhost:6650")

question = """What is the square root of 16?"""

reply = service.request(
    id="question",
    terms={"question": question},
)

print(reply)

View file

@ -0,0 +1,19 @@
#!/usr/bin/env python3

"""Ask the 'question' prompt while overriding the 'attitude' term so the
bot answers as a Spanish speaker."""

import pulsar
from trustgraph.clients.prompt_client import PromptClient

service = PromptClient(pulsar_host="pulsar://localhost:6650")

question = """What is the square root of 16?"""

terms = {
    "question": question,
    "attitude": "Spanish-speaking bot"
}

reply = service.request(id="question", terms=terms)

print(reply)

View file

@ -1,7 +1,9 @@
import _pulsar
import json
import dataclasses
from .. schema import PromptRequest, PromptResponse, Fact, RowSchema, Field
from .. schema import PromptRequest, PromptResponse
from .. schema import prompt_request_queue
from .. schema import prompt_response_queue
from . base import BaseClient
@ -12,6 +14,23 @@ WARN=_pulsar.LoggerLevel.Warn
INFO=_pulsar.LoggerLevel.Info
DEBUG=_pulsar.LoggerLevel.Debug
@dataclasses.dataclass
class Definition:
name: str
definition: str
@dataclasses.dataclass
class Relationship:
s: str
p: str
o: str
o_entity: str
@dataclasses.dataclass
class Topic:
topic: str
definition: str
class PromptClient(BaseClient):
def __init__(
@ -38,63 +57,116 @@ class PromptClient(BaseClient):
output_schema=PromptResponse,
)
def request(self, id, terms, timeout=300):
resp = self.call(
id=id,
terms={
k: json.dumps(v)
for k, v in terms.items()
},
timeout=timeout
)
if resp.text: return resp.text
return json.loads(resp.object)
def request_definitions(self, chunk, timeout=300):
return self.call(
kind="extract-definitions", chunk=chunk,
defs = self.request(
id="extract-definitions",
terms={
"text": chunk
},
timeout=timeout
).definitions
def request_topics(self, chunk, timeout=300):
)
return self.call(
kind="extract-topics", chunk=chunk,
timeout=timeout
).topics
return [
Definition(name=d["entity"], definition=d["definition"])
for d in defs
]
def request_relationships(self, chunk, timeout=300):
return self.call(
kind="extract-relationships", chunk=chunk,
rels = self.request(
id="extract-relationships",
terms={
"text": chunk
},
timeout=timeout
).relationships
)
return [
Relationship(
s=d["subject"],
p=d["predicate"],
o=d["object"],
o_entity=d["object-entity"]
)
for d in rels
]
def request_topics(self, chunk, timeout=300):
topics = self.request(
id="extract-topics",
terms={
"text": chunk
},
timeout=timeout
)
return [
Topic(topic=d["topic"], definition=d["definition"])
for d in topics
]
def request_rows(self, schema, chunk, timeout=300):
return self.call(
kind="extract-rows", chunk=chunk,
row_schema=RowSchema(
name=schema.name,
description=schema.description,
fields=[
Field(
name=f.name, type=str(f.type), size=f.size,
primary=f.primary, description=f.description,
)
for f in schema.fields
]
),
return self.request(
id="extract-rows",
terms={
"chunk": chunk,
"row-schema": {
"name": schema.name,
"description": schema.description,
"fields": [
{
"name": f.name, "type": str(f.type),
"size": f.size, "primary": f.primary,
"description": f.description,
}
for f in schema.fields
]
}
},
timeout=timeout
).rows
)
def request_kg_prompt(self, query, kg, timeout=300):
return self.call(
kind="kg-prompt",
query=query,
kg=[
Fact(s=v[0], p=v[1], o=v[2])
for v in kg
],
return self.request(
id="kg-prompt",
terms={
"query": query,
"knowledge": [
{ "s": v[0], "p": v[1], "o": v[2] }
for v in kg
]
},
timeout=timeout
).answer
)
def request_document_prompt(self, query, documents, timeout=300):
return self.call(
kind="document-prompt",
query=query,
documents=documents,
return self.request(
id="document-prompt",
terms={
"query": query,
"documents": documents,
},
timeout=timeout
).answer
)

View file

@ -39,20 +39,21 @@ class Fact(Record):
# schema, chunk -> rows
class PromptRequest(Record):
kind = String()
chunk = String()
query = String()
kg = Array(Fact())
documents = Array(Bytes())
row_schema = RowSchema()
id = String()
# JSON encoded values
terms = Map(String())
class PromptResponse(Record):
# Error case
error = Error()
answer = String()
definitions = Array(Definition())
topics = Array(Topic())
relationships = Array(Relationship())
rows = Array(Map(String()))
# Just plain text
text = String()
# JSON encoded
object = String()
prompt_request_queue = topic(
'prompt', kind='non-persistent', namespace='request'

View file

@ -56,6 +56,8 @@ setuptools.setup(
"neo4j",
"tiktoken",
"google-generativeai",
"ibis",
"jsonschema",
],
scripts=[
"scripts/chunker-recursive",

View file

@ -0,0 +1,25 @@
# Reference invocation of the prompt-template service: system prompt,
# global terms, named prompt templates, response types (text/json),
# per-prompt term overrides, and JSON schemas for response validation.
prompt-template \
-p pulsar://localhost:6650 \
--system-prompt 'You are a {{attitude}}, you are called {{name}}' \
--global-term \
'name=Craig' \
'attitude=LOUD, SHOUTY ANNOYING BOT' \
--prompt \
'question={{question}}' \
'french-question={{question}}' \
"analyze=Find the name and age in this text, and output a JSON structure containing just the name and age fields: {{description}}. Don't add markup, just output the raw JSON object." \
"graph-query=Study the following knowledge graph, and then answer the question.\\n\nGraph:\\n{% for edge in knowledge %}({{edge.0}})-[{{edge.1}}]->({{edge.2}})\\n{%endfor%}\\nQuestion:\\n{{question}}" \
"extract-definition=Analyse the text provided, and then return a list of terms and definitions. The output should be a JSON array, each item in the array is an object with fields 'term' and 'definition'.Don't add markup, just output the raw JSON object. Here is the text:\\n{{text}}" \
--prompt-response-type \
'question=text' \
'analyze=json' \
'graph-query=text' \
'extract-definition=json' \
--prompt-term \
'question=name:Bonny' \
'french-question=attitude:French-speaking bot' \
--prompt-schema \
'analyze={ "type" : "object", "properties" : { "age": { "type" : "number" }, "name": { "type" : "string" } } }' \
'extract-definition={ "type": "array", "items": { "type": "object", "properties": { "term": { "type": "string" }, "definition": { "type": "string" } }, "required": [ "term", "definition" ] } }'

View file

@ -0,0 +1,95 @@
import ibis
import json
from jsonschema import validate
import re
from trustgraph.clients.llm_client import LlmClient
class PromptConfiguration:
    """Bundle of prompt settings: system template text, global terms,
    and a mapping of prompt id -> Prompt.

    Fix: the original used mutable default arguments (global_terms={},
    prompts={}), so all instances constructed with defaults shared the
    same dict objects; mutations on one instance leaked into others.
    """

    def __init__(self, system_template, global_terms=None, prompts=None):
        self.system_template = system_template
        # Fresh dicts per instance; None sentinels keep the call
        # signature backward-compatible.
        self.global_terms = {} if global_terms is None else global_terms
        self.prompts = {} if prompts is None else prompts
class Prompt:
    """A single named prompt: template text plus response-handling
    metadata (response type, per-prompt terms, optional JSON schema)."""

    def __init__(self, template, response_type="text", terms=None, schema=None):
        # Plain value holder; rendering/validation happen elsewhere.
        self.schema = schema
        self.terms = terms
        self.response_type = response_type
        self.template = template
class PromptManager:
    """Renders configured prompt templates, calls the LLM, and
    post-processes responses (raw text, or parsed + schema-validated
    JSON).

    Templates are compiled once at construction (via ibis) so template
    syntax errors surface immediately rather than on first use.

    Fix: the original used bare `except:` clauses, which swallow the
    underlying error detail (and catch BaseException, e.g.
    KeyboardInterrupt). These are narrowed to `except Exception` and
    chained with `raise ... from e` so the cause is preserved; the
    visible error messages are unchanged.
    """

    def __init__(self, llm, config):
        self.llm = llm
        self.config = config
        self.terms = config.global_terms
        self.prompts = config.prompts

        try:
            self.system_template = ibis.Template(config.system_template)
        except Exception as e:
            raise RuntimeError("Error in system template") from e

        self.templates = {}
        for k, v in self.prompts.items():
            try:
                self.templates[k] = ibis.Template(v.template)
            except Exception as e:
                raise RuntimeError(f"Error in template: {k}") from e
            # Normalise missing per-prompt terms so invoke() can merge
            # dictionaries unconditionally.
            if v.terms is None:
                v.terms = {}

    def parse_json(self, text):
        """Extract and parse JSON from an LLM response.

        Accepts either a fenced ```json ... ``` block or raw JSON text;
        raises json.JSONDecodeError on malformed input.
        """
        json_match = re.search(r'```(?:json)?(.*?)```', text, re.DOTALL)
        if json_match:
            json_str = json_match.group(1).strip()
        else:
            # No code fence: assume the entire output is JSON
            json_str = text.strip()
        return json.loads(json_str)

    def invoke(self, id, input):
        """Render prompt `id` with merged terms, call the LLM, and
        return raw text or a parsed (optionally schema-validated)
        object, per the prompt's configured response type.
        """
        if id not in self.prompts:
            raise RuntimeError("ID invalid")

        # Precedence: per-invocation input > per-prompt terms > globals.
        terms = self.terms | self.prompts[id].terms | input

        resp_type = self.prompts[id].response_type

        prompt = {
            "system": self.system_template.render(terms),
            "prompt": self.templates[id].render(terms)
        }

        resp = self.llm.request(**prompt)
        print(resp, flush=True)

        if resp_type == "text":
            return resp

        if resp_type != "json":
            raise RuntimeError(f"Response type {resp_type} not known")

        try:
            obj = self.parse_json(resp)
        except Exception as e:
            raise RuntimeError("JSON parse fail") from e

        print(obj, flush=True)

        if self.prompts[id].schema:
            try:
                print(self.prompts[id].schema)
                validate(instance=obj, schema=self.prompts[id].schema)
            except Exception as e:
                raise RuntimeError(f"Schema validation fail: {e}") from e

        return obj

View file

@ -1,47 +0,0 @@
def to_relationships(template, text):
return template.format(text=text)
def to_definitions(template, text):
return template.format(text=text)
def to_topics(template, text):
return template.format(text=text)
def to_rows(template, schema, text):
field_schema = [
f"- Name: {f.name}\n Type: {f.type}\n Definition: {f.description}"
for f in schema.fields
]
field_schema = "\n".join(field_schema)
return template.format(schema=schema, text=text)
schema = f"""Object name: {schema.name}
Description: {schema.description}
Fields:
{schema}"""
prompt = f""""""
return prompt
def get_cypher(kg):
sg2 = []
for f in kg:
sg2.append(f"({f.s})-[{f.p}]->({f.o})")
kg = "\n".join(sg2)
kg = kg.replace("\\", "-")
return kg
def to_kg_query(template, query, kg):
cypher = get_cypher(kg)
return template.format(query=query, graph=cypher)
def to_document_query(template, query, docs):
docs = "\n\n".join(docs)
return template.format(query=query, documents=docs)

View file

@ -16,8 +16,7 @@ from .... schema import prompt_request_queue, prompt_response_queue
from .... base import ConsumerProducer
from .... clients.llm_client import LlmClient
from . prompts import to_definitions, to_relationships, to_rows
from . prompts import to_kg_query, to_document_query, to_topics
from . prompt_manager import PromptConfiguration, Prompt, PromptManager
module = ".".join(__name__.split(".")[1:-1])
@ -29,6 +28,82 @@ class Processor(ConsumerProducer):
def __init__(self, **params):
prompt_base = {}
# Parsing the prompt information to the prompt configuration
# structure
prompt_arg = params.get("prompt", [])
if prompt_arg:
for p in prompt_arg:
toks = p.split("=", 1)
if len(toks) < 2:
raise RuntimeError(f"Prompt string not well-formed: {p}")
prompt_base[toks[0]] = {
"template": toks[1]
}
prompt_response_type_arg = params.get("prompt_response_type", [])
if prompt_response_type_arg:
for p in prompt_response_type_arg:
toks = p.split("=", 1)
if len(toks) < 2:
raise RuntimeError(f"Response type not well-formed: {p}")
if toks[0] not in prompt_base:
raise RuntimeError(f"Response-type, {toks[0]} not known")
prompt_base[toks[0]]["response_type"] = toks[1]
prompt_schema_arg = params.get("prompt_schema", [])
if prompt_schema_arg:
for p in prompt_schema_arg:
toks = p.split("=", 1)
if len(toks) < 2:
raise RuntimeError(f"Schema arg not well-formed: {p}")
if toks[0] not in prompt_base:
raise RuntimeError(f"Schema, {toks[0]} not known")
try:
prompt_base[toks[0]]["schema"] = json.loads(toks[1])
except:
raise RuntimeError(f"Failed to parse JSON schema: {p}")
prompt_term_arg = params.get("prompt_term", [])
if prompt_term_arg:
for p in prompt_term_arg:
toks = p.split("=", 1)
if len(toks) < 2:
raise RuntimeError(f"Term arg not well-formed: {p}")
if toks[0] not in prompt_base:
raise RuntimeError(f"Term, {toks[0]} not known")
kvtoks = toks[1].split(":", 1)
if len(kvtoks) < 2:
raise RuntimeError(f"Term not well-formed: {toks[1]}")
k, v = kvtoks
if "terms" not in prompt_base[toks[0]]:
prompt_base[toks[0]]["terms"] = {}
prompt_base[toks[0]]["terms"][k] = v
global_terms = {}
global_term_arg = params.get("global_term", [])
if global_term_arg:
for t in global_term_arg:
toks = t.split("=", 1)
if len(toks) < 2:
raise RuntimeError(f"Global term arg not well-formed: {t}")
global_terms[toks[0]] = toks[1]
print(global_terms)
prompts = {
k: Prompt(**v)
for k, v in prompt_base.items()
}
prompt_configuration = PromptConfiguration(
system_template = params.get("system_prompt", ""),
global_terms = global_terms,
prompts = prompts
)
input_queue = params.get("input_queue", default_input_queue)
output_queue = params.get("output_queue", default_output_queue)
subscriber = params.get("subscriber", default_subscriber)
@ -64,23 +139,21 @@ class Processor(ConsumerProducer):
pulsar_host = self.pulsar_host
)
self.definition_template = definition_template
self.topic_template = topic_template
self.relationship_template = relationship_template
self.rows_template = rows_template
self.knowledge_query_template = knowledge_query_template
self.document_query_template = document_query_template
# System prompt hack
class Llm:
def __init__(self, llm):
self.llm = llm
def request(self, system, prompt):
print(system)
print(prompt, flush=True)
return self.llm.request(system + "\n\n" + prompt)
def parse_json(self, text):
json_match = re.search(r'```(?:json)?(.*?)```', text, re.DOTALL)
if json_match:
json_str = json_match.group(1).strip()
else:
# If no delimiters, assume the entire output is JSON
json_str = text.strip()
self.llm = Llm(self.llm)
return json.loads(json_str)
self.manager = PromptManager(
llm = self.llm,
config = prompt_configuration,
)
def handle(self, msg):
@ -90,88 +163,52 @@ class Processor(ConsumerProducer):
id = msg.properties()["id"]
kind = v.kind
print(f"Handling kind {kind}...", flush=True)
if kind == "extract-definitions":
self.handle_extract_definitions(id, v)
return
elif kind == "extract-topics":
self.handle_extract_topics(id, v)
return
elif kind == "extract-relationships":
self.handle_extract_relationships(id, v)
return
elif kind == "extract-rows":
self.handle_extract_rows(id, v)
return
elif kind == "kg-prompt":
self.handle_kg_prompt(id, v)
return
elif kind == "document-prompt":
self.handle_document_prompt(id, v)
return
else:
print("Invalid kind.", flush=True)
return
def handle_extract_definitions(self, id, v):
kind = v.id
try:
prompt = to_definitions(self.definition_template, v.chunk)
print(v.terms)
ans = self.llm.request(prompt)
input = {
k: json.loads(v)
for k, v in v.terms.items()
}
print(f"Handling kind {kind}...", flush=True)
print(input, flush=True)
# Silently ignore JSON parse error
try:
defs = self.parse_json(ans)
except:
print("JSON parse error, ignored", flush=True)
defs = []
resp = self.manager.invoke(kind, input)
output = []
if isinstance(resp, str):
for defn in defs:
print("Send text response...", flush=True)
print(resp, flush=True)
try:
e = defn["entity"]
d = defn["definition"]
r = PromptResponse(
text=resp,
object=None,
error=None,
)
if e == "": continue
if e is None: continue
if d == "": continue
if d is None: continue
self.producer.send(r, properties={"id": id})
output.append(
Definition(
name=e, definition=d
)
)
return
except:
print("definition fields missing, ignored", flush=True)
else:
print("Send response...", flush=True)
r = PromptResponse(definitions=output, error=None)
self.producer.send(r, properties={"id": id})
print("Send object response...", flush=True)
print(json.dumps(resp, indent=4), flush=True)
print("Done.", flush=True)
r = PromptResponse(
text=None,
object=json.dumps(resp),
error=None,
)
self.producer.send(r, properties={"id": id})
return
except Exception as e:
print(f"Exception: {e}")
@ -188,122 +225,6 @@ class Processor(ConsumerProducer):
self.producer.send(r, properties={"id": id})
def handle_extract_topics(self, id, v):
try:
prompt = to_topics(self.topic_template, v.chunk)
ans = self.llm.request(prompt)
# Silently ignore JSON parse error
try:
defs = self.parse_json(ans)
except:
print("JSON parse error, ignored", flush=True)
defs = []
output = []
for defn in defs:
try:
e = defn["topic"]
d = defn["definition"]
if e == "": continue
if e is None: continue
if d == "": continue
if d is None: continue
output.append(
Topic(
name=e, definition=d
)
)
except:
print("definition fields missing, ignored", flush=True)
print("Send response...", flush=True)
r = PromptResponse(topics=output, error=None)
self.producer.send(r, properties={"id": id})
print("Done.", flush=True)
except Exception as e:
print(f"Exception: {e}")
print("Send error response...", flush=True)
r = PromptResponse(
error=Error(
type = "llm-error",
message = str(e),
),
response=None,
)
self.producer.send(r, properties={"id": id})
def handle_extract_relationships(self, id, v):
try:
prompt = to_relationships(self.relationship_template, v.chunk)
ans = self.llm.request(prompt)
# Silently ignore JSON parse error
try:
defs = self.parse_json(ans)
except:
print("JSON parse error, ignored", flush=True)
defs = []
output = []
for defn in defs:
try:
s = defn["subject"]
p = defn["predicate"]
o = defn["object"]
o_entity = defn["object-entity"]
if s == "": continue
if s is None: continue
if p == "": continue
if p is None: continue
if o == "": continue
if o is None: continue
if o_entity == "" or o_entity is None:
o_entity = False
output.append(
Relationship(
s = s,
p = p,
o = o,
o_entity = o_entity,
)
)
except Exception as e:
print("relationship fields missing, ignored", flush=True)
print("Send response...", flush=True)
r = PromptResponse(relationships=output, error=None)
self.producer.send(r, properties={"id": id})
print("Done.", flush=True)
except Exception as e:
print(f"Exception: {e}")
@ -320,147 +241,6 @@ class Processor(ConsumerProducer):
self.producer.send(r, properties={"id": id})
def handle_extract_rows(self, id, v):
try:
fields = v.row_schema.fields
prompt = to_rows(self.rows_template, v.row_schema, v.chunk)
print(prompt)
ans = self.llm.request(prompt)
print(ans)
# Silently ignore JSON parse error
try:
objs = self.parse_json(ans)
except:
print("JSON parse error, ignored", flush=True)
objs = []
output = []
for obj in objs:
try:
row = {}
for f in fields:
if f.name not in obj:
print(f"Object ignored, missing field {f.name}")
row = {}
break
row[f.name] = obj[f.name]
if row == {}:
continue
output.append(row)
except Exception as e:
print("row fields missing, ignored", flush=True)
for row in output:
print(row)
print("Send response...", flush=True)
r = PromptResponse(rows=output, error=None)
self.producer.send(r, properties={"id": id})
print("Done.", flush=True)
except Exception as e:
print(f"Exception: {e}")
print("Send error response...", flush=True)
r = PromptResponse(
error=Error(
type = "llm-error",
message = str(e),
),
response=None,
)
self.producer.send(r, properties={"id": id})
def handle_kg_prompt(self, id, v):
try:
prompt = to_kg_query(self.knowledge_query_template, v.query, v.kg)
print(prompt)
ans = self.llm.request(prompt)
print(ans)
print("Send response...", flush=True)
r = PromptResponse(answer=ans, error=None)
self.producer.send(r, properties={"id": id})
print("Done.", flush=True)
except Exception as e:
print(f"Exception: {e}")
print("Send error response...", flush=True)
r = PromptResponse(
error=Error(
type = "llm-error",
message = str(e),
),
response=None,
)
self.producer.send(r, properties={"id": id})
def handle_document_prompt(self, id, v):
try:
prompt = to_document_query(
self.document_query_template, v.query, v.documents
)
print(prompt)
ans = self.llm.request(prompt)
print(ans)
print("Send response...", flush=True)
r = PromptResponse(answer=ans, error=None)
self.producer.send(r, properties={"id": id})
print("Done.", flush=True)
except Exception as e:
print(f"Exception: {e}")
print("Send error response...", flush=True)
r = PromptResponse(
error=Error(
type = "llm-error",
message = str(e),
),
response=None,
)
self.producer.send(r, properties={"id": id})
@staticmethod
def add_args(parser):
@ -482,39 +262,33 @@ class Processor(ConsumerProducer):
)
parser.add_argument(
'--definition-template',
required=True,
help=f'Definition extraction template',
'--prompt', nargs='*',
help=f'Prompt template form id=template',
)
parser.add_argument(
'--topic-template',
required=True,
help=f'Topic extraction template',
'--prompt-response-type', nargs='*',
help=f'Prompt response type, form id=json|text',
)
parser.add_argument(
'--rows-template',
required=True,
help=f'Rows extraction template',
'--prompt-term', nargs='*',
help=f'Prompt response type, form id=key:value',
)
parser.add_argument(
'--relationship-template',
required=True,
help=f'Relationship extraction template',
'--prompt-schema', nargs='*',
help=f'Prompt response schema, form id=schema',
)
parser.add_argument(
'--knowledge-query-template',
required=True,
help=f'Knowledge query template',
'--system-prompt',
help=f'System prompt template',
)
parser.add_argument(
'--document-query-template',
required=True,
help=f'Document query template',
'--global-term', nargs='+',
help=f'Global term, form key:value'
)
def run():