trustgraph/tg-launch-vertexai-cassandra.yaml

"services":
  "cassandra":
    "deploy":
      "resources":
        "limits":
          "cpus": "1.0"
          "memory": "800M"
        "reservations":
          "cpus": "0.5"
          "memory": "800M"
    "environment":
      "JVM_OPTS": "-Xms256M -Xmx256M"
    "image": "docker.io/cassandra:4.1.6"
    "ports":
    - "9042:9042"
    "restart": "on-failure:100"
    "volumes":
    - "cassandra:/var/lib/cassandra"
  "chunker":
    "command":
    - "chunker-token"
    - "-p"
    - "pulsar://pulsar:6650"
    - "--chunk-size"
    - "250"
    - "--chunk-overlap"
    - "15"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "embeddings":
    "command":
    - "embeddings-hf"
    - "-p"
    - "pulsar://pulsar:6650"
    - "-m"
    - "all-MiniLM-L6-v2"
    "deploy":
      "resources":
        "limits":
          "cpus": "1.0"
          "memory": "256M"
        "reservations":
          "cpus": "0.5"
          "memory": "256M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "grafana":
    "deploy":
      "resources":
        "limits":
          "cpus": "1.0"
          "memory": "256M"
        "reservations":
          "cpus": "0.5"
          "memory": "256M"
    "environment":
      "GF_ORG_NAME": "trustgraph.ai"
    "image": "docker.io/grafana/grafana:11.1.4"
    "ports":
    - "3000:3000"
    "restart": "on-failure:100"
    "volumes":
    - "grafana-storage:/var/lib/grafana"
    - "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
    - "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
    - "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
  "graph-rag":
    "command":
    - "graph-rag"
    - "-p"
    - "pulsar://pulsar:6650"
    - "--prompt-request-queue"
    - "non-persistent://tg/request/prompt-rag"
    - "--prompt-response-queue"
    - "non-persistent://tg/response/prompt-rag-response"
    - "--entity-limit"
    - "50"
    - "--triple-limit"
    - "30"
    - "--max-subgraph-size"
    - "3000"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "init-pulsar":
    "command":
    - "sh"
    - "-c"
    - "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/apachepulsar/pulsar:3.3.1"
    "restart": "on-failure:100"
  "kg-extract-definitions":
    "command":
    - "kg-extract-definitions"
    - "-p"
    - "pulsar://pulsar:6650"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "kg-extract-relationships":
    "command":
    - "kg-extract-relationships"
    - "-p"
    - "pulsar://pulsar:6650"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "kg-extract-topics":
    "command":
    - "kg-extract-topics"
    - "-p"
    - "pulsar://pulsar:6650"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "pdf-decoder":
    "command":
    - "pdf-decoder"
    - "-p"
    - "pulsar://pulsar:6650"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "prometheus":
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/prom/prometheus:v2.53.2"
    "ports":
    - "9090:9090"
    "restart": "on-failure:100"
    "volumes":
    - "./prometheus:/etc/prometheus"
    - "prometheus-data:/prometheus"
  "prompt":
    "command":
    - "prompt-template"
    - "-p"
    - "pulsar://pulsar:6650"
    - "--text-completion-request-queue"
    - "non-persistent://tg/request/text-completion"
    - "--text-completion-response-queue"
    - "non-persistent://tg/response/text-completion-response"
    - "--definition-template"
    - "<instructions>\nStudy the following text and derive definitions for any discovered entities.\nDo not provide definitions for entities whose definitions are incomplete\nor unknown.\nOutput relationships in JSON format as an arary of objects with fields:\n- entity: the name of the entity\n- definition: English text which defines the entity\n</instructions>\n\n<text>\n{text}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract will be written as plain text.  Do not add markdown formatting\nor headers or prefixes.  Do not include null or unknown definitions.\n</requirements>"
    - "--relationship-template"
    - "<instructions>\nStudy the following text and derive entity relationships.  For each\nrelationship, derive the subject, predicate and object of the relationship.\nOutput relationships in JSON format as an arary of objects with fields:\n- subject: the subject of the relationship\n- predicate: the predicate\n- object: the object of the relationship\n- object-entity: false if the object is a simple data type: name, value or date.  true if it is an entity.\n</instructions>\n\n<text>\n{text}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract must be written as plain text.  Do not add markdown formatting\nor headers or prefixes.\n</requirements>"
    - "--topic-template"
    - "You are a helpful assistant that performs information extraction tasks for a provided text.\nRead the provided text. You will identify topics and their definitions in JSON.\n\nReading Instructions:\n- Ignore document formatting in the provided text.\n- Study the provided text carefully.\n\nHere is the text:\n{text}\n\nResponse Instructions: \n- Do not respond with special characters.\n- Return only topics that are concepts and unique to the provided text.\n- Respond only with well-formed JSON.\n- The JSON response shall be an array of objects with keys \"topic\" and \"definition\". \n- The JSON response shall use the following structure:\n\n```json\n[{{\"topic\": string, \"definition\": string}}]\n```\n\n- Do not write any additional text or explanations."
    - "--knowledge-query-template"
    - |
      Study the following set of knowledge statements. The statements are written in Cypher format that has been extracted from a knowledge graph. Use only the provided set of knowledge statements in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.

      Here's the knowledge statements:
      {graph}

      Use only the provided knowledge statements to respond to the following:
      {query}
    - "--document-query-template"
    - |
      Study the following context. Use only the information provided in the context in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.

      Here is the context:
      {documents}

      Use only the provided knowledge statements to respond to the following:
      {query}
    - "--rows-template"
    - "<instructions>\nStudy the following text and derive objects which match the schema provided.\n\nYou must output an array of JSON objects for each object you discover\nwhich matches the schema.  For each object, output a JSON object whose fields\ncarry the name field specified in the schema.\n</instructions>\n\n<schema>\n{schema}\n</schema>\n\n<text>\n{text}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not add markdown formatting or headers or prefixes.\n</requirements>"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "prompt-rag":
    "command":
    - "prompt-template"
    - "-p"
    - "pulsar://pulsar:6650"
    - "-i"
    - "non-persistent://tg/request/prompt-rag"
    - "-o"
    - "non-persistent://tg/response/prompt-rag-response"
    - "--text-completion-request-queue"
    - "non-persistent://tg/request/text-completion-rag"
    - "--text-completion-response-queue"
    - "non-persistent://tg/response/text-completion-rag-response"
    - "--definition-template"
    - "<instructions>\nStudy the following text and derive definitions for any discovered entities.\nDo not provide definitions for entities whose definitions are incomplete\nor unknown.\nOutput relationships in JSON format as an arary of objects with fields:\n- entity: the name of the entity\n- definition: English text which defines the entity\n</instructions>\n\n<text>\n{text}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract will be written as plain text.  Do not add markdown formatting\nor headers or prefixes.  Do not include null or unknown definitions.\n</requirements>"
    - "--relationship-template"
    - "<instructions>\nStudy the following text and derive entity relationships.  For each\nrelationship, derive the subject, predicate and object of the relationship.\nOutput relationships in JSON format as an arary of objects with fields:\n- subject: the subject of the relationship\n- predicate: the predicate\n- object: the object of the relationship\n- object-entity: false if the object is a simple data type: name, value or date.  true if it is an entity.\n</instructions>\n\n<text>\n{text}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract must be written as plain text.  Do not add markdown formatting\nor headers or prefixes.\n</requirements>"
    - "--topic-template"
    - "You are a helpful assistant that performs information extraction tasks for a provided text.\nRead the provided text. You will identify topics and their definitions in JSON.\n\nReading Instructions:\n- Ignore document formatting in the provided text.\n- Study the provided text carefully.\n\nHere is the text:\n{text}\n\nResponse Instructions: \n- Do not respond with special characters.\n- Return only topics that are concepts and unique to the provided text.\n- Respond only with well-formed JSON.\n- The JSON response shall be an array of objects with keys \"topic\" and \"definition\". \n- The JSON response shall use the following structure:\n\n```json\n[{{\"topic\": string, \"definition\": string}}]\n```\n\n- Do not write any additional text or explanations."
    - "--knowledge-query-template"
    - |
      Study the following set of knowledge statements. The statements are written in Cypher format that has been extracted from a knowledge graph. Use only the provided set of knowledge statements in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.

      Here's the knowledge statements:
      {graph}

      Use only the provided knowledge statements to respond to the following:
      {query}
    - "--document-query-template"
    - |
      Study the following context. Use only the information provided in the context in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.

      Here is the context:
      {documents}

      Use only the provided knowledge statements to respond to the following:
      {query}
    - "--rows-template"
    - "<instructions>\nStudy the following text and derive objects which match the schema provided.\n\nYou must output an array of JSON objects for each object you discover\nwhich matches the schema.  For each object, output a JSON object whose fields\ncarry the name field specified in the schema.\n</instructions>\n\n<schema>\n{schema}\n</schema>\n\n<text>\n{text}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not add markdown formatting or headers or prefixes.\n</requirements>"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "pulsar":
    "command": "bin/pulsar standalone"
    "deploy":
      "resources":
        "limits":
          "cpus": "1.0"
          "memory": "900M"
        "reservations":
          "cpus": "0.5"
          "memory": "900M"
    "environment":
      "PULSAR_MEM": "-Xms700M -Xmx700M"
    "image": "docker.io/apachepulsar/pulsar:3.3.1"
    "ports":
    - "6650:6650"
    - "8080:8080"
    "restart": "on-failure:100"
    "volumes":
    - "pulsar-conf:/pulsar/conf"
    - "pulsar-data:/pulsar/data"
  "qdrant":
    "deploy":
      "resources":
        "limits":
          "cpus": "1.0"
          "memory": "256M"
        "reservations":
          "cpus": "0.5"
          "memory": "256M"
    "image": "docker.io/qdrant/qdrant:v1.11.1"
    "ports":
    - "6333:6333"
    - "6334:6334"
    "restart": "on-failure:100"
    "volumes":
    - "qdrant:/qdrant/storage"
  "query-doc-embeddings":
    "command":
    - "de-query-qdrant"
    - "-p"
    - "pulsar://pulsar:6650"
    - "-t"
    - "http://qdrant:6333"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "query-graph-embeddings":
    "command":
    - "ge-query-qdrant"
    - "-p"
    - "pulsar://pulsar:6650"
    - "-t"
    - "http://qdrant:6333"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "query-triples":
    "command":
    - "triples-query-cassandra"
    - "-p"
    - "pulsar://pulsar:6650"
    - "-g"
    - "cassandra"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "512M"
        "reservations":
          "cpus": "0.1"
          "memory": "512M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "store-doc-embeddings":
    "command":
    - "de-write-qdrant"
    - "-p"
    - "pulsar://pulsar:6650"
    - "-t"
    - "http://qdrant:6333"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "store-graph-embeddings":
    "command":
    - "ge-write-qdrant"
    - "-p"
    - "pulsar://pulsar:6650"
    - "-t"
    - "http://qdrant:6333"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "store-triples":
    "command":
    - "triples-write-cassandra"
    - "-p"
    - "pulsar://pulsar:6650"
    - "-g"
    - "cassandra"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
  "text-completion":
    "command":
    - "text-completion-vertexai"
    - "-p"
    - "pulsar://pulsar:6650"
    - "-k"
    - "/vertexai/private.json"
    - "-r"
    - "us-central1"
    - "-x"
    - "4096"
    - "-t"
    - "0"
    - "-m"
    - "gemini-1.0-pro-001"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
    "volumes":
    - "./vertexai:/vertexai"
  "text-completion-rag":
    "command":
    - "text-completion-vertexai"
    - "-p"
    - "pulsar://pulsar:6650"
    - "-k"
    - "/vertexai/private.json"
    - "-r"
    - "us-central1"
    - "-x"
    - "4096"
    - "-t"
    - "0"
    - "-m"
    - "gemini-1.0-pro-001"
    - "-i"
    - "non-persistent://tg/request/text-completion-rag"
    - "-o"
    - "non-persistent://tg/response/text-completion-rag-response"
    "deploy":
      "resources":
        "limits":
          "cpus": "0.5"
          "memory": "128M"
        "reservations":
          "cpus": "0.1"
          "memory": "128M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
    "volumes":
    - "./vertexai:/vertexai"
  "vectorize":
    "command":
    - "embeddings-vectorize"
    - "-p"
    - "pulsar://pulsar:6650"
    "deploy":
      "resources":
        "limits":
          "cpus": "1.0"
          "memory": "512M"
        "reservations":
          "cpus": "0.5"
          "memory": "512M"
    "image": "docker.io/trustgraph/trustgraph-flow:0.9.5"
    "restart": "on-failure:100"
"volumes":
  "cassandra": {}
  "grafana-storage": {}
  "prometheus-data": {}
  "pulsar-conf": {}
  "pulsar-data": {}
  "qdrant": {}