diff --git a/Makefile b/Makefile index 957c1810..2d419147 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # VERSION=$(shell git describe | sed 's/^v//') -VERSION=0.6.7 +VERSION=0.6.8 DOCKER=podman diff --git a/docker-compose-azure.yaml b/docker-compose-azure.yaml index 66643d3d..345a0d54 100644 --- a/docker-compose-azure.yaml +++ b/docker-compose-azure.yaml @@ -11,14 +11,14 @@ - "chunker-recursive" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "embeddings": "command": - "embeddings-hf" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "etcd": "command": @@ -60,7 +60,7 @@ - "non-persistent://tg/request/prompt-rag" - "--prompt-response-queue" - "non-persistent://tg/response/prompt-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "init-pulsar": "command": @@ -77,14 +77,14 @@ - "kg-extract-definitions" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "kg-extract-relationships": "command": - "kg-extract-relationships" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "milvus": "command": @@ -122,7 +122,7 @@ - "pdf-decoder" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prometheus": "image": "docker.io/prom/prometheus:v2.53.1" @@ -141,7 +141,7 @@ - "non-persistent://tg/request/text-completion" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prompt-rag": "command": @@ -156,7 +156,7 @@ - "non-persistent://tg/request/text-completion-rag" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "pulsar": "command": "bin/pulsar standalone" @@ -183,7 +183,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "query-triples": "command": @@ -192,7 +192,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-graph-embeddings": "command": @@ -201,7 +201,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-triples": "command": @@ -210,7 +210,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion": "command": @@ -221,7 +221,7 @@ - "${AZURE_TOKEN}" - "-e" - "${AZURE_ENDPOINT}" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion-rag": "command": @@ -236,14 +236,14 @@ - "non-persistent://tg/request/text-completion-rag" - "-o" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "vectorize": "command": - "embeddings-vectorize" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "volumes": "cassandra": {} diff --git a/docker-compose-bedrock.yaml b/docker-compose-bedrock.yaml index 59e7f9ce..e01075c1 100644 --- a/docker-compose-bedrock.yaml +++ b/docker-compose-bedrock.yaml @@ -15,14 +15,14 @@ - "2000" - "--chunk-overlap" - "100" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "embeddings": "command": - "embeddings-hf" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "etcd": "command": @@ -64,7 +64,7 @@ - "non-persistent://tg/request/prompt-rag" - "--prompt-response-queue" - "non-persistent://tg/response/prompt-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "init-pulsar": "command": @@ -81,14 +81,14 @@ - "kg-extract-definitions" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "kg-extract-relationships": "command": - "kg-extract-relationships" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "milvus": "command": @@ -126,7 +126,7 @@ - "pdf-decoder" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prometheus": "image": "docker.io/prom/prometheus:v2.53.1" @@ -145,7 +145,7 @@ - "non-persistent://tg/request/text-completion" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prompt-rag": "command": @@ -160,7 +160,7 @@ - "non-persistent://tg/request/text-completion-rag" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "pulsar": "command": "bin/pulsar standalone" @@ -187,7 +187,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "query-triples": "command": @@ -196,7 +196,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-graph-embeddings": "command": @@ -205,7 +205,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-triples": "command": @@ -214,7 +214,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion": "command": @@ -227,7 +227,7 @@ - "${AWS_SECRET_KEY}" - "-r" - "us-west-2" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion-rag": "command": @@ -244,14 +244,14 @@ - "non-persistent://tg/request/text-completion-rag" - "-o" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "vectorize": "command": - "embeddings-vectorize" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "volumes": "cassandra": {} diff --git a/docker-compose-claude.yaml b/docker-compose-claude.yaml index 6260100e..a8b208ef 100644 --- a/docker-compose-claude.yaml +++ b/docker-compose-claude.yaml @@ -11,14 +11,14 @@ - "chunker-recursive" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "embeddings": "command": - "embeddings-hf" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "etcd": "command": @@ -60,7 +60,7 @@ - "non-persistent://tg/request/prompt-rag" - "--prompt-response-queue" - "non-persistent://tg/response/prompt-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "init-pulsar": "command": @@ -77,14 +77,14 @@ - "kg-extract-definitions" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "kg-extract-relationships": "command": - "kg-extract-relationships" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "milvus": "command": @@ -122,7 +122,7 @@ - "pdf-decoder" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prometheus": "image": "docker.io/prom/prometheus:v2.53.1" @@ -141,7 +141,7 @@ - "non-persistent://tg/request/text-completion" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prompt-rag": "command": @@ -156,7 +156,7 @@ - "non-persistent://tg/request/text-completion-rag" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "pulsar": "command": "bin/pulsar standalone" @@ -183,7 +183,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "query-triples": "command": @@ -192,7 +192,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-graph-embeddings": "command": @@ -201,7 +201,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-triples": "command": @@ -210,7 +210,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion": "command": @@ -219,7 +219,7 @@ - "pulsar://pulsar:6650" - "-k" - "${CLAUDE_KEY}" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion-rag": "command": @@ -232,14 +232,14 @@ - "non-persistent://tg/request/text-completion-rag" - "-o" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "vectorize": "command": - "embeddings-vectorize" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "volumes": "cassandra": {} diff --git a/docker-compose-cohere.yaml b/docker-compose-cohere.yaml index cde86d28..000ab0e7 100644 --- a/docker-compose-cohere.yaml +++ b/docker-compose-cohere.yaml @@ -15,14 +15,14 @@ - "1000" - "--chunk-overlap" - "50" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "embeddings": "command": - "embeddings-hf" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "etcd": "command": @@ -64,7 +64,7 @@ - "non-persistent://tg/request/prompt-rag" - "--prompt-response-queue" - "non-persistent://tg/response/prompt-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "init-pulsar": "command": @@ -81,14 +81,14 @@ - "kg-extract-definitions" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "kg-extract-relationships": "command": - "kg-extract-relationships" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "milvus": "command": @@ -126,7 +126,7 @@ - "pdf-decoder" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prometheus": "image": "docker.io/prom/prometheus:v2.53.1" @@ -145,7 +145,7 @@ - "non-persistent://tg/request/text-completion" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prompt-rag": "command": @@ -160,7 +160,7 @@ - "non-persistent://tg/request/text-completion-rag" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "pulsar": "command": "bin/pulsar standalone" @@ -187,7 +187,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "query-triples": "command": @@ -196,7 +196,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-graph-embeddings": "command": @@ -205,7 +205,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-triples": "command": @@ -214,7 +214,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion": "command": @@ -223,7 +223,7 @@ - "pulsar://pulsar:6650" - "-k" - "${COHERE_KEY}" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion-rag": "command": @@ -236,14 +236,14 @@ - "non-persistent://tg/request/text-completion-rag" - "-o" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "vectorize": "command": - "embeddings-vectorize" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "volumes": "cassandra": {} diff --git a/docker-compose-mix.yaml b/docker-compose-mix.yaml index 43cd1567..a44a0f23 100644 --- a/docker-compose-mix.yaml +++ b/docker-compose-mix.yaml @@ -15,14 +15,14 @@ - "4000" - "--chunk-overlap" - "120" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "embeddings": "command": - "embeddings-hf" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "etcd": "command": @@ -64,7 +64,7 @@ - "non-persistent://tg/request/prompt-rag" - "--prompt-response-queue" - "non-persistent://tg/response/prompt-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "init-pulsar": "command": @@ -81,14 +81,14 @@ - "kg-extract-definitions" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "kg-extract-relationships": "command": - "kg-extract-relationships" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "milvus": "command": @@ -126,7 +126,7 @@ - "pdf-decoder" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prometheus": "image": "docker.io/prom/prometheus:v2.53.1" @@ -145,7 +145,7 @@ - "non-persistent://tg/request/text-completion" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prompt-rag": "command": @@ -160,7 +160,7 @@ - "non-persistent://tg/request/text-completion-rag" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "pulsar": "command": "bin/pulsar standalone" @@ -187,7 +187,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "query-triples": "command": @@ -196,7 +196,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-graph-embeddings": "command": @@ -205,7 +205,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-triples": "command": @@ -214,7 +214,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion": "command": @@ -225,7 +225,7 @@ - "${COHERE_KEY}" - "-m" - "c4ai-aya-23-35b" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion-rag": "command": @@ -240,14 +240,14 @@ - "non-persistent://tg/response/text-completion-rag-response" - "-m" - "c4ai-aya-23-8b" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "vectorize": "command": - "embeddings-vectorize" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "volumes": "cassandra": {} diff --git a/docker-compose-ollama.yaml b/docker-compose-ollama.yaml index cb1e0d34..f2ba8e67 100644 --- a/docker-compose-ollama.yaml +++ b/docker-compose-ollama.yaml @@ -11,14 +11,14 @@ - "chunker-recursive" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "embeddings": "command": - "embeddings-hf" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "etcd": "command": @@ -60,7 +60,7 @@ - "non-persistent://tg/request/prompt-rag" - "--prompt-response-queue" - "non-persistent://tg/response/prompt-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "init-pulsar": "command": @@ -77,14 +77,14 @@ - "kg-extract-definitions" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "kg-extract-relationships": "command": - "kg-extract-relationships" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "milvus": "command": @@ -122,7 +122,7 @@ - "pdf-decoder" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prometheus": "image": "docker.io/prom/prometheus:v2.53.1" @@ -141,7 +141,7 @@ - "non-persistent://tg/request/text-completion" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prompt-rag": "command": @@ -156,7 +156,7 @@ - "non-persistent://tg/request/text-completion-rag" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "pulsar": "command": "bin/pulsar standalone" @@ -183,7 +183,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "query-triples": "command": @@ -192,7 +192,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-graph-embeddings": "command": @@ -201,7 +201,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-triples": "command": @@ -210,7 +210,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion": "command": @@ -219,7 +219,7 @@ - "pulsar://pulsar:6650" - "-r" - "${OLLAMA_HOST}" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion-rag": "command": @@ -232,14 +232,14 @@ - "non-persistent://tg/request/text-completion-rag" - "-o" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "vectorize": "command": - "embeddings-vectorize" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "volumes": "cassandra": {} diff --git a/docker-compose-openai-neo4j.yaml b/docker-compose-openai-neo4j.yaml index 3697dc01..231b0d46 100644 --- a/docker-compose-openai-neo4j.yaml +++ b/docker-compose-openai-neo4j.yaml @@ -4,14 +4,14 @@ - "chunker-recursive" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "embeddings": "command": - "embeddings-hf" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "etcd": "command": @@ -53,7 +53,7 @@ - "non-persistent://tg/request/prompt-rag" - "--prompt-response-queue" - "non-persistent://tg/response/prompt-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "init-pulsar": "command": @@ -70,14 +70,14 @@ - "kg-extract-definitions" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "kg-extract-relationships": "command": - "kg-extract-relationships" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "milvus": "command": @@ -125,7 +125,7 @@ - "pdf-decoder" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prometheus": "image": "docker.io/prom/prometheus:v2.53.1" @@ -144,7 +144,7 @@ - "non-persistent://tg/request/text-completion" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prompt-rag": "command": @@ -159,7 +159,7 @@ - "non-persistent://tg/request/text-completion-rag" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "pulsar": "command": "bin/pulsar standalone" @@ -186,7 +186,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "query-triples": "command": @@ -195,7 +195,7 @@ - "pulsar://pulsar:6650" - "-g" - "bolt://neo4j:7687" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-graph-embeddings": "command": @@ -204,7 +204,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-triples": "command": @@ -213,7 +213,7 @@ - "pulsar://pulsar:6650" - "-g" - "bolt://neo4j:7687" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion": "command": @@ -222,7 +222,7 @@ - "pulsar://pulsar:6650" - "-k" - "${OPENAI_KEY}" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion-rag": "command": @@ -235,14 +235,14 @@ - "non-persistent://tg/request/text-completion-rag" - "-o" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "vectorize": "command": - "embeddings-vectorize" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "volumes": "etcd": {} diff --git a/docker-compose-openai.yaml b/docker-compose-openai.yaml index e60f355d..e1f59daa 100644 --- a/docker-compose-openai.yaml +++ b/docker-compose-openai.yaml @@ -11,14 +11,14 @@ - "chunker-recursive" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "embeddings": "command": - "embeddings-hf" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "etcd": "command": @@ -60,7 +60,7 @@ - "non-persistent://tg/request/prompt-rag" - "--prompt-response-queue" - "non-persistent://tg/response/prompt-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "init-pulsar": "command": @@ -77,14 +77,14 @@ - "kg-extract-definitions" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "kg-extract-relationships": "command": - "kg-extract-relationships" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "milvus": "command": @@ -122,7 +122,7 @@ - "pdf-decoder" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prometheus": "image": "docker.io/prom/prometheus:v2.53.1" @@ -141,7 +141,7 @@ - "non-persistent://tg/request/text-completion" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prompt-rag": "command": @@ -156,7 +156,7 @@ - "non-persistent://tg/request/text-completion-rag" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "pulsar": "command": "bin/pulsar standalone" @@ -183,7 +183,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "query-triples": "command": @@ -192,7 +192,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-graph-embeddings": "command": @@ -201,7 +201,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-triples": "command": @@ -210,7 +210,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion": "command": @@ -219,7 +219,7 @@ - "pulsar://pulsar:6650" - "-k" - "${OPENAI_KEY}" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion-rag": "command": @@ -232,14 +232,14 @@ - "non-persistent://tg/request/text-completion-rag" - "-o" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "vectorize": "command": - "embeddings-vectorize" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "volumes": "cassandra": {} diff --git a/docker-compose-vertexai.yaml b/docker-compose-vertexai.yaml index e4e6e8e2..adec3a97 100644 --- a/docker-compose-vertexai.yaml +++ b/docker-compose-vertexai.yaml @@ -11,14 +11,14 @@ - "chunker-recursive" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "embeddings": "command": - "embeddings-hf" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "etcd": "command": @@ -60,7 +60,7 @@ - "non-persistent://tg/request/prompt-rag" - "--prompt-response-queue" - "non-persistent://tg/response/prompt-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "init-pulsar": "command": @@ -77,14 +77,14 @@ - "kg-extract-definitions" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "kg-extract-relationships": "command": - "kg-extract-relationships" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "milvus": "command": @@ -122,7 +122,7 @@ - "pdf-decoder" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prometheus": "image": "docker.io/prom/prometheus:v2.53.1" @@ -141,7 +141,7 @@ - "non-persistent://tg/request/text-completion" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "prompt-rag": "command": @@ -156,7 +156,7 @@ - "non-persistent://tg/request/text-completion-rag" - "--text-completion-response-queue" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "pulsar": "command": "bin/pulsar standalone" @@ -183,7 +183,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "query-triples": "command": @@ -192,7 +192,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-graph-embeddings": "command": @@ -201,7 +201,7 @@ - "pulsar://pulsar:6650" - "-t" - "http://milvus:19530" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "store-triples": "command": @@ -210,7 +210,7 @@ - "pulsar://pulsar:6650" - "-g" - "cassandra" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "text-completion": "command": @@ -221,7 +221,7 @@ - "/vertexai/private.json" - "-r" - "us-west1" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "volumes": - "./vertexai:/vertexai" @@ -238,7 +238,7 @@ - "non-persistent://tg/request/text-completion-rag" - "-o" - "non-persistent://tg/response/text-completion-rag-response" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "volumes": - "./vertexai:/vertexai" @@ -247,7 +247,7 @@ - "embeddings-vectorize" - "-p" - "pulsar://pulsar:6650" - "image": "docker.io/trustgraph/trustgraph-flow:0.6.6" + "image": "docker.io/trustgraph/trustgraph-flow:0.6.8" "restart": "on-failure:100" "volumes": "cassandra": {} diff --git a/scripts/chunker-token b/scripts/chunker-token new file mode 100755 index 00000000..5090defa --- /dev/null +++ b/scripts/chunker-token @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from trustgraph.chunking.token import run + +run() + diff --git a/setup.py b/setup.py index 6c57c144..be88d7ed 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import os with open("README.md", "r") as fh: long_description = fh.read() -version = "0.6.7" +version = "0.6.8" setuptools.setup( name="trustgraph", @@ -49,9 +49,11 @@ setuptools.setup( "boto3", "openai", "neo4j", + "tiktoken", ], scripts=[ "scripts/chunker-recursive", + "scripts/chunker-token", "scripts/concat-parquet", "scripts/dump-parquet", "scripts/embeddings-hf", diff --git a/trustgraph/chunking/recursive/chunker.py b/trustgraph/chunking/recursive/chunker.py index 7f7026f3..d38b580a 100755 --- a/trustgraph/chunking/recursive/chunker.py +++ b/trustgraph/chunking/recursive/chunker.py @@ -69,7 +69,7 @@ class Processor(ConsumerProducer): self.send(r) - print("Done.", flush=True) + print("Done.", flush=True) @staticmethod def add_args(parser): diff --git a/trustgraph/chunking/token/__init__.py b/trustgraph/chunking/token/__init__.py new file mode 100644 index 00000000..3b816664 --- /dev/null +++ b/trustgraph/chunking/token/__init__.py @@ -0,0 +1,3 @@ + +from . chunker import * + diff --git a/trustgraph/chunking/token/__main__.py b/trustgraph/chunking/token/__main__.py new file mode 100644 index 00000000..18e14ad5 --- /dev/null +++ b/trustgraph/chunking/token/__main__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 + +from . chunker import run + +if __name__ == '__main__': + run() + diff --git a/trustgraph/chunking/token/chunker.py b/trustgraph/chunking/token/chunker.py new file mode 100755 index 00000000..78afeff3 --- /dev/null +++ b/trustgraph/chunking/token/chunker.py @@ -0,0 +1,97 @@ + +""" +Simple decoder, accepts text documents on input, outputs chunks from the +as text as separate output objects. +""" + +from langchain_text_splitters import TokenTextSplitter + +from ... schema import TextDocument, Chunk, Source +from ... schema import text_ingest_queue, chunk_ingest_queue +from ... log_level import LogLevel +from ... base import ConsumerProducer + +module = ".".join(__name__.split(".")[1:-1]) + +default_input_queue = text_ingest_queue +default_output_queue = chunk_ingest_queue +default_subscriber = module + +class Processor(ConsumerProducer): + + def __init__(self, **params): + + input_queue = params.get("input_queue", default_input_queue) + output_queue = params.get("output_queue", default_output_queue) + subscriber = params.get("subscriber", default_subscriber) + chunk_size = params.get("chunk_size", 250) + chunk_overlap = params.get("chunk_overlap", 15) + + super(Processor, self).__init__( + **params | { + "input_queue": input_queue, + "output_queue": output_queue, + "subscriber": subscriber, + "input_schema": TextDocument, + "output_schema": Chunk, + } + ) + + self.text_splitter = TokenTextSplitter( + encoding_name="cl100k_base", + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + ) + + def handle(self, msg): + + v = msg.value() + print(f"Chunking {v.source.id}...", flush=True) + + texts = self.text_splitter.create_documents( + [v.text.decode("utf-8")] + ) + + for ix, chunk in enumerate(texts): + + id = v.source.id + "-c" + str(ix) + + r = Chunk( + source=Source( + source=v.source.source, + id=id, + title=v.source.title + ), + chunk=chunk.page_content.encode("utf-8"), + ) + + self.send(r) + + print("Done.", flush=True) + + @staticmethod + def add_args(parser): + + ConsumerProducer.add_args( + parser, default_input_queue, default_subscriber, + default_output_queue, + ) + + parser.add_argument( + '-z', '--chunk-size', + type=int, + default=250, + help=f'Chunk size (default: 250)' + ) + + parser.add_argument( + '-v', '--chunk-overlap', + type=int, + default=15, + help=f'Chunk overlap (default: 15)' + ) + +def run(): + + Processor.start(module, __doc__) +