mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 16:36:21 +02:00
Expose LLM token usage (in_token, out_token, model) across all service layers

Propagate token counts from LLM services through the prompt, text-completion, graph-RAG, document-RAG, and agent orchestrator pipelines to the API gateway and Python SDK. All fields are Optional — None means "not available", distinguishing it from a real zero count.

Key changes:
- Schema: Add in_token/out_token/model to TextCompletionResponse, PromptResponse, GraphRagResponse, DocumentRagResponse, AgentResponse
- TextCompletionClient: New TextCompletionResult return type. Split into text_completion() (non-streaming) and text_completion_stream() (streaming with per-chunk handler callback)
- PromptClient: New PromptResult with response_type (text/json/jsonl), typed fields (text/object/objects), and token usage. All callers updated.
- RAG services: Accumulate token usage across all prompt calls (extract-concepts, edge-scoring, edge-reasoning, synthesis). Non-streaming path sends a single combined response instead of chunk + end_of_session.
- Agent orchestrator: UsageTracker accumulates tokens across meta-router, pattern prompt calls, and react reasoning. Attached to end_of_dialog.
- Translators: Encode token fields whenever they are not None (an "is not None" check, not a truthiness check, so a real zero is still encoded)
- Python SDK: RAG and text-completion methods return TextCompletionResult (non-streaming) or RAGChunk/AgentAnswer with token fields (streaming)
- CLI: --show-usage flag on tg-invoke-llm, tg-invoke-prompt, tg-invoke-graph-rag, tg-invoke-document-rag, tg-invoke-agent
44 lines
2.1 KiB
Python
44 lines
2.1 KiB
Python
|
|
from . pubsub import get_pubsub, add_pubsub_args
|
|
from . async_processor import AsyncProcessor
|
|
from . consumer import Consumer
|
|
from . producer import Producer
|
|
from . publisher import Publisher
|
|
from . subscriber import Subscriber
|
|
from . metrics import ProcessorMetrics, ConsumerMetrics, ProducerMetrics
|
|
from . logging import add_logging_args, setup_logging
|
|
from . flow_processor import FlowProcessor
|
|
from . consumer_spec import ConsumerSpec
|
|
from . parameter_spec import ParameterSpec
|
|
from . producer_spec import ProducerSpec
|
|
from . subscriber_spec import SubscriberSpec
|
|
from . request_response_spec import RequestResponseSpec
|
|
from . llm_service import LlmService, LlmResult, LlmChunk
|
|
from . librarian_client import LibrarianClient
|
|
from . chunking_service import ChunkingService
|
|
from . embeddings_service import EmbeddingsService
|
|
from . embeddings_client import EmbeddingsClientSpec
|
|
from . text_completion_client import (
|
|
TextCompletionClientSpec, TextCompletionClient, TextCompletionResult,
|
|
)
|
|
from . prompt_client import PromptClientSpec, PromptClient, PromptResult
|
|
from . triples_store_service import TriplesStoreService
|
|
from . graph_embeddings_store_service import GraphEmbeddingsStoreService
|
|
from . document_embeddings_store_service import DocumentEmbeddingsStoreService
|
|
from . triples_query_service import TriplesQueryService
|
|
from . graph_embeddings_query_service import GraphEmbeddingsQueryService
|
|
from . document_embeddings_query_service import DocumentEmbeddingsQueryService
|
|
from . graph_embeddings_client import GraphEmbeddingsClientSpec
|
|
from . triples_client import TriplesClientSpec
|
|
from . document_embeddings_client import DocumentEmbeddingsClientSpec
|
|
from . agent_service import AgentService
|
|
from . graph_rag_client import GraphRagClientSpec
|
|
from . tool_service import ToolService
|
|
from . tool_client import ToolClientSpec
|
|
from . dynamic_tool_service import DynamicToolService
|
|
from . tool_service_client import ToolServiceClientSpec
|
|
from . agent_client import AgentClientSpec
|
|
from . structured_query_client import StructuredQueryClientSpec
|
|
from . row_embeddings_query_client import RowEmbeddingsQueryClientSpec
|
|
from . collection_config_handler import CollectionConfigHandler
|
|
|