Clean up dead demos and restructure directory layout

Phase 1 - Remove dead/duplicate content:
- Delete demos/samples_java/ (incomplete Java demo)
- Delete demos/shared/chatbot_ui/ (replaced by AnythingLLM)
- Delete demos/shared/grafana/, prometheus/, logfire/, honeycomb/, signoz/
  (legacy observability stacks; only jaeger is retained)
- Delete variant docker-compose files (honeycomb, logfire, signoz, jaeger)
- Delete demos/use_cases/spotify_bearer_auth/run_demo.sh (stale script)

Phase 2 - Restructure into semantic categories:
- getting_started/  : weather_forecast, llm_gateway
- agent_orchestration/ : travel_agents, multi_agent_crewai_langchain
- llm_routing/       : model_alias_routing, preference_based_routing, claude_code_router
- filter_chains/     : http_filter, mcp_filter
- integrations/      : ollama, spotify_bearer_auth
- advanced/          : model_choice_test_harness, multi_turn_rag,
                       currency_exchange, stock_quote

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Adil Hafeez 2026-02-15 01:43:31 -08:00
parent 9c4b2fc570
commit 78f761ef75
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
183 changed files with 0 additions and 4879 deletions

View file

@ -1,36 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"python": "${workspaceFolder}/venv/bin/python",
"name": "chatbot-ui",
"type": "debugpy",
"request": "launch",
"program": "run_stream.py",
"console": "integratedTerminal",
"env": {
"LLM": "1",
"CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1",
"STREAMING": "True",
"PLANO_CONFIG": "../../samples_python/weather_forecast/plano_config.yaml"
}
},
{
"python": "${workspaceFolder}/venv/bin/python",
"name": "chatbot-ui llm",
"type": "debugpy",
"request": "launch",
"program": "run_stream.py",
"console": "integratedTerminal",
"env": {
"LLM": "1",
"CHAT_COMPLETION_ENDPOINT": "http://localhost:12000/v1",
"STREAMING": "True",
"PLANO_CONFIG": "../../samples_python/weather_forecast/plano_config.yaml"
}
},
]
}

View file

@ -1,18 +0,0 @@
FROM python:3.12 AS base
FROM base AS builder
WORKDIR /src
COPY requirements.txt /src/
RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
FROM python:3.12-slim AS output
COPY --from=builder /runtime /usr/local
WORKDIR /app
COPY *.py .
CMD ["python", "run_stream.py"]

View file

@ -1,206 +0,0 @@
from datetime import datetime
import json
import logging
import os
import yaml
import gradio as gr
from typing import List, Optional, Tuple
from functools import partial
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
log = logging.getLogger(__name__)
GRADIO_CSS_STYLE = """
.json-container {
height: 80vh !important;
overflow-y: auto !important;
}
.chatbot {
height: calc(80vh - 100px) !important;
overflow-y: auto !important;
}
footer {visibility: hidden}
"""
def chat(
query: Optional[str],
conversation: Optional[List[Tuple[str, str]]],
history: List[dict],
client,
):
history.append({"role": "user", "content": query})
try:
response = client.chat.completions.create(
# we select model from plano_config file
model="None",
messages=history,
temperature=1.0,
stream=True,
)
except Exception as e:
# remove last user message in case of exception
history.pop()
log.info("Error calling gateway API: {}".format(e))
raise gr.Error("Error calling gateway API: {}".format(e))
conversation.append((query, ""))
for chunk in response:
tokens = process_stream_chunk(chunk, history)
if tokens:
conversation[-1] = (
conversation[-1][0],
conversation[-1][1] + tokens,
)
yield "", conversation, history
def create_gradio_app(demo_description, client):
with gr.Blocks(
theme=gr.themes.Default(
font_mono=[gr.themes.GoogleFont("IBM Plex Mono"), "Arial", "sans-serif"]
),
fill_height=True,
css=GRADIO_CSS_STYLE,
) as demo:
with gr.Row(equal_height=True):
history = gr.State([])
with gr.Column(scale=1):
gr.Markdown(demo_description),
with gr.Accordion("Available Tools/APIs", open=True):
with gr.Column(scale=1):
gr.JSON(
value=get_prompt_targets(),
show_indices=False,
elem_classes="json-container",
min_height="80vh",
)
with gr.Column(scale=2):
chatbot = gr.Chatbot(
label="Plano Chatbot",
elem_classes="chatbot",
)
textbox = gr.Textbox(
show_label=False,
placeholder="Enter text and press enter",
autofocus=True,
elem_classes="textbox",
)
chat_with_client = partial(chat, client=client)
textbox.submit(
chat_with_client,
[textbox, chatbot, history],
[textbox, chatbot, history],
)
return demo
def process_stream_chunk(chunk, history):
delta = chunk.choices[0].delta
if delta.role and delta.role != history[-1]["role"]:
# create new history item if role changes
# this is likely due to Plano tool call and api response
history.append({"role": delta.role})
history[-1]["model"] = chunk.model
# append tool calls to history if there are any in the chunk
if delta.tool_calls:
history[-1]["tool_calls"] = delta.tool_calls
if delta.content:
# append content to the last history item
if history[-1]["model"] != "Arch-Function-Chat":
history[-1]["content"] = history[-1].get("content", "") + delta.content
# yield content if it is from assistant
if history[-1]["model"] == "Arch-Function":
return None
if history[-1]["role"] == "assistant":
return delta.content
return None
def convert_prompt_target_to_openai_format(target):
tool = {
"description": target["description"],
"parameters": {"type": "object", "properties": {}, "required": []},
}
if "parameters" in target:
for param_info in target["parameters"]:
parameter = {
"type": param_info["type"],
"description": param_info["description"],
}
for key in ["default", "format", "enum", "items", "minimum", "maximum"]:
if key in param_info:
parameter[key] = param_info[key]
tool["parameters"]["properties"][param_info["name"]] = parameter
required = param_info.get("required", False)
if required:
tool["parameters"]["required"].append(param_info["name"])
return {"name": target["name"], "info": tool}
def get_prompt_targets():
try:
with open(os.getenv("PLANO_CONFIG", "config.yaml"), "r") as file:
config = yaml.safe_load(file)
available_tools = []
if "prompt_targets" in config:
for target in config["prompt_targets"]:
if not target.get("default", False):
available_tools.append(
convert_prompt_target_to_openai_format(target)
)
return {tool["name"]: tool["info"] for tool in available_tools}
elif "llm_providers" in config:
return config["llm_providers"]
except Exception as e:
log.info(e)
return None
def get_llm_models():
try:
with open(os.getenv("PLANO_CONFIG", "config.yaml"), "r") as file:
config = yaml.safe_load(file)
available_models = [""]
default_llm = None
for llm_providers in config["llm_providers"]:
if llm_providers.get("default", False):
default_llm = llm_providers["name"]
else:
available_models.append(llm_providers["name"])
# place default model at the beginning of the list
if default_llm:
available_models.insert(0, default_llm)
return available_models
except Exception as e:
log.info(e)
return []
def format_log(message):
time_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]
return f"{time_now} - {message}"

View file

@ -1,9 +0,0 @@
gradio==5.3.0
huggingface_hub<1.0.0
async_timeout==4.0.3
loguru==0.7.2
asyncio==3.4.3
httpx==0.27.0
python-dotenv==1.0.1
pydantic==2.8.2
openai==1.54.0

View file

@ -1,160 +0,0 @@
import json
import os
import logging
import yaml
import gradio as gr
from typing import List, Optional, Tuple
from openai import OpenAI
from dotenv import load_dotenv
from common import format_log, get_llm_models, get_prompt_targets, process_stream_chunk
load_dotenv()
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
log = logging.getLogger(__name__)
CHAT_COMPLETION_ENDPOINT = os.getenv("CHAT_COMPLETION_ENDPOINT")
log.info(f"CHAT_COMPLETION_ENDPOINT: {CHAT_COMPLETION_ENDPOINT}")
CSS_STYLE = """
.json-container {
height: 95vh !important;
overflow-y: auto !important;
}
.chatbot {
height: calc(95vh - 100px) !important;
overflow-y: auto !important;
}
footer {visibility: hidden}
"""
def chat(
query: Optional[str],
conversation: Optional[List[Tuple[str, str]]],
history: List[dict],
debug_output: str,
model_selector: str,
):
history.append({"role": "user", "content": query})
if debug_output is None:
debug_output = ""
try:
headers = {}
if model_selector and model_selector != "":
headers["x-arch-llm-provider-hint"] = model_selector
client = OpenAI(
api_key="None",
base_url=CHAT_COMPLETION_ENDPOINT,
default_headers=headers,
)
response = client.chat.completions.create(
# we select model from arch_config file
model="None",
messages=history,
temperature=1.0,
stream=True,
)
except Exception as e:
# remove last user message in case of exception
history.pop()
log.info("Error calling gateway API: {}".format(e))
raise gr.Error("Error calling gateway API: {}".format(e))
conversation.append((query, ""))
model_is_set = False
for chunk in response:
tokens = process_stream_chunk(chunk, history)
if tokens and not model_is_set:
model_is_set = True
model = history[-1]["model"]
debug_output = debug_output + "\n" + format_log(f"model: {model}")
if tokens:
conversation[-1] = (
conversation[-1][0],
conversation[-1][1] + tokens,
)
yield "", conversation, history, debug_output, model_selector
# update assistant response to have correct format
# arch-fc 1.1 expects following format:
# {
# "response": "<assistant response>",
# }
# and this entire block needs to be encoded in ```json\n{json_encoded_content}\n```
if not history[-1]["model"].startswith("Arch"):
assistant_response = {
"response": history[-1]["content"],
}
history[-1]["content"] = "```json\n{}\n```".format(
json.dumps(assistant_response)
)
log.info("history: {}".format(json.dumps(history)))
def main():
with gr.Blocks(
theme=gr.themes.Default(
font_mono=[gr.themes.GoogleFont("IBM Plex Mono"), "Arial", "sans-serif"]
),
fill_height=True,
css=CSS_STYLE,
) as demo:
with gr.Row(equal_height=True):
history = gr.State([])
with gr.Column(scale=1):
with gr.Accordion("See available tools", open=False):
with gr.Column(scale=1):
gr.JSON(
value=get_prompt_targets(),
show_indices=False,
elem_classes="json-container",
min_height="50vh",
)
model_selector_textbox = gr.Dropdown(
get_llm_models(),
label="override model",
elem_classes="dropdown",
)
debug_output = gr.TextArea(
label="debug output",
elem_classes="debug_output",
)
with gr.Column(scale=2):
chatbot = gr.Chatbot(
label="Arch Chatbot",
elem_classes="chatbot",
)
textbox = gr.Textbox(
show_label=False,
placeholder="Enter text and press enter",
autofocus=True,
elem_classes="textbox",
)
textbox.submit(
chat,
[textbox, chatbot, history, debug_output, model_selector_textbox],
[textbox, chatbot, history, debug_output, model_selector_textbox],
)
demo.launch(server_name="0.0.0.0", server_port=8080, show_error=True, debug=True)
if __name__ == "__main__":
main()

View file

@ -1,15 +0,0 @@
FROM grafana/grafana:latest
FROM grafana/grafana:latest
# Set environment variables
ENV GF_SECURITY_ADMIN_USER=admin
ENV GF_SECURITY_ADMIN_PASSWORD=grafana
# Copy provisioning files
COPY ./datasource.yaml /etc/grafana/provisioning/datasources/datasource.yaml
COPY ./dashboard.yaml /etc/grafana/provisioning/dashboards/main.yaml
COPY ./dashboards /var/lib/grafana/dashboards
# Expose Grafana port
EXPOSE 3000

View file

@ -1,12 +0,0 @@
apiVersion: 1
providers:
- name: "Dashboard provider"
orgId: 1
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: false
options:
path: /var/lib/grafana/dashboards
foldersFromFilesStructure: true

View file

@ -1,794 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 1,
"links": [],
"panels": [
{
"datasource": {
"default": true,
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 5,
"x": 0,
"y": 0
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "envoy_cluster_upstream_rq_completed{envoy_cluster_name=~\"openai|api_server\"}",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": true,
"legendFormat": "{{envoy_cluster_name}}",
"range": false,
"refId": "A",
"useBackend": false
}
],
"title": "# of Completed Requests",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))"
},
"properties": [
{
"id": "displayName",
"value": "Input Sequence Length"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 9,
"x": 5,
"y": 0
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0",
"targets": [
{
"disableTextWrap": false,
"editorMode": "code",
"expr": "histogram_quantile(0.9, sum by(le) (rate(input_sequence_length_bucket[5m])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "input sequence length (p90)",
"type": "timeseries"
},
{
"datasource": {
"default": true,
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))"
},
"properties": [
{
"id": "displayName",
"value": "Output Sequence Length"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 10,
"x": 14,
"y": 0
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "histogram_quantile(0.9, sum(rate(output_sequence_length_bucket[5m])) by(le))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "output sequence length (p90)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))"
},
"properties": [
{
"id": "displayName",
"value": "Time to First Token"
}
]
}
]
},
"gridPos": {
"h": 14,
"w": 11,
"x": 0,
"y": 9
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0",
"targets": [
{
"disableTextWrap": false,
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[5m])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "time to first token (p90)",
"type": "timeseries"
},
{
"datasource": {
"default": true,
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum by(le) (rate(request_latency_bucket[1h])))"
},
"properties": [
{
"id": "displayName",
"value": "Request Latency"
}
]
},
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum(rate(time_to_first_token_bucket[60m])) by (le))"
},
"properties": [
{
"id": "displayName",
"value": "Time to First Token"
}
]
}
]
},
"gridPos": {
"h": 14,
"w": 13,
"x": 11,
"y": 9
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(request_latency_bucket[5m])))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "request latency (p90)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum by(le) (rate(time_per_output_token_bucket[1h])))"
},
"properties": [
{
"id": "displayName",
"value": "Time per Output Token"
}
]
}
]
},
"gridPos": {
"h": 13,
"w": 12,
"x": 0,
"y": 23
},
"id": 9,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(time_per_output_token_bucket[1h])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Time per Output Token (50p)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "histogram_quantile(0.5, sum by(le) (rate(tokens_per_second_bucket[1h])))"
},
"properties": [
{
"id": "displayName",
"value": "Tokens per Second"
}
]
}
]
},
"gridPos": {
"h": 13,
"w": 12,
"x": 12,
"y": 23
},
"id": 10,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.3.0",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(tokens_per_second_bucket[1h])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Tokens per Second(50p)",
"type": "timeseries"
}
],
"preload": false,
"refresh": "",
"schemaVersion": 40,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Plano Gateway Dashboard",
"uid": "adt6uhx5lk8aob",
"version": 1,
"weekStart": ""
}

View file

@ -1,9 +0,0 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
url: http://prometheus:9090
isDefault: true
access: proxy
editable: true

View file

@ -1,5 +0,0 @@
FROM otel/opentelemetry-collector:latest
COPY otel-collector-config.yaml /etc/otel-collector-config.yaml
ENTRYPOINT ["/otelcol", "--config=/etc/otel-collector-config.yaml"]

View file

@ -1,24 +0,0 @@
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318
exporters:
otlp:
endpoint: "api.honeycomb.io:443"
headers:
"x-honeycomb-team": "${HONEYCOMB_API_KEY}"
processors:
batch:
timeout: 5s
service:
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [otlp]

View file

@ -1,5 +0,0 @@
FROM otel/opentelemetry-collector:latest
COPY otel-collector-config.yaml /etc/otel-collector-config.yaml
ENTRYPOINT ["/otelcol", "--config=/etc/otel-collector-config.yaml"]

View file

@ -1,24 +0,0 @@
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318
exporters:
otlphttp:
endpoint: "https://logfire-api.pydantic.dev"
headers:
Authorization: "${LOGFIRE_API_KEY}"
processors:
batch:
timeout: 5s
service:
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [otlphttp]

View file

@ -1,11 +0,0 @@
FROM prom/prometheus:latest
# Set the command to run Prometheus with the specified configuration file
CMD ["--config.file=/etc/prometheus/prometheus.yaml"]
# Copy the Prometheus configuration files
COPY ./prometheus.yaml /etc/prometheus/prometheus.yaml
# Expose Prometheus port
EXPOSE 9090

View file

@ -1,23 +0,0 @@
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 15s
alerting:
alertmanagers:
- static_configs:
- targets: []
scheme: http
timeout: 10s
api_version: v2
scrape_configs:
- job_name: envoy
honor_timestamps: true
scrape_interval: 15s
scrape_timeout: 10s
metrics_path: /stats
scheme: http
static_configs:
- targets:
- host.docker.internal:19901
params:
format: ["prometheus"]

View file

@ -1,35 +0,0 @@
global:
resolve_timeout: 1m
slack_api_url: 'https://hooks.slack.com/services/xxx'
route:
receiver: 'slack-notifications'
receivers:
- name: 'slack-notifications'
slack_configs:
- channel: '#alerts'
send_resolved: true
icon_url: https://avatars3.githubusercontent.com/u/3380462
title: |-
[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}
{{- if gt (len .CommonLabels) (len .GroupLabels) -}}
{{" "}}(
{{- with .CommonLabels.Remove .GroupLabels.Names }}
{{- range $index, $label := .SortedPairs -}}
{{ if $index }}, {{ end }}
{{- $label.Name }}="{{ $label.Value -}}"
{{- end }}
{{- end -}}
)
{{- end }}
text: >-
{{ range .Alerts -}}
*Alert:* {{ .Annotations.title }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}
*Description:* {{ .Annotations.description }}
*Details:*
{{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
{{ end }}
{{ end }}

View file

@ -1,11 +0,0 @@
groups:
- name: ExampleCPULoadGroup
rules:
- alert: HighCpuLoad
expr: system_cpu_load_average_1m > 0.1
for: 0m
labels:
severity: warning
annotations:
summary: High CPU load
description: "CPU load is > 0.1\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

View file

@ -1,75 +0,0 @@
<?xml version="1.0"?>
<clickhouse>
<!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
Optional. If you don't use replicated tables, you could omit that.
See https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication/
-->
<zookeeper>
<node index="1">
<host>zookeeper-1</host>
<port>2181</port>
</node>
<!-- <node index="2">
<host>zookeeper-2</host>
<port>2181</port>
</node>
<node index="3">
<host>zookeeper-3</host>
<port>2181</port>
</node> -->
</zookeeper>
<!-- Configuration of clusters that could be used in Distributed tables.
https://clickhouse.com/docs/en/operations/table_engines/distributed/
-->
<remote_servers>
<cluster>
<!-- Inter-server per-cluster secret for Distributed queries
default: no secret (no authentication will be performed)
If set, then Distributed queries will be validated on shards, so at least:
- such cluster should exist on the shard,
- such cluster should have the same secret.
And also (and which is more important), the initial_user will
be used as current user for the query.
Right now the protocol is pretty simple and it only takes into account:
- cluster name
- query
Also it will be nice if the following will be implemented:
- source hostname (see interserver_http_host), but then it will depends from DNS,
it can use IP address instead, but then the you need to get correct on the initiator node.
- target hostname / ip address (same notes as for source hostname)
- time-based security tokens
-->
<!-- <secret></secret> -->
<shard>
<!-- Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas). -->
<!-- <internal_replication>false</internal_replication> -->
<!-- Optional. Shard weight when writing data. Default: 1. -->
<!-- <weight>1</weight> -->
<replica>
<host>clickhouse</host>
<port>9000</port>
<!-- Optional. Priority of the replica for load_balancing. Default: 1 (less value has more priority). -->
<!-- <priority>1</priority> -->
</replica>
</shard>
<!-- <shard>
<replica>
<host>clickhouse-2</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>clickhouse-3</host>
<port>9000</port>
</replica>
</shard> -->
</cluster>
</remote_servers>
</clickhouse>

File diff suppressed because it is too large Load diff

View file

@ -1,41 +0,0 @@
<?xml version="1.0"?>
<clickhouse>
<storage_configuration>
<disks>
<default>
<keep_free_space_bytes>10485760</keep_free_space_bytes>
</default>
<s3>
<type>s3</type>
<!-- For S3 cold storage,
if region is us-east-1, endpoint can be https://<bucket-name>.s3.amazonaws.com
if region is not us-east-1, endpoint should be https://<bucket-name>.s3-<region>.amazonaws.com
For GCS cold storage,
endpoint should be https://storage.googleapis.com/<bucket-name>/data/
-->
<endpoint>https://BUCKET-NAME.s3-REGION-NAME.amazonaws.com/data/</endpoint>
<access_key_id>ACCESS-KEY-ID</access_key_id>
<secret_access_key>SECRET-ACCESS-KEY</secret_access_key>
<!-- In case of S3, uncomment the below configuration in case you want to read
AWS credentials from the Environment variables if they exist. -->
<!-- <use_environment_credentials>true</use_environment_credentials> -->
<!-- In case of GCS, uncomment the below configuration, since GCS does
not support batch deletion and result in error messages in logs. -->
<!-- <support_batch_delete>false</support_batch_delete> -->
</s3>
</disks>
<policies>
<tiered>
<volumes>
<default>
<disk>default</disk>
</default>
<s3>
<disk>s3</disk>
<perform_ttl_move_on_insert>0</perform_ttl_move_on_insert>
</s3>
</volumes>
</tiered>
</policies>
</storage_configuration>
</clickhouse>

View file

@ -1,123 +0,0 @@
<?xml version="1.0"?>
<clickhouse>
<!-- See also the files in users.d directory where the settings can be overridden. -->
<!-- Profiles of settings. -->
<profiles>
<!-- Default settings. -->
<default>
<!-- Maximum memory usage for processing single query, in bytes. -->
<max_memory_usage>10000000000</max_memory_usage>
<!-- How to choose between replicas during distributed query processing.
random - choose random replica from set of replicas with minimum number of errors
nearest_hostname - from set of replicas with minimum number of errors, choose replica
with minimum number of different symbols between replica's hostname and local hostname
(Hamming distance).
in_order - first live replica is chosen in specified order.
first_or_random - if first replica one has higher number of errors, pick a random one from replicas with minimum number of errors.
-->
<load_balancing>random</load_balancing>
</default>
<!-- Profile that allows only read queries. -->
<readonly>
<readonly>1</readonly>
</readonly>
</profiles>
<!-- Users and ACL. -->
<users>
<!-- If user name was not specified, 'default' user is used. -->
<default>
<!-- See also the files in users.d directory where the password can be overridden.
Password could be specified in plaintext or in SHA256 (in hex format).
If you want to specify password in plaintext (not recommended), place it in 'password' element.
Example: <password>qwerty</password>.
Password could be empty.
If you want to specify SHA256, place it in 'password_sha256_hex' element.
Example: <password_sha256_hex>65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5</password_sha256_hex>
Restrictions of SHA256: impossibility to connect to ClickHouse using MySQL JS client (as of July 2019).
If you want to specify double SHA1, place it in 'password_double_sha1_hex' element.
Example: <password_double_sha1_hex>e395796d6546b1b65db9d665cd43f0e858dd4303</password_double_sha1_hex>
If you want to specify a previously defined LDAP server (see 'ldap_servers' in the main config) for authentication,
place its name in 'server' element inside 'ldap' element.
Example: <ldap><server>my_ldap_server</server></ldap>
If you want to authenticate the user via Kerberos (assuming Kerberos is enabled, see 'kerberos' in the main config),
place 'kerberos' element instead of 'password' (and similar) elements.
The name part of the canonical principal name of the initiator must match the user name for authentication to succeed.
You can also place 'realm' element inside 'kerberos' element to further restrict authentication to only those requests
whose initiator's realm matches it.
Example: <kerberos />
Example: <kerberos><realm>EXAMPLE.COM</realm></kerberos>
How to generate decent password:
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-'
In first line will be password and in second - corresponding SHA256.
How to generate double SHA1:
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha1sum | tr -d '-' | xxd -r -p | sha1sum | tr -d '-'
In first line will be password and in second - corresponding double SHA1.
-->
<password></password>
<!-- List of networks with open access.
To open access from everywhere, specify:
<ip>::/0</ip>
To open access only from localhost, specify:
<ip>::1</ip>
<ip>127.0.0.1</ip>
Each element of list has one of the following forms:
<ip> IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
<host> Hostname. Example: server01.clickhouse.com.
To check access, DNS query is performed, and all received addresses compared to peer address.
<host_regexp> Regular expression for host names. Example, ^server\d\d-\d\d-\d\.clickhouse\.com$
To check access, DNS PTR query is performed for peer address and then regexp is applied.
Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address.
Strongly recommended that regexp is ends with $
All results of DNS requests are cached till server restart.
-->
<networks>
<ip>::/0</ip>
</networks>
<!-- Settings profile for user. -->
<profile>default</profile>
<!-- Quota for user. -->
<quota>default</quota>
<!-- User can create other users and grant rights to them. -->
<!-- <access_management>1</access_management> -->
</default>
</users>
<!-- Quotas. -->
<quotas>
<!-- Name of quota. -->
<default>
<!-- Limits for time interval. You could specify many intervals with different limits. -->
<interval>
<!-- Length of interval. -->
<duration>3600</duration>
<!-- No limits. Just calculate resource usage for time interval. -->
<queries>0</queries>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
<execution_time>0</execution_time>
</interval>
</default>
</quotas>
</clickhouse>

View file

@ -1,21 +0,0 @@
<functions>
<function>
<type>executable</type>
<name>histogramQuantile</name>
<return_type>Float64</return_type>
<argument>
<type>Array(Float64)</type>
<name>buckets</name>
</argument>
<argument>
<type>Array(Float64)</type>
<name>counts</name>
</argument>
<argument>
<type>Float64</type>
<name>quantile</name>
</argument>
<format>CSV</format>
<command>./histogramQuantile</command>
</function>
</functions>

View file

@ -1,133 +0,0 @@
version: "2.4"
include:
- test-app-docker-compose.yaml
services:
zookeeper-1:
image: bitnami/zookeeper:3.7.1
container_name: signoz-zookeeper-1
hostname: zookeeper-1
user: root
ports:
- "2181:2181"
- "2888:2888"
- "3888:3888"
volumes:
- ./data/zookeeper-1:/bitnami/zookeeper
environment:
- ZOO_SERVER_ID=1
# - ZOO_SERVERS=0.0.0.0:2888:3888,zookeeper-2:2888:3888,zookeeper-3:2888:3888
- ALLOW_ANONYMOUS_LOGIN=yes
- ZOO_AUTOPURGE_INTERVAL=1
clickhouse:
image: clickhouse/clickhouse-server:24.1.2-alpine
container_name: signoz-clickhouse
# ports:
# - "9000:9000"
# - "8123:8123"
tty: true
volumes:
- ./clickhouse-config.xml:/etc/clickhouse-server/config.xml
- ./clickhouse-users.xml:/etc/clickhouse-server/users.xml
- ./custom-function.xml:/etc/clickhouse-server/custom-function.xml
- ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
# - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml
- ./data/clickhouse/:/var/lib/clickhouse/
- ./user_scripts:/var/lib/clickhouse/user_scripts/
restart: on-failure
logging:
options:
max-size: 50m
max-file: "3"
healthcheck:
# "clickhouse", "client", "-u ${CLICKHOUSE_USER}", "--password ${CLICKHOUSE_PASSWORD}", "-q 'SELECT 1'"
test:
[
"CMD",
"wget",
"--spider",
"-q",
"0.0.0.0:8123/ping"
]
interval: 30s
timeout: 5s
retries: 3
alertmanager:
container_name: signoz-alertmanager
image: signoz/alertmanager:0.23.7
volumes:
- ./data/alertmanager:/data
depends_on:
query-service:
condition: service_healthy
restart: on-failure
command:
- --queryService.url=http://query-service:8085
- --storage.path=/data
otel-collector-migrator:
image: signoz/signoz-schema-migrator:${OTELCOL_TAG:-0.111.5}
container_name: otel-migrator
command:
- "--dsn=tcp://clickhouse:9000"
depends_on:
clickhouse:
condition: service_healthy
# clickhouse-2:
# condition: service_healthy
# clickhouse-3:
# condition: service_healthy
# Notes for Maintainers/Contributors who will change Line Numbers of Frontend & Query-Section. Please Update Line Numbers in `./scripts/commentLinesForSetup.sh` & `./CONTRIBUTING.md`
otel-collector:
container_name: signoz-otel-collector
image: signoz/signoz-otel-collector:0.111.5
command:
[
"--config=/etc/otel-collector-config.yaml",
"--manager-config=/etc/manager-config.yaml",
"--copy-path=/var/tmp/collector-config.yaml",
"--feature-gates=-pkg.translator.prometheus.NormalizeName"
]
# user: root # required for reading docker container logs
volumes:
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml
- ./otel-collector-opamp-config.yaml:/etc/manager-config.yaml
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- /:/hostfs:ro
environment:
- OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host,os.type=linux
ports:
# - "1777:1777" # pprof extension
- "4317:4317" # OTLP gRPC receiver
- "4318:4318" # OTLP HTTP receiver
# - "8888:8888" # OtelCollector internal metrics
# - "8889:8889" # signoz spanmetrics exposed by the agent
# - "9411:9411" # Zipkin port
# - "13133:13133" # health check extension
# - "14250:14250" # Jaeger gRPC
# - "14268:14268" # Jaeger thrift HTTP
# - "55678:55678" # OpenCensus receiver
# - "55679:55679" # zPages extension
restart: on-failure
depends_on:
clickhouse:
condition: service_healthy
otel-collector-migrator:
condition: service_completed_successfully
query-service:
condition: service_healthy
logspout:
image: "gliderlabs/logspout:v3.2.14"
container_name: signoz-logspout
volumes:
- /etc/hostname:/etc/host_hostname:ro
- /var/run/docker.sock:/var/run/docker.sock
command: syslog+tcp://otel-collector:2255
depends_on:
- otel-collector
restart: on-failure

View file

@ -1,67 +0,0 @@
version: "2.4"
services:
query-service:
hostname: query-service
build:
context: "../../../"
dockerfile: "./pkg/query-service/Dockerfile"
args:
LDFLAGS: ""
TARGETPLATFORM: "${GOOS}/${GOARCH}"
container_name: signoz-query-service
environment:
- ClickHouseUrl=tcp://clickhouse:9000
- ALERTMANAGER_API_PREFIX=http://alertmanager:9093/api/
- SIGNOZ_LOCAL_DB_PATH=/var/lib/signoz/signoz.db
- DASHBOARDS_PATH=/root/config/dashboards
- STORAGE=clickhouse
- GODEBUG=netdns=go
- TELEMETRY_ENABLED=true
volumes:
- ./prometheus.yml:/root/config/prometheus.yml
- ../dashboards:/root/config/dashboards
- ./data/signoz/:/var/lib/signoz/
command:
[
"-config=/root/config/prometheus.yml",
"--use-logs-new-schema=true"
]
ports:
- "6060:6060"
- "8080:8080"
restart: on-failure
healthcheck:
test:
[
"CMD",
"wget",
"--spider",
"-q",
"localhost:8080/api/v1/health"
]
interval: 30s
timeout: 5s
retries: 3
depends_on:
clickhouse:
condition: service_healthy
frontend:
build:
context: "../../../frontend"
dockerfile: "./Dockerfile"
args:
TARGETOS: "${GOOS}"
TARGETPLATFORM: "${GOARCH}"
container_name: signoz-frontend
environment:
- FRONTEND_API_ENDPOINT=http://query-service:8080
restart: on-failure
depends_on:
- alertmanager
- query-service
ports:
- "3301:3301"
volumes:
- ../common/nginx-config.conf:/etc/nginx/conf.d/default.conf

View file

@ -1,296 +0,0 @@
x-clickhouse-defaults: &clickhouse-defaults
restart: on-failure
# addding non LTS version due to this fix https://github.com/ClickHouse/ClickHouse/commit/32caf8716352f45c1b617274c7508c86b7d1afab
image: clickhouse/clickhouse-server:24.1.2-alpine
tty: true
depends_on:
- zookeeper-1
# - zookeeper-2
# - zookeeper-3
logging:
options:
max-size: 50m
max-file: "3"
healthcheck:
# "clickhouse", "client", "-u ${CLICKHOUSE_USER}", "--password ${CLICKHOUSE_PASSWORD}", "-q 'SELECT 1'"
test:
[
"CMD",
"wget",
"--spider",
"-q",
"0.0.0.0:8123/ping"
]
interval: 30s
timeout: 5s
retries: 3
ulimits:
nproc: 65535
nofile:
soft: 262144
hard: 262144
x-db-depend: &db-depend
depends_on:
clickhouse:
condition: service_healthy
otel-collector-migrator-sync:
condition: service_completed_successfully
# clickhouse-2:
# condition: service_healthy
# clickhouse-3:
# condition: service_healthy
services:
zookeeper-1:
image: bitnami/zookeeper:3.7.1
container_name: signoz-zookeeper-1
hostname: zookeeper-1
user: root
ports:
- "2181:2181"
- "2888:2888"
- "3888:3888"
volumes:
- ./data/zookeeper-1:/bitnami/zookeeper
environment:
- ZOO_SERVER_ID=1
# - ZOO_SERVERS=0.0.0.0:2888:3888,zookeeper-2:2888:3888,zookeeper-3:2888:3888
- ALLOW_ANONYMOUS_LOGIN=yes
- ZOO_AUTOPURGE_INTERVAL=1
# zookeeper-2:
# image: bitnami/zookeeper:3.7.0
# container_name: signoz-zookeeper-2
# hostname: zookeeper-2
# user: root
# ports:
# - "2182:2181"
# - "2889:2888"
# - "3889:3888"
# volumes:
# - ./data/zookeeper-2:/bitnami/zookeeper
# environment:
# - ZOO_SERVER_ID=2
# - ZOO_SERVERS=zookeeper-1:2888:3888,0.0.0.0:2888:3888,zookeeper-3:2888:3888
# - ALLOW_ANONYMOUS_LOGIN=yes
# - ZOO_AUTOPURGE_INTERVAL=1
# zookeeper-3:
# image: bitnami/zookeeper:3.7.0
# container_name: signoz-zookeeper-3
# hostname: zookeeper-3
# user: root
# ports:
# - "2183:2181"
# - "2890:2888"
# - "3890:3888"
# volumes:
# - ./data/zookeeper-3:/bitnami/zookeeper
# environment:
# - ZOO_SERVER_ID=3
# - ZOO_SERVERS=zookeeper-1:2888:3888,zookeeper-2:2888:3888,0.0.0.0:2888:3888
# - ALLOW_ANONYMOUS_LOGIN=yes
# - ZOO_AUTOPURGE_INTERVAL=1
clickhouse:
<<: *clickhouse-defaults
container_name: signoz-clickhouse
hostname: clickhouse
ports:
- "9000:9000"
- "8123:8123"
- "9181:9181"
volumes:
- ./clickhouse-config.xml:/etc/clickhouse-server/config.xml
- ./clickhouse-users.xml:/etc/clickhouse-server/users.xml
- ./custom-function.xml:/etc/clickhouse-server/custom-function.xml
- ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
# - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml
- ./data/clickhouse/:/var/lib/clickhouse/
- ./user_scripts:/var/lib/clickhouse/user_scripts/
# clickhouse-2:
# <<: *clickhouse-defaults
# container_name: signoz-clickhouse-2
# hostname: clickhouse-2
# ports:
# - "9001:9000"
# - "8124:8123"
# - "9182:9181"
# volumes:
# - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml
# - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml
# - ./custom-function.xml:/etc/clickhouse-server/custom-function.xml
# - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
# # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml
# - ./data/clickhouse-2/:/var/lib/clickhouse/
# - ./user_scripts:/var/lib/clickhouse/user_scripts/
# clickhouse-3:
# <<: *clickhouse-defaults
# container_name: signoz-clickhouse-3
# hostname: clickhouse-3
# ports:
# - "9002:9000"
# - "8125:8123"
# - "9183:9181"
# volumes:
# - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml
# - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml
# - ./custom-function.xml:/etc/clickhouse-server/custom-function.xml
# - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
# # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml
# - ./data/clickhouse-3/:/var/lib/clickhouse/
# - ./user_scripts:/var/lib/clickhouse/user_scripts/
alertmanager:
image: signoz/alertmanager:${ALERTMANAGER_TAG:-0.23.7}
container_name: signoz-alertmanager
volumes:
- ./data/alertmanager:/data
depends_on:
query-service:
condition: service_healthy
restart: on-failure
command:
- --queryService.url=http://query-service:8085
- --storage.path=/data
# Notes for Maintainers/Contributors who will change Line Numbers of Frontend & Query-Section. Please Update Line Numbers in `./scripts/commentLinesForSetup.sh` & `./CONTRIBUTING.md`
query-service:
image: signoz/query-service:${DOCKER_TAG:-0.57.0}
container_name: signoz-query-service
command:
[
"-config=/root/config/prometheus.yml",
"--use-logs-new-schema=true"
]
# ports:
# - "6060:6060" # pprof port
# - "8080:8080" # query-service port
volumes:
- ./prometheus.yml:/root/config/prometheus.yml
- ../dashboards:/root/config/dashboards
- ./data/signoz/:/var/lib/signoz/
environment:
- ClickHouseUrl=tcp://clickhouse:9000
- ALERTMANAGER_API_PREFIX=http://alertmanager:9093/api/
- SIGNOZ_LOCAL_DB_PATH=/var/lib/signoz/signoz.db
- DASHBOARDS_PATH=/root/config/dashboards
- STORAGE=clickhouse
- GODEBUG=netdns=go
- TELEMETRY_ENABLED=true
- DEPLOYMENT_TYPE=docker-standalone-amd
restart: on-failure
healthcheck:
test:
[
"CMD",
"wget",
"--spider",
"-q",
"localhost:8080/api/v1/health"
]
interval: 30s
timeout: 5s
retries: 3
<<: *db-depend
frontend:
image: signoz/frontend:${DOCKER_TAG:-0.57.0}
container_name: signoz-frontend
restart: on-failure
depends_on:
- alertmanager
- query-service
ports:
- "3301:3301"
volumes:
- ./nginx-config.conf:/etc/nginx/conf.d/default.conf
otel-collector-migrator-sync:
image: signoz/signoz-schema-migrator:${OTELCOL_TAG:-0.111.5}
container_name: otel-migrator-sync
command:
- "sync"
- "--dsn=tcp://clickhouse:9000"
- "--up="
depends_on:
clickhouse:
condition: service_healthy
# clickhouse-2:
# condition: service_healthy
# clickhouse-3:
# condition: service_healthy
otel-collector-migrator-async:
image: signoz/signoz-schema-migrator:${OTELCOL_TAG:-0.111.5}
container_name: otel-migrator-async
command:
- "async"
- "--dsn=tcp://clickhouse:9000"
- "--up="
depends_on:
clickhouse:
condition: service_healthy
otel-collector-migrator-sync:
condition: service_completed_successfully
# clickhouse-2:
# condition: service_healthy
# clickhouse-3:
# condition: service_healthy
otel-collector:
image: signoz/signoz-otel-collector:${OTELCOL_TAG:-0.111.5}
container_name: signoz-otel-collector
command:
[
"--config=/etc/otel-collector-config.yaml",
"--manager-config=/etc/manager-config.yaml",
"--copy-path=/var/tmp/collector-config.yaml",
"--feature-gates=-pkg.translator.prometheus.NormalizeName"
]
user: root # required for reading docker container logs
volumes:
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml
- ./otel-collector-opamp-config.yaml:/etc/manager-config.yaml
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- /:/hostfs:ro
environment:
- OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host,os.type=linux
- LOW_CARDINAL_EXCEPTION_GROUPING=false
ports:
# - "1777:1777" # pprof extension
- "4317:4317" # OTLP gRPC receiver
- "4318:4318" # OTLP HTTP receiver
# - "8888:8888" # OtelCollector internal metrics
# - "8889:8889" # signoz spanmetrics exposed by the agent
# - "9411:9411" # Zipkin port
# - "13133:13133" # health check extension
# - "14250:14250" # Jaeger gRPC
# - "14268:14268" # Jaeger thrift HTTP
# - "55678:55678" # OpenCensus receiver
# - "55679:55679" # zPages extension
restart: on-failure
depends_on:
clickhouse:
condition: service_healthy
otel-collector-migrator-sync:
condition: service_completed_successfully
query-service:
condition: service_healthy
logspout:
image: "gliderlabs/logspout:v3.2.14"
container_name: signoz-logspout
volumes:
- /etc/hostname:/etc/host_hostname:ro
- /var/run/docker.sock:/var/run/docker.sock
command: syslog+tcp://otel-collector:2255
depends_on:
- otel-collector
restart: on-failure

View file

@ -1,284 +0,0 @@
version: "2.4"
include:
- test-app-docker-compose.yaml
x-clickhouse-defaults: &clickhouse-defaults
restart: on-failure
# addding non LTS version due to this fix https://github.com/ClickHouse/ClickHouse/commit/32caf8716352f45c1b617274c7508c86b7d1afab
image: clickhouse/clickhouse-server:24.1.2-alpine
tty: true
depends_on:
- zookeeper-1
# - zookeeper-2
# - zookeeper-3
logging:
options:
max-size: 50m
max-file: "3"
healthcheck:
# "clickhouse", "client", "-u ${CLICKHOUSE_USER}", "--password ${CLICKHOUSE_PASSWORD}", "-q 'SELECT 1'"
test:
[
"CMD",
"wget",
"--spider",
"-q",
"0.0.0.0:8123/ping"
]
interval: 30s
timeout: 5s
retries: 3
ulimits:
nproc: 65535
nofile:
soft: 262144
hard: 262144
x-db-depend: &db-depend
depends_on:
clickhouse:
condition: service_healthy
otel-collector-migrator:
condition: service_completed_successfully
# clickhouse-2:
# condition: service_healthy
# clickhouse-3:
# condition: service_healthy
services:
zookeeper-1:
image: bitnami/zookeeper:3.7.1
container_name: signoz-zookeeper-1
hostname: zookeeper-1
user: root
ports:
- "2181:2181"
- "2888:2888"
- "3888:3888"
volumes:
- ./data/zookeeper-1:/bitnami/zookeeper
environment:
- ZOO_SERVER_ID=1
# - ZOO_SERVERS=0.0.0.0:2888:3888,zookeeper-2:2888:3888,zookeeper-3:2888:3888
- ALLOW_ANONYMOUS_LOGIN=yes
- ZOO_AUTOPURGE_INTERVAL=1
# zookeeper-2:
# image: bitnami/zookeeper:3.7.0
# container_name: signoz-zookeeper-2
# hostname: zookeeper-2
# user: root
# ports:
# - "2182:2181"
# - "2889:2888"
# - "3889:3888"
# volumes:
# - ./data/zookeeper-2:/bitnami/zookeeper
# environment:
# - ZOO_SERVER_ID=2
# - ZOO_SERVERS=zookeeper-1:2888:3888,0.0.0.0:2888:3888,zookeeper-3:2888:3888
# - ALLOW_ANONYMOUS_LOGIN=yes
# - ZOO_AUTOPURGE_INTERVAL=1
# zookeeper-3:
# image: bitnami/zookeeper:3.7.0
# container_name: signoz-zookeeper-3
# hostname: zookeeper-3
# user: root
# ports:
# - "2183:2181"
# - "2890:2888"
# - "3890:3888"
# volumes:
# - ./data/zookeeper-3:/bitnami/zookeeper
# environment:
# - ZOO_SERVER_ID=3
# - ZOO_SERVERS=zookeeper-1:2888:3888,zookeeper-2:2888:3888,0.0.0.0:2888:3888
# - ALLOW_ANONYMOUS_LOGIN=yes
# - ZOO_AUTOPURGE_INTERVAL=1
clickhouse:
<<: *clickhouse-defaults
container_name: signoz-clickhouse
hostname: clickhouse
ports:
- "9000:9000"
- "8123:8123"
- "9181:9181"
volumes:
- ./clickhouse-config.xml:/etc/clickhouse-server/config.xml
- ./clickhouse-users.xml:/etc/clickhouse-server/users.xml
- ./custom-function.xml:/etc/clickhouse-server/custom-function.xml
- ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
# - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml
- ./data/clickhouse/:/var/lib/clickhouse/
- ./user_scripts:/var/lib/clickhouse/user_scripts/
# clickhouse-2:
# <<: *clickhouse-defaults
# container_name: signoz-clickhouse-2
# hostname: clickhouse-2
# ports:
# - "9001:9000"
# - "8124:8123"
# - "9182:9181"
# volumes:
# - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml
# - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml
# - ./custom-function.xml:/etc/clickhouse-server/custom-function.xml
# - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
# # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml
# - ./data/clickhouse-2/:/var/lib/clickhouse/
# - ./user_scripts:/var/lib/clickhouse/user_scripts/
# clickhouse-3:
# <<: *clickhouse-defaults
# container_name: signoz-clickhouse-3
# hostname: clickhouse-3
# ports:
# - "9002:9000"
# - "8125:8123"
# - "9183:9181"
# volumes:
# - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml
# - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml
# - ./custom-function.xml:/etc/clickhouse-server/custom-function.xml
# - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
# # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml
# - ./data/clickhouse-3/:/var/lib/clickhouse/
# - ./user_scripts:/var/lib/clickhouse/user_scripts/
alertmanager:
image: signoz/alertmanager:${ALERTMANAGER_TAG:-0.23.7}
container_name: signoz-alertmanager
volumes:
- ./data/alertmanager:/data
depends_on:
query-service:
condition: service_healthy
restart: on-failure
command:
- --queryService.url=http://query-service:8085
- --storage.path=/data
# Notes for Maintainers/Contributors who will change Line Numbers of Frontend & Query-Section. Please Update Line Numbers in `./scripts/commentLinesForSetup.sh` & `./CONTRIBUTING.md`
query-service:
image: signoz/query-service:${DOCKER_TAG:-0.57.0}
container_name: signoz-query-service
command:
[
"-config=/root/config/prometheus.yml",
"-gateway-url=https://api.staging.signoz.cloud",
"--use-logs-new-schema=true"
]
# ports:
# - "6060:6060" # pprof port
# - "8080:8080" # query-service port
volumes:
- ./prometheus.yml:/root/config/prometheus.yml
- ../dashboards:/root/config/dashboards
- ./data/signoz/:/var/lib/signoz/
environment:
- ClickHouseUrl=tcp://clickhouse:9000
- ALERTMANAGER_API_PREFIX=http://alertmanager:9093/api/
- SIGNOZ_LOCAL_DB_PATH=/var/lib/signoz/signoz.db
- DASHBOARDS_PATH=/root/config/dashboards
- STORAGE=clickhouse
- GODEBUG=netdns=go
- TELEMETRY_ENABLED=true
- DEPLOYMENT_TYPE=docker-standalone-amd
restart: on-failure
healthcheck:
test:
[
"CMD",
"wget",
"--spider",
"-q",
"localhost:8080/api/v1/health"
]
interval: 30s
timeout: 5s
retries: 3
<<: *db-depend
frontend:
image: signoz/frontend:${DOCKER_TAG:-0.57.0}
container_name: signoz-frontend
restart: on-failure
depends_on:
- alertmanager
- query-service
ports:
- "3301:3301"
volumes:
- ../common/nginx-config.conf:/etc/nginx/conf.d/default.conf
otel-collector-migrator:
image: signoz/signoz-schema-migrator:${OTELCOL_TAG:-0.111.5}
container_name: otel-migrator
command:
- "--dsn=tcp://clickhouse:9000"
depends_on:
clickhouse:
condition: service_healthy
# clickhouse-2:
# condition: service_healthy
# clickhouse-3:
# condition: service_healthy
otel-collector:
image: signoz/signoz-otel-collector:${OTELCOL_TAG:-0.111.5}
container_name: signoz-otel-collector
command:
[
"--config=/etc/otel-collector-config.yaml",
"--manager-config=/etc/manager-config.yaml",
"--copy-path=/var/tmp/collector-config.yaml",
"--feature-gates=-pkg.translator.prometheus.NormalizeName"
]
user: root # required for reading docker container logs
volumes:
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml
- ./otel-collector-opamp-config.yaml:/etc/manager-config.yaml
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- /:/hostfs:ro
environment:
- OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host,os.type=linux
- LOW_CARDINAL_EXCEPTION_GROUPING=false
ports:
# - "1777:1777" # pprof extension
- "4317:4317" # OTLP gRPC receiver
- "4318:4318" # OTLP HTTP receiver
# - "8888:8888" # OtelCollector internal metrics
# - "8889:8889" # signoz spanmetrics exposed by the agent
# - "9411:9411" # Zipkin port
# - "13133:13133" # health check extension
# - "14250:14250" # Jaeger gRPC
# - "14268:14268" # Jaeger thrift HTTP
# - "55678:55678" # OpenCensus receiver
# - "55679:55679" # zPages extension
restart: on-failure
depends_on:
clickhouse:
condition: service_healthy
otel-collector-migrator:
condition: service_completed_successfully
query-service:
condition: service_healthy
logspout:
image: "gliderlabs/logspout:v3.2.14"
container_name: signoz-logspout
volumes:
- /etc/hostname:/etc/host_hostname:ro
- /var/run/docker.sock:/var/run/docker.sock
command: syslog+tcp://otel-collector:2255
depends_on:
- otel-collector
restart: on-failure

View file

@ -1,3 +0,0 @@
include:
- test-app-docker-compose.yaml
- docker-compose-minimal.yaml

View file

@ -1,64 +0,0 @@
<clickhouse>
<logger>
<!-- Possible levels [1]:
- none (turns off logging)
- fatal
- critical
- error
- warning
- notice
- information
- debug
- trace
[1]: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105-L114
-->
<level>information</level>
<log>/var/log/clickhouse-keeper/clickhouse-keeper.log</log>
<errorlog>/var/log/clickhouse-keeper/clickhouse-keeper.err.log</errorlog>
<!-- Rotation policy
See https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/FileChannel.h#L54-L85
-->
<size>1000M</size>
<count>10</count>
<!-- <console>1</console> --> <!-- Default behavior is autodetection (log to console if not daemon mode and is tty) -->
</logger>
<listen_host>0.0.0.0</listen_host>
<max_connections>4096</max_connections>
<keeper_server>
<tcp_port>9181</tcp_port>
<!-- Must be unique among all keeper serves -->
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/logs</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>10000</operation_timeout_ms>
<min_session_timeout_ms>10000</min_session_timeout_ms>
<session_timeout_ms>100000</session_timeout_ms>
<raft_logs_level>information</raft_logs_level>
<compress_logs>false</compress_logs>
<!-- All settings listed in https://github.com/ClickHouse/ClickHouse/blob/master/src/Coordination/CoordinationSettings.h -->
</coordination_settings>
<!-- enable sanity hostname checks for cluster configuration (e.g. if localhost is used with remote endpoints) -->
<hostname_checks_enabled>true</hostname_checks_enabled>
<raft_configuration>
<server>
<id>1</id>
<!-- Internal port and hostname -->
<hostname>clickhouses-keeper-1</hostname>
<port>9234</port>
</server>
<!-- Add more servers here -->
</raft_configuration>
</keeper_server>
</clickhouse>

View file

@ -1,64 +0,0 @@
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
server {
listen 3301;
server_name _;
gzip on;
gzip_static on;
gzip_types text/plain text/css application/json application/x-javascript text/xml application/xml application/xml+rss text/javascript;
gzip_proxied any;
gzip_vary on;
gzip_comp_level 6;
gzip_buffers 16 8k;
gzip_http_version 1.1;
# to handle uri issue 414 from nginx
client_max_body_size 24M;
large_client_header_buffers 8 128k;
location / {
if ( $uri = '/index.html' ) {
add_header Cache-Control no-store always;
}
root /usr/share/nginx/html;
index index.html index.htm;
try_files $uri $uri/ /index.html;
}
location ~ ^/api/(v1|v3)/logs/(tail|livetail){
proxy_pass http://query-service:8080;
proxy_http_version 1.1;
# connection will be closed if no data is read for 600s between successive read operations
proxy_read_timeout 600s;
# dont buffer the data send it directly to client.
proxy_buffering off;
proxy_cache off;
}
location /api {
proxy_pass http://query-service:8080/api;
# connection will be closed if no data is read for 600s between successive read operations
proxy_read_timeout 600s;
}
location /ws {
proxy_pass http://query-service:8080/ws;
proxy_http_version 1.1;
proxy_set_header Upgrade "websocket";
proxy_set_header Connection "upgrade";
proxy_read_timeout 86400;
}
# redirect server error pages to the static page /50x.html
#
error_page 500 502 503 504 /50x.html;
location = /50x.html {
root /usr/share/nginx/html;
}
}

View file

@ -1,188 +0,0 @@
receivers:
tcplog/docker:
listen_address: "0.0.0.0:2255"
operators:
- type: regex_parser
regex: '^<([0-9]+)>[0-9]+ (?P<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?) (?P<container_id>\S+) (?P<container_name>\S+) [0-9]+ - -( (?P<body>.*))?'
timestamp:
parse_from: attributes.timestamp
layout: '%Y-%m-%dT%H:%M:%S.%LZ'
- type: move
from: attributes["body"]
to: body
- type: remove
field: attributes.timestamp
# please remove names from below if you want to collect logs from them
- type: filter
id: signoz_logs_filter
expr: 'attributes.container_name matches "^signoz-(logspout|frontend|alertmanager|query-service|otel-collector|clickhouse|zookeeper)"'
opencensus:
endpoint: 0.0.0.0:55678
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318
jaeger:
protocols:
grpc:
endpoint: 0.0.0.0:14250
thrift_http:
endpoint: 0.0.0.0:14268
# thrift_compact:
# endpoint: 0.0.0.0:6831
# thrift_binary:
# endpoint: 0.0.0.0:6832
hostmetrics:
collection_interval: 30s
root_path: /hostfs
scrapers:
cpu: {}
load: {}
memory: {}
disk: {}
filesystem: {}
network: {}
prometheus:
config:
global:
scrape_interval: 60s
scrape_configs:
# otel-collector internal metrics
- job_name: otel-collector
static_configs:
- targets:
- localhost:8888
labels:
job_name: otel-collector
processors:
batch:
send_batch_size: 10000
send_batch_max_size: 11000
timeout: 10s
signozspanmetrics/cumulative:
metrics_exporter: clickhousemetricswrite
metrics_flush_interval: 60s
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ]
dimensions_cache_size: 100000
dimensions:
- name: service.namespace
default: default
- name: deployment.environment
default: default
# This is added to ensure the uniqueness of the timeseries
# Otherwise, identical timeseries produced by multiple replicas of
# collectors result in incorrect APM metrics
- name: signoz.collector.id
- name: service.version
- name: browser.platform
- name: browser.mobile
- name: k8s.cluster.name
- name: k8s.node.name
- name: k8s.namespace.name
- name: host.name
- name: host.type
- name: container.name
# memory_limiter:
# # 80% of maximum memory up to 2G
# limit_mib: 1500
# # 25% of limit up to 2G
# spike_limit_mib: 512
# check_interval: 5s
#
# # 50% of the maximum memory
# limit_percentage: 50
# # 20% of max memory usage spike expected
# spike_limit_percentage: 20
# queued_retry:
# num_workers: 4
# queue_size: 100
# retry_on_failure: true
resourcedetection:
# Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels.
detectors: [env, system] # include ec2 for AWS, gcp for GCP and azure for Azure.
timeout: 2s
signozspanmetrics/delta:
metrics_exporter: clickhousemetricswrite
metrics_flush_interval: 60s
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ]
dimensions_cache_size: 100000
aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
enable_exp_histogram: true
dimensions:
- name: service.namespace
default: default
- name: deployment.environment
default: default
# This is added to ensure the uniqueness of the timeseries
# Otherwise, identical timeseries produced by multiple replicas of
# collectors result in incorrect APM metrics
- name: signoz.collector.id
- name: service.version
- name: browser.platform
- name: browser.mobile
- name: k8s.cluster.name
- name: k8s.node.name
- name: k8s.namespace.name
- name: host.name
- name: host.type
- name: container.name
extensions:
health_check:
endpoint: 0.0.0.0:13133
zpages:
endpoint: 0.0.0.0:55679
pprof:
endpoint: 0.0.0.0:1777
exporters:
clickhousetraces:
datasource: tcp://clickhouse:9000/signoz_traces
low_cardinal_exception_grouping: ${env:LOW_CARDINAL_EXCEPTION_GROUPING}
clickhousemetricswrite:
endpoint: tcp://clickhouse:9000/signoz_metrics
resource_to_telemetry_conversion:
enabled: true
clickhousemetricswrite/prometheus:
endpoint: tcp://clickhouse:9000/signoz_metrics
clickhouselogsexporter:
dsn: tcp://clickhouse:9000/signoz_logs
timeout: 10s
use_new_schema: true
# logging: {}
service:
telemetry:
logs:
encoding: json
metrics:
address: 0.0.0.0:8888
extensions:
- health_check
- zpages
- pprof
pipelines:
traces:
receivers: [jaeger, otlp]
processors: [signozspanmetrics/cumulative, signozspanmetrics/delta, batch]
exporters: [clickhousetraces]
metrics:
receivers: [otlp]
processors: [batch]
exporters: [clickhousemetricswrite]
metrics/generic:
receivers: [hostmetrics]
processors: [resourcedetection, batch]
exporters: [clickhousemetricswrite]
metrics/prometheus:
receivers: [prometheus]
processors: [batch]
exporters: [clickhousemetricswrite/prometheus]
logs:
receivers: [otlp, tcplog/docker]
processors: [batch]
exporters: [clickhouselogsexporter]

View file

@ -1 +0,0 @@
server_endpoint: ws://query-service:4320/v1/opamp

View file

@ -1,25 +0,0 @@
# my global config
global:
scrape_interval: 5s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
- alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"
- 'alerts.yml'
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs: []
remote_read:
- url: tcp://clickhouse:9000/signoz_metrics

View file

@ -1,26 +0,0 @@
services:
hotrod:
image: jaegertracing/example-hotrod:1.30
container_name: hotrod
logging:
options:
max-size: 50m
max-file: "3"
command: [ "all" ]
environment:
- JAEGER_ENDPOINT=http://otel-collector:14268/api/traces
load-hotrod:
image: "signoz/locust:1.2.3"
container_name: load-hotrod
hostname: load-hotrod
environment:
ATTACKED_HOST: http://hotrod:8080
LOCUST_MODE: standalone
NO_PROXY: standalone
TASK_DELAY_FROM: 5
TASK_DELAY_TO: 30
QUIET_MODE: "${QUIET_MODE:-false}"
LOCUST_OPTS: "--headless -u 10 -r 1"
volumes:
- ../common/locust-scripts:/locust

View file

@ -1,237 +0,0 @@
package main
import (
"bufio"
"fmt"
"math"
"os"
"sort"
"strconv"
"strings"
)
// NOTE: executable must be built with target OS and architecture set to linux/amd64
// env GOOS=linux GOARCH=amd64 go build -o histogramQuantile histogramQuantile.go
// The following code is adapted from the following source:
// https://github.com/prometheus/prometheus/blob/main/promql/quantile.go
type bucket struct {
upperBound float64
count float64
}
// buckets implements sort.Interface.
type buckets []bucket
func (b buckets) Len() int { return len(b) }
func (b buckets) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
func (b buckets) Less(i, j int) bool { return b[i].upperBound < b[j].upperBound }
// bucketQuantile calculates the quantile 'q' based on the given buckets. The
// buckets will be sorted by upperBound by this function (i.e. no sorting
// needed before calling this function). The quantile value is interpolated
// assuming a linear distribution within a bucket. However, if the quantile
// falls into the highest bucket, the upper bound of the 2nd highest bucket is
// returned. A natural lower bound of 0 is assumed if the upper bound of the
// lowest bucket is greater 0. In that case, interpolation in the lowest bucket
// happens linearly between 0 and the upper bound of the lowest bucket.
// However, if the lowest bucket has an upper bound less or equal 0, this upper
// bound is returned if the quantile falls into the lowest bucket.
//
// There are a number of special cases (once we have a way to report errors
// happening during evaluations of AST functions, we should report those
// explicitly):
//
// If 'buckets' has 0 observations, NaN is returned.
//
// If 'buckets' has fewer than 2 elements, NaN is returned.
//
// If the highest bucket is not +Inf, NaN is returned.
//
// If q==NaN, NaN is returned.
//
// If q<0, -Inf is returned.
//
// If q>1, +Inf is returned.
func bucketQuantile(q float64, buckets buckets) float64 {
if math.IsNaN(q) {
return math.NaN()
}
if q < 0 {
return math.Inf(-1)
}
if q > 1 {
return math.Inf(+1)
}
sort.Sort(buckets)
if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) {
return math.NaN()
}
buckets = coalesceBuckets(buckets)
ensureMonotonic(buckets)
if len(buckets) < 2 {
return math.NaN()
}
observations := buckets[len(buckets)-1].count
if observations == 0 {
return math.NaN()
}
rank := q * observations
b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank })
if b == len(buckets)-1 {
return buckets[len(buckets)-2].upperBound
}
if b == 0 && buckets[0].upperBound <= 0 {
return buckets[0].upperBound
}
var (
bucketStart float64
bucketEnd = buckets[b].upperBound
count = buckets[b].count
)
if b > 0 {
bucketStart = buckets[b-1].upperBound
count -= buckets[b-1].count
rank -= buckets[b-1].count
}
return bucketStart + (bucketEnd-bucketStart)*(rank/count)
}
// coalesceBuckets merges buckets with the same upper bound.
//
// The input buckets must be sorted.
func coalesceBuckets(buckets buckets) buckets {
last := buckets[0]
i := 0
for _, b := range buckets[1:] {
if b.upperBound == last.upperBound {
last.count += b.count
} else {
buckets[i] = last
last = b
i++
}
}
buckets[i] = last
return buckets[:i+1]
}
// The assumption that bucket counts increase monotonically with increasing
// upperBound may be violated during:
//
// * Recording rule evaluation of histogram_quantile, especially when rate()
// has been applied to the underlying bucket timeseries.
// * Evaluation of histogram_quantile computed over federated bucket
// timeseries, especially when rate() has been applied.
//
// This is because scraped data is not made available to rule evaluation or
// federation atomically, so some buckets are computed with data from the
// most recent scrapes, but the other buckets are missing data from the most
// recent scrape.
//
// Monotonicity is usually guaranteed because if a bucket with upper bound
// u1 has count c1, then any bucket with a higher upper bound u > u1 must
// have counted all c1 observations and perhaps more, so that c >= c1.
//
// Randomly interspersed partial sampling breaks that guarantee, and rate()
// exacerbates it. Specifically, suppose bucket le=1000 has a count of 10 from
// 4 samples but the bucket with le=2000 has a count of 7 from 3 samples. The
// monotonicity is broken. It is exacerbated by rate() because under normal
// operation, cumulative counting of buckets will cause the bucket counts to
// diverge such that small differences from missing samples are not a problem.
// rate() removes this divergence.)
//
// bucketQuantile depends on that monotonicity to do a binary search for the
// bucket with the φ-quantile count, so breaking the monotonicity
// guarantee causes bucketQuantile() to return undefined (nonsense) results.
//
// As a somewhat hacky solution until ingestion is atomic per scrape, we
// calculate the "envelope" of the histogram buckets, essentially removing
// any decreases in the count between successive buckets.
func ensureMonotonic(buckets buckets) {
max := buckets[0].count
for i := 1; i < len(buckets); i++ {
switch {
case buckets[i].count > max:
max = buckets[i].count
case buckets[i].count < max:
buckets[i].count = max
}
}
}
// End of copied code.
func readLines() []string {
r := bufio.NewReader(os.Stdin)
bytes := []byte{}
lines := []string{}
for {
line, isPrefix, err := r.ReadLine()
if err != nil {
break
}
bytes = append(bytes, line...)
if !isPrefix {
str := strings.TrimSpace(string(bytes))
if len(str) > 0 {
lines = append(lines, str)
bytes = []byte{}
}
}
}
if len(bytes) > 0 {
lines = append(lines, string(bytes))
}
return lines
}
func main() {
lines := readLines()
for _, text := range lines {
// Example input
// "[1, 2, 4, 8, 16]", "[1, 5, 8, 10, 14]", 0.9"
// bounds - counts - quantile
parts := strings.Split(text, "\",")
var bucketNumbers []float64
// Strip the ends with square brackets
text = parts[0][2 : len(parts[0])-1]
// Parse the bucket bounds
for _, num := range strings.Split(text, ",") {
num = strings.TrimSpace(num)
number, err := strconv.ParseFloat(num, 64)
if err == nil {
bucketNumbers = append(bucketNumbers, number)
}
}
var bucketCounts []float64
// Strip the ends with square brackets
text = parts[1][2 : len(parts[1])-1]
// Parse the bucket counts
for _, num := range strings.Split(text, ",") {
num = strings.TrimSpace(num)
number, err := strconv.ParseFloat(num, 64)
if err == nil {
bucketCounts = append(bucketCounts, number)
}
}
// Parse the quantile
q, err := strconv.ParseFloat(parts[2], 64)
var b buckets
if err == nil {
for i := 0; i < len(bucketNumbers); i++ {
b = append(b, bucket{upperBound: bucketNumbers[i], count: bucketCounts[i]})
}
}
fmt.Println(bucketQuantile(q, b))
}
}