diff --git a/.gitignore b/.gitignore index 1a25cc1c..1cf4b473 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,4 @@ dist/ crates/*/target/ crates/target/ build.log +demos/shared/signoz/data diff --git a/demos/shared/signoz/alertmanager.yml b/demos/shared/signoz/alertmanager.yml new file mode 100644 index 00000000..f59b6655 --- /dev/null +++ b/demos/shared/signoz/alertmanager.yml @@ -0,0 +1,35 @@ +global: + resolve_timeout: 1m + slack_api_url: 'https://hooks.slack.com/services/xxx' + +route: + receiver: 'slack-notifications' + +receivers: +- name: 'slack-notifications' + slack_configs: + - channel: '#alerts' + send_resolved: true + icon_url: https://avatars3.githubusercontent.com/u/3380462 + title: |- + [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }} + {{- if gt (len .CommonLabels) (len .GroupLabels) -}} + {{" "}}( + {{- with .CommonLabels.Remove .GroupLabels.Names }} + {{- range $index, $label := .SortedPairs -}} + {{ if $index }}, {{ end }} + {{- $label.Name }}="{{ $label.Value -}}" + {{- end }} + {{- end -}} + ) + {{- end }} + text: >- + {{ range .Alerts -}} + *Alert:* {{ .Annotations.title }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }} + + *Description:* {{ .Annotations.description }} + + *Details:* + {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}` + {{ end }} + {{ end }} diff --git a/demos/shared/signoz/alerts.yml b/demos/shared/signoz/alerts.yml new file mode 100644 index 00000000..810a2075 --- /dev/null +++ b/demos/shared/signoz/alerts.yml @@ -0,0 +1,11 @@ +groups: +- name: ExampleCPULoadGroup + rules: + - alert: HighCpuLoad + expr: system_cpu_load_average_1m > 0.1 + for: 0m + labels: + severity: warning + annotations: + summary: High CPU load + description: "CPU load is > 0.1\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" diff --git a/demos/shared/signoz/clickhouse-cluster.xml b/demos/shared/signoz/clickhouse-cluster.xml new file mode 100644 index 00000000..8b475ffe --- /dev/null +++ b/demos/shared/signoz/clickhouse-cluster.xml @@ -0,0 +1,75 @@ + + + + + + zookeeper-1 + 2181 + + + + + + + + + + + + + + + + clickhouse + 9000 + + + + + + + + diff --git a/demos/shared/signoz/clickhouse-config.xml b/demos/shared/signoz/clickhouse-config.xml new file mode 100644 index 00000000..12086ec8 --- /dev/null +++ b/demos/shared/signoz/clickhouse-config.xml @@ -0,0 +1,1143 @@ + + + + + + information + + json + + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + + 1000M + 10 + + + + + + + + + + + + + + + + + + 8123 + + + 9000 + + + 9004 + + + 9005 + + + + + + + + + + + + 9009 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 4096 + + + 3 + + + + + false + + + /path/to/ssl_cert_file + /path/to/ssl_key_file + + + false + + + /path/to/ssl_ca_cert_file + + + none + + + 0 + + + -1 + -1 + + + false + + + + + + + + + + + none + true + true + sslv2,sslv3 + true + + + + true + true + sslv2,sslv3 + true + + + + RejectCertificateHandler + + + + + + + + + 100 + + + 0 + + + + 10000 + + + + + + 0.9 + + + 4194304 + + + 0 + + + + + + 8589934592 + + + 5368709120 + + + + 1000 + + + 134217728 + + + 10000 + + + /var/lib/clickhouse/ + + + /var/lib/clickhouse/tmp/ + + + + ` + + + + + + /var/lib/clickhouse/user_files/ + + + + + + + + + + + + + users.xml + + + + /var/lib/clickhouse/access/ + + + + + + + default + + + + + + + + + + + + default + + + + + + + + + true + + + false + + ' | sed -e 's|.*>\(.*\)<.*|\1|') + wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge_$PKG_VER-1_all.deb + apt install --no-install-recommends -f ./clickhouse-jdbc-bridge_$PKG_VER-1_all.deb + clickhouse-jdbc-bridge & + + * [CentOS/RHEL] + export MVN_URL=https://repo1.maven.org/maven2/ru/yandex/clickhouse/clickhouse-jdbc-bridge + export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '' | sed -e 's|.*>\(.*\)<.*|\1|') + wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm + yum localinstall -y clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm + clickhouse-jdbc-bridge & + + Please refer to https://github.com/ClickHouse/clickhouse-jdbc-bridge#usage for more information. + ]]> + + + + + + + + + + + + + + + 01 + example01-01-1 + + + + + + 3600 + + + + 3600 + + + 60 + + + + + + + + + + + + + system + query_log
+ + toYYYYMM(event_date) + + + + + + 7500 +
+ + + + system + trace_log
+ + toYYYYMM(event_date) + 7500 +
+ + + + system + query_thread_log
+ toYYYYMM(event_date) + 7500 +
+ + + + system + query_views_log
+ toYYYYMM(event_date) + 7500 +
+ + + + system + part_log
+ toYYYYMM(event_date) + 7500 +
+ + + + + + system + metric_log
+ 7500 + 1000 +
+ + + + system + asynchronous_metric_log
+ + 7000 +
+ + + + + + engine MergeTree + partition by toYYYYMM(finish_date) + order by (finish_date, finish_time_us, trace_id) + + system + opentelemetry_span_log
+ 7500 +
+ + + + + system + crash_log
+ + + 1000 +
+ + + + + + + system + processors_profile_log
+ + toYYYYMM(event_date) + 7500 +
+ + + + + + + + + *_dictionary.xml + + + *function.xml + /var/lib/clickhouse/user_scripts/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + /clickhouse/task_queue/ddl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + click_cost + any + + 0 + 3600 + + + 86400 + 60 + + + + max + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + + + /var/lib/clickhouse/format_schemas/ + + + + + hide encrypt/decrypt arguments + ((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\) + + \1(???) + + + + + + + + + + false + + false + + + https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277 + + + + + + + + + + + 268435456 + true + +
diff --git a/demos/shared/signoz/clickhouse-storage.xml b/demos/shared/signoz/clickhouse-storage.xml new file mode 100644 index 00000000..54ec4976 --- /dev/null +++ b/demos/shared/signoz/clickhouse-storage.xml @@ -0,0 +1,41 @@ + + + + + + 10485760 + + + s3 + + https://BUCKET-NAME.s3-REGION-NAME.amazonaws.com/data/ + ACCESS-KEY-ID + SECRET-ACCESS-KEY + + + + + + + + + + + default + + + s3 + 0 + + + + + + diff --git a/demos/shared/signoz/clickhouse-users.xml b/demos/shared/signoz/clickhouse-users.xml new file mode 100644 index 00000000..f1856207 --- /dev/null +++ b/demos/shared/signoz/clickhouse-users.xml @@ -0,0 +1,123 @@ + + + + + + + + + + 10000000000 + + + random + + + + + 1 + + + + + + + + + + + + + ::/0 + + + + default + + + default + + + + + + + + + + + + + + 3600 + + + 0 + 0 + 0 + 0 + 0 + + + + diff --git a/demos/shared/signoz/custom-function.xml b/demos/shared/signoz/custom-function.xml new file mode 100644 index 00000000..b2b3f91a --- /dev/null +++ b/demos/shared/signoz/custom-function.xml @@ -0,0 +1,21 @@ + + + executable + histogramQuantile + Float64 + + Array(Float64) + buckets + + + Array(Float64) + counts + + + Float64 + quantile + + CSV + ./histogramQuantile + + diff --git a/demos/shared/signoz/docker-compose-core.yaml b/demos/shared/signoz/docker-compose-core.yaml new file mode 100644 index 00000000..ec9e0697 --- /dev/null +++ b/demos/shared/signoz/docker-compose-core.yaml @@ -0,0 +1,133 @@ +version: "2.4" + +include: + - test-app-docker-compose.yaml + +services: + zookeeper-1: + image: bitnami/zookeeper:3.7.1 + container_name: signoz-zookeeper-1 + hostname: zookeeper-1 + user: root + ports: + - "2181:2181" + - "2888:2888" + - "3888:3888" + volumes: + - ./data/zookeeper-1:/bitnami/zookeeper + environment: + - ZOO_SERVER_ID=1 + # - ZOO_SERVERS=0.0.0.0:2888:3888,zookeeper-2:2888:3888,zookeeper-3:2888:3888 + - ALLOW_ANONYMOUS_LOGIN=yes + - ZOO_AUTOPURGE_INTERVAL=1 + + clickhouse: + image: clickhouse/clickhouse-server:24.1.2-alpine + container_name: signoz-clickhouse + # ports: + # - "9000:9000" + # - "8123:8123" + tty: true + volumes: + - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml + - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml + - ./custom-function.xml:/etc/clickhouse-server/custom-function.xml + - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml + - ./data/clickhouse/:/var/lib/clickhouse/ + - ./user_scripts:/var/lib/clickhouse/user_scripts/ + restart: on-failure + logging: + options: + max-size: 50m + max-file: "3" + healthcheck: + # "clickhouse", "client", "-u ${CLICKHOUSE_USER}", "--password ${CLICKHOUSE_PASSWORD}", "-q 'SELECT 1'" + test: + [ + "CMD", + "wget", + "--spider", + "-q", + "0.0.0.0:8123/ping" + ] + interval: 30s + timeout: 5s + retries: 3 + + alertmanager: + container_name: signoz-alertmanager + image: signoz/alertmanager:0.23.7 + volumes: + - ./data/alertmanager:/data + depends_on: + query-service: + condition: service_healthy + restart: on-failure + command: + - --queryService.url=http://query-service:8085 + - --storage.path=/data + + otel-collector-migrator: + image: signoz/signoz-schema-migrator:${OTELCOL_TAG:-0.111.5} + container_name: otel-migrator + command: + - "--dsn=tcp://clickhouse:9000" + depends_on: + clickhouse: + condition: service_healthy + # clickhouse-2: + # condition: service_healthy + # clickhouse-3: + # condition: service_healthy + + # Notes for Maintainers/Contributors who will change Line Numbers of Frontend & Query-Section. Please Update Line Numbers in `./scripts/commentLinesForSetup.sh` & `./CONTRIBUTING.md` + otel-collector: + container_name: signoz-otel-collector + image: signoz/signoz-otel-collector:0.111.5 + command: + [ + "--config=/etc/otel-collector-config.yaml", + "--manager-config=/etc/manager-config.yaml", + "--copy-path=/var/tmp/collector-config.yaml", + "--feature-gates=-pkg.translator.prometheus.NormalizeName" + ] + # user: root # required for reading docker container logs + volumes: + - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml + - ./otel-collector-opamp-config.yaml:/etc/manager-config.yaml + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /:/hostfs:ro + environment: + - OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host,os.type=linux + ports: + # - "1777:1777" # pprof extension + - "4317:4317" # OTLP gRPC receiver + - "4318:4318" # OTLP HTTP receiver + # - "8888:8888" # OtelCollector internal metrics + # - "8889:8889" # signoz spanmetrics exposed by the agent + # - "9411:9411" # Zipkin port + # - "13133:13133" # health check extension + # - "14250:14250" # Jaeger gRPC + # - "14268:14268" # Jaeger thrift HTTP + # - "55678:55678" # OpenCensus receiver + # - "55679:55679" # zPages extension + restart: on-failure + depends_on: + clickhouse: + condition: service_healthy + otel-collector-migrator: + condition: service_completed_successfully + query-service: + condition: service_healthy + + logspout: + image: "gliderlabs/logspout:v3.2.14" + container_name: signoz-logspout + volumes: + - /etc/hostname:/etc/host_hostname:ro + - /var/run/docker.sock:/var/run/docker.sock + command: syslog+tcp://otel-collector:2255 + depends_on: + - otel-collector + restart: on-failure diff --git a/demos/shared/signoz/docker-compose-local.yaml b/demos/shared/signoz/docker-compose-local.yaml new file mode 100644 index 00000000..7effc129 --- /dev/null +++ b/demos/shared/signoz/docker-compose-local.yaml @@ -0,0 +1,67 @@ +version: "2.4" + +services: + query-service: + hostname: query-service + build: + context: "../../../" + dockerfile: "./pkg/query-service/Dockerfile" + args: + LDFLAGS: "" + TARGETPLATFORM: "${GOOS}/${GOARCH}" + container_name: signoz-query-service + environment: + - ClickHouseUrl=tcp://clickhouse:9000 + - ALERTMANAGER_API_PREFIX=http://alertmanager:9093/api/ + - SIGNOZ_LOCAL_DB_PATH=/var/lib/signoz/signoz.db + - DASHBOARDS_PATH=/root/config/dashboards + - STORAGE=clickhouse + - GODEBUG=netdns=go + - TELEMETRY_ENABLED=true + volumes: + - ./prometheus.yml:/root/config/prometheus.yml + - ../dashboards:/root/config/dashboards + - ./data/signoz/:/var/lib/signoz/ + command: + [ + "-config=/root/config/prometheus.yml", + "--use-logs-new-schema=true" + ] + ports: + - "6060:6060" + - "8080:8080" + restart: on-failure + healthcheck: + test: + [ + "CMD", + "wget", + "--spider", + "-q", + "localhost:8080/api/v1/health" + ] + interval: 30s + timeout: 5s + retries: 3 + depends_on: + clickhouse: + condition: service_healthy + + frontend: + build: + context: "../../../frontend" + dockerfile: "./Dockerfile" + args: + TARGETOS: "${GOOS}" + TARGETPLATFORM: "${GOARCH}" + container_name: signoz-frontend + environment: + - FRONTEND_API_ENDPOINT=http://query-service:8080 + restart: on-failure + depends_on: + - alertmanager + - query-service + ports: + - "3301:3301" + volumes: + - ../common/nginx-config.conf:/etc/nginx/conf.d/default.conf diff --git a/demos/shared/signoz/docker-compose-minimal.yaml b/demos/shared/signoz/docker-compose-minimal.yaml new file mode 100644 index 00000000..d4861a41 --- /dev/null +++ b/demos/shared/signoz/docker-compose-minimal.yaml @@ -0,0 +1,296 @@ +x-clickhouse-defaults: &clickhouse-defaults + restart: on-failure + # addding non LTS version due to this fix https://github.com/ClickHouse/ClickHouse/commit/32caf8716352f45c1b617274c7508c86b7d1afab + image: clickhouse/clickhouse-server:24.1.2-alpine + tty: true + depends_on: + - zookeeper-1 + # - zookeeper-2 + # - zookeeper-3 + logging: + options: + max-size: 50m + max-file: "3" + healthcheck: + # "clickhouse", "client", "-u ${CLICKHOUSE_USER}", "--password ${CLICKHOUSE_PASSWORD}", "-q 'SELECT 1'" + test: + [ + "CMD", + "wget", + "--spider", + "-q", + "0.0.0.0:8123/ping" + ] + interval: 30s + timeout: 5s + retries: 3 + ulimits: + nproc: 65535 + nofile: + soft: 262144 + hard: 262144 + +x-db-depend: &db-depend + depends_on: + clickhouse: + condition: service_healthy + otel-collector-migrator-sync: + condition: service_completed_successfully + # clickhouse-2: + # condition: service_healthy + # clickhouse-3: + # condition: service_healthy + +services: + + zookeeper-1: + image: bitnami/zookeeper:3.7.1 + container_name: signoz-zookeeper-1 + hostname: zookeeper-1 + user: root + ports: + - "2181:2181" + - "2888:2888" + - "3888:3888" + volumes: + - ./data/zookeeper-1:/bitnami/zookeeper + environment: + - ZOO_SERVER_ID=1 + # - ZOO_SERVERS=0.0.0.0:2888:3888,zookeeper-2:2888:3888,zookeeper-3:2888:3888 + - ALLOW_ANONYMOUS_LOGIN=yes + - ZOO_AUTOPURGE_INTERVAL=1 + + # zookeeper-2: + # image: bitnami/zookeeper:3.7.0 + # container_name: signoz-zookeeper-2 + # hostname: zookeeper-2 + # user: root + # ports: + # - "2182:2181" + # - "2889:2888" + # - "3889:3888" + # volumes: + # - ./data/zookeeper-2:/bitnami/zookeeper + # environment: + # - ZOO_SERVER_ID=2 + # - ZOO_SERVERS=zookeeper-1:2888:3888,0.0.0.0:2888:3888,zookeeper-3:2888:3888 + # - ALLOW_ANONYMOUS_LOGIN=yes + # - ZOO_AUTOPURGE_INTERVAL=1 + + # zookeeper-3: + # image: bitnami/zookeeper:3.7.0 + # container_name: signoz-zookeeper-3 + # hostname: zookeeper-3 + # user: root + # ports: + # - "2183:2181" + # - "2890:2888" + # - "3890:3888" + # volumes: + # - ./data/zookeeper-3:/bitnami/zookeeper + # environment: + # - ZOO_SERVER_ID=3 + # - ZOO_SERVERS=zookeeper-1:2888:3888,zookeeper-2:2888:3888,0.0.0.0:2888:3888 + # - ALLOW_ANONYMOUS_LOGIN=yes + # - ZOO_AUTOPURGE_INTERVAL=1 + + clickhouse: + <<: *clickhouse-defaults + container_name: signoz-clickhouse + hostname: clickhouse + ports: + - "9000:9000" + - "8123:8123" + - "9181:9181" + volumes: + - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml + - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml + - ./custom-function.xml:/etc/clickhouse-server/custom-function.xml + - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml + - ./data/clickhouse/:/var/lib/clickhouse/ + - ./user_scripts:/var/lib/clickhouse/user_scripts/ + + # clickhouse-2: + # <<: *clickhouse-defaults + # container_name: signoz-clickhouse-2 + # hostname: clickhouse-2 + # ports: + # - "9001:9000" + # - "8124:8123" + # - "9182:9181" + # volumes: + # - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml + # - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml + # - ./custom-function.xml:/etc/clickhouse-server/custom-function.xml + # - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + # # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml + # - ./data/clickhouse-2/:/var/lib/clickhouse/ + # - ./user_scripts:/var/lib/clickhouse/user_scripts/ + + + # clickhouse-3: + # <<: *clickhouse-defaults + # container_name: signoz-clickhouse-3 + # hostname: clickhouse-3 + # ports: + # - "9002:9000" + # - "8125:8123" + # - "9183:9181" + # volumes: + # - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml + # - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml + # - ./custom-function.xml:/etc/clickhouse-server/custom-function.xml + # - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + # # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml + # - ./data/clickhouse-3/:/var/lib/clickhouse/ + # - ./user_scripts:/var/lib/clickhouse/user_scripts/ + + alertmanager: + image: signoz/alertmanager:${ALERTMANAGER_TAG:-0.23.7} + container_name: signoz-alertmanager + volumes: + - ./data/alertmanager:/data + depends_on: + query-service: + condition: service_healthy + restart: on-failure + command: + - --queryService.url=http://query-service:8085 + - --storage.path=/data + + # Notes for Maintainers/Contributors who will change Line Numbers of Frontend & Query-Section. Please Update Line Numbers in `./scripts/commentLinesForSetup.sh` & `./CONTRIBUTING.md` + + query-service: + image: signoz/query-service:${DOCKER_TAG:-0.57.0} + container_name: signoz-query-service + command: + [ + "-config=/root/config/prometheus.yml", + "--use-logs-new-schema=true" + ] + # ports: + # - "6060:6060" # pprof port + # - "8080:8080" # query-service port + volumes: + - ./prometheus.yml:/root/config/prometheus.yml + - ../dashboards:/root/config/dashboards + - ./data/signoz/:/var/lib/signoz/ + environment: + - ClickHouseUrl=tcp://clickhouse:9000 + - ALERTMANAGER_API_PREFIX=http://alertmanager:9093/api/ + - SIGNOZ_LOCAL_DB_PATH=/var/lib/signoz/signoz.db + - DASHBOARDS_PATH=/root/config/dashboards + - STORAGE=clickhouse + - GODEBUG=netdns=go + - TELEMETRY_ENABLED=true + - DEPLOYMENT_TYPE=docker-standalone-amd + restart: on-failure + healthcheck: + test: + [ + "CMD", + "wget", + "--spider", + "-q", + "localhost:8080/api/v1/health" + ] + interval: 30s + timeout: 5s + retries: 3 + <<: *db-depend + + frontend: + image: signoz/frontend:${DOCKER_TAG:-0.57.0} + container_name: signoz-frontend + restart: on-failure + depends_on: + - alertmanager + - query-service + ports: + - "3301:3301" + volumes: + - ./nginx-config.conf:/etc/nginx/conf.d/default.conf + + otel-collector-migrator-sync: + image: signoz/signoz-schema-migrator:${OTELCOL_TAG:-0.111.5} + container_name: otel-migrator-sync + command: + - "sync" + - "--dsn=tcp://clickhouse:9000" + - "--up=" + depends_on: + clickhouse: + condition: service_healthy + # clickhouse-2: + # condition: service_healthy + # clickhouse-3: + # condition: service_healthy + + otel-collector-migrator-async: + image: signoz/signoz-schema-migrator:${OTELCOL_TAG:-0.111.5} + container_name: otel-migrator-async + command: + - "async" + - "--dsn=tcp://clickhouse:9000" + - "--up=" + depends_on: + clickhouse: + condition: service_healthy + otel-collector-migrator-sync: + condition: service_completed_successfully + # clickhouse-2: + # condition: service_healthy + # clickhouse-3: + # condition: service_healthy + + otel-collector: + image: signoz/signoz-otel-collector:${OTELCOL_TAG:-0.111.5} + container_name: signoz-otel-collector + command: + [ + "--config=/etc/otel-collector-config.yaml", + "--manager-config=/etc/manager-config.yaml", + "--copy-path=/var/tmp/collector-config.yaml", + "--feature-gates=-pkg.translator.prometheus.NormalizeName" + ] + user: root # required for reading docker container logs + volumes: + - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml + - ./otel-collector-opamp-config.yaml:/etc/manager-config.yaml + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /:/hostfs:ro + environment: + - OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host,os.type=linux + - LOW_CARDINAL_EXCEPTION_GROUPING=false + ports: + # - "1777:1777" # pprof extension + - "4317:4317" # OTLP gRPC receiver + - "4318:4318" # OTLP HTTP receiver + # - "8888:8888" # OtelCollector internal metrics + # - "8889:8889" # signoz spanmetrics exposed by the agent + # - "9411:9411" # Zipkin port + # - "13133:13133" # health check extension + # - "14250:14250" # Jaeger gRPC + # - "14268:14268" # Jaeger thrift HTTP + # - "55678:55678" # OpenCensus receiver + # - "55679:55679" # zPages extension + restart: on-failure + depends_on: + clickhouse: + condition: service_healthy + otel-collector-migrator-sync: + condition: service_completed_successfully + query-service: + condition: service_healthy + + logspout: + image: "gliderlabs/logspout:v3.2.14" + container_name: signoz-logspout + volumes: + - /etc/hostname:/etc/host_hostname:ro + - /var/run/docker.sock:/var/run/docker.sock + command: syslog+tcp://otel-collector:2255 + depends_on: + - otel-collector + restart: on-failure diff --git a/demos/shared/signoz/docker-compose.testing.yaml b/demos/shared/signoz/docker-compose.testing.yaml new file mode 100644 index 00000000..d8c0db25 --- /dev/null +++ b/demos/shared/signoz/docker-compose.testing.yaml @@ -0,0 +1,284 @@ +version: "2.4" + +include: + - test-app-docker-compose.yaml + +x-clickhouse-defaults: &clickhouse-defaults + restart: on-failure + # addding non LTS version due to this fix https://github.com/ClickHouse/ClickHouse/commit/32caf8716352f45c1b617274c7508c86b7d1afab + image: clickhouse/clickhouse-server:24.1.2-alpine + tty: true + depends_on: + - zookeeper-1 + # - zookeeper-2 + # - zookeeper-3 + logging: + options: + max-size: 50m + max-file: "3" + healthcheck: + # "clickhouse", "client", "-u ${CLICKHOUSE_USER}", "--password ${CLICKHOUSE_PASSWORD}", "-q 'SELECT 1'" + test: + [ + "CMD", + "wget", + "--spider", + "-q", + "0.0.0.0:8123/ping" + ] + interval: 30s + timeout: 5s + retries: 3 + ulimits: + nproc: 65535 + nofile: + soft: 262144 + hard: 262144 + +x-db-depend: &db-depend + depends_on: + clickhouse: + condition: service_healthy + otel-collector-migrator: + condition: service_completed_successfully + # clickhouse-2: + # condition: service_healthy + # clickhouse-3: + # condition: service_healthy + +services: + + zookeeper-1: + image: bitnami/zookeeper:3.7.1 + container_name: signoz-zookeeper-1 + hostname: zookeeper-1 + user: root + ports: + - "2181:2181" + - "2888:2888" + - "3888:3888" + volumes: + - ./data/zookeeper-1:/bitnami/zookeeper + environment: + - ZOO_SERVER_ID=1 + # - ZOO_SERVERS=0.0.0.0:2888:3888,zookeeper-2:2888:3888,zookeeper-3:2888:3888 + - ALLOW_ANONYMOUS_LOGIN=yes + - ZOO_AUTOPURGE_INTERVAL=1 + + # zookeeper-2: + # image: bitnami/zookeeper:3.7.0 + # container_name: signoz-zookeeper-2 + # hostname: zookeeper-2 + # user: root + # ports: + # - "2182:2181" + # - "2889:2888" + # - "3889:3888" + # volumes: + # - ./data/zookeeper-2:/bitnami/zookeeper + # environment: + # - ZOO_SERVER_ID=2 + # - ZOO_SERVERS=zookeeper-1:2888:3888,0.0.0.0:2888:3888,zookeeper-3:2888:3888 + # - ALLOW_ANONYMOUS_LOGIN=yes + # - ZOO_AUTOPURGE_INTERVAL=1 + + # zookeeper-3: + # image: bitnami/zookeeper:3.7.0 + # container_name: signoz-zookeeper-3 + # hostname: zookeeper-3 + # user: root + # ports: + # - "2183:2181" + # - "2890:2888" + # - "3890:3888" + # volumes: + # - ./data/zookeeper-3:/bitnami/zookeeper + # environment: + # - ZOO_SERVER_ID=3 + # - ZOO_SERVERS=zookeeper-1:2888:3888,zookeeper-2:2888:3888,0.0.0.0:2888:3888 + # - ALLOW_ANONYMOUS_LOGIN=yes + # - ZOO_AUTOPURGE_INTERVAL=1 + + clickhouse: + <<: *clickhouse-defaults + container_name: signoz-clickhouse + hostname: clickhouse + ports: + - "9000:9000" + - "8123:8123" + - "9181:9181" + volumes: + - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml + - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml + - ./custom-function.xml:/etc/clickhouse-server/custom-function.xml + - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml + - ./data/clickhouse/:/var/lib/clickhouse/ + - ./user_scripts:/var/lib/clickhouse/user_scripts/ + + # clickhouse-2: + # <<: *clickhouse-defaults + # container_name: signoz-clickhouse-2 + # hostname: clickhouse-2 + # ports: + # - "9001:9000" + # - "8124:8123" + # - "9182:9181" + # volumes: + # - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml + # - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml + # - ./custom-function.xml:/etc/clickhouse-server/custom-function.xml + # - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + # # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml + # - ./data/clickhouse-2/:/var/lib/clickhouse/ + # - ./user_scripts:/var/lib/clickhouse/user_scripts/ + + + # clickhouse-3: + # <<: *clickhouse-defaults + # container_name: signoz-clickhouse-3 + # hostname: clickhouse-3 + # ports: + # - "9002:9000" + # - "8125:8123" + # - "9183:9181" + # volumes: + # - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml + # - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml + # - ./custom-function.xml:/etc/clickhouse-server/custom-function.xml + # - ./clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + # # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml + # - ./data/clickhouse-3/:/var/lib/clickhouse/ + # - ./user_scripts:/var/lib/clickhouse/user_scripts/ + + alertmanager: + image: signoz/alertmanager:${ALERTMANAGER_TAG:-0.23.7} + container_name: signoz-alertmanager + volumes: + - ./data/alertmanager:/data + depends_on: + query-service: + condition: service_healthy + restart: on-failure + command: + - --queryService.url=http://query-service:8085 + - --storage.path=/data + + # Notes for Maintainers/Contributors who will change Line Numbers of Frontend & Query-Section. Please Update Line Numbers in `./scripts/commentLinesForSetup.sh` & `./CONTRIBUTING.md` + + query-service: + image: signoz/query-service:${DOCKER_TAG:-0.57.0} + container_name: signoz-query-service + command: + [ + "-config=/root/config/prometheus.yml", + "-gateway-url=https://api.staging.signoz.cloud", + "--use-logs-new-schema=true" + ] + # ports: + # - "6060:6060" # pprof port + # - "8080:8080" # query-service port + volumes: + - ./prometheus.yml:/root/config/prometheus.yml + - ../dashboards:/root/config/dashboards + - ./data/signoz/:/var/lib/signoz/ + environment: + - ClickHouseUrl=tcp://clickhouse:9000 + - ALERTMANAGER_API_PREFIX=http://alertmanager:9093/api/ + - SIGNOZ_LOCAL_DB_PATH=/var/lib/signoz/signoz.db + - DASHBOARDS_PATH=/root/config/dashboards + - STORAGE=clickhouse + - GODEBUG=netdns=go + - TELEMETRY_ENABLED=true + - DEPLOYMENT_TYPE=docker-standalone-amd + restart: on-failure + healthcheck: + test: + [ + "CMD", + "wget", + "--spider", + "-q", + "localhost:8080/api/v1/health" + ] + interval: 30s + timeout: 5s + retries: 3 + <<: *db-depend + + frontend: + image: signoz/frontend:${DOCKER_TAG:-0.57.0} + container_name: signoz-frontend + restart: on-failure + depends_on: + - alertmanager + - query-service + ports: + - "3301:3301" + volumes: + - ../common/nginx-config.conf:/etc/nginx/conf.d/default.conf + + otel-collector-migrator: + image: signoz/signoz-schema-migrator:${OTELCOL_TAG:-0.111.5} + container_name: otel-migrator + command: + - "--dsn=tcp://clickhouse:9000" + depends_on: + clickhouse: + condition: service_healthy + # clickhouse-2: + # condition: service_healthy + # clickhouse-3: + # condition: service_healthy + + + otel-collector: + image: signoz/signoz-otel-collector:${OTELCOL_TAG:-0.111.5} + container_name: signoz-otel-collector + command: + [ + "--config=/etc/otel-collector-config.yaml", + "--manager-config=/etc/manager-config.yaml", + "--copy-path=/var/tmp/collector-config.yaml", + "--feature-gates=-pkg.translator.prometheus.NormalizeName" + ] + user: root # required for reading docker container logs + volumes: + - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml + - ./otel-collector-opamp-config.yaml:/etc/manager-config.yaml + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /:/hostfs:ro + environment: + - OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host,os.type=linux + - LOW_CARDINAL_EXCEPTION_GROUPING=false + ports: + # - "1777:1777" # pprof extension + - "4317:4317" # OTLP gRPC receiver + - "4318:4318" # OTLP HTTP receiver + # - "8888:8888" # OtelCollector internal metrics + # - "8889:8889" # signoz spanmetrics exposed by the agent + # - "9411:9411" # Zipkin port + # - "13133:13133" # health check extension + # - "14250:14250" # Jaeger gRPC + # - "14268:14268" # Jaeger thrift HTTP + # - "55678:55678" # OpenCensus receiver + # - "55679:55679" # zPages extension + restart: on-failure + depends_on: + clickhouse: + condition: service_healthy + otel-collector-migrator: + condition: service_completed_successfully + query-service: + condition: service_healthy + + logspout: + image: "gliderlabs/logspout:v3.2.14" + container_name: signoz-logspout + volumes: + - /etc/hostname:/etc/host_hostname:ro + - /var/run/docker.sock:/var/run/docker.sock + command: syslog+tcp://otel-collector:2255 + depends_on: + - otel-collector + restart: on-failure diff --git a/demos/shared/signoz/docker-compose.yaml b/demos/shared/signoz/docker-compose.yaml new file mode 100644 index 00000000..251ca253 --- /dev/null +++ b/demos/shared/signoz/docker-compose.yaml @@ -0,0 +1,3 @@ +include: + - test-app-docker-compose.yaml + - docker-compose-minimal.yaml diff --git a/demos/shared/signoz/keeper_config.xml b/demos/shared/signoz/keeper_config.xml new file mode 100644 index 00000000..f9a96dca --- /dev/null +++ b/demos/shared/signoz/keeper_config.xml @@ -0,0 +1,64 @@ + + + + information + /var/log/clickhouse-keeper/clickhouse-keeper.log + /var/log/clickhouse-keeper/clickhouse-keeper.err.log + + 1000M + 10 + + + + 0.0.0.0 + 4096 + + + 9181 + + + 1 + + /var/lib/clickhouse/coordination/logs + /var/lib/clickhouse/coordination/snapshots + + + 10000 + 10000 + 100000 + information + false + + + + + true + + + 1 + + + clickhouses-keeper-1 + 9234 + + + + + + + diff --git a/demos/shared/signoz/nginx-config.conf b/demos/shared/signoz/nginx-config.conf new file mode 100644 index 00000000..442a2899 --- /dev/null +++ b/demos/shared/signoz/nginx-config.conf @@ -0,0 +1,64 @@ +map $http_upgrade $connection_upgrade { + default upgrade; + '' close; +} + +server { + listen 3301; + server_name _; + + gzip on; + gzip_static on; + gzip_types text/plain text/css application/json application/x-javascript text/xml application/xml application/xml+rss text/javascript; + gzip_proxied any; + gzip_vary on; + gzip_comp_level 6; + gzip_buffers 16 8k; + gzip_http_version 1.1; + + # to handle uri issue 414 from nginx + client_max_body_size 24M; + large_client_header_buffers 8 128k; + + location / { + if ( $uri = '/index.html' ) { + add_header Cache-Control no-store always; + } + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html; + } + + location ~ ^/api/(v1|v3)/logs/(tail|livetail){ + proxy_pass http://query-service:8080; + proxy_http_version 1.1; + + # connection will be closed if no data is read for 600s between successive read operations + proxy_read_timeout 600s; + + # dont buffer the data send it directly to client. + proxy_buffering off; + proxy_cache off; + } + + location /api { + proxy_pass http://query-service:8080/api; + # connection will be closed if no data is read for 600s between successive read operations + proxy_read_timeout 600s; + } + + location /ws { + proxy_pass http://query-service:8080/ws; + proxy_http_version 1.1; + proxy_set_header Upgrade "websocket"; + proxy_set_header Connection "upgrade"; + proxy_read_timeout 86400; + } + + # redirect server error pages to the static page /50x.html + # + error_page 500 502 503 504 /50x.html; + location = /50x.html { + root /usr/share/nginx/html; + } +} diff --git a/demos/shared/signoz/otel-collector-config.yaml b/demos/shared/signoz/otel-collector-config.yaml new file mode 100644 index 00000000..d7b491b7 --- /dev/null +++ b/demos/shared/signoz/otel-collector-config.yaml @@ -0,0 +1,188 @@ +receivers: + tcplog/docker: + listen_address: "0.0.0.0:2255" + operators: + - type: regex_parser + regex: '^<([0-9]+)>[0-9]+ (?P[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?) (?P\S+) (?P\S+) [0-9]+ - -( (?P.*))?' + timestamp: + parse_from: attributes.timestamp + layout: '%Y-%m-%dT%H:%M:%S.%LZ' + - type: move + from: attributes["body"] + to: body + - type: remove + field: attributes.timestamp + # please remove names from below if you want to collect logs from them + - type: filter + id: signoz_logs_filter + expr: 'attributes.container_name matches "^signoz-(logspout|frontend|alertmanager|query-service|otel-collector|clickhouse|zookeeper)"' + opencensus: + endpoint: 0.0.0.0:55678 + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + jaeger: + protocols: + grpc: + endpoint: 0.0.0.0:14250 + thrift_http: + endpoint: 0.0.0.0:14268 + # thrift_compact: + # endpoint: 0.0.0.0:6831 + # thrift_binary: + # endpoint: 0.0.0.0:6832 + hostmetrics: + collection_interval: 30s + root_path: /hostfs + scrapers: + cpu: {} + load: {} + memory: {} + disk: {} + filesystem: {} + network: {} + prometheus: + config: + global: + scrape_interval: 60s + scrape_configs: + # otel-collector internal metrics + - job_name: otel-collector + static_configs: + - targets: + - localhost:8888 + labels: + job_name: otel-collector + + +processors: + batch: + send_batch_size: 10000 + send_batch_max_size: 11000 + timeout: 10s + signozspanmetrics/cumulative: + metrics_exporter: clickhousemetricswrite + metrics_flush_interval: 60s + latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ] + dimensions_cache_size: 100000 + dimensions: + - name: service.namespace + default: default + - name: deployment.environment + default: default + # This is added to ensure the uniqueness of the timeseries + # Otherwise, identical timeseries produced by multiple replicas of + # collectors result in incorrect APM metrics + - name: signoz.collector.id + - name: service.version + - name: browser.platform + - name: browser.mobile + - name: k8s.cluster.name + - name: k8s.node.name + - name: k8s.namespace.name + - name: host.name + - name: host.type + - name: container.name + # memory_limiter: + # # 80% of maximum memory up to 2G + # limit_mib: 1500 + # # 25% of limit up to 2G + # spike_limit_mib: 512 + # check_interval: 5s + # + # # 50% of the maximum memory + # limit_percentage: 50 + # # 20% of max memory usage spike expected + # spike_limit_percentage: 20 + # queued_retry: + # num_workers: 4 + # queue_size: 100 + # retry_on_failure: true + resourcedetection: + # Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels. + detectors: [env, system] # include ec2 for AWS, gcp for GCP and azure for Azure. + timeout: 2s + signozspanmetrics/delta: + metrics_exporter: clickhousemetricswrite + metrics_flush_interval: 60s + latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ] + dimensions_cache_size: 100000 + aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA + enable_exp_histogram: true + dimensions: + - name: service.namespace + default: default + - name: deployment.environment + default: default + # This is added to ensure the uniqueness of the timeseries + # Otherwise, identical timeseries produced by multiple replicas of + # collectors result in incorrect APM metrics + - name: signoz.collector.id + - name: service.version + - name: browser.platform + - name: browser.mobile + - name: k8s.cluster.name + - name: k8s.node.name + - name: k8s.namespace.name + - name: host.name + - name: host.type + - name: container.name + +extensions: + health_check: + endpoint: 0.0.0.0:13133 + zpages: + endpoint: 0.0.0.0:55679 + pprof: + endpoint: 0.0.0.0:1777 + +exporters: + clickhousetraces: + datasource: tcp://clickhouse:9000/signoz_traces + low_cardinal_exception_grouping: ${env:LOW_CARDINAL_EXCEPTION_GROUPING} + clickhousemetricswrite: + endpoint: tcp://clickhouse:9000/signoz_metrics + resource_to_telemetry_conversion: + enabled: true + clickhousemetricswrite/prometheus: + endpoint: tcp://clickhouse:9000/signoz_metrics + clickhouselogsexporter: + dsn: tcp://clickhouse:9000/signoz_logs + timeout: 10s + use_new_schema: true + # logging: {} + +service: + telemetry: + logs: + encoding: json + metrics: + address: 0.0.0.0:8888 + extensions: + - health_check + - zpages + - pprof + pipelines: + traces: + receivers: [jaeger, otlp] + processors: [signozspanmetrics/cumulative, signozspanmetrics/delta, batch] + exporters: [clickhousetraces] + metrics: + receivers: [otlp] + processors: [batch] + exporters: [clickhousemetricswrite] + metrics/generic: + receivers: [hostmetrics] + processors: [resourcedetection, batch] + exporters: [clickhousemetricswrite] + metrics/prometheus: + receivers: [prometheus] + processors: [batch] + exporters: [clickhousemetricswrite/prometheus] + logs: + receivers: [otlp, tcplog/docker] + processors: [batch] + exporters: [clickhouselogsexporter] diff --git a/demos/shared/signoz/otel-collector-opamp-config.yaml b/demos/shared/signoz/otel-collector-opamp-config.yaml new file mode 100644 index 00000000..e408b55e --- /dev/null +++ b/demos/shared/signoz/otel-collector-opamp-config.yaml @@ -0,0 +1 @@ +server_endpoint: ws://query-service:4320/v1/opamp diff --git a/demos/shared/signoz/prometheus.yml b/demos/shared/signoz/prometheus.yml new file mode 100644 index 00000000..d7c52893 --- /dev/null +++ b/demos/shared/signoz/prometheus.yml @@ -0,0 +1,25 @@ +# my global config +global: + scrape_interval: 5s # Set the scrape interval to every 15 seconds. Default is every 1 minute. + evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. + # scrape_timeout is set to the global default (10s). + +# Alertmanager configuration +alerting: + alertmanagers: + - static_configs: + - targets: + - alertmanager:9093 + +# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. +rule_files: + # - "first_rules.yml" + # - "second_rules.yml" + - 'alerts.yml' + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. +scrape_configs: [] + +remote_read: + - url: tcp://clickhouse:9000/signoz_metrics diff --git a/demos/shared/signoz/test-app-docker-compose.yaml b/demos/shared/signoz/test-app-docker-compose.yaml new file mode 100644 index 00000000..c043d75d --- /dev/null +++ b/demos/shared/signoz/test-app-docker-compose.yaml @@ -0,0 +1,26 @@ +services: + hotrod: + image: jaegertracing/example-hotrod:1.30 + container_name: hotrod + logging: + options: + max-size: 50m + max-file: "3" + command: [ "all" ] + environment: + - JAEGER_ENDPOINT=http://otel-collector:14268/api/traces + + load-hotrod: + image: "signoz/locust:1.2.3" + container_name: load-hotrod + hostname: load-hotrod + environment: + ATTACKED_HOST: http://hotrod:8080 + LOCUST_MODE: standalone + NO_PROXY: standalone + TASK_DELAY_FROM: 5 + TASK_DELAY_TO: 30 + QUIET_MODE: "${QUIET_MODE:-false}" + LOCUST_OPTS: "--headless -u 10 -r 1" + volumes: + - ../common/locust-scripts:/locust diff --git a/demos/shared/signoz/user_scripts/histogramQuantile b/demos/shared/signoz/user_scripts/histogramQuantile new file mode 100755 index 00000000..3b77a7b2 Binary files /dev/null and b/demos/shared/signoz/user_scripts/histogramQuantile differ diff --git a/demos/shared/signoz/user_scripts/histogramQuantile.go b/demos/shared/signoz/user_scripts/histogramQuantile.go new file mode 100644 index 00000000..9540a774 --- /dev/null +++ b/demos/shared/signoz/user_scripts/histogramQuantile.go @@ -0,0 +1,237 @@ +package main + +import ( + "bufio" + "fmt" + "math" + "os" + "sort" + "strconv" + "strings" +) + +// NOTE: executable must be built with target OS and architecture set to linux/amd64 +// env GOOS=linux GOARCH=amd64 go build -o histogramQuantile histogramQuantile.go + +// The following code is adapted from the following source: +// https://github.com/prometheus/prometheus/blob/main/promql/quantile.go + +type bucket struct { + upperBound float64 + count float64 +} + +// buckets implements sort.Interface. +type buckets []bucket + +func (b buckets) Len() int { return len(b) } +func (b buckets) Swap(i, j int) { b[i], b[j] = b[j], b[i] } +func (b buckets) Less(i, j int) bool { return b[i].upperBound < b[j].upperBound } + +// bucketQuantile calculates the quantile 'q' based on the given buckets. The +// buckets will be sorted by upperBound by this function (i.e. no sorting +// needed before calling this function). The quantile value is interpolated +// assuming a linear distribution within a bucket. However, if the quantile +// falls into the highest bucket, the upper bound of the 2nd highest bucket is +// returned. A natural lower bound of 0 is assumed if the upper bound of the +// lowest bucket is greater 0. In that case, interpolation in the lowest bucket +// happens linearly between 0 and the upper bound of the lowest bucket. +// However, if the lowest bucket has an upper bound less or equal 0, this upper +// bound is returned if the quantile falls into the lowest bucket. +// +// There are a number of special cases (once we have a way to report errors +// happening during evaluations of AST functions, we should report those +// explicitly): +// +// If 'buckets' has 0 observations, NaN is returned. +// +// If 'buckets' has fewer than 2 elements, NaN is returned. +// +// If the highest bucket is not +Inf, NaN is returned. +// +// If q==NaN, NaN is returned. +// +// If q<0, -Inf is returned. +// +// If q>1, +Inf is returned. +func bucketQuantile(q float64, buckets buckets) float64 { + if math.IsNaN(q) { + return math.NaN() + } + if q < 0 { + return math.Inf(-1) + } + if q > 1 { + return math.Inf(+1) + } + sort.Sort(buckets) + if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) { + return math.NaN() + } + + buckets = coalesceBuckets(buckets) + ensureMonotonic(buckets) + + if len(buckets) < 2 { + return math.NaN() + } + observations := buckets[len(buckets)-1].count + if observations == 0 { + return math.NaN() + } + rank := q * observations + b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank }) + + if b == len(buckets)-1 { + return buckets[len(buckets)-2].upperBound + } + if b == 0 && buckets[0].upperBound <= 0 { + return buckets[0].upperBound + } + var ( + bucketStart float64 + bucketEnd = buckets[b].upperBound + count = buckets[b].count + ) + if b > 0 { + bucketStart = buckets[b-1].upperBound + count -= buckets[b-1].count + rank -= buckets[b-1].count + } + return bucketStart + (bucketEnd-bucketStart)*(rank/count) +} + +// coalesceBuckets merges buckets with the same upper bound. +// +// The input buckets must be sorted. +func coalesceBuckets(buckets buckets) buckets { + last := buckets[0] + i := 0 + for _, b := range buckets[1:] { + if b.upperBound == last.upperBound { + last.count += b.count + } else { + buckets[i] = last + last = b + i++ + } + } + buckets[i] = last + return buckets[:i+1] +} + +// The assumption that bucket counts increase monotonically with increasing +// upperBound may be violated during: +// +// * Recording rule evaluation of histogram_quantile, especially when rate() +// has been applied to the underlying bucket timeseries. +// * Evaluation of histogram_quantile computed over federated bucket +// timeseries, especially when rate() has been applied. +// +// This is because scraped data is not made available to rule evaluation or +// federation atomically, so some buckets are computed with data from the +// most recent scrapes, but the other buckets are missing data from the most +// recent scrape. +// +// Monotonicity is usually guaranteed because if a bucket with upper bound +// u1 has count c1, then any bucket with a higher upper bound u > u1 must +// have counted all c1 observations and perhaps more, so that c >= c1. +// +// Randomly interspersed partial sampling breaks that guarantee, and rate() +// exacerbates it. Specifically, suppose bucket le=1000 has a count of 10 from +// 4 samples but the bucket with le=2000 has a count of 7 from 3 samples. The +// monotonicity is broken. It is exacerbated by rate() because under normal +// operation, cumulative counting of buckets will cause the bucket counts to +// diverge such that small differences from missing samples are not a problem. +// rate() removes this divergence.) +// +// bucketQuantile depends on that monotonicity to do a binary search for the +// bucket with the φ-quantile count, so breaking the monotonicity +// guarantee causes bucketQuantile() to return undefined (nonsense) results. +// +// As a somewhat hacky solution until ingestion is atomic per scrape, we +// calculate the "envelope" of the histogram buckets, essentially removing +// any decreases in the count between successive buckets. + +func ensureMonotonic(buckets buckets) { + max := buckets[0].count + for i := 1; i < len(buckets); i++ { + switch { + case buckets[i].count > max: + max = buckets[i].count + case buckets[i].count < max: + buckets[i].count = max + } + } +} + +// End of copied code. + +func readLines() []string { + r := bufio.NewReader(os.Stdin) + bytes := []byte{} + lines := []string{} + for { + line, isPrefix, err := r.ReadLine() + if err != nil { + break + } + bytes = append(bytes, line...) + if !isPrefix { + str := strings.TrimSpace(string(bytes)) + if len(str) > 0 { + lines = append(lines, str) + bytes = []byte{} + } + } + } + if len(bytes) > 0 { + lines = append(lines, string(bytes)) + } + return lines +} + +func main() { + lines := readLines() + for _, text := range lines { + // Example input + // "[1, 2, 4, 8, 16]", "[1, 5, 8, 10, 14]", 0.9" + // bounds - counts - quantile + parts := strings.Split(text, "\",") + + var bucketNumbers []float64 + // Strip the ends with square brackets + text = parts[0][2 : len(parts[0])-1] + // Parse the bucket bounds + for _, num := range strings.Split(text, ",") { + num = strings.TrimSpace(num) + number, err := strconv.ParseFloat(num, 64) + if err == nil { + bucketNumbers = append(bucketNumbers, number) + } + } + + var bucketCounts []float64 + // Strip the ends with square brackets + text = parts[1][2 : len(parts[1])-1] + // Parse the bucket counts + for _, num := range strings.Split(text, ",") { + num = strings.TrimSpace(num) + number, err := strconv.ParseFloat(num, 64) + if err == nil { + bucketCounts = append(bucketCounts, number) + } + } + + // Parse the quantile + q, err := strconv.ParseFloat(parts[2], 64) + var b buckets + + if err == nil { + for i := 0; i < len(bucketNumbers); i++ { + b = append(b, bucket{upperBound: bucketNumbers[i], count: bucketCounts[i]}) + } + } + fmt.Println(bucketQuantile(q, b)) + } +} diff --git a/demos/weather_forecast_signoz/README.md b/demos/weather_forecast_signoz/README.md new file mode 100644 index 00000000..d4d9175e --- /dev/null +++ b/demos/weather_forecast_signoz/README.md @@ -0,0 +1,29 @@ +# Function calling +This demo shows how you can use Arch's core function calling capabilites. + +# Starting the demo +1. Please make sure the [pre-requisites](https://github.com/katanemo/arch/?tab=readme-ov-file#prerequisites) are installed correctly +2. Start Arch + +3. + ```sh + sh run_demo.sh + ``` +4. Navigate to http://localhost:18080/ +5. You can type in queries like "how is the weather?" + +# Observability +Arch gateway publishes stats endpoint at http://localhost:19901/stats. In this demo we are using prometheus to pull stats from arch and we are using grafana to visalize the stats in dashboard. To see grafana dashboard follow instructions below, + +1. Start grafana and prometheus using following command + ```yaml + docker compose --profile monitoring up + ``` +1. Navigate to http://localhost:3000/ to open grafana UI (use admin/grafana as credentials) +1. From grafana left nav click on dashboards and select "Intelligent Gateway Overview" to view arch gateway stats + + +Here is a sample interaction, +image + +1. Signoz UI: http://localhost:3301 diff --git a/demos/weather_forecast_signoz/arch_config.yaml b/demos/weather_forecast_signoz/arch_config.yaml new file mode 100644 index 00000000..5ff1fcfa --- /dev/null +++ b/demos/weather_forecast_signoz/arch_config.yaml @@ -0,0 +1,77 @@ +version: "0.1-beta" + +listener: + address: 0.0.0.0 + port: 10000 + message_format: huggingface + connect_timeout: 0.005s + +endpoints: + weather_forecast_service: + endpoint: host.docker.internal:18083 + connect_timeout: 0.005s + +overrides: + # confidence threshold for prompt target intent matching + prompt_target_intent_matching_threshold: 0.6 + +llm_providers: + - name: gpt-4o-mini + access_key: $OPENAI_API_KEY + provider: openai + model: gpt-4o-mini + default: true + + - name: gpt-3.5-turbo-0125 + access_key: $OPENAI_API_KEY + provider: openai + model: gpt-3.5-turbo-0125 + + - name: gpt-4o + access_key: $OPENAI_API_KEY + provider: openai + model: gpt-4o + + - name: ministral-3b + access_key: $MISTRAL_API_KEY + provider: mistral + model: ministral-3b-latest + +system_prompt: | + You are a helpful assistant. + +prompt_targets: + - name: weather_forecast + description: Check weather information for a given city. + parameters: + - name: city + description: the name of the city + required: true + type: str + - name: days + description: the number of days + type: int + required: true + - name: units + description: the temperature unit, e.g., Celsius and Fahrenheit + type: str + default: Fahrenheit + endpoint: + name: weather_forecast_service + path: /weather + + - name: default_target + default: true + description: This is the default target for all unmatched prompts. + endpoint: + name: weather_forecast_service + path: /default_target + system_prompt: | + You are a helpful assistant! Summarize the user's request and provide a helpful response. + # if it is set to false arch will send response that it received from this prompt target to the user + # if true arch will forward the response to the default LLM + auto_llm_dispatch_on_response: false + +tracing: + random_sampling: 100 + # trace_arch: true diff --git a/demos/weather_forecast_signoz/docker-compose.yaml b/demos/weather_forecast_signoz/docker-compose.yaml new file mode 100644 index 00000000..d557c0ad --- /dev/null +++ b/demos/weather_forecast_signoz/docker-compose.yaml @@ -0,0 +1,26 @@ +include: + - ../shared/signoz/docker-compose-minimal.yaml + +services: + weather_forecast_service: + build: + context: ../shared/weather_forecast_service + environment: + - OLTP_HOST=http://otel-collector:4317 + extra_hosts: + - "host.docker.internal:host-gateway" + ports: + - "18083:80" + + chatbot_ui: + build: + context: ../shared/chatbot_ui + ports: + - "18080:8080" + environment: + # this is only because we are running the sample app in the same docker container environemtn as archgw + - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 + extra_hosts: + - "host.docker.internal:host-gateway" + volumes: + - ./arch_config.yaml:/app/arch_config.yaml diff --git a/demos/weather_forecast_signoz/run_demo.sh b/demos/weather_forecast_signoz/run_demo.sh new file mode 100644 index 00000000..f6097b09 --- /dev/null +++ b/demos/weather_forecast_signoz/run_demo.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# Function to start the demo +start_demo() { + # Step 1: Check if .env file exists + if [ -f ".env" ]; then + echo ".env file already exists. Skipping creation." + else + # Step 2: Create `.env` file and set OpenAI key + if [ -z "$OPENAI_API_KEY" ]; then + echo "Error: OPENAI_API_KEY environment variable is not set for the demo." + exit 1 + fi + + echo "Creating .env file..." + echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env + echo ".env file created with OPENAI_API_KEY." + fi + + # Step 3: Start Arch + echo "Starting Arch with arch_config.yaml..." + archgw up arch_config.yaml + + # Step 4: Start Network Agent + echo "Starting Network Agent using Docker Compose..." + docker compose up -d # Run in detached mode +} + +# Function to stop the demo +stop_demo() { + # Step 1: Stop Docker Compose services + echo "Stopping Network Agent using Docker Compose..." + docker compose down + + # Step 2: Stop Arch + echo "Stopping Arch..." + archgw down +} + +# Main script logic +if [ "$1" == "down" ]; then + stop_demo +else + # Default action is to bring the demo up + start_demo +fi