From c10f2694a0adf4bd8aadd5bc33e3cb364a5d2e85 Mon Sep 17 00:00:00 2001 From: Jacob Molz Date: Tue, 26 May 2026 07:43:58 -0400 Subject: [PATCH] fix: safely parse metric labels (#948) --- .../test_query/test_ontology_monitoring.py | 73 +++++++++++++++++++ .../trustgraph/query/ontology/monitoring.py | 25 ++++++- 2 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 tests/unit/test_query/test_ontology_monitoring.py diff --git a/tests/unit/test_query/test_ontology_monitoring.py b/tests/unit/test_query/test_ontology_monitoring.py new file mode 100644 index 00000000..ef69965c --- /dev/null +++ b/tests/unit/test_query/test_ontology_monitoring.py @@ -0,0 +1,73 @@ +""" +Tests for ontology monitoring metrics. +""" + +import importlib.util +import sys +from pathlib import Path + + +MODULE_PATH = ( + Path(__file__).resolve().parents[3] + / "trustgraph-flow" + / "trustgraph" + / "query" + / "ontology" + / "monitoring.py" +) +spec = importlib.util.spec_from_file_location("ontology_monitoring", MODULE_PATH) +assert spec is not None and spec.loader is not None +monitoring = importlib.util.module_from_spec(spec) +sys.modules[spec.name] = monitoring +spec.loader.exec_module(monitoring) + +PerformanceMonitor = monitoring.PerformanceMonitor +_extract_metric_label = monitoring._extract_metric_label + + +def test_extract_metric_label_reads_unquoted_label_value(): + metric_name = "cache_requests_total{cache_type=entity,component=ontology}" + + assert _extract_metric_label(metric_name, "cache_type") == "entity" + + +def test_extract_metric_label_reads_quoted_label_value(): + metric_name = 'cache_requests_total{cache_type="entity",component="ontology"}' + + assert _extract_metric_label(metric_name, "cache_type") == "entity" + + +def test_extract_metric_label_returns_none_when_label_missing(): + metric_name = "cache_requests_total{component=ontology}" + + assert _extract_metric_label(metric_name, "cache_type") is None + + +def test_performance_report_ignores_counters_without_cache_type_label(): + monitor = PerformanceMonitor({"enabled": False}) + monitor.metrics_collector.increment( + "cache_requests_total", + labels={"component": "ontology"}, + ) + monitor.metrics_collector.increment( + "cache_type=not_a_label", + labels={"component": "ontology"}, + ) + monitor.metrics_collector.increment( + "cache_requests_total", + labels={"cache_type": "entity"}, + ) + monitor.metrics_collector.increment( + "cache_hits_total", + labels={"cache_type": "entity"}, + ) + + report = monitor.get_performance_report() + + assert report["cache_performance"] == { + "entity": { + "hit_rate": 1.0, + "total_requests": 1.0, + "total_hits": 1.0, + } + } diff --git a/trustgraph-flow/trustgraph/query/ontology/monitoring.py b/trustgraph-flow/trustgraph/query/ontology/monitoring.py index 703c6e95..cb7e8a2e 100644 --- a/trustgraph-flow/trustgraph/query/ontology/monitoring.py +++ b/trustgraph-flow/trustgraph/query/ontology/monitoring.py @@ -4,6 +4,7 @@ Provides comprehensive monitoring of system performance, query patterns, and res """ import logging +import re import time import asyncio import inspect @@ -276,6 +277,26 @@ class MetricsCollector: return f"{name}{{{label_str}}}" +def _extract_metric_label(metric_name: str, label: str) -> Optional[str]: + """Extract a label value from an internal metric key.""" + labels_start = metric_name.find('{') + labels_end = metric_name.find('}', labels_start + 1) + + if labels_start == -1 or labels_end == -1: + return None + + labels = metric_name[labels_start + 1:labels_end] + label_match = re.search( + rf'(?:^|,){re.escape(label)}=(?:"([^"]*)"|([^,]*))', + labels, + ) + if not label_match: + return None + + quoted_value, unquoted_value = label_match.groups() + return quoted_value if quoted_value is not None else unquoted_value + + class PerformanceMonitor: """Monitors system performance and component health.""" @@ -474,8 +495,8 @@ class PerformanceMonitor: # Cache performance cache_types = set() for metric_name in self.metrics_collector.counters.keys(): - if 'cache_type=' in metric_name: - cache_type = metric_name.split('cache_type=')[1].split(',')[0].split('}')[0] + cache_type = _extract_metric_label(metric_name, 'cache_type') + if cache_type is not None: cache_types.add(cache_type) for cache_type in cache_types: