Better reporting from api-gateway's metrics endpoint (#845)

- Connect failures (DNS, connect refused, server disconnect) now
  return 502 Bad Gateway with a body that carries the connection
  error message; the upstream URL is named in the gateway log.
- Other exceptions still return 500 but now include the exception
  message in the body and log with exc_info=True so the stack trace
  lands in the gateway log.
- Also fixed the logging.error → logger.error inconsistency in the
  same block (module had a named logger at the top that wasn't being
  used).
This commit is contained in:
cybermaggedon 2026-04-22 16:16:57 +01:00 committed by GitHub
parent 95c3b62ef1
commit 31027e30ae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -47,26 +47,37 @@ class MetricsEndpoint:
if not self.auth.permitted(token, self.operation):
return web.HTTPUnauthorized()
try:
path = request.match_info["path"]
async with aiohttp.ClientSession() as session:
url = (
self.prometheus_url + "/api/v1/" + path + "?" +
request.query_string
)
try:
async with aiohttp.ClientSession() as session:
async with session.get(url) as resp:
return web.Response(
status=resp.status,
text=await resp.text()
)
except aiohttp.ClientConnectionError as e:
# Upstream unreachable (connect refused, DNS failure,
# server disconnect). Distinguish from our own errors so
# callers know where the fault is.
logger.error(f"Metrics upstream {url} unreachable: {e}")
return web.Response(
status=502,
text=f"Bad Gateway: metrics upstream unreachable: {e}",
)
except Exception as e:
logging.error(f"Exception: {e}")
raise web.HTTPInternalServerError()
logger.error(f"Metrics proxy exception: {e}", exc_info=True)
return web.Response(
status=500,
text=f"Internal Server Error: {e}",
)