ktx/python/ktx-daemon/src/ktx_daemon/code_execution.py

"""Portable in-process code execution helpers for KTX daemon.

This module preserves the host application's current Python execution behavior.
It runs code with Python ``exec`` in the current process and does not provide
OS-level sandboxing.
"""

from __future__ import annotations

import json
import logging
import re
import sys
from collections.abc import Callable
from io import BytesIO, StringIO
from typing import Any

import numpy as np
import orjson
import pandas as pd
import requests
from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)

VALID_VISUALIZATION_TYPES = ["pie", "bar", "line", "area", "table", "boxplot"]


class ExecuteCodeRequest(BaseModel):
    """Request schema for executing Python code."""

    code: str = Field(..., description="Python code to execute")
    source_id: str | None = Field(
        None,
        description="Chat/dashboard ID for scratchpad file access",
    )
    message_id: str | None = Field(
        None,
        description="Message ID for visualization association",
    )


class VisualizationSpec(BaseModel):
    """Specification for a visualization to be saved by the host application."""

    type: str = Field(..., description="Type marker, always 'visualization'")
    vis_type: str = Field(
        ...,
        description="Visualization type: pie, bar, line, area, table",
    )
    config: dict[str, Any] = Field(
        ...,
        description="Visualization configuration",
    )
    data: list[dict[str, Any]] = Field(
        ...,
        description="Visualization data",
    )
    title: str | None = Field(None, description="Optional title")


class ExecuteCodeResponse(BaseModel):
    """Response schema for code execution."""

    formatted_result: str = Field(
        ...,
        description="Formatted execution result for display",
    )
    result: Any | None = Field(
        None,
        description="The value of the 'result' variable if set",
    )
    console_output: str | None = Field(
        None,
        description="Captured stdout from print statements",
    )
    error: str | None = Field(None, description="Error message if execution failed")
    message: str | None = Field(
        None,
        description="Message if no clear result was returned",
    )
    visualizations: list[VisualizationSpec] | None = Field(
        None,
        description="List of visualizations detected in the result",
    )


ScratchpadHelpers = tuple[
    Callable[[pd.DataFrame, str | None], str],
    Callable[[str], pd.DataFrame],
    Callable[[str, dict[str, Any], list[dict[str, Any]]], str],
]


def dumps_numpy_json(content: Any) -> bytes:
    """Serialize JSON response content with numpy scalar and array support."""

    return orjson.dumps(content, option=orjson.OPT_SERIALIZE_NUMPY)


def _strip_ansi_sequences(text: str) -> str:
    ansi_escape = re.compile(
        r"\x1b\[[0-9;]*[a-zA-Z]|\x1b\([0-9;]*[a-zA-Z]|\x1b\[[0-9;]*~"
    )
    return ansi_escape.sub("", text)


def create_scratchpad_helpers(
    nest_api_url: str | None,
    auth_header: str | None,
    source_id: str | None,
    message_id: str | None = None,
    http_client: Any = requests,
) -> ScratchpadHelpers:
    """Create scratchpad and visualization helpers that call host app APIs."""

    def save_df_to_scratchpad(df: pd.DataFrame, filename: str | None = None) -> str:
        if not nest_api_url or not auth_header or not source_id:
            raise ValueError(
                "nest_api_url, Authorization header, and source_id are required "
                "for scratchpad operations"
            )

        data_json = df.to_dict(orient="records")
        url = f"{nest_api_url}/private_api/scratchpad/{source_id}/files"
        response = http_client.post(
            url,
            data=dumps_numpy_json(
                {"filename": filename, "data": data_json, "format": "json"}
            ),
            headers={"Authorization": auth_header, "Content-Type": "application/json"},
            timeout=30,
        )
        response.raise_for_status()

        saved_filename = response.json()["filename"]
        rows, _cols = df.shape
        return f"{rows} rows saved to {saved_filename}"

    def read_scratchpad_file(filename: str) -> pd.DataFrame:
        if not nest_api_url or not auth_header or not source_id:
            raise ValueError(
                "nest_api_url, Authorization header, and source_id are required "
                "for scratchpad operations"
            )

        url = f"{nest_api_url}/private_api/scratchpad/{source_id}/files/{filename}?format=raw"
        response = http_client.get(
            url,
            headers={"Authorization": auth_header, "Accept": "text/csv"},
            timeout=30,
        )
        response.raise_for_status()

        content_type = response.headers.get("content-type", "")
        if "text/csv" in content_type:
            return pd.read_csv(BytesIO(response.content))

        data = response.json()["data"]
        return pd.DataFrame(data)

    def save_visualization(
        vis_type: str,
        config: dict[str, Any],
        data: list[dict[str, Any]],
    ) -> str:
        if not nest_api_url or not auth_header or not source_id:
            raise ValueError(
                "nest_api_url, Authorization header, and source_id are required "
                "for visualization operations"
            )

        if not message_id:
            raise ValueError("message_id is required for visualization operations")

        if vis_type not in VALID_VISUALIZATION_TYPES:
            raise ValueError(
                f"Invalid visualization type: {vis_type}. Must be one of {VALID_VISUALIZATION_TYPES}"
            )

        url = f"{nest_api_url}/private_api/visualizations/{source_id}"
        payload = {
            "visualizationType": vis_type,
            "config": config,
            "data": data,
            "messageId": message_id,
        }

        response = http_client.post(
            url,
            data=dumps_numpy_json(payload),
            headers={"Authorization": auth_header, "Content-Type": "application/json"},
            timeout=30,
        )
        response.raise_for_status()

        filename = response.json()["filename"]
        print(f"Visualization saved: {filename}")
        return f"![viz]({filename})"

    return save_df_to_scratchpad, read_scratchpad_file, save_visualization


def detect_visualizations(result: Any) -> list[dict[str, Any]]:
    """Detect visualization specs in a code execution result value."""

    visualizations = []

    if isinstance(result, dict) and result.get("type") == "visualization":
        visualizations.append(result)
    elif isinstance(result, list):
        for item in result:
            if isinstance(item, dict) and item.get("type") == "visualization":
                visualizations.append(item)

    return visualizations


def execute_code(
    code: str,
    nest_api_url: str | None = None,
    auth_header: str | None = None,
    source_id: str | None = None,
    message_id: str | None = None,
    scratchpad_helpers: ScratchpadHelpers | None = None,
) -> dict[str, Any]:
    """Execute Python code with the current in-process execution boundary."""

    logger.info("Starting code execution")
    save_df, read_file, save_viz = scratchpad_helpers or create_scratchpad_helpers(
        nest_api_url,
        auth_header,
        source_id,
        message_id,
    )

    namespace = {
        "pd": pd,
        "np": np,
        "json": json,
        "requests": requests,
        "save_df_to_scratchpad": save_df,
        "read_scratchpad_file": read_file,
        "save_visualization": save_viz,
    }

    stdout_capture = StringIO()
    original_stdout = sys.stdout
    sys.stdout = stdout_capture
    console_output = ""

    try:
        logger.info("Executing code in current process namespace")
        exec(code, namespace)

        console_output = stdout_capture.getvalue()
        if "result" in namespace:
            logger.info("Code execution complete, 'result' variable found")
            result_value = namespace["result"]
            visualizations = detect_visualizations(result_value)

            result = {"result": result_value}
            if console_output:
                result["console_output"] = console_output
            if visualizations:
                result["visualizations"] = visualizations

            return result

        logger.info("No result variable found")
        result = {
            "message": "Code executed successfully but no result variable was set"
        }
        if console_output:
            result["console_output"] = console_output
        return result

    except Exception as error:
        logger.exception("Error executing code: %s", error)
        result = {"error": str(error)}
        if console_output:
            result["console_output"] = console_output
        return result

    finally:
        sys.stdout = original_stdout


def format_execution_result(result: dict[str, Any]) -> str:
    """Format execution output for display in host chat responses."""

    formatted_result = ""
    if "console_output" in result:
        formatted_result += "=== Console Output ===\n\n"
        formatted_result += _strip_ansi_sequences(result["console_output"])

    if "result" in result:
        formatted_result += "\n\n=== Result ===\n\n"
        formatted_result += str(result["result"])
    elif "message" in result:
        formatted_result += "\n\n=== Message ===\n\n"
        formatted_result += result["message"]
    elif "error" in result:
        formatted_result += "\n\n=== Error ===\n\n"
        formatted_result += result["error"]

    return formatted_result


def execute_code_response(
    request: ExecuteCodeRequest,
    *,
    nest_api_url: str | None,
    auth_header: str | None,
) -> ExecuteCodeResponse:
    """Execute a validated request and return the public response model."""

    result = execute_code(
        code=request.code,
        nest_api_url=nest_api_url,
        auth_header=auth_header,
        source_id=request.source_id,
        message_id=request.message_id,
    )

    return ExecuteCodeResponse(
        formatted_result=format_execution_result(result),
        result=result.get("result"),
        console_output=result.get("console_output"),
        error=result.get("error"),
        message=result.get("message"),
        visualizations=result.get("visualizations"),
    )
rename klo to ktx 2026-05-10 23:51:24 +02:00			`"""Portable in-process code execution helpers for KTX daemon.`
Initial open-source release 2026-05-10 23:12:26 +02:00
			`This module preserves the host application's current Python execution behavior.`
			It runs code with Python ``exec`` in the current process and does not provide
			`OS-level sandboxing.`
			`"""`

			`from __future__ import annotations`

			`import json`
			`import logging`
			`import re`
			`import sys`
			`from collections.abc import Callable`
			`from io import BytesIO, StringIO`
			`from typing import Any`

			`import numpy as np`
			`import orjson`
			`import pandas as pd`
			`import requests`
			`from pydantic import BaseModel, Field`

			`logger = logging.getLogger(__name__)`

			`VALID_VISUALIZATION_TYPES = ["pie", "bar", "line", "area", "table", "boxplot"]`


			`class ExecuteCodeRequest(BaseModel):`
			`"""Request schema for executing Python code."""`

			`code: str = Field(..., description="Python code to execute")`
			`source_id: str \| None = Field(`
			`None,`
			`description="Chat/dashboard ID for scratchpad file access",`
			`)`
			`message_id: str \| None = Field(`
			`None,`
			`description="Message ID for visualization association",`
			`)`


			`class VisualizationSpec(BaseModel):`
			`"""Specification for a visualization to be saved by the host application."""`

			`type: str = Field(..., description="Type marker, always 'visualization'")`
			`vis_type: str = Field(`
			`...,`
			`description="Visualization type: pie, bar, line, area, table",`
			`)`
			`config: dict[str, Any] = Field(`
			`...,`
			`description="Visualization configuration",`
			`)`
			`data: list[dict[str, Any]] = Field(`
			`...,`
			`description="Visualization data",`
			`)`
			`title: str \| None = Field(None, description="Optional title")`


			`class ExecuteCodeResponse(BaseModel):`
			`"""Response schema for code execution."""`

			`formatted_result: str = Field(`
			`...,`
			`description="Formatted execution result for display",`
			`)`
			`result: Any \| None = Field(`
			`None,`
			`description="The value of the 'result' variable if set",`
			`)`
			`console_output: str \| None = Field(`
			`None,`
			`description="Captured stdout from print statements",`
			`)`
			`error: str \| None = Field(None, description="Error message if execution failed")`
			`message: str \| None = Field(`
			`None,`
			`description="Message if no clear result was returned",`
			`)`
			`visualizations: list[VisualizationSpec] \| None = Field(`
			`None,`
			`description="List of visualizations detected in the result",`
			`)`


			`ScratchpadHelpers = tuple[`
			`Callable[[pd.DataFrame, str \| None], str],`
			`Callable[[str], pd.DataFrame],`
			`Callable[[str, dict[str, Any], list[dict[str, Any]]], str],`
			`]`


			`def dumps_numpy_json(content: Any) -> bytes:`
			`"""Serialize JSON response content with numpy scalar and array support."""`

			`return orjson.dumps(content, option=orjson.OPT_SERIALIZE_NUMPY)`


			`def _strip_ansi_sequences(text: str) -> str:`
			`ansi_escape = re.compile(`
			`r"\x1b\[[0-9;][a-zA-Z]\|\x1b\([0-9;][a-zA-Z]\|\x1b\[[0-9;]*~"`
			`)`
			`return ansi_escape.sub("", text)`


			`def create_scratchpad_helpers(`
			`nest_api_url: str \| None,`
			`auth_header: str \| None,`
			`source_id: str \| None,`
			`message_id: str \| None = None,`
			`http_client: Any = requests,`
			`) -> ScratchpadHelpers:`
			`"""Create scratchpad and visualization helpers that call host app APIs."""`

			`def save_df_to_scratchpad(df: pd.DataFrame, filename: str \| None = None) -> str:`
			`if not nest_api_url or not auth_header or not source_id:`
			`raise ValueError(`
			`"nest_api_url, Authorization header, and source_id are required "`
			`"for scratchpad operations"`
			`)`

			`data_json = df.to_dict(orient="records")`
			`url = f"{nest_api_url}/private_api/scratchpad/{source_id}/files"`
			`response = http_client.post(`
			`url,`
			`data=dumps_numpy_json(`
			`{"filename": filename, "data": data_json, "format": "json"}`
			`),`
			`headers={"Authorization": auth_header, "Content-Type": "application/json"},`
			`timeout=30,`
			`)`
			`response.raise_for_status()`

			`saved_filename = response.json()["filename"]`
			`rows, _cols = df.shape`
			`return f"{rows} rows saved to {saved_filename}"`

			`def read_scratchpad_file(filename: str) -> pd.DataFrame:`
			`if not nest_api_url or not auth_header or not source_id:`
			`raise ValueError(`
			`"nest_api_url, Authorization header, and source_id are required "`
			`"for scratchpad operations"`
			`)`

			`url = f"{nest_api_url}/private_api/scratchpad/{source_id}/files/{filename}?format=raw"`
			`response = http_client.get(`
			`url,`
			`headers={"Authorization": auth_header, "Accept": "text/csv"},`
			`timeout=30,`
			`)`
			`response.raise_for_status()`

			`content_type = response.headers.get("content-type", "")`
			`if "text/csv" in content_type:`
			`return pd.read_csv(BytesIO(response.content))`

			`data = response.json()["data"]`
			`return pd.DataFrame(data)`

			`def save_visualization(`
			`vis_type: str,`
			`config: dict[str, Any],`
			`data: list[dict[str, Any]],`
			`) -> str:`
			`if not nest_api_url or not auth_header or not source_id:`
			`raise ValueError(`
			`"nest_api_url, Authorization header, and source_id are required "`
			`"for visualization operations"`
			`)`

			`if not message_id:`
			`raise ValueError("message_id is required for visualization operations")`

			`if vis_type not in VALID_VISUALIZATION_TYPES:`
			`raise ValueError(`
			`f"Invalid visualization type: {vis_type}. Must be one of {VALID_VISUALIZATION_TYPES}"`
			`)`

			`url = f"{nest_api_url}/private_api/visualizations/{source_id}"`
			`payload = {`
			`"visualizationType": vis_type,`
			`"config": config,`
			`"data": data,`
			`"messageId": message_id,`
			`}`

			`response = http_client.post(`
			`url,`
			`data=dumps_numpy_json(payload),`
			`headers={"Authorization": auth_header, "Content-Type": "application/json"},`
			`timeout=30,`
			`)`
			`response.raise_for_status()`

			`filename = response.json()["filename"]`
			`print(f"Visualization saved: {filename}")`
			`return f"![viz]({filename})"`

			`return save_df_to_scratchpad, read_scratchpad_file, save_visualization`


			`def detect_visualizations(result: Any) -> list[dict[str, Any]]:`
			`"""Detect visualization specs in a code execution result value."""`

			`visualizations = []`

			`if isinstance(result, dict) and result.get("type") == "visualization":`
			`visualizations.append(result)`
			`elif isinstance(result, list):`
			`for item in result:`
			`if isinstance(item, dict) and item.get("type") == "visualization":`
			`visualizations.append(item)`

			`return visualizations`


			`def execute_code(`
			`code: str,`
			`nest_api_url: str \| None = None,`
			`auth_header: str \| None = None,`
			`source_id: str \| None = None,`
			`message_id: str \| None = None,`
			`scratchpad_helpers: ScratchpadHelpers \| None = None,`
			`) -> dict[str, Any]:`
			`"""Execute Python code with the current in-process execution boundary."""`

			`logger.info("Starting code execution")`
			`save_df, read_file, save_viz = scratchpad_helpers or create_scratchpad_helpers(`
			`nest_api_url,`
			`auth_header,`
			`source_id,`
			`message_id,`
			`)`

			`namespace = {`
			`"pd": pd,`
			`"np": np,`
			`"json": json,`
			`"requests": requests,`
			`"save_df_to_scratchpad": save_df,`
			`"read_scratchpad_file": read_file,`
			`"save_visualization": save_viz,`
			`}`

			`stdout_capture = StringIO()`
			`original_stdout = sys.stdout`
			`sys.stdout = stdout_capture`
			`console_output = ""`

			`try:`
			`logger.info("Executing code in current process namespace")`
			`exec(code, namespace)`

			`console_output = stdout_capture.getvalue()`
			`if "result" in namespace:`
			`logger.info("Code execution complete, 'result' variable found")`
			`result_value = namespace["result"]`
			`visualizations = detect_visualizations(result_value)`

			`result = {"result": result_value}`
			`if console_output:`
			`result["console_output"] = console_output`
			`if visualizations:`
			`result["visualizations"] = visualizations`

			`return result`

			`logger.info("No result variable found")`
			`result = {`
			`"message": "Code executed successfully but no result variable was set"`
			`}`
			`if console_output:`
			`result["console_output"] = console_output`
			`return result`

			`except Exception as error:`
			`logger.exception("Error executing code: %s", error)`
			`result = {"error": str(error)}`
			`if console_output:`
			`result["console_output"] = console_output`
			`return result`

			`finally:`
			`sys.stdout = original_stdout`


			`def format_execution_result(result: dict[str, Any]) -> str:`
			`"""Format execution output for display in host chat responses."""`

			`formatted_result = ""`
			`if "console_output" in result:`
			`formatted_result += "=== Console Output ===\n\n"`
			`formatted_result += _strip_ansi_sequences(result["console_output"])`

			`if "result" in result:`
			`formatted_result += "\n\n=== Result ===\n\n"`
			`formatted_result += str(result["result"])`
			`elif "message" in result:`
			`formatted_result += "\n\n=== Message ===\n\n"`
			`formatted_result += result["message"]`
			`elif "error" in result:`
			`formatted_result += "\n\n=== Error ===\n\n"`
			`formatted_result += result["error"]`

			`return formatted_result`


			`def execute_code_response(`
			`request: ExecuteCodeRequest,`
			`*,`
			`nest_api_url: str \| None,`
			`auth_header: str \| None,`
			`) -> ExecuteCodeResponse:`
			`"""Execute a validated request and return the public response model."""`

			`result = execute_code(`
			`code=request.code,`
			`nest_api_url=nest_api_url,`
			`auth_header=auth_header,`
			`source_id=request.source_id,`
			`message_id=request.message_id,`
			`)`

			`return ExecuteCodeResponse(`
			`formatted_result=format_execution_result(result),`
			`result=result.get("result"),`
			`console_output=result.get("console_output"),`
			`error=result.get("error"),`
			`message=result.get("message"),`
			`visualizations=result.get("visualizations"),`
			`)`