refactor(jira): remove dead legacy indexing and write tools (mcp-only now)

2026-06-04 20:05:16 +02:00 · 2026-06-02 16:38:00 +02:00 · 2026-06-02 16:38:00 +02:00 · b6710ae9af
commit b6710ae9af
parent ef60af90cf
11 changed files with 0 additions and 3075 deletions
--- a/surfsense_backend/app/services/jira/__init__.py
+++ b/surfsense_backend/app/services/jira/__init__.py
@ -1,13 +0,0 @@
-from app.services.jira.kb_sync_service import JiraKBSyncService
-from app.services.jira.tool_metadata_service import (
-    JiraIssue,
-    JiraToolMetadataService,
-    JiraWorkspace,
-)
-
-__all__ = [
-    "JiraIssue",
-    "JiraKBSyncService",
-    "JiraToolMetadataService",
-    "JiraWorkspace",
-]
--- a/surfsense_backend/app/services/jira/kb_sync_service.py
+++ b/surfsense_backend/app/services/jira/kb_sync_service.py
@ -1,257 +0,0 @@
-import asyncio
-import logging
-from datetime import datetime
-
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from app.connectors.jira_history import JiraHistoryConnector
-from app.db import Document, DocumentType
-from app.utils.document_converters import (
-    create_document_chunks,
-    embed_text,
-    generate_content_hash,
-    generate_document_summary,
-    generate_unique_identifier_hash,
-)
-
-logger = logging.getLogger(__name__)
-
-
-class JiraKBSyncService:
-    """Syncs Jira issue documents to the knowledge base after HITL actions."""
-
-    def __init__(self, db_session: AsyncSession):
-        self.db_session = db_session
-
-    async def sync_after_create(
-        self,
-        issue_id: str,
-        issue_identifier: str,
-        issue_title: str,
-        description: str | None,
-        state: str | None,
-        connector_id: int,
-        search_space_id: int,
-        user_id: str,
-    ) -> dict:
-        from app.tasks.connector_indexers.base import (
-            check_document_by_unique_identifier,
-            check_duplicate_document_by_hash,
-            get_current_timestamp,
-            safe_set_chunks,
-        )
-
-        try:
-            unique_hash = generate_unique_identifier_hash(
-                DocumentType.JIRA_CONNECTOR, issue_id, search_space_id
-            )
-
-            existing = await check_document_by_unique_identifier(
-                self.db_session, unique_hash
-            )
-            if existing:
-                logger.info(
-                    "Document for Jira issue %s already exists (doc_id=%s), skipping",
-                    issue_identifier,
-                    existing.id,
-                )
-                return {"status": "success"}
-
-            indexable_content = (description or "").strip()
-            if not indexable_content:
-                indexable_content = f"Jira Issue {issue_identifier}: {issue_title}"
-
-            issue_content = (
-                f"# {issue_identifier}: {issue_title}\n\n{indexable_content}"
-            )
-
-            content_hash = generate_content_hash(issue_content, search_space_id)
-
-            with self.db_session.no_autoflush:
-                dup = await check_duplicate_document_by_hash(
-                    self.db_session, content_hash
-                )
-            if dup:
-                content_hash = unique_hash
-
-            from app.services.llm_service import get_user_long_context_llm
-
-            user_llm = await get_user_long_context_llm(
-                self.db_session,
-                user_id,
-                search_space_id,
-                disable_streaming=True,
-            )
-
-            doc_metadata_for_summary = {
-                "issue_id": issue_identifier,
-                "issue_title": issue_title,
-                "document_type": "Jira Issue",
-                "connector_type": "Jira",
-            }
-
-            if user_llm:
-                summary_content, summary_embedding = await generate_document_summary(
-                    issue_content, user_llm, doc_metadata_for_summary
-                )
-            else:
-                summary_content = (
-                    f"Jira Issue {issue_identifier}: {issue_title}\n\n{issue_content}"
-                )
-                summary_embedding = await asyncio.to_thread(embed_text, summary_content)
-
-            chunks = await create_document_chunks(issue_content)
-            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-            document = Document(
-                title=f"{issue_identifier}: {issue_title}",
-                document_type=DocumentType.JIRA_CONNECTOR,
-                document_metadata={
-                    "issue_id": issue_id,
-                    "issue_identifier": issue_identifier,
-                    "issue_title": issue_title,
-                    "state": state or "Unknown",
-                    "indexed_at": now_str,
-                    "connector_id": connector_id,
-                },
-                content=summary_content,
-                content_hash=content_hash,
-                unique_identifier_hash=unique_hash,
-                embedding=summary_embedding,
-                search_space_id=search_space_id,
-                connector_id=connector_id,
-                updated_at=get_current_timestamp(),
-                created_by_id=user_id,
-            )
-
-            self.db_session.add(document)
-            await self.db_session.flush()
-            await safe_set_chunks(self.db_session, document, chunks)
-            await self.db_session.commit()
-
-            logger.info(
-                "KB sync after create succeeded: doc_id=%s, issue=%s",
-                document.id,
-                issue_identifier,
-            )
-            return {"status": "success"}
-
-        except Exception as e:
-            error_str = str(e).lower()
-            if (
-                "duplicate key value violates unique constraint" in error_str
-                or "uniqueviolationerror" in error_str
-            ):
-                await self.db_session.rollback()
-                return {"status": "error", "message": "Duplicate document detected"}
-
-            logger.error(
-                "KB sync after create failed for issue %s: %s",
-                issue_identifier,
-                e,
-                exc_info=True,
-            )
-            await self.db_session.rollback()
-            return {"status": "error", "message": str(e)}
-
-    async def sync_after_update(
-        self,
-        document_id: int,
-        issue_id: str,
-        user_id: str,
-        search_space_id: int,
-    ) -> dict:
-        from app.tasks.connector_indexers.base import (
-            get_current_timestamp,
-            safe_set_chunks,
-        )
-
-        try:
-            document = await self.db_session.get(Document, document_id)
-            if not document:
-                return {"status": "not_indexed"}
-
-            connector_id = document.connector_id
-            if not connector_id:
-                return {"status": "error", "message": "Document has no connector_id"}
-
-            jira_history = JiraHistoryConnector(
-                session=self.db_session, connector_id=connector_id
-            )
-            jira_client = await jira_history._get_jira_client()
-            issue_raw = await asyncio.to_thread(jira_client.get_issue, issue_id)
-            formatted = jira_client.format_issue(issue_raw)
-            issue_content = jira_client.format_issue_to_markdown(formatted)
-
-            if not issue_content:
-                return {"status": "error", "message": "Issue produced empty content"}
-
-            issue_identifier = formatted.get("key", "")
-            issue_title = formatted.get("title", "")
-            state = formatted.get("status", "Unknown")
-            comment_count = len(formatted.get("comments", []))
-
-            from app.services.llm_service import get_user_long_context_llm
-
-            user_llm = await get_user_long_context_llm(
-                self.db_session, user_id, search_space_id, disable_streaming=True
-            )
-
-            if user_llm:
-                doc_meta = {
-                    "issue_key": issue_identifier,
-                    "issue_title": issue_title,
-                    "status": state,
-                    "document_type": "Jira Issue",
-                    "connector_type": "Jira",
-                }
-                summary_content, summary_embedding = await generate_document_summary(
-                    issue_content, user_llm, doc_meta
-                )
-            else:
-                summary_content = (
-                    f"Jira Issue {issue_identifier}: {issue_title}\n\n{issue_content}"
-                )
-                summary_embedding = await asyncio.to_thread(embed_text, summary_content)
-
-            chunks = await create_document_chunks(issue_content)
-
-            document.title = f"{issue_identifier}: {issue_title}"
-            document.content = summary_content
-            document.content_hash = generate_content_hash(
-                issue_content, search_space_id
-            )
-            document.embedding = summary_embedding
-
-            from sqlalchemy.orm.attributes import flag_modified
-
-            document.document_metadata = {
-                **(document.document_metadata or {}),
-                "issue_id": issue_id,
-                "issue_identifier": issue_identifier,
-                "issue_title": issue_title,
-                "state": state,
-                "comment_count": comment_count,
-                "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                "connector_id": connector_id,
-            }
-            flag_modified(document, "document_metadata")
-            await safe_set_chunks(self.db_session, document, chunks)
-            document.updated_at = get_current_timestamp()
-
-            await self.db_session.commit()
-
-            logger.info(
-                "KB sync successful for document %s (%s: %s)",
-                document_id,
-                issue_identifier,
-                issue_title,
-            )
-            return {"status": "success"}
-
-        except Exception as e:
-            logger.error(
-                "KB sync failed for document %s: %s", document_id, e, exc_info=True
-            )
-            await self.db_session.rollback()
-            return {"status": "error", "message": str(e)}
--- a/surfsense_backend/app/services/jira/tool_metadata_service.py
+++ b/surfsense_backend/app/services/jira/tool_metadata_service.py
@ -1,332 +0,0 @@
-import asyncio
-import logging
-from dataclasses import dataclass
-
-from sqlalchemy import and_, func, or_
-from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.future import select
-from sqlalchemy.orm.attributes import flag_modified
-
-from app.connectors.jira_history import JiraHistoryConnector
-from app.db import (
-    Document,
-    DocumentType,
-    SearchSourceConnector,
-    SearchSourceConnectorType,
-)
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class JiraWorkspace:
-    """Represents a Jira connector as a workspace for tool context."""
-
-    id: int
-    name: str
-    base_url: str
-
-    @classmethod
-    def from_connector(cls, connector: SearchSourceConnector) -> "JiraWorkspace":
-        return cls(
-            id=connector.id,
-            name=connector.name,
-            base_url=connector.config.get("base_url", ""),
-        )
-
-    def to_dict(self) -> dict:
-        return {
-            "id": self.id,
-            "name": self.name,
-            "base_url": self.base_url,
-        }
-
-
-@dataclass
-class JiraIssue:
-    """Represents an indexed Jira issue resolved from the knowledge base."""
-
-    issue_id: str
-    issue_identifier: str
-    issue_title: str
-    state: str
-    connector_id: int
-    document_id: int
-    indexed_at: str | None
-
-    @classmethod
-    def from_document(cls, document: Document) -> "JiraIssue":
-        meta = document.document_metadata or {}
-        return cls(
-            issue_id=meta.get("issue_id", ""),
-            issue_identifier=meta.get("issue_identifier", ""),
-            issue_title=meta.get("issue_title", document.title),
-            state=meta.get("state", ""),
-            connector_id=document.connector_id,
-            document_id=document.id,
-            indexed_at=meta.get("indexed_at"),
-        )
-
-    def to_dict(self) -> dict:
-        return {
-            "issue_id": self.issue_id,
-            "issue_identifier": self.issue_identifier,
-            "issue_title": self.issue_title,
-            "state": self.state,
-            "connector_id": self.connector_id,
-            "document_id": self.document_id,
-            "indexed_at": self.indexed_at,
-        }
-
-
-class JiraToolMetadataService:
-    """Builds interrupt context for Jira HITL tools."""
-
-    def __init__(self, db_session: AsyncSession):
-        self._db_session = db_session
-
-    async def _check_account_health(self, connector: SearchSourceConnector) -> bool:
-        """Check if the Jira connector auth is still valid.
-
-        Returns True if auth is expired/invalid, False if healthy.
-        """
-        try:
-            jira_history = JiraHistoryConnector(
-                session=self._db_session, connector_id=connector.id
-            )
-            jira_client = await jira_history._get_jira_client()
-            await asyncio.to_thread(jira_client.get_myself)
-            return False
-        except Exception as e:
-            logger.warning("Jira connector %s health check failed: %s", connector.id, e)
-            try:
-                connector.config = {**connector.config, "auth_expired": True}
-                flag_modified(connector, "config")
-                await self._db_session.commit()
-                await self._db_session.refresh(connector)
-            except Exception:
-                logger.warning(
-                    "Failed to persist auth_expired for connector %s",
-                    connector.id,
-                    exc_info=True,
-                )
-            return True
-
-    async def get_creation_context(self, search_space_id: int, user_id: str) -> dict:
-        """Return context needed to create a new Jira issue.
-
-        Fetches all connected Jira accounts, and for the first healthy one
-        fetches projects, issue types, and priorities.
-        """
-        connectors = await self._get_all_jira_connectors(search_space_id, user_id)
-        if not connectors:
-            return {"error": "No Jira account connected"}
-
-        accounts = []
-        projects = []
-        issue_types = []
-        priorities = []
-        fetched_context = False
-
-        for connector in connectors:
-            auth_expired = await self._check_account_health(connector)
-            workspace = JiraWorkspace.from_connector(connector)
-            account_info = {
-                **workspace.to_dict(),
-                "auth_expired": auth_expired,
-            }
-            accounts.append(account_info)
-
-            if not auth_expired and not fetched_context:
-                try:
-                    jira_history = JiraHistoryConnector(
-                        session=self._db_session, connector_id=connector.id
-                    )
-                    jira_client = await jira_history._get_jira_client()
-                    raw_projects = await asyncio.to_thread(jira_client.get_projects)
-                    projects = [
-                        {"id": p.get("id"), "key": p.get("key"), "name": p.get("name")}
-                        for p in raw_projects
-                    ]
-                    raw_types = await asyncio.to_thread(jira_client.get_issue_types)
-                    seen_type_names: set[str] = set()
-                    issue_types = []
-                    for t in raw_types:
-                        if t.get("subtask", False):
-                            continue
-                        name = t.get("name")
-                        if name not in seen_type_names:
-                            seen_type_names.add(name)
-                            issue_types.append({"id": t.get("id"), "name": name})
-                    raw_priorities = await asyncio.to_thread(jira_client.get_priorities)
-                    priorities = [
-                        {"id": p.get("id"), "name": p.get("name")}
-                        for p in raw_priorities
-                    ]
-                    fetched_context = True
-                except Exception as e:
-                    logger.warning(
-                        "Failed to fetch Jira context for connector %s: %s",
-                        connector.id,
-                        e,
-                    )
-
-        return {
-            "accounts": accounts,
-            "projects": projects,
-            "issue_types": issue_types,
-            "priorities": priorities,
-        }
-
-    async def get_update_context(
-        self, search_space_id: int, user_id: str, issue_ref: str
-    ) -> dict:
-        """Return context needed to update an indexed Jira issue.
-
-        Resolves the issue from the KB, then fetches current details from the Jira API.
-        """
-        document = await self._resolve_issue(search_space_id, user_id, issue_ref)
-        if not document:
-            return {
-                "error": f"Issue '{issue_ref}' not found in your synced Jira issues. "
-                "Please make sure the issue is indexed in your knowledge base."
-            }
-
-        connector = await self._get_connector_for_document(document, user_id)
-        if not connector:
-            return {"error": "Connector not found or access denied"}
-
-        auth_expired = await self._check_account_health(connector)
-        if auth_expired:
-            return {
-                "error": "Jira authentication has expired. Please re-authenticate.",
-                "auth_expired": True,
-                "connector_id": connector.id,
-            }
-
-        workspace = JiraWorkspace.from_connector(connector)
-        issue = JiraIssue.from_document(document)
-
-        try:
-            jira_history = JiraHistoryConnector(
-                session=self._db_session, connector_id=connector.id
-            )
-            jira_client = await jira_history._get_jira_client()
-            issue_data = await asyncio.to_thread(jira_client.get_issue, issue.issue_id)
-            formatted = jira_client.format_issue(issue_data)
-        except Exception as e:
-            error_str = str(e).lower()
-            if (
-                "401" in error_str
-                or "403" in error_str
-                or "authentication" in error_str
-            ):
-                return {
-                    "error": f"Failed to fetch Jira issue: {e!s}",
-                    "auth_expired": True,
-                    "connector_id": connector.id,
-                }
-            return {"error": f"Failed to fetch Jira issue: {e!s}"}
-
-        return {
-            "account": {**workspace.to_dict(), "auth_expired": False},
-            "issue": {
-                "issue_id": formatted.get("key", issue.issue_id),
-                "issue_identifier": formatted.get("key", issue.issue_identifier),
-                "issue_title": formatted.get("title", issue.issue_title),
-                "state": formatted.get("status", "Unknown"),
-                "priority": formatted.get("priority", "Unknown"),
-                "issue_type": formatted.get("issue_type", "Unknown"),
-                "assignee": formatted.get("assignee"),
-                "description": formatted.get("description"),
-                "project": formatted.get("project", ""),
-                "document_id": issue.document_id,
-                "indexed_at": issue.indexed_at,
-            },
-        }
-
-    async def get_deletion_context(
-        self, search_space_id: int, user_id: str, issue_ref: str
-    ) -> dict:
-        """Return context needed to delete a Jira issue (KB metadata only, no API call)."""
-        document = await self._resolve_issue(search_space_id, user_id, issue_ref)
-        if not document:
-            return {
-                "error": f"Issue '{issue_ref}' not found in your synced Jira issues. "
-                "Please make sure the issue is indexed in your knowledge base."
-            }
-
-        connector = await self._get_connector_for_document(document, user_id)
-        if not connector:
-            return {"error": "Connector not found or access denied"}
-
-        auth_expired = connector.config.get("auth_expired", False)
-        workspace = JiraWorkspace.from_connector(connector)
-        issue = JiraIssue.from_document(document)
-
-        return {
-            "account": {**workspace.to_dict(), "auth_expired": auth_expired},
-            "issue": issue.to_dict(),
-        }
-
-    async def _resolve_issue(
-        self, search_space_id: int, user_id: str, issue_ref: str
-    ) -> Document | None:
-        """Resolve an issue from KB: issue_identifier -> issue_title -> document.title."""
-        ref_lower = issue_ref.lower()
-
-        result = await self._db_session.execute(
-            select(Document)
-            .join(
-                SearchSourceConnector, Document.connector_id == SearchSourceConnector.id
-            )
-            .filter(
-                and_(
-                    Document.search_space_id == search_space_id,
-                    Document.document_type == DocumentType.JIRA_CONNECTOR,
-                    SearchSourceConnector.user_id == user_id,
-                    or_(
-                        func.lower(
-                            Document.document_metadata.op("->>")("issue_identifier")
-                        )
-                        == ref_lower,
-                        func.lower(Document.document_metadata.op("->>")("issue_title"))
-                        == ref_lower,
-                        func.lower(Document.title) == ref_lower,
-                    ),
-                )
-            )
-            .order_by(Document.updated_at.desc().nullslast())
-            .limit(1)
-        )
-        return result.scalars().first()
-
-    async def _get_all_jira_connectors(
-        self, search_space_id: int, user_id: str
-    ) -> list[SearchSourceConnector]:
-        result = await self._db_session.execute(
-            select(SearchSourceConnector).filter(
-                and_(
-                    SearchSourceConnector.search_space_id == search_space_id,
-                    SearchSourceConnector.user_id == user_id,
-                    SearchSourceConnector.connector_type
-                    == SearchSourceConnectorType.JIRA_CONNECTOR,
-                )
-            )
-        )
-        return result.scalars().all()
-
-    async def _get_connector_for_document(
-        self, document: Document, user_id: str
-    ) -> SearchSourceConnector | None:
-        if not document.connector_id:
-            return None
-        result = await self._db_session.execute(
-            select(SearchSourceConnector).filter(
-                and_(
-                    SearchSourceConnector.id == document.connector_id,
-                    SearchSourceConnector.user_id == user_id,
-                )
-            )
-        )
-        return result.scalars().first()