feat: add chunking and embedding logic to indexer

This commit is contained in:
CREDO23 2026-01-09 15:45:17 +02:00
parent fff851ae3f
commit 2e83ed8dcd

View file

@ -8,6 +8,9 @@ import logging
import re
from pathlib import Path
from app.config import config
from app.db import SurfsenseDocsChunk
logger = logging.getLogger(__name__)
# Path to docs relative to project root
@ -62,3 +65,22 @@ def generate_surfsense_docs_content_hash(content: str) -> str:
"""Generate SHA-256 hash for Surfsense docs content."""
return hashlib.sha256(content.encode("utf-8")).hexdigest()
def create_surfsense_docs_chunks(content: str) -> list[SurfsenseDocsChunk]:
    """
    Split Surfsense documentation text into embedded chunk records.

    The text is handed to ``config.chunker_instance.chunk``; the ``text`` of
    every piece it produces is embedded with
    ``config.embedding_model_instance.embed`` and stored, together with that
    embedding, in a new ``SurfsenseDocsChunk``.

    Args:
        content: Document content to chunk

    Returns:
        One SurfsenseDocsChunk per piece produced by the chunker, in the
        order the chunker produced them
    """
    docs_chunks = []
    for piece in config.chunker_instance.chunk(content):
        piece_text = piece.text
        # The embedding model is looked up per piece, so it is never touched
        # when the chunker yields nothing.
        vector = config.embedding_model_instance.embed(piece_text)
        docs_chunks.append(
            SurfsenseDocsChunk(content=piece_text, embedding=vector)
        )
    return docs_chunks