mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-20 21:18:13 +02:00
The cached payload is the indexing pipeline's embeddings (markdown is chunked then embedded), so "embedding cache" names the expensive output directly and removes the "index" ambiguity (DB index vs vector index vs indexing phase). Renames the service, settings, eligibility, eviction task, metrics, config flags (INDEX_CACHE_* -> EMBEDDING_CACHE_*), object prefix, and the table (index_cache_embedding_sets -> embedding_cache_sets) with its constraint and indexes. Migration 161 renamed accordingly.
47 lines
1.5 KiB
Python
47 lines
1.5 KiB
Python
"""``embedding_cache_sets``: one reusable chunk+embedding set per markdown."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from sqlalchemy import (
|
|
BigInteger,
|
|
Column,
|
|
DateTime,
|
|
Index,
|
|
Integer,
|
|
String,
|
|
UniqueConstraint,
|
|
)
|
|
|
|
from app.db import BaseModel, TimestampMixin
|
|
|
|
|
|
class CachedEmbeddingSet(BaseModel, TimestampMixin):
    """Metadata row for one cached chunk+embedding set of a markdown document.

    Each row records which markdown was processed and with which
    embedding/chunking recipe, where the resulting embedding blob is
    stored, and the usage counters that cache eviction relies on.
    """

    __tablename__ = "embedding_cache_sets"

    # ------------------------------------------------------------------
    # Cache key: the markdown content plus the recipe used to vectorise it.
    # NOTE(review): ``embedding_dim`` is stored alongside the key columns
    # but is not part of the unique constraint below — presumably it is
    # implied by ``embedding_model``; confirm before relying on it.
    # ------------------------------------------------------------------
    markdown_sha256 = Column(
        String(64),  # presumably a hex-encoded SHA-256 digest — verify against writer
        nullable=False,
    )
    embedding_model = Column(String(255), nullable=False)
    embedding_dim = Column(Integer, nullable=False)
    chunker_kind = Column(String(8), nullable=False)
    chunker_version = Column(Integer, nullable=False)

    # ------------------------------------------------------------------
    # Blob location: the embeddings themselves are stored outside this
    # table so the row stays small; only the pointer and sizes live here.
    # ------------------------------------------------------------------
    storage_backend = Column(String(32), nullable=False)
    storage_key = Column(String, nullable=False)
    size_bytes = Column(BigInteger, nullable=False)
    chunk_count = Column(
        Integer,
        nullable=False,
        default=0,
        server_default="0",
    )

    # ------------------------------------------------------------------
    # Eviction inputs: popularity (reuse count) and recency (last use).
    # ``last_used_at`` has no default here, so writers must always set it.
    # ------------------------------------------------------------------
    times_reused = Column(
        BigInteger,
        nullable=False,
        default=0,
        server_default="0",
    )
    last_used_at = Column(DateTime(timezone=True), nullable=False)

    __table_args__ = (
        # At most one cached set per (markdown, model, chunker kind, chunker version).
        UniqueConstraint(
            "markdown_sha256",
            "embedding_model",
            "chunker_kind",
            "chunker_version",
            name="uq_embedding_cache_sets_key",
        ),
        # Index on the recency column that drives eviction.
        Index("ix_embedding_cache_sets_last_used_at", "last_used_at"),
    )
|