2026-06-12 17:00:01 +02:00
|
|
|
"""``embedding_cache_sets``: one reusable chunk+embedding set per markdown."""
|
2026-06-12 16:48:01 +02:00
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
from sqlalchemy import (
|
|
|
|
|
BigInteger,
|
|
|
|
|
Column,
|
|
|
|
|
DateTime,
|
|
|
|
|
Index,
|
|
|
|
|
Integer,
|
|
|
|
|
String,
|
|
|
|
|
UniqueConstraint,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
from app.db import BaseModel, TimestampMixin
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class CachedEmbeddingSet(BaseModel, TimestampMixin):
    """Row in ``embedding_cache_sets``: one reusable chunk+embedding set.

    Each row records which markdown text and which embedding/chunking
    recipe produced a set of vectors, where the embedding blob is stored,
    and the usage counters that drive eviction.
    """

    __tablename__ = "embedding_cache_sets"

    # --- Cache key: the markdown text plus the recipe that produced vectors.
    # NOTE(review): ``embedding_dim`` is grouped with the key columns but is
    # not part of the unique constraint below -- confirm this is intended.
    markdown_sha256 = Column(String(64), nullable=False)
    embedding_model = Column(String(255), nullable=False)
    embedding_dim = Column(Integer, nullable=False)
    chunker_kind = Column(String(8), nullable=False)
    chunker_version = Column(Integer, nullable=False)

    # --- Blob location: the embeddings are stored outside the row so the
    # row itself stays small.
    storage_backend = Column(String(32), nullable=False)
    storage_key = Column(String, nullable=False)
    size_bytes = Column(BigInteger, nullable=False)
    # Defaults to 0 both in Python (``default``) and in the DDL
    # (``server_default``).
    chunk_count = Column(Integer, nullable=False, default=0, server_default="0")

    # --- Eviction inputs: popularity (reuse counter) and recency.
    times_reused = Column(BigInteger, nullable=False, default=0, server_default="0")
    # No default is declared here, so a value must be supplied on insert.
    last_used_at = Column(DateTime(timezone=True), nullable=False)

    __table_args__ = (
        # At most one cached set per (markdown, model, chunker kind, version).
        UniqueConstraint(
            "markdown_sha256", "embedding_model", "chunker_kind", "chunker_version",
            name="uq_embedding_cache_sets_key",
        ),
        # Index on the recency column.
        Index("ix_embedding_cache_sets_last_used_at", "last_used_at"),
    )
|