feat: Added content-based hashing to prevent duplicates and fix resync issues

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-05-28 23:52:00 -07:00
parent 38516e74f9
commit 5411bac8e0
17 changed files with 297 additions and 334 deletions

View file

@@ -99,6 +99,7 @@ class Document(BaseModel, TimestampMixin):
document_metadata = Column(JSON, nullable=True)
content = Column(Text, nullable=False)
content_hash = Column(String, nullable=False, index=True, unique=True)
embedding = Column(Vector(config.embedding_model_instance.dimension))
search_space_id = Column(Integer, ForeignKey("searchspaces.id", ondelete='CASCADE'), nullable=False)