mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-27 01:36:30 +02:00
feat: Added content-based hashing to prevent duplicates and fix resync issues
This commit is contained in:
parent
38516e74f9
commit
5411bac8e0
17 changed files with 297 additions and 334 deletions
|
|
@ -99,6 +99,7 @@ class Document(BaseModel, TimestampMixin):
|
|||
document_metadata = Column(JSON, nullable=True)
|
||||
|
||||
content = Column(Text, nullable=False)
|
||||
content_hash = Column(String, nullable=False, index=True, unique=True)
|
||||
embedding = Column(Vector(config.embedding_model_instance.dimension))
|
||||
|
||||
search_space_id = Column(Integer, ForeignKey("searchspaces.id", ondelete='CASCADE'), nullable=False)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue