update exp_pool manager

This commit is contained in:
seehi 2024-08-19 14:07:13 +08:00
parent 243c7a65d6
commit 665ca6ff97
9 changed files with 31 additions and 22 deletions

View file

@ -7,6 +7,7 @@ import chromadb
import faiss
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.embeddings.mock_embed_model import MockEmbedding
from llama_index.core.schema import BaseNode
from llama_index.core.vector_stores.types import BasePydanticVectorStore
from llama_index.vector_stores.chroma import ChromaVectorStore
@ -84,9 +85,13 @@ class RetrieverFactory(ConfigBasedFactory):
def _create_bm25_retriever(self, config: BM25RetrieverConfig, **kwargs) -> DynamicBM25Retriever:
    """Build a ``DynamicBM25Retriever`` from an existing index or from raw nodes.

    The nodes come from the docstore of the index returned by
    ``self._extract_index`` when one is found, otherwise from
    ``self._extract_nodes``.

    Side effect: ``config`` is mutated in place. When ``config.index`` is not
    already set it is filled with the extracted index, or, if no index was
    extracted and ``config.create_index`` is true, with a freshly built
    ``VectorStoreIndex`` over the nodes.

    Args:
        config: BM25 retriever configuration; its dumped fields are forwarded
            to the retriever constructor.
        **kwargs: Passed through to ``_extract_index`` / ``_extract_nodes``.

    Returns:
        The retriever built from the resolved nodes and the dumped config.
    """
    extracted_index = self._extract_index(config, **kwargs)
    if extracted_index:
        bm25_nodes = list(extracted_index.docstore.docs.values())
    else:
        bm25_nodes = self._extract_nodes(config, **kwargs)

    if not config.index:
        if extracted_index:
            # Remember the index that was resolved from kwargs on the config.
            config.index = extracted_index
        elif config.create_index:
            # NOTE(review): the 1-dimensional MockEmbedding is presumably a
            # placeholder so an index can exist without real embeddings, since
            # BM25 itself does not use vectors -- confirm.
            config.index = VectorStoreIndex(bm25_nodes, embed_model=MockEmbedding(embed_dim=1))

    return DynamicBM25Retriever(nodes=bm25_nodes, **config.model_dump())
def _create_chroma_retriever(self, config: ChromaRetrieverConfig, **kwargs) -> ChromaRetriever:

View file

@ -60,6 +60,11 @@ class FAISSRetrieverConfig(IndexRetrieverConfig):
class BM25RetrieverConfig(IndexRetrieverConfig):
    """Config for BM25-based retrievers."""

    # Opt-in switch, off by default. Declared with ``exclude=True``, which
    # (assuming this is pydantic's ``Field``) keeps it out of serialized
    # output such as ``model_dump()``.
    create_index: bool = Field(
        exclude=True,
        description=(
            "Indicates whether to create an index for the nodes. "
            "It is useful when you need to persist data while only using BM25."
        ),
        default=False,
    )

    # Private attribute defaulting to True.
    # NOTE(review): presumably marks this retriever as not needing an
    # embedding model -- confirm against how the base config reads it.
    _no_embedding: bool = PrivateAttr(default=True)