mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-04-24 16:26:36 +02:00
remove milvus from RAG
This commit is contained in:
parent
ad334450f4
commit
2fdfd703ba
8 changed files with 0 additions and 144 deletions
|
|
@ -8,7 +8,6 @@ from llama_index.core.vector_stores.types import BasePydanticVectorStore
|
|||
from llama_index.vector_stores.chroma import ChromaVectorStore
|
||||
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
|
||||
from llama_index.vector_stores.faiss import FaissVectorStore
|
||||
from llama_index.vector_stores.milvus import MilvusVectorStore
|
||||
|
||||
from metagpt.rag.factories.base import ConfigBasedFactory
|
||||
from metagpt.rag.schema import (
|
||||
|
|
@ -18,7 +17,6 @@ from metagpt.rag.schema import (
|
|||
ElasticsearchIndexConfig,
|
||||
ElasticsearchKeywordIndexConfig,
|
||||
FAISSIndexConfig,
|
||||
MilvusIndexConfig,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -30,7 +28,6 @@ class RAGIndexFactory(ConfigBasedFactory):
|
|||
BM25IndexConfig: self._create_bm25,
|
||||
ElasticsearchIndexConfig: self._create_es,
|
||||
ElasticsearchKeywordIndexConfig: self._create_es,
|
||||
MilvusIndexConfig: self._create_milvus,
|
||||
}
|
||||
super().__init__(creators)
|
||||
|
||||
|
|
@ -49,11 +46,6 @@ class RAGIndexFactory(ConfigBasedFactory):
|
|||
|
||||
return self._index_from_storage(storage_context=storage_context, config=config, **kwargs)
|
||||
|
||||
def _create_milvus(self, config: MilvusIndexConfig, **kwargs) -> VectorStoreIndex:
|
||||
vector_store = MilvusVectorStore(collection_name=config.collection_name, uri=config.uri, token=config.token)
|
||||
|
||||
return self._index_from_vector_store(vector_store=vector_store, config=config, **kwargs)
|
||||
|
||||
def _create_chroma(self, config: ChromaIndexConfig, **kwargs) -> VectorStoreIndex:
|
||||
db = chromadb.PersistentClient(str(config.persist_path))
|
||||
chroma_collection = db.get_or_create_collection(config.collection_name, metadata=config.metadata)
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ from llama_index.core.vector_stores.types import BasePydanticVectorStore
|
|||
from llama_index.vector_stores.chroma import ChromaVectorStore
|
||||
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
|
||||
from llama_index.vector_stores.faiss import FaissVectorStore
|
||||
from llama_index.vector_stores.milvus import MilvusVectorStore
|
||||
|
||||
from metagpt.rag.factories.base import ConfigBasedFactory
|
||||
from metagpt.rag.retrievers.base import RAGRetriever
|
||||
|
|
@ -22,7 +21,6 @@ from metagpt.rag.retrievers.chroma_retriever import ChromaRetriever
|
|||
from metagpt.rag.retrievers.es_retriever import ElasticsearchRetriever
|
||||
from metagpt.rag.retrievers.faiss_retriever import FAISSRetriever
|
||||
from metagpt.rag.retrievers.hybrid_retriever import SimpleHybridRetriever
|
||||
from metagpt.rag.retrievers.milvus_retriever import MilvusRetriever
|
||||
from metagpt.rag.schema import (
|
||||
BaseRetrieverConfig,
|
||||
BM25RetrieverConfig,
|
||||
|
|
@ -30,7 +28,6 @@ from metagpt.rag.schema import (
|
|||
ElasticsearchKeywordRetrieverConfig,
|
||||
ElasticsearchRetrieverConfig,
|
||||
FAISSRetrieverConfig,
|
||||
MilvusRetrieverConfig,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -60,7 +57,6 @@ class RetrieverFactory(ConfigBasedFactory):
|
|||
ChromaRetrieverConfig: self._create_chroma_retriever,
|
||||
ElasticsearchRetrieverConfig: self._create_es_retriever,
|
||||
ElasticsearchKeywordRetrieverConfig: self._create_es_retriever,
|
||||
MilvusRetrieverConfig: self._create_milvus_retriever,
|
||||
}
|
||||
super().__init__(creators)
|
||||
|
||||
|
|
@ -81,11 +77,6 @@ class RetrieverFactory(ConfigBasedFactory):
|
|||
|
||||
return index.as_retriever()
|
||||
|
||||
def _create_milvus_retriever(self, config: MilvusRetrieverConfig, **kwargs) -> MilvusRetriever:
|
||||
config.index = self._build_milvus_index(config, **kwargs)
|
||||
|
||||
return MilvusRetriever(**config.model_dump())
|
||||
|
||||
def _create_faiss_retriever(self, config: FAISSRetrieverConfig, **kwargs) -> FAISSRetriever:
|
||||
config.index = self._build_faiss_index(config, **kwargs)
|
||||
|
||||
|
|
@ -144,14 +135,6 @@ class RetrieverFactory(ConfigBasedFactory):
|
|||
|
||||
return self._build_index_from_vector_store(config, vector_store, **kwargs)
|
||||
|
||||
@get_or_build_index
|
||||
def _build_milvus_index(self, config: MilvusRetrieverConfig, **kwargs) -> VectorStoreIndex:
|
||||
vector_store = MilvusVectorStore(
|
||||
uri=config.uri, collection_name=config.collection_name, token=config.token, dim=config.dimensions
|
||||
)
|
||||
|
||||
return self._build_index_from_vector_store(config, vector_store, **kwargs)
|
||||
|
||||
@get_or_build_index
|
||||
def _build_es_index(self, config: ElasticsearchRetrieverConfig, **kwargs) -> VectorStoreIndex:
|
||||
vector_store = ElasticsearchStore(**config.store_config.model_dump())
|
||||
|
|
|
|||
|
|
@ -1,17 +0,0 @@
|
|||
"""Milvus retriever."""
|
||||
|
||||
from llama_index.core.retrievers import VectorIndexRetriever
|
||||
from llama_index.core.schema import BaseNode
|
||||
|
||||
|
||||
class MilvusRetriever(VectorIndexRetriever):
|
||||
"""Milvus retriever."""
|
||||
|
||||
def add_nodes(self, nodes: list[BaseNode], **kwargs) -> None:
|
||||
"""Support add nodes."""
|
||||
self._index.insert_nodes(nodes, **kwargs)
|
||||
|
||||
def persist(self, persist_dir: str, **kwargs) -> None:
|
||||
"""Support persist.
|
||||
|
||||
Milvus automatically saves, so there is no need to implement."""
|
||||
|
|
@ -69,36 +69,6 @@ class BM25RetrieverConfig(IndexRetrieverConfig):
|
|||
_no_embedding: bool = PrivateAttr(default=True)
|
||||
|
||||
|
||||
class MilvusRetrieverConfig(IndexRetrieverConfig):
|
||||
"""Config for Milvus-based retrievers."""
|
||||
|
||||
uri: str = Field(default="./milvus_local.db", description="The directory to save data.")
|
||||
collection_name: str = Field(default="metagpt", description="The name of the collection.")
|
||||
token: str = Field(default=None, description="The token for Milvus")
|
||||
metadata: Optional[CollectionMetadata] = Field(
|
||||
default=None, description="Optional metadata to associate with the collection"
|
||||
)
|
||||
dimensions: int = Field(default=0, description="Dimensionality of the vectors for Milvus index construction.")
|
||||
|
||||
_embedding_type_to_dimensions: ClassVar[dict[EmbeddingType, int]] = {
|
||||
EmbeddingType.GEMINI: 768,
|
||||
EmbeddingType.OLLAMA: 4096,
|
||||
}
|
||||
|
||||
@model_validator(mode="after")
|
||||
def check_dimensions(self):
|
||||
if self.dimensions == 0:
|
||||
self.dimensions = config.embedding.dimensions or self._embedding_type_to_dimensions.get(
|
||||
config.embedding.api_type, 1536
|
||||
)
|
||||
if not config.embedding.dimensions and config.embedding.api_type not in self._embedding_type_to_dimensions:
|
||||
logger.warning(
|
||||
f"You didn't set dimensions in config when using {config.embedding.api_type}, default to 1536"
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
|
||||
class ChromaRetrieverConfig(IndexRetrieverConfig):
|
||||
"""Config for Chroma-based retrievers."""
|
||||
|
||||
|
|
@ -210,17 +180,6 @@ class ChromaIndexConfig(VectorIndexConfig):
|
|||
)
|
||||
|
||||
|
||||
class MilvusIndexConfig(VectorIndexConfig):
|
||||
"""Config for milvus-based index."""
|
||||
|
||||
collection_name: str = Field(default="metagpt", description="The name of the collection.")
|
||||
uri: str = Field(default="./milvus_local.db", description="The uri of the index.")
|
||||
token: Optional[str] = Field(default=None, description="The token of the index.")
|
||||
metadata: Optional[CollectionMetadata] = Field(
|
||||
default=None, description="Optional metadata to associate with the collection"
|
||||
)
|
||||
|
||||
|
||||
class BM25IndexConfig(BaseIndexConfig):
|
||||
"""Config for bm25-based index."""
|
||||
|
||||
|
|
|
|||
1
setup.py
1
setup.py
|
|
@ -43,7 +43,6 @@ extras_require = {
|
|||
"llama-index-postprocessor-cohere-rerank==0.1.4",
|
||||
"llama-index-postprocessor-colbert-rerank==0.1.1",
|
||||
"llama-index-postprocessor-flag-embedding-reranker==0.1.2",
|
||||
"llama-index-vector-stores-milvus==0.1.23",
|
||||
"docx2txt==0.8",
|
||||
],
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,5 @@
|
|||
import random
|
||||
|
||||
import pytest
|
||||
|
||||
from metagpt.document_store.milvus_store import MilvusConnection, MilvusStore
|
||||
|
||||
seed_value = 42
|
||||
random.seed(seed_value)
|
||||
|
||||
|
|
@ -20,29 +16,3 @@ def assert_almost_equal(actual, expected):
|
|||
assert abs(ac - exp) <= delta, f"{ac} is not within {delta} of {exp}"
|
||||
else:
|
||||
assert abs(actual - expected) <= delta, f"{actual} is not within {delta} of {expected}"
|
||||
|
||||
|
||||
@pytest.mark.skip() # Skip because the pymilvus dependency is not installed by default
|
||||
def test_milvus_store():
|
||||
milvus_connection = MilvusConnection(uri="./milvus_local.db")
|
||||
milvus_store = MilvusStore(milvus_connection)
|
||||
|
||||
collection_name = "TestCollection"
|
||||
milvus_store.create_collection(collection_name, dim=8)
|
||||
|
||||
milvus_store.add(collection_name, ids, vectors, metadata)
|
||||
|
||||
search_results = milvus_store.search(collection_name, query=[1.0] * 8)
|
||||
assert len(search_results) > 0
|
||||
first_result = search_results[0]
|
||||
assert first_result["id"] == "doc_0"
|
||||
|
||||
search_results_with_filter = milvus_store.search(collection_name, query=[1.0] * 8, filter={"rand_number": 1})
|
||||
assert len(search_results_with_filter) > 0
|
||||
assert search_results_with_filter[0]["id"] == "doc_1"
|
||||
|
||||
milvus_store.delete(collection_name, _ids=["doc_0"])
|
||||
deleted_results = milvus_store.search(collection_name, query=[1.0] * 8, limit=1)
|
||||
assert deleted_results[0]["id"] != "doc_0"
|
||||
|
||||
milvus_store.client.drop_collection(collection_name)
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ from metagpt.rag.schema import (
|
|||
ElasticsearchIndexConfig,
|
||||
ElasticsearchStoreConfig,
|
||||
FAISSIndexConfig,
|
||||
MilvusIndexConfig,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -21,10 +20,6 @@ class TestRAGIndexFactory:
|
|||
def faiss_config(self):
|
||||
return FAISSIndexConfig(persist_path="")
|
||||
|
||||
@pytest.fixture
|
||||
def milvus_config(self):
|
||||
return MilvusIndexConfig(uri="", collection_name="")
|
||||
|
||||
@pytest.fixture
|
||||
def chroma_config(self):
|
||||
return ChromaIndexConfig(persist_path="", collection_name="")
|
||||
|
|
@ -70,16 +65,6 @@ class TestRAGIndexFactory:
|
|||
):
|
||||
self.index_factory.get_index(bm25_config, embed_model=mock_embedding)
|
||||
|
||||
def test_create_milvus_index(self, mocker, milvus_config, mock_from_vector_store, mock_embedding):
|
||||
# Mock
|
||||
mock_milvus_store = mocker.patch("metagpt.rag.factories.index.MilvusVectorStore")
|
||||
|
||||
# Exec
|
||||
self.index_factory.get_index(milvus_config, embed_model=mock_embedding)
|
||||
|
||||
# Assert
|
||||
mock_milvus_store.assert_called_once()
|
||||
|
||||
def test_create_chroma_index(self, mocker, chroma_config, mock_from_vector_store, mock_embedding):
|
||||
# Mock
|
||||
mock_chroma_db = mocker.patch("metagpt.rag.factories.index.chromadb.PersistentClient")
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ from llama_index.core.embeddings import MockEmbedding
|
|||
from llama_index.core.schema import TextNode
|
||||
from llama_index.vector_stores.chroma import ChromaVectorStore
|
||||
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
|
||||
from llama_index.vector_stores.milvus import MilvusVectorStore
|
||||
|
||||
from metagpt.rag.factories.retriever import RetrieverFactory
|
||||
from metagpt.rag.retrievers.bm25_retriever import DynamicBM25Retriever
|
||||
|
|
@ -13,14 +12,12 @@ from metagpt.rag.retrievers.chroma_retriever import ChromaRetriever
|
|||
from metagpt.rag.retrievers.es_retriever import ElasticsearchRetriever
|
||||
from metagpt.rag.retrievers.faiss_retriever import FAISSRetriever
|
||||
from metagpt.rag.retrievers.hybrid_retriever import SimpleHybridRetriever
|
||||
from metagpt.rag.retrievers.milvus_retriever import MilvusRetriever
|
||||
from metagpt.rag.schema import (
|
||||
BM25RetrieverConfig,
|
||||
ChromaRetrieverConfig,
|
||||
ElasticsearchRetrieverConfig,
|
||||
ElasticsearchStoreConfig,
|
||||
FAISSRetrieverConfig,
|
||||
MilvusRetrieverConfig,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -44,10 +41,6 @@ class TestRetrieverFactory:
|
|||
def mock_chroma_vector_store(self, mocker):
|
||||
return mocker.MagicMock(spec=ChromaVectorStore)
|
||||
|
||||
@pytest.fixture
|
||||
def mock_milvus_vector_store(self, mocker):
|
||||
return mocker.MagicMock(spec=MilvusVectorStore)
|
||||
|
||||
@pytest.fixture
|
||||
def mock_es_vector_store(self, mocker):
|
||||
return mocker.MagicMock(spec=ElasticsearchStore)
|
||||
|
|
@ -98,14 +91,6 @@ class TestRetrieverFactory:
|
|||
|
||||
assert isinstance(retriever, ChromaRetriever)
|
||||
|
||||
def test_get_retriever_with_milvus_config(self, mocker, mock_milvus_vector_store, mock_embedding):
|
||||
mock_config = MilvusRetrieverConfig(uri="/path/to/milvus.db", collection_name="test_collection")
|
||||
mocker.patch("metagpt.rag.factories.retriever.MilvusVectorStore", return_value=mock_milvus_vector_store)
|
||||
|
||||
retriever = self.retriever_factory.get_retriever(configs=[mock_config], nodes=[], embed_model=mock_embedding)
|
||||
|
||||
assert isinstance(retriever, MilvusRetriever)
|
||||
|
||||
def test_get_retriever_with_es_config(self, mocker, mock_es_vector_store, mock_embedding):
|
||||
mock_config = ElasticsearchRetrieverConfig(store_config=ElasticsearchStoreConfig())
|
||||
mocker.patch("metagpt.rag.factories.retriever.ElasticsearchStore", return_value=mock_es_vector_store)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue