diff --git a/metagpt/rag/factories/index.py b/metagpt/rag/factories/index.py index 4e6d6b167..f897af3ad 100644 --- a/metagpt/rag/factories/index.py +++ b/metagpt/rag/factories/index.py @@ -8,7 +8,6 @@ from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.chroma import ChromaVectorStore from llama_index.vector_stores.elasticsearch import ElasticsearchStore from llama_index.vector_stores.faiss import FaissVectorStore -from llama_index.vector_stores.milvus import MilvusVectorStore from metagpt.rag.factories.base import ConfigBasedFactory from metagpt.rag.schema import ( @@ -18,7 +17,6 @@ from metagpt.rag.schema import ( ElasticsearchIndexConfig, ElasticsearchKeywordIndexConfig, FAISSIndexConfig, - MilvusIndexConfig, ) @@ -30,7 +28,6 @@ class RAGIndexFactory(ConfigBasedFactory): BM25IndexConfig: self._create_bm25, ElasticsearchIndexConfig: self._create_es, ElasticsearchKeywordIndexConfig: self._create_es, - MilvusIndexConfig: self._create_milvus, } super().__init__(creators) @@ -49,11 +46,6 @@ class RAGIndexFactory(ConfigBasedFactory): return self._index_from_storage(storage_context=storage_context, config=config, **kwargs) - def _create_milvus(self, config: MilvusIndexConfig, **kwargs) -> VectorStoreIndex: - vector_store = MilvusVectorStore(collection_name=config.collection_name, uri=config.uri, token=config.token) - - return self._index_from_vector_store(vector_store=vector_store, config=config, **kwargs) - def _create_chroma(self, config: ChromaIndexConfig, **kwargs) -> VectorStoreIndex: db = chromadb.PersistentClient(str(config.persist_path)) chroma_collection = db.get_or_create_collection(config.collection_name, metadata=config.metadata) diff --git a/metagpt/rag/factories/retriever.py b/metagpt/rag/factories/retriever.py index f487d0e7a..6bc8e4ad5 100644 --- a/metagpt/rag/factories/retriever.py +++ b/metagpt/rag/factories/retriever.py @@ -13,7 +13,6 @@ from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.chroma import ChromaVectorStore from llama_index.vector_stores.elasticsearch import ElasticsearchStore from llama_index.vector_stores.faiss import FaissVectorStore -from llama_index.vector_stores.milvus import MilvusVectorStore from metagpt.rag.factories.base import ConfigBasedFactory from metagpt.rag.retrievers.base import RAGRetriever @@ -22,7 +21,6 @@ from metagpt.rag.retrievers.chroma_retriever import ChromaRetriever from metagpt.rag.retrievers.es_retriever import ElasticsearchRetriever from metagpt.rag.retrievers.faiss_retriever import FAISSRetriever from metagpt.rag.retrievers.hybrid_retriever import SimpleHybridRetriever -from metagpt.rag.retrievers.milvus_retriever import MilvusRetriever from metagpt.rag.schema import ( BaseRetrieverConfig, BM25RetrieverConfig, @@ -30,7 +28,6 @@ from metagpt.rag.schema import ( ElasticsearchKeywordRetrieverConfig, ElasticsearchRetrieverConfig, FAISSRetrieverConfig, - MilvusRetrieverConfig, ) @@ -60,7 +57,6 @@ class RetrieverFactory(ConfigBasedFactory): ChromaRetrieverConfig: self._create_chroma_retriever, ElasticsearchRetrieverConfig: self._create_es_retriever, ElasticsearchKeywordRetrieverConfig: self._create_es_retriever, - MilvusRetrieverConfig: self._create_milvus_retriever, } super().__init__(creators) @@ -81,11 +77,6 @@ class RetrieverFactory(ConfigBasedFactory): return index.as_retriever() - def _create_milvus_retriever(self, config: MilvusRetrieverConfig, **kwargs) -> MilvusRetriever: - config.index = self._build_milvus_index(config, **kwargs) - - return MilvusRetriever(**config.model_dump()) - def _create_faiss_retriever(self, config: FAISSRetrieverConfig, **kwargs) -> FAISSRetriever: config.index = self._build_faiss_index(config, **kwargs) @@ -144,14 +135,6 @@ class RetrieverFactory(ConfigBasedFactory): return self._build_index_from_vector_store(config, vector_store, **kwargs) - @get_or_build_index - def _build_milvus_index(self, config: MilvusRetrieverConfig, **kwargs) -> VectorStoreIndex: - vector_store = MilvusVectorStore( - uri=config.uri, collection_name=config.collection_name, token=config.token, dim=config.dimensions - ) - - return self._build_index_from_vector_store(config, vector_store, **kwargs) - @get_or_build_index def _build_es_index(self, config: ElasticsearchRetrieverConfig, **kwargs) -> VectorStoreIndex: vector_store = ElasticsearchStore(**config.store_config.model_dump()) diff --git a/metagpt/rag/retrievers/milvus_retriever.py b/metagpt/rag/retrievers/milvus_retriever.py deleted file mode 100644 index bcc66330b..000000000 --- a/metagpt/rag/retrievers/milvus_retriever.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Milvus retriever.""" - -from llama_index.core.retrievers import VectorIndexRetriever -from llama_index.core.schema import BaseNode - - -class MilvusRetriever(VectorIndexRetriever): - """Milvus retriever.""" - - def add_nodes(self, nodes: list[BaseNode], **kwargs) -> None: - """Support add nodes.""" - self._index.insert_nodes(nodes, **kwargs) - - def persist(self, persist_dir: str, **kwargs) -> None: - """Support persist. - - Milvus automatically saves, so there is no need to implement.""" diff --git a/metagpt/rag/schema.py b/metagpt/rag/schema.py index 5c63b09df..257be8eac 100644 --- a/metagpt/rag/schema.py +++ b/metagpt/rag/schema.py @@ -69,36 +69,6 @@ class BM25RetrieverConfig(IndexRetrieverConfig): _no_embedding: bool = PrivateAttr(default=True) -class MilvusRetrieverConfig(IndexRetrieverConfig): - """Config for Milvus-based retrievers.""" - - uri: str = Field(default="./milvus_local.db", description="The directory to save data.") - collection_name: str = Field(default="metagpt", description="The name of the collection.") - token: str = Field(default=None, description="The token for Milvus") - metadata: Optional[CollectionMetadata] = Field( - default=None, description="Optional metadata to associate with the collection" - ) - dimensions: int = Field(default=0, description="Dimensionality of the vectors for Milvus index construction.") - - _embedding_type_to_dimensions: ClassVar[dict[EmbeddingType, int]] = { - EmbeddingType.GEMINI: 768, - EmbeddingType.OLLAMA: 4096, - } - - @model_validator(mode="after") - def check_dimensions(self): - if self.dimensions == 0: - self.dimensions = config.embedding.dimensions or self._embedding_type_to_dimensions.get( - config.embedding.api_type, 1536 - ) - if not config.embedding.dimensions and config.embedding.api_type not in self._embedding_type_to_dimensions: - logger.warning( - f"You didn't set dimensions in config when using {config.embedding.api_type}, default to 1536" - ) - - return self - - class ChromaRetrieverConfig(IndexRetrieverConfig): """Config for Chroma-based retrievers.""" @@ -210,17 +180,6 @@ class ChromaIndexConfig(VectorIndexConfig): ) -class MilvusIndexConfig(VectorIndexConfig): - """Config for milvus-based index.""" - - collection_name: str = Field(default="metagpt", description="The name of the collection.") - uri: str = Field(default="./milvus_local.db", description="The uri of the index.") - token: Optional[str] = Field(default=None, description="The token of the index.") - metadata: Optional[CollectionMetadata] = Field( - default=None, description="Optional metadata to associate with the collection" - ) - - class BM25IndexConfig(BaseIndexConfig): """Config for bm25-based index.""" diff --git a/setup.py b/setup.py index 1fe74b64d..fc0191672 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,6 @@ extras_require = { "llama-index-postprocessor-cohere-rerank==0.1.4", "llama-index-postprocessor-colbert-rerank==0.1.1", "llama-index-postprocessor-flag-embedding-reranker==0.1.2", - "llama-index-vector-stores-milvus==0.1.23", "docx2txt==0.8", ], } diff --git a/tests/metagpt/document_store/test_milvus_store.py b/tests/metagpt/document_store/test_milvus_store.py index 93d4187f9..1d4b1d99d 100644 --- a/tests/metagpt/document_store/test_milvus_store.py +++ b/tests/metagpt/document_store/test_milvus_store.py @@ -1,9 +1,5 @@ import random -import pytest - -from metagpt.document_store.milvus_store import MilvusConnection, MilvusStore - seed_value = 42 random.seed(seed_value) @@ -20,29 +16,3 @@ def assert_almost_equal(actual, expected): assert abs(ac - exp) <= delta, f"{ac} is not within {delta} of {exp}" else: assert abs(actual - expected) <= delta, f"{actual} is not within {delta} of {expected}" - - -@pytest.mark.skip() # Skip because the pymilvus dependency is not installed by default -def test_milvus_store(): - milvus_connection = MilvusConnection(uri="./milvus_local.db") - milvus_store = MilvusStore(milvus_connection) - - collection_name = "TestCollection" - milvus_store.create_collection(collection_name, dim=8) - - milvus_store.add(collection_name, ids, vectors, metadata) - - search_results = milvus_store.search(collection_name, query=[1.0] * 8) - assert len(search_results) > 0 - first_result = search_results[0] - assert first_result["id"] == "doc_0" - - search_results_with_filter = milvus_store.search(collection_name, query=[1.0] * 8, filter={"rand_number": 1}) - assert len(search_results_with_filter) > 0 - assert search_results_with_filter[0]["id"] == "doc_1" - - milvus_store.delete(collection_name, _ids=["doc_0"]) - deleted_results = milvus_store.search(collection_name, query=[1.0] * 8, limit=1) - assert deleted_results[0]["id"] != "doc_0" - - milvus_store.client.drop_collection(collection_name) diff --git a/tests/metagpt/rag/factories/test_index.py b/tests/metagpt/rag/factories/test_index.py index e084eb6e7..9dc5bfb6b 100644 --- a/tests/metagpt/rag/factories/test_index.py +++ b/tests/metagpt/rag/factories/test_index.py @@ -8,7 +8,6 @@ from metagpt.rag.schema import ( ElasticsearchIndexConfig, ElasticsearchStoreConfig, FAISSIndexConfig, - MilvusIndexConfig, ) @@ -21,10 +20,6 @@ class TestRAGIndexFactory: def faiss_config(self): return FAISSIndexConfig(persist_path="") - @pytest.fixture - def milvus_config(self): - return MilvusIndexConfig(uri="", collection_name="") - @pytest.fixture def chroma_config(self): return ChromaIndexConfig(persist_path="", collection_name="") @@ -70,16 +65,6 @@ class TestRAGIndexFactory: ): self.index_factory.get_index(bm25_config, embed_model=mock_embedding) - def test_create_milvus_index(self, mocker, milvus_config, mock_from_vector_store, mock_embedding): - # Mock - mock_milvus_store = mocker.patch("metagpt.rag.factories.index.MilvusVectorStore") - - # Exec - self.index_factory.get_index(milvus_config, embed_model=mock_embedding) - - # Assert - mock_milvus_store.assert_called_once() - def test_create_chroma_index(self, mocker, chroma_config, mock_from_vector_store, mock_embedding): # Mock mock_chroma_db = mocker.patch("metagpt.rag.factories.index.chromadb.PersistentClient") diff --git a/tests/metagpt/rag/factories/test_retriever.py b/tests/metagpt/rag/factories/test_retriever.py index b808de26e..cd55a32db 100644 --- a/tests/metagpt/rag/factories/test_retriever.py +++ b/tests/metagpt/rag/factories/test_retriever.py @@ -5,7 +5,6 @@ from llama_index.core.embeddings import MockEmbedding from llama_index.core.schema import TextNode from llama_index.vector_stores.chroma import ChromaVectorStore from llama_index.vector_stores.elasticsearch import ElasticsearchStore -from llama_index.vector_stores.milvus import MilvusVectorStore from metagpt.rag.factories.retriever import RetrieverFactory from metagpt.rag.retrievers.bm25_retriever import DynamicBM25Retriever @@ -13,14 +12,12 @@ from metagpt.rag.retrievers.chroma_retriever import ChromaRetriever from metagpt.rag.retrievers.es_retriever import ElasticsearchRetriever from metagpt.rag.retrievers.faiss_retriever import FAISSRetriever from metagpt.rag.retrievers.hybrid_retriever import SimpleHybridRetriever -from metagpt.rag.retrievers.milvus_retriever import MilvusRetriever from metagpt.rag.schema import ( BM25RetrieverConfig, ChromaRetrieverConfig, ElasticsearchRetrieverConfig, ElasticsearchStoreConfig, FAISSRetrieverConfig, - MilvusRetrieverConfig, ) @@ -44,10 +41,6 @@ class TestRetrieverFactory: def mock_chroma_vector_store(self, mocker): return mocker.MagicMock(spec=ChromaVectorStore) - @pytest.fixture - def mock_milvus_vector_store(self, mocker): - return mocker.MagicMock(spec=MilvusVectorStore) - @pytest.fixture def mock_es_vector_store(self, mocker): return mocker.MagicMock(spec=ElasticsearchStore) @@ -98,14 +91,6 @@ class TestRetrieverFactory: assert isinstance(retriever, ChromaRetriever) - def test_get_retriever_with_milvus_config(self, mocker, mock_milvus_vector_store, mock_embedding): - mock_config = MilvusRetrieverConfig(uri="/path/to/milvus.db", collection_name="test_collection") - mocker.patch("metagpt.rag.factories.retriever.MilvusVectorStore", return_value=mock_milvus_vector_store) - - retriever = self.retriever_factory.get_retriever(configs=[mock_config], nodes=[], embed_model=mock_embedding) - - assert isinstance(retriever, MilvusRetriever) - def test_get_retriever_with_es_config(self, mocker, mock_es_vector_store, mock_embedding): mock_config = ElasticsearchRetrieverConfig(store_config=ElasticsearchStoreConfig()) mocker.patch("metagpt.rag.factories.retriever.ElasticsearchStore", return_value=mock_es_vector_store)