Merge pull request #974 from better629/feat_memory

Feat add rag
This commit is contained in:
Alexander Wu 2024-03-17 23:39:12 +08:00 committed by GitHub
commit e783e5b208
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
61 changed files with 2353 additions and 248 deletions

View file

@ -12,7 +12,7 @@ from metagpt.document_store.chromadb_store import ChromaStore
def test_chroma_store():
"""FIXMEchroma使用感觉很诡异一用Python就挂测试用例里也是"""
# 创建 ChromaStore 实例,使用 'sample_collection' 集合
document_store = ChromaStore("sample_collection_1")
document_store = ChromaStore("sample_collection_1", get_or_create=True)
# 使用 write 方法添加多个文档
document_store.write(

View file

@ -6,8 +6,6 @@
@File : test_faiss_store.py
"""
from typing import Optional
import numpy as np
import pytest
@ -17,18 +15,24 @@ from metagpt.logs import logger
from metagpt.roles import Sales
def mock_openai_embed_documents(self, texts: list[str], chunk_size: Optional[int] = 0) -> list[list[float]]:
def mock_openai_embed_documents(self, texts: list[str], show_progress: bool = False) -> list[list[float]]:
num = len(texts)
embeds = np.random.randint(1, 100, size=(num, 1536)) # 1536: openai embedding dim
embeds = (embeds - embeds.mean(axis=0)) / (embeds.std(axis=0))
return embeds
embeds = (embeds - embeds.mean(axis=0)) / embeds.std(axis=0)
return embeds.tolist()
def mock_openai_embed_document(self, text: str) -> list[float]:
embeds = mock_openai_embed_documents(self, [text])
return embeds[0]
@pytest.mark.asyncio
async def test_search_json(mocker):
mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
mocker.patch("llama_index.embeddings.openai.base.OpenAIEmbedding._get_text_embeddings", mock_openai_embed_documents)
mocker.patch("llama_index.embeddings.openai.base.OpenAIEmbedding._get_text_embedding", mock_openai_embed_document)
store = FaissStore(EXAMPLE_PATH / "example.json")
store = FaissStore(EXAMPLE_PATH / "data/search_kb/example.json")
role = Sales(profile="Sales", store=store)
query = "Which facial cleanser is good for oily skin?"
result = await role.run(query)
@ -37,9 +41,10 @@ async def test_search_json(mocker):
@pytest.mark.asyncio
async def test_search_xlsx(mocker):
mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
mocker.patch("llama_index.embeddings.openai.base.OpenAIEmbedding._get_text_embeddings", mock_openai_embed_documents)
mocker.patch("llama_index.embeddings.openai.base.OpenAIEmbedding._get_text_embedding", mock_openai_embed_document)
store = FaissStore(EXAMPLE_PATH / "example.xlsx")
store = FaissStore(EXAMPLE_PATH / "data/search_kb/example.xlsx", meta_col="Answer", content_col="Question")
role = Sales(profile="Sales", store=store)
query = "Which facial cleanser is good for oily skin?"
result = await role.run(query)
@ -48,9 +53,10 @@ async def test_search_xlsx(mocker):
@pytest.mark.asyncio
async def test_write(mocker):
mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
mocker.patch("llama_index.embeddings.openai.base.OpenAIEmbedding._get_text_embeddings", mock_openai_embed_documents)
mocker.patch("llama_index.embeddings.openai.base.OpenAIEmbedding._get_text_embedding", mock_openai_embed_document)
store = FaissStore(EXAMPLE_PATH / "example.xlsx", meta_col="Answer", content_col="Question")
store = FaissStore(EXAMPLE_PATH / "data/search_kb/example.xlsx", meta_col="Answer", content_col="Question")
_faiss_store = store.write()
assert _faiss_store.docstore
assert _faiss_store.index
assert _faiss_store.storage_context.docstore
assert _faiss_store.storage_context.vector_store.client

View file

@ -2,32 +2,41 @@
# -*- coding: utf-8 -*-
# @Desc :
from typing import Optional
import numpy as np
dim = 1536 # openai embedding dim
embed_zeros_arrr = np.zeros(shape=[1, dim]).tolist()
embed_ones_arrr = np.ones(shape=[1, dim]).tolist()
text_embed_arr = [
{"text": "Write a cli snake game", "embed": np.zeros(shape=[1, dim])}, # mock data, same as below
{"text": "Write a game of cli snake", "embed": np.zeros(shape=[1, dim])},
{"text": "Write a 2048 web game", "embed": np.ones(shape=[1, dim])},
{"text": "Write a Battle City", "embed": np.ones(shape=[1, dim])},
{"text": "Write a cli snake game", "embed": embed_zeros_arrr}, # mock data, same as below
{"text": "Write a game of cli snake", "embed": embed_zeros_arrr},
{"text": "Write a 2048 web game", "embed": embed_ones_arrr},
{"text": "Write a Battle City", "embed": embed_ones_arrr},
{
"text": "The user has requested the creation of a command-line interface (CLI) snake game",
"embed": np.zeros(shape=[1, dim]),
"embed": embed_zeros_arrr,
},
{"text": "The request is command-line interface (CLI) snake game", "embed": np.zeros(shape=[1, dim])},
{"text": "The request is command-line interface (CLI) snake game", "embed": embed_zeros_arrr},
{
"text": "Incorporate basic features of a snake game such as scoring and increasing difficulty",
"embed": np.ones(shape=[1, dim]),
"embed": embed_ones_arrr,
},
]
text_idx_dict = {item["text"]: idx for idx, item in enumerate(text_embed_arr)}
def mock_openai_embed_documents(self, texts: list[str], chunk_size: Optional[int] = 0) -> list[list[float]]:
def mock_openai_embed_documents(self, texts: list[str], show_progress: bool = False) -> list[list[float]]:
idx = text_idx_dict.get(texts[0])
embed = text_embed_arr[idx].get("embed")
return embed
def mock_openai_embed_document(self, text: str) -> list[float]:
embeds = mock_openai_embed_documents(self, [text])
return embeds[0]
async def mock_openai_aembed_document(self, text: str) -> list[float]:
return mock_openai_embed_document(self, text)

View file

@ -12,13 +12,20 @@ from metagpt.memory.longterm_memory import LongTermMemory
from metagpt.roles.role import RoleContext
from metagpt.schema import Message
from tests.metagpt.memory.mock_text_embed import (
mock_openai_aembed_document,
mock_openai_embed_document,
mock_openai_embed_documents,
text_embed_arr,
)
def test_ltm_search(mocker):
mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
@pytest.mark.asyncio
async def test_ltm_search(mocker):
mocker.patch("llama_index.embeddings.openai.base.OpenAIEmbedding._get_text_embeddings", mock_openai_embed_documents)
mocker.patch("llama_index.embeddings.openai.base.OpenAIEmbedding._get_text_embedding", mock_openai_embed_document)
mocker.patch(
"llama_index.embeddings.openai.base.OpenAIEmbedding._aget_query_embedding", mock_openai_aembed_document
)
role_id = "UTUserLtm(Product Manager)"
from metagpt.environment import Environment
@ -31,39 +38,24 @@ def test_ltm_search(mocker):
idea = text_embed_arr[0].get("text", "Write a cli snake game")
message = Message(role="User", content=idea, cause_by=UserRequirement)
news = ltm.find_news([message])
news = await ltm.find_news([message])
assert len(news) == 1
ltm.add(message)
sim_idea = text_embed_arr[1].get("text", "Write a game of cli snake")
sim_message = Message(role="User", content=sim_idea, cause_by=UserRequirement)
news = ltm.find_news([sim_message])
news = await ltm.find_news([sim_message])
assert len(news) == 0
ltm.add(sim_message)
new_idea = text_embed_arr[2].get("text", "Write a 2048 web game")
new_message = Message(role="User", content=new_idea, cause_by=UserRequirement)
news = ltm.find_news([new_message])
news = await ltm.find_news([new_message])
assert len(news) == 1
ltm.add(new_message)
# restore from local index
ltm_new = LongTermMemory()
ltm_new.recover_memory(role_id, rc)
news = ltm_new.find_news([message])
assert len(news) == 0
ltm_new.recover_memory(role_id, rc)
news = ltm_new.find_news([sim_message])
assert len(news) == 0
new_idea = text_embed_arr[3].get("text", "Write a Battle City")
new_message = Message(role="User", content=new_idea, cause_by=UserRequirement)
news = ltm_new.find_news([new_message])
assert len(news) == 1
ltm_new.clear()
ltm.clear()
if __name__ == "__main__":

View file

@ -8,19 +8,28 @@ import shutil
from pathlib import Path
from typing import List
import pytest
from metagpt.actions import UserRequirement, WritePRD
from metagpt.actions.action_node import ActionNode
from metagpt.const import DATA_PATH
from metagpt.memory.memory_storage import MemoryStorage
from metagpt.schema import Message
from tests.metagpt.memory.mock_text_embed import (
mock_openai_aembed_document,
mock_openai_embed_document,
mock_openai_embed_documents,
text_embed_arr,
)
def test_idea_message(mocker):
mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
@pytest.mark.asyncio
async def test_idea_message(mocker):
mocker.patch("llama_index.embeddings.openai.base.OpenAIEmbedding._get_text_embeddings", mock_openai_embed_documents)
mocker.patch("llama_index.embeddings.openai.base.OpenAIEmbedding._get_text_embedding", mock_openai_embed_document)
mocker.patch(
"llama_index.embeddings.openai.base.OpenAIEmbedding._aget_query_embedding", mock_openai_aembed_document
)
idea = text_embed_arr[0].get("text", "Write a cli snake game")
role_id = "UTUser1(Product Manager)"
@ -29,28 +38,32 @@ def test_idea_message(mocker):
shutil.rmtree(Path(DATA_PATH / f"role_mem/{role_id}/"), ignore_errors=True)
memory_storage: MemoryStorage = MemoryStorage()
messages = memory_storage.recover_memory(role_id)
assert len(messages) == 0
memory_storage.recover_memory(role_id)
memory_storage.add(message)
assert memory_storage.is_initialized is True
sim_idea = text_embed_arr[1].get("text", "Write a game of cli snake")
sim_message = Message(role="User", content=sim_idea, cause_by=UserRequirement)
new_messages = memory_storage.search_dissimilar(sim_message)
assert len(new_messages) == 0 # similar, return []
new_messages = await memory_storage.search_similar(sim_message)
assert len(new_messages) == 1 # similar, return []
new_idea = text_embed_arr[2].get("text", "Write a 2048 web game")
new_message = Message(role="User", content=new_idea, cause_by=UserRequirement)
new_messages = memory_storage.search_dissimilar(new_message)
assert new_messages[0].content == message.content
new_messages = await memory_storage.search_similar(new_message)
assert len(new_messages) == 0
memory_storage.clean()
assert memory_storage.is_initialized is False
def test_actionout_message(mocker):
mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
@pytest.mark.asyncio
async def test_actionout_message(mocker):
mocker.patch("llama_index.embeddings.openai.base.OpenAIEmbedding._get_text_embeddings", mock_openai_embed_documents)
mocker.patch("llama_index.embeddings.openai.base.OpenAIEmbedding._get_text_embedding", mock_openai_embed_document)
mocker.patch(
"llama_index.embeddings.openai.base.OpenAIEmbedding._aget_query_embedding", mock_openai_aembed_document
)
out_mapping = {"field1": (str, ...), "field2": (List[str], ...)}
out_data = {"field1": "field1 value", "field2": ["field2 value1", "field2 value2"]}
@ -67,23 +80,22 @@ def test_actionout_message(mocker):
shutil.rmtree(Path(DATA_PATH / f"role_mem/{role_id}/"), ignore_errors=True)
memory_storage: MemoryStorage = MemoryStorage()
messages = memory_storage.recover_memory(role_id)
assert len(messages) == 0
memory_storage.recover_memory(role_id)
memory_storage.add(message)
assert memory_storage.is_initialized is True
sim_conent = text_embed_arr[5].get("text", "The request is command-line interface (CLI) snake game")
sim_message = Message(content=sim_conent, instruct_content=ic_obj(**out_data), role="user", cause_by=WritePRD)
new_messages = memory_storage.search_dissimilar(sim_message)
assert len(new_messages) == 0 # similar, return []
new_messages = await memory_storage.search_similar(sim_message)
assert len(new_messages) == 1 # similar, return []
new_conent = text_embed_arr[6].get(
"text", "Incorporate basic features of a snake game such as scoring and increasing difficulty"
)
new_message = Message(content=new_conent, instruct_content=ic_obj(**out_data), role="user", cause_by=WritePRD)
new_messages = memory_storage.search_dissimilar(new_message)
assert new_messages[0].content == message.content
new_messages = await memory_storage.search_similar(new_message)
assert len(new_messages) == 0
memory_storage.clean()
assert memory_storage.is_initialized is False

View file

@ -0,0 +1,166 @@
import pytest
from llama_index.core import VectorStoreIndex
from llama_index.core.schema import Document, TextNode
from metagpt.rag.engines import SimpleEngine
from metagpt.rag.retrievers.base import ModifiableRAGRetriever
class TestSimpleEngine:
    """Unit tests for SimpleEngine: construction from docs, async search/retrieval,
    and incremental addition of documents and objects.

    All collaborators (directory reader, index builder, retriever/ranker/synthesizer
    factories) are patched at their import site in ``metagpt.rag.engines.simple``.
    """

    @pytest.fixture
    def mock_simple_directory_reader(self, mocker):
        # Patch the reader at its import site so tests never touch the filesystem.
        return mocker.patch("metagpt.rag.engines.simple.SimpleDirectoryReader")

    @pytest.fixture
    def mock_vector_store_index(self, mocker):
        # Patch index construction to avoid building a real embedding index.
        return mocker.patch("metagpt.rag.engines.simple.VectorStoreIndex.from_documents")

    @pytest.fixture
    def mock_get_retriever(self, mocker):
        return mocker.patch("metagpt.rag.engines.simple.get_retriever")

    @pytest.fixture
    def mock_get_rankers(self, mocker):
        return mocker.patch("metagpt.rag.engines.simple.get_rankers")

    @pytest.fixture
    def mock_get_response_synthesizer(self, mocker):
        return mocker.patch("metagpt.rag.engines.simple.get_response_synthesizer")

    def test_from_docs(
        self,
        mocker,
        mock_simple_directory_reader,
        mock_vector_store_index,
        mock_get_retriever,
        mock_get_rankers,
        mock_get_response_synthesizer,
    ):
        """from_docs wires reader -> index -> retriever/rankers/synthesizer with the given configs."""
        # Mock
        mock_simple_directory_reader.return_value.load_data.return_value = [
            Document(text="document1"),
            Document(text="document2"),
        ]
        mock_get_retriever.return_value = mocker.MagicMock()
        mock_get_rankers.return_value = [mocker.MagicMock()]
        mock_get_response_synthesizer.return_value = mocker.MagicMock()

        # Setup
        input_dir = "test_dir"
        input_files = ["test_file1", "test_file2"]
        transformations = [mocker.MagicMock()]
        embed_model = mocker.MagicMock()
        llm = mocker.MagicMock()
        retriever_configs = [mocker.MagicMock()]
        ranker_configs = [mocker.MagicMock()]

        # Execute
        engine = SimpleEngine.from_docs(
            input_dir=input_dir,
            input_files=input_files,
            transformations=transformations,
            embed_model=embed_model,
            llm=llm,
            retriever_configs=retriever_configs,
            ranker_configs=ranker_configs,
        )

        # Assertions: each collaborator received exactly the arguments from_docs forwards.
        mock_simple_directory_reader.assert_called_once_with(input_dir=input_dir, input_files=input_files)
        mock_vector_store_index.assert_called_once()
        mock_get_retriever.assert_called_once_with(
            configs=retriever_configs, index=mock_vector_store_index.return_value
        )
        mock_get_rankers.assert_called_once_with(configs=ranker_configs, llm=llm)
        mock_get_response_synthesizer.assert_called_once_with(llm=llm)
        assert isinstance(engine, SimpleEngine)

    @pytest.mark.asyncio
    async def test_asearch(self, mocker):
        """asearch delegates to aquery and returns its result unchanged."""
        # Mock
        test_query = "test query"
        expected_result = "expected result"
        mock_aquery = mocker.AsyncMock(return_value=expected_result)

        # Setup
        engine = SimpleEngine(retriever=mocker.MagicMock())
        engine.aquery = mock_aquery

        # Execute
        result = await engine.asearch(test_query)

        # Assertions
        mock_aquery.assert_called_once_with(test_query)
        assert result == expected_result

    @pytest.mark.asyncio
    async def test_aretrieve(self, mocker):
        """aretrieve wraps the raw query in a QueryBundle before delegating to the parent engine."""
        # Mock
        mock_query_bundle = mocker.patch("metagpt.rag.engines.simple.QueryBundle", return_value="query_bundle")
        mock_super_aretrieve = mocker.patch(
            "metagpt.rag.engines.simple.RetrieverQueryEngine.aretrieve", new_callable=mocker.AsyncMock
        )
        mock_super_aretrieve.return_value = [TextNode(text="node_with_score", metadata={"is_obj": False})]

        # Setup
        engine = SimpleEngine(retriever=mocker.MagicMock())
        test_query = "test query"

        # Execute
        result = await engine.aretrieve(test_query)

        # Assertions
        mock_query_bundle.assert_called_once_with(test_query)
        mock_super_aretrieve.assert_called_once_with("query_bundle")
        assert result[0].text == "node_with_score"

    def test_add_docs(self, mocker):
        """add_docs loads the files, runs the index's transformations, and hands nodes to the retriever."""
        # Mock
        mock_simple_directory_reader = mocker.patch("metagpt.rag.engines.simple.SimpleDirectoryReader")
        mock_simple_directory_reader.return_value.load_data.return_value = [
            Document(text="document1"),
            Document(text="document2"),
        ]
        mock_retriever = mocker.MagicMock(spec=ModifiableRAGRetriever)
        mock_index = mocker.MagicMock(spec=VectorStoreIndex)
        # spec'd mocks don't expose private attrs, so attach the one add_docs reads.
        mock_index._transformations = mocker.MagicMock()
        mock_run_transformations = mocker.patch("metagpt.rag.engines.simple.run_transformations")
        mock_run_transformations.return_value = ["node1", "node2"]

        # Setup
        engine = SimpleEngine(retriever=mock_retriever, index=mock_index)
        input_files = ["test_file1", "test_file2"]

        # Execute
        engine.add_docs(input_files=input_files)

        # Assertions
        mock_simple_directory_reader.assert_called_once_with(input_files=input_files)
        mock_retriever.add_nodes.assert_called_once_with(["node1", "node2"])

    def test_add_objs(self, mocker):
        """add_objs converts each object to a TextNode carrying is_obj metadata and adds them in one call."""
        # Mock
        mock_retriever = mocker.MagicMock(spec=ModifiableRAGRetriever)

        # Setup
        class CustomTextNode(TextNode):
            # Minimal RAG-object protocol expected by add_objs — TODO confirm against SimpleEngine.add_objs.
            def rag_key(self):
                return ""

            def model_dump_json(self):
                return ""

        objs = [CustomTextNode(text=f"text_{i}", metadata={"obj": f"obj_{i}"}) for i in range(2)]
        engine = SimpleEngine(retriever=mock_retriever, index=mocker.MagicMock())

        # Execute
        engine.add_objs(objs=objs)

        # Assertions: a single add_nodes call whose payload is all TextNodes tagged with is_obj.
        assert mock_retriever.add_nodes.call_count == 1
        for node in mock_retriever.add_nodes.call_args[0][0]:
            assert isinstance(node, TextNode)
            assert "is_obj" in node.metadata

View file

@ -0,0 +1,102 @@
import pytest
from metagpt.rag.factories.base import ConfigBasedFactory, GenericFactory
class TestGenericFactory:
    """Exercise GenericFactory's key-based creator lookup, for both single and batch retrieval."""

    @pytest.fixture
    def creators(self):
        return {
            "type1": lambda name: f"Instance of type1 with {name}",
            "type2": lambda name: f"Instance of type2 with {name}",
        }

    @pytest.fixture
    def factory(self, creators):
        return GenericFactory(creators=creators)

    def test_get_instance_success(self, factory):
        # A registered key resolves through its creator, forwarding kwargs.
        assert factory.get_instance("type1", name="TestName") == "Instance of type1 with TestName"

    def test_get_instance_failure(self, factory):
        # An unregistered key raises a descriptive ValueError.
        with pytest.raises(ValueError) as exc_info:
            factory.get_instance("unknown_key")
        assert "Creator not registered for key: unknown_key" in str(exc_info.value)

    def test_get_instances_success(self, factory):
        # Multiple keys resolve in order, each creator receiving the same kwargs.
        got = factory.get_instances(["type1", "type2"], name="TestName")
        assert got == [
            "Instance of type1 with TestName",
            "Instance of type2 with TestName",
        ]

    @pytest.mark.parametrize(
        "keys,expected_exception_message",
        [
            (["unknown_key"], "Creator not registered for key: unknown_key"),
            (["type1", "unknown_key"], "Creator not registered for key: unknown_key"),
        ],
    )
    def test_get_instances_with_failure(self, factory, keys, expected_exception_message):
        # A single bad key anywhere in the batch aborts the whole lookup.
        with pytest.raises(ValueError) as exc_info:
            factory.get_instances(keys, name="TestName")
        assert expected_exception_message in str(exc_info.value)
class DummyConfig:
    """Minimal stand-in configuration object used by the factory tests."""

    def __init__(self, name):
        # The only attribute the factories under test inspect.
        self.name = name
class TestConfigBasedFactory:
    """Exercise ConfigBasedFactory: dispatch by config type plus the config-or-kwargs value helper."""

    @pytest.fixture
    def config_creators(self):
        return {
            DummyConfig: lambda config, **kwargs: f"Processed {config.name} with {kwargs.get('extra', 'no extra')}",
        }

    @pytest.fixture
    def config_factory(self, config_creators):
        return ConfigBasedFactory(creators=config_creators)

    def test_get_instance_success(self, config_factory):
        # Dispatches on the config's concrete type, forwarding extra kwargs to the creator.
        cfg = DummyConfig(name="TestConfig")
        assert config_factory.get_instance(cfg, extra="additional data") == "Processed TestConfig with additional data"

    def test_get_instance_failure(self, config_factory):
        # A config type with no registered creator raises ValueError.
        class UnknownConfig:
            pass

        with pytest.raises(ValueError) as exc_info:
            config_factory.get_instance(UnknownConfig())
        assert "Unknown config:" in str(exc_info.value)

    def test_val_from_config_or_kwargs_priority(self):
        # The config object's attribute wins over the same key passed via kwargs.
        cfg = DummyConfig(name="ConfigName")
        assert ConfigBasedFactory._val_from_config_or_kwargs("name", cfg, name="KwargsName") == "ConfigName"

    def test_val_from_config_or_kwargs_fallback_to_kwargs(self):
        # When the config attribute is None, the value falls back to kwargs.
        cfg = DummyConfig(name=None)
        assert ConfigBasedFactory._val_from_config_or_kwargs("name", cfg, name="KwargsName") == "KwargsName"

    def test_val_from_config_or_kwargs_key_error(self):
        # Missing from both config and kwargs -> KeyError with a helpful message.
        with pytest.raises(KeyError) as exc_info:
            ConfigBasedFactory._val_from_config_or_kwargs("missing_key", DummyConfig(name=None))
        assert "The key 'missing_key' is required but not provided" in str(exc_info.value)

View file

@ -0,0 +1,41 @@
import pytest
from llama_index.core.llms import LLM
from llama_index.core.postprocessor import LLMRerank
from metagpt.rag.factories.ranker import RankerFactory
from metagpt.rag.schema import LLMRankerConfig
class TestRankerFactory:
    """Exercise RankerFactory's config-driven ranker creation and LLM extraction."""

    @pytest.fixture
    def ranker_factory(self) -> RankerFactory:
        return RankerFactory()

    @pytest.fixture
    def mock_llm(self, mocker):
        return mocker.MagicMock(spec=LLM)

    def test_get_rankers_with_no_configs(self, ranker_factory: RankerFactory, mock_llm, mocker):
        # Without configs the factory produces no rankers at all.
        mocker.patch.object(ranker_factory, "_extract_llm", return_value=mock_llm)
        default_rankers = ranker_factory.get_rankers()
        assert len(default_rankers) == 0

    def test_get_rankers_with_configs(self, ranker_factory: RankerFactory, mock_llm):
        # A single LLMRankerConfig yields exactly one LLMRerank instance.
        rankers = ranker_factory.get_rankers(configs=[LLMRankerConfig(llm=mock_llm)])
        assert len(rankers) == 1
        assert isinstance(rankers[0], LLMRerank)

    def test_create_llm_ranker_creates_correct_instance(self, ranker_factory: RankerFactory, mock_llm):
        # The private creator builds an LLMRerank from the config.
        ranker = ranker_factory._create_llm_ranker(LLMRankerConfig(llm=mock_llm))
        assert isinstance(ranker, LLMRerank)

    def test_extract_llm_from_config(self, ranker_factory: RankerFactory, mock_llm):
        # The LLM is taken from the config when one is set there.
        assert ranker_factory._extract_llm(config=LLMRankerConfig(llm=mock_llm)) == mock_llm

    def test_extract_llm_from_kwargs(self, ranker_factory: RankerFactory, mock_llm):
        # Otherwise the LLM may be passed directly as a keyword argument.
        assert ranker_factory._extract_llm(llm=mock_llm) == mock_llm

View file

@ -0,0 +1,79 @@
import faiss
import pytest
from llama_index.core import VectorStoreIndex
from metagpt.rag.factories.retriever import RetrieverFactory
from metagpt.rag.retrievers.bm25_retriever import DynamicBM25Retriever
from metagpt.rag.retrievers.faiss_retriever import FAISSRetriever
from metagpt.rag.retrievers.hybrid_retriever import SimpleHybridRetriever
from metagpt.rag.schema import BM25RetrieverConfig, FAISSRetrieverConfig
class TestRetrieverFactory:
    """Unit tests for RetrieverFactory: config-driven retriever creation and index extraction."""

    @pytest.fixture
    def retriever_factory(self):
        return RetrieverFactory()

    @pytest.fixture
    def mock_faiss_index(self, mocker):
        return mocker.MagicMock(spec=faiss.IndexFlatL2)

    @pytest.fixture
    def mock_vector_store_index(self, mocker):
        mock = mocker.MagicMock(spec=VectorStoreIndex)
        # spec'd mocks don't expose private attrs, so attach what the factory reads.
        mock._embed_model = mocker.MagicMock()
        mock.docstore.docs.values.return_value = []
        return mock

    def test_get_retriever_with_faiss_config(
        self, retriever_factory: RetrieverFactory, mock_faiss_index, mocker, mock_vector_store_index
    ):
        """A single FAISS config produces a FAISSRetriever."""
        mock_config = FAISSRetrieverConfig(dimensions=128)
        # Stub faiss so no native index is allocated.
        mocker.patch("faiss.IndexFlatL2", return_value=mock_faiss_index)
        mocker.patch.object(retriever_factory, "_extract_index", return_value=mock_vector_store_index)

        retriever = retriever_factory.get_retriever(configs=[mock_config])

        assert isinstance(retriever, FAISSRetriever)

    def test_get_retriever_with_bm25_config(self, retriever_factory: RetrieverFactory, mocker, mock_vector_store_index):
        """A single BM25 config produces a DynamicBM25Retriever."""
        mock_config = BM25RetrieverConfig()
        # Stub BM25Okapi.__init__ so no corpus statistics are computed.
        mocker.patch("rank_bm25.BM25Okapi.__init__", return_value=None)
        mocker.patch.object(retriever_factory, "_extract_index", return_value=mock_vector_store_index)

        retriever = retriever_factory.get_retriever(configs=[mock_config])

        assert isinstance(retriever, DynamicBM25Retriever)

    def test_get_retriever_with_multiple_configs_returns_hybrid(
        self, retriever_factory: RetrieverFactory, mocker, mock_vector_store_index
    ):
        """Several configs are combined into a SimpleHybridRetriever."""
        mock_faiss_config = FAISSRetrieverConfig(dimensions=128)
        mock_bm25_config = BM25RetrieverConfig()
        mocker.patch("rank_bm25.BM25Okapi.__init__", return_value=None)
        mocker.patch.object(retriever_factory, "_extract_index", return_value=mock_vector_store_index)

        retriever = retriever_factory.get_retriever(configs=[mock_faiss_config, mock_bm25_config])

        assert isinstance(retriever, SimpleHybridRetriever)

    def test_create_default_retriever(self, retriever_factory: RetrieverFactory, mocker, mock_vector_store_index):
        """With no configs, the factory falls back to the index's own as_retriever()."""
        mocker.patch.object(retriever_factory, "_extract_index", return_value=mock_vector_store_index)
        mock_vector_store_index.as_retriever = mocker.MagicMock()

        retriever = retriever_factory.get_retriever()

        mock_vector_store_index.as_retriever.assert_called_once()
        assert retriever is mock_vector_store_index.as_retriever.return_value

    def test_extract_index_from_config(self, retriever_factory: RetrieverFactory, mock_vector_store_index):
        """The index may be supplied on the config object itself."""
        mock_config = FAISSRetrieverConfig(index=mock_vector_store_index)
        extracted_index = retriever_factory._extract_index(config=mock_config)
        assert extracted_index == mock_vector_store_index

    def test_extract_index_from_kwargs(self, retriever_factory: RetrieverFactory, mock_vector_store_index):
        """...or passed directly as a keyword argument."""
        extracted_index = retriever_factory._extract_index(index=mock_vector_store_index)
        assert extracted_index == mock_vector_store_index

View file

@ -0,0 +1,37 @@
import pytest
from llama_index.core import VectorStoreIndex
from llama_index.core.schema import Node
from metagpt.rag.retrievers.bm25_retriever import DynamicBM25Retriever
class TestDynamicBM25Retriever:
    """Unit tests for DynamicBM25Retriever.add_nodes bookkeeping (nodes, corpus, BM25 rebuild)."""

    @pytest.fixture(autouse=True)
    def setup(self, mocker):
        # Build mock Node objects with canned textual content.
        self.doc1 = mocker.MagicMock(spec=Node)
        self.doc1.get_content.return_value = "Document content 1"
        self.doc2 = mocker.MagicMock(spec=Node)
        self.doc2.get_content.return_value = "Document content 2"
        self.mock_nodes = [self.doc1, self.doc2]

        # Mock the backing index.
        index = mocker.MagicMock(spec=VectorStoreIndex)

        # Mock the nodes and tokenizer constructor arguments (retriever starts empty).
        mock_nodes = []
        mock_tokenizer = mocker.MagicMock()
        # Stub BM25Okapi.__init__ so no real BM25 statistics are computed.
        self.mock_bm25okapi = mocker.patch("rank_bm25.BM25Okapi.__init__", return_value=None)

        # Instantiate DynamicBM25Retriever with its required arguments.
        self.retriever = DynamicBM25Retriever(nodes=mock_nodes, tokenizer=mock_tokenizer, index=index)

    def test_add_docs_updates_nodes_and_corpus(self):
        # Execute
        self.retriever.add_nodes(self.mock_nodes)

        # Assertions: internal node list and tokenized corpus were extended,
        # the tokenizer was invoked, and the BM25 model was (re)built.
        assert len(self.retriever._nodes) == len(self.mock_nodes)
        assert len(self.retriever._corpus) == len(self.mock_nodes)
        self.retriever._tokenizer.assert_called()
        self.mock_bm25okapi.assert_called()

View file

@ -0,0 +1,22 @@
import pytest
from llama_index.core.schema import Node
from metagpt.rag.retrievers.faiss_retriever import FAISSRetriever
class TestFAISSRetriever:
    """Unit tests for FAISSRetriever.add_nodes."""

    @pytest.fixture(autouse=True)
    def setup(self, mocker):
        # Build mock Node objects to insert.
        self.doc1 = mocker.MagicMock(spec=Node)
        self.doc2 = mocker.MagicMock(spec=Node)
        self.mock_nodes = [self.doc1, self.doc2]

        # Mock the underlying index the retriever wraps.
        self.mock_index = mocker.MagicMock()
        self.retriever = FAISSRetriever(self.mock_index)

    def test_add_docs_calls_insert_for_each_document(self):
        self.retriever.add_nodes(self.mock_nodes)

        # BUG FIX: the original line was
        #     assert self.mock_index.insert_nodes.assert_called
        # which asserts the truthiness of the bound `assert_called` method object
        # (always truthy), so the test could never fail. Invoke the mock
        # assertion instead so a missing insert actually fails the test.
        self.mock_index.insert_nodes.assert_called()

View file

@ -0,0 +1,39 @@
from unittest.mock import AsyncMock
import pytest
from llama_index.core.schema import NodeWithScore, TextNode
from metagpt.rag.retrievers import SimpleHybridRetriever
class TestSimpleHybridRetriever:
    """Behavioral test for SimpleHybridRetriever: results from all sub-retrievers are merged, deduplicated."""

    @pytest.mark.asyncio
    async def test_aretrieve(self):
        question = "test query"

        # Two retrievers whose result sets overlap on node "2".
        first_retriever = AsyncMock()
        first_retriever.aretrieve.return_value = [
            NodeWithScore(node=TextNode(id_="1"), score=1.0),
            NodeWithScore(node=TextNode(id_="2"), score=0.95),
        ]
        second_retriever = AsyncMock()
        second_retriever.aretrieve.return_value = [
            NodeWithScore(node=TextNode(id_="2"), score=0.95),
            NodeWithScore(node=TextNode(id_="3"), score=0.8),
        ]

        # Combine both mocks under one hybrid retriever and query it.
        hybrid_retriever = SimpleHybridRetriever(first_retriever, second_retriever)
        results = await hybrid_retriever._aretrieve(question)

        # Duplicates are collapsed: exactly three unique nodes survive the merge.
        assert len(results) == 3
        assert {item.node.node_id for item in results} == {"1", "2", "3"}

        # The overlapping node keeps its score after deduplication.
        node_scores = {item.node.node_id: item.score for item in results}
        assert node_scores["2"] == 0.95