diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index ed4bbb144..d350a87f1 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -11,6 +11,7 @@ on: jobs: pre-commit-check: runs-on: ubuntu-latest + environment: pre-commit steps: - name: Checkout Source Code uses: actions/checkout@v2 diff --git a/config/config2.example.yaml b/config/config2.example.yaml index 3249f5ae3..e57ec3ee8 100644 --- a/config/config2.example.yaml +++ b/config/config2.example.yaml @@ -18,6 +18,7 @@ embedding: model: "" api_version: "" embed_batch_size: 100 + dimensions: # output dimension of embedding model repair_llm_output: true # when the output is not a valid json, try to repair it diff --git a/examples/rag_pipeline.py b/examples/rag_pipeline.py index 7dbca35a6..5b716ce03 100644 --- a/examples/rag_pipeline.py +++ b/examples/rag_pipeline.py @@ -18,13 +18,13 @@ from metagpt.rag.schema import ( ) from metagpt.utils.exceptions import handle_exception +LLM_TIP = "If you not sure, just answer I don't know." + DOC_PATH = EXAMPLE_DATA_PATH / "rag/writer.txt" -QUESTION = "What are key qualities to be a good writer?" +QUESTION = f"What are key qualities to be a good writer? {LLM_TIP}" TRAVEL_DOC_PATH = EXAMPLE_DATA_PATH / "rag/travel.txt" -TRAVEL_QUESTION = "What does Bob like?" - -LLM_TIP = "If you not sure, just answer I don't know." +TRAVEL_QUESTION = f"What does Bob like? {LLM_TIP}" class Player(BaseModel): @@ -40,21 +40,21 @@ class Player(BaseModel): class RAGExample: - """Show how to use RAG. + """Show how to use RAG.""" - Default engine use LLM Reranker, if the answer from the LLM is incorrect, may encounter `IndexError: list index out of range`. - """ - - def __init__(self, engine: SimpleEngine = None): + def __init__(self, engine: SimpleEngine = None, use_llm_ranker: bool = True): self._engine = engine + self._use_llm_ranker = use_llm_ranker @property def engine(self): if not self._engine: + ranker_configs = [LLMRankerConfig()] if self._use_llm_ranker else None + self._engine = SimpleEngine.from_docs( input_files=[DOC_PATH], retriever_configs=[FAISSRetrieverConfig()], - ranker_configs=[LLMRankerConfig()], + ranker_configs=ranker_configs, ) return self._engine @@ -105,7 +105,7 @@ class RAGExample: """ self._print_title("Add Docs") - travel_question = f"{TRAVEL_QUESTION}{LLM_TIP}" + travel_question = f"{TRAVEL_QUESTION}" travel_filepath = TRAVEL_DOC_PATH logger.info("[Before add docs]") @@ -240,8 +240,14 @@ class RAGExample: async def main(): - """RAG pipeline.""" - e = RAGExample() + """RAG pipeline. + + Note: + 1. If `use_llm_ranker` is True, then it will use LLM Reranker to get better result, but it is not always guaranteed that the output will be parseable for reranking, + prefer `gpt-4-turbo`, otherwise might encounter `IndexError: list index out of range` or `ValueError: invalid literal for int() with base 10`. + """ + e = RAGExample(use_llm_ranker=False) + await e.run_pipeline() await e.add_docs() await e.add_objects() diff --git a/metagpt/configs/embedding_config.py b/metagpt/configs/embedding_config.py index 20de47999..f9b41b9dc 100644 --- a/metagpt/configs/embedding_config.py +++ b/metagpt/configs/embedding_config.py @@ -20,11 +20,13 @@ class EmbeddingConfig(YamlModel): --------- api_type: "openai" api_key: "YOU_API_KEY" + dimensions: "YOUR_MODEL_DIMENSIONS" api_type: "azure" api_key: "YOU_API_KEY" base_url: "YOU_BASE_URL" api_version: "YOU_API_VERSION" + dimensions: "YOUR_MODEL_DIMENSIONS" api_type: "gemini" api_key: "YOU_API_KEY" @@ -32,6 +34,7 @@ class EmbeddingConfig(YamlModel): api_type: "ollama" base_url: "YOU_BASE_URL" model: "YOU_MODEL" + dimensions: "YOUR_MODEL_DIMENSIONS" """ api_type: Optional[EmbeddingType] = None @@ -41,6 +44,7 @@ class EmbeddingConfig(YamlModel): model: Optional[str] = None embed_batch_size: Optional[int] = None + dimensions: Optional[int] = None # output dimension of embedding model @field_validator("api_type", mode="before") @classmethod diff --git a/metagpt/provider/base_llm.py b/metagpt/provider/base_llm.py index 6387e3936..e554f7767 100644 --- a/metagpt/provider/base_llm.py +++ b/metagpt/provider/base_llm.py @@ -65,7 +65,7 @@ class BaseLLM(ABC): # image url or image base64 url = image if image.startswith("http") else f"data:image/jpeg;base64,{image}" # it can with multiple-image inputs - content.append({"type": "image_url", "image_url": url}) + content.append({"type": "image_url", "image_url": {"url": url}}) return {"role": "user", "content": content} def _assistant_msg(self, msg: str) -> dict[str, str]: diff --git a/metagpt/provider/human_provider.py b/metagpt/provider/human_provider.py index 87dbd105f..a16a49c20 100644 --- a/metagpt/provider/human_provider.py +++ b/metagpt/provider/human_provider.py @@ -33,6 +33,7 @@ class HumanProvider(BaseLLM): format_msgs: Optional[list[dict[str, str]]] = None, generator: bool = False, timeout=USE_CONFIG_TIMEOUT, + **kwargs ) -> str: return self.ask(msg, timeout=self.get_timeout(timeout)) diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index 7957f775c..68dc156c2 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -100,7 +100,7 @@ class OpenAILLM(BaseLLM): log_llm_stream(chunk_message) collected_messages.append(chunk_message) if finish_reason: - if hasattr(chunk, "usage"): + if hasattr(chunk, "usage") and chunk.usage is not None: # Some services have usage as an attribute of the chunk, such as Fireworks usage = CompletionUsage(**chunk.usage) elif hasattr(chunk.choices[0], "usage"): diff --git a/metagpt/rag/benchmark/base.py b/metagpt/rag/benchmark/base.py index c1fd297d9..b5d265b35 100644 --- a/metagpt/rag/benchmark/base.py +++ b/metagpt/rag/benchmark/base.py @@ -121,7 +121,7 @@ class RAGBenchmark: return mrr_sum return mrr_sum - + async def semantic_similarity(self, response: str, reference: str) -> float: result = await self.evaluator.aevaluate( response=response, diff --git a/metagpt/rag/factories/ranker.py b/metagpt/rag/factories/ranker.py index b75745a1f..7abda162a 100644 --- a/metagpt/rag/factories/ranker.py +++ b/metagpt/rag/factories/ranker.py @@ -8,11 +8,11 @@ from metagpt.rag.factories.base import ConfigBasedFactory from metagpt.rag.rankers.object_ranker import ObjectSortPostprocessor from metagpt.rag.schema import ( BaseRankerConfig, + BGERerankConfig, + CohereRerankConfig, ColbertRerankConfig, LLMRankerConfig, ObjectRankerConfig, - CohereRerankConfig, - BGERerankConfig ) @@ -60,13 +60,15 @@ class RankerFactory(ConfigBasedFactory): def _create_bge_rerank(self, config: BGERerankConfig, **kwargs) -> LLMRerank: try: - from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker + from llama_index.postprocessor.flag_embedding_reranker import ( + FlagEmbeddingReranker, + ) except ImportError: raise ImportError( "`llama-index-postprocessor-flag-embedding-reranker` package not found, please run `pip install llama-index-postprocessor-flag-embedding-reranker`" ) return FlagEmbeddingReranker(**config.model_dump()) - + def _create_object_ranker(self, config: ObjectRankerConfig, **kwargs) -> LLMRerank: return ObjectSortPostprocessor(**config.model_dump()) diff --git a/metagpt/rag/retrievers/bm25_retriever.py b/metagpt/rag/retrievers/bm25_retriever.py index 241820cf4..dc75d87b0 100644 --- a/metagpt/rag/retrievers/bm25_retriever.py +++ b/metagpt/rag/retrievers/bm25_retriever.py @@ -40,8 +40,10 @@ class DynamicBM25Retriever(BM25Retriever): self._corpus = [self._tokenizer(node.get_content()) for node in self._nodes] self.bm25 = BM25Okapi(self._corpus) - self._index.insert_nodes(nodes, **kwargs) + if self._index: + self._index.insert_nodes(nodes, **kwargs) def persist(self, persist_dir: str, **kwargs) -> None: """Support persist.""" - self._index.storage_context.persist(persist_dir) + if self._index: + self._index.storage_context.persist(persist_dir) diff --git a/metagpt/rag/schema.py b/metagpt/rag/schema.py index e7b2e5ce9..618880a22 100644 --- a/metagpt/rag/schema.py +++ b/metagpt/rag/schema.py @@ -12,6 +12,7 @@ from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator from metagpt.config2 import config from metagpt.configs.embedding_config import EmbeddingType +from metagpt.logs import logger from metagpt.rag.interface import RAGObject @@ -44,7 +45,13 @@ class FAISSRetrieverConfig(IndexRetrieverConfig): @model_validator(mode="after") def check_dimensions(self): if self.dimensions == 0: - self.dimensions = self._embedding_type_to_dimensions.get(config.embedding.api_type, 1536) + self.dimensions = config.embedding.dimensions or self._embedding_type_to_dimensions.get( + config.embedding.api_type, 1536 + ) + if not config.embedding.dimensions and config.embedding.api_type not in self._embedding_type_to_dimensions: + logger.warning( + f"You didn't set dimensions in config when using {config.embedding.api_type}, default to 1536" + ) return self diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py index 9249d674e..de549cc5a 100644 --- a/metagpt/utils/token_counter.py +++ b/metagpt/utils/token_counter.py @@ -35,8 +35,11 @@ TOKEN_COSTS = { "gpt-4-1106-preview": {"prompt": 0.01, "completion": 0.03}, "gpt-4-0125-preview": {"prompt": 0.01, "completion": 0.03}, "gpt-4-turbo": {"prompt": 0.01, "completion": 0.03}, + "gpt-4-turbo-2024-04-09": {"prompt": 0.01, "completion": 0.03}, "gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03}, # TODO add extra image price calculator "gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03}, + "gpt-4o": {"prompt": 0.005, "completion": 0.015}, + "gpt-4o-2024-05-13": {"prompt": 0.005, "completion": 0.015}, "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0}, "glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens "glm-4": {"prompt": 0.014, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens @@ -56,11 +59,14 @@ TOKEN_COSTS = { "claude-3-opus-20240229": {"prompt": 0.015, "completion": 0.075}, "yi-34b-chat-0205": {"prompt": 0.0003, "completion": 0.0003}, "yi-34b-chat-200k": {"prompt": 0.0017, "completion": 0.0017}, + "yi-large": {"prompt": 0.0028, "completion": 0.0028}, "microsoft/wizardlm-2-8x22b": {"prompt": 0.00108, "completion": 0.00108}, # for openrouter, start "meta-llama/llama-3-70b-instruct": {"prompt": 0.008, "completion": 0.008}, "llama3-70b-8192": {"prompt": 0.0059, "completion": 0.0079}, "openai/gpt-3.5-turbo-0125": {"prompt": 0.0005, "completion": 0.0015}, "openai/gpt-4-turbo-preview": {"prompt": 0.01, "completion": 0.03}, + "deepseek-chat": {"prompt": 0.00014, "completion": 0.00028}, + "deepseek-coder": {"prompt": 0.00014, "completion": 0.00028}, } @@ -155,6 +161,9 @@ FIREWORKS_GRADE_TOKEN_COSTS = { # https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo TOKEN_MAX = { + "gpt-4o-2024-05-13": 128000, + "gpt-4o": 128000, + "gpt-4-turbo-2024-04-09": 128000, "gpt-4-0125-preview": 128000, "gpt-4-turbo-preview": 128000, "gpt-4-1106-preview": 128000, @@ -191,11 +200,14 @@ TOKEN_MAX = { "claude-3-opus-20240229": 200000, "yi-34b-chat-0205": 4000, "yi-34b-chat-200k": 200000, + "yi-large": 16385, "microsoft/wizardlm-2-8x22b": 65536, "meta-llama/llama-3-70b-instruct": 8192, "llama3-70b-8192": 8192, "openai/gpt-3.5-turbo-0125": 16385, "openai/gpt-4-turbo-preview": 128000, + "deepseek-chat": 32768, + "deepseek-coder": 16385, } # For Amazon Bedrock US region @@ -271,6 +283,8 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0125"): "gpt-4-turbo", "gpt-4-vision-preview", "gpt-4-1106-vision-preview", + "gpt-4o-2024-05-13", + "gpt-4o", }: tokens_per_message = 3 # # every reply is primed with <|start|>assistant<|message|> tokens_per_name = 1 diff --git a/requirements.txt b/requirements.txt index 76c158115..82b478902 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,7 +26,7 @@ PyYAML==6.0.1 # sentence_transformers==2.2.2 setuptools==65.6.3 tenacity==8.2.3 -tiktoken==0.6.0 +tiktoken==0.7.0 tqdm==4.66.2 #unstructured[local-inference] # selenium>4