From 28d293d4904b22cd4f0cb11d4fa682ea8504eac7 Mon Sep 17 00:00:00 2001 From: yzlin Date: Mon, 25 Mar 2024 15:57:31 +0800 Subject: [PATCH] docstrings are usually English, discard jieba tokenizer --- metagpt/tools/tool_recommend.py | 5 ++--- requirements.txt | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/metagpt/tools/tool_recommend.py b/metagpt/tools/tool_recommend.py index 69b9a4b5d..d8b6040db 100644 --- a/metagpt/tools/tool_recommend.py +++ b/metagpt/tools/tool_recommend.py @@ -3,7 +3,6 @@ from __future__ import annotations import json from typing import Any -import jieba import numpy as np from pydantic import BaseModel, field_validator from rank_bm25 import BM25Okapi @@ -182,7 +181,7 @@ class BM25ToolRecommender(ToolRecommender): self.bm25 = BM25Okapi(tokenized_corpus) def _tokenize(self, text): - return jieba.lcut(text) # FIXME: needs more sophisticated tokenization + return text.split() # FIXME: needs more sophisticated tokenization async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]: query = plan.current_task.instruction if plan else context @@ -193,7 +192,7 @@ class BM25ToolRecommender(ToolRecommender): recalled_tools = [list(self.tools.values())[index] for index in top_indexes] logger.info( - f"Recalled tools: \n{[tool.name for tool in recalled_tools]}; Scores: {[doc_scores[index] for index in top_indexes]}" + f"Recalled tools: \n{[tool.name for tool in recalled_tools]}; Scores: {[np.round(doc_scores[index], 4) for index in top_indexes]}" ) return recalled_tools diff --git a/requirements.txt b/requirements.txt index d0ee8c95c..c97d4b2f2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -71,5 +71,4 @@ Pillow imap_tools==1.5.0 # Used by metagpt/tools/libs/email_login.py qianfan==0.3.2 dashscope==1.14.1 -rank-bm25==0.2.2 # for tool recommendation -jieba==0.42.1 # for tool recommendation \ No newline at end of file +rank-bm25==0.2.2 # for tool recommendation \ No newline at end of file