Docstrings are usually English, so discard the jieba tokenizer

This commit is contained in:
yzlin 2024-03-25 15:57:31 +08:00
parent 8d4567ea19
commit 28d293d490
2 changed files with 3 additions and 5 deletions

View file

@ -3,7 +3,6 @@ from __future__ import annotations
import json
from typing import Any
import jieba
import numpy as np
from pydantic import BaseModel, field_validator
from rank_bm25 import BM25Okapi
@ -182,7 +181,7 @@ class BM25ToolRecommender(ToolRecommender):
self.bm25 = BM25Okapi(tokenized_corpus)
def _tokenize(self, text):
return jieba.lcut(text) # FIXME: needs more sophisticated tokenization
return text.split() # FIXME: needs more sophisticated tokenization
async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]:
query = plan.current_task.instruction if plan else context
@ -193,7 +192,7 @@ class BM25ToolRecommender(ToolRecommender):
recalled_tools = [list(self.tools.values())[index] for index in top_indexes]
logger.info(
f"Recalled tools: \n{[tool.name for tool in recalled_tools]}; Scores: {[doc_scores[index] for index in top_indexes]}"
f"Recalled tools: \n{[tool.name for tool in recalled_tools]}; Scores: {[np.round(doc_scores[index], 4) for index in top_indexes]}"
)
return recalled_tools