From 460555423049bbef4a37d4509e4b87f96ec6eae3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?=
Date: Mon, 2 Sep 2024 18:32:39 +0800
Subject: [PATCH] fixbug: _is_fingerprint_changed

---
Review notes (free-form commentary; not part of the commit message or the diff):

* Hunk 1 (loop over `filenames`): right after a file's content is read, the
  file is skipped with `continue` when `_is_fingerprint_changed` returns
  False, i.e. before its token count is computed.
* Hunk 2 (`_add_batch`): logs the filenames to add and to remove, placed
  after the early return for an empty `filenames` list.
* Hunk 3: adds `IndexRepo._is_fingerprint_changed(filename, content)`. It
  looks up `self.fingerprints` by `str(filename)`; it returns True when no
  (or a falsy) fingerprint is stored, otherwise it returns whether the
  stored value differs from `generate_fingerprint(content)`.
* NOTE(review): this patch adds no imports. `generate_fingerprint` and
  `logger` are presumably already in scope in index_repo.py - confirm.
* NOTE(review): the line breaks and indentation of the hunks below were
  reconstructed from a whitespace-collapsed copy, assuming 4-space Python
  indentation - verify against the target file before applying.

 metagpt/tools/libs/index_repo.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/metagpt/tools/libs/index_repo.py b/metagpt/tools/libs/index_repo.py
index cf41c2e9c..105a11d73 100644
--- a/metagpt/tools/libs/index_repo.py
+++ b/metagpt/tools/libs/index_repo.py
@@ -93,6 +93,8 @@ class IndexRepo(BaseModel):
         delete_filenames = []
         for i in filenames:
             content = await aread(filename=i)
+            if not self._is_fingerprint_changed(filename=i, content=content):
+                continue
             token_count = len(encoding.encode(content))
             if self._is_buildable(token_count):
                 filter_filenames.append(i)
@@ -103,6 +105,7 @@ class IndexRepo(BaseModel):
     async def _add_batch(self, filenames: List[Union[str, Path]], delete_filenames: List[Union[str, Path]]):
         if not filenames:
             return
+        logger.info(f"update index repo, add {filenames}, remove {delete_filenames}")
         engine = None
         if Path(self.filename).exists():
             engine = SimpleEngine.from_index(
@@ -168,3 +171,10 @@ class IndexRepo(BaseModel):
         )
         rsp = await engine.aretrieve(query)
         return [i for i in rsp if i.metadata.get("file_path") in filters]
+
+    def _is_fingerprint_changed(self, filename: Union[str, Path], content: str) -> bool:
+        old_fp = self.fingerprints.get(str(filename))
+        if not old_fp:
+            return True
+        fp = generate_fingerprint(content)
+        return old_fp != fp