diff --git a/metagpt/utils/di_graph_repository.py b/metagpt/utils/di_graph_repository.py index 22917b4a5..1d5542fab 100644 --- a/metagpt/utils/di_graph_repository.py +++ b/metagpt/utils/di_graph_repository.py @@ -4,7 +4,9 @@ @Time : 2023/12/19 @Author : mashenquan @File : di_graph_repository.py -@Desc : Graph repository based on DiGraph +@Desc : Graph repository based on DiGraph. + This script defines a graph repository class based on a directed graph (DiGraph), providing functionalities + specific to handling directed relationships between entities. """ from __future__ import annotations @@ -19,14 +21,87 @@ from metagpt.utils.graph_repository import SPO, GraphRepository class DiGraphRepository(GraphRepository): + """Graph repository based on DiGraph. + + This class represents a graph repository that utilizes a directed graph (DiGraph) to manage relationships + between entities. It inherits from the GraphRepository class, providing a common interface for graph repositories. + + Attributes: + _repo (DiGraph): The underlying directed graph representing the repository. + + Methods: + insert: Insert a new triple into the graph repository. + select: Retrieve triples from the graph repository based on specified criteria. + delete: Delete triples from the graph repository based on specified criteria. + save: Save any changes made to the graph repository. + name: Get the name of the graph repository. + + Example: + di_graph_repo = DiGraphRepository(name="MyDiGraphRepo") + await di_graph_repo.insert(subject="Node1", predicate="connects_to", object_="Node2") + # Represents a directed relationship: Node1 connects_to Node2 + + Note: + This class extends the GraphRepository class and is specifically designed for managing directed relationships + using a DiGraph.
+ + """ + def __init__(self, name: str, **kwargs): super().__init__(name=name, **kwargs) self._repo = networkx.DiGraph() async def insert(self, subject: str, predicate: str, object_: str): + """Insert a new triple into the directed graph repository. + + This method adds a new triple to the underlying directed graph. The triple consists of a subject, a predicate + describing the relationship, and an object. + + Args: + subject (str): The subject of the triple. + predicate (str): The predicate describing the relationship. + object_ (str): The object of the triple. + + Returns: + None + + Raises: + SomeException: Describe any exceptions that might be raised during the insertion process. + + Example: + await my_di_graph_repo.insert(subject="Node1", predicate="connects_to", object_="Node2") + # Adds a directed relationship: Node1 connects_to Node2 + + Note: + Implementations should handle the insertion of triples into the directed graph. + + """ self._repo.add_edge(subject, object_, predicate=predicate) async def select(self, subject: str = None, predicate: str = None, object_: str = None) -> List[SPO]: + """Retrieve triples from the directed graph repository based on specified criteria. + + This method queries the directed graph repository and retrieves triples that match the specified criteria. + + Args: + subject (str, optional): The subject of the triple to filter by. + predicate (str, optional): The predicate describing the relationship to filter by. + object_ (str, optional): The object of the triple to filter by. + + Returns: + List[SPO]: A list of SPO objects representing the selected triples. + + Raises: + SomeException: Describe any exceptions that might be raised during the selection process. + + Example: + selected_triples = await my_di_graph_repo.select(subject="Node1", predicate="connects_to") + # Retrieves directed relationships where Node1 is the subject and the predicate is 'connects_to'. 
+ + Note: + Implementations should handle the selection of triples from the directed graph. + + """ result = [] for s, o, p in self._repo.edges(data="predicate"): if subject and subject != s: @@ -39,6 +114,29 @@ class DiGraphRepository(GraphRepository): return result async def delete(self, subject: str = None, predicate: str = None, object_: str = None) -> int: + """Delete triples from the directed graph repository based on specified criteria. + + This method removes triples from the directed graph repository that match the specified criteria. + + Args: + subject (str, optional): The subject of the triple to filter by. + predicate (str, optional): The predicate describing the relationship to filter by. + object_ (str, optional): The object of the triple to filter by. + + Returns: + int: The number of triples deleted from the repository. + + Raises: + SomeException: Describe any exceptions that might be raised during the deletion process. + + Example: + deleted_count = await my_di_graph_repo.delete(subject="Node1", predicate="connects_to") + # Deletes directed relationships where Node1 is the subject and the predicate is 'connects_to'. + + Note: + Implementations should handle the deletion of triples from the directed graph. + + """ rows = await self.select(subject=subject, predicate=predicate, object_=object_) if not rows: return 0 @@ -47,11 +145,47 @@ class DiGraphRepository(GraphRepository): return len(rows) def json(self) -> str: + """Convert the directed graph repository to a JSON-formatted string. + + This method converts the underlying directed graph repository to a JSON-formatted string using the node-link data + format. + + Returns: + str: A JSON-formatted string representing the directed graph repository. + + Example: + json_data = my_di_graph_repo.json() + # Retrieves a JSON-formatted string representing the directed graph repository. + + Note: + The resulting JSON string can be used for serialization or data interchange. 
+ + """ m = networkx.node_link_data(self._repo) data = json.dumps(m) return data async def save(self, path: str | Path = None): + """Save the directed graph repository to a JSON file. + + This method converts the underlying directed graph repository to a JSON-formatted string and saves it to a file. + The file is saved with the name of the graph repository and a ".json" extension. + + Args: + path (Union[str, Path], optional): The directory path where the JSON file will be saved. + If not provided, the default path is taken from the 'root' key in the keyword arguments. + + Returns: + None + + Example: + await my_di_graph_repo.save(path="/path/to/save") + # Saves the directed graph repository to a JSON file at the specified path. + + Note: + The saved JSON file contains the node-link data representing the directed graph. + + """ data = self.json() path = path or self._kwargs.get("root") if not path.exists(): @@ -60,12 +194,50 @@ class DiGraphRepository(GraphRepository): await awrite(filename=pathname.with_suffix(".json"), data=data, encoding="utf-8") async def load(self, pathname: str | Path): + """Load a directed graph repository from a JSON file. + + This method reads a JSON file containing node-link data representing a directed graph and loads it into the + directed graph repository. + + Args: + pathname (Union[str, Path]): The path to the JSON file to be loaded. + + Returns: + None + + Example: + await my_di_graph_repo.load(pathname="/path/to/load/my_graph.json") + # Loads a directed graph repository from the specified JSON file. + + Note: + The JSON file should contain node-link data compatible with the format produced by the 'json' method. + + """ data = await aread(filename=pathname, encoding="utf-8") m = json.loads(data) self._repo = networkx.node_link_graph(m) @staticmethod async def load_from(pathname: str | Path) -> GraphRepository: + """Create and load a directed graph repository from a JSON file. 
+ + This static method creates a new instance of a graph repository and loads it from a JSON file containing node-link + data representing a directed graph. + + Args: + pathname (Union[str, Path]): The path to the JSON file to be loaded. + + Returns: + GraphRepository: A new instance of the graph repository loaded from the specified JSON file. + + Example: + loaded_repo = await DiGraphRepository.load_from(pathname="/path/to/load/my_graph.json") + # Creates and loads a directed graph repository from the specified JSON file. + + Note: + The JSON file should contain node-link data compatible with the format produced by the 'json' method. + + """ pathname = Path(pathname) name = pathname.with_suffix("").name root = pathname.parent @@ -76,13 +248,52 @@ class DiGraphRepository(GraphRepository): @property def root(self) -> str: + """Return the root directory path for the graph repository files. + + Returns: + str: The root directory path. + + Example: + root_path = my_graph_repo.root + # Retrieves the root directory path for the graph repository files. + + Note: + This property provides the directory path where graph repository files are saved or loaded. + + """ return self._kwargs.get("root") @property def pathname(self) -> Path: + """Return the path and filename to the graph repository file. + + Returns: + Path: The path and filename to the graph repository file. + + Example: + file_path = my_graph_repo.pathname + # Retrieves the path and filename to the graph repository file. + + Note: + This property provides the full path, including the filename, to the graph repository file. + + """ p = Path(self.root) / self.name return p.with_suffix(".json") @property def repo(self): + """Get the underlying directed graph repository. + + Returns: + networkx.DiGraph: The directed graph repository. + + Example: + my_di_graph = my_graph_repo.repo + # Retrieves the underlying directed graph repository.
+ + Note: + This property provides direct access to the networkx.DiGraph instance used by the graph repository. + + """ return self._repo diff --git a/metagpt/utils/graph_repository.py b/metagpt/utils/graph_repository.py index 7a5beb00a..17343114f 100644 --- a/metagpt/utils/graph_repository.py +++ b/metagpt/utils/graph_repository.py @@ -4,7 +4,9 @@ @Time : 2023/12/19 @Author : mashenquan @File : graph_repository.py -@Desc : Superclass for graph repository. +@Desc : Superclass for graph repository. This script defines a superclass for a graph repository, providing a + foundation for specific implementations. + """ from abc import ABC, abstractmethod @@ -19,6 +21,11 @@ from metagpt.utils.common import concat_namespace, split_namespace class GraphKeyword: + """Basic words for a Graph database. + + This class defines a set of basic words commonly used in the context of a Graph database. + """ + IS = "is" OF = "Of" ON = "On" @@ -45,38 +52,218 @@ class GraphKeyword: class SPO(BaseModel): + """Graph repository record type. + + This class represents a record in a graph repository with three components: + - Subject: The subject of the triple. + - Predicate: The predicate describing the relationship between the subject and the object. + - Object: The object of the triple. + + Attributes: + subject (str): The subject of the triple. + predicate (str): The predicate describing the relationship. + object_ (str): The object of the triple. + + Example: + spo_record = SPO(subject="Node1", predicate="connects_to", object_="Node2") + # Represents a triple: Node1 connects_to Node2 + + Note: + This class is a Pydantic BaseModel, allowing easy validation and serialization of graph records. + + """ + subject: str predicate: str object_: str class GraphRepository(ABC): + """Abstract base class for a Graph Repository. + + This class defines the interface for a graph repository, providing methods for inserting, selecting, + deleting, and saving graph data. 
Concrete implementations of this class must provide functionality + for these operations. + + Attributes: + _repo_name (str): The name of the graph repository. + _kwargs (dict): Additional keyword arguments for customization. + + Methods: + insert: Insert a new triple into the graph repository. + select: Retrieve triples from the graph repository based on specified criteria. + delete: Delete triples from the graph repository based on specified criteria. + save: Save any changes made to the graph repository. + name: Get the name of the graph repository. + + Example: + class MyGraphRepository(GraphRepository): + # Concrete implementation of the GraphRepository interface goes here... + + my_repo = MyGraphRepository(name="MyRepo") + await my_repo.insert(subject="Node1", predicate="connects_to", object_="Node2") + + Note: + This class is meant to be subclassed to create specific implementations of graph repositories. + + """ + def __init__(self, name: str, **kwargs): self._repo_name = name self._kwargs = kwargs @abstractmethod async def insert(self, subject: str, predicate: str, object_: str): + """Insert a new triple into the graph repository. + + Args: + subject (str): The subject of the triple. + predicate (str): The predicate describing the relationship. + object_ (str): The object of the triple. + + Returns: + None + + Raises: + Implementation-defined: this abstract method has no body; see the concrete subclass for the exceptions it raises. + + Example: + await my_repository.insert(subject="Node1", predicate="connects_to", object_="Node2") + # Inserts a triple: Node1 connects_to Node2 into the graph repository. + + Note: + Implementations of this method should handle the insertion of triples into the underlying graph storage. + + """ pass @abstractmethod async def select(self, subject: str = None, predicate: str = None, object_: str = None) -> List[SPO]: + """Retrieve triples from the graph repository based on specified criteria.
+ + Args: + subject (str, optional): The subject of the triple to filter by. + predicate (str, optional): The predicate describing the relationship to filter by. + object_ (str, optional): The object of the triple to filter by. + + Returns: + List[SPO]: A list of SPO objects representing the selected triples. + + Raises: + Implementation-defined: this abstract method has no body; see the concrete subclass for the exceptions it raises. + + Example: + selected_triples = await my_repository.select(subject="Node1", predicate="connects_to") + # Retrieves triples where Node1 is the subject and the predicate is 'connects_to'. + + Note: + Implementations of this method should handle the selection of triples from the underlying graph storage. + + """ pass @abstractmethod async def delete(self, subject: str = None, predicate: str = None, object_: str = None) -> int: + """Delete triples from the graph repository based on specified criteria. + + Args: + subject (str, optional): The subject of the triple to filter by. + predicate (str, optional): The predicate describing the relationship to filter by. + object_ (str, optional): The object of the triple to filter by. + + Returns: + int: The number of triples deleted from the repository. + + Raises: + Implementation-defined: this abstract method has no body; see the concrete subclass for the exceptions it raises. + + Example: + deleted_count = await my_repository.delete(subject="Node1", predicate="connects_to") + # Deletes triples where Node1 is the subject and the predicate is 'connects_to'. + + Note: + Implementations of this method should handle the deletion of triples from the underlying graph storage. + + """ pass @abstractmethod async def save(self): + """Save any changes made to the graph repository. + + This method is responsible for persisting any changes made to the graph repository, such as inserts, updates, or + deletions. Implementations should ensure that the changes are properly committed and reflected in the underlying + graph storage.
+ + Args: + None + + Returns: + None + + Raises: + Implementation-defined: this abstract method has no body; see the concrete subclass for the exceptions it raises. + + Example: + await my_repository.save() + # Persists any changes made to the graph repository. + + Note: + Implementations of this method should handle the persistence of changes in the underlying graph storage. + + """ pass @property def name(self) -> str: + """Get the name of the graph repository. + + Returns: + str: The name of the graph repository. + + Example: + repository_name = my_repository.name + # Retrieves the name of the graph repository. + + Note: + The name serves as a unique identifier for the graph repository. + + """ return self._repo_name @staticmethod async def update_graph_db_with_file_info(graph_db: "GraphRepository", file_info: RepoFileInfo): + """Insert information of RepoFileInfo into the specified graph repository. + + This function updates the provided graph repository with information from the given RepoFileInfo object. + The function inserts triples related to various dimensions such as file type, class, class method, function, + global variable, and page info. + + Triple Patterns: + - (?, is, [file type]) + - (?, has class, ?) + - (?, is, [class]) + - (?, has class method, ?) + - (?, has function, ?) + - (?, is, [function]) + - (?, is, global variable) + - (?, has page info, ?) + + Args: + graph_db (GraphRepository): The graph repository object to be updated. + file_info (RepoFileInfo): The RepoFileInfo object containing information to be inserted. + + Returns: + None + + Example: + await GraphRepository.update_graph_db_with_file_info(my_graph_repo, my_file_info) + # Updates 'my_graph_repo' with information from 'my_file_info'. + + Note: + The function is designed to handle the insertion of specific triple patterns into the graph repository.
+ + """ await graph_db.insert(subject=file_info.file, predicate=GraphKeyword.IS, object_=GraphKeyword.SOURCE_CODE) file_types = {".py": "python", ".js": "javascript"} file_type = file_types.get(Path(file_info.file).suffix, GraphKeyword.NULL) @@ -138,6 +325,39 @@ class GraphRepository(ABC): @staticmethod async def update_graph_db_with_class_views(graph_db: "GraphRepository", class_views: List[DotClassInfo]): + """Insert dot format class information into the specified graph repository. + + This function updates the provided graph repository with class information from the given list of DotClassInfo objects. + The function inserts triples related to various aspects of class views, including source code, file type, class, + class property, class detail, method, composition, and aggregation. + + Triple Patterns: + - (?, is, source code) + - (?, is, file type) + - (?, has class, ?) + - (?, is, class) + - (?, has class property, ?) + - (?, is, class property) + - (?, has detail, ?) + - (?, has method, ?) + - (?, is composite of, ?) + - (?, is aggregate of, ?) + + Args: + graph_db (GraphRepository): The graph repository object to be updated. + class_views (List[DotClassInfo]): List of DotClassInfo objects containing class information to be inserted. + + Returns: + None + + Example: + await GraphRepository.update_graph_db_with_class_views(my_graph_repo, [class_info1, class_info2]) + # Updates 'my_graph_repo' with class information from the provided list of DotClassInfo objects. + + Note: + The function is designed to handle the insertion of specific triple patterns into the graph repository.
+ + """ for c in class_views: filename, _ = c.package.split(":", 1) await graph_db.insert(subject=filename, predicate=GraphKeyword.IS, object_=GraphKeyword.SOURCE_CODE) @@ -200,6 +420,32 @@ class GraphRepository(ABC): async def update_graph_db_with_class_relationship_views( graph_db: "GraphRepository", relationship_views: List[DotClassRelationship] ): + """Insert class relationships and labels into the specified graph repository. + + This function updates the provided graph repository with class relationship information from the given list + of DotClassRelationship objects. The function inserts triples representing relationships and labels between + classes. + + Triple Patterns: + - (?, is relationship of, ?) + - (?, is relationship on, ?) + + Args: + graph_db (GraphRepository): The graph repository object to be updated. + relationship_views (List[DotClassRelationship]): List of DotClassRelationship objects containing + class relationship information to be inserted. + + Returns: + None + + Example: + await GraphRepository.update_graph_db_with_class_relationship_views(my_graph_repo, [relationship1, relationship2]) + # Updates 'my_graph_repo' with class relationship information from the provided list of DotClassRelationship objects. + + Note: + The function is designed to handle the insertion of specific triple patterns into the graph repository. + + """ for r in relationship_views: await graph_db.insert( subject=r.src, predicate=GraphKeyword.IS + r.relationship + GraphKeyword.OF, object_=r.dest @@ -214,6 +460,26 @@ class GraphRepository(ABC): @staticmethod async def rebuild_composition_relationship(graph_db: "GraphRepository"): + """Append namespace-prefixed information to relationship SPO (Subject-Predicate-Object) objects in the graph + repository. + + This function updates the provided graph repository by appending namespace-prefixed information to existing + relationship SPO objects. + + Args: + graph_db (GraphRepository): The graph repository object to be updated.
+ + Returns: + None + + Example: + await GraphRepository.rebuild_composition_relationship(my_graph_repo) + # Appends namespace-prefixed information to relationship SPO objects in 'my_graph_repo'. + + Note: + The function is designed to modify existing relationship SPO objects in the graph repository. + + """ classes = await graph_db.select(predicate=GraphKeyword.IS, object_=GraphKeyword.CLASS) mapping = defaultdict(list) for c in classes: diff --git a/metagpt/utils/visual_graph_repo.py b/metagpt/utils/visual_graph_repo.py index 1c52176ff..49edee39e 100644 --- a/metagpt/utils/visual_graph_repo.py +++ b/metagpt/utils/visual_graph_repo.py @@ -82,9 +82,9 @@ class VisualGraphRepo(ABC): class VisualDiGraphRepo(VisualGraphRepo): - """Implementation of VisualGraphRepo for networkx graph repository. + """Implementation of VisualGraphRepo for DiGraph graph repository. - This class extends VisualGraphRepo to provide specific functionality for a graph repository using networkx. + This class extends VisualGraphRepo to provide specific functionality for a graph repository using DiGraph. """ @classmethod