mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-26 15:49:42 +02:00
feat: Editor + read pdf/docx...
This commit is contained in:
parent
fa06a67a64
commit
dc578d8b0b
15 changed files with 447 additions and 30 deletions
|
|
@ -1,7 +1,7 @@
|
|||
"""RAG schemas."""
|
||||
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, ClassVar, Literal, Optional, Union
|
||||
from typing import Any, ClassVar, List, Literal, Optional, Union
|
||||
|
||||
from chromadb.api.types import CollectionMetadata
|
||||
from llama_index.core.embeddings import BaseEmbedding
|
||||
|
|
@ -12,6 +12,7 @@ from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator
|
|||
|
||||
from metagpt.config2 import config
|
||||
from metagpt.configs.embedding_config import EmbeddingType
|
||||
from metagpt.logs import logger
|
||||
from metagpt.rag.interface import RAGObject
|
||||
|
||||
|
||||
|
|
@ -44,7 +45,13 @@ class FAISSRetrieverConfig(IndexRetrieverConfig):
|
|||
@model_validator(mode="after")
|
||||
def check_dimensions(self):
|
||||
if self.dimensions == 0:
|
||||
self.dimensions = self._embedding_type_to_dimensions.get(config.embedding.api_type, 1536)
|
||||
self.dimensions = config.embedding.dimensions or self._embedding_type_to_dimensions.get(
|
||||
config.embedding.api_type, 1536
|
||||
)
|
||||
if not config.embedding.dimensions and config.embedding.api_type not in self._embedding_type_to_dimensions:
|
||||
logger.warning(
|
||||
f"You didn't set dimensions in config when using {config.embedding.api_type}, default to 1536"
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
|
|
@ -207,3 +214,51 @@ class ObjectNode(TextNode):
|
|||
)
|
||||
|
||||
return metadata.model_dump()
|
||||
|
||||
|
||||
class OmniParseType(str, Enum):
|
||||
"""OmniParseType"""
|
||||
|
||||
PDF = "PDF"
|
||||
DOCUMENT = "DOCUMENT"
|
||||
|
||||
|
||||
class ParseResultType(str, Enum):
|
||||
"""The result type for the parser."""
|
||||
|
||||
TXT = "text"
|
||||
MD = "markdown"
|
||||
JSON = "json"
|
||||
|
||||
|
||||
class OmniParseOptions(BaseModel):
|
||||
"""OmniParse Options config"""
|
||||
|
||||
result_type: ParseResultType = Field(default=ParseResultType.MD, description="OmniParse result_type")
|
||||
parse_type: OmniParseType = Field(default=OmniParseType.DOCUMENT, description="OmniParse parse_type")
|
||||
max_timeout: Optional[int] = Field(default=120, description="Maximum timeout for OmniParse service requests")
|
||||
num_workers: int = Field(
|
||||
default=5,
|
||||
gt=0,
|
||||
lt=10,
|
||||
description="Number of concurrent requests for multiple files",
|
||||
)
|
||||
|
||||
|
||||
class OminParseImage(BaseModel):
|
||||
image: str = Field(default="", description="image str bytes")
|
||||
image_name: str = Field(default="", description="image name")
|
||||
image_info: Optional[dict] = Field(default={}, description="image info")
|
||||
|
||||
|
||||
class OmniParsedResult(BaseModel):
|
||||
markdown: str = Field(default="", description="markdown text")
|
||||
text: str = Field(default="", description="plain text")
|
||||
images: Optional[List[OminParseImage]] = Field(default=[], description="images")
|
||||
metadata: Optional[dict] = Field(default={}, description="metadata")
|
||||
|
||||
@model_validator(mode="before")
|
||||
def set_markdown(cls, values):
|
||||
if not values.get("markdown"):
|
||||
values["markdown"] = values.get("text")
|
||||
return values
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue