This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-04-07 17:14:11 -07:00
commit b6b8f9dc7b
39 changed files with 5186 additions and 6072 deletions

View file

@ -282,6 +282,9 @@ STT_SERVICE=local/base
# LlamaCloud (if ETL_SERVICE=LLAMACLOUD)
# LLAMA_CLOUD_API_KEY=
# Optional: Azure Document Intelligence accelerator (used with LLAMACLOUD)
# AZURE_DI_ENDPOINT=https://your-resource.cognitiveservices.azure.com/
# AZURE_DI_KEY=
# ------------------------------------------------------------------------------
# Observability (optional)

View file

@ -193,6 +193,9 @@ FIRECRAWL_API_KEY=fcr-01J0000000000000000000000
ETL_SERVICE=UNSTRUCTURED or LLAMACLOUD or DOCLING
UNSTRUCTURED_API_KEY=Tpu3P0U8iy
LLAMA_CLOUD_API_KEY=llx-nnn
# Optional: Azure Document Intelligence accelerator (used when ETL_SERVICE=LLAMACLOUD)
# AZURE_DI_ENDPOINT=https://your-resource.cognitiveservices.azure.com/
# AZURE_DI_KEY=your-key
# OPTIONAL: Add these for LangSmith Observability
LANGSMITH_TRACING=true

View file

@ -79,7 +79,9 @@ def upgrade() -> None:
sa.Column("description", sa.String(500), nullable=True),
sa.Column(
"provider",
PG_ENUM(*VISION_PROVIDER_VALUES, name="visionprovider", create_type=False),
PG_ENUM(
*VISION_PROVIDER_VALUES, name="visionprovider", create_type=False
),
nullable=False,
),
sa.Column("custom_provider", sa.String(100), nullable=True),
@ -100,9 +102,7 @@ def upgrade() -> None:
sa.ForeignKeyConstraint(
["search_space_id"], ["searchspaces.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(
["user_id"], ["user.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
)
op.execute(
"CREATE INDEX IF NOT EXISTS ix_vision_llm_configs_name "
@ -117,12 +117,19 @@ def upgrade() -> None:
existing_columns = [
col["name"] for col in sa.inspect(connection).get_columns("searchspaces")
]
if "vision_llm_id" in existing_columns and "vision_llm_config_id" not in existing_columns:
op.alter_column("searchspaces", "vision_llm_id", new_column_name="vision_llm_config_id")
if (
"vision_llm_id" in existing_columns
and "vision_llm_config_id" not in existing_columns
):
op.alter_column(
"searchspaces", "vision_llm_id", new_column_name="vision_llm_config_id"
)
elif "vision_llm_config_id" not in existing_columns:
op.add_column(
"searchspaces",
sa.Column("vision_llm_config_id", sa.Integer(), nullable=True, server_default="0"),
sa.Column(
"vision_llm_config_id", sa.Integer(), nullable=True, server_default="0"
),
)
# 4. Add vision config permissions to existing system roles
@ -181,7 +188,9 @@ def downgrade() -> None:
col["name"] for col in sa.inspect(connection).get_columns("searchspaces")
]
if "vision_llm_config_id" in existing_columns:
op.alter_column("searchspaces", "vision_llm_config_id", new_column_name="vision_llm_id")
op.alter_column(
"searchspaces", "vision_llm_config_id", new_column_name="vision_llm_id"
)
# Drop table and enum
op.execute("DROP INDEX IF EXISTS ix_vision_llm_configs_search_space_id")

View file

@ -452,9 +452,7 @@ async def stream_autocomplete_agent(
raw_text = "".join(text_buffer)
suggestions = _parse_suggestions(raw_text)
yield streaming_service.format_data(
"suggestions", {"options": suggestions}
)
yield streaming_service.format_data("suggestions", {"options": suggestions})
yield streaming_service.format_finish()
yield streaming_service.format_done()

View file

@ -461,8 +461,10 @@ class Config:
UNSTRUCTURED_API_KEY = os.getenv("UNSTRUCTURED_API_KEY")
elif ETL_SERVICE == "LLAMACLOUD":
# LlamaCloud API Key
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
# Optional: Azure Document Intelligence accelerator for supported file types
AZURE_DI_ENDPOINT = os.getenv("AZURE_DI_ENDPOINT")
AZURE_DI_KEY = os.getenv("AZURE_DI_KEY")
# Residential Proxy Configuration (anonymous-proxies.net)
# Used for web crawling and YouTube transcript fetching to avoid IP bans.

View file

@ -1310,9 +1310,7 @@ class VisionLLMConfig(BaseModel, TimestampMixin):
search_space_id = Column(
Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
)
search_space = relationship(
"SearchSpace", back_populates="vision_llm_configs"
)
search_space = relationship("SearchSpace", back_populates="vision_llm_configs")
user_id = Column(
UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False

View file

@ -1,3 +1,5 @@
import logging
from app.config import config as app_config
from app.etl_pipeline.etl_document import EtlRequest, EtlResult
from app.etl_pipeline.exceptions import (
@ -75,11 +77,7 @@ class EtlPipelineService:
content = await parse_with_unstructured(request.file_path)
elif etl_service == "LLAMACLOUD":
from app.etl_pipeline.parsers.llamacloud import parse_with_llamacloud
content = await parse_with_llamacloud(
request.file_path, request.estimated_pages
)
content = await self._extract_with_llamacloud(request)
else:
raise EtlServiceUnavailableError(f"Unknown ETL_SERVICE: {etl_service}")
@ -88,3 +86,40 @@ class EtlPipelineService:
etl_service=etl_service,
content_type="document",
)
async def _extract_with_llamacloud(self, request: EtlRequest) -> str:
    """Parse a document for the LLAMACLOUD service, preferring Azure DI when usable.

    Azure Document Intelligence is attempted first only when both
    ``AZURE_DI_ENDPOINT`` and ``AZURE_DI_KEY`` are set on the app config and
    the lower-cased suffix of ``request.filename`` is a member of
    ``AZURE_DI_DOCUMENT_EXTENSIONS``. Any exception raised on the Azure DI
    path is logged as a warning (with traceback) and LlamaCloud is used
    instead. In every other case LlamaCloud is called directly.

    Args:
        request: The ETL request; ``filename``, ``file_path`` and
            ``estimated_pages`` are read from it.

    Returns:
        The content returned by whichever parser handled the file.
    """
    from pathlib import PurePosixPath

    from app.utils.file_extensions import AZURE_DI_DOCUMENT_EXTENSIONS

    suffix = PurePosixPath(request.filename).suffix.lower()
    endpoint = getattr(app_config, "AZURE_DI_ENDPOINT", None)
    api_key = getattr(app_config, "AZURE_DI_KEY", None)
    has_credentials = bool(endpoint and api_key)

    if has_credentials and suffix in AZURE_DI_DOCUMENT_EXTENSIONS:
        try:
            from app.etl_pipeline.parsers.azure_doc_intelligence import (
                parse_with_azure_doc_intelligence,
            )

            azure_content = await parse_with_azure_doc_intelligence(
                request.file_path
            )
        except Exception:
            # Best-effort accelerator: never let an Azure DI problem fail the
            # extraction, just record it and continue with LlamaCloud below.
            logging.warning(
                "Azure Document Intelligence failed for %s, "
                "falling back to LlamaCloud",
                request.filename,
                exc_info=True,
            )
        else:
            return azure_content

    from app.etl_pipeline.parsers.llamacloud import parse_with_llamacloud

    llamacloud_content = await parse_with_llamacloud(
        request.file_path, request.estimated_pages
    )
    return llamacloud_content

View file

@ -0,0 +1,93 @@
import asyncio
import logging
import os
import random
from app.config import config as app_config
# Total number of analysis attempts made before giving up.
MAX_RETRIES = 5
# Backoff before the first retry, in seconds; doubled for each later attempt.
BASE_DELAY = 10
# Upper bound, in seconds, on the backoff before jitter is applied (the
# jitter of up to 25% is added afterwards, so a sleep can exceed this value).
MAX_DELAY = 120
async def parse_with_azure_doc_intelligence(file_path: str) -> str:
    """Extract a document's content as Markdown via Azure Document Intelligence.

    The file is analysed with the ``prebuilt-read`` model. Transient failures
    are retried, for at most ``MAX_RETRIES`` attempts in total, with
    exponential backoff (``BASE_DELAY`` doubled per attempt, capped at
    ``MAX_DELAY``) plus up to 25% random jitter in either direction.

    Retried:
        * ``ServiceRequestError`` / ``ServiceResponseError``
        * ``HttpResponseError`` with no status, a non-4xx status, or a
          408 / 429 status (request timeout / throttling)

    Not retried (re-raised immediately):
        * ``ClientAuthenticationError``
        * any other 4xx ``HttpResponseError``

    Args:
        file_path: Path of the file to send for analysis.

    Returns:
        The ``content`` of the analysis result, or ``""`` when it is empty.

    Raises:
        ClientAuthenticationError: The service rejected the credentials.
        HttpResponseError: A non-retryable 4xx response, or the last
            retryable HTTP failure once all attempts are used up.
        ServiceRequestError, ServiceResponseError: The last transport-level
            failure once all attempts are used up.
    """
    from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
    from azure.ai.documentintelligence.models import DocumentContentFormat
    from azure.core.credentials import AzureKeyCredential
    from azure.core.exceptions import (
        ClientAuthenticationError,
        HttpResponseError,
        ServiceRequestError,
        ServiceResponseError,
    )

    file_size_mb = os.path.getsize(file_path) / (1024 * 1024)

    # 4xx statuses that describe a transient condition rather than a bad
    # request: 408 (request timeout) and 429 (throttled). Failing fast on
    # these would defeat the purpose of the backoff loop below.
    retryable_client_statuses = frozenset({408, 429})

    last_exception: Exception | None = None
    attempt_errors: list[str] = []

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            client = DocumentIntelligenceClient(
                endpoint=app_config.AZURE_DI_ENDPOINT,
                credential=AzureKeyCredential(app_config.AZURE_DI_KEY),
            )
            async with client:
                with open(file_path, "rb") as f:
                    poller = await client.begin_analyze_document(
                        "prebuilt-read",
                        body=f,
                        output_content_format=DocumentContentFormat.MARKDOWN,
                    )
                    result = await poller.result()
        except ClientAuthenticationError:
            # Bad credentials will not fix themselves; surface immediately.
            raise
        except HttpResponseError as e:
            status = e.status_code
            if (
                status
                and 400 <= status < 500
                and status not in retryable_client_statuses
            ):
                raise
            last_exception = e
        except (ServiceRequestError, ServiceResponseError) as e:
            last_exception = e
        else:
            if attempt > 1:
                logging.info(
                    "Azure Document Intelligence succeeded on attempt %d "
                    "after %d failures",
                    attempt,
                    len(attempt_errors),
                )
            return result.content or ""

        # Only reached when this attempt failed with a retryable error.
        attempt_errors.append(
            f"Attempt {attempt}: {type(last_exception).__name__} - "
            f"{str(last_exception)[:200]}"
        )

        if attempt < MAX_RETRIES:
            base_delay = min(BASE_DELAY * (2 ** (attempt - 1)), MAX_DELAY)
            # Spread retries of concurrent jobs by +/-25% of the base delay.
            jitter = base_delay * 0.25 * (2 * random.random() - 1)
            delay = base_delay + jitter
            logging.warning(
                "Azure Document Intelligence failed (attempt %d/%d): %s. "
                "File: %.1fMB. Retrying in %.0fs...",
                attempt,
                MAX_RETRIES,
                attempt_errors[-1],
                file_size_mb,
                delay,
            )
            await asyncio.sleep(delay)
        else:
            logging.error(
                "Azure Document Intelligence failed after %d attempts. "
                "File size: %.1fMB. Errors: %s",
                MAX_RETRIES,
                file_size_mb,
                "; ".join(attempt_errors),
            )

    raise last_exception or RuntimeError(
        f"Azure Document Intelligence parsing failed after {MAX_RETRIES} retries. "
        f"File size: {file_size_mb:.1f}MB"
    )

View file

@ -273,17 +273,18 @@ class IndexingPipelineService:
continue
dup_check = await self.session.execute(
select(Document.id).filter(
select(Document.id, Document.title).filter(
Document.content_hash == content_hash,
Document.id != existing.id,
)
)
if dup_check.scalars().first() is not None:
dup_row = dup_check.first()
if dup_row is not None:
if not DocumentStatus.is_state(
existing.status, DocumentStatus.READY
):
existing.status = DocumentStatus.failed(
"Duplicate content — already indexed by another document"
f"Duplicate content: matches '{dup_row.title}'"
)
continue

View file

@ -180,9 +180,7 @@ async def list_vision_llm_configs(
) from e
@router.get(
"/vision-llm-configs/{config_id}", response_model=VisionLLMConfigRead
)
@router.get("/vision-llm-configs/{config_id}", response_model=VisionLLMConfigRead)
async def get_vision_llm_config(
config_id: int,
session: AsyncSession = Depends(get_async_session),
@ -214,9 +212,7 @@ async def get_vision_llm_config(
) from e
@router.put(
"/vision-llm-configs/{config_id}", response_model=VisionLLMConfigRead
)
@router.put("/vision-llm-configs/{config_id}", response_model=VisionLLMConfigRead)
async def update_vision_llm_config(
config_id: int,
update_data: VisionLLMConfigUpdate,

View file

@ -183,7 +183,8 @@ class LLMPreferencesRead(BaseModel):
None, description="ID of the image generation config to use"
)
vision_llm_config_id: int | None = Field(
None, description="ID of the vision LLM config to use for vision/screenshot analysis"
None,
description="ID of the vision LLM config to use for vision/screenshot analysis",
)
agent_llm: dict[str, Any] | None = Field(
None, description="Full config for agent LLM"
@ -214,5 +215,6 @@ class LLMPreferencesUpdate(BaseModel):
None, description="ID of the image generation config to use"
)
vision_llm_config_id: int | None = Field(
None, description="ID of the vision LLM config to use for vision/screenshot analysis"
None,
description="ID of the vision LLM config to use for vision/screenshot analysis",
)

View file

@ -434,9 +434,7 @@ async def get_vision_llm(
config_id = search_space.vision_llm_config_id
if config_id is None:
logger.error(
f"No vision LLM configured for search space {search_space_id}"
)
logger.error(f"No vision LLM configured for search space {search_space_id}")
return None
if is_vision_auto_mode(config_id):

View file

@ -15,7 +15,9 @@ import httpx
logger = logging.getLogger(__name__)
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"
FALLBACK_FILE = Path(__file__).parent.parent / "config" / "vision_model_list_fallback.json"
FALLBACK_FILE = (
Path(__file__).parent.parent / "config" / "vision_model_list_fallback.json"
)
CACHE_TTL_SECONDS = 86400 # 24 hours
_cache: list[dict] | None = None

View file

@ -93,6 +93,22 @@ UNSTRUCTURED_DOCUMENT_EXTENSIONS: frozenset[str] = frozenset(
}
)
# File extensions (lower-case, with leading dot) that are eligible for the
# Azure Document Intelligence parser.
AZURE_DI_DOCUMENT_EXTENSIONS: frozenset[str] = frozenset(
    (
        # Documents
        ".pdf",
        ".docx",
        ".xlsx",
        ".pptx",
        # Images
        ".png",
        ".jpg",
        ".jpeg",
        ".bmp",
        ".tiff",
        ".tif",
        ".heif",
    )
)
# ---------------------------------------------------------------------------
# Union (used by classify_file for routing) + service lookup
# ---------------------------------------------------------------------------
@ -101,6 +117,7 @@ DOCUMENT_EXTENSIONS: frozenset[str] = (
DOCLING_DOCUMENT_EXTENSIONS
| LLAMAPARSE_DOCUMENT_EXTENSIONS
| UNSTRUCTURED_DOCUMENT_EXTENSIONS
| AZURE_DI_DOCUMENT_EXTENSIONS
)
_SERVICE_MAP: dict[str, frozenset[str]] = {
@ -113,9 +130,21 @@ _SERVICE_MAP: dict[str, frozenset[str]] = {
def get_document_extensions_for_service(etl_service: str | None) -> frozenset[str]:
    """Return the document extensions supported by *etl_service*.

    When *etl_service* is ``LLAMACLOUD`` and Azure Document Intelligence
    credentials are configured (both ``AZURE_DI_ENDPOINT`` and
    ``AZURE_DI_KEY`` are truthy on the app config), the set is dynamically
    expanded to include Azure DI's supported extensions (e.g. ``.heif``).

    Falls back to the full union when the service is ``None`` or unknown.

    Args:
        etl_service: Name of the configured ETL service, or ``None``.

    Returns:
        The frozenset of extensions accepted for that service.
    """
    extensions = _SERVICE_MAP.get(etl_service or "", DOCUMENT_EXTENSIONS)
    if etl_service != "LLAMACLOUD":
        return extensions

    # The config is read on every call (not cached at import time) so the
    # result follows the credentials currently set on the config object.
    from app.config import config as app_config

    azure_di_configured = getattr(app_config, "AZURE_DI_ENDPOINT", None) and getattr(
        app_config, "AZURE_DI_KEY", None
    )
    if azure_di_configured:
        return extensions | AZURE_DI_DOCUMENT_EXTENSIONS
    return extensions
def is_supported_document_extension(filename: str) -> bool:

View file

@ -73,6 +73,7 @@ dependencies = [
"langchain-community>=0.4.1",
"deepagents>=0.4.12",
"stripe>=15.0.0",
"azure-ai-documentintelligence>=1.0.2",
"litellm>=1.83.0",
"langchain-litellm>=0.6.4",
]

View file

@ -249,6 +249,209 @@ async def test_extract_pdf_with_llamacloud(tmp_path, mocker):
assert result.content_type == "document"
# ---------------------------------------------------------------------------
# Slice 9b - LLAMACLOUD + Azure DI accelerator
# ---------------------------------------------------------------------------
def _mock_azure_di(mocker, content="# Azure DI parsed"):
    """Patch the Azure DI SDK names used by the parser and return the fake client.

    The returned client is an ``AsyncMock`` usable as an async context
    manager; its ``begin_analyze_document`` yields a poller whose
    ``result()`` exposes ``content``.
    """
    # Minimal stand-in for the analysis result: only ``content`` is provided.
    analyze_result = type("FakeResult", (), {"content": content})()

    poller = mocker.AsyncMock()
    poller.result.return_value = analyze_result

    client = mocker.AsyncMock()
    client.begin_analyze_document.return_value = poller
    # ``async with client:`` must hand back the same mock and not swallow errors.
    client.__aenter__ = mocker.AsyncMock(return_value=client)
    client.__aexit__ = mocker.AsyncMock(return_value=False)

    mocker.patch(
        "azure.ai.documentintelligence.aio.DocumentIntelligenceClient",
        return_value=client,
    )
    content_format = mocker.MagicMock(MARKDOWN="markdown")
    mocker.patch(
        "azure.ai.documentintelligence.models.DocumentContentFormat",
        content_format,
    )
    credential = mocker.MagicMock()
    mocker.patch(
        "azure.core.credentials.AzureKeyCredential",
        return_value=credential,
    )
    return client
def _mock_llamacloud(mocker, content="# LlamaCloud parsed"):
    """Patch the LlamaCloud SDK names used by the parser and return the fake parser.

    The fake parser's ``aparse`` resolves to a job result with no pages whose
    ``get_markdown_documents`` returns a single document carrying *content*
    as its ``text``.
    """
    markdown_doc_type = type("FakeDoc", (), {"text": content})

    class FakeJobResult:
        pages = []

        def get_markdown_documents(self, split_by_page=True):
            return [markdown_doc_type()]

    parser = mocker.AsyncMock()
    parser.aparse.return_value = FakeJobResult()

    mocker.patch("llama_cloud_services.LlamaParse", return_value=parser)
    result_type = mocker.MagicMock(MD="md")
    mocker.patch("llama_cloud_services.parse.utils.ResultType", result_type)
    return parser
async def test_llamacloud_with_azure_di_uses_azure_for_pdf(tmp_path, mocker):
    """A .pdf goes to Azure DI, and LlamaCloud is left untouched, when Azure DI is configured."""
    source = tmp_path / "report.pdf"
    source.write_bytes(b"%PDF-1.4 fake content " * 10)

    # LLAMACLOUD is the selected service and both backends have credentials.
    mocker.patch("app.config.config.ETL_SERVICE", "LLAMACLOUD")
    mocker.patch("app.config.config.LLAMA_CLOUD_API_KEY", "fake-key", create=True)
    mocker.patch(
        "app.config.config.AZURE_DI_ENDPOINT",
        "https://fake.cognitiveservices.azure.com/",
        create=True,
    )
    mocker.patch("app.config.config.AZURE_DI_KEY", "fake-key", create=True)

    azure_client = _mock_azure_di(mocker, "# Azure DI parsed")
    llama_parser = _mock_llamacloud(mocker)

    request = EtlRequest(file_path=str(source), filename="report.pdf")
    outcome = await EtlPipelineService().extract(request)

    assert outcome.markdown_content == "# Azure DI parsed"
    assert outcome.etl_service == "LLAMACLOUD"
    assert outcome.content_type == "document"
    azure_client.begin_analyze_document.assert_called_once()
    llama_parser.aparse.assert_not_called()
async def test_llamacloud_azure_di_fallback_on_failure(tmp_path, mocker):
    """An error raised by the Azure DI parser results in LlamaCloud handling the file."""
    source = tmp_path / "report.pdf"
    source.write_bytes(b"%PDF-1.4 fake content " * 10)

    mocker.patch("app.config.config.ETL_SERVICE", "LLAMACLOUD")
    mocker.patch("app.config.config.LLAMA_CLOUD_API_KEY", "fake-key", create=True)
    mocker.patch(
        "app.config.config.AZURE_DI_ENDPOINT",
        "https://fake.cognitiveservices.azure.com/",
        create=True,
    )
    mocker.patch("app.config.config.AZURE_DI_KEY", "fake-key", create=True)

    # Make the Azure DI parser itself blow up so the fallback path is taken.
    azure_failure = RuntimeError("Azure DI unavailable")
    mocker.patch(
        "app.etl_pipeline.parsers.azure_doc_intelligence.parse_with_azure_doc_intelligence",
        side_effect=azure_failure,
    )
    llama_parser = _mock_llamacloud(mocker, "# LlamaCloud fallback")

    request = EtlRequest(
        file_path=str(source), filename="report.pdf", estimated_pages=5
    )
    outcome = await EtlPipelineService().extract(request)

    assert outcome.markdown_content == "# LlamaCloud fallback"
    assert outcome.etl_service == "LLAMACLOUD"
    assert outcome.content_type == "document"
    llama_parser.aparse.assert_called_once()
async def test_llamacloud_skips_azure_di_for_unsupported_ext(tmp_path, mocker):
    """An extension outside Azure DI's set (here .epub) goes straight to LlamaCloud."""
    source = tmp_path / "book.epub"
    source.write_bytes(b"\x00" * 10)

    mocker.patch("app.config.config.ETL_SERVICE", "LLAMACLOUD")
    mocker.patch("app.config.config.LLAMA_CLOUD_API_KEY", "fake-key", create=True)
    mocker.patch(
        "app.config.config.AZURE_DI_ENDPOINT",
        "https://fake.cognitiveservices.azure.com/",
        create=True,
    )
    mocker.patch("app.config.config.AZURE_DI_KEY", "fake-key", create=True)

    azure_client = _mock_azure_di(mocker)
    llama_parser = _mock_llamacloud(mocker, "# Epub from LlamaCloud")

    request = EtlRequest(
        file_path=str(source), filename="book.epub", estimated_pages=50
    )
    outcome = await EtlPipelineService().extract(request)

    assert outcome.markdown_content == "# Epub from LlamaCloud"
    assert outcome.etl_service == "LLAMACLOUD"
    # Azure DI is configured but must not have been consulted for this file.
    azure_client.begin_analyze_document.assert_not_called()
    llama_parser.aparse.assert_called_once()
async def test_llamacloud_without_azure_di_uses_llamacloud_directly(tmp_path, mocker):
    """With no Azure DI credentials, a .pdf is parsed by LlamaCloud."""
    source = tmp_path / "report.pdf"
    source.write_bytes(b"%PDF-1.4 fake content " * 10)

    mocker.patch("app.config.config.ETL_SERVICE", "LLAMACLOUD")
    mocker.patch("app.config.config.LLAMA_CLOUD_API_KEY", "fake-key", create=True)
    # Explicitly blank out both Azure DI settings.
    mocker.patch("app.config.config.AZURE_DI_ENDPOINT", None, create=True)
    mocker.patch("app.config.config.AZURE_DI_KEY", None, create=True)

    llama_parser = _mock_llamacloud(mocker, "# Direct LlamaCloud")

    request = EtlRequest(
        file_path=str(source), filename="report.pdf", estimated_pages=5
    )
    outcome = await EtlPipelineService().extract(request)

    assert outcome.markdown_content == "# Direct LlamaCloud"
    assert outcome.etl_service == "LLAMACLOUD"
    assert outcome.content_type == "document"
    llama_parser.aparse.assert_called_once()
async def test_llamacloud_heif_accepted_only_with_azure_di(tmp_path, mocker):
    """LLAMACLOUD rejects .heif without Azure DI credentials and accepts it with them."""
    from app.etl_pipeline.exceptions import EtlUnsupportedFileError

    source = tmp_path / "photo.heif"
    source.write_bytes(b"\x00" * 100)

    def make_request():
        return EtlRequest(file_path=str(source), filename="photo.heif")

    # Phase 1: no Azure DI credentials -> the file is refused.
    mocker.patch("app.config.config.ETL_SERVICE", "LLAMACLOUD")
    mocker.patch("app.config.config.LLAMA_CLOUD_API_KEY", "fake-key", create=True)
    mocker.patch("app.config.config.AZURE_DI_ENDPOINT", None, create=True)
    mocker.patch("app.config.config.AZURE_DI_KEY", None, create=True)

    with pytest.raises(EtlUnsupportedFileError, match="not supported by LLAMACLOUD"):
        await EtlPipelineService().extract(make_request())

    # Phase 2: credentials present -> the same file is parsed via Azure DI.
    mocker.patch(
        "app.config.config.AZURE_DI_ENDPOINT",
        "https://fake.cognitiveservices.azure.com/",
    )
    mocker.patch("app.config.config.AZURE_DI_KEY", "fake-key")

    azure_client = _mock_azure_di(mocker, "# HEIF from Azure DI")

    outcome = await EtlPipelineService().extract(make_request())

    assert outcome.markdown_content == "# HEIF from Azure DI"
    assert outcome.etl_service == "LLAMACLOUD"
    azure_client.begin_analyze_document.assert_called_once()
# ---------------------------------------------------------------------------
# Slice 10 - unknown extension falls through to document ETL
# ---------------------------------------------------------------------------
@ -416,9 +619,14 @@ async def test_extract_zip_raises_unsupported_error(tmp_path):
("file.svg", "DOCLING", True),
("file.p7s", "UNSTRUCTURED", False),
("file.p7s", "LLAMACLOUD", True),
("file.heif", "LLAMACLOUD", True),
("file.heif", "DOCLING", True),
("file.heif", "UNSTRUCTURED", True),
],
)
def test_should_skip_for_service(filename, etl_service, expected_skip):
def test_should_skip_for_service(filename, etl_service, expected_skip, monkeypatch):
monkeypatch.setattr("app.config.config.AZURE_DI_ENDPOINT", None, raising=False)
monkeypatch.setattr("app.config.config.AZURE_DI_KEY", None, raising=False)
from app.etl_pipeline.file_classifier import should_skip_for_service
assert should_skip_for_service(filename, etl_service) is expected_skip, (
@ -426,6 +634,19 @@ def test_should_skip_for_service(filename, etl_service, expected_skip):
)
def test_heif_not_skipped_for_llamacloud_when_azure_di_configured(monkeypatch):
    """``should_skip_for_service`` keeps .heif for LLAMACLOUD once Azure DI is configured."""
    azure_settings = {
        "app.config.config.AZURE_DI_ENDPOINT": "https://fake.cognitiveservices.azure.com/",
        "app.config.config.AZURE_DI_KEY": "fake-key",
    }
    for target, value in azure_settings.items():
        monkeypatch.setattr(target, value, raising=False)

    from app.etl_pipeline.file_classifier import should_skip_for_service

    skipped = should_skip_for_service("file.heif", "LLAMACLOUD")
    assert skipped is False
# ---------------------------------------------------------------------------
# Slice 14b - ETL pipeline rejects per-parser incompatible documents
# ---------------------------------------------------------------------------

View file

@ -92,8 +92,9 @@ def test_non_document_extensions_are_not_supported(filename):
# ---------------------------------------------------------------------------
def test_union_equals_all_three_sets():
def test_union_includes_all_parser_extension_sets():
from app.utils.file_extensions import (
AZURE_DI_DOCUMENT_EXTENSIONS,
DOCLING_DOCUMENT_EXTENSIONS,
DOCUMENT_EXTENSIONS,
LLAMAPARSE_DOCUMENT_EXTENSIONS,
@ -104,6 +105,7 @@ def test_union_equals_all_three_sets():
DOCLING_DOCUMENT_EXTENSIONS
| LLAMAPARSE_DOCUMENT_EXTENSIONS
| UNSTRUCTURED_DOCUMENT_EXTENSIONS
| AZURE_DI_DOCUMENT_EXTENSIONS
)
assert expected == DOCUMENT_EXTENSIONS

8465
surfsense_backend/uv.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,90 +0,0 @@
"use client";
import { ChevronFirst, ChevronLast, ChevronLeft, ChevronRight } from "lucide-react";
import { motion } from "motion/react";
import { Button } from "@/components/ui/button";
// Number of rows per page; used to derive the visible range.
const PAGE_SIZE = 50;

/**
 * Pager showing the visible item range ("start-end of total") and
 * first / previous / next / last buttons.
 *
 * `pageIndex` is zero-based. `canPrev` gates the first/previous buttons and
 * `canNext` gates the next/last buttons; the callbacks are invoked as-is.
 */
export function PaginationControls({
	pageIndex,
	total,
	onFirst,
	onPrev,
	onNext,
	onLast,
	canPrev,
	canNext,
}: {
	pageIndex: number;
	total: number;
	onFirst: () => void;
	onPrev: () => void;
	onNext: () => void;
	onLast: () => void;
	canPrev: boolean;
	canNext: boolean;
}) {
	// With no items there is no "first" row: show "0-0 of 0", not "1-0 of 0".
	const start = total === 0 ? 0 : pageIndex * PAGE_SIZE + 1;
	const end = Math.min((pageIndex + 1) * PAGE_SIZE, total);

	// The four buttons differ only in icon, label, handler and enabling flag.
	const navButtons = [
		{ label: "Go to first page", Icon: ChevronFirst, onClick: onFirst, enabled: canPrev },
		{ label: "Go to previous page", Icon: ChevronLeft, onClick: onPrev, enabled: canPrev },
		{ label: "Go to next page", Icon: ChevronRight, onClick: onNext, enabled: canNext },
		{ label: "Go to last page", Icon: ChevronLast, onClick: onLast, enabled: canNext },
	];

	return (
		<motion.div
			className="flex items-center justify-end gap-3 py-3 px-2 select-none"
			initial={{ opacity: 0, y: 10 }}
			animate={{ opacity: 1, y: 0 }}
			transition={{ type: "spring", stiffness: 300, damping: 30, delay: 0.3 }}
		>
			{/* Range indicator */}
			<span className="text-sm text-muted-foreground tabular-nums">
				{start}-{end} of {total}
			</span>

			{/* Navigation buttons */}
			<div className="flex items-center gap-1">
				{navButtons.map(({ label, Icon, onClick, enabled }) => (
					<Button
						key={label}
						variant="ghost"
						size="icon"
						className="h-8 w-8 disabled:opacity-40"
						onClick={onClick}
						disabled={!enabled}
						aria-label={label}
					>
						<Icon size={18} strokeWidth={2} />
					</Button>
				))}
			</div>
		</motion.div>
	);
}

export { PAGE_SIZE };

View file

@ -1,225 +0,0 @@
"use client";
import { useSetAtom } from "jotai";
import { MoreHorizontal, PenLine, Trash2 } from "lucide-react";
import { useState } from "react";
import { toast } from "sonner";
import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
import {
AlertDialog,
AlertDialogAction,
AlertDialogCancel,
AlertDialogContent,
AlertDialogDescription,
AlertDialogFooter,
AlertDialogHeader,
AlertDialogTitle,
} from "@/components/ui/alert-dialog";
import { Button } from "@/components/ui/button";
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu";
import type { Document } from "./types";
// Document types for which the Edit action (and therefore the dropdown menu) is offered
const EDITABLE_DOCUMENT_TYPES = ["FILE", "NOTE"] as const;

// SURFSENSE_DOCS are system-managed and cannot be deleted
const NON_DELETABLE_DOCUMENT_TYPES = ["SURFSENSE_DOCS"] as const;

// Styling applied to a menu item while its action is unavailable
const DISABLED_MENU_ITEM_CLASS = "text-muted-foreground cursor-not-allowed opacity-50";

/**
 * Per-row Edit/Delete actions for a document.
 *
 * Editable documents get a dropdown menu (Edit, plus Delete when allowed);
 * other documents get a standalone delete button when deletion is allowed.
 * Both actions are disabled while the document is pending or processing.
 * Deletion is confirmed through an alert dialog. The same controls are
 * rendered twice (desktop and mobile containers); only the menu trigger's
 * hover styling differs between them.
 */
export function RowActions({
	document,
	deleteDocument,
	searchSpaceId,
}: {
	document: Document;
	deleteDocument: (id: number) => Promise<boolean>;
	searchSpaceId: string;
}) {
	const [isDeleteOpen, setIsDeleteOpen] = useState(false);
	const [isDeleting, setIsDeleting] = useState(false);
	const openEditorPanel = useSetAtom(openEditorPanelAtom);

	const isEditable = EDITABLE_DOCUMENT_TYPES.includes(
		document.document_type as (typeof EDITABLE_DOCUMENT_TYPES)[number]
	);

	const isBeingProcessed =
		document.status?.state === "pending" || document.status?.state === "processing";

	const shouldShowDelete = !NON_DELETABLE_DOCUMENT_TYPES.includes(
		document.document_type as (typeof NON_DELETABLE_DOCUMENT_TYPES)[number]
	);

	const isEditDisabled = isBeingProcessed;
	const isDeleteDisabled = isBeingProcessed;

	const handleDelete = async () => {
		setIsDeleting(true);
		try {
			const ok = await deleteDocument(document.id);
			if (!ok) toast.error("Failed to delete document");
			// Note: Success toast is handled by the mutation atom's onSuccess callback
			// Cache is updated optimistically by the mutation, no need to refresh
		} catch (error: unknown) {
			console.error("Error deleting document:", error);
			// Check for 409 Conflict (document started processing after UI loaded)
			const status =
				(error as { response?: { status?: number } })?.response?.status ??
				(error as { status?: number })?.status;
			if (status === 409) {
				toast.error("Document is now being processed. Please try again later.");
			} else {
				toast.error("Failed to delete document");
			}
		} finally {
			setIsDeleting(false);
			setIsDeleteOpen(false);
		}
	};

	const handleEdit = () => {
		openEditorPanel({
			documentId: document.id,
			searchSpaceId: Number(searchSpaceId),
			title: document.title,
		});
	};

	// Standalone delete button used for documents that cannot be edited.
	const renderDeleteButton = () => (
		<Button
			variant="ghost"
			size="icon"
			className={`h-8 w-8 ${isDeleteDisabled ? "text-muted-foreground cursor-not-allowed" : "text-muted-foreground hover:text-foreground"}`}
			onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
			disabled={isDeleting || isDeleteDisabled}
		>
			<Trash2 className="h-4 w-4" />
			<span className="sr-only">Delete</span>
		</Button>
	);

	// Edit/Delete dropdown; the trigger styling is supplied by the caller.
	const renderMenu = (triggerClassName: string) => (
		<DropdownMenu>
			<DropdownMenuTrigger asChild>
				<Button variant="ghost" size="icon" className={triggerClassName}>
					<MoreHorizontal className="h-4 w-4" />
					<span className="sr-only">Open menu</span>
				</Button>
			</DropdownMenuTrigger>
			<DropdownMenuContent align="end" className="w-40">
				<DropdownMenuItem
					onClick={() => !isEditDisabled && handleEdit()}
					disabled={isEditDisabled}
					className={isEditDisabled ? DISABLED_MENU_ITEM_CLASS : ""}
				>
					<PenLine className="mr-2 h-4 w-4" />
					<span>Edit</span>
				</DropdownMenuItem>
				{shouldShowDelete && (
					<DropdownMenuItem
						onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
						disabled={isDeleteDisabled}
						className={isDeleteDisabled ? DISABLED_MENU_ITEM_CLASS : ""}
					>
						<Trash2 className="mr-2 h-4 w-4" />
						<span>Delete</span>
					</DropdownMenuItem>
				)}
			</DropdownMenuContent>
		</DropdownMenu>
	);

	// Picks the menu or the standalone button (or nothing) for one breakpoint.
	const renderActions = (triggerClassName: string) => {
		if (isEditable) return renderMenu(triggerClassName);
		return shouldShowDelete ? renderDeleteButton() : null;
	};

	return (
		<>
			{/* Desktop Actions */}
			<div className="hidden md:inline-flex items-center justify-center">
				{renderActions(
					"h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80"
				)}
			</div>

			{/* Mobile Actions Dropdown */}
			<div className="inline-flex md:hidden items-center justify-center">
				{renderActions("h-8 w-8 text-muted-foreground")}
			</div>

			<AlertDialog open={isDeleteOpen} onOpenChange={setIsDeleteOpen}>
				<AlertDialogContent>
					<AlertDialogHeader>
						<AlertDialogTitle>Delete document?</AlertDialogTitle>
						<AlertDialogDescription>
							This action cannot be undone. This will permanently delete this document from your
							search space.
						</AlertDialogDescription>
					</AlertDialogHeader>
					<AlertDialogFooter>
						<AlertDialogCancel>Cancel</AlertDialogCancel>
						<AlertDialogAction
							onClick={(e) => {
								// Keep the dialog under our control; handleDelete closes it when done.
								e.preventDefault();
								handleDelete();
							}}
							disabled={isDeleting}
							className="bg-destructive text-destructive-foreground hover:bg-destructive/90"
						>
							{isDeleting ? "Deleting" : "Delete"}
						</AlertDialogAction>
					</AlertDialogFooter>
				</AlertDialogContent>
			</AlertDialog>
		</>
	);
}

View file

@ -1,28 +0,0 @@
// Document type identifier; kept as a plain string rather than a closed union.
export type DocumentType = string;

// Processing status of a document: one of four states plus an optional reason text.
export type DocumentStatus = {
	state: "ready" | "pending" | "processing" | "failed";
	reason?: string;
};

// A document row. Only id, title, document_type, created_at and
// search_space_id are required; every other field may be absent.
export type Document = {
	id: number;
	title: string;
	document_type: DocumentType;
	document_metadata?: any;
	content?: string;
	content_preview?: string;
	created_at: string;
	search_space_id: number;
	created_by_id?: string | null;
	created_by_name?: string | null;
	created_by_email?: string | null;
	status?: DocumentStatus;
};

// One boolean flag per named table column.
// NOTE(review): presumably true means the column is shown — confirm against the consumer.
export type ColumnVisibility = {
	document_type: boolean;
	created_by: boolean;
	created_at: boolean;
	status: boolean;
};

View file

@ -169,7 +169,9 @@ body:has(.suggestion-body) {
padding: 2px 4px;
line-height: 1;
border-radius: 4px;
transition: color 0.15s, background 0.15s;
transition:
color 0.15s,
background 0.15s;
}
.setup-dismiss:hover {

View file

@ -2,7 +2,7 @@
import { Search, Unplug } from "lucide-react";
import type { FC } from "react";
import { getDocumentTypeLabel } from "@/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon";
import { getDocumentTypeLabel } from "@/components/documents/DocumentTypeIcon";
import { Button } from "@/components/ui/button";
import { Spinner } from "@/components/ui/spinner";
import { TabsContent } from "@/components/ui/tabs";

View file

@ -261,7 +261,7 @@ const ConnectToolsBanner: FC<{ isThreadEmpty: boolean }> = ({ isThreadEmpty }) =
<div className="flex w-full items-center gap-2.5 px-4 py-2.5">
<button
type="button"
className="flex flex-1 items-center gap-2.5 text-left cursor-pointer"
className="flex flex-1 items-center gap-2.5 text-left cursor-pointer select-none"
onClick={() => setConnectorDialogOpen(true)}
>
<Unplug className="size-4 text-muted-foreground shrink-0" />

View file

@ -13,7 +13,7 @@ import {
} from "lucide-react";
import React, { useCallback, useRef, useState } from "react";
import { useDrag } from "react-dnd";
import { getDocumentTypeIcon } from "@/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon";
import { getDocumentTypeIcon } from "@/components/documents/DocumentTypeIcon";
import { ExportContextItems, ExportDropdownItems } from "@/components/shared/ExportMenuItems";
import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
@ -167,7 +167,7 @@ export const DocumentNode = React.memo(function DocumentNode({
<Clock className="h-3.5 w-3.5 text-muted-foreground/60" />
</span>
</TooltipTrigger>
<TooltipContent side="top">Pending waiting to be synced</TooltipContent>
<TooltipContent side="top">Pending: waiting to be synced</TooltipContent>
</Tooltip>
);
}
@ -191,7 +191,7 @@ export const DocumentNode = React.memo(function DocumentNode({
<AlertCircle className="h-3.5 w-3.5 text-destructive" />
</span>
</TooltipTrigger>
<TooltipContent side="top" className="max-w-xs">
<TooltipContent side="top">
{doc.status?.reason || "Processing failed"}
</TooltipContent>
</Tooltip>
@ -222,72 +222,83 @@ export const DocumentNode = React.memo(function DocumentNode({
</TooltipContent>
</Tooltip>
{getDocumentTypeIcon(
doc.document_type as DocumentTypeEnum,
"h-3.5 w-3.5 text-muted-foreground"
) && (
<span className="shrink-0">
{getDocumentTypeIcon(
doc.document_type as DocumentTypeEnum,
"h-3.5 w-3.5 text-muted-foreground"
)}
</span>
)}
<DropdownMenu open={dropdownOpen} onOpenChange={setDropdownOpen}>
<DropdownMenuTrigger asChild>
<Button
variant="ghost"
size="icon"
<span className="relative shrink-0 flex items-center justify-center h-6 w-6">
{getDocumentTypeIcon(
doc.document_type as DocumentTypeEnum,
"h-3.5 w-3.5 text-muted-foreground"
) && (
<span
className={cn(
"hidden sm:inline-flex h-6 w-6 shrink-0 hover:bg-transparent",
dropdownOpen
? "opacity-100 bg-accent hover:bg-accent"
: "opacity-0 group-hover:opacity-100"
"absolute inset-0 flex items-center justify-center transition-opacity pointer-events-none",
dropdownOpen ? "opacity-0" : "group-hover:opacity-0"
)}
>
{getDocumentTypeIcon(
doc.document_type as DocumentTypeEnum,
"h-3.5 w-3.5 text-muted-foreground"
)}
</span>
)}
<DropdownMenu open={dropdownOpen} onOpenChange={setDropdownOpen}>
<DropdownMenuTrigger asChild>
<Button
variant="ghost"
size="icon"
className={cn(
"hidden sm:inline-flex h-6 w-6 shrink-0 hover:bg-transparent",
dropdownOpen
? "opacity-100 bg-accent hover:bg-accent"
: "opacity-0 group-hover:opacity-100"
)}
onClick={(e) => e.stopPropagation()}
>
<MoreHorizontal className="h-3.5 w-3.5 text-muted-foreground" />
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent
align="end"
className="w-40"
onClick={(e) => e.stopPropagation()}
>
<MoreHorizontal className="h-3.5 w-3.5" />
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent align="end" className="w-40" onClick={(e) => e.stopPropagation()}>
<DropdownMenuItem onClick={() => onPreview(doc)} disabled={isProcessing}>
<Eye className="mr-2 h-4 w-4" />
Open
</DropdownMenuItem>
{isEditable && (
<DropdownMenuItem onClick={() => onEdit(doc)}>
<PenLine className="mr-2 h-4 w-4" />
Edit
<DropdownMenuItem onClick={() => onPreview(doc)} disabled={isProcessing}>
<Eye className="mr-2 h-4 w-4" />
Open
</DropdownMenuItem>
)}
<DropdownMenuItem onClick={() => onMove(doc)}>
<Move className="mr-2 h-4 w-4" />
Move to...
</DropdownMenuItem>
{onExport && (
<DropdownMenuSub>
<DropdownMenuSubTrigger disabled={isProcessing}>
<Download className="mr-2 h-4 w-4" />
Export
</DropdownMenuSubTrigger>
<DropdownMenuSubContent className="min-w-[180px]">
<ExportDropdownItems onExport={handleExport} exporting={exporting} />
</DropdownMenuSubContent>
</DropdownMenuSub>
)}
{onVersionHistory && isVersionableType(doc.document_type) && (
<DropdownMenuItem disabled={isProcessing} onClick={() => onVersionHistory(doc)}>
<History className="mr-2 h-4 w-4" />
Versions
{isEditable && (
<DropdownMenuItem onClick={() => onEdit(doc)}>
<PenLine className="mr-2 h-4 w-4" />
Edit
</DropdownMenuItem>
)}
<DropdownMenuItem onClick={() => onMove(doc)}>
<Move className="mr-2 h-4 w-4" />
Move to...
</DropdownMenuItem>
)}
<DropdownMenuItem disabled={isProcessing} onClick={() => onDelete(doc)}>
<Trash2 className="mr-2 h-4 w-4" />
Delete
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
{onExport && (
<DropdownMenuSub>
<DropdownMenuSubTrigger disabled={isProcessing}>
<Download className="mr-2 h-4 w-4" />
Export
</DropdownMenuSubTrigger>
<DropdownMenuSubContent className="min-w-[180px]">
<ExportDropdownItems onExport={handleExport} exporting={exporting} />
</DropdownMenuSubContent>
</DropdownMenuSub>
)}
{onVersionHistory && isVersionableType(doc.document_type) && (
<DropdownMenuItem disabled={isProcessing} onClick={() => onVersionHistory(doc)}>
<History className="mr-2 h-4 w-4" />
Versions
</DropdownMenuItem>
)}
<DropdownMenuItem disabled={isProcessing} onClick={() => onDelete(doc)}>
<Trash2 className="mr-2 h-4 w-4" />
Delete
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
</span>
</div>
</ContextMenuTrigger>

View file

@ -77,7 +77,7 @@ interface FolderNodeProps {
contextMenuOpen?: boolean;
onContextMenuOpenChange?: (open: boolean) => void;
isWatched?: boolean;
onRescan?: (folder: FolderDisplay) => void;
onRescan?: (folder: FolderDisplay) => void | Promise<void>;
onStopWatching?: (folder: FolderDisplay) => void;
}
@ -124,6 +124,17 @@ export const FolderNode = React.memo(function FolderNode({
const inputRef = useRef<HTMLInputElement>(null);
const rowRef = useRef<HTMLDivElement>(null);
const [dropZone, setDropZone] = useState<DropZone | null>(null);
const [isRescanning, setIsRescanning] = useState(false);
const handleRescan = useCallback(async () => {
if (isRescanning) return;
setIsRescanning(true);
try {
await onRescan?.(folder);
} finally {
setIsRescanning(false);
}
}, [folder, onRescan, isRescanning]);
const [{ isDragging }, drag] = useDrag(
() => ({
@ -351,10 +362,10 @@ export const FolderNode = React.memo(function FolderNode({
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation();
onRescan(folder);
handleRescan();
}}
>
<RefreshCw className="mr-2 h-4 w-4" />
<RefreshCw className={cn("mr-2 h-4 w-4", isRescanning && "animate-spin")} />
Re-scan
</DropdownMenuItem>
)}
@ -397,7 +408,6 @@ export const FolderNode = React.memo(function FolderNode({
Move to...
</DropdownMenuItem>
<DropdownMenuItem
className="text-destructive focus:text-destructive"
onClick={(e) => {
e.stopPropagation();
onDelete(folder);
@ -415,8 +425,8 @@ export const FolderNode = React.memo(function FolderNode({
{!isRenaming && contextMenuOpen && (
<ContextMenuContent className="w-40">
{isWatched && onRescan && (
<ContextMenuItem onClick={() => onRescan(folder)}>
<RefreshCw className="mr-2 h-4 w-4" />
<ContextMenuItem onClick={() => handleRescan()}>
<RefreshCw className={cn("mr-2 h-4 w-4", isRescanning && "animate-spin")} />
Re-scan
</ContextMenuItem>
)}
@ -438,10 +448,7 @@ export const FolderNode = React.memo(function FolderNode({
<Move className="mr-2 h-4 w-4" />
Move to...
</ContextMenuItem>
<ContextMenuItem
className="text-destructive focus:text-destructive"
onClick={() => onDelete(folder)}
>
<ContextMenuItem onClick={() => onDelete(folder)}>
<Trash2 className="mr-2 h-4 w-4" />
Delete
</ContextMenuItem>

View file

@ -2,12 +2,11 @@
import { useQuery } from "@rocicorp/zero/react";
import { useAtom, useAtomValue, useSetAtom } from "jotai";
import { ChevronLeft, ChevronRight, Trash2, Unplug } from "lucide-react";
import { ChevronLeft, ChevronRight, FolderClock, Trash2, Unplug } from "lucide-react";
import { useParams } from "next/navigation";
import { useTranslations } from "next-intl";
import { useCallback, useEffect, useMemo, useState } from "react";
import { toast } from "sonner";
import { DocumentsFilters } from "@/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsFilters";
import { sidebarSelectedDocumentsAtom } from "@/atoms/chat/mentioned-documents.atom";
import { connectorDialogOpenAtom } from "@/atoms/connector-dialog/connector-dialog.atoms";
import { connectorsAtom } from "@/atoms/connectors/connector-query.atoms";
@ -18,11 +17,13 @@ import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
import { rightPanelCollapsedAtom } from "@/atoms/layout/right-panel.atom";
import { CreateFolderDialog } from "@/components/documents/CreateFolderDialog";
import type { DocumentNodeDoc } from "@/components/documents/DocumentNode";
import { DocumentsFilters } from "@/components/documents/DocumentsFilters";
import type { FolderDisplay } from "@/components/documents/FolderNode";
import { FolderPickerDialog } from "@/components/documents/FolderPickerDialog";
import { FolderTreeView } from "@/components/documents/FolderTreeView";
import { VersionHistoryDialog } from "@/components/documents/version-history";
import { EXPORT_FILE_EXTENSIONS } from "@/components/shared/ExportMenuItems";
import { FolderWatchDialog, type SelectedFolder } from "@/components/sources/FolderWatchDialog";
import {
AlertDialog,
AlertDialogAction,
@ -97,6 +98,21 @@ export function DocumentsSidebar({
const debouncedSearch = useDebouncedValue(search, 250);
const [activeTypes, setActiveTypes] = useState<DocumentTypeEnum[]>([]);
const [watchedFolderIds, setWatchedFolderIds] = useState<Set<number>>(new Set());
const [folderWatchOpen, setFolderWatchOpen] = useState(false);
const [watchInitialFolder, setWatchInitialFolder] = useState<SelectedFolder | null>(null);
const isElectron = typeof window !== "undefined" && !!window.electronAPI;
const handleWatchLocalFolder = useCallback(async () => {
const api = window.electronAPI;
if (!api?.selectFolder) return;
const folderPath = await api.selectFolder();
if (!folderPath) return;
const folderName = folderPath.split("/").pop() || folderPath.split("\\").pop() || folderPath;
setWatchInitialFolder({ path: folderPath, name: folderName });
setFolderWatchOpen(true);
}, []);
useEffect(() => {
if (!electronAPI?.getWatchedFolders) return;
@ -293,6 +309,7 @@ export function DocumentsSidebar({
folder_name: matched.name,
search_space_id: searchSpaceId,
root_folder_id: folder.id,
file_extensions: matched.fileExtensions ?? undefined,
});
toast.success(`Re-scanning folder: ${matched.name}`);
} catch (err) {
@ -752,6 +769,17 @@ export function DocumentsSidebar({
</button>
</div>
{isElectron && (
<button
type="button"
onClick={handleWatchLocalFolder}
className="shrink-0 mx-4 mb-4 flex select-none items-center gap-2 rounded-lg border bg-muted/50 px-3 py-2 transition-colors hover:bg-muted/80"
>
<FolderClock className="size-4 shrink-0 text-muted-foreground" />
<span className="truncate text-xs text-muted-foreground">Watch local folder</span>
</button>
)}
<div className="flex-1 min-h-0 pt-0 flex flex-col">
<div className="px-4 pb-2">
<DocumentsFilters
@ -830,6 +858,18 @@ export function DocumentsSidebar({
/>
)}
{isElectron && (
<FolderWatchDialog
open={folderWatchOpen}
onOpenChange={(nextOpen) => {
setFolderWatchOpen(nextOpen);
if (!nextOpen) setWatchInitialFolder(null);
}}
searchSpaceId={searchSpaceId}
initialFolder={watchInitialFolder}
/>
)}
<FolderPickerDialog
open={folderPickerOpen}
onOpenChange={setFolderPickerOpen}

View file

@ -21,9 +21,9 @@ import {
import { useParams, useRouter } from "next/navigation";
import { useTranslations } from "next-intl";
import { useCallback, useDeferredValue, useEffect, useMemo, useRef, useState } from "react";
import { getDocumentTypeLabel } from "@/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon";
import { setTargetCommentIdAtom } from "@/atoms/chat/current-thread.atom";
import { convertRenderedToDisplay } from "@/components/chat-comments/comment-item/comment-item";
import { getDocumentTypeLabel } from "@/components/documents/DocumentTypeIcon";
import { Tabs, TabsList, TabsTrigger } from "@/components/ui/animated-tabs";
import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar";
import { Button } from "@/components/ui/button";

View file

@ -345,9 +345,7 @@ export function VisionConfigDialog({
<CommandInput
placeholder={selectedProvider?.example || "Search model name"}
value={formData.model_name}
onValueChange={(val) =>
setFormData((p) => ({ ...p, model_name: val }))
}
onValueChange={(val) => setFormData((p) => ({ ...p, model_name: val }))}
/>
<CommandList className="max-h-[300px]">
<CommandEmpty>

View file

@ -26,17 +26,16 @@ import { Progress } from "@/components/ui/progress";
import { Spinner } from "@/components/ui/spinner";
import { Switch } from "@/components/ui/switch";
import { useElectronAPI } from "@/hooks/use-platform";
import { documentsApiService } from "@/lib/apis/documents-api.service";
import {
trackDocumentUploadFailure,
trackDocumentUploadStarted,
trackDocumentUploadSuccess,
} from "@/lib/posthog/events";
interface SelectedFolder {
path: string;
name: string;
}
import {
getAcceptedFileTypes,
getSupportedExtensions,
getSupportedExtensionsSet,
} from "@/lib/supported-extensions";
interface DocumentUploadTabProps {
searchSpaceId: string;
@ -44,78 +43,6 @@ interface DocumentUploadTabProps {
onAccordionStateChange?: (isExpanded: boolean) => void;
}
const audioFileTypes = {
"audio/mpeg": [".mp3", ".mpeg", ".mpga"],
"audio/mp4": [".mp4", ".m4a"],
"audio/wav": [".wav"],
"audio/webm": [".webm"],
"text/markdown": [".md", ".markdown"],
"text/plain": [".txt"],
};
const commonTypes = {
"application/pdf": [".pdf"],
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": [".docx"],
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": [".xlsx"],
"application/vnd.openxmlformats-officedocument.presentationml.presentation": [".pptx"],
"text/html": [".html", ".htm"],
"text/csv": [".csv"],
"text/tab-separated-values": [".tsv"],
"image/jpeg": [".jpg", ".jpeg"],
"image/png": [".png"],
"image/bmp": [".bmp"],
"image/webp": [".webp"],
"image/tiff": [".tiff"],
};
const FILE_TYPE_CONFIG: Record<string, Record<string, string[]>> = {
LLAMACLOUD: {
...commonTypes,
"application/msword": [".doc"],
"application/vnd.ms-word.document.macroEnabled.12": [".docm"],
"application/msword-template": [".dot"],
"application/vnd.ms-word.template.macroEnabled.12": [".dotm"],
"application/vnd.ms-powerpoint": [".ppt"],
"application/vnd.ms-powerpoint.template.macroEnabled.12": [".pptm"],
"application/vnd.ms-powerpoint.template": [".pot"],
"application/vnd.openxmlformats-officedocument.presentationml.template": [".potx"],
"application/vnd.ms-excel": [".xls"],
"application/vnd.ms-excel.sheet.macroEnabled.12": [".xlsm"],
"application/vnd.ms-excel.sheet.binary.macroEnabled.12": [".xlsb"],
"application/vnd.ms-excel.workspace": [".xlw"],
"application/rtf": [".rtf"],
"application/xml": [".xml"],
"application/epub+zip": [".epub"],
"image/gif": [".gif"],
"image/svg+xml": [".svg"],
...audioFileTypes,
},
DOCLING: {
...commonTypes,
"text/asciidoc": [".adoc", ".asciidoc"],
"text/html": [".html", ".htm", ".xhtml"],
"image/tiff": [".tiff", ".tif"],
...audioFileTypes,
},
default: {
...commonTypes,
"application/msword": [".doc"],
"message/rfc822": [".eml"],
"application/epub+zip": [".epub"],
"image/heic": [".heic"],
"application/vnd.ms-outlook": [".msg"],
"application/vnd.oasis.opendocument.text": [".odt"],
"text/x-org": [".org"],
"application/pkcs7-signature": [".p7s"],
"application/vnd.ms-powerpoint": [".ppt"],
"text/x-rst": [".rst"],
"application/rtf": [".rtf"],
"application/vnd.ms-excel": [".xls"],
"application/xml": [".xml"],
...audioFileTypes,
},
};
interface FileWithId {
id: string;
file: File;
@ -152,24 +79,16 @@ export function DocumentUploadTab({
}, []);
const electronAPI = useElectronAPI();
const [selectedFolder, setSelectedFolder] = useState<SelectedFolder | null>(null);
const [watchFolder, setWatchFolder] = useState(true);
const [folderSubmitting, setFolderSubmitting] = useState(false);
const isElectron = !!electronAPI?.browseFiles;
const acceptedFileTypes = useMemo(() => {
const etlService = process.env.NEXT_PUBLIC_ETL_SERVICE;
return FILE_TYPE_CONFIG[etlService || "default"] || FILE_TYPE_CONFIG.default;
}, []);
const acceptedFileTypes = useMemo(() => getAcceptedFileTypes(), []);
const supportedExtensions = useMemo(
() => Array.from(new Set(Object.values(acceptedFileTypes).flat())).sort(),
() => getSupportedExtensions(acceptedFileTypes),
[acceptedFileTypes]
);
const supportedExtensionsSet = useMemo(
() => new Set(supportedExtensions.map((ext) => ext.toLowerCase())),
[supportedExtensions]
() => getSupportedExtensionsSet(acceptedFileTypes),
[acceptedFileTypes]
);
const addFiles = useCallback(
@ -199,7 +118,6 @@ export function DocumentUploadTab({
const onDrop = useCallback(
(acceptedFiles: File[]) => {
setSelectedFolder(null);
addFiles(acceptedFiles);
},
[addFiles]
@ -222,26 +140,27 @@ export function DocumentUploadTab({
const paths = await electronAPI.browseFiles();
if (!paths || paths.length === 0) return;
setSelectedFolder(null);
const fileDataList = await electronAPI.readLocalFiles(paths);
const newFiles: FileWithId[] = fileDataList.map((fd) => ({
id: crypto.randomUUID?.() ?? `file-${Date.now()}-${Math.random().toString(36)}`,
file: new File([fd.data], fd.name, { type: fd.mimeType }),
}));
const filtered = fileDataList.filter(
(fd: { name: string; data: ArrayBuffer; mimeType: string }) => {
const ext = fd.name.includes(".") ? `.${fd.name.split(".").pop()?.toLowerCase()}` : "";
return ext !== "" && supportedExtensionsSet.has(ext);
}
);
if (filtered.length === 0) {
toast.error(t("no_supported_files_in_folder"));
return;
}
const newFiles: FileWithId[] = filtered.map(
(fd: { name: string; data: ArrayBuffer; mimeType: string }) => ({
id: crypto.randomUUID?.() ?? `file-${Date.now()}-${Math.random().toString(36)}`,
file: new File([fd.data], fd.name, { type: fd.mimeType }),
})
);
setFiles((prev) => [...prev, ...newFiles]);
}, [electronAPI]);
const handleBrowseFolder = useCallback(async () => {
if (!electronAPI?.selectFolder) return;
const folderPath = await electronAPI.selectFolder();
if (!folderPath) return;
const folderName = folderPath.split("/").pop() || folderPath.split("\\").pop() || folderPath;
setFiles([]);
setSelectedFolder({ path: folderPath, name: folderName });
setWatchFolder(true);
}, [electronAPI]);
}, [electronAPI, supportedExtensionsSet, t]);
const handleFolderChange = useCallback(
(e: ChangeEvent<HTMLInputElement>) => {
@ -275,7 +194,7 @@ export function DocumentUploadTab({
const totalFileSize = files.reduce((total, entry) => total + entry.file.size, 0);
const hasContent = files.length > 0 || selectedFolder !== null;
const hasContent = files.length > 0;
const handleAccordionChange = useCallback(
(value: string) => {
@ -285,52 +204,6 @@ export function DocumentUploadTab({
[onAccordionStateChange]
);
const handleFolderSubmit = useCallback(async () => {
if (!selectedFolder || !electronAPI) return;
setFolderSubmitting(true);
try {
const numericSpaceId = Number(searchSpaceId);
const result = await documentsApiService.folderIndex(numericSpaceId, {
folder_path: selectedFolder.path,
folder_name: selectedFolder.name,
search_space_id: numericSpaceId,
enable_summary: shouldSummarize,
});
const rootFolderId = (result as { root_folder_id?: number })?.root_folder_id ?? null;
if (watchFolder) {
await electronAPI.addWatchedFolder({
path: selectedFolder.path,
name: selectedFolder.name,
excludePatterns: [
".git",
"node_modules",
"__pycache__",
".DS_Store",
".obsidian",
".trash",
],
fileExtensions: null,
rootFolderId,
searchSpaceId: Number(searchSpaceId),
active: true,
});
toast.success(`Watching folder: ${selectedFolder.name}`);
} else {
toast.success(`Syncing folder: ${selectedFolder.name}`);
}
setSelectedFolder(null);
onSuccess?.();
} catch (err) {
toast.error((err as Error)?.message || "Failed to process folder");
} finally {
setFolderSubmitting(false);
}
}, [selectedFolder, watchFolder, searchSpaceId, shouldSummarize, onSuccess, electronAPI]);
const handleUpload = async () => {
setUploadProgress(0);
trackDocumentUploadStarted(Number(searchSpaceId), files.length, totalFileSize);
@ -394,7 +267,7 @@ export function DocumentUploadTab({
<FileIcon className="h-4 w-4 mr-2" />
Files
</DropdownMenuItem>
<DropdownMenuItem onClick={handleBrowseFolder}>
<DropdownMenuItem onClick={() => folderInputRef.current?.click()}>
<FolderOpen className="h-4 w-4 mr-2" />
Folder
</DropdownMenuItem>
@ -456,8 +329,7 @@ export function DocumentUploadTab({
{/* MOBILE DROP ZONE */}
<div className="sm:hidden">
{hasContent ? (
!selectedFolder &&
(isElectron ? (
isElectron ? (
<div className="w-full">{renderBrowseButton({ compact: true, fullWidth: true })}</div>
) : (
<button
@ -467,7 +339,7 @@ export function DocumentUploadTab({
>
Add more files
</button>
))
)
) : (
<button
type="button"
@ -527,66 +399,6 @@ export function DocumentUploadTab({
)}
</div>
{/* FOLDER SELECTED (Electron only — web flattens folder contents into file list) */}
{isElectron && selectedFolder && (
<div className="rounded-lg border border-border p-3 space-y-2">
<div className="flex items-center gap-2 py-1.5 px-2 -mx-1 rounded-md hover:bg-slate-400/5 dark:hover:bg-white/5 group">
<FolderOpen className="h-4 w-4 text-primary shrink-0" />
<div className="min-w-0 flex-1">
<p className="text-sm font-medium truncate">{selectedFolder.name}</p>
<p className="text-xs text-muted-foreground truncate">{selectedFolder.path}</p>
</div>
<Button
variant="ghost"
size="icon"
className="h-7 w-7 shrink-0"
onClick={() => setSelectedFolder(null)}
disabled={folderSubmitting}
>
<X className="h-3.5 w-3.5" />
</Button>
</div>
<div className="rounded-lg bg-slate-400/5 dark:bg-white/5 divide-y divide-border">
<div className="flex items-center justify-between p-3">
<div className="space-y-0.5">
<p className="font-medium text-sm">Watch folder</p>
<p className="text-xs text-muted-foreground">Auto-sync when files change</p>
</div>
<Switch
id="watch-folder-toggle"
checked={watchFolder}
onCheckedChange={setWatchFolder}
/>
</div>
<div className="flex items-center justify-between p-3">
<div className="space-y-0.5">
<p className="font-medium text-sm">Enable AI Summary</p>
<p className="text-xs text-muted-foreground">
Improves search quality but adds latency
</p>
</div>
<Switch checked={shouldSummarize} onCheckedChange={setShouldSummarize} />
</div>
</div>
<Button
className="w-full relative"
onClick={handleFolderSubmit}
disabled={folderSubmitting}
>
<span className={folderSubmitting ? "invisible" : ""}>
{watchFolder ? "Sync & Watch for Changes" : "Sync Folder"}
</span>
{folderSubmitting && (
<span className="absolute inset-0 flex items-center justify-center">
<Spinner size="sm" />
</span>
)}
</Button>
</div>
)}
{/* FILES SELECTED */}
{files.length > 0 && (
<div className="rounded-lg border border-border p-3 space-y-2">

View file

@ -0,0 +1,191 @@
"use client";
import { X } from "lucide-react";
import { useCallback, useEffect, useMemo, useState } from "react";
import { toast } from "sonner";
import { Button } from "@/components/ui/button";
import {
Dialog,
DialogContent,
DialogDescription,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { Spinner } from "@/components/ui/spinner";
import { Switch } from "@/components/ui/switch";
import { documentsApiService } from "@/lib/apis/documents-api.service";
import { getSupportedExtensionsSet } from "@/lib/supported-extensions";
/** A local folder chosen by the user for indexing/watching. */
export interface SelectedFolder {
// Filesystem path as returned by the folder picker; sent as `folder_path` when indexing.
path: string;
// Display name derived from the path; sent as `folder_name` when indexing.
name: string;
}
/** Props accepted by `FolderWatchDialog`. */
interface FolderWatchDialogProps {
// Controlled open state of the dialog.
open: boolean;
// Called whenever the dialog requests to open or close.
onOpenChange: (open: boolean) => void;
// Search space the folder is indexed into; forwarded to the index API and the watcher.
searchSpaceId: number;
// Optional callback invoked after the folder was indexed and registered for watching.
onSuccess?: () => void;
// Optional folder to pre-select when the dialog opens.
initialFolder?: SelectedFolder | null;
}
// Names passed as `excludePatterns` when registering a watched folder:
// VCS data, dependency/cache directories, OS metadata and note-app internals.
const DEFAULT_EXCLUDE_PATTERNS = [
".git",
"node_modules",
"__pycache__",
".DS_Store",
".obsidian",
".trash",
];
/**
 * Dialog for picking a local folder, indexing it, and registering it with the
 * desktop watcher.
 *
 * Flow on submit:
 *  1. `documentsApiService.folderIndex` indexes the folder into `searchSpaceId`.
 *  2. `window.electronAPI.addWatchedFolder` registers the folder, using the
 *     `root_folder_id` returned by step 1 (or `null` when absent).
 *  3. Local state is reset, the dialog is closed and `onSuccess` is called.
 *
 * Any error from step 1 or 2 is surfaced as a toast and the dialog stays open.
 *
 * @param open           Controlled open state.
 * @param onOpenChange   Called when the dialog should open/close.
 * @param searchSpaceId  Target search space for indexing and watching.
 * @param onSuccess      Optional callback fired after a successful submit.
 * @param initialFolder  Optional folder to pre-select when the dialog opens.
 */
export function FolderWatchDialog({
	open,
	onOpenChange,
	searchSpaceId,
	onSuccess,
	initialFolder,
}: FolderWatchDialogProps) {
	const [selectedFolder, setSelectedFolder] = useState<SelectedFolder | null>(null);
	const [shouldSummarize, setShouldSummarize] = useState(false);
	const [submitting, setSubmitting] = useState(false);

	// Adopt the caller-supplied folder each time the dialog is opened with one.
	useEffect(() => {
		if (open && initialFolder) {
			setSelectedFolder(initialFolder);
		}
	}, [open, initialFolder]);

	// Computed once: the extension list is sent to both the index API and the watcher.
	const supportedExtensions = useMemo(() => Array.from(getSupportedExtensionsSet()), []);

	const handleSelectFolder = useCallback(async () => {
		const api = window.electronAPI;
		if (!api?.selectFolder) return;

		const folderPath = await api.selectFolder();
		if (!folderPath) return;

		// Derive the display name from the last non-empty path segment.
		// A path without any "/" is treated as backslash-separated (Windows);
		// previously such paths yielded the whole path as the name because
		// `split("/").pop()` returned the full, truthy string. Dropping empty
		// segments also handles a trailing separator.
		const separator = folderPath.includes("/") ? "/" : "\\";
		const folderName = folderPath.split(separator).filter(Boolean).pop() || folderPath;

		setSelectedFolder({ path: folderPath, name: folderName });
	}, []);

	const handleSubmit = useCallback(async () => {
		if (!selectedFolder) return;

		const api = window.electronAPI;
		if (!api) return;

		setSubmitting(true);
		try {
			const result = await documentsApiService.folderIndex(searchSpaceId, {
				folder_path: selectedFolder.path,
				folder_name: selectedFolder.name,
				search_space_id: searchSpaceId,
				enable_summary: shouldSummarize,
				file_extensions: supportedExtensions,
			});

			// The response type does not declare `root_folder_id`; fall back to null when missing.
			const rootFolderId = (result as { root_folder_id?: number })?.root_folder_id ?? null;

			await api.addWatchedFolder({
				path: selectedFolder.path,
				name: selectedFolder.name,
				excludePatterns: DEFAULT_EXCLUDE_PATTERNS,
				fileExtensions: supportedExtensions,
				rootFolderId,
				searchSpaceId,
				active: true,
			});

			toast.success(`Watching folder: ${selectedFolder.name}`);
			setSelectedFolder(null);
			setShouldSummarize(false);
			onOpenChange(false);
			onSuccess?.();
		} catch (err) {
			toast.error((err as Error)?.message || "Failed to watch folder");
		} finally {
			setSubmitting(false);
		}
	}, [
		selectedFolder,
		searchSpaceId,
		shouldSummarize,
		supportedExtensions,
		onOpenChange,
		onSuccess,
	]);

	const handleOpenChange = useCallback(
		(nextOpen: boolean) => {
			// Keep the selection while a submit is in flight; handleSubmit resets it on success.
			if (!nextOpen && !submitting) {
				setSelectedFolder(null);
				setShouldSummarize(false);
			}
			onOpenChange(nextOpen);
		},
		[onOpenChange, submitting]
	);

	return (
		<Dialog open={open} onOpenChange={handleOpenChange}>
			<DialogContent className="sm:max-w-md select-none">
				<DialogHeader>
					<DialogTitle>Watch Local Folder</DialogTitle>
					<DialogDescription>Select a folder to sync and watch for changes.</DialogDescription>
				</DialogHeader>
				<div className="space-y-3 pt-2">
					{selectedFolder ? (
						<div className="flex items-center gap-2 py-1.5 pl-4 pr-2 rounded-md bg-slate-400/5 dark:bg-white/5 overflow-hidden">
							<div className="min-w-0 flex-1 select-text">
								<p className="text-sm font-medium break-all line-clamp-2">{selectedFolder.name}</p>
								<p className="text-xs text-muted-foreground break-all line-clamp-2">
									{selectedFolder.path}
								</p>
							</div>
							<Button
								variant="ghost"
								size="icon"
								className="h-7 w-7 shrink-0"
								onClick={() => setSelectedFolder(null)}
								disabled={submitting}
							>
								<X className="h-3.5 w-3.5" />
							</Button>
						</div>
					) : (
						<button
							type="button"
							onClick={handleSelectFolder}
							className="flex w-full items-center justify-center gap-2 rounded-lg border-2 border-dashed border-muted-foreground/30 py-8 text-sm text-muted-foreground transition-colors hover:border-foreground/50 hover:text-foreground"
						>
							Browse for a folder
						</button>
					)}
					{selectedFolder && (
						<>
							<div className="flex items-center justify-between rounded-lg bg-slate-400/5 dark:bg-white/5 p-3">
								<div className="space-y-0.5">
									<p className="font-medium text-sm">Enable AI Summary</p>
									<p className="text-xs text-muted-foreground">
										Improves search quality but adds latency
									</p>
								</div>
								<Switch checked={shouldSummarize} onCheckedChange={setShouldSummarize} />
							</div>
							<Button className="w-full relative" onClick={handleSubmit} disabled={submitting}>
								{/* Label stays in the layout (invisible) so the button keeps its size under the spinner. */}
								<span className={submitting ? "invisible" : ""}>Start Folder Sync</span>
								{submitting && (
									<span className="absolute inset-0 flex items-center justify-center">
										<Spinner size="sm" />
									</span>
								)}
							</Button>
						</>
					)}
				</div>
			</DialogContent>
		</Dialog>
	);
}

View file

@ -72,7 +72,7 @@ function TooltipContent({
data-slot="tooltip-content"
sideOffset={sideOffset}
className={cn(
"bg-black text-white font-medium shadow-xl px-3 py-1.5 dark:bg-zinc-800 dark:text-zinc-50 border-none animate-in fade-in-0 zoom-in-95 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-fit rounded-md text-xs text-balance pointer-events-none select-none",
"bg-black text-white font-medium shadow-xl px-3 py-1.5 dark:bg-zinc-800 dark:text-zinc-50 border-none animate-in fade-in-0 zoom-in-95 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-fit rounded-md text-xs text-pretty pointer-events-none select-none",
className
)}
{...props}

View file

@ -107,22 +107,62 @@ export const VISION_MODELS: LLMModel[] = [
{ value: "gpt-4o", label: "GPT-4o", provider: "OPENAI", contextWindow: "128K" },
{ value: "gpt-4o-mini", label: "GPT-4o Mini", provider: "OPENAI", contextWindow: "128K" },
{ value: "gpt-4-turbo", label: "GPT-4 Turbo", provider: "OPENAI", contextWindow: "128K" },
{ value: "claude-sonnet-4-20250514", label: "Claude Sonnet 4", provider: "ANTHROPIC", contextWindow: "200K" },
{ value: "claude-3-7-sonnet-20250219", label: "Claude 3.7 Sonnet", provider: "ANTHROPIC", contextWindow: "200K" },
{ value: "claude-3-5-sonnet-20241022", label: "Claude 3.5 Sonnet", provider: "ANTHROPIC", contextWindow: "200K" },
{ value: "claude-3-opus-20240229", label: "Claude 3 Opus", provider: "ANTHROPIC", contextWindow: "200K" },
{ value: "claude-3-haiku-20240307", label: "Claude 3 Haiku", provider: "ANTHROPIC", contextWindow: "200K" },
{
value: "claude-sonnet-4-20250514",
label: "Claude Sonnet 4",
provider: "ANTHROPIC",
contextWindow: "200K",
},
{
value: "claude-3-7-sonnet-20250219",
label: "Claude 3.7 Sonnet",
provider: "ANTHROPIC",
contextWindow: "200K",
},
{
value: "claude-3-5-sonnet-20241022",
label: "Claude 3.5 Sonnet",
provider: "ANTHROPIC",
contextWindow: "200K",
},
{
value: "claude-3-opus-20240229",
label: "Claude 3 Opus",
provider: "ANTHROPIC",
contextWindow: "200K",
},
{
value: "claude-3-haiku-20240307",
label: "Claude 3 Haiku",
provider: "ANTHROPIC",
contextWindow: "200K",
},
{ value: "gemini-2.5-flash", label: "Gemini 2.5 Flash", provider: "GOOGLE", contextWindow: "1M" },
{ value: "gemini-2.5-pro", label: "Gemini 2.5 Pro", provider: "GOOGLE", contextWindow: "1M" },
{ value: "gemini-2.0-flash", label: "Gemini 2.0 Flash", provider: "GOOGLE", contextWindow: "1M" },
{ value: "gemini-1.5-pro", label: "Gemini 1.5 Pro", provider: "GOOGLE", contextWindow: "1M" },
{ value: "gemini-1.5-flash", label: "Gemini 1.5 Flash", provider: "GOOGLE", contextWindow: "1M" },
{ value: "pixtral-large-latest", label: "Pixtral Large", provider: "MISTRAL", contextWindow: "128K" },
{
value: "pixtral-large-latest",
label: "Pixtral Large",
provider: "MISTRAL",
contextWindow: "128K",
},
{ value: "pixtral-12b-2409", label: "Pixtral 12B", provider: "MISTRAL", contextWindow: "128K" },
{ value: "grok-2-vision-1212", label: "Grok 2 Vision", provider: "XAI", contextWindow: "32K" },
{ value: "llava", label: "LLaVA", provider: "OLLAMA" },
{ value: "bakllava", label: "BakLLaVA", provider: "OLLAMA" },
{ value: "llava-llama3", label: "LLaVA Llama 3", provider: "OLLAMA" },
{ value: "llama-4-scout-17b-16e-instruct", label: "Llama 4 Scout 17B", provider: "GROQ", contextWindow: "128K" },
{ value: "meta-llama/Llama-4-Scout-17B-16E-Instruct", label: "Llama 4 Scout 17B", provider: "TOGETHER_AI", contextWindow: "128K" },
{
value: "llama-4-scout-17b-16e-instruct",
label: "Llama 4 Scout 17B",
provider: "GROQ",
contextWindow: "128K",
},
{
value: "meta-llama/Llama-4-Scout-17B-16E-Instruct",
label: "Llama 4 Scout 17B",
provider: "TOGETHER_AI",
contextWindow: "128K",
},
];

View file

@ -19,7 +19,7 @@ export const AUTH_TYPE = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE || "G
// Placeholder: __NEXT_PUBLIC_FASTAPI_BACKEND_URL__
// Base URL of the backend. Falls back to "http://localhost:8000" when
// NEXT_PUBLIC_FASTAPI_BACKEND_URL is unset or empty.
export const BACKEND_URL = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000";
// ETL Service: "DOCLING" or "UNSTRUCTURED"
// ETL Service: "DOCLING", "UNSTRUCTURED", or "LLAMACLOUD"
// Placeholder: __NEXT_PUBLIC_ETL_SERVICE__
// Falls back to "DOCLING" when NEXT_PUBLIC_ETL_SERVICE is unset or empty.
export const ETL_SERVICE = process.env.NEXT_PUBLIC_ETL_SERVICE || "DOCLING";

View file

@ -0,0 +1,92 @@
// MIME type -> file extensions that are spread into every FILE_TYPE_CONFIG entry.
// Despite the name, this map also carries Markdown and plain-text types, not only audio.
const audioFileTypes: Record<string, string[]> = {
"audio/mpeg": [".mp3", ".mpeg", ".mpga"],
// Note: ".mp4" is accepted here under the audio/mp4 MIME type.
"audio/mp4": [".mp4", ".m4a"],
"audio/wav": [".wav"],
"audio/webm": [".webm"],
"text/markdown": [".md", ".markdown"],
"text/plain": [".txt"],
};
// MIME type -> file extensions for the document, tabular, and image formats that form
// the base of every FILE_TYPE_CONFIG entry. Entries spread this map first, so a
// service-specific key declared after the spread replaces the list given here
// (DOCLING does this for "text/html" and "image/tiff").
const commonTypes: Record<string, string[]> = {
"application/pdf": [".pdf"],
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": [".docx"],
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": [".xlsx"],
"application/vnd.openxmlformats-officedocument.presentationml.presentation": [".pptx"],
"text/html": [".html", ".htm"],
"text/csv": [".csv"],
"text/tab-separated-values": [".tsv"],
"image/jpeg": [".jpg", ".jpeg"],
"image/png": [".png"],
"image/bmp": [".bmp"],
"image/webp": [".webp"],
// Only ".tiff" here; the DOCLING entry additionally lists ".tif".
"image/tiff": [".tiff"],
};
/**
 * Accepted upload types per ETL service.
 *
 * Outer key: ETL service name ("LLAMACLOUD", "DOCLING", "AZURE_DI") or "default",
 * which getAcceptedFileTypes() uses when no service-specific entry is found.
 * Inner map: MIME type -> list of file extensions (with leading dot).
 *
 * Every entry starts from `commonTypes` and ends with `audioFileTypes`. Because
 * object spread lets later keys win, a key declared after `...commonTypes`
 * replaces the shared list for that MIME type.
 */
export const FILE_TYPE_CONFIG: Record<string, Record<string, string[]>> = {
	LLAMACLOUD: {
		...commonTypes,
		"application/msword": [".doc"],
		"application/vnd.ms-word.document.macroEnabled.12": [".docm"],
		// NOTE(review): this key and the ".pot" / ".xlw" keys below do not look like
		// registered media types; presumably they exist only to keep object keys
		// unique. Confirm before relying on them for MIME matching.
		"application/msword-template": [".dot"],
		"application/vnd.ms-word.template.macroEnabled.12": [".dotm"],
		"application/vnd.ms-powerpoint": [".ppt"],
		// ".pptm" is a macro-enabled *presentation*; the "template.macroEnabled.12"
		// media type previously used here belongs to ".potm".
		"application/vnd.ms-powerpoint.presentation.macroEnabled.12": [".pptm"],
		"application/vnd.ms-powerpoint.template": [".pot"],
		"application/vnd.openxmlformats-officedocument.presentationml.template": [".potx"],
		"application/vnd.ms-excel": [".xls"],
		"application/vnd.ms-excel.sheet.macroEnabled.12": [".xlsm"],
		"application/vnd.ms-excel.sheet.binary.macroEnabled.12": [".xlsb"],
		"application/vnd.ms-excel.workspace": [".xlw"],
		"application/rtf": [".rtf"],
		"application/xml": [".xml"],
		"application/epub+zip": [".epub"],
		"image/gif": [".gif"],
		"image/svg+xml": [".svg"],
		...audioFileTypes,
	},
	DOCLING: {
		...commonTypes,
		"text/asciidoc": [".adoc", ".asciidoc"],
		// Overrides the shared list to add ".xhtml".
		"text/html": [".html", ".htm", ".xhtml"],
		// Overrides the shared list to add ".tif".
		"image/tiff": [".tiff", ".tif"],
		...audioFileTypes,
	},
	AZURE_DI: {
		...commonTypes,
		"image/heic": [".heic"],
		...audioFileTypes,
	},
	// Fallback entry; also what a service name without its own key resolves to.
	default: {
		...commonTypes,
		"application/msword": [".doc"],
		"message/rfc822": [".eml"],
		"application/epub+zip": [".epub"],
		"image/heic": [".heic"],
		"application/vnd.ms-outlook": [".msg"],
		"application/vnd.oasis.opendocument.text": [".odt"],
		"text/x-org": [".org"],
		"application/pkcs7-signature": [".p7s"],
		"application/vnd.ms-powerpoint": [".ppt"],
		"text/x-rst": [".rst"],
		"application/rtf": [".rtf"],
		"application/vnd.ms-excel": [".xls"],
		"application/xml": [".xml"],
		...audioFileTypes,
	},
};
/**
 * Resolve the MIME type -> extensions map for the configured ETL service.
 *
 * Reads NEXT_PUBLIC_ETL_SERVICE and looks it up in FILE_TYPE_CONFIG. An unset or
 * empty variable, or a value whose lookup yields nothing (FILE_TYPE_CONFIG has no
 * "UNSTRUCTURED" key, for example), resolves to FILE_TYPE_CONFIG.default.
 *
 * @returns The accepted-types map for the active service.
 */
export function getAcceptedFileTypes(): Record<string, string[]> {
	const configKey = process.env.NEXT_PUBLIC_ETL_SERVICE || "default";
	const serviceTypes = FILE_TYPE_CONFIG[configKey];
	return serviceTypes ? serviceTypes : FILE_TYPE_CONFIG.default;
}
/**
 * List every distinct file extension in an accepted-types map, sorted.
 *
 * @param acceptedFileTypes MIME type -> extensions map; when omitted, the map
 *   from getAcceptedFileTypes() is used.
 * @returns De-duplicated extensions in default string sort order. Casing is
 *   left exactly as it appears in the map.
 */
export function getSupportedExtensions(acceptedFileTypes?: Record<string, string[]>): string[] {
	const mimeToExtensions = acceptedFileTypes ?? getAcceptedFileTypes();
	const distinct = new Set<string>();
	for (const extensionList of Object.values(mimeToExtensions)) {
		for (const extension of extensionList) {
			distinct.add(extension);
		}
	}
	const ordered = Array.from(distinct);
	ordered.sort();
	return ordered;
}
/**
 * Build a lower-cased Set of the supported extensions.
 *
 * @param acceptedFileTypes Optional MIME type -> extensions map, forwarded
 *   unchanged to getSupportedExtensions().
 * @returns A Set containing each supported extension converted to lower case.
 */
export function getSupportedExtensionsSet(
	acceptedFileTypes?: Record<string, string[]>
): Set<string> {
	const lowered = new Set<string>();
	for (const extension of getSupportedExtensions(acceptedFileTypes)) {
		lowered.add(extension.toLowerCase());
	}
	return lowered;
}

View file

@ -1,4 +1,4 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="#222326" width="200" height="200" viewBox="0 0 100 100">
<svg xmlns="http://www.w3.org/2000/svg" fill="#8299FF" width="200" height="200" viewBox="0 0 100 100">
<path
d="M1.22541 61.5228c-.2225-.9485.90748-1.5459 1.59638-.857L39.3342 97.1782c.6889.6889.0915 1.8189-.857 1.5964C20.0515 94.4522 5.54779 79.9485 1.22541 61.5228ZM.00189135 46.8891c-.01764375.2833.08887215.5599.28957165.7606L52.3503 99.7085c.2007.2007.4773.3075.7606.2896 2.3692-.1476 4.6938-.46 6.9624-.9259.7645-.157 1.0301-1.0963.4782-1.6481L2.57595 39.4485c-.55186-.5519-1.49117-.2863-1.648174.4782-.465915 2.2686-.77832 4.5932-.92588465 6.9624ZM4.21093 29.7054c-.16649.3738-.08169.8106.20765 1.1l64.77602 64.776c.2894.2894.7262.3742 1.1.2077 1.7861-.7956 3.5171-1.6927 5.1855-2.684.5521-.328.6373-1.0867.1832-1.5407L8.43566 24.3367c-.45409-.4541-1.21271-.3689-1.54074.1832-.99132 1.6684-1.88843 3.3994-2.68399 5.1855ZM12.6587 18.074c-.3701-.3701-.393-.9637-.0443-1.3541C21.7795 6.45931 35.1114 0 49.9519 0 77.5927 0 100 22.4073 100 50.0481c0 14.8405-6.4593 28.1724-16.7199 37.3375-.3903.3487-.984.3258-1.3542-.0443L12.6587 18.074Z"
/>

Before

Width:  |  Height:  |  Size: 1 KiB

After

Width:  |  Height:  |  Size: 1 KiB

Before After
Before After