mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-03 21:02:40 +02:00
chore: updated chonkie and temp fix for azure embeddings registry
- TODO: Raise PR in upstream for fix in next version
This commit is contained in:
parent
5addc317f0
commit
71e4860495
4 changed files with 650 additions and 284 deletions
|
|
@ -3,9 +3,27 @@ import shutil
|
|||
from pathlib import Path
|
||||
|
||||
from chonkie import AutoEmbeddings, CodeChunker, RecursiveChunker
|
||||
from chonkie.embeddings.azure_openai import AzureOpenAIEmbeddings
|
||||
from chonkie.embeddings.registry import EmbeddingsRegistry
|
||||
from dotenv import load_dotenv
|
||||
from rerankers import Reranker
|
||||
|
||||
# TODO: Temporary workaround for the Azure embeddings registry; remove once it is fixed upstream in chonkie
|
||||
# Register Azure OpenAI embeddings by provider name, by model-name pattern, and by exact model name
|
||||
# The following arguments are automatically inferred from their corresponding environment variables if they are not provided:
|
||||
# - `api_key` from `AZURE_OPENAI_API_KEY`
|
||||
# - `organization` from `OPENAI_ORG_ID`
|
||||
# - `project` from `OPENAI_PROJECT_ID`
|
||||
# - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN`
|
||||
# - `api_version` from `OPENAI_API_VERSION`
|
||||
# - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT`
|
||||
EmbeddingsRegistry.register_provider("azure_openai", AzureOpenAIEmbeddings)
|
||||
EmbeddingsRegistry.register_pattern(r"^text-embedding-", AzureOpenAIEmbeddings)
|
||||
EmbeddingsRegistry.register_model("text-embedding-ada-002", AzureOpenAIEmbeddings)
|
||||
EmbeddingsRegistry.register_model("text-embedding-3-small", AzureOpenAIEmbeddings)
|
||||
EmbeddingsRegistry.register_model("text-embedding-3-large", AzureOpenAIEmbeddings)
|
||||
|
||||
|
||||
# Get the base directory of the project
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent.parent
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue