mirror of
https://github.com/katanemo/plano.git
synced 2026-04-28 18:36:34 +02:00
* Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode
19 lines
592 B
Python
19 lines
592 B
Python
import numpy as np
|
|
|
|
|
|
def split_text_into_chunks(text, max_words=300):
    """Split *text* into whitespace-joined chunks of at most *max_words* words.

    The downstream tokenizer accepts at most 512 tokens; 300 words is used
    as a conservative approximation (roughly 1 word ≈ 1 token).

    Args:
        text: Input string; split on any whitespace.
        max_words: Maximum number of words per chunk.

    Returns:
        List of chunk strings. Empty input yields an empty list.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), max_words):
        pieces.append(" ".join(tokens[start : start + max_words]))
    return pieces
|
|
|
|
|
|
def softmax(x):
    """Compute the softmax of *x* along axis 0.

    Subtracts the maximum before exponentiating (a standard trick): the
    result is mathematically identical, but avoids overflow to inf/NaN
    that ``np.exp(x)`` produces for large inputs (e.g. x ~ 1000).

    Args:
        x: Array-like of scores (logits).

    Returns:
        ``np.ndarray`` of the same shape, non-negative, summing to 1
        along axis 0.
    """
    # Compute the exponentials once instead of twice, on shifted values.
    exps = np.exp(x - np.max(x))
    return exps / exps.sum(axis=0)