mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-08 15:22:39 +02:00
refactor: improve content extraction and encoding handling
- Enhanced Azure Document Intelligence parser to raise an error for empty or whitespace-only content.
- Updated LLMRouterService to log premium model strings more clearly.
- Added automatic encoding detection for file reading in document processors.
- Improved error handling for empty markdown content extraction in file processors.
- Refactored DocumentUploadTab component for better accessibility and user interaction.
This commit is contained in:
parent
4a51ccdc2c
commit
2f793e7a69
5 changed files with 91 additions and 33 deletions
|
|
@@ -62,10 +62,13 @@ async def parse_with_azure_doc_intelligence(
|
|||
f"after {len(attempt_errors)} failures"
|
||||
)
|
||||
|
||||
if not result.content:
|
||||
return ""
|
||||
content = result.content or ""
|
||||
if not content.strip():
|
||||
raise RuntimeError(
|
||||
"Azure Document Intelligence returned empty/whitespace-only content"
|
||||
)
|
||||
|
||||
return result.content
|
||||
return content
|
||||
|
||||
except ClientAuthenticationError:
|
||||
raise
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue