fix: resolve UTF-8 string slicing bugs and feature flag issues

- Fix silently dropped responses in stdio.rs: when JSON serialization
  of a response fails, clients now receive a fallback JSON-RPC
  "Internal error" response (code -32603) instead of hanging
- Fix UTF-8 panics in keyword.rs: use char-aware slicing instead of
  byte offsets for query sanitization and term highlighting
- Fix UTF-8 panics in prospective_memory.rs: replace hard-coded byte
  offsets with char-aware slicing for natural language parsing
- Fix UTF-8 panics in git.rs: convert byte positions to char positions
  before slicing commit messages
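- Fix feature flag bug in vestige-mcp: add a proper [features] section
  that forwards the embeddings and vector-search features to
  vestige-core (both enabled by default) in place of the hard-coded
  features = ["full"] on the dependency, so that the
  #[cfg(feature = "embeddings")] initialization code is compiled in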

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Sam Valladares 2026-01-27 01:14:59 -06:00
parent f10367ecd0
commit bfa91474a6
6 changed files with 70 additions and 44 deletions

2
Cargo.lock generated
View file

@ -3511,7 +3511,7 @@ dependencies = [
[[package]]
name = "vestige-mcp"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"anyhow",
"chrono",

View file

@ -540,8 +540,10 @@ impl GitAnalyzer {
// Extract the description (first line, removing the prefix)
let first_line = message.lines().next().unwrap_or("");
let symptom = if let Some(colon_pos) = first_line.find(':') {
first_line[colon_pos + 1..].trim().to_string()
let symptom = if let Some(colon_byte_pos) = first_line.find(':') {
// Convert byte position to char position for safe slicing
let colon_char_pos = first_line[..colon_byte_pos].chars().count();
first_line.chars().skip(colon_char_pos + 1).collect::<String>().trim().to_string()
} else {
first_line.to_string()
};
@ -574,10 +576,13 @@ impl GitAnalyzer {
|| line_lower.contains("closes #")
|| line_lower.contains("resolves #")
{
// Extract issue number
if let Some(hash_pos) = line.find('#') {
let issue_num: String = line[hash_pos + 1..]
// Extract issue number (using char-aware iteration)
if let Some(hash_byte_pos) = line.find('#') {
// Convert byte position to char position for safe slicing
let hash_char_pos = line[..hash_byte_pos].chars().count();
let issue_num: String = line
.chars()
.skip(hash_char_pos + 1)
.take_while(|c| c.is_ascii_digit())
.collect();
if !issue_num.is_empty() {

View file

@ -959,23 +959,26 @@ impl IntentionParser {
original: &str,
) -> Result<(IntentionTrigger, String)> {
// Check for "remind me to X when Y" pattern
if let Some(when_idx) = text_lower.find(" when ") {
let content_part = if text_lower.starts_with("remind me to ") {
&original[13..when_idx]
if let Some(when_byte_idx) = text_lower.find(" when ") {
// Convert byte index to char index for safe slicing
let when_char_idx = text_lower[..when_byte_idx].chars().count();
let content_part: String = if text_lower.starts_with("remind me to ") {
original.chars().skip(13).take(when_char_idx.saturating_sub(13)).collect()
} else if text_lower.starts_with("remind me ") {
&original[10..when_idx]
original.chars().skip(10).take(when_char_idx.saturating_sub(10)).collect()
} else {
&original[..when_idx]
original.chars().take(when_char_idx).collect()
};
let condition_part = &original[when_idx + 6..];
let condition_part: String = original.chars().skip(when_char_idx + 6).collect();
return Ok((
IntentionTrigger::EventBased {
condition: condition_part.to_string(),
pattern: TriggerPattern::contains(condition_part),
condition: condition_part.clone(),
pattern: TriggerPattern::contains(&condition_part),
},
content_part.to_string(),
content_part,
));
}
@ -992,10 +995,11 @@ impl IntentionParser {
// For now, treat as a simple event trigger
let parts: Vec<&str> = original.splitn(2, " at ").collect();
if parts.len() == 2 {
let content = if parts[0].to_lowercase().starts_with("remind me to ") {
parts[0][13..].to_string()
} else if parts[0].to_lowercase().starts_with("remind me ") {
parts[0][10..].to_string()
let part0_lower = parts[0].to_lowercase();
let content: String = if part0_lower.starts_with("remind me to ") {
parts[0].chars().skip(13).collect()
} else if part0_lower.starts_with("remind me ") {
parts[0].chars().skip(10).collect()
} else {
parts[0].to_string()
};
@ -1014,14 +1018,15 @@ impl IntentionParser {
|| text_lower.starts_with("don't forget to ")
|| text_lower.starts_with("remember to ")
{
let content = if text_lower.starts_with("i should ") {
original[9..].to_string()
// Use char-aware slicing to avoid UTF-8 boundary issues
let content: String = if text_lower.starts_with("i should ") {
original.chars().skip(9).collect()
} else if text_lower.starts_with("i need to ") {
original[10..].to_string()
original.chars().skip(10).collect()
} else if text_lower.starts_with("don't forget to ") {
original[16..].to_string()
original.chars().skip(16).collect()
} else {
original[12..].to_string()
original.chars().skip(12).collect()
};
// Extract entity if mentioned

View file

@ -18,12 +18,8 @@ const FTS5_OPERATORS: &[&str] = &["OR", "AND", "NOT", "NEAR"];
/// - Prefix/suffix wildcards for data extraction
/// - DoS via complex query patterns
pub fn sanitize_fts5_query(query: &str) -> String {
// Limit query length to prevent DoS
let limited = if query.len() > 1000 {
&query[..1000]
} else {
query
};
// Limit query length to prevent DoS (char-aware to avoid UTF-8 boundary issues)
let limited: String = query.chars().take(1000).collect();
// Remove FTS5 special characters and operators
let mut sanitized = limited.to_string();
@ -44,12 +40,16 @@ pub fn sanitize_fts5_query(query: &str) -> String {
sanitized = sanitized.replace(&pattern, " ");
sanitized = sanitized.replace(&pattern.to_lowercase(), " ");
// Handle operators at start/end
if sanitized.to_uppercase().starts_with(&format!("{} ", op)) {
sanitized = sanitized[op.len()..].to_string();
// Handle operators at start/end (using char-aware operations)
let upper = sanitized.to_uppercase();
let start_pattern = format!("{} ", op);
if upper.starts_with(&start_pattern) {
sanitized = sanitized.chars().skip(op.len()).collect();
}
if sanitized.to_uppercase().ends_with(&format!(" {}", op)) {
sanitized = sanitized[..sanitized.len() - op.len()].to_string();
let end_pattern = format!(" {}", op);
if upper.ends_with(&end_pattern) {
let char_count = sanitized.chars().count();
sanitized = sanitized.chars().take(char_count.saturating_sub(op.len())).collect();
}
}
@ -170,15 +170,18 @@ impl KeywordSearcher {
let lower_text = result.to_lowercase();
let lower_term = term.to_lowercase();
if let Some(pos) = lower_text.find(&lower_term) {
let matched = &result[pos..pos + term.len()];
if let Some(byte_pos) = lower_text.find(&lower_term) {
// Convert byte position to char position for safe slicing
let char_pos = lower_text[..byte_pos].chars().count();
let term_char_len = lower_term.chars().count();
// Extract matched portion using char indices
let prefix: String = result.chars().take(char_pos).collect();
let matched: String = result.chars().skip(char_pos).take(term_char_len).collect();
let suffix: String = result.chars().skip(char_pos + term_char_len).collect();
let highlighted = format!("**{}**", matched);
result = format!(
"{}{}{}",
&result[..pos],
highlighted,
&result[pos + term.len()..]
);
result = format!("{}{}{}", prefix, highlighted, suffix);
}
}

View file

@ -9,6 +9,11 @@ keywords = ["mcp", "ai", "memory", "fsrs", "neuroscience", "cognitive-science",
categories = ["command-line-utilities", "database"]
repository = "https://github.com/samvallad33/vestige"
[features]
default = ["embeddings", "vector-search"]
embeddings = ["vestige-core/embeddings"]
vector-search = ["vestige-core/vector-search"]
[[bin]]
name = "vestige-mcp"
path = "src/main.rs"
@ -27,7 +32,7 @@ path = "src/bin/cli.rs"
# ============================================================================
# Includes: FSRS-6, spreading activation, synaptic tagging, hippocampal indexing,
# memory states, context memory, importance signals, dreams, and more
vestige-core = { version = "1.0.0", path = "../vestige-core", features = ["full"] }
vestige-core = { version = "1.0.0", path = "../vestige-core" }
# ============================================================================
# MCP Server Dependencies

View file

@ -52,6 +52,10 @@ impl StdioTransport {
}
Err(e) => {
error!("Failed to serialize error response: {}", e);
// Send a minimal error response so client doesn't hang
let fallback = r#"{"jsonrpc":"2.0","id":null,"error":{"code":-32603,"message":"Internal error"}}"#;
let _ = writeln!(stdout, "{}", fallback);
let _ = stdout.flush();
}
}
continue;
@ -68,6 +72,10 @@ impl StdioTransport {
}
Err(e) => {
error!("Failed to serialize response: {}", e);
// Send a minimal error response so client doesn't hang
let fallback = r#"{"jsonrpc":"2.0","id":null,"error":{"code":-32603,"message":"Internal error"}}"#;
let _ = writeln!(stdout, "{}", fallback);
let _ = stdout.flush();
}
}
}