Initial commit: Vestige v1.0.0 - Cognitive memory MCP server

FSRS-6 spaced repetition, spreading activation, synaptic tagging,
hippocampal indexing, and 130 years of memory research.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Sam Valladares 2026-01-25 01:31:03 -06:00
commit f9c60eb5a7
169 changed files with 97206 additions and 0 deletions

View file

@ -0,0 +1,984 @@
//! Context capture for codebase memory
//!
//! This module captures the current working context - what branch you're on,
//! what files you're editing, what the project structure looks like. This
//! context is critical for:
//!
//! - Storing memories with full context for later retrieval
//! - Providing relevant suggestions based on current work
//! - Maintaining continuity across sessions
use std::collections::HashSet;
use std::fs;
use std::path::{Path, PathBuf};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use super::git::{GitAnalyzer, GitContext, GitError};
// ============================================================================
// ERRORS
// ============================================================================
/// Errors that can occur during context capture
#[derive(Debug, thiserror::Error)]
pub enum ContextError {
#[error("Git error: {0}")]
Git(#[from] GitError),
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
#[error("Path not found: {0}")]
PathNotFound(PathBuf),
}
pub type Result<T> = std::result::Result<T, ContextError>;
// ============================================================================
// PROJECT TYPE DETECTION
// ============================================================================
/// Detected project type based on files present
///
/// Determined by marker files at the project root (`Cargo.toml`,
/// `package.json`, `go.mod`, ...) rather than by scanning sources.
/// Serialized with snake_case variant names.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ProjectType {
    Rust,
    TypeScript,
    JavaScript,
    Python,
    Go,
    Java,
    Kotlin,
    Swift,
    CSharp,
    Cpp,
    Ruby,
    Php,
    /// Multiple languages detected; carries the detected language names.
    Mixed(Vec<String>), // Multiple languages detected
    /// No known marker files were found.
    Unknown,
}
impl ProjectType {
    /// File extensions (without the leading dot) conventionally used by
    /// sources of this project type. `Mixed` and `Unknown` have no
    /// single extension set and return an empty list.
    pub fn extensions(&self) -> Vec<&'static str> {
        let exts: &[&'static str] = match self {
            Self::Rust => &["rs"],
            Self::TypeScript => &["ts", "tsx"],
            Self::JavaScript => &["js", "jsx"],
            Self::Python => &["py"],
            Self::Go => &["go"],
            Self::Java => &["java"],
            Self::Kotlin => &["kt", "kts"],
            Self::Swift => &["swift"],
            Self::CSharp => &["cs"],
            Self::Cpp => &["cpp", "cc", "cxx", "c", "h", "hpp"],
            Self::Ruby => &["rb"],
            Self::Php => &["php"],
            Self::Mixed(_) | Self::Unknown => &[],
        };
        exts.to_vec()
    }

    /// Human-readable language name.
    pub fn language_name(&self) -> &str {
        match self {
            Self::Rust => "Rust",
            Self::TypeScript => "TypeScript",
            Self::JavaScript => "JavaScript",
            Self::Python => "Python",
            Self::Go => "Go",
            Self::Java => "Java",
            Self::Kotlin => "Kotlin",
            Self::Swift => "Swift",
            Self::CSharp => "C#",
            Self::Cpp => "C++",
            Self::Ruby => "Ruby",
            Self::Php => "PHP",
            Self::Mixed(_) => "Mixed",
            Self::Unknown => "Unknown",
        }
    }
}
// ============================================================================
// FRAMEWORK DETECTION
// ============================================================================
/// Known frameworks that can be detected
///
/// Detected heuristically from manifest files (see `detect_frameworks`).
/// `Other` is an escape hatch for frameworks not modeled here.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Framework {
    // Rust
    Tauri,
    Actix,
    Axum,
    Rocket,
    Tokio,
    Diesel,
    SeaOrm,
    // JavaScript/TypeScript
    React,
    Vue,
    Angular,
    Svelte,
    NextJs,
    NuxtJs,
    Express,
    NestJs,
    Deno,
    Bun,
    // Python
    Django,
    Flask,
    FastApi,
    Pytest,
    Poetry,
    // Other
    Spring, // Java
    Rails, // Ruby
    Laravel, // PHP
    DotNet, // C#
    /// Framework not modeled above; carries a free-form name.
    Other(String),
}
impl Framework {
pub fn name(&self) -> &str {
match self {
Self::Tauri => "Tauri",
Self::Actix => "Actix",
Self::Axum => "Axum",
Self::Rocket => "Rocket",
Self::Tokio => "Tokio",
Self::Diesel => "Diesel",
Self::SeaOrm => "SeaORM",
Self::React => "React",
Self::Vue => "Vue",
Self::Angular => "Angular",
Self::Svelte => "Svelte",
Self::NextJs => "Next.js",
Self::NuxtJs => "Nuxt.js",
Self::Express => "Express",
Self::NestJs => "NestJS",
Self::Deno => "Deno",
Self::Bun => "Bun",
Self::Django => "Django",
Self::Flask => "Flask",
Self::FastApi => "FastAPI",
Self::Pytest => "Pytest",
Self::Poetry => "Poetry",
Self::Spring => "Spring",
Self::Rails => "Rails",
Self::Laravel => "Laravel",
Self::DotNet => ".NET",
Self::Other(name) => name,
}
}
}
// ============================================================================
// WORKING CONTEXT
// ============================================================================
/// Complete working context for memory storage
///
/// A point-in-time snapshot of what the developer is working on: git
/// state, project identity, and the files currently in play.
/// Serialized with camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct WorkingContext {
    /// Git context (branch, commits, changes); `None` outside a git
    /// repository or when reading the repository failed.
    pub git: Option<GitContextInfo>,
    /// Currently active file (e.g., file being edited) — the first
    /// entry of the active-file list at capture time.
    pub active_file: Option<PathBuf>,
    /// Project type (Rust, TypeScript, etc.)
    pub project_type: ProjectType,
    /// Detected frameworks
    pub frameworks: Vec<Framework>,
    /// Project name (from Cargo.toml, package.json, etc.)
    pub project_name: Option<String>,
    /// Project root directory
    pub project_root: PathBuf,
    /// When this context was captured
    pub captured_at: DateTime<Utc>,
    /// Recent files (for context) — currently the active-file list.
    pub recent_files: Vec<PathBuf>,
    /// Key configuration files found at the project root
    pub config_files: Vec<PathBuf>,
}
/// Serializable git context info
///
/// A flattened, serde-friendly summary of [`GitContext`] with two
/// precomputed convenience flags.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GitContextInfo {
    /// Current branch name (or short SHA when HEAD is detached).
    pub current_branch: String,
    /// Short SHA of the HEAD commit.
    pub head_commit: String,
    /// Files with unstaged working-tree changes.
    pub uncommitted_changes: Vec<PathBuf>,
    /// Files staged in the index.
    pub staged_changes: Vec<PathBuf>,
    /// True when `uncommitted_changes` is non-empty.
    pub has_uncommitted: bool,
    /// True when both change lists are empty.
    pub is_clean: bool,
}
impl From<GitContext> for GitContextInfo {
    /// Flatten a full `GitContext` into the serializable summary form.
    /// The convenience flags are computed first, before the change
    /// lists are moved into the new struct.
    fn from(ctx: GitContext) -> Self {
        Self {
            has_uncommitted: !ctx.uncommitted_changes.is_empty(),
            is_clean: ctx.uncommitted_changes.is_empty() && ctx.staged_changes.is_empty(),
            current_branch: ctx.current_branch,
            head_commit: ctx.head_commit,
            uncommitted_changes: ctx.uncommitted_changes,
            staged_changes: ctx.staged_changes,
        }
    }
}
// ============================================================================
// FILE CONTEXT
// ============================================================================
/// Context specific to a single file
///
/// Produced by `ContextCapture::context_for_file`; everything here is
/// best-effort (fields fall back to `None`/`false` on failure).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FileContext {
    /// Path to the file
    pub path: PathBuf,
    /// Detected language (lowercase identifier, e.g. "rust"), from the
    /// file extension.
    pub language: Option<String>,
    /// File extension (without the dot)
    pub extension: Option<String>,
    /// Parent directory
    pub directory: PathBuf,
    /// Related files (tests, mod.rs, crate roots, etc.)
    pub related_files: Vec<PathBuf>,
    /// Whether the file has uncommitted or staged changes
    pub has_changes: bool,
    /// Last modified time, from filesystem metadata
    pub last_modified: Option<DateTime<Utc>>,
    /// Whether it's a test file (heuristic)
    pub is_test_file: bool,
    /// Module/package this file belongs to
    pub module: Option<String>,
}
// ============================================================================
// CONTEXT CAPTURE
// ============================================================================
/// Captures and manages working context
///
/// Holds the project root, an optional git analyzer (absent when the
/// root is not a git repository), and the caller-maintained list of
/// currently active files.
pub struct ContextCapture {
    /// Git analyzer for the repository; `None` if not a git repo.
    git: Option<GitAnalyzer>,
    /// Currently active files, most-significant first.
    active_files: Vec<PathBuf>,
    /// Project root directory
    project_root: PathBuf,
}
impl ContextCapture {
/// Create a new context capture rooted at `project_root`.
///
/// Git support is optional: when the directory is not a git repository
/// the analyzer is simply absent and git context is reported as `None`.
pub fn new(project_root: PathBuf) -> Result<Self> {
    Ok(Self {
        git: GitAnalyzer::new(project_root.clone()).ok(),
        active_files: Vec::new(),
        project_root,
    })
}
/// Replace the entire set of currently active files.
pub fn set_active_files(&mut self, files: Vec<PathBuf>) {
    self.active_files = files;
}
/// Record `file` as active; duplicates are silently ignored.
pub fn add_active_file(&mut self, file: PathBuf) {
    let already_tracked = self.active_files.iter().any(|f| *f == file);
    if !already_tracked {
        self.active_files.push(file);
    }
}
/// Drop `file` from the active set (no-op when it is not present).
pub fn remove_active_file(&mut self, file: &Path) {
    self.active_files.retain(|f| f.as_path() != file);
}
/// Capture a full snapshot of the current working context.
///
/// Git information is best-effort: any failure reading the repository
/// leaves the `git` field empty rather than failing the whole capture.
pub fn capture(&self) -> Result<WorkingContext> {
    let git = match &self.git {
        Some(analyzer) => analyzer
            .get_current_context()
            .ok()
            .map(GitContextInfo::from),
        None => None,
    };
    Ok(WorkingContext {
        git,
        active_file: self.active_files.first().cloned(),
        project_type: self.detect_project_type()?,
        frameworks: self.detect_frameworks()?,
        project_name: self.detect_project_name()?,
        project_root: self.project_root.clone(),
        captured_at: Utc::now(),
        recent_files: self.active_files.clone(),
        config_files: self.find_config_files()?,
    })
}
/// Get context specific to a file
///
/// Every field is best-effort: git lookups, metadata reads, and
/// language detection all degrade to `None`/`false` on failure rather
/// than erroring.
pub fn context_for_file(&self, path: &Path) -> Result<FileContext> {
    let extension = path.extension().map(|e| e.to_string_lossy().to_string());
    // Map the extension to a lowercase language identifier. Note that
    // both C and C++ extensions map to "cpp" here.
    let language = extension
        .as_ref()
        .and_then(|ext| match ext.as_str() {
            "rs" => Some("rust"),
            "ts" | "tsx" => Some("typescript"),
            "js" | "jsx" => Some("javascript"),
            "py" => Some("python"),
            "go" => Some("go"),
            "java" => Some("java"),
            "kt" | "kts" => Some("kotlin"),
            "swift" => Some("swift"),
            "cs" => Some("csharp"),
            "cpp" | "cc" | "cxx" | "c" => Some("cpp"),
            "h" | "hpp" => Some("cpp"),
            "rb" => Some("ruby"),
            "php" => Some("php"),
            "sql" => Some("sql"),
            "json" => Some("json"),
            "yaml" | "yml" => Some("yaml"),
            "toml" => Some("toml"),
            "md" => Some("markdown"),
            _ => None,
        })
        .map(|s| s.to_string());
    let directory = path.parent().unwrap_or(Path::new(".")).to_path_buf();
    // Detect related files (tests, mod.rs, crate roots)
    let related_files = self.find_related_files(path)?;
    // Check git status: true when the file appears in either the
    // unstaged or the staged change list. NOTE(review): git status
    // paths are repo-relative — this comparison presumably expects
    // `path` in the same form; confirm against callers.
    let has_changes = self
        .git
        .as_ref()
        .map(|g| {
            g.get_current_context()
                .ok()
                .map(|ctx| {
                    ctx.uncommitted_changes.contains(&path.to_path_buf())
                        || ctx.staged_changes.contains(&path.to_path_buf())
                })
                .unwrap_or(false)
        })
        .unwrap_or(false);
    // Check if test file (heuristic)
    let is_test_file = self.is_test_file(path);
    // Get last modified time from filesystem metadata (best-effort)
    let last_modified = fs::metadata(path)
        .ok()
        .and_then(|m| m.modified().ok().map(|t| DateTime::<Utc>::from(t)));
    // Detect module membership from the path layout
    let module = self.detect_module(path);
    Ok(FileContext {
        path: path.to_path_buf(),
        language,
        extension,
        directory,
        related_files,
        has_changes,
        last_modified,
        is_test_file,
        module,
    })
}
/// Detect the project type based on marker files at the project root.
///
/// Each ecosystem is recognized by its conventional manifest file.
/// When markers for more than one language are found, the result is
/// `ProjectType::Mixed` carrying all detected language names.
fn detect_project_type(&self) -> Result<ProjectType> {
    let mut detected = Vec::new();
    // Check for Rust
    if self.file_exists("Cargo.toml") {
        detected.push("Rust".to_string());
    }
    // Check for JavaScript/TypeScript: package.json is common to both;
    // a tsconfig distinguishes TypeScript.
    if self.file_exists("package.json") {
        if self.file_exists("tsconfig.json") || self.file_exists("tsconfig.base.json") {
            detected.push("TypeScript".to_string());
        } else {
            detected.push("JavaScript".to_string());
        }
    }
    // Check for Python: any of the three packaging conventions counts.
    if self.file_exists("pyproject.toml")
        || self.file_exists("setup.py")
        || self.file_exists("requirements.txt")
    {
        detected.push("Python".to_string());
    }
    // Check for Go
    if self.file_exists("go.mod") {
        detected.push("Go".to_string());
    }
    // Check for Java/Kotlin (Maven or Gradle). Gradle Kotlin DSL
    // projects may carry only `build.gradle.kts` with no plain
    // `build.gradle`, so that file must be part of the outer check too
    // — previously such projects were not detected at all.
    if self.file_exists("pom.xml")
        || self.file_exists("build.gradle")
        || self.file_exists("build.gradle.kts")
    {
        if self.dir_exists("src/main/kotlin") || self.file_exists("build.gradle.kts") {
            detected.push("Kotlin".to_string());
        } else {
            detected.push("Java".to_string());
        }
    }
    // Check for Swift
    if self.file_exists("Package.swift") {
        detected.push("Swift".to_string());
    }
    // Check for C#: project/solution files have arbitrary names.
    if self.glob_exists("*.csproj") || self.glob_exists("*.sln") {
        detected.push("CSharp".to_string());
    }
    // Check for Ruby
    if self.file_exists("Gemfile") {
        detected.push("Ruby".to_string());
    }
    // Check for PHP
    if self.file_exists("composer.json") {
        detected.push("PHP".to_string());
    }
    match detected.len() {
        0 => Ok(ProjectType::Unknown),
        1 => Ok(match detected[0].as_str() {
            "Rust" => ProjectType::Rust,
            "TypeScript" => ProjectType::TypeScript,
            "JavaScript" => ProjectType::JavaScript,
            "Python" => ProjectType::Python,
            "Go" => ProjectType::Go,
            "Java" => ProjectType::Java,
            "Kotlin" => ProjectType::Kotlin,
            "Swift" => ProjectType::Swift,
            "CSharp" => ProjectType::CSharp,
            "Ruby" => ProjectType::Ruby,
            "PHP" => ProjectType::Php,
            _ => ProjectType::Unknown,
        }),
        _ => Ok(ProjectType::Mixed(detected)),
    }
}
/// Detect frameworks used in the project
fn detect_frameworks(&self) -> Result<Vec<Framework>> {
let mut frameworks = Vec::new();
// Rust frameworks
if let Ok(content) = fs::read_to_string(self.project_root.join("Cargo.toml")) {
if content.contains("tauri") {
frameworks.push(Framework::Tauri);
}
if content.contains("actix-web") {
frameworks.push(Framework::Actix);
}
if content.contains("axum") {
frameworks.push(Framework::Axum);
}
if content.contains("rocket") {
frameworks.push(Framework::Rocket);
}
if content.contains("tokio") {
frameworks.push(Framework::Tokio);
}
if content.contains("diesel") {
frameworks.push(Framework::Diesel);
}
if content.contains("sea-orm") {
frameworks.push(Framework::SeaOrm);
}
}
// JavaScript/TypeScript frameworks
if let Ok(content) = fs::read_to_string(self.project_root.join("package.json")) {
if content.contains("\"react\"") || content.contains("\"react\":") {
frameworks.push(Framework::React);
}
if content.contains("\"vue\"") || content.contains("\"vue\":") {
frameworks.push(Framework::Vue);
}
if content.contains("\"@angular/") {
frameworks.push(Framework::Angular);
}
if content.contains("\"svelte\"") {
frameworks.push(Framework::Svelte);
}
if content.contains("\"next\"") || content.contains("\"next\":") {
frameworks.push(Framework::NextJs);
}
if content.contains("\"nuxt\"") || content.contains("\"nuxt\":") {
frameworks.push(Framework::NuxtJs);
}
if content.contains("\"express\"") {
frameworks.push(Framework::Express);
}
if content.contains("\"@nestjs/") {
frameworks.push(Framework::NestJs);
}
}
// Deno
if self.file_exists("deno.json") || self.file_exists("deno.jsonc") {
frameworks.push(Framework::Deno);
}
// Bun
if self.file_exists("bun.lockb") || self.file_exists("bunfig.toml") {
frameworks.push(Framework::Bun);
}
// Python frameworks
if let Ok(content) = fs::read_to_string(self.project_root.join("pyproject.toml")) {
if content.contains("django") {
frameworks.push(Framework::Django);
}
if content.contains("flask") {
frameworks.push(Framework::Flask);
}
if content.contains("fastapi") {
frameworks.push(Framework::FastApi);
}
if content.contains("pytest") {
frameworks.push(Framework::Pytest);
}
if content.contains("[tool.poetry]") {
frameworks.push(Framework::Poetry);
}
}
// Check requirements.txt too
if let Ok(content) = fs::read_to_string(self.project_root.join("requirements.txt")) {
if content.contains("django") && !frameworks.contains(&Framework::Django) {
frameworks.push(Framework::Django);
}
if content.contains("flask") && !frameworks.contains(&Framework::Flask) {
frameworks.push(Framework::Flask);
}
if content.contains("fastapi") && !frameworks.contains(&Framework::FastApi) {
frameworks.push(Framework::FastApi);
}
}
// Java Spring
if let Ok(content) = fs::read_to_string(self.project_root.join("pom.xml")) {
if content.contains("spring") {
frameworks.push(Framework::Spring);
}
}
// Ruby Rails
if self.file_exists("config/routes.rb") {
frameworks.push(Framework::Rails);
}
// PHP Laravel
if self.file_exists("artisan") && self.dir_exists("app/Http") {
frameworks.push(Framework::Laravel);
}
// .NET
if self.glob_exists("*.csproj") {
frameworks.push(Framework::DotNet);
}
Ok(frameworks)
}
/// Detect the project name from config files.
///
/// Manifests are consulted in priority order (Cargo.toml, package.json,
/// pyproject.toml, go.mod); the first hit wins. Falls back to the
/// project directory's own name.
fn detect_project_name(&self) -> Result<Option<String>> {
    let read = |name: &str| fs::read_to_string(self.project_root.join(name)).ok();

    if let Some(name) = read("Cargo.toml").and_then(|c| self.extract_toml_value(&c, "name")) {
        return Ok(Some(name));
    }
    if let Some(name) = read("package.json").and_then(|c| self.extract_json_value(&c, "name")) {
        return Ok(Some(name));
    }
    if let Some(name) = read("pyproject.toml").and_then(|c| self.extract_toml_value(&c, "name")) {
        return Ok(Some(name));
    }
    // go.mod: take the last path segment of the module declaration on
    // the first line, e.g. `module github.com/user/project` -> "project".
    if let Some(content) = read("go.mod") {
        if let Some(line) = content.lines().next() {
            if line.starts_with("module ") {
                let name = line
                    .trim_start_matches("module ")
                    .split('/')
                    .last()
                    .unwrap_or("")
                    .to_string();
                if !name.is_empty() {
                    return Ok(Some(name));
                }
            }
        }
    }
    // Fall back to the directory name.
    Ok(self
        .project_root
        .file_name()
        .map(|n| n.to_string_lossy().to_string()))
}
/// Find well-known configuration files at the project root.
///
/// Only a fixed allow-list of conventional names is probed; the
/// directory is not scanned. Results preserve the list's order.
fn find_config_files(&self) -> Result<Vec<PathBuf>> {
    const CONFIG_NAMES: &[&str] = &[
        "Cargo.toml",
        "package.json",
        "tsconfig.json",
        "pyproject.toml",
        "go.mod",
        ".gitignore",
        ".env",
        ".env.local",
        "docker-compose.yml",
        "docker-compose.yaml",
        "Dockerfile",
        "Makefile",
        "justfile",
        ".editorconfig",
        ".prettierrc",
        ".eslintrc.json",
        "rustfmt.toml",
        ".rustfmt.toml",
        "clippy.toml",
        ".clippy.toml",
        "tauri.conf.json",
    ];
    Ok(CONFIG_NAMES
        .iter()
        .map(|name| self.project_root.join(name))
        .filter(|path| path.exists())
        .collect())
}
/// Find files related to a given file
///
/// "Related" currently means:
/// - sibling test files following common naming patterns,
/// - files in conventional test directories whose stem contains this
///   file's stem,
/// - for Rust sources: `mod.rs` in the same directory plus the crate
///   roots `src/lib.rs` / `src/main.rs`.
///
/// The result is deduplicated; ordering is unspecified because it
/// round-trips through a `HashSet`.
fn find_related_files(&self, path: &Path) -> Result<Vec<PathBuf>> {
    let mut related = Vec::new();
    let file_stem = path.file_stem().map(|s| s.to_string_lossy().to_string());
    let extension = path.extension().map(|s| s.to_string_lossy().to_string());
    let parent = path.parent();
    if let (Some(stem), Some(parent)) = (file_stem, parent) {
        // Look for test files named after this file's stem
        let test_patterns = [
            format!("{}.test", stem),
            format!("{}_test", stem),
            format!("{}.spec", stem),
            format!("test_{}", stem),
        ];
        // Common test directories
        let test_dirs = ["tests", "test", "__tests__", "spec"];
        // Check same directory for test files (exact stem match,
        // case-insensitive)
        if let Ok(entries) = fs::read_dir(parent) {
            for entry in entries.filter_map(|e| e.ok()) {
                let entry_path = entry.path();
                if let Some(entry_stem) = entry_path.file_stem() {
                    let entry_stem = entry_stem.to_string_lossy();
                    for pattern in &test_patterns {
                        if entry_stem.eq_ignore_ascii_case(pattern) {
                            related.push(entry_path.clone());
                            break;
                        }
                    }
                }
            }
        }
        // Check test directories at the project root (looser match:
        // any stem containing this file's stem)
        for test_dir in test_dirs {
            let test_path = self.project_root.join(test_dir);
            if test_path.exists() {
                if let Ok(entries) = fs::read_dir(&test_path) {
                    for entry in entries.filter_map(|e| e.ok()) {
                        let entry_path = entry.path();
                        if let Some(entry_stem) = entry_path.file_stem() {
                            let entry_stem = entry_stem.to_string_lossy();
                            if entry_stem.contains(&stem) {
                                related.push(entry_path);
                            }
                        }
                    }
                }
            }
        }
        // For Rust, look for mod.rs in same directory
        if extension.as_deref() == Some("rs") {
            let mod_path = parent.join("mod.rs");
            if mod_path.exists() && mod_path != path {
                related.push(mod_path);
            }
            // Look for lib.rs or main.rs at project root
            let lib_path = self.project_root.join("src/lib.rs");
            let main_path = self.project_root.join("src/main.rs");
            if lib_path.exists() && lib_path != path {
                related.push(lib_path);
            }
            if main_path.exists() && main_path != path {
                related.push(main_path);
            }
        }
    }
    // Remove duplicates (drops any ordering)
    let related: HashSet<_> = related.into_iter().collect();
    Ok(related.into_iter().collect())
}
/// Heuristically decide whether `path` points at a test file.
///
/// Matches (case-insensitively):
/// - files under a conventional test directory
///   (`test`, `tests`, `__tests__`, `spec`, `specs`), or
/// - file names following common test naming patterns
///   (`test_*`, `*_test.*`, `*.test.*`, `*.spec.*`).
///
/// Unlike the previous plain substring search over the whole path,
/// this does not misclassify files such as `src/latest.rs` or
/// `src/inspector.rs`.
fn is_test_file(&self, path: &Path) -> bool {
    const TEST_DIRS: [&str; 5] = ["test", "tests", "__tests__", "spec", "specs"];
    // Any ancestor directory named like a test directory qualifies.
    let in_test_dir = path
        .parent()
        .map(|dir| {
            dir.components().any(|c| {
                let c = c.as_os_str().to_string_lossy().to_lowercase();
                TEST_DIRS.contains(&c.as_str())
            })
        })
        .unwrap_or(false);
    if in_test_dir {
        return true;
    }
    // Otherwise fall back to file-name conventions.
    path.file_name()
        .map(|n| {
            let n = n.to_string_lossy().to_lowercase();
            n.starts_with("test_")
                || n.contains("_test.")
                || n.contains(".test.")
                || n.contains(".spec.")
        })
        .unwrap_or(false)
}
/// Detect the module a file belongs to
///
/// For Rust files: the `::`-joined directory path under `src/` (the
/// file name itself is not included). For TS/JS files: the `.`-joined
/// directory path under `src/` or `lib/`. Returns `None` when the file
/// sits directly in the source root or outside the project root.
fn detect_module(&self, path: &Path) -> Option<String> {
    // For Rust, use the parent directory name relative to src/
    if path.extension().map(|e| e == "rs").unwrap_or(false) {
        if let Ok(relative) = path.strip_prefix(&self.project_root) {
            if let Ok(src_relative) = relative.strip_prefix("src") {
                // Get the module path (directory components only)
                let components: Vec<_> = src_relative
                    .parent()?
                    .components()
                    .map(|c| c.as_os_str().to_string_lossy().to_string())
                    .collect();
                if !components.is_empty() {
                    return Some(components.join("::"));
                }
            }
        }
    }
    // For TypeScript/JavaScript, use the parent directory
    if path
        .extension()
        .map(|e| e == "ts" || e == "tsx" || e == "js" || e == "jsx")
        .unwrap_or(false)
    {
        if let Ok(relative) = path.strip_prefix(&self.project_root) {
            // Skip src/ or lib/ prefix
            let relative = relative
                .strip_prefix("src")
                .or_else(|_| relative.strip_prefix("lib"))
                .unwrap_or(relative);
            if let Some(parent) = relative.parent() {
                // NOTE(review): replacing '/' assumes Unix-style
                // separators; on Windows the separator is '\' — confirm
                // whether Windows paths matter here.
                let module = parent.to_string_lossy().replace('/', ".");
                if !module.is_empty() {
                    return Some(module);
                }
            }
        }
    }
    None
}
/// True when `name`, resolved against the project root, exists on disk.
fn file_exists(&self, name: &str) -> bool {
    let candidate = self.project_root.join(name);
    candidate.exists()
}
/// True when `name` (relative to the project root) is an existing directory.
fn dir_exists(&self, name: &str) -> bool {
    // `is_dir()` already returns false for missing paths, so no
    // separate existence check is needed.
    self.project_root.join(name).is_dir()
}
/// Check whether any direct child of the project root matches a simple
/// `*.ext` glob. Only the `*.ext` pattern form is supported; any other
/// pattern matches nothing. The scan is non-recursive.
fn glob_exists(&self, pattern: &str) -> bool {
    if !pattern.starts_with("*.") {
        return false;
    }
    // Keep the leading dot: "*.sln" must match "a.sln" but not "nosln".
    let suffix = &pattern[1..];
    match fs::read_dir(&self.project_root) {
        Ok(entries) => entries.filter_map(|e| e.ok()).any(|entry| {
            entry
                .file_name()
                .to_str()
                .map(|name| name.ends_with(suffix))
                .unwrap_or(false)
        }),
        Err(_) => false,
    }
}
/// Simple TOML value extraction (basic, no full parser).
///
/// Returns the value of the first `key = "value"` line anywhere in the
/// document. Limitations: lines inside unrelated tables also match,
/// and quoting is stripped naively.
fn extract_toml_value(&self, content: &str, key: &str) -> Option<String> {
    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.starts_with(&format!("{} ", key))
            || trimmed.starts_with(&format!("{}=", key))
        {
            // Split on the FIRST '=' only so values containing '=' are
            // preserved intact (the old `split('=').nth(1)` truncated
            // them at the second '=').
            if let Some(value) = trimmed.splitn(2, '=').nth(1) {
                let value = value.trim().trim_matches('"').trim_matches('\'');
                return Some(value.to_string());
            }
        }
    }
    None
}
/// Simple JSON value extraction (basic, no full parser)
///
/// Scans line-by-line for `"key"` and returns the quoted text following
/// the first `:` on that line.
///
/// NOTE(review): purely textual — the first line containing `"key"`
/// anywhere wins, so a key nested in another object (or appearing
/// inside a string value) can shadow the intended top-level entry, and
/// unquoted (numeric/bool) values are never extracted. Adequate for the
/// manifest lookups in this module; confirm before reusing elsewhere.
fn extract_json_value(&self, content: &str, key: &str) -> Option<String> {
    // The key as it appears in JSON source, including the quotes.
    let pattern = format!("\"{}\"", key);
    for line in content.lines() {
        if line.contains(&pattern) {
            // Try to extract the value after the colon
            if let Some(colon_pos) = line.find(':') {
                let value = line[colon_pos + 1..].trim();
                // Strip the opening quote, then cut at the closing quote.
                let value = value.trim_start_matches('"');
                if let Some(end) = value.find('"') {
                    return Some(value[..end].to_string());
                }
            }
        }
    }
    None
}
}
// ============================================================================
// TESTS
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build a minimal throwaway Rust project: a Cargo.toml declaring
    /// tokio + axum dependencies and a stub src/main.rs.
    fn create_test_project() -> TempDir {
        let dir = TempDir::new().unwrap();
        // Create Cargo.toml
        fs::write(
            dir.path().join("Cargo.toml"),
            r#"
[package]
name = "test-project"
version = "0.1.0"
[dependencies]
tokio = "1.0"
axum = "0.7"
"#,
        )
        .unwrap();
        // Create src directory
        fs::create_dir(dir.path().join("src")).unwrap();
        fs::write(dir.path().join("src/main.rs"), "fn main() {}").unwrap();
        dir
    }

    #[test]
    fn test_detect_project_type() {
        // A lone Cargo.toml must classify the project as Rust.
        let dir = create_test_project();
        let capture = ContextCapture::new(dir.path().to_path_buf()).unwrap();
        let project_type = capture.detect_project_type().unwrap();
        assert_eq!(project_type, ProjectType::Rust);
    }

    #[test]
    fn test_detect_frameworks() {
        // Dependencies listed in Cargo.toml should surface as frameworks.
        let dir = create_test_project();
        let capture = ContextCapture::new(dir.path().to_path_buf()).unwrap();
        let frameworks = capture.detect_frameworks().unwrap();
        assert!(frameworks.contains(&Framework::Tokio));
        assert!(frameworks.contains(&Framework::Axum));
    }

    #[test]
    fn test_detect_project_name() {
        // The `name` key from Cargo.toml wins over the directory name.
        let dir = create_test_project();
        let capture = ContextCapture::new(dir.path().to_path_buf()).unwrap();
        let name = capture.detect_project_name().unwrap();
        assert_eq!(name, Some("test-project".to_string()));
    }

    #[test]
    fn test_is_test_file() {
        // is_test_file is purely path-based; no project on disk needed.
        let capture = ContextCapture {
            git: None,
            active_files: vec![],
            project_root: PathBuf::from("."),
        };
        assert!(capture.is_test_file(Path::new("src/utils_test.rs")));
        assert!(capture.is_test_file(Path::new("tests/integration.rs")));
        assert!(capture.is_test_file(Path::new("src/utils.test.ts")));
        assert!(!capture.is_test_file(Path::new("src/utils.rs")));
        assert!(!capture.is_test_file(Path::new("src/main.ts")));
    }
}

View file

@ -0,0 +1,798 @@
//! Git history analysis for extracting codebase knowledge
//!
//! This module analyzes git history to automatically extract:
//! - File co-change patterns (files that frequently change together)
//! - Bug fix patterns (from commit messages matching conventional formats)
//! - Current git context (branch, uncommitted changes, recent history)
//!
//! This is a key differentiator for Vestige - learning from the codebase's history
//! without requiring explicit user input.
use chrono::{DateTime, TimeZone, Utc};
use git2::{Commit, Repository, Sort};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use super::types::{BugFix, BugSeverity, FileRelationship, RelationType, RelationshipSource};
// ============================================================================
// ERRORS
// ============================================================================
/// Errors that can occur during git analysis
#[derive(Debug, thiserror::Error)]
pub enum GitError {
#[error("Git repository error: {0}")]
Repository(#[from] git2::Error),
#[error("Repository not found at: {0}")]
NotFound(PathBuf),
#[error("Invalid path: {0}")]
InvalidPath(String),
#[error("No commits found")]
NoCommits,
}
pub type Result<T> = std::result::Result<T, GitError>;
// ============================================================================
// GIT CONTEXT
// ============================================================================
/// Current git context for a repository
///
/// A point-in-time snapshot of branch, HEAD, working-tree/index status,
/// and recent history, produced by `GitAnalyzer::get_current_context`.
#[derive(Debug, Clone)]
pub struct GitContext {
    /// Root path of the repository
    pub repo_root: PathBuf,
    /// Current branch name (short SHA when HEAD is detached)
    pub current_branch: String,
    /// HEAD commit SHA (abbreviated; empty when there are no commits)
    pub head_commit: String,
    /// Files with uncommitted changes (unstaged)
    pub uncommitted_changes: Vec<PathBuf>,
    /// Files staged for commit
    pub staged_changes: Vec<PathBuf>,
    /// Recent commits (up to 10, newest first)
    pub recent_commits: Vec<CommitInfo>,
    /// Whether the repository has any commits
    pub has_commits: bool,
    /// Whether there are untracked files
    pub has_untracked: bool,
}
/// Information about a git commit
///
/// A plain-data view of a commit, detached from the `git2` object graph
/// so it can be held and passed around freely.
#[derive(Debug, Clone)]
pub struct CommitInfo {
    /// Commit SHA (short, first 8 hex chars)
    pub sha: String,
    /// Full commit SHA
    pub full_sha: String,
    /// Commit message (first line only)
    pub message: String,
    /// Full commit message
    pub full_message: String,
    /// Author name
    pub author: String,
    /// Author email
    pub author_email: String,
    /// Commit timestamp (UTC)
    pub timestamp: DateTime<Utc>,
    /// Files changed in this commit (vs. first parent)
    pub files_changed: Vec<PathBuf>,
    /// Is this a merge commit (more than one parent)?
    pub is_merge: bool,
}
// ============================================================================
// GIT ANALYZER
// ============================================================================
/// Analyzes git history to extract knowledge
///
/// Holds only the repository path; a fresh `git2::Repository` handle is
/// opened per operation.
pub struct GitAnalyzer {
    /// Path used to open the repository.
    repo_path: PathBuf,
}
impl GitAnalyzer {
/// Create a new GitAnalyzer for the given repository path.
///
/// Fails with `GitError::Repository` when no repository can be opened
/// at `repo_path`.
pub fn new(repo_path: PathBuf) -> Result<Self> {
    // Opening (and discarding) the repository validates the path up front.
    Repository::open(&repo_path)?;
    Ok(Self { repo_path })
}
/// Open a fresh handle to the repository.
fn open_repo(&self) -> Result<Repository> {
    // `?` converts git2::Error into GitError via the #[from] impl.
    Ok(Repository::open(&self.repo_path)?)
}
/// Get the current git context
///
/// Collects branch, HEAD, working-tree/index status, and the 10 most
/// recent commits into a single snapshot. Tolerant of unusual states
/// (empty repository, detached HEAD).
pub fn get_current_context(&self) -> Result<GitContext> {
    let repo = self.open_repo()?;
    // Get repository root (fall back to the configured path for bare repos)
    let repo_root = repo
        .workdir()
        .map(|p| p.to_path_buf())
        .unwrap_or_else(|| self.repo_path.clone());
    // Get current branch
    let current_branch = self.get_current_branch(&repo)?;
    // Get HEAD commit — a repo with no commits has no resolvable HEAD
    let (head_commit, has_commits) = match repo.head() {
        Ok(head) => match head.peel_to_commit() {
            // Abbreviate to the first 8 hex chars of the full SHA.
            Ok(commit) => (commit.id().to_string()[..8].to_string(), true),
            Err(_) => (String::new(), false),
        },
        Err(_) => (String::new(), false),
    };
    // Get status of every tracked/untracked entry
    let statuses = repo.statuses(None)?;
    let mut uncommitted_changes = Vec::new();
    let mut staged_changes = Vec::new();
    let mut has_untracked = false;
    for entry in statuses.iter() {
        let path = entry.path().map(|p| PathBuf::from(p)).unwrap_or_default();
        let status = entry.status();
        if status.is_wt_new() {
            has_untracked = true;
        }
        // Working-tree changes (not yet staged)
        if status.is_wt_modified() || status.is_wt_deleted() || status.is_wt_renamed() {
            uncommitted_changes.push(path.clone());
        }
        // Index (staged) changes; a file can appear in both lists
        if status.is_index_new()
            || status.is_index_modified()
            || status.is_index_deleted()
            || status.is_index_renamed()
        {
            staged_changes.push(path);
        }
    }
    // Get recent commits (skipped entirely for an empty repository)
    let recent_commits = if has_commits {
        self.get_recent_commits(&repo, 10)?
    } else {
        vec![]
    };
    Ok(GitContext {
        repo_root,
        current_branch,
        head_commit,
        uncommitted_changes,
        staged_changes,
        recent_commits,
        has_commits,
        has_untracked,
    })
}
/// Current branch name, or a short commit id when HEAD is detached.
/// A repository without commits reports "main".
fn get_current_branch(&self, repo: &Repository) -> Result<String> {
    let head = match repo.head() {
        Ok(head) => head,
        // New repo with no commits: HEAD is unborn.
        Err(_) => return Ok("main".to_string()),
    };
    let name = if head.is_branch() {
        head.shorthand()
            .map(|s| s.to_string())
            .unwrap_or_else(|| "unknown".to_string())
    } else {
        // Detached HEAD: identify it by the abbreviated commit id.
        head.target()
            .map(|oid| oid.to_string()[..8].to_string())
            .unwrap_or_else(|| "HEAD".to_string())
    };
    Ok(name)
}
/// Collect up to `limit` commits reachable from HEAD, newest first
/// (time-sorted revwalk).
fn get_recent_commits(&self, repo: &Repository, limit: usize) -> Result<Vec<CommitInfo>> {
    let mut revwalk = repo.revwalk()?;
    revwalk.push_head()?;
    revwalk.set_sorting(Sort::TIME)?;
    // Collect into Result so the first failing lookup short-circuits.
    revwalk
        .take(limit)
        .map(|oid| {
            let commit = repo.find_commit(oid?)?;
            self.commit_to_info(&commit, repo)
        })
        .collect()
}
/// Convert a git2::Commit to CommitInfo
///
/// Extracts plain-data fields from the libgit2 object so the result can
/// outlive the repository handle. Missing/invalid data degrades to
/// defaults rather than failing.
fn commit_to_info(&self, commit: &Commit, repo: &Repository) -> Result<CommitInfo> {
    let full_sha = commit.id().to_string();
    // Abbreviate to the first 8 hex chars (full SHA is always 40 chars).
    let sha = full_sha[..8].to_string();
    // First line of the message serves as the summary.
    let message = commit
        .message()
        .map(|m| m.lines().next().unwrap_or("").to_string())
        .unwrap_or_default();
    let full_message = commit.message().map(|m| m.to_string()).unwrap_or_default();
    let author = commit.author();
    let author_name = author.name().unwrap_or("Unknown").to_string();
    let author_email = author.email().unwrap_or("").to_string();
    // Commit time is seconds since epoch; fall back to now() when the
    // timestamp is out of range.
    let timestamp = Utc
        .timestamp_opt(commit.time().seconds(), 0)
        .single()
        .unwrap_or_else(Utc::now);
    // Get files changed relative to the first parent
    let files_changed = self.get_commit_files(commit, repo)?;
    let is_merge = commit.parent_count() > 1;
    Ok(CommitInfo {
        sha,
        full_sha,
        message,
        full_message,
        author: author_name,
        author_email,
        timestamp,
        files_changed,
        is_merge,
    })
}
/// Get files changed in a commit
///
/// Diffs against the first parent (or against the empty tree for the
/// initial commit). For renames/deletes both the old and new paths are
/// included, deduplicated.
fn get_commit_files(&self, commit: &Commit, repo: &Repository) -> Result<Vec<PathBuf>> {
    let mut files = Vec::new();
    if commit.parent_count() == 0 {
        // Initial commit - diff against empty tree
        let tree = commit.tree()?;
        let diff = repo.diff_tree_to_tree(None, Some(&tree), None)?;
        for delta in diff.deltas() {
            if let Some(path) = delta.new_file().path() {
                files.push(path.to_path_buf());
            }
        }
    } else {
        // Normal commit - diff against first parent (other parents of a
        // merge are ignored)
        let parent = commit.parent(0)?;
        let parent_tree = parent.tree()?;
        let tree = commit.tree()?;
        let diff = repo.diff_tree_to_tree(Some(&parent_tree), Some(&tree), None)?;
        for delta in diff.deltas() {
            if let Some(path) = delta.new_file().path() {
                files.push(path.to_path_buf());
            }
            // Old path differs from new on renames; avoid duplicates.
            if let Some(path) = delta.old_file().path() {
                if !files.contains(&path.to_path_buf()) {
                    files.push(path.to_path_buf());
                }
            }
        }
    }
    Ok(files)
}
/// Find files that frequently change together
///
/// This analyzes git history to find pairs of files that are often modified
/// in the same commit. This can reveal:
/// - Test files and their implementations
/// - Related components
/// - Configuration files and code they configure
///
/// `since` restricts the scan to commits at or after that timestamp; merge
/// commits and commits touching fewer than 2 or more than 50 relevant files
/// are ignored so sweeping refactors do not create spurious pairs. Pair
/// strength is the Jaccard coefficient of the two files' change sets; only
/// pairs with at least 2 shared commits and strength >= `min_cooccurrence`
/// are returned, sorted by strength descending.
pub fn find_cochange_patterns(
    &self,
    since: Option<DateTime<Utc>>,
    min_cooccurrence: f64,
) -> Result<Vec<FileRelationship>> {
    let repo = self.open_repo()?;
    // Track how often each pair of files changes together
    let mut cochange_counts: HashMap<(PathBuf, PathBuf), u32> = HashMap::new();
    let mut file_change_counts: HashMap<PathBuf, u32> = HashMap::new();
    let mut total_commits = 0u32;
    let mut revwalk = repo.revwalk()?;
    revwalk.push_head()?;
    revwalk.set_sorting(Sort::TIME)?;
    for oid in revwalk {
        let oid = oid?;
        let commit = repo.find_commit(oid)?;
        // Check if commit is after 'since' timestamp.
        // Out-of-range commit timestamps fall back to "now" (and thus pass).
        if let Some(since_time) = since {
            let commit_time = Utc
                .timestamp_opt(commit.time().seconds(), 0)
                .single()
                .unwrap_or_else(Utc::now);
            if commit_time < since_time {
                continue;
            }
        }
        // Skip merge commits: their diffs repeat work already counted on
        // the parent branches and would inflate the pair counts.
        if commit.parent_count() > 1 {
            continue;
        }
        let files = self.get_commit_files(&commit, &repo)?;
        // Filter to relevant file types
        let relevant_files: Vec<_> = files
            .into_iter()
            .filter(|f| self.is_relevant_file(f))
            .collect();
        if relevant_files.len() < 2 || relevant_files.len() > 50 {
            // Skip commits with too few or too many files
            continue;
        }
        total_commits += 1;
        // Count individual file changes (denominator of the Jaccard formula)
        for file in &relevant_files {
            *file_change_counts.entry(file.clone()).or_insert(0) += 1;
        }
        // Count co-occurrences for all pairs. The smaller path is stored
        // first so (a, b) and (b, a) share a single map entry.
        for i in 0..relevant_files.len() {
            for j in (i + 1)..relevant_files.len() {
                let (a, b) = if relevant_files[i] < relevant_files[j] {
                    (relevant_files[i].clone(), relevant_files[j].clone())
                } else {
                    (relevant_files[j].clone(), relevant_files[i].clone())
                };
                *cochange_counts.entry((a, b)).or_insert(0) += 1;
            }
        }
    }
    if total_commits == 0 {
        return Ok(vec![]);
    }
    // Convert to relationships, filtering by minimum co-occurrence
    let mut relationships = Vec::new();
    let mut id_counter = 0u32;
    for ((file_a, file_b), count) in cochange_counts {
        if count < 2 {
            continue; // Need at least 2 co-occurrences
        }
        // Calculate strength as Jaccard coefficient
        // strength = count(A&B) / (count(A) + count(B) - count(A&B))
        let count_a = file_change_counts.get(&file_a).copied().unwrap_or(0);
        let count_b = file_change_counts.get(&file_b).copied().unwrap_or(0);
        let union = count_a + count_b - count;
        let strength = if union > 0 {
            count as f64 / union as f64
        } else {
            0.0
        };
        if strength >= min_cooccurrence {
            id_counter += 1;
            relationships.push(FileRelationship {
                id: format!("cochange-{}", id_counter),
                files: vec![file_a, file_b],
                relationship_type: RelationType::FrequentCochange,
                strength,
                // Note: the reported percentage is the Jaccard strength,
                // not count/total_commits.
                description: format!(
                    "Changed together in {} of {} commits ({:.0}% co-occurrence)",
                    count,
                    total_commits,
                    strength * 100.0
                ),
                created_at: Utc::now(),
                last_confirmed: Some(Utc::now()),
                source: RelationshipSource::GitCochange,
                observation_count: count,
            });
        }
    }
    // Sort by strength (descending); NaN-free since strengths are finite
    relationships.sort_by(|a, b| b.strength.partial_cmp(&a.strength).unwrap_or(std::cmp::Ordering::Equal));
    Ok(relationships)
}
/// Check if a file is relevant for analysis
///
/// Lock files, generated output, vendored dependencies, and VCS internals
/// are excluded; everything else is included only when its extension is a
/// known source/config/doc format.
fn is_relevant_file(&self, path: &Path) -> bool {
    // Substrings that mark a path as generated, vendored, or otherwise
    // uninteresting for co-change analysis.
    const SKIP_MARKERS: &[&str] = &[
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        ".min.",
        ".map",
        "node_modules",
        "target/",
        "dist/",
        "build/",
        ".git/",
    ];
    // Extensions considered source, configuration, or documentation.
    const SOURCE_EXTENSIONS: &[&str] = &[
        "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "kt", "swift",
        "c", "cpp", "h", "hpp", "toml", "yaml", "yml", "json", "md", "sql",
    ];
    let path_str = path.to_string_lossy();
    if SKIP_MARKERS.iter().any(|marker| path_str.contains(marker)) {
        return false;
    }
    match path.extension() {
        Some(ext) => {
            let ext = ext.to_string_lossy().to_lowercase();
            SOURCE_EXTENSIONS.contains(&ext.as_str())
        }
        // No extension (e.g. compiled binaries, scripts without suffix):
        // not considered relevant.
        None => false,
    }
}
/// Extract bug fixes from commit messages
///
/// Walks history from HEAD (time-sorted) and collects commits whose
/// messages look like fixes: conventional-commit prefixes such as
/// "fix:", "fix(scope):", "bugfix:", "hotfix:", or bodies containing
/// "fixes #N" / "closes #N" / "resolves #N".
pub fn extract_bug_fixes(&self, since: Option<DateTime<Utc>>) -> Result<Vec<BugFix>> {
    let repo = self.open_repo()?;
    let mut revwalk = repo.revwalk()?;
    revwalk.push_head()?;
    revwalk.set_sorting(Sort::TIME)?;
    let mut id_counter = 0u32;
    let mut bug_fixes = Vec::new();
    for oid in revwalk {
        let commit = repo.find_commit(oid?)?;
        // Honor the optional cutoff: skip commits older than `since`.
        if let Some(cutoff) = since {
            let commit_time = Utc
                .timestamp_opt(commit.time().seconds(), 0)
                .single()
                .unwrap_or_else(Utc::now);
            if commit_time < cutoff {
                continue;
            }
        }
        let message = commit.message().map(String::from).unwrap_or_default();
        // Delegate the actual message parsing; `id_counter` mints bug-N ids.
        if let Some(fix) = self.parse_bug_fix_commit(&message, &commit, &repo, &mut id_counter)? {
            bug_fixes.push(fix);
        }
    }
    Ok(bug_fixes)
}
/// Parse a commit message to extract bug fix information
///
/// Returns `Ok(None)` when the message does not look like a fix. On a hit,
/// increments `counter` and uses it to mint a sequential `bug-N` id.
/// Severity, root cause, solution, and issue links are inferred from
/// message keywords with sensible fallbacks.
fn parse_bug_fix_commit(
    &self,
    message: &str,
    commit: &Commit,
    repo: &Repository,
    counter: &mut u32,
) -> Result<Option<BugFix>> {
    let message_lower = message.to_lowercase();
    // Check for conventional commit fix patterns
    let is_fix = message_lower.starts_with("fix:")
        || message_lower.starts_with("fix(")
        || message_lower.starts_with("bugfix:")
        || message_lower.starts_with("bugfix(")
        || message_lower.starts_with("hotfix:")
        || message_lower.starts_with("hotfix(")
        || message_lower.contains("fixes #")
        || message_lower.contains("closes #")
        || message_lower.contains("resolves #");
    if !is_fix {
        return Ok(None);
    }
    *counter += 1;
    // Extract the description (first line, removing the prefix up to and
    // including the first colon, e.g. "fix(scope): msg" -> "msg")
    let first_line = message.lines().next().unwrap_or("");
    let symptom = if let Some(colon_pos) = first_line.find(':') {
        first_line[colon_pos + 1..].trim().to_string()
    } else {
        first_line.to_string()
    };
    // Try to extract root cause and solution from multi-line messages
    let mut root_cause = String::new();
    let mut solution = String::new();
    let mut issue_link = None;
    for line in message.lines().skip(1) {
        // Lowered+trimmed copy is used only for prefix matching; the value
        // is always extracted from the original-case `line`.
        let line_lower = line.to_lowercase().trim().to_string();
        if line_lower.starts_with("cause:")
            || line_lower.starts_with("root cause:")
            || line_lower.starts_with("problem:")
        {
            root_cause = line
                .split_once(':')
                .map(|(_, v)| v.trim().to_string())
                .unwrap_or_default();
        } else if line_lower.starts_with("solution:")
            || line_lower.starts_with("fix:")
            || line_lower.starts_with("fixed by:")
        {
            solution = line
                .split_once(':')
                .map(|(_, v)| v.trim().to_string())
                .unwrap_or_default();
        } else if line_lower.contains("fixes #")
            || line_lower.contains("closes #")
            || line_lower.contains("resolves #")
        {
            // Extract issue number: the digits immediately after the first '#'
            if let Some(hash_pos) = line.find('#') {
                let issue_num: String = line[hash_pos + 1..]
                    .chars()
                    .take_while(|c| c.is_ascii_digit())
                    .collect();
                if !issue_num.is_empty() {
                    issue_link = Some(format!("#{}", issue_num));
                }
            }
        }
    }
    // If no explicit root cause/solution, use the commit message
    if root_cause.is_empty() {
        root_cause = "See commit for details".to_string();
    }
    if solution.is_empty() {
        solution = symptom.clone();
    }
    // Determine severity from keywords (checked against the whole message)
    let severity = if message_lower.contains("critical")
        || message_lower.contains("security")
        || message_lower.contains("crash")
    {
        BugSeverity::Critical
    } else if message_lower.contains("hotfix") || message_lower.contains("urgent") {
        BugSeverity::High
    } else if message_lower.contains("minor") || message_lower.contains("typo") {
        BugSeverity::Low
    } else {
        BugSeverity::Medium
    };
    let files_changed = self.get_commit_files(commit, repo)?;
    let bug_fix = BugFix {
        id: format!("bug-{}", counter),
        symptom,
        root_cause,
        solution,
        files_changed,
        commit_sha: commit.id().to_string(),
        // Commit timestamps outside chrono's representable range fall back
        // to the current time.
        created_at: Utc
            .timestamp_opt(commit.time().seconds(), 0)
            .single()
            .unwrap_or_else(Utc::now),
        issue_link,
        severity,
        discovered_by: commit.author().name().map(|s| s.to_string()),
        prevention_notes: None,
        tags: vec!["auto-detected".to_string()],
    };
    Ok(Some(bug_fix))
}
/// Analyze the full git history and return discovered knowledge
///
/// Combines bug-fix extraction, co-change mining (Jaccard >= 0.3), and an
/// activity summary of the 50 most recent commits (top 5 authors, top 10
/// hot files).
pub fn analyze_history(&self, since: Option<DateTime<Utc>>) -> Result<HistoryAnalysis> {
    // Extract bug fixes
    let bug_fixes = self.extract_bug_fixes(since)?;
    // Find co-change patterns
    let file_relationships = self.find_cochange_patterns(since, 0.3)?;
    // Get recent activity summary
    let recent_commits = {
        let repo = self.open_repo()?;
        self.get_recent_commits(&repo, 50)?
    };
    // Tally per-author and per-file activity over the recent window
    let mut author_counts: HashMap<String, u32> = HashMap::new();
    let mut file_counts: HashMap<PathBuf, u32> = HashMap::new();
    for commit in &recent_commits {
        *author_counts.entry(commit.author.clone()).or_insert(0) += 1;
        for file in &commit.files_changed {
            *file_counts.entry(file.clone()).or_insert(0) += 1;
        }
    }
    // Rank both tallies by count, descending, then keep only the head
    let mut top_contributors: Vec<_> = author_counts.into_iter().collect();
    top_contributors.sort_by(|a, b| b.1.cmp(&a.1));
    top_contributors.truncate(5);
    let mut hot_files: Vec<_> = file_counts.into_iter().collect();
    hot_files.sort_by(|a, b| b.1.cmp(&a.1));
    hot_files.truncate(10);
    Ok(HistoryAnalysis {
        bug_fixes,
        file_relationships,
        commit_count: recent_commits.len(),
        top_contributors,
        hot_files,
        analyzed_since: since,
    })
}
/// Get files changed since a specific commit
///
/// Diffs the tree of `commit_sha` against the current HEAD tree and
/// returns the post-change path of every delta.
pub fn get_files_changed_since(&self, commit_sha: &str) -> Result<Vec<PathBuf>> {
    let repo = self.open_repo()?;
    // Resolve both endpoints of the comparison: the named commit and HEAD
    let target_commit = repo.find_commit(repo.revparse_single(commit_sha)?.id())?;
    let head_commit = repo.head()?.peel_to_commit()?;
    let target_tree = target_commit.tree()?;
    let head_tree = head_commit.tree()?;
    let diff = repo.diff_tree_to_tree(Some(&target_tree), Some(&head_tree), None)?;
    let files = diff
        .deltas()
        .filter_map(|delta| delta.new_file().path().map(|p| p.to_path_buf()))
        .collect();
    Ok(files)
}
/// Get blame information for a file
///
/// Returns the commit that last touched `line` of `file_path`, or `None`
/// when no blame hunk covers that line or the blamed commit cannot be
/// loaded.
pub fn get_file_blame(&self, file_path: &Path, line: u32) -> Result<Option<CommitInfo>> {
    let repo = self.open_repo()?;
    let blame = repo.blame_file(file_path, None)?;
    let info = match blame.get_line(line as usize) {
        Some(hunk) => match repo.find_commit(hunk.final_commit_id()) {
            Ok(commit) => Some(self.commit_to_info(&commit, &repo)?),
            // A commit that fails to load is treated as "no blame info"
            // rather than an error (best-effort lookup).
            Err(_) => None,
        },
        None => None,
    };
    Ok(info)
}
}
// ============================================================================
// HISTORY ANALYSIS RESULT
// ============================================================================
/// Result of analyzing git history
///
/// Aggregate report produced by `GitAnalyzer::analyze_history`.
#[derive(Debug)]
pub struct HistoryAnalysis {
    /// Bug fixes extracted from commits
    pub bug_fixes: Vec<BugFix>,
    /// File relationships discovered from co-change patterns
    pub file_relationships: Vec<FileRelationship>,
    /// Number of recent commits summarized for the activity stats
    /// (capped at 50 by `analyze_history`, not the full history size)
    pub commit_count: usize,
    /// Top contributors (author, commit count); at most 5 entries
    pub top_contributors: Vec<(String, u32)>,
    /// Most frequently changed files (path, change count); at most 10 entries
    pub hot_files: Vec<(PathBuf, u32)>,
    /// Time period analyzed from (`None` = entire history)
    pub analyzed_since: Option<DateTime<Utc>>,
}
// ============================================================================
// TESTS
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build a throwaway repository containing a single empty commit.
    fn create_test_repo() -> (TempDir, Repository) {
        let dir = TempDir::new().unwrap();
        let repo = Repository::init(dir.path()).unwrap();
        let sig = git2::Signature::now("Test User", "test@example.com").unwrap();
        // Commit the (empty) index so HEAD exists.
        let tree_id = repo.index().unwrap().write_tree().unwrap();
        {
            let tree = repo.find_tree(tree_id).unwrap();
            repo.commit(Some("HEAD"), &sig, &sig, "Initial commit", &tree, &[])
                .unwrap();
        }
        (dir, repo)
    }

    #[test]
    fn test_git_analyzer_creation() {
        let (tmp, _repo) = create_test_repo();
        assert!(GitAnalyzer::new(tmp.path().to_path_buf()).is_ok());
    }

    #[test]
    fn test_get_current_context() {
        let (tmp, _repo) = create_test_repo();
        let analyzer = GitAnalyzer::new(tmp.path().to_path_buf()).unwrap();
        let ctx = analyzer.get_current_context().unwrap();
        assert!(ctx.has_commits);
        assert!(!ctx.head_commit.is_empty());
    }

    #[test]
    fn test_is_relevant_file() {
        // is_relevant_file never touches the repo, so a dummy path suffices.
        let analyzer = GitAnalyzer {
            repo_path: PathBuf::from("."),
        };
        for relevant in ["src/main.rs", "lib/utils.ts"] {
            assert!(analyzer.is_relevant_file(Path::new(relevant)));
        }
        for irrelevant in ["Cargo.lock", "node_modules/foo.js", "target/debug/main"] {
            assert!(!analyzer.is_relevant_file(Path::new(irrelevant)));
        }
    }
}

View file

@ -0,0 +1,769 @@
//! Codebase Memory Module - Vestige's KILLER DIFFERENTIATOR
//!
//! This module makes Vestige unique in the AI memory market. No other tool
//! understands codebases at this level - remembering architectural decisions,
//! bug fixes, patterns, file relationships, and developer preferences.
//!
//! # Overview
//!
//! The Codebase Memory Module provides:
//!
//! - **Git History Analysis**: Automatically learns from your codebase's history
//! - Extracts bug fix patterns from commit messages
//! - Discovers file co-change patterns (files that always change together)
//! - Understands the evolution of the codebase
//!
//! - **Context Capture**: Knows what you're working on
//! - Current branch and uncommitted changes
//! - Project type and frameworks
//! - Active files and editing context
//!
//! - **Pattern Detection**: Learns and applies coding patterns
//! - User-taught patterns
//! - Auto-detected patterns from code
//! - Context-aware pattern suggestions
//!
//! - **Relationship Tracking**: Understands file relationships
//! - Import/dependency relationships
//! - Test-implementation pairs
//! - Co-edit patterns
//!
//! - **File Watching**: Continuous learning from developer behavior
//! - Tracks files edited together
//! - Updates relationship strengths
//! - Triggers pattern detection
//!
//! # Quick Start
//!
//! ```rust,no_run
//! use vestige_core::codebase::CodebaseMemory;
//! use std::path::PathBuf;
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! // Create codebase memory for a project
//! let memory = CodebaseMemory::new(PathBuf::from("/path/to/project"))?;
//!
//! // Learn from git history
//! let analysis = memory.learn_from_history().await?;
//! println!("Found {} bug fixes", analysis.bug_fixes_found);
//! println!("Found {} file relationships", analysis.relationships_found);
//!
//! // Get current context
//! let context = memory.get_context()?;
//! println!("Working on branch: {}", context.git.as_ref().map(|g| &g.current_branch).unwrap_or(&"unknown".to_string()));
//!
//! // Remember an architectural decision
//! memory.remember_decision(
//! "Use Event Sourcing for order management",
//! "Need complete audit trail and ability to replay state",
//! vec![PathBuf::from("src/orders/events.rs")],
//! )?;
//!
//! // Query codebase memories
//! let results = memory.query("error handling", None)?;
//! for node in results {
//! println!("Found: {}", node.to_searchable_text());
//! }
//! # Ok(())
//! # }
//! ```
pub mod context;
pub mod git;
pub mod patterns;
pub mod relationships;
pub mod types;
pub mod watcher;
// Re-export main types
pub use context::{ContextCapture, FileContext, Framework, ProjectType, WorkingContext};
pub use git::{CommitInfo, GitAnalyzer, GitContext, HistoryAnalysis};
pub use patterns::{PatternDetector, PatternMatch, PatternSuggestion};
pub use relationships::{
GraphEdge, GraphMetadata, GraphNode, RelatedFile, RelationshipGraph, RelationshipTracker,
};
pub use types::{
ArchitecturalDecision, BugFix, BugSeverity, CodeEntity, CodePattern, CodebaseNode,
CodingPreference, DecisionStatus, EntityType, FileRelationship, PreferenceSource, RelationType,
RelationshipSource, WorkContext, WorkStatus,
};
pub use watcher::{CodebaseWatcher, FileEvent, FileEventKind, WatcherConfig};
use std::path::PathBuf;
use std::sync::Arc;
use chrono::{DateTime, Utc};
use tokio::sync::RwLock;
use uuid::Uuid;
// ============================================================================
// ERRORS
// ============================================================================
/// Unified error type for codebase memory operations
///
/// Wraps the error type of each submodule (plus storage and lookup
/// failures) so the whole codebase-memory API can share one `Result` alias.
#[derive(Debug, thiserror::Error)]
pub enum CodebaseError {
    /// Failure while reading or analyzing the git repository.
    #[error("Git error: {0}")]
    Git(#[from] git::GitError),
    /// Failure while capturing the working context.
    #[error("Context error: {0}")]
    Context(#[from] context::ContextError),
    /// Failure in pattern learning or detection.
    #[error("Pattern error: {0}")]
    Pattern(#[from] patterns::PatternError),
    /// Failure in relationship tracking.
    #[error("Relationship error: {0}")]
    Relationship(#[from] relationships::RelationshipError),
    /// Failure in the file watcher.
    #[error("Watcher error: {0}")]
    Watcher(#[from] watcher::WatcherError),
    /// Persistence-layer failure, reported as a plain message.
    #[error("Storage error: {0}")]
    Storage(String),
    /// A requested item could not be found.
    #[error("Not found: {0}")]
    NotFound(String),
}
/// Convenience alias used by all codebase-memory APIs.
pub type Result<T> = std::result::Result<T, CodebaseError>;
// ============================================================================
// LEARNING RESULT
// ============================================================================
/// Result of learning from git history
///
/// Summary counters returned by `CodebaseMemory::learn_from_history` and
/// `learn_from_history_since`.
#[derive(Debug)]
pub struct LearningResult {
    /// Bug fixes extracted
    pub bug_fixes_found: usize,
    /// File relationships discovered
    pub relationships_found: usize,
    /// Patterns detected (currently always 0; reserved for future extension)
    pub patterns_detected: usize,
    /// Time range analyzed (`None` = entire history)
    pub analyzed_since: Option<DateTime<Utc>>,
    /// Commits analyzed
    pub commits_analyzed: usize,
    /// Duration of analysis
    pub duration_ms: u64,
}
// ============================================================================
// CODEBASE MEMORY
// ============================================================================
/// Main codebase memory interface
///
/// This is the primary entry point for all codebase memory operations.
/// It coordinates between git analysis, context capture, pattern detection,
/// and relationship tracking.
pub struct CodebaseMemory {
    /// Repository path (root of the project being analyzed)
    repo_path: PathBuf,
    /// Git analyzer
    pub git: GitAnalyzer,
    /// Context capture
    pub context: ContextCapture,
    /// Pattern detector (shared with the optional watcher)
    patterns: Arc<RwLock<PatternDetector>>,
    /// Relationship tracker (shared with the optional watcher)
    relationships: Arc<RwLock<RelationshipTracker>>,
    /// File watcher (optional; `Some` only when built via `with_watcher`)
    watcher: Option<Arc<RwLock<CodebaseWatcher>>>,
    /// Stored codebase nodes (in-memory; persist via `export_nodes`)
    nodes: Arc<RwLock<Vec<CodebaseNode>>>,
}
impl CodebaseMemory {
/// Create a new CodebaseMemory for a repository
pub fn new(repo_path: PathBuf) -> Result<Self> {
let git = GitAnalyzer::new(repo_path.clone())?;
let context = ContextCapture::new(repo_path.clone())?;
let patterns = Arc::new(RwLock::new(PatternDetector::new()));
let relationships = Arc::new(RwLock::new(RelationshipTracker::new()));
// Load built-in patterns
{
let mut detector = patterns.blocking_write();
for pattern in patterns::create_builtin_patterns() {
let _ = detector.learn_pattern(pattern);
}
}
Ok(Self {
repo_path,
git,
context,
patterns,
relationships,
watcher: None,
nodes: Arc::new(RwLock::new(Vec::new())),
})
}
/// Create with file watching enabled
pub fn with_watcher(repo_path: PathBuf) -> Result<Self> {
let mut memory = Self::new(repo_path)?;
let watcher = CodebaseWatcher::new(
Arc::clone(&memory.relationships),
Arc::clone(&memory.patterns),
);
memory.watcher = Some(Arc::new(RwLock::new(watcher)));
Ok(memory)
}
// ========================================================================
// DECISION MANAGEMENT
// ========================================================================
/// Remember an architectural decision
pub fn remember_decision(
&self,
decision: &str,
rationale: &str,
files_affected: Vec<PathBuf>,
) -> Result<String> {
let id = format!("adr-{}", Uuid::new_v4());
let node = CodebaseNode::ArchitecturalDecision(ArchitecturalDecision {
id: id.clone(),
decision: decision.to_string(),
rationale: rationale.to_string(),
files_affected,
commit_sha: self.git.get_current_context().ok().map(|c| c.head_commit),
created_at: Utc::now(),
updated_at: None,
context: None,
tags: vec![],
status: DecisionStatus::Accepted,
alternatives_considered: vec![],
});
self.nodes.blocking_write().push(node);
Ok(id)
}
/// Remember an architectural decision with full details
pub fn remember_decision_full(&self, decision: ArchitecturalDecision) -> Result<String> {
let id = decision.id.clone();
self.nodes
.blocking_write()
.push(CodebaseNode::ArchitecturalDecision(decision));
Ok(id)
}
// ========================================================================
// BUG FIX MANAGEMENT
// ========================================================================
/// Remember a bug fix
pub fn remember_bug_fix(&self, fix: BugFix) -> Result<String> {
let id = fix.id.clone();
self.nodes.blocking_write().push(CodebaseNode::BugFix(fix));
Ok(id)
}
/// Remember a bug fix with minimal details
pub fn remember_bug_fix_simple(
&self,
symptom: &str,
root_cause: &str,
solution: &str,
files_changed: Vec<PathBuf>,
) -> Result<String> {
let id = format!("bug-{}", Uuid::new_v4());
let commit_sha = self
.git
.get_current_context()
.map(|c| c.head_commit)
.unwrap_or_default();
let fix = BugFix::new(
id.clone(),
symptom.to_string(),
root_cause.to_string(),
solution.to_string(),
commit_sha,
)
.with_files(files_changed);
self.remember_bug_fix(fix)?;
Ok(id)
}
// ========================================================================
// PATTERN MANAGEMENT
// ========================================================================
/// Remember a coding pattern
pub fn remember_pattern(&self, pattern: CodePattern) -> Result<String> {
let id = pattern.id.clone();
self.patterns.blocking_write().learn_pattern(pattern)?;
Ok(id)
}
/// Get pattern suggestions for current context
pub async fn get_pattern_suggestions(&self) -> Result<Vec<PatternSuggestion>> {
let context = self.get_context()?;
let detector = self.patterns.read().await;
Ok(detector.suggest_patterns(&context)?)
}
/// Detect patterns in code
pub async fn detect_patterns_in_code(
&self,
code: &str,
language: &str,
) -> Result<Vec<PatternMatch>> {
let detector = self.patterns.read().await;
Ok(detector.detect_patterns(code, language)?)
}
// ========================================================================
// PREFERENCE MANAGEMENT
// ========================================================================
/// Remember a coding preference
pub fn remember_preference(&self, preference: CodingPreference) -> Result<String> {
let id = preference.id.clone();
self.nodes
.blocking_write()
.push(CodebaseNode::CodingPreference(preference));
Ok(id)
}
/// Remember a simple preference
pub fn remember_preference_simple(
&self,
context: &str,
preference: &str,
counter_preference: Option<&str>,
) -> Result<String> {
let id = format!("pref-{}", Uuid::new_v4());
let pref = CodingPreference::new(id.clone(), context.to_string(), preference.to_string())
.with_confidence(0.8);
let pref = if let Some(counter) = counter_preference {
pref.with_counter(counter.to_string())
} else {
pref
};
self.remember_preference(pref)?;
Ok(id)
}
// ========================================================================
// RELATIONSHIP MANAGEMENT
// ========================================================================
/// Get files related to a given file
pub async fn get_related_files(&self, file: &std::path::Path) -> Result<Vec<RelatedFile>> {
let tracker = self.relationships.read().await;
Ok(tracker.get_related_files(file)?)
}
/// Record that files were edited together
pub async fn record_coedit(&self, files: &[PathBuf]) -> Result<()> {
let mut tracker = self.relationships.write().await;
Ok(tracker.record_coedit(files)?)
}
/// Build a relationship graph for visualization
pub async fn build_relationship_graph(&self) -> Result<RelationshipGraph> {
let tracker = self.relationships.read().await;
Ok(tracker.build_graph()?)
}
// ========================================================================
// CONTEXT
// ========================================================================
/// Get the current working context
pub fn get_context(&self) -> Result<WorkingContext> {
Ok(self.context.capture()?)
}
/// Get context for a specific file
pub fn get_file_context(&self, path: &std::path::Path) -> Result<FileContext> {
Ok(self.context.context_for_file(path)?)
}
/// Set active files for context tracking
pub fn set_active_files(&mut self, files: Vec<PathBuf>) {
self.context.set_active_files(files);
}
// ========================================================================
// QUERY
// ========================================================================
/// Query codebase memories
pub fn query(
&self,
query: &str,
context: Option<&WorkingContext>,
) -> Result<Vec<CodebaseNode>> {
let query_lower = query.to_lowercase();
let nodes = self.nodes.blocking_read();
let mut results: Vec<_> = nodes
.iter()
.filter(|node| {
let text = node.to_searchable_text().to_lowercase();
text.contains(&query_lower)
})
.cloned()
.collect();
// Boost results relevant to current context
if let Some(ctx) = context {
results.sort_by(|a, b| {
let a_relevance = self.calculate_context_relevance(a, ctx);
let b_relevance = self.calculate_context_relevance(b, ctx);
b_relevance
.partial_cmp(&a_relevance)
.unwrap_or(std::cmp::Ordering::Equal)
});
}
Ok(results)
}
/// Calculate how relevant a node is to the current context
fn calculate_context_relevance(&self, node: &CodebaseNode, context: &WorkingContext) -> f64 {
let mut relevance = 0.0;
// Check file overlap
let node_files = node.associated_files();
if let Some(ref active) = context.active_file {
for file in &node_files {
if *file == active {
relevance += 1.0;
} else if file.parent() == active.parent() {
relevance += 0.5;
}
}
}
// Check framework relevance
for framework in &context.frameworks {
let text = node.to_searchable_text().to_lowercase();
if text.contains(&framework.name().to_lowercase()) {
relevance += 0.3;
}
}
relevance
}
/// Get memories relevant to current context
pub fn get_relevant(&self, context: &WorkingContext) -> Result<Vec<CodebaseNode>> {
let nodes = self.nodes.blocking_read();
let mut scored: Vec<_> = nodes
.iter()
.map(|node| {
let relevance = self.calculate_context_relevance(node, context);
(node.clone(), relevance)
})
.filter(|(_, relevance)| *relevance > 0.0)
.collect();
scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
Ok(scored.into_iter().map(|(node, _)| node).collect())
}
/// Get a node by ID
pub fn get_node(&self, id: &str) -> Result<Option<CodebaseNode>> {
let nodes = self.nodes.blocking_read();
Ok(nodes.iter().find(|n| n.id() == id).cloned())
}
/// Get all nodes of a specific type
pub fn get_nodes_by_type(&self, node_type: &str) -> Result<Vec<CodebaseNode>> {
let nodes = self.nodes.blocking_read();
Ok(nodes
.iter()
.filter(|n| n.node_type() == node_type)
.cloned()
.collect())
}
// ========================================================================
// LEARNING
// ========================================================================
/// Learn from git history
pub async fn learn_from_history(&self) -> Result<LearningResult> {
let start = std::time::Instant::now();
// Analyze history
let analysis = self.git.analyze_history(None)?;
// Store bug fixes
let mut nodes = self.nodes.write().await;
for fix in &analysis.bug_fixes {
nodes.push(CodebaseNode::BugFix(fix.clone()));
}
// Store file relationships
let mut tracker = self.relationships.write().await;
for rel in &analysis.file_relationships {
let _ = tracker.add_relationship(rel.clone());
}
let duration_ms = start.elapsed().as_millis() as u64;
Ok(LearningResult {
bug_fixes_found: analysis.bug_fixes.len(),
relationships_found: analysis.file_relationships.len(),
patterns_detected: 0, // Could be extended
analyzed_since: analysis.analyzed_since,
commits_analyzed: analysis.commit_count,
duration_ms,
})
}
/// Learn from git history since a specific time
pub async fn learn_from_history_since(&self, since: DateTime<Utc>) -> Result<LearningResult> {
let start = std::time::Instant::now();
let analysis = self.git.analyze_history(Some(since))?;
let mut nodes = self.nodes.write().await;
for fix in &analysis.bug_fixes {
nodes.push(CodebaseNode::BugFix(fix.clone()));
}
let mut tracker = self.relationships.write().await;
for rel in &analysis.file_relationships {
let _ = tracker.add_relationship(rel.clone());
}
let duration_ms = start.elapsed().as_millis() as u64;
Ok(LearningResult {
bug_fixes_found: analysis.bug_fixes.len(),
relationships_found: analysis.file_relationships.len(),
patterns_detected: 0,
analyzed_since: Some(since),
commits_analyzed: analysis.commit_count,
duration_ms,
})
}
// ========================================================================
// FILE WATCHING
// ========================================================================
/// Start watching the repository for changes
pub async fn start_watching(&self) -> Result<()> {
if let Some(ref watcher) = self.watcher {
let mut w = watcher.write().await;
w.watch(&self.repo_path).await?;
}
Ok(())
}
/// Stop watching the repository
pub async fn stop_watching(&self) -> Result<()> {
if let Some(ref watcher) = self.watcher {
let mut w = watcher.write().await;
w.stop().await?;
}
Ok(())
}
// ========================================================================
// SERIALIZATION
// ========================================================================
/// Export all nodes for storage
pub fn export_nodes(&self) -> Vec<CodebaseNode> {
self.nodes.blocking_read().clone()
}
/// Import nodes from storage
pub fn import_nodes(&self, nodes: Vec<CodebaseNode>) {
let mut current = self.nodes.blocking_write();
current.extend(nodes);
}
/// Export patterns for storage
pub fn export_patterns(&self) -> Vec<CodePattern> {
self.patterns.blocking_read().export_patterns()
}
/// Import patterns from storage
pub fn import_patterns(&self, patterns: Vec<CodePattern>) -> Result<()> {
let mut detector = self.patterns.blocking_write();
detector.load_patterns(patterns)?;
Ok(())
}
/// Export relationships for storage
pub fn export_relationships(&self) -> Vec<FileRelationship> {
self.relationships.blocking_read().export_relationships()
}
/// Import relationships from storage
pub fn import_relationships(&self, relationships: Vec<FileRelationship>) -> Result<()> {
let mut tracker = self.relationships.blocking_write();
tracker.load_relationships(relationships)?;
Ok(())
}
// ========================================================================
// STATS
// ========================================================================
/// Get statistics about codebase memory
pub fn get_stats(&self) -> CodebaseStats {
let nodes = self.nodes.blocking_read();
let patterns = self.patterns.blocking_read();
let relationships = self.relationships.blocking_read();
CodebaseStats {
total_nodes: nodes.len(),
architectural_decisions: nodes
.iter()
.filter(|n| matches!(n, CodebaseNode::ArchitecturalDecision(_)))
.count(),
bug_fixes: nodes
.iter()
.filter(|n| matches!(n, CodebaseNode::BugFix(_)))
.count(),
patterns: patterns.get_all_patterns().len(),
preferences: nodes
.iter()
.filter(|n| matches!(n, CodebaseNode::CodingPreference(_)))
.count(),
file_relationships: relationships.get_all_relationships().len(),
}
}
}
/// Statistics about codebase memory
///
/// Snapshot produced by `CodebaseMemory::get_stats`.
#[derive(Debug, Clone)]
pub struct CodebaseStats {
    // Total stored nodes of every kind
    pub total_nodes: usize,
    // Stored nodes that are architectural decisions
    pub architectural_decisions: usize,
    // Stored nodes that are bug fixes
    pub bug_fixes: usize,
    // Patterns known to the detector (built-ins plus learned)
    pub patterns: usize,
    // Stored nodes that are coding preferences
    pub preferences: usize,
    // Relationships known to the tracker
    pub file_relationships: usize,
}
// ============================================================================
// TESTS
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Set up a minimal Rust project inside a fresh git repository.
    fn create_test_repo() -> TempDir {
        let dir = TempDir::new().unwrap();
        git2::Repository::init(dir.path()).unwrap();
        // A Cargo.toml makes project-type detection report Rust.
        std::fs::write(
            dir.path().join("Cargo.toml"),
            r#"
[package]
name = "test-project"
version = "0.1.0"
"#,
        )
        .unwrap();
        std::fs::create_dir(dir.path().join("src")).unwrap();
        std::fs::write(dir.path().join("src/main.rs"), "fn main() {}").unwrap();
        dir
    }

    #[test]
    fn test_codebase_memory_creation() {
        let project = create_test_repo();
        assert!(CodebaseMemory::new(project.path().to_path_buf()).is_ok());
    }

    #[test]
    fn test_remember_decision() {
        let project = create_test_repo();
        let memory = CodebaseMemory::new(project.path().to_path_buf()).unwrap();
        let id = memory
            .remember_decision(
                "Use Event Sourcing",
                "Need audit trail",
                vec![PathBuf::from("src/events.rs")],
            )
            .unwrap();
        assert!(id.starts_with("adr-"));
        // The stored node must be retrievable by the returned id.
        assert!(memory.get_node(&id).unwrap().is_some());
    }

    #[test]
    fn test_remember_bug_fix() {
        let project = create_test_repo();
        let memory = CodebaseMemory::new(project.path().to_path_buf()).unwrap();
        let id = memory
            .remember_bug_fix_simple(
                "App crashes on startup",
                "Null pointer in config loading",
                "Added null check",
                vec![PathBuf::from("src/config.rs")],
            )
            .unwrap();
        assert!(id.starts_with("bug-"));
    }

    #[test]
    fn test_query() {
        let project = create_test_repo();
        let memory = CodebaseMemory::new(project.path().to_path_buf()).unwrap();
        memory
            .remember_decision("Use async/await for IO", "Better performance", vec![])
            .unwrap();
        memory
            .remember_decision("Use channels for communication", "Thread safety", vec![])
            .unwrap();
        // Only the first decision mentions "async".
        assert_eq!(memory.query("async", None).unwrap().len(), 1);
    }

    #[test]
    fn test_get_context() {
        let project = create_test_repo();
        let memory = CodebaseMemory::new(project.path().to_path_buf()).unwrap();
        assert_eq!(memory.get_context().unwrap().project_type, ProjectType::Rust);
    }

    #[test]
    fn test_stats() {
        let project = create_test_repo();
        let memory = CodebaseMemory::new(project.path().to_path_buf()).unwrap();
        memory.remember_decision("Test", "Test", vec![]).unwrap();
        let stats = memory.get_stats();
        assert_eq!(stats.architectural_decisions, 1);
        // Built-in patterns are loaded at construction time.
        assert!(stats.patterns > 0);
    }
}

View file

@ -0,0 +1,722 @@
//! Pattern detection and storage for codebase memory
//!
//! This module handles:
//! - Learning new patterns from user teaching
//! - Detecting known patterns in code
//! - Suggesting relevant patterns based on context
//!
//! Patterns are the reusable pieces of knowledge that make Vestige smarter
//! over time. As the user teaches patterns, Vestige becomes more helpful
//! for that specific codebase.
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use chrono::Utc;
use serde::{Deserialize, Serialize};
use super::context::WorkingContext;
use super::types::CodePattern;
// ============================================================================
// ERRORS
// ============================================================================

/// Errors produced by pattern storage and detection.
#[derive(Debug, thiserror::Error)]
pub enum PatternError {
    /// No pattern exists with the requested ID.
    #[error("Pattern not found: {0}")]
    NotFound(String),
    /// The pattern failed validation (e.g. empty name or description).
    #[error("Invalid pattern: {0}")]
    Invalid(String),
    /// Underlying storage failed; payload is a human-readable message.
    #[error("Storage error: {0}")]
    Storage(String),
}

/// Convenience alias: this module's fallible functions fail with [`PatternError`].
pub type Result<T> = std::result::Result<T, PatternError>;
// ============================================================================
// PATTERN MATCH
// ============================================================================

/// A detected pattern match in code
///
/// Produced by `PatternDetector::detect_patterns`; only matches with a
/// confidence of at least 0.3 are reported.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PatternMatch {
    /// The pattern that was matched
    pub pattern: CodePattern,
    /// Confidence of the match (0.0 - 1.0)
    pub confidence: f64,
    /// Location in the code where pattern was detected
    /// (currently always `None`; line-level analysis is not implemented)
    pub location: Option<PatternLocation>,
    /// Suggestions based on this pattern match
    pub suggestions: Vec<String>,
}

/// Location where a pattern was detected
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PatternLocation {
    /// File where pattern was found
    pub file: PathBuf,
    /// Starting line (1-indexed)
    pub start_line: u32,
    /// Ending line (1-indexed)
    pub end_line: u32,
    /// Code snippet that matched
    pub snippet: String,
}

// ============================================================================
// PATTERN SUGGESTION
// ============================================================================

/// A suggested pattern based on context
///
/// Produced by `PatternDetector::suggest_patterns`; only suggestions with a
/// relevance of at least 0.2 are reported.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PatternSuggestion {
    /// The suggested pattern
    pub pattern: CodePattern,
    /// Why this pattern is being suggested
    pub reason: String,
    /// Relevance score (0.0 - 1.0)
    pub relevance: f64,
    /// Example of how to apply this pattern
    pub example: Option<String>,
}
// ============================================================================
// PATTERN DETECTOR
// ============================================================================

/// Detects and manages code patterns
///
/// Purely in-memory; persistence is the caller's responsibility via
/// `load_patterns` / `export_patterns`.
pub struct PatternDetector {
    /// Stored patterns indexed by ID
    patterns: HashMap<String, CodePattern>,
    /// Pattern IDs indexed by lowercased language for faster lookup
    patterns_by_language: HashMap<String, Vec<String>>,
    /// Pattern keywords for text matching, keyed by pattern ID
    pattern_keywords: HashMap<String, Vec<String>>,
}
impl PatternDetector {
/// Create a new pattern detector
pub fn new() -> Self {
Self {
patterns: HashMap::new(),
patterns_by_language: HashMap::new(),
pattern_keywords: HashMap::new(),
}
}
/// Learn a new pattern from user teaching
pub fn learn_pattern(&mut self, pattern: CodePattern) -> Result<String> {
// Validate the pattern
if pattern.name.is_empty() {
return Err(PatternError::Invalid(
"Pattern name cannot be empty".to_string(),
));
}
if pattern.description.is_empty() {
return Err(PatternError::Invalid(
"Pattern description cannot be empty".to_string(),
));
}
let id = pattern.id.clone();
// Index by language
if let Some(ref language) = pattern.language {
self.patterns_by_language
.entry(language.to_lowercase())
.or_default()
.push(id.clone());
}
// Extract keywords for matching
let keywords = self.extract_keywords(&pattern);
self.pattern_keywords.insert(id.clone(), keywords);
// Store the pattern
self.patterns.insert(id.clone(), pattern);
Ok(id)
}
/// Extract keywords from a pattern for matching
fn extract_keywords(&self, pattern: &CodePattern) -> Vec<String> {
let mut keywords = Vec::new();
// Words from name
keywords.extend(
pattern
.name
.to_lowercase()
.split_whitespace()
.filter(|w| w.len() > 2)
.map(|s| s.to_string()),
);
// Words from description
keywords.extend(
pattern
.description
.to_lowercase()
.split_whitespace()
.filter(|w| w.len() > 3)
.map(|s| s.to_string()),
);
// Tags
keywords.extend(pattern.tags.iter().map(|t| t.to_lowercase()));
// Deduplicate
keywords.sort();
keywords.dedup();
keywords
}
/// Get a pattern by ID
pub fn get_pattern(&self, id: &str) -> Option<&CodePattern> {
self.patterns.get(id)
}
/// Get all patterns
pub fn get_all_patterns(&self) -> Vec<&CodePattern> {
self.patterns.values().collect()
}
/// Get patterns for a specific language
pub fn get_patterns_for_language(&self, language: &str) -> Vec<&CodePattern> {
let language_lower = language.to_lowercase();
self.patterns_by_language
.get(&language_lower)
.map(|ids| ids.iter().filter_map(|id| self.patterns.get(id)).collect())
.unwrap_or_default()
}
/// Detect if current code matches known patterns
pub fn detect_patterns(&self, code: &str, language: &str) -> Result<Vec<PatternMatch>> {
let mut matches = Vec::new();
let code_lower = code.to_lowercase();
// Get relevant patterns for this language
let relevant_patterns: Vec<_> = self
.get_patterns_for_language(language)
.into_iter()
.chain(self.get_patterns_for_language("*"))
.collect();
for pattern in relevant_patterns {
if let Some(confidence) = self.calculate_match_confidence(code, &code_lower, pattern) {
if confidence >= 0.3 {
matches.push(PatternMatch {
pattern: pattern.clone(),
confidence,
location: None, // Would need line-level analysis
suggestions: self.generate_suggestions(pattern, code),
});
}
}
}
// Sort by confidence
matches.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal));
Ok(matches)
}
/// Calculate confidence that code matches a pattern
fn calculate_match_confidence(
&self,
_code: &str,
code_lower: &str,
pattern: &CodePattern,
) -> Option<f64> {
let keywords = self.pattern_keywords.get(&pattern.id)?;
if keywords.is_empty() {
return None;
}
// Count keyword matches
let matches: usize = keywords
.iter()
.filter(|kw| code_lower.contains(kw.as_str()))
.count();
if matches == 0 {
return None;
}
// Calculate confidence based on keyword match ratio
let confidence = matches as f64 / keywords.len() as f64;
// Boost confidence if example code matches
let boost = if !pattern.example_code.is_empty()
&& code_lower.contains(&pattern.example_code.to_lowercase())
{
0.3
} else {
0.0
};
Some((confidence + boost).min(1.0))
}
/// Generate suggestions based on a matched pattern
fn generate_suggestions(&self, pattern: &CodePattern, _code: &str) -> Vec<String> {
let mut suggestions = Vec::new();
// Add the when_to_use guidance
suggestions.push(format!("Consider: {}", pattern.when_to_use));
// Add when_not_to_use if present
if let Some(ref when_not) = pattern.when_not_to_use {
suggestions.push(format!("Note: {}", when_not));
}
suggestions
}
/// Suggest patterns based on current context
pub fn suggest_patterns(&self, context: &WorkingContext) -> Result<Vec<PatternSuggestion>> {
let mut suggestions = Vec::new();
// Get the language for the current context
let language = match &context.project_type {
super::context::ProjectType::Rust => "rust",
super::context::ProjectType::TypeScript => "typescript",
super::context::ProjectType::JavaScript => "javascript",
super::context::ProjectType::Python => "python",
super::context::ProjectType::Go => "go",
super::context::ProjectType::Java => "java",
super::context::ProjectType::Kotlin => "kotlin",
super::context::ProjectType::Swift => "swift",
super::context::ProjectType::CSharp => "csharp",
super::context::ProjectType::Cpp => "cpp",
super::context::ProjectType::Ruby => "ruby",
super::context::ProjectType::Php => "php",
super::context::ProjectType::Mixed(_) => "*",
super::context::ProjectType::Unknown => "*",
};
// Get patterns for this language
let language_patterns = self.get_patterns_for_language(language);
// Score patterns based on context relevance
for pattern in language_patterns {
let relevance = self.calculate_context_relevance(pattern, context);
if relevance >= 0.2 {
let reason = self.generate_suggestion_reason(pattern, context);
suggestions.push(PatternSuggestion {
pattern: pattern.clone(),
reason,
relevance,
example: if !pattern.example_code.is_empty() {
Some(pattern.example_code.clone())
} else {
None
},
});
}
}
// Sort by relevance
suggestions.sort_by(|a, b| b.relevance.partial_cmp(&a.relevance).unwrap_or(std::cmp::Ordering::Equal));
Ok(suggestions)
}
/// Calculate how relevant a pattern is to the current context
fn calculate_context_relevance(&self, pattern: &CodePattern, context: &WorkingContext) -> f64 {
let mut score = 0.0;
// Check if pattern files overlap with active files
if let Some(ref active) = context.active_file {
for example_file in &pattern.example_files {
if self.paths_related(active, example_file) {
score += 0.3;
break;
}
}
}
// Check framework relevance
for framework in &context.frameworks {
let framework_name = framework.name().to_lowercase();
if pattern
.tags
.iter()
.any(|t| t.to_lowercase() == framework_name)
|| pattern.description.to_lowercase().contains(&framework_name)
{
score += 0.2;
}
}
// Check recent usage
if pattern.usage_count > 0 {
score += (pattern.usage_count as f64 / 100.0).min(0.3);
}
score.min(1.0)
}
/// Check if two paths are related (same directory, similar names, etc.)
fn paths_related(&self, a: &Path, b: &Path) -> bool {
// Same parent directory
if a.parent() == b.parent() {
return true;
}
// Similar file names
if let (Some(a_stem), Some(b_stem)) = (a.file_stem(), b.file_stem()) {
let a_str = a_stem.to_string_lossy().to_lowercase();
let b_str = b_stem.to_string_lossy().to_lowercase();
if a_str.contains(&b_str) || b_str.contains(&a_str) {
return true;
}
}
false
}
/// Generate a reason for suggesting a pattern
fn generate_suggestion_reason(
&self,
pattern: &CodePattern,
context: &WorkingContext,
) -> String {
let mut reasons = Vec::new();
// Language match
if let Some(ref lang) = pattern.language {
reasons.push(format!("Relevant for {} code", lang));
}
// Framework match
for framework in &context.frameworks {
let framework_name = framework.name();
if pattern
.tags
.iter()
.any(|t| t.eq_ignore_ascii_case(framework_name))
|| pattern
.description
.to_lowercase()
.contains(&framework_name.to_lowercase())
{
reasons.push(format!("Used with {}", framework_name));
}
}
// Usage count
if pattern.usage_count > 5 {
reasons.push(format!("Commonly used ({} times)", pattern.usage_count));
}
if reasons.is_empty() {
"May be applicable in this context".to_string()
} else {
reasons.join("; ")
}
}
/// Update pattern usage count
pub fn record_pattern_usage(&mut self, pattern_id: &str) -> Result<()> {
if let Some(pattern) = self.patterns.get_mut(pattern_id) {
pattern.usage_count += 1;
Ok(())
} else {
Err(PatternError::NotFound(pattern_id.to_string()))
}
}
/// Delete a pattern
pub fn delete_pattern(&mut self, pattern_id: &str) -> Result<()> {
if self.patterns.remove(pattern_id).is_some() {
// Clean up indexes
for (_, ids) in self.patterns_by_language.iter_mut() {
ids.retain(|id| id != pattern_id);
}
self.pattern_keywords.remove(pattern_id);
Ok(())
} else {
Err(PatternError::NotFound(pattern_id.to_string()))
}
}
/// Search patterns by query
pub fn search_patterns(&self, query: &str) -> Vec<&CodePattern> {
let query_lower = query.to_lowercase();
let query_words: Vec<_> = query_lower.split_whitespace().collect();
let mut scored: Vec<_> = self
.patterns
.values()
.filter_map(|pattern| {
let name_match = pattern.name.to_lowercase().contains(&query_lower);
let desc_match = pattern.description.to_lowercase().contains(&query_lower);
let tag_match = pattern
.tags
.iter()
.any(|t| t.to_lowercase().contains(&query_lower));
// Count word matches
let keywords = self.pattern_keywords.get(&pattern.id)?;
let word_matches = query_words
.iter()
.filter(|w| keywords.iter().any(|kw| kw.contains(*w)))
.count();
let score = if name_match {
1.0
} else if tag_match {
0.8
} else if desc_match {
0.6
} else if word_matches > 0 {
0.4 * (word_matches as f64 / query_words.len() as f64)
} else {
return None;
};
Some((pattern, score))
})
.collect();
// Sort by score
scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
scored.into_iter().map(|(p, _)| p).collect()
}
/// Load patterns from storage (to be implemented with actual storage)
pub fn load_patterns(&mut self, patterns: Vec<CodePattern>) -> Result<()> {
for pattern in patterns {
self.learn_pattern(pattern)?;
}
Ok(())
}
/// Export all patterns for storage
pub fn export_patterns(&self) -> Vec<CodePattern> {
self.patterns.values().cloned().collect()
}
}
impl Default for PatternDetector {
    /// Equivalent to `PatternDetector::new`: an empty detector.
    fn default() -> Self {
        Self::new()
    }
}
// ============================================================================
// BUILT-IN PATTERNS
// ============================================================================

/// Create built-in patterns for common coding patterns
///
/// Seeds the detector so pattern matching is useful before the user has
/// taught anything. Each pattern starts with `usage_count: 0` and a fresh
/// `created_at` timestamp.
pub fn create_builtin_patterns() -> Vec<CodePattern> {
    vec![
        // Rust Error Handling Pattern
        CodePattern {
            id: "builtin-rust-error-handling".to_string(),
            name: "Rust Error Handling with thiserror".to_string(),
            description: "Use thiserror for defining custom error types with derive macros"
                .to_string(),
            example_code: r#"
#[derive(Debug, thiserror::Error)]
pub enum MyError {
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    #[error("Parse error: {0}")]
    Parse(String),
}
pub type Result<T> = std::result::Result<T, MyError>;
"#
            .to_string(),
            example_files: vec![],
            when_to_use: "When defining domain-specific error types in Rust".to_string(),
            when_not_to_use: Some("For simple one-off errors, anyhow might be simpler".to_string()),
            language: Some("rust".to_string()),
            created_at: Utc::now(),
            usage_count: 0,
            tags: vec!["error-handling".to_string(), "rust".to_string()],
            related_patterns: vec!["builtin-rust-result".to_string()],
        },
        // TypeScript React Component Pattern
        CodePattern {
            id: "builtin-react-functional".to_string(),
            name: "React Functional Component".to_string(),
            description: "Modern React functional component with TypeScript".to_string(),
            example_code: r#"
interface Props {
    title: string;
    onClick?: () => void;
}
export function MyComponent({ title, onClick }: Props) {
    return (
        <div onClick={onClick}>
            <h1>{title}</h1>
        </div>
    );
}
"#
            .to_string(),
            example_files: vec![],
            when_to_use: "For all new React components".to_string(),
            when_not_to_use: Some("Class components are rarely needed in modern React".to_string()),
            language: Some("typescript".to_string()),
            created_at: Utc::now(),
            usage_count: 0,
            tags: vec![
                "react".to_string(),
                "typescript".to_string(),
                "component".to_string(),
            ],
            related_patterns: vec![],
        },
        // Repository Pattern
        CodePattern {
            id: "builtin-repository-pattern".to_string(),
            name: "Repository Pattern".to_string(),
            description: "Abstract data access behind a repository interface".to_string(),
            example_code: r#"
pub trait UserRepository {
    fn find_by_id(&self, id: &str) -> Result<Option<User>>;
    fn save(&self, user: &User) -> Result<()>;
    fn delete(&self, id: &str) -> Result<()>;
}
pub struct SqliteUserRepository {
    conn: Connection,
}
impl UserRepository for SqliteUserRepository {
    // Implementation...
}
"#
            .to_string(),
            example_files: vec![],
            when_to_use: "When you need to decouple domain logic from data access".to_string(),
            when_not_to_use: Some("For simple CRUD with no complex domain logic".to_string()),
            language: Some("rust".to_string()),
            created_at: Utc::now(),
            usage_count: 0,
            tags: vec!["architecture".to_string(), "data-access".to_string()],
            related_patterns: vec![],
        },
    ]
}
// ============================================================================
// TESTS
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use crate::codebase::context::ProjectType;

    /// A minimal, valid pattern shared by the tests below.
    fn create_test_pattern() -> CodePattern {
        CodePattern {
            id: "test-pattern-1".to_string(),
            name: "Test Pattern".to_string(),
            description: "A test pattern for unit testing".to_string(),
            example_code: "let x = test_function();".to_string(),
            example_files: vec![PathBuf::from("src/test.rs")],
            when_to_use: "When testing".to_string(),
            when_not_to_use: None,
            language: Some("rust".to_string()),
            created_at: Utc::now(),
            usage_count: 0,
            tags: vec!["test".to_string()],
            related_patterns: vec![],
        }
    }

    #[test]
    fn test_learn_pattern() {
        let mut det = PatternDetector::new();
        assert!(det.learn_pattern(create_test_pattern()).is_ok());

        let stored = det.get_pattern("test-pattern-1");
        assert!(stored.is_some());
        assert_eq!(stored.unwrap().name, "Test Pattern");
    }

    #[test]
    fn test_detect_patterns() {
        let mut det = PatternDetector::new();
        det.learn_pattern(create_test_pattern()).unwrap();

        let snippet = "fn main() { let x = test_function(); }";
        let found = det.detect_patterns(snippet, "rust").unwrap();
        assert!(!found.is_empty());
    }

    #[test]
    fn test_get_patterns_for_language() {
        let mut det = PatternDetector::new();
        det.learn_pattern(create_test_pattern()).unwrap();

        assert_eq!(det.get_patterns_for_language("rust").len(), 1);
        assert!(det.get_patterns_for_language("typescript").is_empty());
    }

    #[test]
    fn test_search_patterns() {
        let mut det = PatternDetector::new();
        det.learn_pattern(create_test_pattern()).unwrap();

        assert_eq!(det.search_patterns("test").len(), 1);
        assert!(det.search_patterns("unknown").is_empty());
    }

    #[test]
    fn test_delete_pattern() {
        let mut det = PatternDetector::new();
        det.learn_pattern(create_test_pattern()).unwrap();
        assert!(det.get_pattern("test-pattern-1").is_some());

        det.delete_pattern("test-pattern-1").unwrap();
        assert!(det.get_pattern("test-pattern-1").is_none());
    }

    #[test]
    fn test_builtin_patterns() {
        let builtins = create_builtin_patterns();
        assert!(!builtins.is_empty());

        // Every built-in must carry the fields the detector relies on.
        for pattern in builtins {
            assert!(!pattern.id.is_empty());
            assert!(!pattern.name.is_empty());
            assert!(!pattern.description.is_empty());
            assert!(!pattern.when_to_use.is_empty());
        }
    }
}

View file

@ -0,0 +1,708 @@
//! File relationship tracking for codebase memory
//!
//! This module tracks relationships between files:
//! - Co-edit patterns (files edited together)
//! - Import/dependency relationships
//! - Test-implementation relationships
//! - Domain groupings
//!
//! Understanding file relationships helps:
//! - Suggest related files when editing
//! - Provide better context for code generation
//! - Identify architectural boundaries
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use super::types::{FileRelationship, RelationType, RelationshipSource};
// ============================================================================
// ERRORS
// ============================================================================

/// Errors produced by relationship tracking.
#[derive(Debug, thiserror::Error)]
pub enum RelationshipError {
    /// No relationship exists with the requested ID.
    #[error("Relationship not found: {0}")]
    NotFound(String),
    /// The relationship failed validation (e.g. fewer than 2 files).
    #[error("Invalid relationship: {0}")]
    Invalid(String),
}

/// Convenience alias: this module's fallible functions fail with [`RelationshipError`].
pub type Result<T> = std::result::Result<T, RelationshipError>;
// ============================================================================
// RELATED FILE
// ============================================================================

/// A file that is related to another file
///
/// Returned by `RelationshipTracker::get_related_files`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RelatedFile {
    /// Path to the related file
    pub path: PathBuf,
    /// Type of relationship
    pub relationship_type: RelationType,
    /// Strength of the relationship (0.0 - 1.0)
    pub strength: f64,
    /// Human-readable description
    pub description: String,
}

// ============================================================================
// RELATIONSHIP GRAPH
// ============================================================================

/// Graph structure for visualizing file relationships
///
/// Built by `RelationshipTracker::build_graph`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RelationshipGraph {
    /// Nodes (files) in the graph
    pub nodes: Vec<GraphNode>,
    /// Edges (relationships) in the graph
    pub edges: Vec<GraphEdge>,
    /// Graph metadata
    pub metadata: GraphMetadata,
}

/// A node in the relationship graph
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GraphNode {
    /// Unique ID for this node (of the form "node-N")
    pub id: String,
    /// File path
    pub path: PathBuf,
    /// Display label (the file name)
    pub label: String,
    /// Node type (file extension, used for styling)
    pub node_type: String,
    /// Number of connections (edges touching this node)
    pub degree: usize,
}

/// An edge in the relationship graph
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GraphEdge {
    /// Source node ID
    pub source: String,
    /// Target node ID
    pub target: String,
    /// Relationship type
    pub relationship_type: RelationType,
    /// Edge weight (relationship strength, 0.0 - 1.0)
    pub weight: f64,
    /// Edge label (Debug rendering of the relationship type)
    pub label: String,
}

/// Metadata about the graph
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GraphMetadata {
    /// Total number of nodes
    pub node_count: usize,
    /// Total number of edges
    pub edge_count: usize,
    /// When the graph was built
    pub built_at: DateTime<Utc>,
    /// Average relationship strength (0.0 when there are no edges)
    pub average_strength: f64,
}
// ============================================================================
// CO-EDIT SESSION
// ============================================================================

/// Tracks files edited together in a session
///
/// A session groups edits that happen within 30 minutes of each other; see
/// `RelationshipTracker::record_coedit`.
#[derive(Debug, Clone)]
struct CoEditSession {
    /// Files in this session
    files: HashSet<PathBuf>,
    /// When the session started (for analytics/debugging)
    #[allow(dead_code)]
    started_at: DateTime<Utc>,
    /// When the session was last updated; used to detect session expiry
    last_updated: DateTime<Utc>,
}
// ============================================================================
// RELATIONSHIP TRACKER
// ============================================================================

/// Tracks relationships between files in a codebase
pub struct RelationshipTracker {
    /// All relationships indexed by ID
    relationships: HashMap<String, FileRelationship>,
    /// Relationship IDs indexed by file for fast lookup
    file_relationships: HashMap<PathBuf, Vec<String>>,
    /// Current co-edit session (None when no session is active)
    current_session: Option<CoEditSession>,
    /// Co-edit counts between file pairs; each key is sorted so that
    /// (a, b) and (b, a) share one entry
    coedit_counts: HashMap<(PathBuf, PathBuf), u32>,
    /// ID counter for new relationships
    next_id: u32,
}
impl RelationshipTracker {
/// Create a new relationship tracker
pub fn new() -> Self {
Self {
relationships: HashMap::new(),
file_relationships: HashMap::new(),
current_session: None,
coedit_counts: HashMap::new(),
next_id: 1,
}
}
/// Generate a new relationship ID
fn new_id(&mut self) -> String {
let id = format!("rel-{}", self.next_id);
self.next_id += 1;
id
}
/// Add a relationship
pub fn add_relationship(&mut self, relationship: FileRelationship) -> Result<String> {
if relationship.files.len() < 2 {
return Err(RelationshipError::Invalid(
"Relationship must have at least 2 files".to_string(),
));
}
let id = relationship.id.clone();
// Index by each file
for file in &relationship.files {
self.file_relationships
.entry(file.clone())
.or_default()
.push(id.clone());
}
self.relationships.insert(id.clone(), relationship);
Ok(id)
}
    /// Record that files were edited together
    ///
    /// Maintains a rolling co-edit session: edits within 30 minutes of the
    /// previous call extend the current session; a longer gap finalizes the
    /// old session (which may create relationships) and starts a new one.
    /// Pairwise co-edit counts are incremented on every call regardless.
    /// Calls with fewer than 2 files are no-ops.
    pub fn record_coedit(&mut self, files: &[PathBuf]) -> Result<()> {
        if files.len() < 2 {
            return Ok(()); // Need at least 2 files for a relationship
        }

        let now = Utc::now();

        // Update or create session
        match &mut self.current_session {
            Some(session) => {
                // Check if session is still active (within 30 minutes)
                let elapsed = now.signed_duration_since(session.last_updated);
                if elapsed.num_minutes() > 30 {
                    // Session expired, finalize it and start new
                    self.finalize_session()?;
                    self.current_session = Some(CoEditSession {
                        files: files.iter().cloned().collect(),
                        started_at: now,
                        last_updated: now,
                    });
                } else {
                    // Add files to current session
                    session.files.extend(files.iter().cloned());
                    session.last_updated = now;
                }
            }
            None => {
                // Start new session
                self.current_session = Some(CoEditSession {
                    files: files.iter().cloned().collect(),
                    started_at: now,
                    last_updated: now,
                });
            }
        }

        // Update co-edit counts for each unordered pair; the pair key is
        // sorted so (a, b) and (b, a) share a single map entry.
        for i in 0..files.len() {
            for j in (i + 1)..files.len() {
                let pair = if files[i] < files[j] {
                    (files[i].clone(), files[j].clone())
                } else {
                    (files[j].clone(), files[i].clone())
                };
                *self.coedit_counts.entry(pair).or_insert(0) += 1;
            }
        }

        Ok(())
    }
    /// Finalize the current session and create relationships
    ///
    /// Consumes the active session (if any) and, for every unordered pair of
    /// files in it whose overall co-edit count is at least 3, records a
    /// `FrequentCochange` relationship (strength = count / 10, capped at 1.0)
    /// unless a relationship between those two files already exists.
    fn finalize_session(&mut self) -> Result<()> {
        if let Some(session) = self.current_session.take() {
            let files: Vec<_> = session.files.into_iter().collect();

            if files.len() >= 2 {
                // Create relationships for frequent co-edits
                for i in 0..files.len() {
                    for j in (i + 1)..files.len() {
                        // Sorted pair key, matching `record_coedit`.
                        let pair = if files[i] < files[j] {
                            (files[i].clone(), files[j].clone())
                        } else {
                            (files[j].clone(), files[i].clone())
                        };

                        let count = self.coedit_counts.get(&pair).copied().unwrap_or(0);

                        // Only create relationship if edited together multiple times
                        if count >= 3 {
                            let strength = (count as f64 / 10.0).min(1.0);
                            let id = self.new_id();

                            let relationship = FileRelationship {
                                id: id.clone(),
                                files: vec![pair.0.clone(), pair.1.clone()],
                                relationship_type: RelationType::FrequentCochange,
                                strength,
                                description: format!(
                                    "Edited together {} times in recent sessions",
                                    count
                                ),
                                created_at: Utc::now(),
                                last_confirmed: Some(Utc::now()),
                                // NOTE(review): these links are inferred from
                                // co-edit history, yet the source is marked
                                // UserDefined — confirm the intended variant.
                                source: RelationshipSource::UserDefined,
                                observation_count: count,
                            };

                            // Check if relationship already exists
                            let exists = self
                                .relationships
                                .values()
                                .any(|r| r.files.contains(&pair.0) && r.files.contains(&pair.1));

                            if !exists {
                                self.add_relationship(relationship)?;
                            }
                        }
                    }
                }
            }
        }

        Ok(())
    }
/// Get files related to a given file
pub fn get_related_files(&self, file: &Path) -> Result<Vec<RelatedFile>> {
let path = file.to_path_buf();
let relationship_ids = self.file_relationships.get(&path);
let related: Vec<_> = relationship_ids
.map(|ids| {
ids.iter()
.filter_map(|id| self.relationships.get(id))
.flat_map(|rel| {
rel.files
.iter()
.filter(|f| *f != &path)
.map(|f| RelatedFile {
path: f.clone(),
relationship_type: rel.relationship_type,
strength: rel.strength,
description: rel.description.clone(),
})
})
.collect()
})
.unwrap_or_default();
// Also check for test file relationships
let mut additional = self.infer_test_relationships(file);
additional.extend(related);
// Deduplicate by path
let mut seen = HashSet::new();
let deduped: Vec<_> = additional
.into_iter()
.filter(|r| seen.insert(r.path.clone()))
.collect();
Ok(deduped)
}
/// Infer test file relationships based on naming conventions
fn infer_test_relationships(&self, file: &Path) -> Vec<RelatedFile> {
let mut related = Vec::new();
let file_stem = file
.file_stem()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_default();
let extension = file
.extension()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_default();
let parent = file.parent().unwrap_or(Path::new("."));
// Check for test file naming patterns
let is_test = file_stem.contains("test")
|| file_stem.contains("spec")
|| file_stem.ends_with("_test")
|| file_stem.starts_with("test_");
if is_test {
// This is a test file - find the implementation
let impl_stem = file_stem
.replace("_test", "")
.replace(".test", "")
.replace("_spec", "")
.replace(".spec", "")
.trim_start_matches("test_")
.to_string();
let impl_path = parent.join(format!("{}.{}", impl_stem, extension));
if impl_path.exists() {
related.push(RelatedFile {
path: impl_path,
relationship_type: RelationType::TestsImplementation,
strength: 0.9,
description: "Implementation file for this test".to_string(),
});
}
} else {
// This is an implementation - find the test file
let test_patterns = [
format!("{}_test.{}", file_stem, extension),
format!("{}.test.{}", file_stem, extension),
format!("test_{}.{}", file_stem, extension),
format!("{}_spec.{}", file_stem, extension),
format!("{}.spec.{}", file_stem, extension),
];
for pattern in &test_patterns {
let test_path = parent.join(pattern);
if test_path.exists() {
related.push(RelatedFile {
path: test_path,
relationship_type: RelationType::TestsImplementation,
strength: 0.9,
description: "Test file for this implementation".to_string(),
});
break;
}
}
// Check tests/ directory
if let Some(grandparent) = parent.parent() {
let tests_dir = grandparent.join("tests");
if tests_dir.exists() {
for pattern in &test_patterns {
let test_path = tests_dir.join(pattern);
if test_path.exists() {
related.push(RelatedFile {
path: test_path,
relationship_type: RelationType::TestsImplementation,
strength: 0.8,
description: "Test file in tests/ directory".to_string(),
});
}
}
}
}
}
related
}
    /// Build a relationship graph for visualization
    ///
    /// Every file mentioned by any relationship becomes a node; every stored
    /// relationship contributes one edge. Node degrees and the average edge
    /// strength are computed as part of the build.
    pub fn build_graph(&self) -> Result<RelationshipGraph> {
        let mut nodes = Vec::new();
        let mut edges = Vec::new();
        let mut node_ids: HashMap<PathBuf, String> = HashMap::new();
        let mut node_degrees: HashMap<String, usize> = HashMap::new();

        // Build nodes from all files in relationships
        for relationship in self.relationships.values() {
            for file in &relationship.files {
                if !node_ids.contains_key(file) {
                    let id = format!("node-{}", node_ids.len());
                    node_ids.insert(file.clone(), id.clone());

                    let label = file
                        .file_name()
                        .map(|n| n.to_string_lossy().to_string())
                        .unwrap_or_else(|| file.to_string_lossy().to_string());

                    let node_type = file
                        .extension()
                        .map(|e| e.to_string_lossy().to_string())
                        .unwrap_or_else(|| "unknown".to_string());

                    nodes.push(GraphNode {
                        id: id.clone(),
                        path: file.clone(),
                        label,
                        node_type,
                        degree: 0, // Will update later
                    });
                }
            }
        }

        // Build edges from relationships.
        // NOTE(review): only the first two files of each relationship become
        // an edge; any additional files contribute nodes but no edges —
        // confirm this is intended for >2-file relationships.
        for relationship in self.relationships.values() {
            if relationship.files.len() >= 2 {
                // Skip relationships where files aren't in the node map
                let Some(source_id) = node_ids.get(&relationship.files[0]).cloned() else {
                    continue;
                };
                let Some(target_id) = node_ids.get(&relationship.files[1]).cloned() else {
                    continue;
                };

                // Update degrees
                *node_degrees.entry(source_id.clone()).or_insert(0) += 1;
                *node_degrees.entry(target_id.clone()).or_insert(0) += 1;

                let label = format!("{:?}", relationship.relationship_type);

                edges.push(GraphEdge {
                    source: source_id,
                    target: target_id,
                    relationship_type: relationship.relationship_type,
                    weight: relationship.strength,
                    label,
                });
            }
        }

        // Update node degrees
        for node in &mut nodes {
            node.degree = node_degrees.get(&node.id).copied().unwrap_or(0);
        }

        // Calculate metadata (average strength is 0.0 for an empty graph)
        let average_strength = if edges.is_empty() {
            0.0
        } else {
            edges.iter().map(|e| e.weight).sum::<f64>() / edges.len() as f64
        };

        let metadata = GraphMetadata {
            node_count: nodes.len(),
            edge_count: edges.len(),
            built_at: Utc::now(),
            average_strength,
        };

        Ok(RelationshipGraph {
            nodes,
            edges,
            metadata,
        })
    }
/// Get a specific relationship by ID.
///
/// Returns `None` if no relationship with that ID has been added.
pub fn get_relationship(&self, id: &str) -> Option<&FileRelationship> {
    self.relationships.get(id)
}
/// Get all relationships.
///
/// Order is unspecified (backed by a hash map).
pub fn get_all_relationships(&self) -> Vec<&FileRelationship> {
    self.relationships.values().collect()
}
/// Delete a relationship by ID.
///
/// Removes the relationship from the main store and from the per-file
/// reverse index. Index entries that become empty are dropped entirely,
/// so the index does not accumulate dead keys over time.
///
/// # Errors
///
/// Returns `RelationshipError::NotFound` if no relationship with the
/// given ID exists.
pub fn delete_relationship(&mut self, id: &str) -> Result<()> {
    let Some(relationship) = self.relationships.remove(id) else {
        return Err(RelationshipError::NotFound(id.to_string()));
    };
    // Remove this ID from the per-file reverse index.
    for file in &relationship.files {
        let mut now_empty = false;
        if let Some(ids) = self.file_relationships.get_mut(file) {
            ids.retain(|i| i != id);
            now_empty = ids.is_empty();
        }
        // Fix: previously empty vectors were left behind, leaking index
        // entries for files with no remaining relationships.
        if now_empty {
            self.file_relationships.remove(file);
        }
    }
    Ok(())
}
/// Get every relationship whose type matches `rel_type`.
pub fn get_relationships_by_type(&self, rel_type: RelationType) -> Vec<&FileRelationship> {
    let mut matching = Vec::new();
    for relationship in self.relationships.values() {
        if relationship.relationship_type == rel_type {
            matching.push(relationship);
        }
    }
    matching
}
/// Adjust a relationship's strength by `delta`, clamping into `[0.0, 1.0]`.
///
/// Also stamps `last_confirmed` with the current time and bumps the
/// observation counter.
///
/// # Errors
///
/// Returns `RelationshipError::NotFound` if the ID is unknown.
pub fn update_strength(&mut self, id: &str, delta: f64) -> Result<()> {
    match self.relationships.get_mut(id) {
        Some(relationship) => {
            let adjusted = relationship.strength + delta;
            relationship.strength = adjusted.clamp(0.0, 1.0);
            relationship.last_confirmed = Some(Utc::now());
            relationship.observation_count += 1;
            Ok(())
        }
        None => Err(RelationshipError::NotFound(id.to_string())),
    }
}
/// Load relationships from storage, registering each one in turn.
///
/// Stops at (and returns) the first error from `add_relationship`.
pub fn load_relationships(&mut self, relationships: Vec<FileRelationship>) -> Result<()> {
    relationships
        .into_iter()
        .try_for_each(|relationship| self.add_relationship(relationship))
}
/// Export all relationships for storage.
///
/// Returns owned clones so the tracker can keep serving reads while the
/// caller persists the snapshot.
pub fn export_relationships(&self) -> Vec<FileRelationship> {
    self.relationships.values().cloned().collect()
}
/// Get the most connected files (highest degree in the relationship graph).
///
/// "Degree" here is the number of relationships a file participates in.
/// Results are sorted by degree descending, with ties broken by path so
/// the output is deterministic across runs (HashMap iteration order is
/// otherwise random, which previously made tie ordering unstable).
pub fn get_hub_files(&self, limit: usize) -> Vec<(PathBuf, usize)> {
    let mut file_degrees: HashMap<PathBuf, usize> = HashMap::new();
    for relationship in self.relationships.values() {
        for file in &relationship.files {
            *file_degrees.entry(file.clone()).or_insert(0) += 1;
        }
    }
    let mut sorted: Vec<_> = file_degrees.into_iter().collect();
    // Descending by degree, then ascending by path for stable output.
    // sort_unstable_by is fine: equal keys are fully ordered by the tie-break.
    sorted.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
    sorted.truncate(limit);
    sorted
}
}
// `Default` simply delegates to `RelationshipTracker::new`.
impl Default for RelationshipTracker {
    fn default() -> Self {
        Self::new()
    }
}
// ============================================================================
// TESTS
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;

    // Shared fixture: a two-file SharedDomain relationship with a fixed ID.
    fn create_test_relationship() -> FileRelationship {
        FileRelationship::new(
            "test-rel-1".to_string(),
            vec![PathBuf::from("src/main.rs"), PathBuf::from("src/lib.rs")],
            RelationType::SharedDomain,
            "Core entry points".to_string(),
        )
    }

    // An added relationship must be retrievable by its ID.
    #[test]
    fn test_add_relationship() {
        let mut tracker = RelationshipTracker::new();
        let rel = create_test_relationship();
        let result = tracker.add_relationship(rel);
        assert!(result.is_ok());
        let stored = tracker.get_relationship("test-rel-1");
        assert!(stored.is_some());
    }

    // Looking up one file of a related pair should surface the other file.
    #[test]
    fn test_get_related_files() {
        let mut tracker = RelationshipTracker::new();
        let rel = create_test_relationship();
        tracker.add_relationship(rel).unwrap();
        let related = tracker.get_related_files(Path::new("src/main.rs")).unwrap();
        assert!(!related.is_empty());
        assert!(related
            .iter()
            .any(|r| r.path == PathBuf::from("src/lib.rs")));
    }

    // A single two-file relationship yields a graph with 2 nodes and 1 edge.
    #[test]
    fn test_build_graph() {
        let mut tracker = RelationshipTracker::new();
        let rel = create_test_relationship();
        tracker.add_relationship(rel).unwrap();
        let graph = tracker.build_graph().unwrap();
        assert_eq!(graph.nodes.len(), 2);
        assert_eq!(graph.edges.len(), 1);
        assert_eq!(graph.metadata.node_count, 2);
        assert_eq!(graph.metadata.edge_count, 1);
    }

    // Deleting by ID makes the relationship unretrievable.
    #[test]
    fn test_delete_relationship() {
        let mut tracker = RelationshipTracker::new();
        let rel = create_test_relationship();
        tracker.add_relationship(rel).unwrap();
        assert!(tracker.get_relationship("test-rel-1").is_some());
        tracker.delete_relationship("test-rel-1").unwrap();
        assert!(tracker.get_relationship("test-rel-1").is_none());
    }

    // Repeated co-edits followed by finalize_session should produce a
    // FrequentCochange relationship.
    #[test]
    fn test_record_coedit() {
        let mut tracker = RelationshipTracker::new();
        let files = vec![PathBuf::from("src/a.rs"), PathBuf::from("src/b.rs")];
        // Record multiple coedits
        for _ in 0..5 {
            tracker.record_coedit(&files).unwrap();
        }
        // Finalize should create a relationship
        tracker.finalize_session().unwrap();
        // Should have a co-change relationship
        let relationships = tracker.get_relationships_by_type(RelationType::FrequentCochange);
        assert!(!relationships.is_empty());
    }

    // A file participating in 5 relationships should rank first with degree 5.
    #[test]
    fn test_get_hub_files() {
        let mut tracker = RelationshipTracker::new();
        // Create a hub file (main.rs) connected to multiple others
        for i in 0..5 {
            let rel = FileRelationship::new(
                format!("rel-{}", i),
                vec![
                    PathBuf::from("src/main.rs"),
                    PathBuf::from(format!("src/module{}.rs", i)),
                ],
                RelationType::ImportsDependency,
                "Import relationship".to_string(),
            );
            tracker.add_relationship(rel).unwrap();
        }
        let hubs = tracker.get_hub_files(3);
        assert!(!hubs.is_empty());
        assert_eq!(hubs[0].0, PathBuf::from("src/main.rs"));
        assert_eq!(hubs[0].1, 5);
    }
}

View file

@ -0,0 +1,799 @@
//! Codebase-specific memory types for Vestige
//!
//! This module defines the specialized node types that make Vestige's codebase memory
//! unique and powerful. These types capture the contextual knowledge that developers
//! accumulate but traditionally lose - architectural decisions, bug fixes, coding
//! patterns, and file relationships.
//!
//! This is Vestige's KILLER DIFFERENTIATOR. No other AI memory system understands
//! codebases at this level.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
// ============================================================================
// CODEBASE NODE - The Core Memory Type
// ============================================================================
/// Types of memories specific to codebases.
///
/// Each variant captures a different kind of knowledge that developers accumulate
/// but typically lose over time or when context-switching between projects.
///
/// Serialized with an internal `"type"` tag in `snake_case` (e.g.
/// `{"type": "bug_fix", ...}`); these tag strings match the values returned
/// by [`CodebaseNode::node_type`].
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum CodebaseNode {
    /// "We use X pattern because Y"
    ///
    /// Captures architectural decisions with their rationale. This is critical
    /// for maintaining consistency and understanding why the codebase evolved
    /// the way it did.
    ArchitecturalDecision(ArchitecturalDecision),
    /// "This bug was caused by X, fixed by Y"
    ///
    /// Records bug fixes with root cause analysis. Invaluable for preventing
    /// regression and understanding historical issues.
    BugFix(BugFix),
    /// "Use this pattern for X"
    ///
    /// Codifies recurring patterns with examples and guidance on when to use them.
    CodePattern(CodePattern),
    /// "These files always change together"
    ///
    /// Tracks file relationships discovered through git history analysis or
    /// explicit user teaching.
    FileRelationship(FileRelationship),
    /// "User prefers X over Y"
    ///
    /// Captures coding preferences and style decisions for consistent suggestions.
    CodingPreference(CodingPreference),
    /// "This function does X and is called by Y"
    ///
    /// Stores knowledge about specific code entities - functions, types, modules.
    CodeEntity(CodeEntity),
    /// "The current task is implementing X"
    ///
    /// Tracks ongoing work context for continuity across sessions.
    WorkContext(WorkContext),
}
impl CodebaseNode {
    /// Get the unique identifier for this node.
    ///
    /// Every variant carries an `id` field; this dispatches to it.
    pub fn id(&self) -> &str {
        match self {
            Self::ArchitecturalDecision(n) => &n.id,
            Self::BugFix(n) => &n.id,
            Self::CodePattern(n) => &n.id,
            Self::FileRelationship(n) => &n.id,
            Self::CodingPreference(n) => &n.id,
            Self::CodeEntity(n) => &n.id,
            Self::WorkContext(n) => &n.id,
        }
    }

    /// Get the node type as a string.
    ///
    /// These strings match the serde `"type"` tag (snake_case variant names)
    /// produced when the enum is serialized.
    pub fn node_type(&self) -> &'static str {
        match self {
            Self::ArchitecturalDecision(_) => "architectural_decision",
            Self::BugFix(_) => "bug_fix",
            Self::CodePattern(_) => "code_pattern",
            Self::FileRelationship(_) => "file_relationship",
            Self::CodingPreference(_) => "coding_preference",
            Self::CodeEntity(_) => "code_entity",
            Self::WorkContext(_) => "work_context",
        }
    }

    /// Get the creation timestamp carried by the inner node.
    pub fn created_at(&self) -> DateTime<Utc> {
        match self {
            Self::ArchitecturalDecision(n) => n.created_at,
            Self::BugFix(n) => n.created_at,
            Self::CodePattern(n) => n.created_at,
            Self::FileRelationship(n) => n.created_at,
            Self::CodingPreference(n) => n.created_at,
            Self::CodeEntity(n) => n.created_at,
            Self::WorkContext(n) => n.created_at,
        }
    }

    /// Get all file paths associated with this node.
    ///
    /// Which field supplies the paths depends on the variant (files affected
    /// for decisions, files changed for bug fixes, etc.). Coding preferences
    /// carry no file paths and always return an empty vector; code entities
    /// return at most one path (their optional definition file).
    pub fn associated_files(&self) -> Vec<&PathBuf> {
        match self {
            Self::ArchitecturalDecision(n) => n.files_affected.iter().collect(),
            Self::BugFix(n) => n.files_changed.iter().collect(),
            Self::CodePattern(n) => n.example_files.iter().collect(),
            Self::FileRelationship(n) => n.files.iter().collect(),
            Self::CodingPreference(_) => vec![],
            Self::CodeEntity(n) => n.file_path.as_ref().map(|p| vec![p]).unwrap_or_default(),
            Self::WorkContext(n) => n.active_files.iter().collect(),
        }
    }

    /// Convert to a searchable text representation.
    ///
    /// Produces a single human-readable summary line per node.
    // NOTE(review): assumed to feed a text-search index elsewhere — confirm
    // consumers before changing any of these format strings.
    pub fn to_searchable_text(&self) -> String {
        match self {
            Self::ArchitecturalDecision(n) => {
                format!(
                    "Architectural Decision: {} - Rationale: {} - Context: {}",
                    n.decision,
                    n.rationale,
                    // Missing context renders as an empty trailing segment.
                    n.context.as_deref().unwrap_or("")
                )
            }
            Self::BugFix(n) => {
                format!(
                    "Bug Fix: {} - Root Cause: {} - Solution: {}",
                    n.symptom, n.root_cause, n.solution
                )
            }
            Self::CodePattern(n) => {
                format!(
                    "Code Pattern: {} - {} - When to use: {}",
                    n.name, n.description, n.when_to_use
                )
            }
            Self::FileRelationship(n) => {
                format!(
                    "File Relationship: {:?} - Type: {:?} - {}",
                    n.files, n.relationship_type, n.description
                )
            }
            Self::CodingPreference(n) => {
                format!(
                    "Coding Preference ({}): {} vs {:?}",
                    n.context, n.preference, n.counter_preference
                )
            }
            Self::CodeEntity(n) => {
                format!(
                    "Code Entity: {} ({:?}) - {}",
                    n.name, n.entity_type, n.description
                )
            }
            Self::WorkContext(n) => {
                format!(
                    "Work Context: {} - {} - Active files: {:?}",
                    n.task_description,
                    n.status.as_str(),
                    n.active_files
                )
            }
        }
    }
}
// ============================================================================
// ARCHITECTURAL DECISION
// ============================================================================

/// Records an architectural decision with its rationale.
///
/// Serialized with camelCase field names (serde).
///
/// Example:
/// - Decision: "Use Event Sourcing for order management"
/// - Rationale: "Need complete audit trail and ability to replay state"
/// - Files: ["src/orders/events.rs", "src/orders/aggregate.rs"]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ArchitecturalDecision {
    pub id: String,
    /// The decision that was made
    pub decision: String,
    /// Why this decision was made
    pub rationale: String,
    /// Files affected by this decision
    pub files_affected: Vec<PathBuf>,
    /// Git commit SHA where this was implemented (if applicable)
    pub commit_sha: Option<String>,
    /// When this decision was recorded
    pub created_at: DateTime<Utc>,
    /// When this decision was last updated
    pub updated_at: Option<DateTime<Utc>>,
    /// Additional context or notes
    pub context: Option<String>,
    /// Tags for categorization
    pub tags: Vec<String>,
    /// Status of the decision (defaults to `Accepted`)
    pub status: DecisionStatus,
    /// Alternatives that were considered
    pub alternatives_considered: Vec<String>,
}
/// Status of an architectural decision.
///
/// Defaults to [`DecisionStatus::Accepted`], matching the common case of
/// recording decisions after they have already been made.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DecisionStatus {
    /// Decision is proposed but not yet implemented
    Proposed,
    /// Decision is accepted and being implemented
    #[default]
    Accepted,
    /// Decision has been superseded by another
    Superseded,
    /// Decision was rejected or no longer applies
    Deprecated,
}
// ============================================================================
// BUG FIX
// ============================================================================

/// Records a bug fix with root cause analysis.
///
/// Serialized with camelCase field names (serde).
///
/// This is invaluable for:
/// - Preventing regressions
/// - Understanding why certain code exists
/// - Training junior developers on common pitfalls
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct BugFix {
    pub id: String,
    /// What symptoms was the bug causing?
    pub symptom: String,
    /// What was the actual root cause?
    pub root_cause: String,
    /// How was it fixed?
    pub solution: String,
    /// Files that were changed to fix the bug
    pub files_changed: Vec<PathBuf>,
    /// Git commit SHA of the fix (required, unlike decisions' optional SHA)
    pub commit_sha: String,
    /// When the fix was recorded
    pub created_at: DateTime<Utc>,
    /// Link to issue tracker (if applicable)
    pub issue_link: Option<String>,
    /// Severity of the bug (defaults to `Medium`)
    pub severity: BugSeverity,
    /// How the bug was discovered
    pub discovered_by: Option<String>,
    /// Prevention measures (what would have caught this earlier)
    pub prevention_notes: Option<String>,
    /// Tags for categorization
    pub tags: Vec<String>,
}
/// Severity level of a bug.
///
/// Defaults to [`BugSeverity::Medium`] when not specified.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BugSeverity {
    Critical,
    High,
    #[default]
    Medium,
    Low,
    Trivial,
}
// ============================================================================
// CODE PATTERN
// ============================================================================

/// Records a reusable code pattern with examples and guidance.
///
/// Serialized with camelCase field names (serde).
///
/// Patterns can be:
/// - Discovered automatically from git history
/// - Taught explicitly by the user
/// - Extracted from documentation
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CodePattern {
    pub id: String,
    /// Name of the pattern (e.g., "Repository Pattern", "Error Handling")
    pub name: String,
    /// Detailed description of the pattern
    pub description: String,
    /// Example code showing the pattern
    pub example_code: String,
    /// Files containing examples of this pattern
    pub example_files: Vec<PathBuf>,
    /// When should this pattern be used?
    pub when_to_use: String,
    /// When should this pattern NOT be used?
    pub when_not_to_use: Option<String>,
    /// Language this pattern applies to (None = language-agnostic)
    pub language: Option<String>,
    /// When this pattern was recorded
    pub created_at: DateTime<Utc>,
    /// How many times this pattern has been applied
    pub usage_count: u32,
    /// Tags for categorization
    pub tags: Vec<String>,
    /// Related patterns (referenced by name or ID — confirm convention with callers)
    pub related_patterns: Vec<String>,
}
// ============================================================================
// FILE RELATIONSHIP
// ============================================================================

/// Tracks relationships between files in the codebase.
///
/// Serialized with camelCase field names (serde).
///
/// Relationships can be:
/// - Discovered from imports/dependencies
/// - Detected from git co-change patterns
/// - Explicitly taught by the user
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FileRelationship {
    pub id: String,
    /// The files involved in this relationship
    pub files: Vec<PathBuf>,
    /// Type of relationship
    pub relationship_type: RelationType,
    /// Strength of the relationship (0.0 - 1.0)
    /// For co-change relationships, this is the frequency they change together
    pub strength: f64,
    /// Human-readable description
    pub description: String,
    /// When this relationship was first detected
    pub created_at: DateTime<Utc>,
    /// When this relationship was last confirmed (None = never re-confirmed)
    pub last_confirmed: Option<DateTime<Utc>>,
    /// How this relationship was discovered
    pub source: RelationshipSource,
    /// Number of times this relationship has been observed
    pub observation_count: u32,
}
/// Types of relationships between files.
///
/// For directional variants, "A" is by convention the first file in the
/// relationship's `files` list and "B" the second.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RelationType {
    /// A imports/depends on B
    ImportsDependency,
    /// A tests implementation in B
    TestsImplementation,
    /// A configures service B
    ConfiguresService,
    /// Files are in the same domain/feature area
    SharedDomain,
    /// Files frequently change together in commits
    FrequentCochange,
    /// A extends/implements B
    ExtendsImplements,
    /// A is the interface, B is the implementation
    InterfaceImplementation,
    /// A and B are related through documentation
    DocumentationReference,
}
/// How a relationship was discovered.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RelationshipSource {
    /// Detected from git history co-change analysis
    GitCochange,
    /// Detected from import/dependency analysis
    ImportAnalysis,
    /// Detected from AST analysis
    AstAnalysis,
    /// Explicitly taught by user
    UserDefined,
    /// Inferred from file naming conventions
    NamingConvention,
}
// ============================================================================
// CODING PREFERENCE
// ============================================================================

/// Records a user's coding preferences for consistent suggestions.
///
/// Serialized with camelCase field names (serde).
///
/// Examples:
/// - "For error handling, prefer Result over panic"
/// - "For naming, use snake_case for functions"
/// - "For async, prefer tokio over async-std"
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CodingPreference {
    pub id: String,
    /// Context where this preference applies (e.g., "error handling", "naming")
    pub context: String,
    /// The preferred approach
    pub preference: String,
    /// What NOT to do (optional)
    pub counter_preference: Option<String>,
    /// Examples showing the preference in action
    pub examples: Vec<String>,
    /// Confidence in this preference (0.0 - 1.0)
    /// Higher confidence = more consistently applied
    pub confidence: f64,
    /// When this preference was recorded
    pub created_at: DateTime<Utc>,
    /// Language this applies to (None = all languages)
    pub language: Option<String>,
    /// How this preference was learned
    pub source: PreferenceSource,
    /// Number of times this preference has been observed
    pub observation_count: u32,
}
/// How a preference was learned.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PreferenceSource {
    /// Explicitly stated by user
    UserStated,
    /// Inferred from code review feedback
    CodeReview,
    /// Detected from coding patterns in history
    PatternDetection,
    /// From project configuration (e.g., rustfmt.toml)
    ProjectConfig,
}
// ============================================================================
// CODE ENTITY
// ============================================================================

/// Knowledge about a specific code entity (function, type, module, etc.)
///
/// Serialized with camelCase field names (serde).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CodeEntity {
    pub id: String,
    /// Name of the entity
    pub name: String,
    /// Type of entity
    pub entity_type: EntityType,
    /// Description of what this entity does
    pub description: String,
    /// File where this entity is defined (None if unknown)
    pub file_path: Option<PathBuf>,
    /// Line number where entity starts
    pub line_number: Option<u32>,
    /// Entities that this one depends on
    // NOTE(review): dependencies/dependents appear to hold entity names or
    // IDs — confirm the referencing convention with callers.
    pub dependencies: Vec<String>,
    /// Entities that depend on this one
    pub dependents: Vec<String>,
    /// When this was recorded
    pub created_at: DateTime<Utc>,
    /// Tags for categorization
    pub tags: Vec<String>,
    /// Usage notes or gotchas
    pub notes: Option<String>,
}
/// Type of code entity.
///
/// Covers constructs from multiple languages (e.g. `Trait` for Rust,
/// `Interface`/`Class` for TypeScript/Java-style languages).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EntityType {
    Function,
    Method,
    Struct,
    Enum,
    Trait,
    Interface,
    Class,
    Module,
    Constant,
    Variable,
    Type,
}
// ============================================================================
// WORK CONTEXT
// ============================================================================

/// Tracks the current work context for continuity across sessions.
///
/// Serialized with camelCase field names (serde).
///
/// This allows Vestige to remember:
/// - What task the user was working on
/// - What files were being edited
/// - What the next steps were
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct WorkContext {
    pub id: String,
    /// Description of the current task
    pub task_description: String,
    /// Files currently being worked on
    pub active_files: Vec<PathBuf>,
    /// Current git branch (None if not in a git repository)
    pub branch: Option<String>,
    /// Status of the work
    pub status: WorkStatus,
    /// Next steps that were planned
    pub next_steps: Vec<String>,
    /// Blockers or issues encountered
    pub blockers: Vec<String>,
    /// When this context was created
    pub created_at: DateTime<Utc>,
    /// When this context was last updated (not Optional — set on creation too)
    pub updated_at: DateTime<Utc>,
    /// Related issue/ticket IDs
    pub related_issues: Vec<String>,
    /// Notes about the work
    pub notes: Option<String>,
}
/// Status of work in progress.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum WorkStatus {
    /// Actively being worked on
    InProgress,
    /// Paused, will resume later
    Paused,
    /// Completed
    Completed,
    /// Blocked by something
    Blocked,
    /// Abandoned
    Abandoned,
}

impl WorkStatus {
    /// Static string form of the status.
    ///
    /// Matches the serde snake_case names, so display and serialization agree.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::InProgress => "in_progress",
            Self::Paused => "paused",
            Self::Completed => "completed",
            Self::Blocked => "blocked",
            Self::Abandoned => "abandoned",
        }
    }
}
// ============================================================================
// BUILDER HELPERS
// ============================================================================
impl ArchitecturalDecision {
    /// Create a decision from the required fields.
    ///
    /// All optional metadata starts empty; attach it with the `with_*`
    /// builders. Status defaults via `DecisionStatus::default()`.
    pub fn new(id: String, decision: String, rationale: String) -> Self {
        Self {
            id,
            decision,
            rationale,
            created_at: Utc::now(),
            updated_at: None,
            files_affected: Vec::new(),
            commit_sha: None,
            context: None,
            tags: Vec::new(),
            status: DecisionStatus::default(),
            alternatives_considered: Vec::new(),
        }
    }

    /// Attach the files affected by this decision.
    pub fn with_files(mut self, files: Vec<PathBuf>) -> Self {
        self.files_affected = files;
        self
    }

    /// Record the commit where the decision was implemented.
    pub fn with_commit(mut self, sha: String) -> Self {
        self.commit_sha = Some(sha);
        self
    }

    /// Add free-form context notes.
    pub fn with_context(mut self, context: String) -> Self {
        self.context = Some(context);
        self
    }

    /// Set categorization tags.
    pub fn with_tags(mut self, tags: Vec<String>) -> Self {
        self.tags = tags;
        self
    }
}
impl BugFix {
    /// Create a bug-fix record from the required fields.
    ///
    /// Optional metadata (changed files, severity, issue link, …) starts
    /// empty and can be attached with the `with_*` builders.
    pub fn new(
        id: String,
        symptom: String,
        root_cause: String,
        solution: String,
        commit_sha: String,
    ) -> Self {
        Self {
            id,
            symptom,
            root_cause,
            solution,
            commit_sha,
            created_at: Utc::now(),
            files_changed: Vec::new(),
            issue_link: None,
            severity: BugSeverity::default(),
            discovered_by: None,
            prevention_notes: None,
            tags: Vec::new(),
        }
    }

    /// Attach the files changed by the fix.
    pub fn with_files(mut self, files: Vec<PathBuf>) -> Self {
        self.files_changed = files;
        self
    }

    /// Override the default severity.
    pub fn with_severity(mut self, severity: BugSeverity) -> Self {
        self.severity = severity;
        self
    }

    /// Link the fix to an issue-tracker entry.
    pub fn with_issue(mut self, link: String) -> Self {
        self.issue_link = Some(link);
        self
    }
}
impl CodePattern {
    /// Create a pattern with its name, description, and usage guidance.
    ///
    /// Examples, language, and tags start empty; attach them with the
    /// `with_*` builders.
    pub fn new(id: String, name: String, description: String, when_to_use: String) -> Self {
        Self {
            id,
            name,
            description,
            when_to_use,
            example_code: String::new(),
            example_files: Vec::new(),
            when_not_to_use: None,
            language: None,
            created_at: Utc::now(),
            usage_count: 0,
            tags: Vec::new(),
            related_patterns: Vec::new(),
        }
    }

    /// Attach example code along with the files it was taken from.
    pub fn with_example(mut self, code: String, files: Vec<PathBuf>) -> Self {
        self.example_code = code;
        self.example_files = files;
        self
    }

    /// Restrict the pattern to a specific language.
    pub fn with_language(mut self, language: String) -> Self {
        self.language = Some(language);
        self
    }
}
impl FileRelationship {
    /// Create a user-defined relationship with a neutral starting strength.
    ///
    /// Strength begins at 0.5 and is adjusted by later confirmations.
    pub fn new(
        id: String,
        files: Vec<PathBuf>,
        relationship_type: RelationType,
        description: String,
    ) -> Self {
        Self {
            id,
            files,
            relationship_type,
            strength: 0.5,
            description,
            created_at: Utc::now(),
            last_confirmed: None,
            source: RelationshipSource::UserDefined,
            observation_count: 1,
        }
    }

    /// Build a co-change relationship discovered from git history.
    ///
    /// `strength` is the co-change frequency (0.0 - 1.0) and `count` is the
    /// number of commits in which the files changed together.
    pub fn from_git_cochange(id: String, files: Vec<PathBuf>, strength: f64, count: u32) -> Self {
        // Single timestamp so created_at and last_confirmed agree exactly.
        let now = Utc::now();
        Self {
            id,
            description: format!(
                "Files frequently change together ({} co-occurrences)",
                count
            ),
            // `files` is owned and never used again — move it directly.
            // (The previous `files.clone()` here was a redundant allocation.)
            files,
            relationship_type: RelationType::FrequentCochange,
            strength,
            created_at: now,
            last_confirmed: Some(now),
            source: RelationshipSource::GitCochange,
            observation_count: count,
        }
    }
}
impl CodingPreference {
    /// Create a user-stated preference with a neutral 0.5 confidence.
    pub fn new(id: String, context: String, preference: String) -> Self {
        Self {
            id,
            context,
            preference,
            created_at: Utc::now(),
            counter_preference: None,
            examples: Vec::new(),
            confidence: 0.5,
            language: None,
            source: PreferenceSource::UserStated,
            observation_count: 1,
        }
    }

    /// Record what should be avoided alongside the preference.
    pub fn with_counter(mut self, counter: String) -> Self {
        self.counter_preference = Some(counter);
        self
    }

    /// Attach illustrative examples.
    pub fn with_examples(mut self, examples: Vec<String>) -> Self {
        self.examples = examples;
        self
    }

    /// Set confidence, clamped into [0.0, 1.0].
    pub fn with_confidence(mut self, confidence: f64) -> Self {
        self.confidence = confidence.clamp(0.0, 1.0);
        self
    }
}
// ============================================================================
// TESTS
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;

    // Builders should populate both required and chained optional fields.
    #[test]
    fn test_architectural_decision_builder() {
        let decision = ArchitecturalDecision::new(
            "adr-001".to_string(),
            "Use Event Sourcing".to_string(),
            "Need complete audit trail".to_string(),
        )
        .with_files(vec![PathBuf::from("src/events.rs")])
        .with_tags(vec!["architecture".to_string()]);
        assert_eq!(decision.id, "adr-001");
        assert!(!decision.files_affected.is_empty());
        assert!(!decision.tags.is_empty());
    }

    // The enum wrapper should expose the inner node's ID and its type string.
    #[test]
    fn test_codebase_node_id() {
        let decision = ArchitecturalDecision::new(
            "test-id".to_string(),
            "Test".to_string(),
            "Test".to_string(),
        );
        let node = CodebaseNode::ArchitecturalDecision(decision);
        assert_eq!(node.id(), "test-id");
        assert_eq!(node.node_type(), "architectural_decision");
    }

    // Git co-change constructor should set type, source, and counters verbatim.
    #[test]
    fn test_file_relationship_from_git() {
        let rel = FileRelationship::from_git_cochange(
            "rel-001".to_string(),
            vec![PathBuf::from("src/a.rs"), PathBuf::from("src/b.rs")],
            0.8,
            15,
        );
        assert_eq!(rel.relationship_type, RelationType::FrequentCochange);
        assert_eq!(rel.source, RelationshipSource::GitCochange);
        assert_eq!(rel.strength, 0.8);
        assert_eq!(rel.observation_count, 15);
    }

    // Searchable text should include the pattern's name and description.
    #[test]
    fn test_searchable_text() {
        let pattern = CodePattern::new(
            "pat-001".to_string(),
            "Repository Pattern".to_string(),
            "Abstract data access".to_string(),
            "When you need to decouple domain logic from data access".to_string(),
        );
        let node = CodebaseNode::CodePattern(pattern);
        let text = node.to_searchable_text();
        assert!(text.contains("Repository Pattern"));
        assert!(text.contains("Abstract data access"));
    }
}

View file

@ -0,0 +1,729 @@
//! File system watching for automatic learning
//!
//! This module watches the codebase for changes and:
//! - Records co-edit patterns (files changed together)
//! - Triggers pattern detection on modified files
//! - Updates relationship strengths based on activity
//!
//! This enables Vestige to learn continuously from developer behavior
//! without requiring explicit user input.
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use chrono::{DateTime, Utc};
use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
use tokio::sync::{broadcast, mpsc, RwLock};
use super::patterns::PatternDetector;
use super::relationships::RelationshipTracker;
// ============================================================================
// ERRORS
// ============================================================================

/// Errors that can occur while watching a codebase.
#[derive(Debug, thiserror::Error)]
pub enum WatcherError {
    /// Underlying `notify` file-system watcher failure.
    #[error("Watcher error: {0}")]
    Notify(#[from] notify::Error),
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    /// Event/shutdown channel failure (message carries the detail).
    #[error("Channel error: {0}")]
    Channel(String),
    /// The path is already registered with this watcher.
    #[error("Already watching: {0}")]
    AlreadyWatching(PathBuf),
    #[error("Not watching: {0}")]
    NotWatching(PathBuf),
    /// Propagated from the relationship tracker.
    #[error("Relationship error: {0}")]
    Relationship(#[from] super::relationships::RelationshipError),
}

/// Convenience alias: watcher operations all fail with [`WatcherError`].
pub type Result<T> = std::result::Result<T, WatcherError>;
// ============================================================================
// FILE EVENT
// ============================================================================

/// Represents a file change event.
///
/// Timestamped at observation time, not at the OS-level event time.
#[derive(Debug, Clone)]
pub struct FileEvent {
    /// Type of event
    pub kind: FileEventKind,
    /// Path(s) affected (a single event may cover several paths)
    pub paths: Vec<PathBuf>,
    /// When the event occurred
    pub timestamp: DateTime<Utc>,
}
/// Types of file events.
///
/// A simplified, crate-local view of `notify::EventKind`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileEventKind {
    /// File was created
    Created,
    /// File was modified
    Modified,
    /// File was deleted
    Deleted,
    /// File was renamed
    // NOTE(review): nothing currently maps to `Renamed` in the `From` impl
    // below — renames arrive as whatever kind `notify` reports them as.
    Renamed,
    /// Access event (read)
    Accessed,
}

impl From<EventKind> for FileEventKind {
    fn from(kind: EventKind) -> Self {
        match kind {
            EventKind::Create(_) => Self::Created,
            EventKind::Modify(_) => Self::Modified,
            EventKind::Remove(_) => Self::Deleted,
            EventKind::Access(_) => Self::Accessed,
            // Any other notify kind is treated as a modification.
            _ => Self::Modified, // Default to modified
        }
    }
}
// ============================================================================
// WATCHER CONFIG
// ============================================================================

/// Configuration for the codebase watcher.
///
/// See the `Default` impl for the out-of-the-box settings.
#[derive(Debug, Clone)]
pub struct WatcherConfig {
    /// Debounce interval for batching events
    pub debounce_interval: Duration,
    /// Patterns to ignore (gitignore-style)
    pub ignore_patterns: Vec<String>,
    /// File extensions to watch (None = all)
    pub watch_extensions: Option<Vec<String>>,
    /// Maximum depth for recursive watching (None = unlimited)
    pub max_depth: Option<usize>,
    /// Enable pattern detection on file changes
    pub detect_patterns: bool,
    /// Enable relationship tracking
    pub track_relationships: bool,
}
impl Default for WatcherConfig {
    /// Sensible defaults: 500 ms debounce, common build/VCS directories
    /// ignored, mainstream source extensions watched, pattern detection and
    /// relationship tracking both enabled.
    fn default() -> Self {
        // Directories and artifacts that would only generate noise.
        let ignore_patterns = [
            "**/node_modules/**",
            "**/target/**",
            "**/.git/**",
            "**/dist/**",
            "**/build/**",
            "**/*.lock",
            "**/*.log",
        ]
        .iter()
        .map(|p| p.to_string())
        .collect();

        // Source-code extensions for the mainstream languages.
        let extensions = [
            "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "kt", "swift", "cs", "cpp",
            "c", "h", "hpp", "rb", "php",
        ]
        .iter()
        .map(|e| e.to_string())
        .collect();

        Self {
            debounce_interval: Duration::from_millis(500),
            ignore_patterns,
            watch_extensions: Some(extensions),
            max_depth: None,
            detect_patterns: true,
            track_relationships: true,
        }
    }
}
// ============================================================================
// EDIT SESSION
// ============================================================================

/// Tracks files being edited in a session.
///
/// A session groups edits that happen close together in time; it is
/// considered expired once no edit has occurred within a timeout.
#[derive(Debug)]
struct EditSession {
    /// Files modified in this session
    files: HashSet<PathBuf>,
    /// When the session started (for analytics/debugging)
    #[allow(dead_code)]
    started_at: DateTime<Utc>,
    /// When the last edit occurred
    last_edit_at: DateTime<Utc>,
}
impl EditSession {
    /// Start a fresh session; both timestamps begin at "now".
    fn new() -> Self {
        let now = Utc::now();
        Self {
            files: HashSet::new(),
            started_at: now,
            last_edit_at: now,
        }
    }

    /// Record a file edit and refresh the activity timestamp.
    fn add_file(&mut self, path: PathBuf) {
        self.last_edit_at = Utc::now();
        self.files.insert(path);
    }

    /// True when no edit has occurred within `timeout`.
    fn is_expired(&self, timeout: Duration) -> bool {
        Utc::now()
            .signed_duration_since(self.last_edit_at)
            .to_std()
            // A negative elapsed duration (clock skew) counts as zero,
            // i.e. the session is not expired.
            .map_or(false, |elapsed| elapsed > timeout)
    }

    /// Snapshot of the files touched in this session.
    fn files_list(&self) -> Vec<PathBuf> {
        Vec::from_iter(self.files.iter().cloned())
    }
}
// ============================================================================
// CODEBASE WATCHER
// ============================================================================
/// Watches a codebase for file changes
pub struct CodebaseWatcher {
/// Relationship tracker
tracker: Arc<RwLock<RelationshipTracker>>,
/// Pattern detector
detector: Arc<RwLock<PatternDetector>>,
/// Configuration
config: WatcherConfig,
/// Currently watched paths
watched_paths: Arc<RwLock<HashSet<PathBuf>>>,
/// Shutdown signal sender
shutdown_tx: Option<broadcast::Sender<()>>,
/// Flag to signal watcher thread to stop
running: Arc<AtomicBool>,
}
impl CodebaseWatcher {
    /// Create a new codebase watcher with the default [`WatcherConfig`].
    pub fn new(
        tracker: Arc<RwLock<RelationshipTracker>>,
        detector: Arc<RwLock<PatternDetector>>,
    ) -> Self {
        Self::with_config(tracker, detector, WatcherConfig::default())
    }
    /// Create a new codebase watcher with custom config
    ///
    /// The watcher starts idle; nothing runs until [`Self::watch`] is called.
    pub fn with_config(
        tracker: Arc<RwLock<RelationshipTracker>>,
        detector: Arc<RwLock<PatternDetector>>,
        config: WatcherConfig,
    ) -> Self {
        Self {
            tracker,
            detector,
            config,
            watched_paths: Arc::new(RwLock::new(HashSet::new())),
            shutdown_tx: None,
            running: Arc::new(AtomicBool::new(false)),
        }
    }
    /// Start watching a directory
    ///
    /// Spawns two background workers:
    /// 1. an OS thread running a `notify` watcher that forwards raw file
    ///    events over a bounded mpsc channel, and
    /// 2. a tokio task that consumes those events, groups them into
    ///    30-minute edit sessions, records co-edits, and runs pattern
    ///    detection.
    ///
    /// # Errors
    /// Returns an IO error if `path` cannot be canonicalized, or
    /// `WatcherError::AlreadyWatching` if the path is already registered.
    ///
    /// NOTE(review): calling `watch()` for a second path replaces
    /// `self.shutdown_tx` and spawns a second thread/task pair, so the
    /// first pair can then only be stopped via the shared `running` flag —
    /// confirm whether concurrent multi-path watching is intended.
    pub async fn watch(&mut self, path: &Path) -> Result<()> {
        let path = path.canonicalize()?;
        // Check if already watching
        {
            let watched = self.watched_paths.read().await;
            if watched.contains(&path) {
                return Err(WatcherError::AlreadyWatching(path));
            }
        }
        // Add to watched paths
        self.watched_paths.write().await.insert(path.clone());
        // Create shutdown channel (capacity 1 is enough: a single shutdown
        // signal, possibly coalesced, is all the handler needs)
        let (shutdown_tx, mut shutdown_rx) = broadcast::channel::<()>(1);
        self.shutdown_tx = Some(shutdown_tx);
        // Create event channel; bounded so a flood of events applies
        // backpressure to the watcher thread instead of growing unbounded
        let (event_tx, mut event_rx) = mpsc::channel::<FileEvent>(100);
        // Clone for move into watcher thread
        let config = self.config.clone();
        let watch_path = path.clone();
        // Set running flag to true and clone for thread
        self.running.store(true, Ordering::SeqCst);
        let running = Arc::clone(&self.running);
        // Spawn watcher thread (notify's callback API is synchronous, so it
        // lives on a plain OS thread rather than in the tokio runtime)
        let event_tx_clone = event_tx.clone();
        std::thread::spawn(move || {
            // Poll interval only takes effect if the notify backend falls
            // back to polling; native backends deliver events directly.
            let config_notify = Config::default().with_poll_interval(config.debounce_interval);
            let tx = event_tx_clone.clone();
            let mut watcher = match RecommendedWatcher::new(
                move |res: std::result::Result<Event, notify::Error>| {
                    if let Ok(event) = res {
                        let file_event = FileEvent {
                            kind: event.kind.into(),
                            paths: event.paths,
                            timestamp: Utc::now(),
                        };
                        // blocking_send is correct here: this callback runs
                        // on the watcher thread, outside the async runtime.
                        // Send errors (receiver gone) are deliberately ignored.
                        let _ = tx.blocking_send(file_event);
                    }
                },
                config_notify,
            ) {
                Ok(w) => w,
                Err(e) => {
                    eprintln!("Failed to create watcher: {}", e);
                    return;
                }
            };
            if let Err(e) = watcher.watch(&watch_path, RecursiveMode::Recursive) {
                eprintln!("Failed to watch path: {}", e);
                return;
            }
            // Keep thread alive until shutdown signal; when the loop exits,
            // `watcher` is dropped, which stops the underlying OS watches.
            while running.load(Ordering::SeqCst) {
                std::thread::sleep(Duration::from_millis(100));
            }
        });
        // Clone for move into handler task
        let tracker = Arc::clone(&self.tracker);
        let detector = Arc::clone(&self.detector);
        let config = self.config.clone();
        // Spawn event handler task: drains file events, maintains the edit
        // session, and reacts to the shutdown signal.
        tokio::spawn(async move {
            let mut session = EditSession::new();
            let session_timeout = Duration::from_secs(60 * 30); // 30 minutes
            loop {
                tokio::select! {
                    Some(event) = event_rx.recv() => {
                        // Check session expiry BEFORE processing the event so
                        // the incoming files start a fresh session.
                        if session.is_expired(session_timeout) {
                            // Record co-edits from expired session; a single
                            // file alone carries no relationship information.
                            if session.files.len() >= 2 {
                                let files = session.files_list();
                                // try_write is best-effort: if the tracker is
                                // contended, this co-edit record is dropped.
                                if let Ok(mut tracker) = tracker.try_write() {
                                    let _ = tracker.record_coedit(&files);
                                }
                            }
                            session = EditSession::new();
                        }
                        // Process event (one notify event may carry several paths)
                        for path in &event.paths {
                            if Self::should_process(path, &config) {
                                match event.kind {
                                    FileEventKind::Modified | FileEventKind::Created => {
                                        // Track in session
                                        if config.track_relationships {
                                            session.add_file(path.clone());
                                        }
                                        // Detect patterns if enabled
                                        // NOTE(review): the detection result is
                                        // discarded here — presumably the detector
                                        // accumulates state internally; confirm.
                                        if config.detect_patterns {
                                            if let Ok(content) = std::fs::read_to_string(path) {
                                                let language = Self::detect_language(path);
                                                if let Ok(detector) = detector.try_read() {
                                                    let _ = detector.detect_patterns(&content, &language);
                                                }
                                            }
                                        }
                                    }
                                    FileEventKind::Deleted => {
                                        // File was deleted, remove from session
                                        session.files.remove(path);
                                    }
                                    _ => {}
                                }
                            }
                        }
                    }
                    _ = shutdown_rx.recv() => {
                        // Finalize session before shutdown so in-flight
                        // co-edits are not lost.
                        if session.files.len() >= 2 {
                            let files = session.files_list();
                            if let Ok(mut tracker) = tracker.try_write() {
                                let _ = tracker.record_coedit(&files);
                            }
                        }
                        break;
                    }
                }
            }
        });
        Ok(())
    }
    /// Stop watching a directory
    ///
    /// Background workers are only shut down once the LAST watched path is
    /// removed; removing one of several paths just unregisters it.
    ///
    /// # Errors
    /// Returns `WatcherError::NotWatching` if `path` was not registered.
    pub async fn unwatch(&mut self, path: &Path) -> Result<()> {
        let path = path.canonicalize()?;
        let mut watched = self.watched_paths.write().await;
        if !watched.remove(&path) {
            return Err(WatcherError::NotWatching(path));
        }
        // If no more paths being watched, send shutdown signals
        if watched.is_empty() {
            // Signal watcher thread to exit
            self.running.store(false, Ordering::SeqCst);
            // Signal async task to exit
            if let Some(tx) = &self.shutdown_tx {
                let _ = tx.send(());
            }
        }
        Ok(())
    }
    /// Stop watching all directories
    ///
    /// Clears the path registry and signals both background workers to exit.
    pub async fn stop(&mut self) -> Result<()> {
        self.watched_paths.write().await.clear();
        // Signal watcher thread to exit
        self.running.store(false, Ordering::SeqCst);
        // Signal async task to exit
        if let Some(tx) = &self.shutdown_tx {
            let _ = tx.send(());
        }
        Ok(())
    }
    /// Check if a path should be processed based on config
    ///
    /// A path is processed only if it matches NO ignore pattern AND (when an
    /// extension whitelist is configured) its extension is on the whitelist.
    /// Paths with no extension are rejected whenever a whitelist exists.
    fn should_process(path: &Path, config: &WatcherConfig) -> bool {
        let path_str = path.to_string_lossy();
        // Check ignore patterns
        for pattern in &config.ignore_patterns {
            // Simple glob matching (basic implementation)
            if Self::glob_match(&path_str, pattern) {
                return false;
            }
        }
        // Check extensions
        if let Some(ref extensions) = config.watch_extensions {
            if let Some(ext) = path.extension() {
                let ext_str = ext.to_string_lossy().to_lowercase();
                if !extensions.iter().any(|e| e.to_lowercase() == ext_str) {
                    return false;
                }
            } else {
                return false; // No extension and we're filtering by extension
            }
        }
        true
    }
    /// Simple glob pattern matching
    ///
    /// Intentionally minimal — handles the shapes used by the default
    /// ignore patterns, not full glob syntax:
    /// - `prefix**suffix` (exactly one `**`): prefix/suffix containment,
    ///   with `*.ext` suffixes matched as extension checks;
    /// - patterns with more than one `**` (e.g. `**/node_modules/**`) fall
    ///   through to the `*` branch below, which strips all stars and does a
    ///   substring match — adequate for the defaults;
    /// - bare patterns: plain substring containment.
    fn glob_match(path: &str, pattern: &str) -> bool {
        // Handle ** (match any path)
        if pattern.contains("**") {
            let parts: Vec<_> = pattern.split("**").collect();
            if parts.len() == 2 {
                let prefix = parts[0].trim_end_matches('/');
                let suffix = parts[1].trim_start_matches('/');
                let prefix_match = prefix.is_empty() || path.starts_with(prefix);
                // Handle suffix with wildcards like *.lock
                let suffix_match = if suffix.is_empty() {
                    true
                } else if suffix.starts_with('*') {
                    // Pattern like *.lock - match the extension
                    let ext_pattern = suffix.trim_start_matches('*');
                    path.ends_with(ext_pattern)
                } else {
                    // Exact suffix match
                    path.ends_with(suffix) || path.contains(&format!("/{}", suffix))
                };
                return prefix_match && suffix_match;
            }
        }
        // Handle * (match single component)
        if pattern.contains('*') {
            let pattern = pattern.replace('*', "");
            return path.contains(&pattern);
        }
        // Direct match
        path.contains(pattern)
    }
    /// Detect language from file extension
    ///
    /// Returns a lowercase language name, or "unknown" for unrecognized or
    /// missing extensions. Note that `.c`/`.h` map to "cpp" here.
    fn detect_language(path: &Path) -> String {
        path.extension()
            .map(|e| {
                let ext = e.to_string_lossy().to_lowercase();
                match ext.as_str() {
                    "rs" => "rust",
                    "ts" | "tsx" => "typescript",
                    "js" | "jsx" => "javascript",
                    "py" => "python",
                    "go" => "go",
                    "java" => "java",
                    "kt" | "kts" => "kotlin",
                    "swift" => "swift",
                    "cs" => "csharp",
                    "cpp" | "cc" | "cxx" | "c" | "h" | "hpp" => "cpp",
                    "rb" => "ruby",
                    "php" => "php",
                    _ => "unknown",
                }
                .to_string()
            })
            .unwrap_or_else(|| "unknown".to_string())
    }
    /// Get currently watched paths
    pub async fn get_watched_paths(&self) -> Vec<PathBuf> {
        self.watched_paths.read().await.iter().cloned().collect()
    }
    /// Check if a path is being watched
    ///
    /// Falls back to the raw path if canonicalization fails (e.g. the path
    /// no longer exists), matching how paths were stored by `watch`.
    pub async fn is_watching(&self, path: &Path) -> bool {
        let path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
        self.watched_paths.read().await.contains(&path)
    }
    /// Get the current configuration
    pub fn config(&self) -> &WatcherConfig {
        &self.config
    }
    /// Update the configuration
    ///
    /// NOTE(review): workers already spawned by `watch` captured a clone of
    /// the old config; the new config only affects subsequent `watch` calls.
    pub fn set_config(&mut self, config: WatcherConfig) {
        self.config = config;
    }
}
impl Drop for CodebaseWatcher {
    /// Best-effort cleanup when the watcher goes out of scope: stop the
    /// polling thread and wake the async event-handler task.
    fn drop(&mut self) {
        // Tell the blocking watcher thread to exit its sleep loop.
        self.running.store(false, Ordering::SeqCst);
        // Wake the tokio task so it can finalize its session and return;
        // send errors (no live receiver) are irrelevant during teardown.
        if let Some(shutdown) = self.shutdown_tx.as_ref() {
            let _ = shutdown.send(());
        }
    }
}
// ============================================================================
// MANUAL EVENT HANDLER (for non-async contexts)
// ============================================================================
/// Handles file events manually (for use without the async watcher)
pub struct ManualEventHandler {
tracker: Arc<RwLock<RelationshipTracker>>,
detector: Arc<RwLock<PatternDetector>>,
session_files: HashSet<PathBuf>,
config: WatcherConfig,
}
impl ManualEventHandler {
    /// Create a new manual event handler with an empty session and the
    /// default watcher configuration.
    pub fn new(
        tracker: Arc<RwLock<RelationshipTracker>>,
        detector: Arc<RwLock<PatternDetector>>,
    ) -> Self {
        let config = WatcherConfig::default();
        Self {
            tracker,
            detector,
            session_files: HashSet::new(),
            config,
        }
    }
    /// Handle a file modification event
    ///
    /// Filters the path through the watcher config, adds it to the current
    /// session, records co-edit relationships once two or more files are in
    /// the session, then runs pattern detection if enabled.
    pub async fn on_file_modified(&mut self, path: &Path) -> Result<()> {
        if !CodebaseWatcher::should_process(path, &self.config) {
            return Ok(());
        }
        // Add to session
        self.session_files.insert(path.to_path_buf());
        // Record co-edit if we have multiple files
        if self.session_files.len() > 1 {
            let coedited: Vec<_> = self.session_files.iter().cloned().collect();
            self.tracker.write().await.record_coedit(&coedited)?;
        }
        // Detect patterns (result intentionally discarded, errors ignored)
        if self.config.detect_patterns {
            if let Ok(source) = std::fs::read_to_string(path) {
                let lang = CodebaseWatcher::detect_language(path);
                let _ = self.detector.read().await.detect_patterns(&source, &lang);
            }
        }
        Ok(())
    }
    /// Handle a file creation event (treated identically to a modification)
    pub async fn on_file_created(&mut self, path: &Path) -> Result<()> {
        self.on_file_modified(path).await
    }
    /// Handle a file deletion event by dropping the path from the session
    pub async fn on_file_deleted(&mut self, path: &Path) -> Result<()> {
        self.session_files.remove(path);
        Ok(())
    }
    /// Discard the current session without recording anything
    pub fn clear_session(&mut self) {
        self.session_files.clear();
    }
    /// Finalize the current session
    ///
    /// Records any remaining co-edits, then clears the session. If
    /// recording fails, the error propagates and the session is kept.
    pub async fn finalize_session(&mut self) -> Result<()> {
        if self.session_files.len() > 1 {
            let coedited: Vec<_> = self.session_files.iter().cloned().collect();
            self.tracker.write().await.record_coedit(&coedited)?;
        }
        self.session_files.clear();
        Ok(())
    }
}
// ============================================================================
// TESTS
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_glob_match() {
        // Paths that the default ignore patterns are expected to match.
        let ignored = [
            ("/project/node_modules/foo/bar.js", "**/node_modules/**"),
            ("/project/target/debug/main", "**/target/**"),
            ("/project/.git/config", "**/.git/**"),
            ("/project/Cargo.lock", "**/*.lock"),
        ];
        for &(path, pattern) in ignored.iter() {
            assert!(
                CodebaseWatcher::glob_match(path, pattern),
                "{} should match {}",
                path,
                pattern
            );
        }
        // A regular source file must not match an unrelated ignore pattern.
        assert!(!CodebaseWatcher::glob_match(
            "/project/src/main.rs",
            "**/node_modules/**"
        ));
    }
    #[test]
    fn test_should_process() {
        let config = WatcherConfig::default();
        // Ordinary source files pass the filter.
        for path in ["/project/src/main.rs", "/project/src/app.tsx"].iter() {
            assert!(
                CodebaseWatcher::should_process(Path::new(path), &config),
                "{} should be processed",
                path
            );
        }
        // Ignored directories and lock files are rejected.
        let rejected = [
            "/project/node_modules/foo/index.js",
            "/project/target/debug/main",
            "/project/Cargo.lock",
        ];
        for path in rejected.iter() {
            assert!(
                !CodebaseWatcher::should_process(Path::new(path), &config),
                "{} should be skipped",
                path
            );
        }
    }
    #[test]
    fn test_detect_language() {
        // Extension → language table.
        let cases = [
            ("main.rs", "rust"),
            ("app.tsx", "typescript"),
            ("script.js", "javascript"),
            ("main.py", "python"),
            ("main.go", "go"),
        ];
        for &(file, expected) in cases.iter() {
            assert_eq!(CodebaseWatcher::detect_language(Path::new(file)), expected);
        }
    }
    #[test]
    fn test_edit_session() {
        let mut session = EditSession::new();
        session.add_file(PathBuf::from("a.rs"));
        session.add_file(PathBuf::from("b.rs"));
        // Two distinct files recorded; a fresh session is not yet expired.
        assert_eq!(session.files.len(), 2);
        assert!(!session.is_expired(Duration::from_secs(60)));
    }
    #[test]
    fn test_watcher_config_default() {
        let config = WatcherConfig::default();
        // Defaults must ship with ignore patterns, an extension whitelist,
        // and both feature flags enabled.
        assert!(!config.ignore_patterns.is_empty());
        assert!(config.watch_extensions.is_some());
        assert!(config.detect_patterns);
        assert!(config.track_relationships);
    }
}