mirror of
https://github.com/samvallad33/vestige.git
synced 2026-05-10 00:02:37 +02:00
Initial commit: Vestige v1.0.0 - Cognitive memory MCP server
FSRS-6 spaced repetition, spreading activation, synaptic tagging, hippocampal indexing, and 130 years of memory research. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
commit
f9c60eb5a7
169 changed files with 97206 additions and 0 deletions
984
crates/vestige-core/src/codebase/context.rs
Normal file
984
crates/vestige-core/src/codebase/context.rs
Normal file
|
|
@ -0,0 +1,984 @@
|
|||
//! Context capture for codebase memory
|
||||
//!
|
||||
//! This module captures the current working context - what branch you're on,
|
||||
//! what files you're editing, what the project structure looks like. This
|
||||
//! context is critical for:
|
||||
//!
|
||||
//! - Storing memories with full context for later retrieval
|
||||
//! - Providing relevant suggestions based on current work
|
||||
//! - Maintaining continuity across sessions
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::git::{GitAnalyzer, GitContext, GitError};
|
||||
|
||||
// ============================================================================
|
||||
// ERRORS
|
||||
// ============================================================================
|
||||
|
||||
/// Errors that can occur during context capture.
#[derive(Debug, thiserror::Error)]
pub enum ContextError {
    /// An underlying git operation failed (converted from the git module).
    #[error("Git error: {0}")]
    Git(#[from] GitError),
    /// Filesystem access failed while inspecting the project.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    /// A path required for context capture does not exist.
    #[error("Path not found: {0}")]
    PathNotFound(PathBuf),
}

/// Convenience alias: module-local results defaulting to [`ContextError`].
pub type Result<T> = std::result::Result<T, ContextError>;
|
||||
|
||||
// ============================================================================
|
||||
// PROJECT TYPE DETECTION
|
||||
// ============================================================================
|
||||
|
||||
/// Detected project type based on files present at the project root.
///
/// Serialized in snake_case for storage alongside memories.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ProjectType {
    Rust,
    TypeScript,
    JavaScript,
    Python,
    Go,
    Java,
    Kotlin,
    Swift,
    CSharp,
    Cpp,
    Ruby,
    Php,
    /// Multiple languages detected; carries their display names.
    Mixed(Vec<String>),
    /// No recognized marker files were found.
    Unknown,
}
|
||||
|
||||
impl ProjectType {
|
||||
/// Get the file extensions associated with this project type
|
||||
pub fn extensions(&self) -> Vec<&'static str> {
|
||||
match self {
|
||||
Self::Rust => vec!["rs"],
|
||||
Self::TypeScript => vec!["ts", "tsx"],
|
||||
Self::JavaScript => vec!["js", "jsx"],
|
||||
Self::Python => vec!["py"],
|
||||
Self::Go => vec!["go"],
|
||||
Self::Java => vec!["java"],
|
||||
Self::Kotlin => vec!["kt", "kts"],
|
||||
Self::Swift => vec!["swift"],
|
||||
Self::CSharp => vec!["cs"],
|
||||
Self::Cpp => vec!["cpp", "cc", "cxx", "c", "h", "hpp"],
|
||||
Self::Ruby => vec!["rb"],
|
||||
Self::Php => vec!["php"],
|
||||
Self::Mixed(_) => vec![],
|
||||
Self::Unknown => vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the language name as a string
|
||||
pub fn language_name(&self) -> &str {
|
||||
match self {
|
||||
Self::Rust => "Rust",
|
||||
Self::TypeScript => "TypeScript",
|
||||
Self::JavaScript => "JavaScript",
|
||||
Self::Python => "Python",
|
||||
Self::Go => "Go",
|
||||
Self::Java => "Java",
|
||||
Self::Kotlin => "Kotlin",
|
||||
Self::Swift => "Swift",
|
||||
Self::CSharp => "C#",
|
||||
Self::Cpp => "C++",
|
||||
Self::Ruby => "Ruby",
|
||||
Self::Php => "PHP",
|
||||
Self::Mixed(_) => "Mixed",
|
||||
Self::Unknown => "Unknown",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// FRAMEWORK DETECTION
|
||||
// ============================================================================
|
||||
|
||||
/// Known frameworks that can be detected from project manifests.
///
/// Detection is heuristic (substring scans of Cargo.toml, package.json,
/// pyproject.toml, etc.); `Other` covers anything not modeled here.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Framework {
    // Rust
    Tauri,
    Actix,
    Axum,
    Rocket,
    Tokio,
    Diesel,
    SeaOrm,

    // JavaScript/TypeScript
    React,
    Vue,
    Angular,
    Svelte,
    NextJs,
    NuxtJs,
    Express,
    NestJs,
    Deno,
    Bun,

    // Python
    Django,
    Flask,
    FastApi,
    Pytest,
    Poetry,

    // Other ecosystems
    Spring,  // Java
    Rails,   // Ruby
    Laravel, // PHP
    DotNet,  // C#

    /// A framework outside the built-in list, carrying its display name.
    Other(String),
}
|
||||
|
||||
impl Framework {
|
||||
pub fn name(&self) -> &str {
|
||||
match self {
|
||||
Self::Tauri => "Tauri",
|
||||
Self::Actix => "Actix",
|
||||
Self::Axum => "Axum",
|
||||
Self::Rocket => "Rocket",
|
||||
Self::Tokio => "Tokio",
|
||||
Self::Diesel => "Diesel",
|
||||
Self::SeaOrm => "SeaORM",
|
||||
Self::React => "React",
|
||||
Self::Vue => "Vue",
|
||||
Self::Angular => "Angular",
|
||||
Self::Svelte => "Svelte",
|
||||
Self::NextJs => "Next.js",
|
||||
Self::NuxtJs => "Nuxt.js",
|
||||
Self::Express => "Express",
|
||||
Self::NestJs => "NestJS",
|
||||
Self::Deno => "Deno",
|
||||
Self::Bun => "Bun",
|
||||
Self::Django => "Django",
|
||||
Self::Flask => "Flask",
|
||||
Self::FastApi => "FastAPI",
|
||||
Self::Pytest => "Pytest",
|
||||
Self::Poetry => "Poetry",
|
||||
Self::Spring => "Spring",
|
||||
Self::Rails => "Rails",
|
||||
Self::Laravel => "Laravel",
|
||||
Self::DotNet => ".NET",
|
||||
Self::Other(name) => name,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// WORKING CONTEXT
|
||||
// ============================================================================
|
||||
|
||||
/// Complete working context for memory storage.
///
/// A snapshot of "what the developer is doing right now", produced by
/// [`ContextCapture::capture`] and serialized in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct WorkingContext {
    /// Git context (branch, commits, changes); `None` outside a git repo
    /// or when the status query fails.
    pub git: Option<GitContextInfo>,
    /// Currently active file (the first entry of the active-file list).
    pub active_file: Option<PathBuf>,
    /// Project type (Rust, TypeScript, etc.)
    pub project_type: ProjectType,
    /// Detected frameworks
    pub frameworks: Vec<Framework>,
    /// Project name (from Cargo.toml, package.json, etc.)
    pub project_name: Option<String>,
    /// Project root directory
    pub project_root: PathBuf,
    /// When this context was captured (UTC)
    pub captured_at: DateTime<Utc>,
    /// Recent files (the full active-file list at capture time)
    pub recent_files: Vec<PathBuf>,
    /// Key configuration files found at the project root
    pub config_files: Vec<PathBuf>,
}
|
||||
|
||||
/// Serializable summary of a [`GitContext`], stored with memories.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GitContextInfo {
    /// Name of the currently checked-out branch.
    pub current_branch: String,
    /// HEAD commit SHA.
    pub head_commit: String,
    /// Files with unstaged modifications.
    pub uncommitted_changes: Vec<PathBuf>,
    /// Files staged for the next commit.
    pub staged_changes: Vec<PathBuf>,
    /// True when `uncommitted_changes` is non-empty.
    pub has_uncommitted: bool,
    /// True when there are neither unstaged nor staged changes.
    pub is_clean: bool,
}
|
||||
|
||||
impl From<GitContext> for GitContextInfo {
|
||||
fn from(ctx: GitContext) -> Self {
|
||||
let has_uncommitted = !ctx.uncommitted_changes.is_empty();
|
||||
let is_clean = ctx.uncommitted_changes.is_empty() && ctx.staged_changes.is_empty();
|
||||
|
||||
Self {
|
||||
current_branch: ctx.current_branch,
|
||||
head_commit: ctx.head_commit,
|
||||
uncommitted_changes: ctx.uncommitted_changes,
|
||||
staged_changes: ctx.staged_changes,
|
||||
has_uncommitted,
|
||||
is_clean,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// FILE CONTEXT
|
||||
// ============================================================================
|
||||
|
||||
/// Context specific to a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FileContext {
    /// Path to the file
    pub path: PathBuf,
    /// Detected language identifier (e.g. "rust"), derived from the extension
    pub language: Option<String>,
    /// File extension (without the leading dot)
    pub extension: Option<String>,
    /// Parent directory ("." when the path has no parent)
    pub directory: PathBuf,
    /// Related files (test files; for Rust also mod.rs and crate entry points)
    pub related_files: Vec<PathBuf>,
    /// Whether git reports unstaged or staged changes for this file
    pub has_changes: bool,
    /// Last filesystem modification time, when the metadata was readable
    pub last_modified: Option<DateTime<Utc>>,
    /// Whether the path looks like a test file (heuristic)
    pub is_test_file: bool,
    /// Module/package this file belongs to (e.g. "codebase" for src/codebase/*.rs)
    pub module: Option<String>,
}
|
||||
|
||||
// ============================================================================
|
||||
// CONTEXT CAPTURE
|
||||
// ============================================================================
|
||||
|
||||
/// Captures and manages working context.
///
/// Construct with [`ContextCapture::new`]; git support is optional and
/// degrades gracefully when the project is not a git repository.
pub struct ContextCapture {
    /// Git analyzer for the repository (`None` when not a git repo).
    git: Option<GitAnalyzer>,
    /// Currently active files; the first entry is treated as THE active file.
    active_files: Vec<PathBuf>,
    /// Project root directory that all relative lookups are joined against.
    project_root: PathBuf,
}
|
||||
|
||||
impl ContextCapture {
|
||||
/// Create a new context capture for a project directory
|
||||
pub fn new(project_root: PathBuf) -> Result<Self> {
|
||||
// Try to create git analyzer (may fail if not a git repo)
|
||||
let git = GitAnalyzer::new(project_root.clone()).ok();
|
||||
|
||||
Ok(Self {
|
||||
git,
|
||||
active_files: vec![],
|
||||
project_root,
|
||||
})
|
||||
}
|
||||
|
||||
/// Set the currently active file(s)
|
||||
pub fn set_active_files(&mut self, files: Vec<PathBuf>) {
|
||||
self.active_files = files;
|
||||
}
|
||||
|
||||
/// Add an active file
|
||||
pub fn add_active_file(&mut self, file: PathBuf) {
|
||||
if !self.active_files.contains(&file) {
|
||||
self.active_files.push(file);
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove an active file
|
||||
pub fn remove_active_file(&mut self, file: &Path) {
|
||||
self.active_files.retain(|f| f != file);
|
||||
}
|
||||
|
||||
/// Capture the full working context
|
||||
pub fn capture(&self) -> Result<WorkingContext> {
|
||||
let git = self
|
||||
.git
|
||||
.as_ref()
|
||||
.and_then(|g| g.get_current_context().ok().map(GitContextInfo::from));
|
||||
|
||||
let project_type = self.detect_project_type()?;
|
||||
let frameworks = self.detect_frameworks()?;
|
||||
let project_name = self.detect_project_name()?;
|
||||
let config_files = self.find_config_files()?;
|
||||
|
||||
Ok(WorkingContext {
|
||||
git,
|
||||
active_file: self.active_files.first().cloned(),
|
||||
project_type,
|
||||
frameworks,
|
||||
project_name,
|
||||
project_root: self.project_root.clone(),
|
||||
captured_at: Utc::now(),
|
||||
recent_files: self.active_files.clone(),
|
||||
config_files,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get context specific to a file
|
||||
pub fn context_for_file(&self, path: &Path) -> Result<FileContext> {
|
||||
let extension = path.extension().map(|e| e.to_string_lossy().to_string());
|
||||
|
||||
let language = extension
|
||||
.as_ref()
|
||||
.and_then(|ext| match ext.as_str() {
|
||||
"rs" => Some("rust"),
|
||||
"ts" | "tsx" => Some("typescript"),
|
||||
"js" | "jsx" => Some("javascript"),
|
||||
"py" => Some("python"),
|
||||
"go" => Some("go"),
|
||||
"java" => Some("java"),
|
||||
"kt" | "kts" => Some("kotlin"),
|
||||
"swift" => Some("swift"),
|
||||
"cs" => Some("csharp"),
|
||||
"cpp" | "cc" | "cxx" | "c" => Some("cpp"),
|
||||
"h" | "hpp" => Some("cpp"),
|
||||
"rb" => Some("ruby"),
|
||||
"php" => Some("php"),
|
||||
"sql" => Some("sql"),
|
||||
"json" => Some("json"),
|
||||
"yaml" | "yml" => Some("yaml"),
|
||||
"toml" => Some("toml"),
|
||||
"md" => Some("markdown"),
|
||||
_ => None,
|
||||
})
|
||||
.map(|s| s.to_string());
|
||||
|
||||
let directory = path.parent().unwrap_or(Path::new(".")).to_path_buf();
|
||||
|
||||
// Detect related files
|
||||
let related_files = self.find_related_files(path)?;
|
||||
|
||||
// Check git status
|
||||
let has_changes = self
|
||||
.git
|
||||
.as_ref()
|
||||
.map(|g| {
|
||||
g.get_current_context()
|
||||
.ok()
|
||||
.map(|ctx| {
|
||||
ctx.uncommitted_changes.contains(&path.to_path_buf())
|
||||
|| ctx.staged_changes.contains(&path.to_path_buf())
|
||||
})
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.unwrap_or(false);
|
||||
|
||||
// Check if test file
|
||||
let is_test_file = self.is_test_file(path);
|
||||
|
||||
// Get last modified time
|
||||
let last_modified = fs::metadata(path)
|
||||
.ok()
|
||||
.and_then(|m| m.modified().ok().map(|t| DateTime::<Utc>::from(t)));
|
||||
|
||||
// Detect module
|
||||
let module = self.detect_module(path);
|
||||
|
||||
Ok(FileContext {
|
||||
path: path.to_path_buf(),
|
||||
language,
|
||||
extension,
|
||||
directory,
|
||||
related_files,
|
||||
has_changes,
|
||||
last_modified,
|
||||
is_test_file,
|
||||
module,
|
||||
})
|
||||
}
|
||||
|
||||
/// Detect the project type based on files present
|
||||
fn detect_project_type(&self) -> Result<ProjectType> {
|
||||
let mut detected = Vec::new();
|
||||
|
||||
// Check for Rust
|
||||
if self.file_exists("Cargo.toml") {
|
||||
detected.push("Rust".to_string());
|
||||
}
|
||||
|
||||
// Check for JavaScript/TypeScript
|
||||
if self.file_exists("package.json") {
|
||||
// Check for TypeScript
|
||||
if self.file_exists("tsconfig.json") || self.file_exists("tsconfig.base.json") {
|
||||
detected.push("TypeScript".to_string());
|
||||
} else {
|
||||
detected.push("JavaScript".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// Check for Python
|
||||
if self.file_exists("pyproject.toml")
|
||||
|| self.file_exists("setup.py")
|
||||
|| self.file_exists("requirements.txt")
|
||||
{
|
||||
detected.push("Python".to_string());
|
||||
}
|
||||
|
||||
// Check for Go
|
||||
if self.file_exists("go.mod") {
|
||||
detected.push("Go".to_string());
|
||||
}
|
||||
|
||||
// Check for Java/Kotlin
|
||||
if self.file_exists("pom.xml") || self.file_exists("build.gradle") {
|
||||
if self.dir_exists("src/main/kotlin") || self.file_exists("build.gradle.kts") {
|
||||
detected.push("Kotlin".to_string());
|
||||
} else {
|
||||
detected.push("Java".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// Check for Swift
|
||||
if self.file_exists("Package.swift") {
|
||||
detected.push("Swift".to_string());
|
||||
}
|
||||
|
||||
// Check for C#
|
||||
if self.glob_exists("*.csproj") || self.glob_exists("*.sln") {
|
||||
detected.push("CSharp".to_string());
|
||||
}
|
||||
|
||||
// Check for Ruby
|
||||
if self.file_exists("Gemfile") {
|
||||
detected.push("Ruby".to_string());
|
||||
}
|
||||
|
||||
// Check for PHP
|
||||
if self.file_exists("composer.json") {
|
||||
detected.push("PHP".to_string());
|
||||
}
|
||||
|
||||
match detected.len() {
|
||||
0 => Ok(ProjectType::Unknown),
|
||||
1 => Ok(match detected[0].as_str() {
|
||||
"Rust" => ProjectType::Rust,
|
||||
"TypeScript" => ProjectType::TypeScript,
|
||||
"JavaScript" => ProjectType::JavaScript,
|
||||
"Python" => ProjectType::Python,
|
||||
"Go" => ProjectType::Go,
|
||||
"Java" => ProjectType::Java,
|
||||
"Kotlin" => ProjectType::Kotlin,
|
||||
"Swift" => ProjectType::Swift,
|
||||
"CSharp" => ProjectType::CSharp,
|
||||
"Ruby" => ProjectType::Ruby,
|
||||
"PHP" => ProjectType::Php,
|
||||
_ => ProjectType::Unknown,
|
||||
}),
|
||||
_ => Ok(ProjectType::Mixed(detected)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Detect frameworks used in the project
|
||||
fn detect_frameworks(&self) -> Result<Vec<Framework>> {
|
||||
let mut frameworks = Vec::new();
|
||||
|
||||
// Rust frameworks
|
||||
if let Ok(content) = fs::read_to_string(self.project_root.join("Cargo.toml")) {
|
||||
if content.contains("tauri") {
|
||||
frameworks.push(Framework::Tauri);
|
||||
}
|
||||
if content.contains("actix-web") {
|
||||
frameworks.push(Framework::Actix);
|
||||
}
|
||||
if content.contains("axum") {
|
||||
frameworks.push(Framework::Axum);
|
||||
}
|
||||
if content.contains("rocket") {
|
||||
frameworks.push(Framework::Rocket);
|
||||
}
|
||||
if content.contains("tokio") {
|
||||
frameworks.push(Framework::Tokio);
|
||||
}
|
||||
if content.contains("diesel") {
|
||||
frameworks.push(Framework::Diesel);
|
||||
}
|
||||
if content.contains("sea-orm") {
|
||||
frameworks.push(Framework::SeaOrm);
|
||||
}
|
||||
}
|
||||
|
||||
// JavaScript/TypeScript frameworks
|
||||
if let Ok(content) = fs::read_to_string(self.project_root.join("package.json")) {
|
||||
if content.contains("\"react\"") || content.contains("\"react\":") {
|
||||
frameworks.push(Framework::React);
|
||||
}
|
||||
if content.contains("\"vue\"") || content.contains("\"vue\":") {
|
||||
frameworks.push(Framework::Vue);
|
||||
}
|
||||
if content.contains("\"@angular/") {
|
||||
frameworks.push(Framework::Angular);
|
||||
}
|
||||
if content.contains("\"svelte\"") {
|
||||
frameworks.push(Framework::Svelte);
|
||||
}
|
||||
if content.contains("\"next\"") || content.contains("\"next\":") {
|
||||
frameworks.push(Framework::NextJs);
|
||||
}
|
||||
if content.contains("\"nuxt\"") || content.contains("\"nuxt\":") {
|
||||
frameworks.push(Framework::NuxtJs);
|
||||
}
|
||||
if content.contains("\"express\"") {
|
||||
frameworks.push(Framework::Express);
|
||||
}
|
||||
if content.contains("\"@nestjs/") {
|
||||
frameworks.push(Framework::NestJs);
|
||||
}
|
||||
}
|
||||
|
||||
// Deno
|
||||
if self.file_exists("deno.json") || self.file_exists("deno.jsonc") {
|
||||
frameworks.push(Framework::Deno);
|
||||
}
|
||||
|
||||
// Bun
|
||||
if self.file_exists("bun.lockb") || self.file_exists("bunfig.toml") {
|
||||
frameworks.push(Framework::Bun);
|
||||
}
|
||||
|
||||
// Python frameworks
|
||||
if let Ok(content) = fs::read_to_string(self.project_root.join("pyproject.toml")) {
|
||||
if content.contains("django") {
|
||||
frameworks.push(Framework::Django);
|
||||
}
|
||||
if content.contains("flask") {
|
||||
frameworks.push(Framework::Flask);
|
||||
}
|
||||
if content.contains("fastapi") {
|
||||
frameworks.push(Framework::FastApi);
|
||||
}
|
||||
if content.contains("pytest") {
|
||||
frameworks.push(Framework::Pytest);
|
||||
}
|
||||
if content.contains("[tool.poetry]") {
|
||||
frameworks.push(Framework::Poetry);
|
||||
}
|
||||
}
|
||||
|
||||
// Check requirements.txt too
|
||||
if let Ok(content) = fs::read_to_string(self.project_root.join("requirements.txt")) {
|
||||
if content.contains("django") && !frameworks.contains(&Framework::Django) {
|
||||
frameworks.push(Framework::Django);
|
||||
}
|
||||
if content.contains("flask") && !frameworks.contains(&Framework::Flask) {
|
||||
frameworks.push(Framework::Flask);
|
||||
}
|
||||
if content.contains("fastapi") && !frameworks.contains(&Framework::FastApi) {
|
||||
frameworks.push(Framework::FastApi);
|
||||
}
|
||||
}
|
||||
|
||||
// Java Spring
|
||||
if let Ok(content) = fs::read_to_string(self.project_root.join("pom.xml")) {
|
||||
if content.contains("spring") {
|
||||
frameworks.push(Framework::Spring);
|
||||
}
|
||||
}
|
||||
|
||||
// Ruby Rails
|
||||
if self.file_exists("config/routes.rb") {
|
||||
frameworks.push(Framework::Rails);
|
||||
}
|
||||
|
||||
// PHP Laravel
|
||||
if self.file_exists("artisan") && self.dir_exists("app/Http") {
|
||||
frameworks.push(Framework::Laravel);
|
||||
}
|
||||
|
||||
// .NET
|
||||
if self.glob_exists("*.csproj") {
|
||||
frameworks.push(Framework::DotNet);
|
||||
}
|
||||
|
||||
Ok(frameworks)
|
||||
}
|
||||
|
||||
/// Detect the project name from config files
|
||||
fn detect_project_name(&self) -> Result<Option<String>> {
|
||||
// Try Cargo.toml
|
||||
if let Ok(content) = fs::read_to_string(self.project_root.join("Cargo.toml")) {
|
||||
if let Some(name) = self.extract_toml_value(&content, "name") {
|
||||
return Ok(Some(name));
|
||||
}
|
||||
}
|
||||
|
||||
// Try package.json
|
||||
if let Ok(content) = fs::read_to_string(self.project_root.join("package.json")) {
|
||||
if let Some(name) = self.extract_json_value(&content, "name") {
|
||||
return Ok(Some(name));
|
||||
}
|
||||
}
|
||||
|
||||
// Try pyproject.toml
|
||||
if let Ok(content) = fs::read_to_string(self.project_root.join("pyproject.toml")) {
|
||||
if let Some(name) = self.extract_toml_value(&content, "name") {
|
||||
return Ok(Some(name));
|
||||
}
|
||||
}
|
||||
|
||||
// Try go.mod
|
||||
if let Ok(content) = fs::read_to_string(self.project_root.join("go.mod")) {
|
||||
if let Some(line) = content.lines().next() {
|
||||
if line.starts_with("module ") {
|
||||
let name = line
|
||||
.trim_start_matches("module ")
|
||||
.split('/')
|
||||
.last()
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
if !name.is_empty() {
|
||||
return Ok(Some(name));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to directory name
|
||||
Ok(self
|
||||
.project_root
|
||||
.file_name()
|
||||
.map(|n| n.to_string_lossy().to_string()))
|
||||
}
|
||||
|
||||
/// Find configuration files in the project
|
||||
fn find_config_files(&self) -> Result<Vec<PathBuf>> {
|
||||
let config_names = [
|
||||
"Cargo.toml",
|
||||
"package.json",
|
||||
"tsconfig.json",
|
||||
"pyproject.toml",
|
||||
"go.mod",
|
||||
".gitignore",
|
||||
".env",
|
||||
".env.local",
|
||||
"docker-compose.yml",
|
||||
"docker-compose.yaml",
|
||||
"Dockerfile",
|
||||
"Makefile",
|
||||
"justfile",
|
||||
".editorconfig",
|
||||
".prettierrc",
|
||||
".eslintrc.json",
|
||||
"rustfmt.toml",
|
||||
".rustfmt.toml",
|
||||
"clippy.toml",
|
||||
".clippy.toml",
|
||||
"tauri.conf.json",
|
||||
];
|
||||
|
||||
let mut found = Vec::new();
|
||||
|
||||
for name in config_names {
|
||||
let path = self.project_root.join(name);
|
||||
if path.exists() {
|
||||
found.push(path);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(found)
|
||||
}
|
||||
|
||||
    /// Find files related to a given file: sibling test files, matching
    /// files in centralized test directories and, for Rust sources, the
    /// surrounding module entry points.
    ///
    /// Scanning is best-effort: unreadable directories are skipped rather
    /// than surfaced as errors.
    fn find_related_files(&self, path: &Path) -> Result<Vec<PathBuf>> {
        let mut related = Vec::new();

        let file_stem = path.file_stem().map(|s| s.to_string_lossy().to_string());
        let extension = path.extension().map(|s| s.to_string_lossy().to_string());
        let parent = path.parent();

        if let (Some(stem), Some(parent)) = (file_stem, parent) {
            // Candidate test-file stems derived from this file's stem,
            // covering JS/Rust/Python naming conventions.
            let test_patterns = [
                format!("{}.test", stem),
                format!("{}_test", stem),
                format!("{}.spec", stem),
                format!("test_{}", stem),
            ];

            // Common centralized test directories at the project root.
            let test_dirs = ["tests", "test", "__tests__", "spec"];

            // Same-directory scan: exact (case-insensitive) stem matches
            // against the patterns above.
            if let Ok(entries) = fs::read_dir(parent) {
                for entry in entries.filter_map(|e| e.ok()) {
                    let entry_path = entry.path();
                    if let Some(entry_stem) = entry_path.file_stem() {
                        let entry_stem = entry_stem.to_string_lossy();
                        for pattern in &test_patterns {
                            if entry_stem.eq_ignore_ascii_case(pattern) {
                                related.push(entry_path.clone());
                                break;
                            }
                        }
                    }
                }
            }

            // Test-directory scan: looser match — any file whose stem merely
            // CONTAINS this file's stem counts as related.
            for test_dir in test_dirs {
                let test_path = self.project_root.join(test_dir);
                if test_path.exists() {
                    if let Ok(entries) = fs::read_dir(&test_path) {
                        for entry in entries.filter_map(|e| e.ok()) {
                            let entry_path = entry.path();
                            if let Some(entry_stem) = entry_path.file_stem() {
                                let entry_stem = entry_stem.to_string_lossy();
                                if entry_stem.contains(&stem) {
                                    related.push(entry_path);
                                }
                            }
                        }
                    }
                }
            }

            // For Rust, link in the module's mod.rs plus the crate entry
            // points (src/lib.rs, src/main.rs), excluding the file itself.
            // NOTE(review): the `!= path` comparisons are purely textual;
            // an equivalent path spelled differently (relative vs absolute)
            // would not be excluded — confirm callers pass canonical paths.
            if extension.as_deref() == Some("rs") {
                let mod_path = parent.join("mod.rs");
                if mod_path.exists() && mod_path != path {
                    related.push(mod_path);
                }

                // Look for lib.rs or main.rs at project root
                let lib_path = self.project_root.join("src/lib.rs");
                let main_path = self.project_root.join("src/main.rs");

                if lib_path.exists() && lib_path != path {
                    related.push(lib_path);
                }
                if main_path.exists() && main_path != path {
                    related.push(main_path);
                }
            }
        }

        // Remove duplicates. NOTE: round-tripping through a HashSet makes
        // the final ordering of the result unspecified.
        let related: HashSet<_> = related.into_iter().collect();
        Ok(related.into_iter().collect())
    }
|
||||
|
||||
/// Check if a file is a test file
|
||||
fn is_test_file(&self, path: &Path) -> bool {
|
||||
let path_str = path.to_string_lossy().to_lowercase();
|
||||
|
||||
path_str.contains("test")
|
||||
|| path_str.contains("spec")
|
||||
|| path_str.contains("__tests__")
|
||||
|| path
|
||||
.file_name()
|
||||
.map(|n| {
|
||||
let n = n.to_string_lossy();
|
||||
n.starts_with("test_")
|
||||
|| n.ends_with("_test.rs")
|
||||
|| n.ends_with(".test.ts")
|
||||
|| n.ends_with(".test.tsx")
|
||||
|| n.ends_with(".test.js")
|
||||
|| n.ends_with(".spec.ts")
|
||||
|| n.ends_with(".spec.js")
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
    /// Detect the module a file belongs to, based on its path relative to
    /// the project root.
    ///
    /// Rust: the directory components under `src/` joined with `::`
    /// (so `src/codebase/context.rs` yields `"codebase"`).
    /// TS/JS: the directory components under `src/` or `lib/` joined with
    /// `.`. Returns `None` when no module path can be derived.
    fn detect_module(&self, path: &Path) -> Option<String> {
        // For Rust, use the directory path relative to src/ as the module path.
        if path.extension().map(|e| e == "rs").unwrap_or(false) {
            if let Ok(relative) = path.strip_prefix(&self.project_root) {
                if let Ok(src_relative) = relative.strip_prefix("src") {
                    // Directory components only (the `?` bails to None when
                    // the file sits directly under src/ with no parent dirs).
                    let components: Vec<_> = src_relative
                        .parent()?
                        .components()
                        .map(|c| c.as_os_str().to_string_lossy().to_string())
                        .collect();

                    if !components.is_empty() {
                        return Some(components.join("::"));
                    }
                }
            }
        }

        // For TypeScript/JavaScript, use the parent directory path.
        if path
            .extension()
            .map(|e| e == "ts" || e == "tsx" || e == "js" || e == "jsx")
            .unwrap_or(false)
        {
            if let Ok(relative) = path.strip_prefix(&self.project_root) {
                // Skip a leading src/ or lib/ prefix when present.
                let relative = relative
                    .strip_prefix("src")
                    .or_else(|_| relative.strip_prefix("lib"))
                    .unwrap_or(relative);

                if let Some(parent) = relative.parent() {
                    // NOTE(review): only '/' is replaced here; on Windows the
                    // lossy path string uses '\\' separators — confirm this
                    // branch only runs on '/'-separated paths.
                    let module = parent.to_string_lossy().replace('/', ".");
                    if !module.is_empty() {
                        return Some(module);
                    }
                }
            }
        }

        None
    }
|
||||
|
||||
/// Check if a file exists relative to project root
|
||||
fn file_exists(&self, name: &str) -> bool {
|
||||
self.project_root.join(name).exists()
|
||||
}
|
||||
|
||||
/// Check if a directory exists relative to project root
|
||||
fn dir_exists(&self, name: &str) -> bool {
|
||||
let path = self.project_root.join(name);
|
||||
path.exists() && path.is_dir()
|
||||
}
|
||||
|
||||
/// Check if any file matching a glob pattern exists
|
||||
fn glob_exists(&self, pattern: &str) -> bool {
|
||||
if let Ok(entries) = fs::read_dir(&self.project_root) {
|
||||
for entry in entries.filter_map(|e| e.ok()) {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
// Simple glob matching for patterns like "*.ext"
|
||||
if pattern.starts_with("*.") {
|
||||
let ext = &pattern[1..];
|
||||
if name.ends_with(ext) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Simple TOML value extraction (basic, no full parser)
|
||||
fn extract_toml_value(&self, content: &str, key: &str) -> Option<String> {
|
||||
for line in content.lines() {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.starts_with(&format!("{} ", key))
|
||||
|| trimmed.starts_with(&format!("{}=", key))
|
||||
{
|
||||
if let Some(value) = trimmed.split('=').nth(1) {
|
||||
let value = value.trim().trim_matches('"').trim_matches('\'');
|
||||
return Some(value.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Simple JSON value extraction (basic, no full parser)
|
||||
fn extract_json_value(&self, content: &str, key: &str) -> Option<String> {
|
||||
let pattern = format!("\"{}\"", key);
|
||||
for line in content.lines() {
|
||||
if line.contains(&pattern) {
|
||||
// Try to extract the value after the colon
|
||||
if let Some(colon_pos) = line.find(':') {
|
||||
let value = line[colon_pos + 1..].trim();
|
||||
let value = value.trim_start_matches('"');
|
||||
if let Some(end) = value.find('"') {
|
||||
return Some(value[..end].to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// TESTS
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build a throwaway Rust project: a Cargo.toml declaring tokio and
    /// axum dependencies plus a minimal src/main.rs.
    fn create_test_project() -> TempDir {
        let dir = TempDir::new().unwrap();

        // Create Cargo.toml
        fs::write(
            dir.path().join("Cargo.toml"),
            r#"
[package]
name = "test-project"
version = "0.1.0"

[dependencies]
tokio = "1.0"
axum = "0.7"
"#,
        )
        .unwrap();

        // Create src directory with an entry point.
        fs::create_dir(dir.path().join("src")).unwrap();
        fs::write(dir.path().join("src/main.rs"), "fn main() {}").unwrap();

        dir
    }

    /// A lone Cargo.toml must be classified as a pure Rust project.
    #[test]
    fn test_detect_project_type() {
        let dir = create_test_project();
        let capture = ContextCapture::new(dir.path().to_path_buf()).unwrap();

        let project_type = capture.detect_project_type().unwrap();
        assert_eq!(project_type, ProjectType::Rust);
    }

    /// Dependency markers in Cargo.toml surface as detected frameworks.
    #[test]
    fn test_detect_frameworks() {
        let dir = create_test_project();
        let capture = ContextCapture::new(dir.path().to_path_buf()).unwrap();

        let frameworks = capture.detect_frameworks().unwrap();
        assert!(frameworks.contains(&Framework::Tokio));
        assert!(frameworks.contains(&Framework::Axum));
    }

    /// The package name comes from the Cargo.toml `name` key.
    #[test]
    fn test_detect_project_name() {
        let dir = create_test_project();
        let capture = ContextCapture::new(dir.path().to_path_buf()).unwrap();

        let name = capture.detect_project_name().unwrap();
        assert_eq!(name, Some("test-project".to_string()));
    }

    /// Path-based test-file heuristic: suffix/prefix/directory conventions
    /// match, ordinary sources do not.
    #[test]
    fn test_is_test_file() {
        // Build the capture directly; no git analyzer is needed here.
        let capture = ContextCapture {
            git: None,
            active_files: vec![],
            project_root: PathBuf::from("."),
        };

        assert!(capture.is_test_file(Path::new("src/utils_test.rs")));
        assert!(capture.is_test_file(Path::new("tests/integration.rs")));
        assert!(capture.is_test_file(Path::new("src/utils.test.ts")));
        assert!(!capture.is_test_file(Path::new("src/utils.rs")));
        assert!(!capture.is_test_file(Path::new("src/main.ts")));
    }
}
|
||||
798
crates/vestige-core/src/codebase/git.rs
Normal file
798
crates/vestige-core/src/codebase/git.rs
Normal file
|
|
@ -0,0 +1,798 @@
|
|||
//! Git history analysis for extracting codebase knowledge
|
||||
//!
|
||||
//! This module analyzes git history to automatically extract:
|
||||
//! - File co-change patterns (files that frequently change together)
|
||||
//! - Bug fix patterns (from commit messages matching conventional formats)
|
||||
//! - Current git context (branch, uncommitted changes, recent history)
|
||||
//!
|
||||
//! This is a key differentiator for Vestige - learning from the codebase's history
|
||||
//! without requiring explicit user input.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};

use chrono::{DateTime, TimeZone, Utc};
use git2::{Commit, Repository, Sort};

use super::types::{BugFix, BugSeverity, FileRelationship, RelationType, RelationshipSource};
|
||||
|
||||
// ============================================================================
|
||||
// ERRORS
|
||||
// ============================================================================
|
||||
|
||||
/// Errors that can occur during git analysis
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum GitError {
|
||||
#[error("Git repository error: {0}")]
|
||||
Repository(#[from] git2::Error),
|
||||
#[error("Repository not found at: {0}")]
|
||||
NotFound(PathBuf),
|
||||
#[error("Invalid path: {0}")]
|
||||
InvalidPath(String),
|
||||
#[error("No commits found")]
|
||||
NoCommits,
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, GitError>;
|
||||
|
||||
// ============================================================================
|
||||
// GIT CONTEXT
|
||||
// ============================================================================
|
||||
|
||||
/// Current git context for a repository
///
/// A point-in-time snapshot produced by [`GitAnalyzer::get_current_context`].
#[derive(Debug, Clone)]
pub struct GitContext {
    /// Root path of the repository
    pub repo_root: PathBuf,
    /// Current branch name
    pub current_branch: String,
    /// HEAD commit SHA (short 8-hex-char form; empty when `has_commits`
    /// is false)
    pub head_commit: String,
    /// Files with uncommitted changes (unstaged)
    /// NOTE(review): paths come straight from libgit2 status entries,
    /// presumably repo-relative — confirm before joining with repo_root.
    pub uncommitted_changes: Vec<PathBuf>,
    /// Files staged for commit
    pub staged_changes: Vec<PathBuf>,
    /// Recent commits
    pub recent_commits: Vec<CommitInfo>,
    /// Whether the repository has any commits
    pub has_commits: bool,
    /// Whether there are untracked files
    pub has_untracked: bool,
}
|
||||
|
||||
/// Information about a git commit
#[derive(Debug, Clone)]
pub struct CommitInfo {
    /// Commit SHA (short — first 8 characters of `full_sha`)
    pub sha: String,
    /// Full commit SHA
    pub full_sha: String,
    /// Commit message (first line)
    pub message: String,
    /// Full commit message
    pub full_message: String,
    /// Author name ("Unknown" when not valid UTF-8)
    pub author: String,
    /// Author email (empty when not valid UTF-8)
    pub author_email: String,
    /// Commit timestamp (UTC; falls back to "now" for invalid times)
    pub timestamp: DateTime<Utc>,
    /// Files changed in this commit
    pub files_changed: Vec<PathBuf>,
    /// Is this a merge commit? (more than one parent)
    pub is_merge: bool,
}
|
||||
|
||||
// ============================================================================
|
||||
// GIT ANALYZER
|
||||
// ============================================================================
|
||||
|
||||
/// Analyzes git history to extract knowledge
///
/// Holds only the repository path; a fresh `Repository` handle is
/// opened per operation (see `open_repo`).
pub struct GitAnalyzer {
    // Path to the repository root; validated once in `new`.
    repo_path: PathBuf,
}
|
||||
|
||||
impl GitAnalyzer {
|
||||
/// Create a new GitAnalyzer for the given repository path
|
||||
pub fn new(repo_path: PathBuf) -> Result<Self> {
|
||||
// Verify the repository exists
|
||||
let _ = Repository::open(&repo_path)?;
|
||||
Ok(Self { repo_path })
|
||||
}
|
||||
|
||||
/// Open the repository
|
||||
fn open_repo(&self) -> Result<Repository> {
|
||||
Repository::open(&self.repo_path).map_err(GitError::from)
|
||||
}
|
||||
|
||||
/// Get the current git context
|
||||
pub fn get_current_context(&self) -> Result<GitContext> {
|
||||
let repo = self.open_repo()?;
|
||||
|
||||
// Get repository root
|
||||
let repo_root = repo
|
||||
.workdir()
|
||||
.map(|p| p.to_path_buf())
|
||||
.unwrap_or_else(|| self.repo_path.clone());
|
||||
|
||||
// Get current branch
|
||||
let current_branch = self.get_current_branch(&repo)?;
|
||||
|
||||
// Get HEAD commit
|
||||
let (head_commit, has_commits) = match repo.head() {
|
||||
Ok(head) => match head.peel_to_commit() {
|
||||
Ok(commit) => (commit.id().to_string()[..8].to_string(), true),
|
||||
Err(_) => (String::new(), false),
|
||||
},
|
||||
Err(_) => (String::new(), false),
|
||||
};
|
||||
|
||||
// Get status
|
||||
let statuses = repo.statuses(None)?;
|
||||
let mut uncommitted_changes = Vec::new();
|
||||
let mut staged_changes = Vec::new();
|
||||
let mut has_untracked = false;
|
||||
|
||||
for entry in statuses.iter() {
|
||||
let path = entry.path().map(|p| PathBuf::from(p)).unwrap_or_default();
|
||||
|
||||
let status = entry.status();
|
||||
|
||||
if status.is_wt_new() {
|
||||
has_untracked = true;
|
||||
}
|
||||
if status.is_wt_modified() || status.is_wt_deleted() || status.is_wt_renamed() {
|
||||
uncommitted_changes.push(path.clone());
|
||||
}
|
||||
if status.is_index_new()
|
||||
|| status.is_index_modified()
|
||||
|| status.is_index_deleted()
|
||||
|| status.is_index_renamed()
|
||||
{
|
||||
staged_changes.push(path);
|
||||
}
|
||||
}
|
||||
|
||||
// Get recent commits
|
||||
let recent_commits = if has_commits {
|
||||
self.get_recent_commits(&repo, 10)?
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
Ok(GitContext {
|
||||
repo_root,
|
||||
current_branch,
|
||||
head_commit,
|
||||
uncommitted_changes,
|
||||
staged_changes,
|
||||
recent_commits,
|
||||
has_commits,
|
||||
has_untracked,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the current branch name
|
||||
fn get_current_branch(&self, repo: &Repository) -> Result<String> {
|
||||
match repo.head() {
|
||||
Ok(head) => {
|
||||
if head.is_branch() {
|
||||
Ok(head
|
||||
.shorthand()
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| "unknown".to_string()))
|
||||
} else {
|
||||
// Detached HEAD
|
||||
Ok(head
|
||||
.target()
|
||||
.map(|oid| oid.to_string()[..8].to_string())
|
||||
.unwrap_or_else(|| "HEAD".to_string()))
|
||||
}
|
||||
}
|
||||
Err(_) => Ok("main".to_string()), // New repo with no commits
|
||||
}
|
||||
}
|
||||
|
||||
/// Get recent commits
|
||||
fn get_recent_commits(&self, repo: &Repository, limit: usize) -> Result<Vec<CommitInfo>> {
|
||||
let mut revwalk = repo.revwalk()?;
|
||||
revwalk.push_head()?;
|
||||
revwalk.set_sorting(Sort::TIME)?;
|
||||
|
||||
let mut commits = Vec::new();
|
||||
|
||||
for oid in revwalk.take(limit) {
|
||||
let oid = oid?;
|
||||
let commit = repo.find_commit(oid)?;
|
||||
let commit_info = self.commit_to_info(&commit, repo)?;
|
||||
commits.push(commit_info);
|
||||
}
|
||||
|
||||
Ok(commits)
|
||||
}
|
||||
|
||||
/// Convert a git2::Commit to CommitInfo
///
/// Extracts author, timestamp, changed files, and both the short
/// (8-char) and full SHA. Missing or non-UTF-8 fields degrade to
/// empty/placeholder strings rather than failing.
fn commit_to_info(&self, commit: &Commit, repo: &Repository) -> Result<CommitInfo> {
    let full_sha = commit.id().to_string();
    // Git SHAs render as 40 hex chars, so taking 8 is always in bounds.
    let sha = full_sha[..8].to_string();

    // Subject line only (first line of the message).
    let message = commit
        .message()
        .map(|m| m.lines().next().unwrap_or("").to_string())
        .unwrap_or_default();

    let full_message = commit.message().map(|m| m.to_string()).unwrap_or_default();

    let author = commit.author();
    let author_name = author.name().unwrap_or("Unknown").to_string();
    let author_email = author.email().unwrap_or("").to_string();

    // Out-of-range commit times fall back to "now" instead of erroring.
    let timestamp = Utc
        .timestamp_opt(commit.time().seconds(), 0)
        .single()
        .unwrap_or_else(Utc::now);

    // Get files changed
    let files_changed = self.get_commit_files(commit, repo)?;

    // More than one parent means a merge commit.
    let is_merge = commit.parent_count() > 1;

    Ok(CommitInfo {
        sha,
        full_sha,
        message,
        full_message,
        author: author_name,
        author_email,
        timestamp,
        files_changed,
        is_merge,
    })
}
|
||||
|
||||
/// Get files changed in a commit
|
||||
fn get_commit_files(&self, commit: &Commit, repo: &Repository) -> Result<Vec<PathBuf>> {
|
||||
let mut files = Vec::new();
|
||||
|
||||
if commit.parent_count() == 0 {
|
||||
// Initial commit - diff against empty tree
|
||||
let tree = commit.tree()?;
|
||||
let diff = repo.diff_tree_to_tree(None, Some(&tree), None)?;
|
||||
for delta in diff.deltas() {
|
||||
if let Some(path) = delta.new_file().path() {
|
||||
files.push(path.to_path_buf());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Normal commit - diff against first parent
|
||||
let parent = commit.parent(0)?;
|
||||
let parent_tree = parent.tree()?;
|
||||
let tree = commit.tree()?;
|
||||
|
||||
let diff = repo.diff_tree_to_tree(Some(&parent_tree), Some(&tree), None)?;
|
||||
|
||||
for delta in diff.deltas() {
|
||||
if let Some(path) = delta.new_file().path() {
|
||||
files.push(path.to_path_buf());
|
||||
}
|
||||
if let Some(path) = delta.old_file().path() {
|
||||
if !files.contains(&path.to_path_buf()) {
|
||||
files.push(path.to_path_buf());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
|
||||
/// Find files that frequently change together
///
/// This analyzes git history to find pairs of files that are often modified
/// in the same commit. This can reveal:
/// - Test files and their implementations
/// - Related components
/// - Configuration files and code they configure
///
/// `min_cooccurrence` is a Jaccard-strength threshold in [0, 1];
/// `since` optionally restricts analysis to commits at/after that time.
pub fn find_cochange_patterns(
    &self,
    since: Option<DateTime<Utc>>,
    min_cooccurrence: f64,
) -> Result<Vec<FileRelationship>> {
    let repo = self.open_repo()?;

    // Track how often each pair of files changes together.
    // Pair keys are ordered (smaller path first) so (a, b) and (b, a)
    // collapse into one entry.
    let mut cochange_counts: HashMap<(PathBuf, PathBuf), u32> = HashMap::new();
    let mut file_change_counts: HashMap<PathBuf, u32> = HashMap::new();
    let mut total_commits = 0u32;

    let mut revwalk = repo.revwalk()?;
    revwalk.push_head()?;
    revwalk.set_sorting(Sort::TIME)?;

    for oid in revwalk {
        let oid = oid?;
        let commit = repo.find_commit(oid)?;

        // Check if commit is after 'since' timestamp
        if let Some(since_time) = since {
            let commit_time = Utc
                .timestamp_opt(commit.time().seconds(), 0)
                .single()
                .unwrap_or_else(Utc::now);

            if commit_time < since_time {
                continue;
            }
        }

        // Skip merge commits
        if commit.parent_count() > 1 {
            continue;
        }

        let files = self.get_commit_files(&commit, &repo)?;

        // Filter to relevant file types
        let relevant_files: Vec<_> = files
            .into_iter()
            .filter(|f| self.is_relevant_file(f))
            .collect();

        if relevant_files.len() < 2 || relevant_files.len() > 50 {
            // Skip commits with too few or too many files
            // (pairing needs >=2; huge commits would create up to
            // n*(n-1)/2 mostly-meaningless pairs).
            continue;
        }

        total_commits += 1;

        // Count individual file changes
        for file in &relevant_files {
            *file_change_counts.entry(file.clone()).or_insert(0) += 1;
        }

        // Count co-occurrences for all pairs
        for i in 0..relevant_files.len() {
            for j in (i + 1)..relevant_files.len() {
                let (a, b) = if relevant_files[i] < relevant_files[j] {
                    (relevant_files[i].clone(), relevant_files[j].clone())
                } else {
                    (relevant_files[j].clone(), relevant_files[i].clone())
                };
                *cochange_counts.entry((a, b)).or_insert(0) += 1;
            }
        }
    }

    if total_commits == 0 {
        return Ok(vec![]);
    }

    // Convert to relationships, filtering by minimum co-occurrence
    let mut relationships = Vec::new();
    let mut id_counter = 0u32;

    for ((file_a, file_b), count) in cochange_counts {
        if count < 2 {
            continue; // Need at least 2 co-occurrences
        }

        // Calculate strength as Jaccard coefficient
        // strength = count(A&B) / (count(A) + count(B) - count(A&B))
        let count_a = file_change_counts.get(&file_a).copied().unwrap_or(0);
        let count_b = file_change_counts.get(&file_b).copied().unwrap_or(0);

        let union = count_a + count_b - count;
        let strength = if union > 0 {
            count as f64 / union as f64
        } else {
            0.0
        };

        if strength >= min_cooccurrence {
            id_counter += 1;
            relationships.push(FileRelationship {
                id: format!("cochange-{}", id_counter),
                files: vec![file_a, file_b],
                relationship_type: RelationType::FrequentCochange,
                strength,
                description: format!(
                    "Changed together in {} of {} commits ({:.0}% co-occurrence)",
                    count,
                    total_commits,
                    strength * 100.0
                ),
                created_at: Utc::now(),
                last_confirmed: Some(Utc::now()),
                source: RelationshipSource::GitCochange,
                observation_count: count,
            });
        }
    }

    // Sort by strength, strongest first; partial_cmp fallback keeps
    // the sort total even if a strength were NaN.
    relationships.sort_by(|a, b| b.strength.partial_cmp(&a.strength).unwrap_or(std::cmp::Ordering::Equal));

    Ok(relationships)
}
|
||||
|
||||
/// Check if a file is relevant for analysis
|
||||
fn is_relevant_file(&self, path: &Path) -> bool {
|
||||
// Skip common non-source files
|
||||
let path_str = path.to_string_lossy();
|
||||
|
||||
// Skip lock files, generated files, etc.
|
||||
if path_str.contains("Cargo.lock")
|
||||
|| path_str.contains("package-lock.json")
|
||||
|| path_str.contains("yarn.lock")
|
||||
|| path_str.contains("pnpm-lock.yaml")
|
||||
|| path_str.contains(".min.")
|
||||
|| path_str.contains(".map")
|
||||
|| path_str.contains("node_modules")
|
||||
|| path_str.contains("target/")
|
||||
|| path_str.contains("dist/")
|
||||
|| path_str.contains("build/")
|
||||
|| path_str.contains(".git/")
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Include source files
|
||||
if let Some(ext) = path.extension() {
|
||||
let ext = ext.to_string_lossy().to_lowercase();
|
||||
matches!(
|
||||
ext.as_str(),
|
||||
"rs" | "ts"
|
||||
| "tsx"
|
||||
| "js"
|
||||
| "jsx"
|
||||
| "py"
|
||||
| "go"
|
||||
| "java"
|
||||
| "kt"
|
||||
| "swift"
|
||||
| "c"
|
||||
| "cpp"
|
||||
| "h"
|
||||
| "hpp"
|
||||
| "toml"
|
||||
| "yaml"
|
||||
| "yml"
|
||||
| "json"
|
||||
| "md"
|
||||
| "sql"
|
||||
)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract bug fixes from commit messages
///
/// Looks for conventional commit messages like:
/// - "fix: description"
/// - "fix(scope): description"
/// - "bugfix: description"
/// - Messages containing "fixes #123"
///
/// Walks the entire reachable history from HEAD; `since` filters out
/// older commits (they are skipped, not used to stop the walk).
pub fn extract_bug_fixes(&self, since: Option<DateTime<Utc>>) -> Result<Vec<BugFix>> {
    let repo = self.open_repo()?;
    let mut bug_fixes = Vec::new();

    let mut revwalk = repo.revwalk()?;
    revwalk.push_head()?;
    revwalk.set_sorting(Sort::TIME)?;

    // Shared counter so ids ("bug-N") are unique within this scan.
    let mut id_counter = 0u32;

    for oid in revwalk {
        let oid = oid?;
        let commit = repo.find_commit(oid)?;

        // Check timestamp
        let commit_time = Utc
            .timestamp_opt(commit.time().seconds(), 0)
            .single()
            .unwrap_or_else(Utc::now);

        if let Some(since_time) = since {
            if commit_time < since_time {
                continue;
            }
        }

        let message = commit.message().map(|m| m.to_string()).unwrap_or_default();

        // Check if this looks like a bug fix commit
        if let Some(bug_fix) =
            self.parse_bug_fix_commit(&message, &commit, &repo, &mut id_counter)?
        {
            bug_fixes.push(bug_fix);
        }
    }

    Ok(bug_fixes)
}
|
||||
|
||||
/// Parse a commit message to extract bug fix information
///
/// Returns `Ok(None)` when the message does not match any fix pattern.
/// `counter` is incremented only for accepted commits and used to
/// build a scan-unique id ("bug-N").
fn parse_bug_fix_commit(
    &self,
    message: &str,
    commit: &Commit,
    repo: &Repository,
    counter: &mut u32,
) -> Result<Option<BugFix>> {
    let message_lower = message.to_lowercase();

    // Check for conventional commit fix patterns
    let is_fix = message_lower.starts_with("fix:")
        || message_lower.starts_with("fix(")
        || message_lower.starts_with("bugfix:")
        || message_lower.starts_with("bugfix(")
        || message_lower.starts_with("hotfix:")
        || message_lower.starts_with("hotfix(")
        || message_lower.contains("fixes #")
        || message_lower.contains("closes #")
        || message_lower.contains("resolves #");

    if !is_fix {
        return Ok(None);
    }

    *counter += 1;

    // Extract the description (first line, removing the prefix)
    let first_line = message.lines().next().unwrap_or("");
    let symptom = if let Some(colon_pos) = first_line.find(':') {
        first_line[colon_pos + 1..].trim().to_string()
    } else {
        first_line.to_string()
    };

    // Try to extract root cause and solution from multi-line messages
    let mut root_cause = String::new();
    let mut solution = String::new();
    let mut issue_link = None;

    for line in message.lines().skip(1) {
        // Lowercased copy is used only for matching; the extraction
        // below splits the original `line` so casing is preserved.
        let line_lower = line.to_lowercase().trim().to_string();

        if line_lower.starts_with("cause:")
            || line_lower.starts_with("root cause:")
            || line_lower.starts_with("problem:")
        {
            root_cause = line
                .split_once(':')
                .map(|(_, v)| v.trim().to_string())
                .unwrap_or_default();
        } else if line_lower.starts_with("solution:")
            || line_lower.starts_with("fix:")
            || line_lower.starts_with("fixed by:")
        {
            solution = line
                .split_once(':')
                .map(|(_, v)| v.trim().to_string())
                .unwrap_or_default();
        } else if line_lower.contains("fixes #")
            || line_lower.contains("closes #")
            || line_lower.contains("resolves #")
        {
            // Extract issue number: digits immediately after the first '#'
            if let Some(hash_pos) = line.find('#') {
                let issue_num: String = line[hash_pos + 1..]
                    .chars()
                    .take_while(|c| c.is_ascii_digit())
                    .collect();
                if !issue_num.is_empty() {
                    issue_link = Some(format!("#{}", issue_num));
                }
            }
        }
    }

    // If no explicit root cause/solution, use the commit message
    if root_cause.is_empty() {
        root_cause = "See commit for details".to_string();
    }
    if solution.is_empty() {
        solution = symptom.clone();
    }

    // Determine severity from keywords (first match wins, top-down)
    let severity = if message_lower.contains("critical")
        || message_lower.contains("security")
        || message_lower.contains("crash")
    {
        BugSeverity::Critical
    } else if message_lower.contains("hotfix") || message_lower.contains("urgent") {
        BugSeverity::High
    } else if message_lower.contains("minor") || message_lower.contains("typo") {
        BugSeverity::Low
    } else {
        BugSeverity::Medium
    };

    let files_changed = self.get_commit_files(commit, repo)?;

    let bug_fix = BugFix {
        id: format!("bug-{}", counter),
        symptom,
        root_cause,
        solution,
        files_changed,
        commit_sha: commit.id().to_string(),
        created_at: Utc
            .timestamp_opt(commit.time().seconds(), 0)
            .single()
            .unwrap_or_else(Utc::now),
        issue_link,
        severity,
        discovered_by: commit.author().name().map(|s| s.to_string()),
        prevention_notes: None,
        tags: vec!["auto-detected".to_string()],
    };

    Ok(Some(bug_fix))
}
|
||||
|
||||
/// Analyze the full git history and return discovered knowledge
///
/// Combines bug-fix extraction, co-change detection (Jaccard
/// threshold 0.3), and activity statistics over the 50 most recent
/// commits.
pub fn analyze_history(&self, since: Option<DateTime<Utc>>) -> Result<HistoryAnalysis> {
    // Extract bug fixes
    let bug_fixes = self.extract_bug_fixes(since)?;

    // Find co-change patterns
    let file_relationships = self.find_cochange_patterns(since, 0.3)?;

    // Get recent activity summary
    let recent_commits = {
        let repo = self.open_repo()?;
        self.get_recent_commits(&repo, 50)?
    };

    // Calculate activity stats
    let mut author_counts: HashMap<String, u32> = HashMap::new();
    let mut file_counts: HashMap<PathBuf, u32> = HashMap::new();

    for commit in &recent_commits {
        *author_counts.entry(commit.author.clone()).or_insert(0) += 1;
        for file in &commit.files_changed {
            *file_counts.entry(file.clone()).or_insert(0) += 1;
        }
    }

    // Top contributors
    let mut top_contributors: Vec<_> = author_counts.into_iter().collect();
    top_contributors.sort_by(|a, b| b.1.cmp(&a.1));

    // Hot files (most frequently changed)
    let mut hot_files: Vec<_> = file_counts.into_iter().collect();
    hot_files.sort_by(|a, b| b.1.cmp(&a.1));

    Ok(HistoryAnalysis {
        bug_fixes,
        file_relationships,
        // NOTE: this is the size of the recent window (capped at 50),
        // not the total number of commits scanned above.
        commit_count: recent_commits.len(),
        top_contributors: top_contributors.into_iter().take(5).collect(),
        hot_files: hot_files.into_iter().take(10).collect(),
        analyzed_since: since,
    })
}
|
||||
|
||||
/// Get files changed since a specific commit
|
||||
pub fn get_files_changed_since(&self, commit_sha: &str) -> Result<Vec<PathBuf>> {
|
||||
let repo = self.open_repo()?;
|
||||
|
||||
let target_oid = repo.revparse_single(commit_sha)?.id();
|
||||
let head_commit = repo.head()?.peel_to_commit()?;
|
||||
let target_commit = repo.find_commit(target_oid)?;
|
||||
|
||||
let head_tree = head_commit.tree()?;
|
||||
let target_tree = target_commit.tree()?;
|
||||
|
||||
let diff = repo.diff_tree_to_tree(Some(&target_tree), Some(&head_tree), None)?;
|
||||
|
||||
let mut files = Vec::new();
|
||||
for delta in diff.deltas() {
|
||||
if let Some(path) = delta.new_file().path() {
|
||||
files.push(path.to_path_buf());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
|
||||
/// Get blame information for a file
///
/// Returns the commit that last touched `line` of `file_path`, or
/// `None` when no blame hunk covers that line.
/// NOTE(review): `line` is forwarded unchanged to `Blame::get_line`;
/// libgit2 blame lines are presumably 1-based — confirm callers agree.
pub fn get_file_blame(&self, file_path: &Path, line: u32) -> Result<Option<CommitInfo>> {
    let repo = self.open_repo()?;

    let blame = repo.blame_file(file_path, None)?;

    if let Some(hunk) = blame.get_line(line as usize) {
        let commit_id = hunk.final_commit_id();
        // A failed commit lookup is deliberately reported as None
        // rather than an error.
        if let Ok(commit) = repo.find_commit(commit_id) {
            return Ok(Some(self.commit_to_info(&commit, &repo)?));
        }
    }

    Ok(None)
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// HISTORY ANALYSIS RESULT
|
||||
// ============================================================================
|
||||
|
||||
/// Result of analyzing git history
///
/// Produced by [`GitAnalyzer::analyze_history`].
#[derive(Debug)]
pub struct HistoryAnalysis {
    /// Bug fixes extracted from commits
    pub bug_fixes: Vec<BugFix>,
    /// File relationships discovered from co-change patterns
    pub file_relationships: Vec<FileRelationship>,
    /// Total commits analyzed (size of the recent-commit window used
    /// for stats, capped at 50 in `analyze_history`)
    pub commit_count: usize,
    /// Top contributors (author, commit count), at most 5
    pub top_contributors: Vec<(String, u32)>,
    /// Most frequently changed files (path, change count), at most 10
    pub hot_files: Vec<(PathBuf, u32)>,
    /// Time period analyzed from (None = full history)
    pub analyzed_since: Option<DateTime<Utc>>,
}
|
||||
|
||||
// ============================================================================
|
||||
// TESTS
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build a throwaway repository containing one empty root commit.
    fn create_test_repo() -> (TempDir, Repository) {
        let dir = TempDir::new().unwrap();
        let repo = Repository::init(dir.path()).unwrap();

        // Configure signature
        let sig = git2::Signature::now("Test User", "test@example.com").unwrap();

        // Create initial commit
        {
            let tree_id = {
                let mut index = repo.index().unwrap();
                index.write_tree().unwrap()
            };
            let tree = repo.find_tree(tree_id).unwrap();
            // Empty parent slice => root commit.
            repo.commit(Some("HEAD"), &sig, &sig, "Initial commit", &tree, &[])
                .unwrap();
        }

        (dir, repo)
    }

    #[test]
    fn test_git_analyzer_creation() {
        let (dir, _repo) = create_test_repo();
        let analyzer = GitAnalyzer::new(dir.path().to_path_buf());
        assert!(analyzer.is_ok());
    }

    #[test]
    fn test_get_current_context() {
        let (dir, _repo) = create_test_repo();
        let analyzer = GitAnalyzer::new(dir.path().to_path_buf()).unwrap();

        let context = analyzer.get_current_context().unwrap();
        assert!(context.has_commits);
        assert!(!context.head_commit.is_empty());
    }

    #[test]
    fn test_is_relevant_file() {
        // `is_relevant_file` only inspects the path, so the analyzer
        // can be constructed directly without a real repository.
        let analyzer = GitAnalyzer {
            repo_path: PathBuf::from("."),
        };

        assert!(analyzer.is_relevant_file(Path::new("src/main.rs")));
        assert!(analyzer.is_relevant_file(Path::new("lib/utils.ts")));
        assert!(!analyzer.is_relevant_file(Path::new("Cargo.lock")));
        assert!(!analyzer.is_relevant_file(Path::new("node_modules/foo.js")));
        assert!(!analyzer.is_relevant_file(Path::new("target/debug/main")));
    }
}
|
||||
769
crates/vestige-core/src/codebase/mod.rs
Normal file
769
crates/vestige-core/src/codebase/mod.rs
Normal file
|
|
@ -0,0 +1,769 @@
|
|||
//! Codebase Memory Module - Vestige's KILLER DIFFERENTIATOR
|
||||
//!
|
||||
//! This module makes Vestige unique in the AI memory market. No other tool
|
||||
//! understands codebases at this level - remembering architectural decisions,
|
||||
//! bug fixes, patterns, file relationships, and developer preferences.
|
||||
//!
|
||||
//! # Overview
|
||||
//!
|
||||
//! The Codebase Memory Module provides:
|
||||
//!
|
||||
//! - **Git History Analysis**: Automatically learns from your codebase's history
|
||||
//! - Extracts bug fix patterns from commit messages
|
||||
//! - Discovers file co-change patterns (files that always change together)
|
||||
//! - Understands the evolution of the codebase
|
||||
//!
|
||||
//! - **Context Capture**: Knows what you're working on
|
||||
//! - Current branch and uncommitted changes
|
||||
//! - Project type and frameworks
|
||||
//! - Active files and editing context
|
||||
//!
|
||||
//! - **Pattern Detection**: Learns and applies coding patterns
|
||||
//! - User-taught patterns
|
||||
//! - Auto-detected patterns from code
|
||||
//! - Context-aware pattern suggestions
|
||||
//!
|
||||
//! - **Relationship Tracking**: Understands file relationships
|
||||
//! - Import/dependency relationships
|
||||
//! - Test-implementation pairs
|
||||
//! - Co-edit patterns
|
||||
//!
|
||||
//! - **File Watching**: Continuous learning from developer behavior
|
||||
//! - Tracks files edited together
|
||||
//! - Updates relationship strengths
|
||||
//! - Triggers pattern detection
|
||||
//!
|
||||
//! # Quick Start
|
||||
//!
|
||||
//! ```rust,no_run
|
||||
//! use vestige_core::codebase::CodebaseMemory;
|
||||
//! use std::path::PathBuf;
|
||||
//!
|
||||
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
|
||||
//! // Create codebase memory for a project
|
||||
//! let memory = CodebaseMemory::new(PathBuf::from("/path/to/project"))?;
|
||||
//!
|
||||
//! // Learn from git history
|
||||
//! let analysis = memory.learn_from_history().await?;
|
||||
//! println!("Found {} bug fixes", analysis.bug_fixes_found);
|
||||
//! println!("Found {} file relationships", analysis.relationships_found);
|
||||
//!
|
||||
//! // Get current context
|
||||
//! let context = memory.get_context()?;
|
||||
//! println!("Working on branch: {}", context.git.as_ref().map(|g| &g.current_branch).unwrap_or(&"unknown".to_string()));
|
||||
//!
|
||||
//! // Remember an architectural decision
|
||||
//! memory.remember_decision(
|
||||
//! "Use Event Sourcing for order management",
|
||||
//! "Need complete audit trail and ability to replay state",
|
||||
//! vec![PathBuf::from("src/orders/events.rs")],
|
||||
//! )?;
|
||||
//!
|
||||
//! // Query codebase memories
|
||||
//! let results = memory.query("error handling", None)?;
|
||||
//! for node in results {
|
||||
//! println!("Found: {}", node.to_searchable_text());
|
||||
//! }
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
|
||||
pub mod context;
|
||||
pub mod git;
|
||||
pub mod patterns;
|
||||
pub mod relationships;
|
||||
pub mod types;
|
||||
pub mod watcher;
|
||||
|
||||
// Re-export main types
|
||||
pub use context::{ContextCapture, FileContext, Framework, ProjectType, WorkingContext};
|
||||
pub use git::{CommitInfo, GitAnalyzer, GitContext, HistoryAnalysis};
|
||||
pub use patterns::{PatternDetector, PatternMatch, PatternSuggestion};
|
||||
pub use relationships::{
|
||||
GraphEdge, GraphMetadata, GraphNode, RelatedFile, RelationshipGraph, RelationshipTracker,
|
||||
};
|
||||
pub use types::{
|
||||
ArchitecturalDecision, BugFix, BugSeverity, CodeEntity, CodePattern, CodebaseNode,
|
||||
CodingPreference, DecisionStatus, EntityType, FileRelationship, PreferenceSource, RelationType,
|
||||
RelationshipSource, WorkContext, WorkStatus,
|
||||
};
|
||||
pub use watcher::{CodebaseWatcher, FileEvent, FileEventKind, WatcherConfig};
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use tokio::sync::RwLock;
|
||||
use uuid::Uuid;
|
||||
|
||||
// ============================================================================
|
||||
// ERRORS
|
||||
// ============================================================================
|
||||
|
||||
/// Unified error type for codebase memory operations
///
/// Wraps every submodule's error type via `#[from]`, so `?` can be
/// used across git, context, pattern, relationship and watcher calls.
#[derive(Debug, thiserror::Error)]
pub enum CodebaseError {
    #[error("Git error: {0}")]
    Git(#[from] git::GitError),
    #[error("Context error: {0}")]
    Context(#[from] context::ContextError),
    #[error("Pattern error: {0}")]
    Pattern(#[from] patterns::PatternError),
    #[error("Relationship error: {0}")]
    Relationship(#[from] relationships::RelationshipError),
    #[error("Watcher error: {0}")]
    Watcher(#[from] watcher::WatcherError),
    /// Storage-layer failure described as free text.
    #[error("Storage error: {0}")]
    Storage(String),
    /// A requested node/entity does not exist.
    #[error("Not found: {0}")]
    NotFound(String),
}

/// Convenience alias for this module's fallible operations.
pub type Result<T> = std::result::Result<T, CodebaseError>;
|
||||
|
||||
// ============================================================================
|
||||
// LEARNING RESULT
|
||||
// ============================================================================
|
||||
|
||||
/// Result of learning from git history
///
/// Summary counters returned by `CodebaseMemory::learn_from_history`.
#[derive(Debug)]
pub struct LearningResult {
    /// Bug fixes extracted
    pub bug_fixes_found: usize,
    /// File relationships discovered
    pub relationships_found: usize,
    /// Patterns detected
    pub patterns_detected: usize,
    /// Time range analyzed (None = full history)
    pub analyzed_since: Option<DateTime<Utc>>,
    /// Commits analyzed
    pub commits_analyzed: usize,
    /// Duration of analysis in milliseconds
    pub duration_ms: u64,
}
|
||||
|
||||
// ============================================================================
|
||||
// CODEBASE MEMORY
|
||||
// ============================================================================
|
||||
|
||||
/// Main codebase memory interface
///
/// This is the primary entry point for all codebase memory operations.
/// It coordinates between git analysis, context capture, pattern detection,
/// and relationship tracking.
///
/// Shared components are wrapped in `Arc<RwLock<_>>` so the optional
/// file watcher can observe and update them concurrently.
pub struct CodebaseMemory {
    /// Repository path
    repo_path: PathBuf,
    /// Git analyzer
    pub git: GitAnalyzer,
    /// Context capture
    pub context: ContextCapture,
    /// Pattern detector (shared with the watcher when enabled)
    patterns: Arc<RwLock<PatternDetector>>,
    /// Relationship tracker (shared with the watcher when enabled)
    relationships: Arc<RwLock<RelationshipTracker>>,
    /// File watcher (optional; set by `with_watcher`)
    watcher: Option<Arc<RwLock<CodebaseWatcher>>>,
    /// Stored codebase nodes
    // NOTE(review): in-memory only as far as this file shows; any
    // persistence would have to live elsewhere — confirm.
    nodes: Arc<RwLock<Vec<CodebaseNode>>>,
}
|
||||
|
||||
impl CodebaseMemory {
|
||||
/// Create a new CodebaseMemory for a repository
|
||||
pub fn new(repo_path: PathBuf) -> Result<Self> {
|
||||
let git = GitAnalyzer::new(repo_path.clone())?;
|
||||
let context = ContextCapture::new(repo_path.clone())?;
|
||||
let patterns = Arc::new(RwLock::new(PatternDetector::new()));
|
||||
let relationships = Arc::new(RwLock::new(RelationshipTracker::new()));
|
||||
|
||||
// Load built-in patterns
|
||||
{
|
||||
let mut detector = patterns.blocking_write();
|
||||
for pattern in patterns::create_builtin_patterns() {
|
||||
let _ = detector.learn_pattern(pattern);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
repo_path,
|
||||
git,
|
||||
context,
|
||||
patterns,
|
||||
relationships,
|
||||
watcher: None,
|
||||
nodes: Arc::new(RwLock::new(Vec::new())),
|
||||
})
|
||||
}
|
||||
|
||||
/// Create with file watching enabled
|
||||
pub fn with_watcher(repo_path: PathBuf) -> Result<Self> {
|
||||
let mut memory = Self::new(repo_path)?;
|
||||
|
||||
let watcher = CodebaseWatcher::new(
|
||||
Arc::clone(&memory.relationships),
|
||||
Arc::clone(&memory.patterns),
|
||||
);
|
||||
memory.watcher = Some(Arc::new(RwLock::new(watcher)));
|
||||
|
||||
Ok(memory)
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// DECISION MANAGEMENT
|
||||
// ========================================================================
|
||||
|
||||
/// Remember an architectural decision
|
||||
pub fn remember_decision(
|
||||
&self,
|
||||
decision: &str,
|
||||
rationale: &str,
|
||||
files_affected: Vec<PathBuf>,
|
||||
) -> Result<String> {
|
||||
let id = format!("adr-{}", Uuid::new_v4());
|
||||
|
||||
let node = CodebaseNode::ArchitecturalDecision(ArchitecturalDecision {
|
||||
id: id.clone(),
|
||||
decision: decision.to_string(),
|
||||
rationale: rationale.to_string(),
|
||||
files_affected,
|
||||
commit_sha: self.git.get_current_context().ok().map(|c| c.head_commit),
|
||||
created_at: Utc::now(),
|
||||
updated_at: None,
|
||||
context: None,
|
||||
tags: vec![],
|
||||
status: DecisionStatus::Accepted,
|
||||
alternatives_considered: vec![],
|
||||
});
|
||||
|
||||
self.nodes.blocking_write().push(node);
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Remember an architectural decision with full details
|
||||
pub fn remember_decision_full(&self, decision: ArchitecturalDecision) -> Result<String> {
|
||||
let id = decision.id.clone();
|
||||
self.nodes
|
||||
.blocking_write()
|
||||
.push(CodebaseNode::ArchitecturalDecision(decision));
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// BUG FIX MANAGEMENT
|
||||
// ========================================================================
|
||||
|
||||
/// Remember a bug fix
|
||||
pub fn remember_bug_fix(&self, fix: BugFix) -> Result<String> {
|
||||
let id = fix.id.clone();
|
||||
self.nodes.blocking_write().push(CodebaseNode::BugFix(fix));
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Remember a bug fix with minimal details
|
||||
pub fn remember_bug_fix_simple(
|
||||
&self,
|
||||
symptom: &str,
|
||||
root_cause: &str,
|
||||
solution: &str,
|
||||
files_changed: Vec<PathBuf>,
|
||||
) -> Result<String> {
|
||||
let id = format!("bug-{}", Uuid::new_v4());
|
||||
let commit_sha = self
|
||||
.git
|
||||
.get_current_context()
|
||||
.map(|c| c.head_commit)
|
||||
.unwrap_or_default();
|
||||
|
||||
let fix = BugFix::new(
|
||||
id.clone(),
|
||||
symptom.to_string(),
|
||||
root_cause.to_string(),
|
||||
solution.to_string(),
|
||||
commit_sha,
|
||||
)
|
||||
.with_files(files_changed);
|
||||
|
||||
self.remember_bug_fix(fix)?;
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// PATTERN MANAGEMENT
|
||||
// ========================================================================
|
||||
|
||||
/// Remember a coding pattern
|
||||
pub fn remember_pattern(&self, pattern: CodePattern) -> Result<String> {
|
||||
let id = pattern.id.clone();
|
||||
self.patterns.blocking_write().learn_pattern(pattern)?;
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Get pattern suggestions for current context
|
||||
pub async fn get_pattern_suggestions(&self) -> Result<Vec<PatternSuggestion>> {
|
||||
let context = self.get_context()?;
|
||||
let detector = self.patterns.read().await;
|
||||
Ok(detector.suggest_patterns(&context)?)
|
||||
}
|
||||
|
||||
/// Detect patterns in code
|
||||
pub async fn detect_patterns_in_code(
|
||||
&self,
|
||||
code: &str,
|
||||
language: &str,
|
||||
) -> Result<Vec<PatternMatch>> {
|
||||
let detector = self.patterns.read().await;
|
||||
Ok(detector.detect_patterns(code, language)?)
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// PREFERENCE MANAGEMENT
|
||||
// ========================================================================
|
||||
|
||||
/// Remember a coding preference
|
||||
pub fn remember_preference(&self, preference: CodingPreference) -> Result<String> {
|
||||
let id = preference.id.clone();
|
||||
self.nodes
|
||||
.blocking_write()
|
||||
.push(CodebaseNode::CodingPreference(preference));
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Remember a simple preference
|
||||
pub fn remember_preference_simple(
|
||||
&self,
|
||||
context: &str,
|
||||
preference: &str,
|
||||
counter_preference: Option<&str>,
|
||||
) -> Result<String> {
|
||||
let id = format!("pref-{}", Uuid::new_v4());
|
||||
|
||||
let pref = CodingPreference::new(id.clone(), context.to_string(), preference.to_string())
|
||||
.with_confidence(0.8);
|
||||
|
||||
let pref = if let Some(counter) = counter_preference {
|
||||
pref.with_counter(counter.to_string())
|
||||
} else {
|
||||
pref
|
||||
};
|
||||
|
||||
self.remember_preference(pref)?;
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// RELATIONSHIP MANAGEMENT
|
||||
// ========================================================================
|
||||
|
||||
/// Get files related to a given file
|
||||
pub async fn get_related_files(&self, file: &std::path::Path) -> Result<Vec<RelatedFile>> {
|
||||
let tracker = self.relationships.read().await;
|
||||
Ok(tracker.get_related_files(file)?)
|
||||
}
|
||||
|
||||
/// Record that files were edited together
|
||||
pub async fn record_coedit(&self, files: &[PathBuf]) -> Result<()> {
|
||||
let mut tracker = self.relationships.write().await;
|
||||
Ok(tracker.record_coedit(files)?)
|
||||
}
|
||||
|
||||
/// Build a relationship graph for visualization
|
||||
pub async fn build_relationship_graph(&self) -> Result<RelationshipGraph> {
|
||||
let tracker = self.relationships.read().await;
|
||||
Ok(tracker.build_graph()?)
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// CONTEXT
|
||||
// ========================================================================
|
||||
|
||||
/// Get the current working context
|
||||
pub fn get_context(&self) -> Result<WorkingContext> {
|
||||
Ok(self.context.capture()?)
|
||||
}
|
||||
|
||||
/// Get context for a specific file
|
||||
pub fn get_file_context(&self, path: &std::path::Path) -> Result<FileContext> {
|
||||
Ok(self.context.context_for_file(path)?)
|
||||
}
|
||||
|
||||
/// Set active files for context tracking
|
||||
pub fn set_active_files(&mut self, files: Vec<PathBuf>) {
|
||||
self.context.set_active_files(files);
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// QUERY
|
||||
// ========================================================================
|
||||
|
||||
/// Query codebase memories
|
||||
pub fn query(
|
||||
&self,
|
||||
query: &str,
|
||||
context: Option<&WorkingContext>,
|
||||
) -> Result<Vec<CodebaseNode>> {
|
||||
let query_lower = query.to_lowercase();
|
||||
let nodes = self.nodes.blocking_read();
|
||||
|
||||
let mut results: Vec<_> = nodes
|
||||
.iter()
|
||||
.filter(|node| {
|
||||
let text = node.to_searchable_text().to_lowercase();
|
||||
text.contains(&query_lower)
|
||||
})
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
// Boost results relevant to current context
|
||||
if let Some(ctx) = context {
|
||||
results.sort_by(|a, b| {
|
||||
let a_relevance = self.calculate_context_relevance(a, ctx);
|
||||
let b_relevance = self.calculate_context_relevance(b, ctx);
|
||||
b_relevance
|
||||
.partial_cmp(&a_relevance)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Calculate how relevant a node is to the current context
|
||||
fn calculate_context_relevance(&self, node: &CodebaseNode, context: &WorkingContext) -> f64 {
|
||||
let mut relevance = 0.0;
|
||||
|
||||
// Check file overlap
|
||||
let node_files = node.associated_files();
|
||||
if let Some(ref active) = context.active_file {
|
||||
for file in &node_files {
|
||||
if *file == active {
|
||||
relevance += 1.0;
|
||||
} else if file.parent() == active.parent() {
|
||||
relevance += 0.5;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check framework relevance
|
||||
for framework in &context.frameworks {
|
||||
let text = node.to_searchable_text().to_lowercase();
|
||||
if text.contains(&framework.name().to_lowercase()) {
|
||||
relevance += 0.3;
|
||||
}
|
||||
}
|
||||
|
||||
relevance
|
||||
}
|
||||
|
||||
/// Get memories relevant to current context
|
||||
pub fn get_relevant(&self, context: &WorkingContext) -> Result<Vec<CodebaseNode>> {
|
||||
let nodes = self.nodes.blocking_read();
|
||||
|
||||
let mut scored: Vec<_> = nodes
|
||||
.iter()
|
||||
.map(|node| {
|
||||
let relevance = self.calculate_context_relevance(node, context);
|
||||
(node.clone(), relevance)
|
||||
})
|
||||
.filter(|(_, relevance)| *relevance > 0.0)
|
||||
.collect();
|
||||
|
||||
scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
Ok(scored.into_iter().map(|(node, _)| node).collect())
|
||||
}
|
||||
|
||||
/// Get a node by ID
|
||||
pub fn get_node(&self, id: &str) -> Result<Option<CodebaseNode>> {
|
||||
let nodes = self.nodes.blocking_read();
|
||||
Ok(nodes.iter().find(|n| n.id() == id).cloned())
|
||||
}
|
||||
|
||||
/// Get all nodes of a specific type
|
||||
pub fn get_nodes_by_type(&self, node_type: &str) -> Result<Vec<CodebaseNode>> {
|
||||
let nodes = self.nodes.blocking_read();
|
||||
Ok(nodes
|
||||
.iter()
|
||||
.filter(|n| n.node_type() == node_type)
|
||||
.cloned()
|
||||
.collect())
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// LEARNING
|
||||
// ========================================================================
|
||||
|
||||
/// Learn from git history
|
||||
pub async fn learn_from_history(&self) -> Result<LearningResult> {
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
// Analyze history
|
||||
let analysis = self.git.analyze_history(None)?;
|
||||
|
||||
// Store bug fixes
|
||||
let mut nodes = self.nodes.write().await;
|
||||
for fix in &analysis.bug_fixes {
|
||||
nodes.push(CodebaseNode::BugFix(fix.clone()));
|
||||
}
|
||||
|
||||
// Store file relationships
|
||||
let mut tracker = self.relationships.write().await;
|
||||
for rel in &analysis.file_relationships {
|
||||
let _ = tracker.add_relationship(rel.clone());
|
||||
}
|
||||
|
||||
let duration_ms = start.elapsed().as_millis() as u64;
|
||||
|
||||
Ok(LearningResult {
|
||||
bug_fixes_found: analysis.bug_fixes.len(),
|
||||
relationships_found: analysis.file_relationships.len(),
|
||||
patterns_detected: 0, // Could be extended
|
||||
analyzed_since: analysis.analyzed_since,
|
||||
commits_analyzed: analysis.commit_count,
|
||||
duration_ms,
|
||||
})
|
||||
}
|
||||
|
||||
/// Learn from git history since a specific time
|
||||
pub async fn learn_from_history_since(&self, since: DateTime<Utc>) -> Result<LearningResult> {
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
let analysis = self.git.analyze_history(Some(since))?;
|
||||
|
||||
let mut nodes = self.nodes.write().await;
|
||||
for fix in &analysis.bug_fixes {
|
||||
nodes.push(CodebaseNode::BugFix(fix.clone()));
|
||||
}
|
||||
|
||||
let mut tracker = self.relationships.write().await;
|
||||
for rel in &analysis.file_relationships {
|
||||
let _ = tracker.add_relationship(rel.clone());
|
||||
}
|
||||
|
||||
let duration_ms = start.elapsed().as_millis() as u64;
|
||||
|
||||
Ok(LearningResult {
|
||||
bug_fixes_found: analysis.bug_fixes.len(),
|
||||
relationships_found: analysis.file_relationships.len(),
|
||||
patterns_detected: 0,
|
||||
analyzed_since: Some(since),
|
||||
commits_analyzed: analysis.commit_count,
|
||||
duration_ms,
|
||||
})
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// FILE WATCHING
|
||||
// ========================================================================
|
||||
|
||||
/// Start watching the repository for changes
|
||||
pub async fn start_watching(&self) -> Result<()> {
|
||||
if let Some(ref watcher) = self.watcher {
|
||||
let mut w = watcher.write().await;
|
||||
w.watch(&self.repo_path).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Stop watching the repository
|
||||
pub async fn stop_watching(&self) -> Result<()> {
|
||||
if let Some(ref watcher) = self.watcher {
|
||||
let mut w = watcher.write().await;
|
||||
w.stop().await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// SERIALIZATION
|
||||
// ========================================================================
|
||||
|
||||
/// Export all nodes for storage
|
||||
pub fn export_nodes(&self) -> Vec<CodebaseNode> {
|
||||
self.nodes.blocking_read().clone()
|
||||
}
|
||||
|
||||
/// Import nodes from storage
|
||||
pub fn import_nodes(&self, nodes: Vec<CodebaseNode>) {
|
||||
let mut current = self.nodes.blocking_write();
|
||||
current.extend(nodes);
|
||||
}
|
||||
|
||||
/// Export patterns for storage
|
||||
pub fn export_patterns(&self) -> Vec<CodePattern> {
|
||||
self.patterns.blocking_read().export_patterns()
|
||||
}
|
||||
|
||||
/// Import patterns from storage
|
||||
pub fn import_patterns(&self, patterns: Vec<CodePattern>) -> Result<()> {
|
||||
let mut detector = self.patterns.blocking_write();
|
||||
detector.load_patterns(patterns)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Export relationships for storage
|
||||
pub fn export_relationships(&self) -> Vec<FileRelationship> {
|
||||
self.relationships.blocking_read().export_relationships()
|
||||
}
|
||||
|
||||
/// Import relationships from storage
|
||||
pub fn import_relationships(&self, relationships: Vec<FileRelationship>) -> Result<()> {
|
||||
let mut tracker = self.relationships.blocking_write();
|
||||
tracker.load_relationships(relationships)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// STATS
|
||||
// ========================================================================
|
||||
|
||||
/// Get statistics about codebase memory
|
||||
pub fn get_stats(&self) -> CodebaseStats {
|
||||
let nodes = self.nodes.blocking_read();
|
||||
let patterns = self.patterns.blocking_read();
|
||||
let relationships = self.relationships.blocking_read();
|
||||
|
||||
CodebaseStats {
|
||||
total_nodes: nodes.len(),
|
||||
architectural_decisions: nodes
|
||||
.iter()
|
||||
.filter(|n| matches!(n, CodebaseNode::ArchitecturalDecision(_)))
|
||||
.count(),
|
||||
bug_fixes: nodes
|
||||
.iter()
|
||||
.filter(|n| matches!(n, CodebaseNode::BugFix(_)))
|
||||
.count(),
|
||||
patterns: patterns.get_all_patterns().len(),
|
||||
preferences: nodes
|
||||
.iter()
|
||||
.filter(|n| matches!(n, CodebaseNode::CodingPreference(_)))
|
||||
.count(),
|
||||
file_relationships: relationships.get_all_relationships().len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Statistics about codebase memory
///
/// A point-in-time snapshot produced by `CodebaseMemory::get_stats`.
#[derive(Debug, Clone)]
pub struct CodebaseStats {
    /// Total stored nodes of all kinds
    pub total_nodes: usize,
    /// Nodes that are architectural decisions
    pub architectural_decisions: usize,
    /// Nodes that are bug fixes
    pub bug_fixes: usize,
    /// Patterns known to the detector (includes built-ins)
    pub patterns: usize,
    /// Nodes that are coding preferences
    pub preferences: usize,
    /// File relationships tracked
    pub file_relationships: usize,
}
|
||||
|
||||
// ============================================================================
|
||||
// TESTS
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build a throwaway git repo that looks like a minimal Rust project
    /// (Cargo.toml + src/main.rs) so context capture detects `Rust`.
    fn create_test_repo() -> TempDir {
        let dir = TempDir::new().unwrap();

        // Initialize git repo
        git2::Repository::init(dir.path()).unwrap();

        // Create Cargo.toml
        std::fs::write(
            dir.path().join("Cargo.toml"),
            r#"
[package]
name = "test-project"
version = "0.1.0"
"#,
        )
        .unwrap();

        // Create src directory
        std::fs::create_dir(dir.path().join("src")).unwrap();
        std::fs::write(dir.path().join("src/main.rs"), "fn main() {}").unwrap();

        dir
    }

    #[test]
    fn test_codebase_memory_creation() {
        let dir = create_test_repo();
        let memory = CodebaseMemory::new(dir.path().to_path_buf());
        assert!(memory.is_ok());
    }

    #[test]
    fn test_remember_decision() {
        let dir = create_test_repo();
        let memory = CodebaseMemory::new(dir.path().to_path_buf()).unwrap();

        let id = memory
            .remember_decision(
                "Use Event Sourcing",
                "Need audit trail",
                vec![PathBuf::from("src/events.rs")],
            )
            .unwrap();

        // Decisions get "adr-" prefixed ids and are retrievable by id.
        assert!(id.starts_with("adr-"));

        let node = memory.get_node(&id).unwrap();
        assert!(node.is_some());
    }

    #[test]
    fn test_remember_bug_fix() {
        let dir = create_test_repo();
        let memory = CodebaseMemory::new(dir.path().to_path_buf()).unwrap();

        let id = memory
            .remember_bug_fix_simple(
                "App crashes on startup",
                "Null pointer in config loading",
                "Added null check",
                vec![PathBuf::from("src/config.rs")],
            )
            .unwrap();

        // Bug fixes get "bug-" prefixed ids.
        assert!(id.starts_with("bug-"));
    }

    #[test]
    fn test_query() {
        let dir = create_test_repo();
        let memory = CodebaseMemory::new(dir.path().to_path_buf()).unwrap();

        memory
            .remember_decision("Use async/await for IO", "Better performance", vec![])
            .unwrap();

        memory
            .remember_decision("Use channels for communication", "Thread safety", vec![])
            .unwrap();

        // Substring query should match exactly one of the two decisions.
        let results = memory.query("async", None).unwrap();
        assert_eq!(results.len(), 1);
    }

    #[test]
    fn test_get_context() {
        let dir = create_test_repo();
        let memory = CodebaseMemory::new(dir.path().to_path_buf()).unwrap();

        // The fixture has a Cargo.toml, so the project type is Rust.
        let context = memory.get_context().unwrap();
        assert_eq!(context.project_type, ProjectType::Rust);
    }

    #[test]
    fn test_stats() {
        let dir = create_test_repo();
        let memory = CodebaseMemory::new(dir.path().to_path_buf()).unwrap();

        memory.remember_decision("Test", "Test", vec![]).unwrap();

        let stats = memory.get_stats();
        assert_eq!(stats.architectural_decisions, 1);
        assert!(stats.patterns > 0); // Built-in patterns
    }
}
|
||||
722
crates/vestige-core/src/codebase/patterns.rs
Normal file
722
crates/vestige-core/src/codebase/patterns.rs
Normal file
|
|
@ -0,0 +1,722 @@
|
|||
//! Pattern detection and storage for codebase memory
|
||||
//!
|
||||
//! This module handles:
|
||||
//! - Learning new patterns from user teaching
|
||||
//! - Detecting known patterns in code
|
||||
//! - Suggesting relevant patterns based on context
|
||||
//!
|
||||
//! Patterns are the reusable pieces of knowledge that make Vestige smarter
|
||||
//! over time. As the user teaches patterns, Vestige becomes more helpful
|
||||
//! for that specific codebase.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use chrono::Utc;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::context::WorkingContext;
|
||||
use super::types::CodePattern;
|
||||
|
||||
// ============================================================================
|
||||
// ERRORS
|
||||
// ============================================================================
|
||||
|
||||
/// Errors produced by pattern learning, lookup, and detection.
#[derive(Debug, thiserror::Error)]
pub enum PatternError {
    /// The requested pattern id does not exist in the detector.
    #[error("Pattern not found: {0}")]
    NotFound(String),
    /// The pattern failed validation (e.g. empty name or description).
    #[error("Invalid pattern: {0}")]
    Invalid(String),
    /// An underlying storage operation failed.
    #[error("Storage error: {0}")]
    Storage(String),
}

/// Convenience alias used throughout this module.
pub type Result<T> = std::result::Result<T, PatternError>;
|
||||
|
||||
// ============================================================================
|
||||
// PATTERN MATCH
|
||||
// ============================================================================
|
||||
|
||||
/// A detected pattern match in code
///
/// Produced by `PatternDetector::detect_patterns`; carries the matched
/// pattern, how confident the detector is, and any actionable suggestions.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PatternMatch {
    /// The pattern that was matched
    pub pattern: CodePattern,
    /// Confidence of the match (0.0 - 1.0); keyword-ratio based, boosted
    /// when the pattern's example code appears verbatim
    pub confidence: f64,
    /// Location in the code where the pattern was detected
    /// (currently always `None` — detection is not line-level yet)
    pub location: Option<PatternLocation>,
    /// Suggestions based on this pattern match (when/when-not-to-use advice)
    pub suggestions: Vec<String>,
}

/// Location where a pattern was detected
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PatternLocation {
    /// File where pattern was found
    pub file: PathBuf,
    /// Starting line (1-indexed)
    pub start_line: u32,
    /// Ending line (1-indexed, inclusive)
    pub end_line: u32,
    /// Code snippet that matched
    pub snippet: String,
}
|
||||
|
||||
// ============================================================================
|
||||
// PATTERN SUGGESTION
|
||||
// ============================================================================
|
||||
|
||||
/// A suggested pattern based on context
///
/// Produced by `PatternDetector::suggest_patterns`; unlike `PatternMatch`,
/// a suggestion is proactive — it recommends a pattern the user might apply
/// rather than one already present in the code.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PatternSuggestion {
    /// The suggested pattern
    pub pattern: CodePattern,
    /// Human-readable explanation of why this pattern is being suggested
    pub reason: String,
    /// Relevance score (0.0 - 1.0)
    pub relevance: f64,
    /// Example of how to apply this pattern, when the pattern provides one
    pub example: Option<String>,
}
|
||||
|
||||
// ============================================================================
|
||||
// PATTERN DETECTOR
|
||||
// ============================================================================
|
||||
|
||||
/// Detects and manages code patterns
///
/// Holds the learned pattern store plus two derived indexes that are kept
/// in sync by `learn_pattern`: a per-language index for fast filtering and
/// a per-pattern keyword list used for text matching.
pub struct PatternDetector {
    /// Stored patterns indexed by ID
    patterns: HashMap<String, CodePattern>,
    /// Pattern IDs grouped by lowercased language for faster lookup
    patterns_by_language: HashMap<String, Vec<String>>,
    /// Lowercased keywords per pattern ID, used for substring matching
    pattern_keywords: HashMap<String, Vec<String>>,
}
|
||||
|
||||
impl PatternDetector {
|
||||
/// Create a new pattern detector
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
patterns: HashMap::new(),
|
||||
patterns_by_language: HashMap::new(),
|
||||
pattern_keywords: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Learn a new pattern from user teaching
|
||||
pub fn learn_pattern(&mut self, pattern: CodePattern) -> Result<String> {
|
||||
// Validate the pattern
|
||||
if pattern.name.is_empty() {
|
||||
return Err(PatternError::Invalid(
|
||||
"Pattern name cannot be empty".to_string(),
|
||||
));
|
||||
}
|
||||
if pattern.description.is_empty() {
|
||||
return Err(PatternError::Invalid(
|
||||
"Pattern description cannot be empty".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let id = pattern.id.clone();
|
||||
|
||||
// Index by language
|
||||
if let Some(ref language) = pattern.language {
|
||||
self.patterns_by_language
|
||||
.entry(language.to_lowercase())
|
||||
.or_default()
|
||||
.push(id.clone());
|
||||
}
|
||||
|
||||
// Extract keywords for matching
|
||||
let keywords = self.extract_keywords(&pattern);
|
||||
self.pattern_keywords.insert(id.clone(), keywords);
|
||||
|
||||
// Store the pattern
|
||||
self.patterns.insert(id.clone(), pattern);
|
||||
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Extract keywords from a pattern for matching
|
||||
fn extract_keywords(&self, pattern: &CodePattern) -> Vec<String> {
|
||||
let mut keywords = Vec::new();
|
||||
|
||||
// Words from name
|
||||
keywords.extend(
|
||||
pattern
|
||||
.name
|
||||
.to_lowercase()
|
||||
.split_whitespace()
|
||||
.filter(|w| w.len() > 2)
|
||||
.map(|s| s.to_string()),
|
||||
);
|
||||
|
||||
// Words from description
|
||||
keywords.extend(
|
||||
pattern
|
||||
.description
|
||||
.to_lowercase()
|
||||
.split_whitespace()
|
||||
.filter(|w| w.len() > 3)
|
||||
.map(|s| s.to_string()),
|
||||
);
|
||||
|
||||
// Tags
|
||||
keywords.extend(pattern.tags.iter().map(|t| t.to_lowercase()));
|
||||
|
||||
// Deduplicate
|
||||
keywords.sort();
|
||||
keywords.dedup();
|
||||
|
||||
keywords
|
||||
}
|
||||
|
||||
    /// Get a pattern by ID, or `None` if unknown.
    pub fn get_pattern(&self, id: &str) -> Option<&CodePattern> {
        self.patterns.get(id)
    }

    /// Get all patterns (unordered — backed by a `HashMap`).
    pub fn get_all_patterns(&self) -> Vec<&CodePattern> {
        self.patterns.values().collect()
    }

    /// Get patterns for a specific language (case-insensitive).
    ///
    /// Returns an empty vec when no pattern is registered for the language.
    /// Note: language-agnostic patterns are indexed under `"*"` and are NOT
    /// included here; callers chain a second lookup for `"*"` when needed.
    pub fn get_patterns_for_language(&self, language: &str) -> Vec<&CodePattern> {
        let language_lower = language.to_lowercase();

        self.patterns_by_language
            .get(&language_lower)
            .map(|ids| ids.iter().filter_map(|id| self.patterns.get(id)).collect())
            .unwrap_or_default()
    }
|
||||
|
||||
/// Detect if current code matches known patterns
|
||||
pub fn detect_patterns(&self, code: &str, language: &str) -> Result<Vec<PatternMatch>> {
|
||||
let mut matches = Vec::new();
|
||||
let code_lower = code.to_lowercase();
|
||||
|
||||
// Get relevant patterns for this language
|
||||
let relevant_patterns: Vec<_> = self
|
||||
.get_patterns_for_language(language)
|
||||
.into_iter()
|
||||
.chain(self.get_patterns_for_language("*"))
|
||||
.collect();
|
||||
|
||||
for pattern in relevant_patterns {
|
||||
if let Some(confidence) = self.calculate_match_confidence(code, &code_lower, pattern) {
|
||||
if confidence >= 0.3 {
|
||||
matches.push(PatternMatch {
|
||||
pattern: pattern.clone(),
|
||||
confidence,
|
||||
location: None, // Would need line-level analysis
|
||||
suggestions: self.generate_suggestions(pattern, code),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by confidence
|
||||
matches.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
Ok(matches)
|
||||
}
|
||||
|
||||
    /// Calculate confidence that code matches a pattern.
    ///
    /// Base confidence is the fraction of the pattern's keywords found as
    /// substrings of the lowercased code; a flat 0.3 boost is added when the
    /// pattern's example code appears verbatim (case-insensitive). Returns
    /// `None` when the pattern has no keywords or nothing matched.
    fn calculate_match_confidence(
        &self,
        _code: &str,
        code_lower: &str,
        pattern: &CodePattern,
    ) -> Option<f64> {
        let keywords = self.pattern_keywords.get(&pattern.id)?;

        if keywords.is_empty() {
            return None;
        }

        // Count keyword matches (keywords are pre-lowercased at learn time).
        let matches: usize = keywords
            .iter()
            .filter(|kw| code_lower.contains(kw.as_str()))
            .count();

        if matches == 0 {
            return None;
        }

        // Confidence = matched-keyword ratio.
        let confidence = matches as f64 / keywords.len() as f64;

        // Boost confidence if the example code appears verbatim.
        let boost = if !pattern.example_code.is_empty()
            && code_lower.contains(&pattern.example_code.to_lowercase())
        {
            0.3
        } else {
            0.0
        };

        // Clamp to the documented 0.0..=1.0 range.
        Some((confidence + boost).min(1.0))
    }
|
||||
|
||||
/// Generate suggestions based on a matched pattern
|
||||
fn generate_suggestions(&self, pattern: &CodePattern, _code: &str) -> Vec<String> {
|
||||
let mut suggestions = Vec::new();
|
||||
|
||||
// Add the when_to_use guidance
|
||||
suggestions.push(format!("Consider: {}", pattern.when_to_use));
|
||||
|
||||
// Add when_not_to_use if present
|
||||
if let Some(ref when_not) = pattern.when_not_to_use {
|
||||
suggestions.push(format!("Note: {}", when_not));
|
||||
}
|
||||
|
||||
suggestions
|
||||
}
|
||||
|
||||
/// Suggest patterns based on current context
|
||||
pub fn suggest_patterns(&self, context: &WorkingContext) -> Result<Vec<PatternSuggestion>> {
|
||||
let mut suggestions = Vec::new();
|
||||
|
||||
// Get the language for the current context
|
||||
let language = match &context.project_type {
|
||||
super::context::ProjectType::Rust => "rust",
|
||||
super::context::ProjectType::TypeScript => "typescript",
|
||||
super::context::ProjectType::JavaScript => "javascript",
|
||||
super::context::ProjectType::Python => "python",
|
||||
super::context::ProjectType::Go => "go",
|
||||
super::context::ProjectType::Java => "java",
|
||||
super::context::ProjectType::Kotlin => "kotlin",
|
||||
super::context::ProjectType::Swift => "swift",
|
||||
super::context::ProjectType::CSharp => "csharp",
|
||||
super::context::ProjectType::Cpp => "cpp",
|
||||
super::context::ProjectType::Ruby => "ruby",
|
||||
super::context::ProjectType::Php => "php",
|
||||
super::context::ProjectType::Mixed(_) => "*",
|
||||
super::context::ProjectType::Unknown => "*",
|
||||
};
|
||||
|
||||
// Get patterns for this language
|
||||
let language_patterns = self.get_patterns_for_language(language);
|
||||
|
||||
// Score patterns based on context relevance
|
||||
for pattern in language_patterns {
|
||||
let relevance = self.calculate_context_relevance(pattern, context);
|
||||
|
||||
if relevance >= 0.2 {
|
||||
let reason = self.generate_suggestion_reason(pattern, context);
|
||||
|
||||
suggestions.push(PatternSuggestion {
|
||||
pattern: pattern.clone(),
|
||||
reason,
|
||||
relevance,
|
||||
example: if !pattern.example_code.is_empty() {
|
||||
Some(pattern.example_code.clone())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by relevance
|
||||
suggestions.sort_by(|a, b| b.relevance.partial_cmp(&a.relevance).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
Ok(suggestions)
|
||||
}
|
||||
|
||||
    /// Calculate how relevant a pattern is to the current context.
    ///
    /// Scoring (clamped to 1.0): +0.3 when any of the pattern's example
    /// files relates to the active file, +0.2 per framework referenced in
    /// the pattern's tags or description, and up to +0.3 from usage count
    /// (usage_count / 100, capped).
    fn calculate_context_relevance(&self, pattern: &CodePattern, context: &WorkingContext) -> f64 {
        let mut score = 0.0;

        // Check if pattern example files relate to the active file.
        if let Some(ref active) = context.active_file {
            for example_file in &pattern.example_files {
                if self.paths_related(active, example_file) {
                    score += 0.3;
                    break; // one related example file is enough
                }
            }
        }

        // Check framework relevance (tag equality or description mention).
        for framework in &context.frameworks {
            let framework_name = framework.name().to_lowercase();
            if pattern
                .tags
                .iter()
                .any(|t| t.to_lowercase() == framework_name)
                || pattern.description.to_lowercase().contains(&framework_name)
            {
                score += 0.2;
            }
        }

        // Reward frequently-used patterns, up to +0.3.
        if pattern.usage_count > 0 {
            score += (pattern.usage_count as f64 / 100.0).min(0.3);
        }

        score.min(1.0)
    }
|
||||
|
||||
/// Check if two paths are related (same directory, similar names, etc.)
|
||||
fn paths_related(&self, a: &Path, b: &Path) -> bool {
|
||||
// Same parent directory
|
||||
if a.parent() == b.parent() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Similar file names
|
||||
if let (Some(a_stem), Some(b_stem)) = (a.file_stem(), b.file_stem()) {
|
||||
let a_str = a_stem.to_string_lossy().to_lowercase();
|
||||
let b_str = b_stem.to_string_lossy().to_lowercase();
|
||||
|
||||
if a_str.contains(&b_str) || b_str.contains(&a_str) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Generate a reason for suggesting a pattern
|
||||
fn generate_suggestion_reason(
|
||||
&self,
|
||||
pattern: &CodePattern,
|
||||
context: &WorkingContext,
|
||||
) -> String {
|
||||
let mut reasons = Vec::new();
|
||||
|
||||
// Language match
|
||||
if let Some(ref lang) = pattern.language {
|
||||
reasons.push(format!("Relevant for {} code", lang));
|
||||
}
|
||||
|
||||
// Framework match
|
||||
for framework in &context.frameworks {
|
||||
let framework_name = framework.name();
|
||||
if pattern
|
||||
.tags
|
||||
.iter()
|
||||
.any(|t| t.eq_ignore_ascii_case(framework_name))
|
||||
|| pattern
|
||||
.description
|
||||
.to_lowercase()
|
||||
.contains(&framework_name.to_lowercase())
|
||||
{
|
||||
reasons.push(format!("Used with {}", framework_name));
|
||||
}
|
||||
}
|
||||
|
||||
// Usage count
|
||||
if pattern.usage_count > 5 {
|
||||
reasons.push(format!("Commonly used ({} times)", pattern.usage_count));
|
||||
}
|
||||
|
||||
if reasons.is_empty() {
|
||||
"May be applicable in this context".to_string()
|
||||
} else {
|
||||
reasons.join("; ")
|
||||
}
|
||||
}
|
||||
|
||||
/// Update pattern usage count
|
||||
pub fn record_pattern_usage(&mut self, pattern_id: &str) -> Result<()> {
|
||||
if let Some(pattern) = self.patterns.get_mut(pattern_id) {
|
||||
pattern.usage_count += 1;
|
||||
Ok(())
|
||||
} else {
|
||||
Err(PatternError::NotFound(pattern_id.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Delete a pattern
|
||||
pub fn delete_pattern(&mut self, pattern_id: &str) -> Result<()> {
|
||||
if self.patterns.remove(pattern_id).is_some() {
|
||||
// Clean up indexes
|
||||
for (_, ids) in self.patterns_by_language.iter_mut() {
|
||||
ids.retain(|id| id != pattern_id);
|
||||
}
|
||||
self.pattern_keywords.remove(pattern_id);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(PatternError::NotFound(pattern_id.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Search patterns by query
|
||||
pub fn search_patterns(&self, query: &str) -> Vec<&CodePattern> {
|
||||
let query_lower = query.to_lowercase();
|
||||
let query_words: Vec<_> = query_lower.split_whitespace().collect();
|
||||
|
||||
let mut scored: Vec<_> = self
|
||||
.patterns
|
||||
.values()
|
||||
.filter_map(|pattern| {
|
||||
let name_match = pattern.name.to_lowercase().contains(&query_lower);
|
||||
let desc_match = pattern.description.to_lowercase().contains(&query_lower);
|
||||
let tag_match = pattern
|
||||
.tags
|
||||
.iter()
|
||||
.any(|t| t.to_lowercase().contains(&query_lower));
|
||||
|
||||
// Count word matches
|
||||
let keywords = self.pattern_keywords.get(&pattern.id)?;
|
||||
let word_matches = query_words
|
||||
.iter()
|
||||
.filter(|w| keywords.iter().any(|kw| kw.contains(*w)))
|
||||
.count();
|
||||
|
||||
let score = if name_match {
|
||||
1.0
|
||||
} else if tag_match {
|
||||
0.8
|
||||
} else if desc_match {
|
||||
0.6
|
||||
} else if word_matches > 0 {
|
||||
0.4 * (word_matches as f64 / query_words.len() as f64)
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some((pattern, score))
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by score
|
||||
scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
scored.into_iter().map(|(p, _)| p).collect()
|
||||
}
|
||||
|
||||
/// Load patterns from storage (to be implemented with actual storage)
|
||||
pub fn load_patterns(&mut self, patterns: Vec<CodePattern>) -> Result<()> {
|
||||
for pattern in patterns {
|
||||
self.learn_pattern(pattern)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Export all patterns for storage
|
||||
pub fn export_patterns(&self) -> Vec<CodePattern> {
|
||||
self.patterns.values().cloned().collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for PatternDetector {
    /// Equivalent to [`PatternDetector::new`]: an empty detector.
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// BUILT-IN PATTERNS
|
||||
// ============================================================================
|
||||
|
||||
/// Create built-in patterns for common coding patterns
///
/// Returns a fresh list of seed patterns (Rust error handling, React
/// functional components, the repository pattern) with zero usage counts.
/// Note: `created_at` is stamped with `Utc::now()` at call time, so
/// repeated calls produce patterns differing in timestamp only.
pub fn create_builtin_patterns() -> Vec<CodePattern> {
    vec![
        // Rust Error Handling Pattern
        CodePattern {
            id: "builtin-rust-error-handling".to_string(),
            name: "Rust Error Handling with thiserror".to_string(),
            description: "Use thiserror for defining custom error types with derive macros"
                .to_string(),
            example_code: r#"
#[derive(Debug, thiserror::Error)]
pub enum MyError {
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    #[error("Parse error: {0}")]
    Parse(String),
}

pub type Result<T> = std::result::Result<T, MyError>;
"#
            .to_string(),
            example_files: vec![],
            when_to_use: "When defining domain-specific error types in Rust".to_string(),
            when_not_to_use: Some("For simple one-off errors, anyhow might be simpler".to_string()),
            language: Some("rust".to_string()),
            created_at: Utc::now(),
            usage_count: 0,
            tags: vec!["error-handling".to_string(), "rust".to_string()],
            related_patterns: vec!["builtin-rust-result".to_string()],
        },
        // TypeScript React Component Pattern
        CodePattern {
            id: "builtin-react-functional".to_string(),
            name: "React Functional Component".to_string(),
            description: "Modern React functional component with TypeScript".to_string(),
            example_code: r#"
interface Props {
    title: string;
    onClick?: () => void;
}

export function MyComponent({ title, onClick }: Props) {
    return (
        <div onClick={onClick}>
            <h1>{title}</h1>
        </div>
    );
}
"#
            .to_string(),
            example_files: vec![],
            when_to_use: "For all new React components".to_string(),
            when_not_to_use: Some("Class components are rarely needed in modern React".to_string()),
            language: Some("typescript".to_string()),
            created_at: Utc::now(),
            usage_count: 0,
            tags: vec![
                "react".to_string(),
                "typescript".to_string(),
                "component".to_string(),
            ],
            related_patterns: vec![],
        },
        // Repository Pattern
        CodePattern {
            id: "builtin-repository-pattern".to_string(),
            name: "Repository Pattern".to_string(),
            description: "Abstract data access behind a repository interface".to_string(),
            example_code: r#"
pub trait UserRepository {
    fn find_by_id(&self, id: &str) -> Result<Option<User>>;
    fn save(&self, user: &User) -> Result<()>;
    fn delete(&self, id: &str) -> Result<()>;
}

pub struct SqliteUserRepository {
    conn: Connection,
}

impl UserRepository for SqliteUserRepository {
    // Implementation...
}
"#
            .to_string(),
            example_files: vec![],
            when_to_use: "When you need to decouple domain logic from data access".to_string(),
            when_not_to_use: Some("For simple CRUD with no complex domain logic".to_string()),
            language: Some("rust".to_string()),
            created_at: Utc::now(),
            usage_count: 0,
            tags: vec!["architecture".to_string(), "data-access".to_string()],
            related_patterns: vec![],
        },
    ]
}
|
||||
|
||||
// ============================================================================
|
||||
// TESTS
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::codebase::context::ProjectType;

    /// Fixture: a minimal rust-language pattern shared by the tests below.
    fn create_test_pattern() -> CodePattern {
        CodePattern {
            id: "test-pattern-1".to_string(),
            name: "Test Pattern".to_string(),
            description: "A test pattern for unit testing".to_string(),
            example_code: "let x = test_function();".to_string(),
            example_files: vec![PathBuf::from("src/test.rs")],
            when_to_use: "When testing".to_string(),
            when_not_to_use: None,
            language: Some("rust".to_string()),
            created_at: Utc::now(),
            usage_count: 0,
            tags: vec!["test".to_string()],
            related_patterns: vec![],
        }
    }

    /// A learned pattern is retrievable by id with its fields intact.
    #[test]
    fn test_learn_pattern() {
        let mut detector = PatternDetector::new();
        let pattern = create_test_pattern();

        let result = detector.learn_pattern(pattern.clone());
        assert!(result.is_ok());

        let stored = detector.get_pattern("test-pattern-1");
        assert!(stored.is_some());
        assert_eq!(stored.unwrap().name, "Test Pattern");
    }

    /// Code containing the example snippet triggers a pattern match.
    #[test]
    fn test_detect_patterns() {
        let mut detector = PatternDetector::new();
        let pattern = create_test_pattern();
        detector.learn_pattern(pattern).unwrap();

        let code = "fn main() { let x = test_function(); }";
        let matches = detector.detect_patterns(code, "rust").unwrap();

        assert!(!matches.is_empty());
    }

    /// Language lookup returns only patterns tagged with that language.
    #[test]
    fn test_get_patterns_for_language() {
        let mut detector = PatternDetector::new();
        let pattern = create_test_pattern();
        detector.learn_pattern(pattern).unwrap();

        let rust_patterns = detector.get_patterns_for_language("rust");
        assert_eq!(rust_patterns.len(), 1);

        let ts_patterns = detector.get_patterns_for_language("typescript");
        assert!(ts_patterns.is_empty());
    }

    /// Text search finds matching patterns and omits non-matches.
    #[test]
    fn test_search_patterns() {
        let mut detector = PatternDetector::new();
        let pattern = create_test_pattern();
        detector.learn_pattern(pattern).unwrap();

        let results = detector.search_patterns("test");
        assert_eq!(results.len(), 1);

        let results = detector.search_patterns("unknown");
        assert!(results.is_empty());
    }

    /// Deletion removes the pattern from the store.
    #[test]
    fn test_delete_pattern() {
        let mut detector = PatternDetector::new();
        let pattern = create_test_pattern();
        detector.learn_pattern(pattern).unwrap();

        assert!(detector.get_pattern("test-pattern-1").is_some());

        detector.delete_pattern("test-pattern-1").unwrap();

        assert!(detector.get_pattern("test-pattern-1").is_none());
    }

    /// Built-in seed patterns are non-empty and fully populated.
    #[test]
    fn test_builtin_patterns() {
        let patterns = create_builtin_patterns();
        assert!(!patterns.is_empty());

        // Check that each pattern has required fields
        for pattern in patterns {
            assert!(!pattern.id.is_empty());
            assert!(!pattern.name.is_empty());
            assert!(!pattern.description.is_empty());
            assert!(!pattern.when_to_use.is_empty());
        }
    }
}
|
||||
708
crates/vestige-core/src/codebase/relationships.rs
Normal file
708
crates/vestige-core/src/codebase/relationships.rs
Normal file
|
|
@ -0,0 +1,708 @@
|
|||
//! File relationship tracking for codebase memory
|
||||
//!
|
||||
//! This module tracks relationships between files:
|
||||
//! - Co-edit patterns (files edited together)
|
||||
//! - Import/dependency relationships
|
||||
//! - Test-implementation relationships
|
||||
//! - Domain groupings
|
||||
//!
|
||||
//! Understanding file relationships helps:
|
||||
//! - Suggest related files when editing
|
||||
//! - Provide better context for code generation
|
||||
//! - Identify architectural boundaries
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::types::{FileRelationship, RelationType, RelationshipSource};
|
||||
|
||||
// ============================================================================
|
||||
// ERRORS
|
||||
// ============================================================================
|
||||
|
||||
/// Errors produced by relationship-tracking operations in this module.
#[derive(Debug, thiserror::Error)]
pub enum RelationshipError {
    /// No relationship exists with the given id.
    #[error("Relationship not found: {0}")]
    NotFound(String),
    /// The relationship failed validation (e.g. fewer than two files).
    #[error("Invalid relationship: {0}")]
    Invalid(String),
}

/// Convenience alias used throughout this module.
pub type Result<T> = std::result::Result<T, RelationshipError>;
|
||||
|
||||
// ============================================================================
|
||||
// RELATED FILE
|
||||
// ============================================================================
|
||||
|
||||
/// A file that is related to another file
///
/// Returned by [`RelationshipTracker::get_related_files`].
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RelatedFile {
    /// Path to the related file
    pub path: PathBuf,
    /// Type of relationship
    pub relationship_type: RelationType,
    /// Strength of the relationship (0.0 - 1.0)
    pub strength: f64,
    /// Human-readable description
    pub description: String,
}
|
||||
|
||||
// ============================================================================
|
||||
// RELATIONSHIP GRAPH
|
||||
// ============================================================================
|
||||
|
||||
/// Graph structure for visualizing file relationships
///
/// Built on demand by [`RelationshipTracker::build_graph`]; nodes are files
/// and edges come from stored relationships.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RelationshipGraph {
    /// Nodes (files) in the graph
    pub nodes: Vec<GraphNode>,
    /// Edges (relationships) in the graph
    pub edges: Vec<GraphEdge>,
    /// Graph metadata
    pub metadata: GraphMetadata,
}
|
||||
|
||||
/// A node in the relationship graph
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GraphNode {
    /// Unique ID for this node (generated as `node-<index>` by `build_graph`)
    pub id: String,
    /// File path
    pub path: PathBuf,
    /// Display label (the file name, or the full path when it has none)
    pub label: String,
    /// Node type (for styling) — currently the file extension, or "unknown"
    pub node_type: String,
    /// Number of connections (edges touching this node)
    pub degree: usize,
}
|
||||
|
||||
/// An edge in the relationship graph
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GraphEdge {
    /// Source node ID
    pub source: String,
    /// Target node ID
    pub target: String,
    /// Relationship type
    pub relationship_type: RelationType,
    /// Edge weight (the underlying relationship's strength, 0.0 - 1.0)
    pub weight: f64,
    /// Edge label (the `Debug` rendering of the relationship type)
    pub label: String,
}
|
||||
|
||||
/// Metadata about the graph
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GraphMetadata {
    /// Total number of nodes
    pub node_count: usize,
    /// Total number of edges
    pub edge_count: usize,
    /// When the graph was built
    pub built_at: DateTime<Utc>,
    /// Average relationship strength across all edges (0.0 when there are none)
    pub average_strength: f64,
}
|
||||
|
||||
// ============================================================================
|
||||
// CO-EDIT SESSION
|
||||
// ============================================================================
|
||||
|
||||
/// Tracks files edited together in a session
///
/// A session groups edits that happen close together in time; it expires
/// after 30 minutes of inactivity (see `RelationshipTracker::record_coedit`).
#[derive(Debug, Clone)]
struct CoEditSession {
    /// Files in this session
    files: HashSet<PathBuf>,
    /// When the session started (for analytics/debugging)
    #[allow(dead_code)]
    started_at: DateTime<Utc>,
    /// When the session was last updated
    last_updated: DateTime<Utc>,
}
|
||||
|
||||
// ============================================================================
|
||||
// RELATIONSHIP TRACKER
|
||||
// ============================================================================
|
||||
|
||||
/// Tracks relationships between files in a codebase
///
/// Holds explicit relationships plus the rolling co-edit session state used
/// to infer `FrequentCochange` relationships automatically.
pub struct RelationshipTracker {
    /// All relationships indexed by ID
    relationships: HashMap<String, FileRelationship>,
    /// Relationships indexed by file for fast lookup (file -> relationship ids)
    file_relationships: HashMap<PathBuf, Vec<String>>,
    /// Current co-edit session
    current_session: Option<CoEditSession>,
    /// Co-edit counts between file pairs (keyed with the smaller path first)
    coedit_counts: HashMap<(PathBuf, PathBuf), u32>,
    /// ID counter for new relationships
    next_id: u32,
}
|
||||
|
||||
impl RelationshipTracker {
|
||||
/// Create a new relationship tracker
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
relationships: HashMap::new(),
|
||||
file_relationships: HashMap::new(),
|
||||
current_session: None,
|
||||
coedit_counts: HashMap::new(),
|
||||
next_id: 1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a new relationship ID
|
||||
fn new_id(&mut self) -> String {
|
||||
let id = format!("rel-{}", self.next_id);
|
||||
self.next_id += 1;
|
||||
id
|
||||
}
|
||||
|
||||
    /// Add a relationship
    ///
    /// Validates that at least two files participate, indexes the
    /// relationship under each of its files, and stores it by id.
    /// Returns the relationship's id on success.
    pub fn add_relationship(&mut self, relationship: FileRelationship) -> Result<String> {
        if relationship.files.len() < 2 {
            return Err(RelationshipError::Invalid(
                "Relationship must have at least 2 files".to_string(),
            ));
        }

        let id = relationship.id.clone();

        // Index by each file
        for file in &relationship.files {
            self.file_relationships
                .entry(file.clone())
                .or_default()
                .push(id.clone());
        }

        // NOTE(review): inserting with an id that already exists overwrites
        // the stored relationship but leaves duplicate index entries behind —
        // confirm callers always use fresh ids.
        self.relationships.insert(id.clone(), relationship);

        Ok(id)
    }
||||
|
||||
    /// Record that files were edited together
    ///
    /// Maintains a rolling "co-edit session": calls within 30 minutes of the
    /// previous one extend the current session; a longer gap finalizes the
    /// old session (which may mint `FrequentCochange` relationships) and
    /// starts a new one. Every call also bumps the pairwise co-edit counters.
    /// Fewer than two files is a no-op.
    pub fn record_coedit(&mut self, files: &[PathBuf]) -> Result<()> {
        if files.len() < 2 {
            return Ok(()); // Need at least 2 files for a relationship
        }

        let now = Utc::now();

        // Update or create session
        match &mut self.current_session {
            Some(session) => {
                // Check if session is still active (within 30 minutes)
                let elapsed = now.signed_duration_since(session.last_updated);
                if elapsed.num_minutes() > 30 {
                    // Session expired, finalize it and start new
                    self.finalize_session()?;
                    self.current_session = Some(CoEditSession {
                        files: files.iter().cloned().collect(),
                        started_at: now,
                        last_updated: now,
                    });
                } else {
                    // Add files to current session
                    session.files.extend(files.iter().cloned());
                    session.last_updated = now;
                }
            }
            None => {
                // Start new session
                self.current_session = Some(CoEditSession {
                    files: files.iter().cloned().collect(),
                    started_at: now,
                    last_updated: now,
                });
            }
        }

        // Update co-edit counts for each pair. Pairs are stored with the
        // lexicographically smaller path first so (a, b) and (b, a) share
        // one counter.
        for i in 0..files.len() {
            for j in (i + 1)..files.len() {
                let pair = if files[i] < files[j] {
                    (files[i].clone(), files[j].clone())
                } else {
                    (files[j].clone(), files[i].clone())
                };
                *self.coedit_counts.entry(pair).or_insert(0) += 1;
            }
        }

        Ok(())
    }
|
||||
|
||||
    /// Finalize the current session and create relationships
    ///
    /// Takes the active session (if any) and, for every file pair whose
    /// all-time co-edit count is at least 3, records a `FrequentCochange`
    /// relationship (strength = count/10, capped at 1.0) unless an
    /// equivalent relationship already exists.
    fn finalize_session(&mut self) -> Result<()> {
        if let Some(session) = self.current_session.take() {
            let files: Vec<_> = session.files.into_iter().collect();

            if files.len() >= 2 {
                // Create relationships for frequent co-edits
                for i in 0..files.len() {
                    for j in (i + 1)..files.len() {
                        // Canonical ordering: smaller path first, matching
                        // how record_coedit keys coedit_counts.
                        let pair = if files[i] < files[j] {
                            (files[i].clone(), files[j].clone())
                        } else {
                            (files[j].clone(), files[i].clone())
                        };

                        let count = self.coedit_counts.get(&pair).copied().unwrap_or(0);

                        // Only create relationship if edited together multiple times
                        if count >= 3 {
                            let strength = (count as f64 / 10.0).min(1.0);
                            let id = self.new_id();

                            let relationship = FileRelationship {
                                id: id.clone(),
                                files: vec![pair.0.clone(), pair.1.clone()],
                                relationship_type: RelationType::FrequentCochange,
                                strength,
                                description: format!(
                                    "Edited together {} times in recent sessions",
                                    count
                                ),
                                created_at: Utc::now(),
                                last_confirmed: Some(Utc::now()),
                                // NOTE(review): this relationship is inferred
                                // from co-edit observations, yet the source is
                                // tagged UserDefined — confirm whether an
                                // observed/inferred RelationshipSource variant
                                // is intended here.
                                source: RelationshipSource::UserDefined,
                                observation_count: count,
                            };

                            // Check if relationship already exists
                            let exists = self
                                .relationships
                                .values()
                                .any(|r| r.files.contains(&pair.0) && r.files.contains(&pair.1));

                            if !exists {
                                self.add_relationship(relationship)?;
                            }
                        }
                    }
                }
            }
        }

        Ok(())
    }
|
||||
|
||||
    /// Get files related to a given file
    ///
    /// Combines (a) test/implementation links inferred from naming
    /// conventions and (b) stored relationships that mention `file`,
    /// deduplicated by path. Inferred links are placed first, so they win
    /// the dedup over stored relationships for the same path.
    pub fn get_related_files(&self, file: &Path) -> Result<Vec<RelatedFile>> {
        let path = file.to_path_buf();

        let relationship_ids = self.file_relationships.get(&path);

        let related: Vec<_> = relationship_ids
            .map(|ids| {
                ids.iter()
                    .filter_map(|id| self.relationships.get(id))
                    .flat_map(|rel| {
                        // Every *other* file in the relationship is related.
                        rel.files
                            .iter()
                            .filter(|f| *f != &path)
                            .map(|f| RelatedFile {
                                path: f.clone(),
                                relationship_type: rel.relationship_type,
                                strength: rel.strength,
                                description: rel.description.clone(),
                            })
                    })
                    .collect()
            })
            .unwrap_or_default();

        // Also check for test file relationships
        let mut additional = self.infer_test_relationships(file);
        additional.extend(related);

        // Deduplicate by path (first occurrence wins)
        let mut seen = HashSet::new();
        let deduped: Vec<_> = additional
            .into_iter()
            .filter(|r| seen.insert(r.path.clone()))
            .collect();

        Ok(deduped)
    }
|
||||
|
||||
    /// Infer test file relationships based on naming conventions
    ///
    /// Uses file-stem conventions (`foo_test`, `foo.test`, `test_foo`,
    /// `foo_spec`, `foo.spec`) plus a sibling `tests/` directory. Each
    /// candidate is checked against the real filesystem (`Path::exists`),
    /// so results depend on the current working directory for relative paths.
    fn infer_test_relationships(&self, file: &Path) -> Vec<RelatedFile> {
        let mut related = Vec::new();

        let file_stem = file
            .file_stem()
            .map(|s| s.to_string_lossy().to_string())
            .unwrap_or_default();

        let extension = file
            .extension()
            .map(|s| s.to_string_lossy().to_string())
            .unwrap_or_default();

        let parent = file.parent().unwrap_or(Path::new("."));

        // Check for test file naming patterns
        let is_test = file_stem.contains("test")
            || file_stem.contains("spec")
            || file_stem.ends_with("_test")
            || file_stem.starts_with("test_");

        if is_test {
            // This is a test file - find the implementation by stripping the
            // test/spec markers from the stem.
            let impl_stem = file_stem
                .replace("_test", "")
                .replace(".test", "")
                .replace("_spec", "")
                .replace(".spec", "")
                .trim_start_matches("test_")
                .to_string();

            let impl_path = parent.join(format!("{}.{}", impl_stem, extension));

            if impl_path.exists() {
                related.push(RelatedFile {
                    path: impl_path,
                    relationship_type: RelationType::TestsImplementation,
                    strength: 0.9,
                    description: "Implementation file for this test".to_string(),
                });
            }
        } else {
            // This is an implementation - find the test file
            let test_patterns = [
                format!("{}_test.{}", file_stem, extension),
                format!("{}.test.{}", file_stem, extension),
                format!("test_{}.{}", file_stem, extension),
                format!("{}_spec.{}", file_stem, extension),
                format!("{}.spec.{}", file_stem, extension),
            ];

            // Same-directory candidates: stop at the first match.
            for pattern in &test_patterns {
                let test_path = parent.join(pattern);
                if test_path.exists() {
                    related.push(RelatedFile {
                        path: test_path,
                        relationship_type: RelationType::TestsImplementation,
                        strength: 0.9,
                        description: "Test file for this implementation".to_string(),
                    });
                    break;
                }
            }

            // Check tests/ directory.
            // NOTE(review): unlike the sibling search above, this loop does
            // not break, so several tests/ candidates may be pushed — confirm
            // whether that is intended.
            if let Some(grandparent) = parent.parent() {
                let tests_dir = grandparent.join("tests");
                if tests_dir.exists() {
                    for pattern in &test_patterns {
                        let test_path = tests_dir.join(pattern);
                        if test_path.exists() {
                            related.push(RelatedFile {
                                path: test_path,
                                relationship_type: RelationType::TestsImplementation,
                                strength: 0.8,
                                description: "Test file in tests/ directory".to_string(),
                            });
                        }
                    }
                }
            }
        }

        related
    }
|
||||
|
||||
    /// Build a relationship graph for visualization
    ///
    /// Nodes are every file mentioned by any stored relationship; edges come
    /// from the relationships themselves. Note: only the first two files of
    /// each relationship produce an edge, so relationships spanning more than
    /// two files are only partially represented (their extra files still
    /// become nodes).
    pub fn build_graph(&self) -> Result<RelationshipGraph> {
        let mut nodes = Vec::new();
        let mut edges = Vec::new();
        let mut node_ids: HashMap<PathBuf, String> = HashMap::new();
        let mut node_degrees: HashMap<String, usize> = HashMap::new();

        // Build nodes from all files in relationships
        for relationship in self.relationships.values() {
            for file in &relationship.files {
                if !node_ids.contains_key(file) {
                    let id = format!("node-{}", node_ids.len());
                    node_ids.insert(file.clone(), id.clone());

                    // Label with the bare file name, falling back to the
                    // full path when there is none.
                    let label = file
                        .file_name()
                        .map(|n| n.to_string_lossy().to_string())
                        .unwrap_or_else(|| file.to_string_lossy().to_string());

                    // Style nodes by file extension.
                    let node_type = file
                        .extension()
                        .map(|e| e.to_string_lossy().to_string())
                        .unwrap_or_else(|| "unknown".to_string());

                    nodes.push(GraphNode {
                        id: id.clone(),
                        path: file.clone(),
                        label,
                        node_type,
                        degree: 0, // Will update later
                    });
                }
            }
        }

        // Build edges from relationships
        for relationship in self.relationships.values() {
            if relationship.files.len() >= 2 {
                // Skip relationships where files aren't in the node map
                let Some(source_id) = node_ids.get(&relationship.files[0]).cloned() else {
                    continue;
                };
                let Some(target_id) = node_ids.get(&relationship.files[1]).cloned() else {
                    continue;
                };

                // Update degrees
                *node_degrees.entry(source_id.clone()).or_insert(0) += 1;
                *node_degrees.entry(target_id.clone()).or_insert(0) += 1;

                // Edge label is the Debug rendering of the relationship type.
                let label = format!("{:?}", relationship.relationship_type);

                edges.push(GraphEdge {
                    source: source_id,
                    target: target_id,
                    relationship_type: relationship.relationship_type,
                    weight: relationship.strength,
                    label,
                });
            }
        }

        // Update node degrees
        for node in &mut nodes {
            node.degree = node_degrees.get(&node.id).copied().unwrap_or(0);
        }

        // Calculate metadata (average strength is 0.0 for an empty graph)
        let average_strength = if edges.is_empty() {
            0.0
        } else {
            edges.iter().map(|e| e.weight).sum::<f64>() / edges.len() as f64
        };

        let metadata = GraphMetadata {
            node_count: nodes.len(),
            edge_count: edges.len(),
            built_at: Utc::now(),
            average_strength,
        };

        Ok(RelationshipGraph {
            nodes,
            edges,
            metadata,
        })
    }
|
||||
|
||||
    /// Get a specific relationship by ID, or `None` when it does not exist.
    pub fn get_relationship(&self, id: &str) -> Option<&FileRelationship> {
        self.relationships.get(id)
    }
|
||||
|
||||
    /// Get all relationships (in arbitrary `HashMap` iteration order).
    pub fn get_all_relationships(&self) -> Vec<&FileRelationship> {
        self.relationships.values().collect()
    }
|
||||
|
||||
/// Delete a relationship
|
||||
pub fn delete_relationship(&mut self, id: &str) -> Result<()> {
|
||||
if let Some(relationship) = self.relationships.remove(id) {
|
||||
// Remove from file index
|
||||
for file in &relationship.files {
|
||||
if let Some(ids) = self.file_relationships.get_mut(file) {
|
||||
ids.retain(|i| i != id);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
Err(RelationshipError::NotFound(id.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Get relationships by type
|
||||
pub fn get_relationships_by_type(&self, rel_type: RelationType) -> Vec<&FileRelationship> {
|
||||
self.relationships
|
||||
.values()
|
||||
.filter(|r| r.relationship_type == rel_type)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Update relationship strength
|
||||
pub fn update_strength(&mut self, id: &str, delta: f64) -> Result<()> {
|
||||
if let Some(relationship) = self.relationships.get_mut(id) {
|
||||
relationship.strength = (relationship.strength + delta).clamp(0.0, 1.0);
|
||||
relationship.last_confirmed = Some(Utc::now());
|
||||
relationship.observation_count += 1;
|
||||
Ok(())
|
||||
} else {
|
||||
Err(RelationshipError::NotFound(id.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Load relationships from storage
|
||||
pub fn load_relationships(&mut self, relationships: Vec<FileRelationship>) -> Result<()> {
|
||||
for relationship in relationships {
|
||||
self.add_relationship(relationship)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
    /// Export all relationships for storage (owned clones, arbitrary order).
    pub fn export_relationships(&self) -> Vec<FileRelationship> {
        self.relationships.values().cloned().collect()
    }
|
||||
|
||||
/// Get the most connected files (highest degree in graph)
|
||||
pub fn get_hub_files(&self, limit: usize) -> Vec<(PathBuf, usize)> {
|
||||
let mut file_degrees: HashMap<PathBuf, usize> = HashMap::new();
|
||||
|
||||
for relationship in self.relationships.values() {
|
||||
for file in &relationship.files {
|
||||
*file_degrees.entry(file.clone()).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let mut sorted: Vec<_> = file_degrees.into_iter().collect();
|
||||
sorted.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
sorted.truncate(limit);
|
||||
|
||||
sorted
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for RelationshipTracker {
    /// Equivalent to [`RelationshipTracker::new`]: an empty tracker.
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// TESTS
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
    /// Fixture: a SharedDomain relationship linking main.rs and lib.rs.
    fn create_test_relationship() -> FileRelationship {
        FileRelationship::new(
            "test-rel-1".to_string(),
            vec![PathBuf::from("src/main.rs"), PathBuf::from("src/lib.rs")],
            RelationType::SharedDomain,
            "Core entry points".to_string(),
        )
    }
|
||||
|
||||
    /// An added relationship is retrievable by id.
    #[test]
    fn test_add_relationship() {
        let mut tracker = RelationshipTracker::new();
        let rel = create_test_relationship();

        let result = tracker.add_relationship(rel);
        assert!(result.is_ok());

        let stored = tracker.get_relationship("test-rel-1");
        assert!(stored.is_some());
    }
|
||||
|
||||
    /// Related-file lookup returns the other participant of a relationship.
    #[test]
    fn test_get_related_files() {
        let mut tracker = RelationshipTracker::new();
        let rel = create_test_relationship();
        tracker.add_relationship(rel).unwrap();

        let related = tracker.get_related_files(Path::new("src/main.rs")).unwrap();

        assert!(!related.is_empty());
        assert!(related
            .iter()
            .any(|r| r.path == PathBuf::from("src/lib.rs")));
    }
|
||||
|
||||
#[test]
|
||||
fn test_build_graph() {
|
||||
let mut tracker = RelationshipTracker::new();
|
||||
let rel = create_test_relationship();
|
||||
tracker.add_relationship(rel).unwrap();
|
||||
|
||||
let graph = tracker.build_graph().unwrap();
|
||||
|
||||
assert_eq!(graph.nodes.len(), 2);
|
||||
assert_eq!(graph.edges.len(), 1);
|
||||
assert_eq!(graph.metadata.node_count, 2);
|
||||
assert_eq!(graph.metadata.edge_count, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_relationship() {
|
||||
let mut tracker = RelationshipTracker::new();
|
||||
let rel = create_test_relationship();
|
||||
tracker.add_relationship(rel).unwrap();
|
||||
|
||||
assert!(tracker.get_relationship("test-rel-1").is_some());
|
||||
|
||||
tracker.delete_relationship("test-rel-1").unwrap();
|
||||
|
||||
assert!(tracker.get_relationship("test-rel-1").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_record_coedit() {
|
||||
let mut tracker = RelationshipTracker::new();
|
||||
|
||||
let files = vec![PathBuf::from("src/a.rs"), PathBuf::from("src/b.rs")];
|
||||
|
||||
// Record multiple coedits
|
||||
for _ in 0..5 {
|
||||
tracker.record_coedit(&files).unwrap();
|
||||
}
|
||||
|
||||
// Finalize should create a relationship
|
||||
tracker.finalize_session().unwrap();
|
||||
|
||||
// Should have a co-change relationship
|
||||
let relationships = tracker.get_relationships_by_type(RelationType::FrequentCochange);
|
||||
assert!(!relationships.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_hub_files() {
|
||||
let mut tracker = RelationshipTracker::new();
|
||||
|
||||
// Create a hub file (main.rs) connected to multiple others
|
||||
for i in 0..5 {
|
||||
let rel = FileRelationship::new(
|
||||
format!("rel-{}", i),
|
||||
vec![
|
||||
PathBuf::from("src/main.rs"),
|
||||
PathBuf::from(format!("src/module{}.rs", i)),
|
||||
],
|
||||
RelationType::ImportsDependency,
|
||||
"Import relationship".to_string(),
|
||||
);
|
||||
tracker.add_relationship(rel).unwrap();
|
||||
}
|
||||
|
||||
let hubs = tracker.get_hub_files(3);
|
||||
|
||||
assert!(!hubs.is_empty());
|
||||
assert_eq!(hubs[0].0, PathBuf::from("src/main.rs"));
|
||||
assert_eq!(hubs[0].1, 5);
|
||||
}
|
||||
}
|
||||
799
crates/vestige-core/src/codebase/types.rs
Normal file
799
crates/vestige-core/src/codebase/types.rs
Normal file
|
|
@ -0,0 +1,799 @@
|
|||
//! Codebase-specific memory types for Vestige
|
||||
//!
|
||||
//! This module defines the specialized node types that make Vestige's codebase memory
|
||||
//! unique and powerful. These types capture the contextual knowledge that developers
|
||||
//! accumulate but traditionally lose - architectural decisions, bug fixes, coding
|
||||
//! patterns, and file relationships.
|
||||
//!
|
||||
//! This is Vestige's KILLER DIFFERENTIATOR. No other AI memory system understands
|
||||
//! codebases at this level.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::PathBuf;
|
||||
|
||||
// ============================================================================
|
||||
// CODEBASE NODE - The Core Memory Type
|
||||
// ============================================================================
|
||||
|
||||
/// Types of memories specific to codebases.
///
/// Each variant captures a different kind of knowledge that developers accumulate
/// but typically lose over time or when context-switching between projects.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
// `tag = "type"` makes each variant serialize as an internally tagged object,
// e.g. `{"type": "bug_fix", ...}` — the tag values mirror `node_type()`.
pub enum CodebaseNode {
    /// "We use X pattern because Y"
    ///
    /// Captures architectural decisions with their rationale. This is critical
    /// for maintaining consistency and understanding why the codebase evolved
    /// the way it did.
    ArchitecturalDecision(ArchitecturalDecision),

    /// "This bug was caused by X, fixed by Y"
    ///
    /// Records bug fixes with root cause analysis. Invaluable for preventing
    /// regression and understanding historical issues.
    BugFix(BugFix),

    /// "Use this pattern for X"
    ///
    /// Codifies recurring patterns with examples and guidance on when to use them.
    CodePattern(CodePattern),

    /// "These files always change together"
    ///
    /// Tracks file relationships discovered through git history analysis or
    /// explicit user teaching.
    FileRelationship(FileRelationship),

    /// "User prefers X over Y"
    ///
    /// Captures coding preferences and style decisions for consistent suggestions.
    CodingPreference(CodingPreference),

    /// "This function does X and is called by Y"
    ///
    /// Stores knowledge about specific code entities - functions, types, modules.
    CodeEntity(CodeEntity),

    /// "The current task is implementing X"
    ///
    /// Tracks ongoing work context for continuity across sessions.
    WorkContext(WorkContext),
}
|
||||
|
||||
impl CodebaseNode {
    /// Get the unique identifier for this node
    pub fn id(&self) -> &str {
        match self {
            Self::ArchitecturalDecision(n) => &n.id,
            Self::BugFix(n) => &n.id,
            Self::CodePattern(n) => &n.id,
            Self::FileRelationship(n) => &n.id,
            Self::CodingPreference(n) => &n.id,
            Self::CodeEntity(n) => &n.id,
            Self::WorkContext(n) => &n.id,
        }
    }

    /// Get the node type as a string
    ///
    /// The labels match the serde `snake_case` tag values produced when the
    /// enum is serialized (see the `#[serde(tag = "type")]` attribute).
    pub fn node_type(&self) -> &'static str {
        match self {
            Self::ArchitecturalDecision(_) => "architectural_decision",
            Self::BugFix(_) => "bug_fix",
            Self::CodePattern(_) => "code_pattern",
            Self::FileRelationship(_) => "file_relationship",
            Self::CodingPreference(_) => "coding_preference",
            Self::CodeEntity(_) => "code_entity",
            Self::WorkContext(_) => "work_context",
        }
    }

    /// Get the creation timestamp
    pub fn created_at(&self) -> DateTime<Utc> {
        match self {
            Self::ArchitecturalDecision(n) => n.created_at,
            Self::BugFix(n) => n.created_at,
            Self::CodePattern(n) => n.created_at,
            Self::FileRelationship(n) => n.created_at,
            Self::CodingPreference(n) => n.created_at,
            Self::CodeEntity(n) => n.created_at,
            Self::WorkContext(n) => n.created_at,
        }
    }

    /// Get all file paths associated with this node
    ///
    /// Returns an empty list for node types that carry no file references.
    pub fn associated_files(&self) -> Vec<&PathBuf> {
        match self {
            Self::ArchitecturalDecision(n) => n.files_affected.iter().collect(),
            Self::BugFix(n) => n.files_changed.iter().collect(),
            Self::CodePattern(n) => n.example_files.iter().collect(),
            Self::FileRelationship(n) => n.files.iter().collect(),
            // Coding preferences are not tied to particular files.
            Self::CodingPreference(_) => vec![],
            Self::CodeEntity(n) => n.file_path.as_ref().map(|p| vec![p]).unwrap_or_default(),
            Self::WorkContext(n) => n.active_files.iter().collect(),
        }
    }

    /// Convert to a searchable text representation
    ///
    /// Concatenates the variant's key fields into a single human-readable
    /// string; each variant contributes its most distinctive text fields.
    pub fn to_searchable_text(&self) -> String {
        match self {
            Self::ArchitecturalDecision(n) => {
                format!(
                    "Architectural Decision: {} - Rationale: {} - Context: {}",
                    n.decision,
                    n.rationale,
                    n.context.as_deref().unwrap_or("")
                )
            }
            Self::BugFix(n) => {
                format!(
                    "Bug Fix: {} - Root Cause: {} - Solution: {}",
                    n.symptom, n.root_cause, n.solution
                )
            }
            Self::CodePattern(n) => {
                format!(
                    "Code Pattern: {} - {} - When to use: {}",
                    n.name, n.description, n.when_to_use
                )
            }
            Self::FileRelationship(n) => {
                format!(
                    "File Relationship: {:?} - Type: {:?} - {}",
                    n.files, n.relationship_type, n.description
                )
            }
            Self::CodingPreference(n) => {
                format!(
                    "Coding Preference ({}): {} vs {:?}",
                    n.context, n.preference, n.counter_preference
                )
            }
            Self::CodeEntity(n) => {
                format!(
                    "Code Entity: {} ({:?}) - {}",
                    n.name, n.entity_type, n.description
                )
            }
            Self::WorkContext(n) => {
                format!(
                    "Work Context: {} - {} - Active files: {:?}",
                    n.task_description,
                    n.status.as_str(),
                    n.active_files
                )
            }
        }
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// ARCHITECTURAL DECISION
|
||||
// ============================================================================
|
||||
|
||||
/// Records an architectural decision with its rationale.
///
/// Example:
/// - Decision: "Use Event Sourcing for order management"
/// - Rationale: "Need complete audit trail and ability to replay state"
/// - Files: ["src/orders/events.rs", "src/orders/aggregate.rs"]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ArchitecturalDecision {
    /// Unique identifier for this decision
    pub id: String,
    /// The decision that was made
    pub decision: String,
    /// Why this decision was made
    pub rationale: String,
    /// Files affected by this decision
    pub files_affected: Vec<PathBuf>,
    /// Git commit SHA where this was implemented (if applicable)
    pub commit_sha: Option<String>,
    /// When this decision was recorded
    pub created_at: DateTime<Utc>,
    /// When this decision was last updated
    pub updated_at: Option<DateTime<Utc>>,
    /// Additional context or notes
    pub context: Option<String>,
    /// Tags for categorization
    pub tags: Vec<String>,
    /// Status of the decision
    pub status: DecisionStatus,
    /// Alternatives that were considered
    pub alternatives_considered: Vec<String>,
}
|
||||
|
||||
/// Status of an architectural decision
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DecisionStatus {
    /// Decision is proposed but not yet implemented
    Proposed,
    /// Decision is accepted and being implemented
    Accepted,
    /// Decision has been superseded by another
    Superseded,
    /// Decision was rejected or is no longer recommended
    // NOTE(review): the original doc said "rejected", which reads like a
    // separate "Rejected" state — confirm whether rejected and deprecated
    // decisions are intentionally collapsed into this one variant.
    Deprecated,
}
|
||||
|
||||
impl Default for DecisionStatus {
    /// Decisions default to [`DecisionStatus::Accepted`].
    fn default() -> Self {
        Self::Accepted
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// BUG FIX
|
||||
// ============================================================================
|
||||
|
||||
/// Records a bug fix with root cause analysis.
///
/// This is invaluable for:
/// - Preventing regressions
/// - Understanding why certain code exists
/// - Training junior developers on common pitfalls
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct BugFix {
    /// Unique identifier for this bug fix record
    pub id: String,
    /// What symptoms was the bug causing?
    pub symptom: String,
    /// What was the actual root cause?
    pub root_cause: String,
    /// How was it fixed?
    pub solution: String,
    /// Files that were changed to fix the bug
    pub files_changed: Vec<PathBuf>,
    /// Git commit SHA of the fix (required, unlike most other node types)
    pub commit_sha: String,
    /// When the fix was recorded
    pub created_at: DateTime<Utc>,
    /// Link to issue tracker (if applicable)
    pub issue_link: Option<String>,
    /// Severity of the bug
    pub severity: BugSeverity,
    /// How the bug was discovered
    pub discovered_by: Option<String>,
    /// Prevention measures (what would have caught this earlier)
    pub prevention_notes: Option<String>,
    /// Tags for categorization
    pub tags: Vec<String>,
}
|
||||
|
||||
/// Severity level of a bug
///
/// Variants are listed from most to least severe.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BugSeverity {
    Critical,
    High,
    Medium,
    Low,
    Trivial,
}
|
||||
|
||||
impl Default for BugSeverity {
    /// Bugs default to [`BugSeverity::Medium`] when no severity is specified.
    fn default() -> Self {
        Self::Medium
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// CODE PATTERN
|
||||
// ============================================================================
|
||||
|
||||
/// Records a reusable code pattern with examples and guidance.
///
/// Patterns can be:
/// - Discovered automatically from git history
/// - Taught explicitly by the user
/// - Extracted from documentation
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CodePattern {
    /// Unique identifier for this pattern
    pub id: String,
    /// Name of the pattern (e.g., "Repository Pattern", "Error Handling")
    pub name: String,
    /// Detailed description of the pattern
    pub description: String,
    /// Example code showing the pattern
    pub example_code: String,
    /// Files containing examples of this pattern
    pub example_files: Vec<PathBuf>,
    /// When should this pattern be used?
    pub when_to_use: String,
    /// When should this pattern NOT be used?
    pub when_not_to_use: Option<String>,
    /// Language this pattern applies to
    pub language: Option<String>,
    /// When this pattern was recorded
    pub created_at: DateTime<Utc>,
    /// How many times this pattern has been applied
    pub usage_count: u32,
    /// Tags for categorization
    pub tags: Vec<String>,
    /// Related patterns
    pub related_patterns: Vec<String>,
}
|
||||
|
||||
// ============================================================================
|
||||
// FILE RELATIONSHIP
|
||||
// ============================================================================
|
||||
|
||||
/// Tracks relationships between files in the codebase.
///
/// Relationships can be:
/// - Discovered from imports/dependencies
/// - Detected from git co-change patterns
/// - Explicitly taught by the user
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FileRelationship {
    /// Unique identifier for this relationship
    pub id: String,
    /// The files involved in this relationship
    pub files: Vec<PathBuf>,
    /// Type of relationship
    pub relationship_type: RelationType,
    /// Strength of the relationship (0.0 - 1.0)
    /// For co-change relationships, this is the frequency they change together
    pub strength: f64,
    /// Human-readable description
    pub description: String,
    /// When this relationship was first detected
    pub created_at: DateTime<Utc>,
    /// When this relationship was last confirmed
    pub last_confirmed: Option<DateTime<Utc>>,
    /// How this relationship was discovered
    pub source: RelationshipSource,
    /// Number of times this relationship has been observed
    pub observation_count: u32,
}
|
||||
|
||||
/// Types of relationships between files
///
/// Serialized in `snake_case` (e.g. `frequent_cochange`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RelationType {
    /// A imports/depends on B
    ImportsDependency,
    /// A tests implementation in B
    TestsImplementation,
    /// A configures service B
    ConfiguresService,
    /// Files are in the same domain/feature area
    SharedDomain,
    /// Files frequently change together in commits
    FrequentCochange,
    /// A extends/implements B
    ExtendsImplements,
    /// A is the interface, B is the implementation
    InterfaceImplementation,
    /// A and B are related through documentation
    DocumentationReference,
}
|
||||
|
||||
/// How a relationship was discovered
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RelationshipSource {
    /// Detected from git history co-change analysis
    GitCochange,
    /// Detected from import/dependency analysis
    ImportAnalysis,
    /// Detected from AST analysis
    AstAnalysis,
    /// Explicitly taught by user
    UserDefined,
    /// Inferred from file naming conventions
    NamingConvention,
}
|
||||
|
||||
// ============================================================================
|
||||
// CODING PREFERENCE
|
||||
// ============================================================================
|
||||
|
||||
/// Records a user's coding preferences for consistent suggestions.
///
/// Examples:
/// - "For error handling, prefer Result over panic"
/// - "For naming, use snake_case for functions"
/// - "For async, prefer tokio over async-std"
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CodingPreference {
    /// Unique identifier for this preference
    pub id: String,
    /// Context where this preference applies (e.g., "error handling", "naming")
    pub context: String,
    /// The preferred approach
    pub preference: String,
    /// What NOT to do (optional)
    pub counter_preference: Option<String>,
    /// Examples showing the preference in action
    pub examples: Vec<String>,
    /// Confidence in this preference (0.0 - 1.0)
    /// Higher confidence = more consistently applied
    pub confidence: f64,
    /// When this preference was recorded
    pub created_at: DateTime<Utc>,
    /// Language this applies to (None = all languages)
    pub language: Option<String>,
    /// How this preference was learned
    pub source: PreferenceSource,
    /// Number of times this preference has been observed
    pub observation_count: u32,
}
|
||||
|
||||
/// How a preference was learned
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PreferenceSource {
    /// Explicitly stated by user
    UserStated,
    /// Inferred from code review feedback
    CodeReview,
    /// Detected from coding patterns in history
    PatternDetection,
    /// From project configuration (e.g., rustfmt.toml)
    ProjectConfig,
}
|
||||
|
||||
// ============================================================================
|
||||
// CODE ENTITY
|
||||
// ============================================================================
|
||||
|
||||
/// Knowledge about a specific code entity (function, type, module, etc.)
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CodeEntity {
    /// Unique identifier for this entity record
    pub id: String,
    /// Name of the entity
    pub name: String,
    /// Type of entity
    pub entity_type: EntityType,
    /// Description of what this entity does
    pub description: String,
    /// File where this entity is defined
    pub file_path: Option<PathBuf>,
    /// Line number where entity starts
    pub line_number: Option<u32>,
    /// Entities that this one depends on
    pub dependencies: Vec<String>,
    /// Entities that depend on this one
    pub dependents: Vec<String>,
    /// When this was recorded
    pub created_at: DateTime<Utc>,
    /// Tags for categorization
    pub tags: Vec<String>,
    /// Usage notes or gotchas
    pub notes: Option<String>,
}
|
||||
|
||||
/// Type of code entity
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EntityType {
    // Callable items
    Function,
    Method,
    // Type definitions
    Struct,
    Enum,
    Trait,
    Interface,
    Class,
    Module,
    // Value items
    Constant,
    Variable,
    Type,
}
|
||||
|
||||
// ============================================================================
|
||||
// WORK CONTEXT
|
||||
// ============================================================================
|
||||
|
||||
/// Tracks the current work context for continuity across sessions.
///
/// This allows Vestige to remember:
/// - What task the user was working on
/// - What files were being edited
/// - What the next steps were
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct WorkContext {
    /// Unique identifier for this work context
    pub id: String,
    /// Description of the current task
    pub task_description: String,
    /// Files currently being worked on
    pub active_files: Vec<PathBuf>,
    /// Current git branch
    pub branch: Option<String>,
    /// Status of the work
    pub status: WorkStatus,
    /// Next steps that were planned
    pub next_steps: Vec<String>,
    /// Blockers or issues encountered
    pub blockers: Vec<String>,
    /// When this context was created
    pub created_at: DateTime<Utc>,
    /// When this context was last updated
    pub updated_at: DateTime<Utc>,
    /// Related issue/ticket IDs
    pub related_issues: Vec<String>,
    /// Notes about the work
    pub notes: Option<String>,
}
|
||||
|
||||
/// Status of work in progress
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum WorkStatus {
    /// Actively being worked on
    InProgress,
    /// Paused, will resume later
    Paused,
    /// Completed
    Completed,
    /// Blocked by something
    Blocked,
    /// Abandoned
    Abandoned,
}
|
||||
|
||||
impl WorkStatus {
    /// Stable `snake_case` label for this status.
    ///
    /// These strings match the serde representation produced by the
    /// `#[serde(rename_all = "snake_case")]` attribute on the enum — keep
    /// them in sync if variants are renamed.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::InProgress => "in_progress",
            Self::Paused => "paused",
            Self::Completed => "completed",
            Self::Blocked => "blocked",
            Self::Abandoned => "abandoned",
        }
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// BUILDER HELPERS
|
||||
// ============================================================================
|
||||
|
||||
impl ArchitecturalDecision {
|
||||
pub fn new(id: String, decision: String, rationale: String) -> Self {
|
||||
Self {
|
||||
id,
|
||||
decision,
|
||||
rationale,
|
||||
files_affected: vec![],
|
||||
commit_sha: None,
|
||||
created_at: Utc::now(),
|
||||
updated_at: None,
|
||||
context: None,
|
||||
tags: vec![],
|
||||
status: DecisionStatus::default(),
|
||||
alternatives_considered: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_files(mut self, files: Vec<PathBuf>) -> Self {
|
||||
self.files_affected = files;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_commit(mut self, sha: String) -> Self {
|
||||
self.commit_sha = Some(sha);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_context(mut self, context: String) -> Self {
|
||||
self.context = Some(context);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_tags(mut self, tags: Vec<String>) -> Self {
|
||||
self.tags = tags;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl BugFix {
|
||||
pub fn new(
|
||||
id: String,
|
||||
symptom: String,
|
||||
root_cause: String,
|
||||
solution: String,
|
||||
commit_sha: String,
|
||||
) -> Self {
|
||||
Self {
|
||||
id,
|
||||
symptom,
|
||||
root_cause,
|
||||
solution,
|
||||
files_changed: vec![],
|
||||
commit_sha,
|
||||
created_at: Utc::now(),
|
||||
issue_link: None,
|
||||
severity: BugSeverity::default(),
|
||||
discovered_by: None,
|
||||
prevention_notes: None,
|
||||
tags: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_files(mut self, files: Vec<PathBuf>) -> Self {
|
||||
self.files_changed = files;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_severity(mut self, severity: BugSeverity) -> Self {
|
||||
self.severity = severity;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_issue(mut self, link: String) -> Self {
|
||||
self.issue_link = Some(link);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl CodePattern {
|
||||
pub fn new(id: String, name: String, description: String, when_to_use: String) -> Self {
|
||||
Self {
|
||||
id,
|
||||
name,
|
||||
description,
|
||||
example_code: String::new(),
|
||||
example_files: vec![],
|
||||
when_to_use,
|
||||
when_not_to_use: None,
|
||||
language: None,
|
||||
created_at: Utc::now(),
|
||||
usage_count: 0,
|
||||
tags: vec![],
|
||||
related_patterns: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_example(mut self, code: String, files: Vec<PathBuf>) -> Self {
|
||||
self.example_code = code;
|
||||
self.example_files = files;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_language(mut self, language: String) -> Self {
|
||||
self.language = Some(language);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl FileRelationship {
|
||||
pub fn new(
|
||||
id: String,
|
||||
files: Vec<PathBuf>,
|
||||
relationship_type: RelationType,
|
||||
description: String,
|
||||
) -> Self {
|
||||
Self {
|
||||
id,
|
||||
files,
|
||||
relationship_type,
|
||||
strength: 0.5,
|
||||
description,
|
||||
created_at: Utc::now(),
|
||||
last_confirmed: None,
|
||||
source: RelationshipSource::UserDefined,
|
||||
observation_count: 1,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_git_cochange(id: String, files: Vec<PathBuf>, strength: f64, count: u32) -> Self {
|
||||
Self {
|
||||
id,
|
||||
files: files.clone(),
|
||||
relationship_type: RelationType::FrequentCochange,
|
||||
strength,
|
||||
description: format!(
|
||||
"Files frequently change together ({} co-occurrences)",
|
||||
count
|
||||
),
|
||||
created_at: Utc::now(),
|
||||
last_confirmed: Some(Utc::now()),
|
||||
source: RelationshipSource::GitCochange,
|
||||
observation_count: count,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CodingPreference {
|
||||
pub fn new(id: String, context: String, preference: String) -> Self {
|
||||
Self {
|
||||
id,
|
||||
context,
|
||||
preference,
|
||||
counter_preference: None,
|
||||
examples: vec![],
|
||||
confidence: 0.5,
|
||||
created_at: Utc::now(),
|
||||
language: None,
|
||||
source: PreferenceSource::UserStated,
|
||||
observation_count: 1,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_counter(mut self, counter: String) -> Self {
|
||||
self.counter_preference = Some(counter);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_examples(mut self, examples: Vec<String>) -> Self {
|
||||
self.examples = examples;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_confidence(mut self, confidence: f64) -> Self {
|
||||
self.confidence = confidence.clamp(0.0, 1.0);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// TESTS
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Builder methods chain and populate the corresponding fields.
    #[test]
    fn test_architectural_decision_builder() {
        let decision = ArchitecturalDecision::new(
            "adr-001".to_string(),
            "Use Event Sourcing".to_string(),
            "Need complete audit trail".to_string(),
        )
        .with_files(vec![PathBuf::from("src/events.rs")])
        .with_tags(vec!["architecture".to_string()]);

        assert_eq!(decision.id, "adr-001");
        assert!(!decision.files_affected.is_empty());
        assert!(!decision.tags.is_empty());
    }

    // The enum accessors delegate to the wrapped node's id and report the
    // matching snake_case type label.
    #[test]
    fn test_codebase_node_id() {
        let decision = ArchitecturalDecision::new(
            "test-id".to_string(),
            "Test".to_string(),
            "Test".to_string(),
        );
        let node = CodebaseNode::ArchitecturalDecision(decision);
        assert_eq!(node.id(), "test-id");
        assert_eq!(node.node_type(), "architectural_decision");
    }

    // from_git_cochange sets the co-change type/source and carries through
    // the supplied strength and observation count.
    #[test]
    fn test_file_relationship_from_git() {
        let rel = FileRelationship::from_git_cochange(
            "rel-001".to_string(),
            vec![PathBuf::from("src/a.rs"), PathBuf::from("src/b.rs")],
            0.8,
            15,
        );

        assert_eq!(rel.relationship_type, RelationType::FrequentCochange);
        assert_eq!(rel.source, RelationshipSource::GitCochange);
        assert_eq!(rel.strength, 0.8);
        assert_eq!(rel.observation_count, 15);
    }

    // The searchable text includes the pattern's key text fields.
    #[test]
    fn test_searchable_text() {
        let pattern = CodePattern::new(
            "pat-001".to_string(),
            "Repository Pattern".to_string(),
            "Abstract data access".to_string(),
            "When you need to decouple domain logic from data access".to_string(),
        );
        let node = CodebaseNode::CodePattern(pattern);
        let text = node.to_searchable_text();

        assert!(text.contains("Repository Pattern"));
        assert!(text.contains("Abstract data access"));
    }
}
|
||||
729
crates/vestige-core/src/codebase/watcher.rs
Normal file
729
crates/vestige-core/src/codebase/watcher.rs
Normal file
|
|
@ -0,0 +1,729 @@
|
|||
//! File system watching for automatic learning
|
||||
//!
|
||||
//! This module watches the codebase for changes and:
|
||||
//! - Records co-edit patterns (files changed together)
|
||||
//! - Triggers pattern detection on modified files
|
||||
//! - Updates relationship strengths based on activity
|
||||
//!
|
||||
//! This enables Vestige to learn continuously from developer behavior
|
||||
//! without requiring explicit user input.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
|
||||
use tokio::sync::{broadcast, mpsc, RwLock};
|
||||
|
||||
use super::patterns::PatternDetector;
|
||||
use super::relationships::RelationshipTracker;
|
||||
|
||||
// ============================================================================
|
||||
// ERRORS
|
||||
// ============================================================================
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum WatcherError {
|
||||
#[error("Watcher error: {0}")]
|
||||
Notify(#[from] notify::Error),
|
||||
#[error("IO error: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
#[error("Channel error: {0}")]
|
||||
Channel(String),
|
||||
#[error("Already watching: {0}")]
|
||||
AlreadyWatching(PathBuf),
|
||||
#[error("Not watching: {0}")]
|
||||
NotWatching(PathBuf),
|
||||
#[error("Relationship error: {0}")]
|
||||
Relationship(#[from] super::relationships::RelationshipError),
|
||||
}
|
||||
|
||||
/// Convenience alias for watcher operations that can fail with [`WatcherError`].
pub type Result<T> = std::result::Result<T, WatcherError>;
|
||||
|
||||
// ============================================================================
|
||||
// FILE EVENT
|
||||
// ============================================================================
|
||||
|
||||
/// Represents a file change event
#[derive(Debug, Clone)]
pub struct FileEvent {
    /// Type of event
    pub kind: FileEventKind,
    /// Path(s) affected
    pub paths: Vec<PathBuf>,
    /// When the event occurred (UTC)
    pub timestamp: DateTime<Utc>,
}
|
||||
|
||||
/// Types of file events
///
/// Coarse-grained classification derived from `notify`'s more detailed
/// [`EventKind`] taxonomy (see the `From<EventKind>` impl below).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileEventKind {
    /// File was created
    Created,
    /// File was modified
    Modified,
    /// File was deleted
    Deleted,
    /// File was renamed
    Renamed,
    /// Access event (read)
    Accessed,
}
|
||||
|
||||
impl From<EventKind> for FileEventKind {
|
||||
fn from(kind: EventKind) -> Self {
|
||||
match kind {
|
||||
EventKind::Create(_) => Self::Created,
|
||||
EventKind::Modify(_) => Self::Modified,
|
||||
EventKind::Remove(_) => Self::Deleted,
|
||||
EventKind::Access(_) => Self::Accessed,
|
||||
_ => Self::Modified, // Default to modified
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
// WATCHER CONFIG
// ============================================================================

/// Configuration for the codebase watcher.
#[derive(Debug, Clone)]
pub struct WatcherConfig {
    /// Debounce interval for batching events (also used as the poll
    /// interval of the underlying `notify` watcher).
    pub debounce_interval: Duration,
    /// Patterns to ignore (gitignore-style globs).
    pub ignore_patterns: Vec<String>,
    /// File extensions to watch (`None` = watch all extensions).
    pub watch_extensions: Option<Vec<String>>,
    /// Maximum depth for recursive watching.
    // NOTE(review): not consulted anywhere in this module — watching is
    // always fully recursive; confirm whether this is honored elsewhere.
    pub max_depth: Option<usize>,
    /// Enable pattern detection on file changes.
    pub detect_patterns: bool,
    /// Enable co-edit relationship tracking.
    pub track_relationships: bool,
}
impl Default for WatcherConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
debounce_interval: Duration::from_millis(500),
|
||||
ignore_patterns: vec![
|
||||
"**/node_modules/**".to_string(),
|
||||
"**/target/**".to_string(),
|
||||
"**/.git/**".to_string(),
|
||||
"**/dist/**".to_string(),
|
||||
"**/build/**".to_string(),
|
||||
"**/*.lock".to_string(),
|
||||
"**/*.log".to_string(),
|
||||
],
|
||||
watch_extensions: Some(vec![
|
||||
"rs".to_string(),
|
||||
"ts".to_string(),
|
||||
"tsx".to_string(),
|
||||
"js".to_string(),
|
||||
"jsx".to_string(),
|
||||
"py".to_string(),
|
||||
"go".to_string(),
|
||||
"java".to_string(),
|
||||
"kt".to_string(),
|
||||
"swift".to_string(),
|
||||
"cs".to_string(),
|
||||
"cpp".to_string(),
|
||||
"c".to_string(),
|
||||
"h".to_string(),
|
||||
"hpp".to_string(),
|
||||
"rb".to_string(),
|
||||
"php".to_string(),
|
||||
]),
|
||||
max_depth: None,
|
||||
detect_patterns: true,
|
||||
track_relationships: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
// EDIT SESSION
// ============================================================================

/// Tracks the set of files being edited together in one working session.
///
/// Files modified within the same session window are later recorded as
/// co-edit relationships.
#[derive(Debug)]
struct EditSession {
    /// Files modified in this session (deduplicated).
    files: HashSet<PathBuf>,
    /// When the session started (for analytics/debugging)
    #[allow(dead_code)]
    started_at: DateTime<Utc>,
    /// When the last edit occurred; drives session expiry.
    last_edit_at: DateTime<Utc>,
}
impl EditSession {
|
||||
fn new() -> Self {
|
||||
let now = Utc::now();
|
||||
Self {
|
||||
files: HashSet::new(),
|
||||
started_at: now,
|
||||
last_edit_at: now,
|
||||
}
|
||||
}
|
||||
|
||||
fn add_file(&mut self, path: PathBuf) {
|
||||
self.files.insert(path);
|
||||
self.last_edit_at = Utc::now();
|
||||
}
|
||||
|
||||
fn is_expired(&self, timeout: Duration) -> bool {
|
||||
let elapsed = Utc::now()
|
||||
.signed_duration_since(self.last_edit_at)
|
||||
.to_std()
|
||||
.unwrap_or(Duration::ZERO);
|
||||
elapsed > timeout
|
||||
}
|
||||
|
||||
fn files_list(&self) -> Vec<PathBuf> {
|
||||
self.files.iter().cloned().collect()
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
// CODEBASE WATCHER
// ============================================================================

/// Watches a codebase for file changes.
///
/// Owns the flags and channels used to coordinate a background `notify`
/// watcher thread and the async event-handler task spawned by `watch`.
pub struct CodebaseWatcher {
    /// Relationship tracker, shared with the event-handler task.
    tracker: Arc<RwLock<RelationshipTracker>>,
    /// Pattern detector, shared with the event-handler task.
    detector: Arc<RwLock<PatternDetector>>,
    /// Configuration applied to all subsequently watched paths.
    config: WatcherConfig,
    /// Currently watched (canonicalized) paths.
    watched_paths: Arc<RwLock<HashSet<PathBuf>>>,
    /// Shutdown signal sender for the event-handler task.
    // NOTE(review): overwritten on every `watch` call — see the note on
    // `watch` about multi-path behavior.
    shutdown_tx: Option<broadcast::Sender<()>>,
    /// Flag polled by the watcher thread; cleared to make it exit.
    running: Arc<AtomicBool>,
}
impl CodebaseWatcher {
    /// Create a new codebase watcher with the default [`WatcherConfig`].
    pub fn new(
        tracker: Arc<RwLock<RelationshipTracker>>,
        detector: Arc<RwLock<PatternDetector>>,
    ) -> Self {
        Self::with_config(tracker, detector, WatcherConfig::default())
    }

    /// Create a new codebase watcher with a custom configuration.
    pub fn with_config(
        tracker: Arc<RwLock<RelationshipTracker>>,
        detector: Arc<RwLock<PatternDetector>>,
        config: WatcherConfig,
    ) -> Self {
        Self {
            tracker,
            detector,
            config,
            watched_paths: Arc::new(RwLock::new(HashSet::new())),
            shutdown_tx: None,
            running: Arc::new(AtomicBool::new(false)),
        }
    }

    /// Start watching a directory recursively.
    ///
    /// Spawns two workers:
    /// 1. an OS thread owning the `notify` watcher, which forwards raw
    ///    filesystem events over an mpsc channel, and
    /// 2. a tokio task that consumes those events, groups edits into an
    ///    [`EditSession`], records co-edits, and runs pattern detection.
    ///
    /// # Errors
    ///
    /// Returns [`WatcherError::AlreadyWatching`] if `path` is already being
    /// watched, or an I/O error if the path cannot be canonicalized.
    ///
    // NOTE(review): each call replaces `self.shutdown_tx`, dropping the
    // previous sender; a prior path's handler task will then see its
    // broadcast channel closed and exit. Confirm whether watching multiple
    // paths concurrently is intended to work.
    pub async fn watch(&mut self, path: &Path) -> Result<()> {
        // Canonicalize so the same directory reached via different relative
        // paths deduplicates to a single entry.
        let path = path.canonicalize()?;

        // Check if already watching (read lock held only for this check)
        {
            let watched = self.watched_paths.read().await;
            if watched.contains(&path) {
                return Err(WatcherError::AlreadyWatching(path));
            }
        }

        // Add to watched paths
        self.watched_paths.write().await.insert(path.clone());

        // Create shutdown channel
        let (shutdown_tx, mut shutdown_rx) = broadcast::channel::<()>(1);
        self.shutdown_tx = Some(shutdown_tx);

        // Create bounded event channel between the watcher thread and the
        // async handler task.
        let (event_tx, mut event_rx) = mpsc::channel::<FileEvent>(100);

        // Clone for move into watcher thread
        let config = self.config.clone();
        let watch_path = path.clone();

        // Set running flag to true and clone for thread
        self.running.store(true, Ordering::SeqCst);
        let running = Arc::clone(&self.running);

        // Spawn watcher thread. `notify` delivers events via a callback on
        // its own thread, so a dedicated std thread (not a tokio task) owns
        // the watcher and keeps it alive.
        let event_tx_clone = event_tx.clone();
        std::thread::spawn(move || {
            // The debounce interval doubles as the poll interval for
            // poll-based backends.
            let config_notify = Config::default().with_poll_interval(config.debounce_interval);

            let tx = event_tx_clone.clone();
            let mut watcher = match RecommendedWatcher::new(
                move |res: std::result::Result<Event, notify::Error>| {
                    if let Ok(event) = res {
                        let file_event = FileEvent {
                            kind: event.kind.into(),
                            paths: event.paths,
                            timestamp: Utc::now(),
                        };
                        // blocking_send is correct here: this callback runs
                        // on a plain thread, not inside the tokio runtime.
                        // Send errors (receiver gone) are deliberately
                        // ignored — best-effort delivery.
                        let _ = tx.blocking_send(file_event);
                    }
                },
                config_notify,
            ) {
                Ok(w) => w,
                Err(e) => {
                    eprintln!("Failed to create watcher: {}", e);
                    return;
                }
            };

            if let Err(e) = watcher.watch(&watch_path, RecursiveMode::Recursive) {
                eprintln!("Failed to watch path: {}", e);
                return;
            }

            // Keep thread alive until shutdown signal; when this loop ends
            // the watcher is dropped, which stops the OS-level watches.
            while running.load(Ordering::SeqCst) {
                std::thread::sleep(Duration::from_millis(100));
            }
        });

        // Clone for move into handler task
        let tracker = Arc::clone(&self.tracker);
        let detector = Arc::clone(&self.detector);
        let config = self.config.clone();

        // Spawn event handler task
        tokio::spawn(async move {
            let mut session = EditSession::new();
            let session_timeout = Duration::from_secs(60 * 30); // 30 minutes

            loop {
                tokio::select! {
                    Some(event) = event_rx.recv() => {
                        // Session expiry is checked lazily: only when the
                        // next event arrives after the timeout has elapsed.
                        if session.is_expired(session_timeout) {
                            // Record co-edits from expired session.
                            // try_write: if the tracker is busy we skip
                            // recording rather than stall the event loop.
                            if session.files.len() >= 2 {
                                let files = session.files_list();
                                if let Ok(mut tracker) = tracker.try_write() {
                                    let _ = tracker.record_coedit(&files);
                                }
                            }
                            session = EditSession::new();
                        }

                        // Process each path affected by this event.
                        for path in &event.paths {
                            if Self::should_process(path, &config) {
                                match event.kind {
                                    FileEventKind::Modified | FileEventKind::Created => {
                                        // Track in session
                                        if config.track_relationships {
                                            session.add_file(path.clone());
                                        }

                                        // Detect patterns if enabled.
                                        // NOTE(review): blocking std::fs read
                                        // inside an async task — acceptable
                                        // for small sources, but confirm for
                                        // large files (spawn_blocking?).
                                        if config.detect_patterns {
                                            if let Ok(content) = std::fs::read_to_string(path) {
                                                let language = Self::detect_language(path);
                                                if let Ok(detector) = detector.try_read() {
                                                    let _ = detector.detect_patterns(&content, &language);
                                                }
                                            }
                                        }
                                    }
                                    FileEventKind::Deleted => {
                                        // File was deleted, remove from session
                                        session.files.remove(path);
                                    }
                                    _ => {}
                                }
                            }
                        }
                    }
                    _ = shutdown_rx.recv() => {
                        // Finalize session before shutdown
                        if session.files.len() >= 2 {
                            let files = session.files_list();
                            if let Ok(mut tracker) = tracker.try_write() {
                                let _ = tracker.record_coedit(&files);
                            }
                        }
                        break;
                    }
                }
            }
        });

        Ok(())
    }

    /// Stop watching a directory.
    ///
    /// # Errors
    ///
    /// Returns [`WatcherError::NotWatching`] if `path` was not being watched.
    pub async fn unwatch(&mut self, path: &Path) -> Result<()> {
        let path = path.canonicalize()?;

        let mut watched = self.watched_paths.write().await;
        if !watched.remove(&path) {
            return Err(WatcherError::NotWatching(path));
        }

        // If no more paths being watched, send shutdown signals
        if watched.is_empty() {
            // Signal watcher thread to exit
            self.running.store(false, Ordering::SeqCst);

            // Signal async task to exit
            if let Some(tx) = &self.shutdown_tx {
                let _ = tx.send(());
            }
        }

        Ok(())
    }

    /// Stop watching all directories and shut down background workers.
    pub async fn stop(&mut self) -> Result<()> {
        self.watched_paths.write().await.clear();

        // Signal watcher thread to exit
        self.running.store(false, Ordering::SeqCst);

        // Signal async task to exit
        if let Some(tx) = &self.shutdown_tx {
            let _ = tx.send(());
        }

        Ok(())
    }

    /// Check if a path should be processed: it must not match any ignore
    /// pattern, and (when an extension whitelist is configured) must carry
    /// a whitelisted extension.
    fn should_process(path: &Path, config: &WatcherConfig) -> bool {
        let path_str = path.to_string_lossy();

        // Check ignore patterns
        for pattern in &config.ignore_patterns {
            // Simple glob matching (basic implementation)
            if Self::glob_match(&path_str, pattern) {
                return false;
            }
        }

        // Check extensions (case-insensitive comparison on both sides)
        if let Some(ref extensions) = config.watch_extensions {
            if let Some(ext) = path.extension() {
                let ext_str = ext.to_string_lossy().to_lowercase();
                if !extensions.iter().any(|e| e.to_lowercase() == ext_str) {
                    return false;
                }
            } else {
                return false; // No extension and we're filtering by extension
            }
        }

        true
    }

    /// Simple glob pattern matching.
    ///
    /// Supports only the subset needed by the default ignore list:
    /// two-part `prefix**suffix` patterns (with a leading `*` in the suffix
    /// treated as an extension match, e.g. `*.lock`), and bare substring
    /// matching otherwise.
    fn glob_match(path: &str, pattern: &str) -> bool {
        // Handle ** (match any path)
        if pattern.contains("**") {
            let parts: Vec<_> = pattern.split("**").collect();
            // Only the two-part form is handled here; patterns like
            // `**/dir/**` split into three parts and fall through to the
            // substring logic below (e.g. "/dir/" as a substring).
            if parts.len() == 2 {
                let prefix = parts[0].trim_end_matches('/');
                let suffix = parts[1].trim_start_matches('/');

                let prefix_match = prefix.is_empty() || path.starts_with(prefix);

                // Handle suffix with wildcards like *.lock
                let suffix_match = if suffix.is_empty() {
                    true
                } else if suffix.starts_with('*') {
                    // Pattern like *.lock - match the extension
                    let ext_pattern = suffix.trim_start_matches('*');
                    path.ends_with(ext_pattern)
                } else {
                    // Exact suffix match
                    path.ends_with(suffix) || path.contains(&format!("/{}", suffix))
                };

                return prefix_match && suffix_match;
            }
        }

        // Handle * (match single component)
        if pattern.contains('*') {
            let pattern = pattern.replace('*', "");
            return path.contains(&pattern);
        }

        // Direct match
        path.contains(pattern)
    }

    /// Detect a language name from the file extension (lowercased).
    /// Unknown or missing extensions yield `"unknown"`.
    fn detect_language(path: &Path) -> String {
        path.extension()
            .map(|e| {
                let ext = e.to_string_lossy().to_lowercase();
                match ext.as_str() {
                    "rs" => "rust",
                    "ts" | "tsx" => "typescript",
                    "js" | "jsx" => "javascript",
                    "py" => "python",
                    "go" => "go",
                    "java" => "java",
                    "kt" | "kts" => "kotlin",
                    "swift" => "swift",
                    "cs" => "csharp",
                    "cpp" | "cc" | "cxx" | "c" | "h" | "hpp" => "cpp",
                    "rb" => "ruby",
                    "php" => "php",
                    _ => "unknown",
                }
                .to_string()
            })
            .unwrap_or_else(|| "unknown".to_string())
    }

    /// Get a snapshot of the currently watched paths.
    pub async fn get_watched_paths(&self) -> Vec<PathBuf> {
        self.watched_paths.read().await.iter().cloned().collect()
    }

    /// Check if a path is being watched (canonicalized when possible,
    /// falling back to the path as given).
    pub async fn is_watching(&self, path: &Path) -> bool {
        let path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
        self.watched_paths.read().await.contains(&path)
    }

    /// Get the current configuration.
    pub fn config(&self) -> &WatcherConfig {
        &self.config
    }

    /// Update the configuration.
    // NOTE(review): affects only watches started after this call — workers
    // already spawned hold a clone of the previous config.
    pub fn set_config(&mut self, config: WatcherConfig) {
        self.config = config;
    }
}
impl Drop for CodebaseWatcher {
|
||||
fn drop(&mut self) {
|
||||
// Signal watcher thread to exit
|
||||
self.running.store(false, Ordering::SeqCst);
|
||||
|
||||
// Signal async task to exit
|
||||
if let Some(tx) = &self.shutdown_tx {
|
||||
let _ = tx.send(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
// MANUAL EVENT HANDLER (for non-async contexts)
// ============================================================================

/// Handles file events manually (for callers that drive events themselves
/// instead of running the filesystem watcher).
pub struct ManualEventHandler {
    /// Relationship tracker for recording co-edits.
    tracker: Arc<RwLock<RelationshipTracker>>,
    /// Pattern detector run over modified file contents.
    detector: Arc<RwLock<PatternDetector>>,
    /// Files seen in the current (manually managed) session.
    session_files: HashSet<PathBuf>,
    /// Filtering/feature configuration (defaults only; no setter here).
    config: WatcherConfig,
}
impl ManualEventHandler {
|
||||
/// Create a new manual event handler
|
||||
pub fn new(
|
||||
tracker: Arc<RwLock<RelationshipTracker>>,
|
||||
detector: Arc<RwLock<PatternDetector>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
tracker,
|
||||
detector,
|
||||
session_files: HashSet::new(),
|
||||
config: WatcherConfig::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle a file modification event
|
||||
pub async fn on_file_modified(&mut self, path: &Path) -> Result<()> {
|
||||
if !CodebaseWatcher::should_process(path, &self.config) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Add to session
|
||||
self.session_files.insert(path.to_path_buf());
|
||||
|
||||
// Record co-edit if we have multiple files
|
||||
if self.session_files.len() >= 2 {
|
||||
let files: Vec<_> = self.session_files.iter().cloned().collect();
|
||||
let mut tracker = self.tracker.write().await;
|
||||
tracker.record_coedit(&files)?;
|
||||
}
|
||||
|
||||
// Detect patterns
|
||||
if self.config.detect_patterns {
|
||||
if let Ok(content) = std::fs::read_to_string(path) {
|
||||
let language = CodebaseWatcher::detect_language(path);
|
||||
let detector = self.detector.read().await;
|
||||
let _ = detector.detect_patterns(&content, &language);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle a file creation event
|
||||
pub async fn on_file_created(&mut self, path: &Path) -> Result<()> {
|
||||
self.on_file_modified(path).await
|
||||
}
|
||||
|
||||
/// Handle a file deletion event
|
||||
pub async fn on_file_deleted(&mut self, path: &Path) -> Result<()> {
|
||||
self.session_files.remove(path);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Clear the current session
|
||||
pub fn clear_session(&mut self) {
|
||||
self.session_files.clear();
|
||||
}
|
||||
|
||||
/// Finalize the current session
|
||||
pub async fn finalize_session(&mut self) -> Result<()> {
|
||||
if self.session_files.len() >= 2 {
|
||||
let files: Vec<_> = self.session_files.iter().cloned().collect();
|
||||
let mut tracker = self.tracker.write().await;
|
||||
tracker.record_coedit(&files)?;
|
||||
}
|
||||
self.session_files.clear();
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
// TESTS
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    /// `glob_match` must accept the default ignore patterns and reject
    /// ordinary source paths.
    #[test]
    fn test_glob_match() {
        // Match any path with pattern
        assert!(CodebaseWatcher::glob_match(
            "/project/node_modules/foo/bar.js",
            "**/node_modules/**"
        ));
        assert!(CodebaseWatcher::glob_match(
            "/project/target/debug/main",
            "**/target/**"
        ));
        assert!(CodebaseWatcher::glob_match(
            "/project/.git/config",
            "**/.git/**"
        ));

        // Extension matching (suffix wildcard form, e.g. `*.lock`)
        assert!(CodebaseWatcher::glob_match(
            "/project/Cargo.lock",
            "**/*.lock"
        ));

        // Non-matches
        assert!(!CodebaseWatcher::glob_match(
            "/project/src/main.rs",
            "**/node_modules/**"
        ));
    }

    /// `should_process` must combine the ignore patterns and the
    /// extension whitelist of the default config correctly.
    #[test]
    fn test_should_process() {
        let config = WatcherConfig::default();

        // Should process source files
        assert!(CodebaseWatcher::should_process(
            Path::new("/project/src/main.rs"),
            &config
        ));
        assert!(CodebaseWatcher::should_process(
            Path::new("/project/src/app.tsx"),
            &config
        ));

        // Should not process node_modules
        assert!(!CodebaseWatcher::should_process(
            Path::new("/project/node_modules/foo/index.js"),
            &config
        ));

        // Should not process target
        assert!(!CodebaseWatcher::should_process(
            Path::new("/project/target/debug/main"),
            &config
        ));

        // Should not process lock files
        assert!(!CodebaseWatcher::should_process(
            Path::new("/project/Cargo.lock"),
            &config
        ));
    }

    /// Extension-to-language mapping covers the main supported languages
    /// and is driven purely by the file extension.
    #[test]
    fn test_detect_language() {
        assert_eq!(
            CodebaseWatcher::detect_language(Path::new("main.rs")),
            "rust"
        );
        assert_eq!(
            CodebaseWatcher::detect_language(Path::new("app.tsx")),
            "typescript"
        );
        assert_eq!(
            CodebaseWatcher::detect_language(Path::new("script.js")),
            "javascript"
        );
        assert_eq!(
            CodebaseWatcher::detect_language(Path::new("main.py")),
            "python"
        );
        assert_eq!(CodebaseWatcher::detect_language(Path::new("main.go")), "go");
    }

    /// A session deduplicates files and is not expired immediately after
    /// an edit.
    #[test]
    fn test_edit_session() {
        let mut session = EditSession::new();

        session.add_file(PathBuf::from("a.rs"));
        session.add_file(PathBuf::from("b.rs"));

        assert_eq!(session.files.len(), 2);
        assert!(!session.is_expired(Duration::from_secs(60)));
    }

    /// The default config enables both detection features and ships
    /// non-empty filter lists.
    #[test]
    fn test_watcher_config_default() {
        let config = WatcherConfig::default();

        assert!(!config.ignore_patterns.is_empty());
        assert!(config.watch_extensions.is_some());
        assert!(config.detect_patterns);
        assert!(config.track_relationships);
    }
}
Loading…
Add table
Add a link
Reference in a new issue