//! Diff engine: tree comparison and line-level file diffs. //! //! Provides recursive tree diffing (which files changed between two commits) //! and line-level unified diffs via the `similar` crate. use crate::store; use serde::Serialize; use std::collections::HashMap; use worker::*; type Url = worker::Url; // --------------------------------------------------------------------------- // Types // --------------------------------------------------------------------------- #[derive(Debug, Clone, Serialize)] #[serde(rename_all = "lowercase")] pub enum DiffStatus { Added, Deleted, Modified, } #[derive(Debug, Clone, Serialize)] pub struct FileDiff { pub path: String, pub status: DiffStatus, #[serde(skip_serializing_if = "Option::is_none")] pub old_hash: Option, #[serde(skip_serializing_if = "Option::is_none")] pub new_hash: Option, #[serde(skip_serializing_if = "Option::is_none")] pub hunks: Option>, } #[derive(Debug, Clone, Serialize)] pub struct Hunk { pub old_start: usize, pub old_count: usize, pub new_start: usize, pub new_count: usize, pub lines: Vec, } #[derive(Debug, Clone, Serialize)] pub struct DiffLine { pub tag: &'static str, // "context", "add", "delete", "binary" pub content: String, } #[derive(Debug, Serialize)] pub struct DiffStats { pub files_changed: usize, pub additions: usize, pub deletions: usize, } #[derive(Debug, Serialize)] pub struct CommitDiff { pub commit_hash: String, #[serde(skip_serializing_if = "Option::is_none")] pub parent_hash: Option, pub files: Vec, pub stats: DiffStats, } #[derive(Debug, Serialize)] pub struct Comparison { pub base_hash: String, pub head_hash: String, pub files: Vec, pub stats: DiffStats, } // --------------------------------------------------------------------------- // Tree diff: recursive comparison of two trees // --------------------------------------------------------------------------- /// Compare two trees and return a flat list of file-level changes. /// If `old_tree` is None, all files in `new_tree` are treated as added (root commit). /// If `new_tree` is None, all files in `old_tree` are treated as deleted. pub fn diff_trees( sql: &SqlStorage, old_tree: Option<&str>, new_tree: Option<&str>, ) -> Result> { let mut diffs = Vec::new(); diff_trees_recursive(sql, old_tree, new_tree, "", &mut diffs)?; diffs.sort_by(|a, b| a.path.cmp(&b.path)); Ok(diffs) } fn diff_trees_recursive( sql: &SqlStorage, old_tree: Option<&str>, new_tree: Option<&str>, prefix: &str, diffs: &mut Vec, ) -> Result<()> { let old_entries = match old_tree { Some(hash) => load_tree_entries(sql, hash)?, None => HashMap::new(), }; let new_entries = match new_tree { Some(hash) => load_tree_entries(sql, hash)?, None => HashMap::new(), }; // Collect all entry names, sorted for deterministic output let all_names: Vec = { let mut set = std::collections::BTreeSet::new(); for k in old_entries.keys() { set.insert(k.clone()); } for k in new_entries.keys() { set.insert(k.clone()); } set.into_iter().collect() }; for name in &all_names { let full_path = if prefix.is_empty() { name.clone() } else { format!("{}/{}", prefix, name) }; let old = old_entries.get(name); let new = new_entries.get(name); match (old, new) { // Same hash — nothing changed in this subtree/file (Some(o), Some(n)) if o.hash == n.hash => continue, // Both exist, different hashes (Some(o), Some(n)) => { let o_is_tree = o.mode == 0o040000; let n_is_tree = n.mode == 0o040000; match (o_is_tree, n_is_tree) { (true, true) => { // Both subtrees — recurse diff_trees_recursive(sql, Some(&o.hash), Some(&n.hash), &full_path, diffs)?; } (false, false) => { // Both blobs — file modified diffs.push(FileDiff { path: full_path, status: DiffStatus::Modified, old_hash: Some(o.hash.clone()), new_hash: Some(n.hash.clone()), hunks: None, }); } (true, false) => { // Was subtree, now blob: delete old tree contents, add new blob diff_trees_recursive(sql, Some(&o.hash), None, &full_path, diffs)?; diffs.push(FileDiff { path: full_path, status: DiffStatus::Added, old_hash: None, new_hash: Some(n.hash.clone()), hunks: None, }); } (false, true) => { // Was blob, now subtree: delete old blob, add new tree contents diffs.push(FileDiff { path: full_path.clone(), status: DiffStatus::Deleted, old_hash: Some(o.hash.clone()), new_hash: None, hunks: None, }); diff_trees_recursive(sql, None, Some(&n.hash), &full_path, diffs)?; } } } // Only in old tree — deleted (Some(o), None) => { if o.mode == 0o040000 { diff_trees_recursive(sql, Some(&o.hash), None, &full_path, diffs)?; } else { diffs.push(FileDiff { path: full_path, status: DiffStatus::Deleted, old_hash: Some(o.hash.clone()), new_hash: None, hunks: None, }); } } // Only in new tree — added (None, Some(n)) => { if n.mode == 0o040000 { diff_trees_recursive(sql, None, Some(&n.hash), &full_path, diffs)?; } else { diffs.push(FileDiff { path: full_path, status: DiffStatus::Added, old_hash: None, new_hash: Some(n.hash.clone()), hunks: None, }); } } (None, None) => unreachable!(), } } Ok(()) } struct TreeEntryInfo { mode: u32, hash: String, } fn load_tree_entries(sql: &SqlStorage, tree_hash: &str) -> Result> { #[derive(serde::Deserialize)] struct Row { name: String, mode: i64, entry_hash: String, } let rows: Vec = sql .exec( "SELECT name, mode, entry_hash FROM trees WHERE tree_hash = ?", vec![SqlStorageValue::from(tree_hash.to_string())], )? .to_array()?; let mut map = HashMap::with_capacity(rows.len()); for row in rows { map.insert( row.name, TreeEntryInfo { mode: row.mode as u32, hash: row.entry_hash, }, ); } Ok(map) } // --------------------------------------------------------------------------- // Line-level diff using `similar` // --------------------------------------------------------------------------- /// Compute a line-level diff between two blobs. Returns hunks with context. pub fn diff_blobs(old_content: &[u8], new_content: &[u8], context_lines: usize) -> Vec { if is_binary(old_content) || is_binary(new_content) { return vec![Hunk { old_start: 0, old_count: 0, new_start: 0, new_count: 0, lines: vec![DiffLine { tag: "binary", content: "Binary files differ".to_string(), }], }]; } let old_text = String::from_utf8_lossy(old_content); let new_text = String::from_utf8_lossy(new_content); let diff = similar::TextDiff::from_lines(old_text.as_ref(), new_text.as_ref()); let mut hunks = Vec::new(); for group in diff.grouped_ops(context_lines) { if group.is_empty() { continue; } let old_range_start = group.first().unwrap().old_range().start; let old_range_end = group.last().unwrap().old_range().end; let new_range_start = group.first().unwrap().new_range().start; let new_range_end = group.last().unwrap().new_range().end; let mut lines = Vec::new(); for op in &group { for change in diff.iter_changes(op) { let tag = match change.tag() { similar::ChangeTag::Equal => "context", similar::ChangeTag::Insert => "add", similar::ChangeTag::Delete => "delete", }; lines.push(DiffLine { tag, content: change.value().to_string(), }); } } hunks.push(Hunk { old_start: if old_range_start == old_range_end { 0 } else { old_range_start + 1 }, old_count: old_range_end - old_range_start, new_start: if new_range_start == new_range_end { 0 } else { new_range_start + 1 }, new_count: new_range_end - new_range_start, lines, }); } hunks } /// Check for null bytes in the first 8 KiB — heuristic for binary content. fn is_binary(data: &[u8]) -> bool { let check_len = data.len().min(8192); data[..check_len].contains(&0) } // --------------------------------------------------------------------------- // High-level diff operations // --------------------------------------------------------------------------- /// Diff a commit against its first parent. Root commits diff against empty tree. /// When `with_content` is true, line-level hunks are computed for each file. pub fn diff_commit( sql: &SqlStorage, commit_hash: &str, with_content: bool, context_lines: usize, ) -> Result { let (tree_hash, parents) = load_commit_info(sql, commit_hash)?; let parent_tree = if let Some(parent_hash) = parents.first() { let (pt, _) = load_commit_info(sql, parent_hash)?; Some(pt) } else { None // root commit — everything is new }; let mut files = diff_trees(sql, parent_tree.as_deref(), Some(&tree_hash))?; if with_content { populate_hunks(sql, &mut files, context_lines)?; } let stats = compute_stats(&files); Ok(CommitDiff { commit_hash: commit_hash.to_string(), parent_hash: parents.into_iter().next(), files, stats, }) } /// Diff two commits by comparing their trees. pub fn compare( sql: &SqlStorage, base_hash: &str, head_hash: &str, with_content: bool, context_lines: usize, ) -> Result { let (base_tree, _) = load_commit_info(sql, base_hash)?; let (head_tree, _) = load_commit_info(sql, head_hash)?; let mut files = diff_trees(sql, Some(&base_tree), Some(&head_tree))?; if with_content { populate_hunks(sql, &mut files, context_lines)?; } let stats = compute_stats(&files); Ok(Comparison { base_hash: base_hash.to_string(), head_hash: head_hash.to_string(), files, stats, }) } /// Fill in line-level hunks for every file diff. fn populate_hunks(sql: &SqlStorage, files: &mut [FileDiff], context_lines: usize) -> Result<()> { for file in files.iter_mut() { let old = match &file.old_hash { Some(h) => load_blob_content(sql, h)?.unwrap_or_default(), None => Vec::new(), }; let new = match &file.new_hash { Some(h) => load_blob_content(sql, h)?.unwrap_or_default(), None => Vec::new(), }; file.hunks = Some(diff_blobs(&old, &new, context_lines)); } Ok(()) } // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- fn load_blob_content(sql: &SqlStorage, blob_hash: &str) -> Result>> { #[derive(serde::Deserialize)] struct BlobInfo { group_id: i64, version_in_group: i64, } let rows: Vec = sql .exec( "SELECT group_id, version_in_group FROM blobs WHERE blob_hash = ?", vec![SqlStorageValue::from(blob_hash.to_string())], )? .to_array()?; match rows.into_iter().next() { Some(info) => { let content = store::reconstruct_blob(sql, info.group_id, info.version_in_group)?; Ok(Some(content)) } None => Ok(None), } } fn load_commit_info(sql: &SqlStorage, hash: &str) -> Result<(String, Vec)> { #[derive(serde::Deserialize)] struct CommitRow { tree_hash: String, } let commits: Vec = sql .exec( "SELECT tree_hash FROM commits WHERE hash = ?", vec![SqlStorageValue::from(hash.to_string())], )? .to_array()?; let tree_hash = commits .into_iter() .next() .ok_or_else(|| Error::RustError(format!("commit not found: {}", hash)))? .tree_hash; #[derive(serde::Deserialize)] struct ParentRow { parent_hash: String, } let parents: Vec = sql .exec( "SELECT parent_hash FROM commit_parents WHERE commit_hash = ? ORDER BY ordinal ASC", vec![SqlStorageValue::from(hash.to_string())], )? .to_array::()? .into_iter() .map(|p| p.parent_hash) .collect(); Ok((tree_hash, parents)) } fn compute_stats(files: &[FileDiff]) -> DiffStats { let mut additions = 0usize; let mut deletions = 0usize; for file in files { if let Some(hunks) = &file.hunks { for hunk in hunks { for line in &hunk.lines { match line.tag { "add" => additions += 1, "delete" => deletions += 1, _ => {} } } } } else { // No content diff — count at file level match file.status { DiffStatus::Added => additions += 1, DiffStatus::Deleted => deletions += 1, DiffStatus::Modified => { additions += 1; deletions += 1; } } } } DiffStats { files_changed: files.len(), additions, deletions, } } /// Try to resolve a string as a commit hash or ref name. fn resolve_to_commit(sql: &SqlStorage, name: &str) -> Result { // Check if it's already a known commit hash #[derive(serde::Deserialize)] struct Count { n: i64, } let existing: Count = sql .exec( "SELECT COUNT(*) AS n FROM commits WHERE hash = ?", vec![SqlStorageValue::from(name.to_string())], )? .one()?; if existing.n > 0 { return Ok(name.to_string()); } // Try as a ref name match crate::api::resolve_ref(sql, name)? { Some(hash) => Ok(hash), None => Err(Error::RustError(format!( "'{}' is not a known commit or ref", name ))), } } // --------------------------------------------------------------------------- // HTTP handlers // --------------------------------------------------------------------------- /// GET /diff/:sha?context=3&stat=1 pub fn handle_diff(sql: &SqlStorage, sha: &str, url: &Url) -> Result { if sha.is_empty() { return Response::error("missing commit hash", 400); } let context: usize = crate::api::get_query(url, "context") .and_then(|v| v.parse().ok()) .unwrap_or(3); let stat_only = crate::api::get_query(url, "stat") .map(|v| v == "1") .unwrap_or(false); let result = diff_commit(sql, sha, !stat_only, context)?; Response::from_json(&result) } /// GET /compare/base...head?context=3&stat=1 pub fn handle_compare(sql: &SqlStorage, spec: &str, url: &Url) -> Result { // Accept both "base...head" (three-dot) and "base..head" (two-dot) let (base, head) = if let Some(pos) = spec.find("...") { (&spec[..pos], &spec[pos + 3..]) } else if let Some(pos) = spec.find("..") { (&spec[..pos], &spec[pos + 2..]) } else { return Response::error("use /compare/base...head or /compare/base..head", 400); }; if base.is_empty() || head.is_empty() { return Response::error("both base and head are required", 400); } let base_hash = resolve_to_commit(sql, base)?; let head_hash = resolve_to_commit(sql, head)?; let context: usize = crate::api::get_query(url, "context") .and_then(|v| v.parse().ok()) .unwrap_or(3); let stat_only = crate::api::get_query(url, "stat") .map(|v| v == "1") .unwrap_or(false); let result = compare(sql, &base_hash, &head_hash, !stat_only, context)?; Response::from_json(&result) }