Sikuwa first commit

2026-02-20 23:53:48 +08:00
commit 13a1072c6f
57 changed files with 13519 additions and 0 deletions
--- a/incremental/cpp/CMakeLists.txt
+++ b/incremental/cpp/CMakeLists.txt
@@ -0,0 +1,45 @@
+# sikuwa/incremental/cpp/CMakeLists.txt
+cmake_minimum_required(VERSION 3.14)
+project(incremental_engine)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+
+# Locate Python and pybind11
+find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
+find_package(pybind11 CONFIG QUIET)
+
+if(NOT pybind11_FOUND)
+    # pybind11 is not installed: download it with FetchContent
+    include(FetchContent)
+    FetchContent_Declare(
+        pybind11
+        GIT_REPOSITORY https://github.com/pybind/pybind11.git
+        GIT_TAG v2.11.1
+    )
+    FetchContent_MakeAvailable(pybind11)
+endif()
+
+# Source files
+set(SOURCES
+    incremental_core.cpp
+    pybind_incremental.cpp
+)
+
+set(HEADERS
+    incremental_core.h
+)
+
+# Build the Python extension module
+pybind11_add_module(incremental_engine ${SOURCES} ${HEADERS})
+
+# Optimization and warning flags.
+# Each generator expression is quoted so it reaches the command as ONE argument,
+# and the flags inside it form a ';'-separated list so they are passed to the
+# compiler as separate options. An unquoted expression containing spaces may be
+# split by the argument parser and no longer be recognized as an expression.
+target_compile_options(incremental_engine PRIVATE
+    "$<$<CXX_COMPILER_ID:GNU>:-O3;-Wall;-Wextra>"
+    "$<$<CXX_COMPILER_ID:Clang>:-O3;-Wall;-Wextra>"
+    "$<$<CXX_COMPILER_ID:MSVC>:/O2;/W4>"
+)
+
+# Install
+install(TARGETS incremental_engine DESTINATION .)
--- a/incremental/cpp/incremental_core.cpp
+++ b/incremental/cpp/incremental_core.cpp
@@ -0,0 +1,777 @@
+// sikuwa/incremental/cpp/incremental_core.cpp
+// 减量编译核心 - C++ 实现
+
+#include "incremental_core.h"
+#include <fstream>
+#include <sstream>
+#include <algorithm>
+#include <cstring>
+#include <iomanip>
+
+namespace sikuwa {
+namespace incremental {
+
+// ============================================================================
+// 工具函数实现
+// ============================================================================
+
+// 64-bit FNV-1a hash of the `len` bytes starting at `data`.
+//
+// Every byte is read through `unsigned char` before being mixed in. Widening a
+// plain `char` directly would sign-extend bytes >= 0x80 on platforms where
+// `char` is signed, flipping the upper 56 bits and making the hash of
+// non-ASCII content differ between platforms.
+static uint64_t fnv1a_hash(const char* data, size_t len) {
+    constexpr uint64_t kOffsetBasis = 14695981039346656037ULL;
+    constexpr uint64_t kPrime = 1099511628211ULL;
+
+    uint64_t hash = kOffsetBasis;
+    for (size_t i = 0; i < len; ++i) {
+        hash ^= static_cast<uint64_t>(static_cast<unsigned char>(data[i]));
+        hash *= kPrime;
+    }
+    return hash;
+}
+
+// Builds a unit identifier "<file>:<start>:<end>:<hash prefix>", where the
+// hash prefix is at most the first 8 characters of `content_hash`.
+std::string generate_unit_id(const std::string& file_path, int start_line,
+                            int end_line, const std::string& content_hash) {
+    const std::string hash_prefix = content_hash.substr(0, 8);
+
+    std::ostringstream id_stream;
+    id_stream << file_path << ':' << start_line << ':' << end_line << ':' << hash_prefix;
+    return id_stream.str();
+}
+
+// Milliseconds elapsed since the std::chrono::system_clock epoch.
+int64_t current_timestamp() {
+    using std::chrono::duration_cast;
+    using std::chrono::milliseconds;
+    using std::chrono::system_clock;
+
+    const auto since_epoch = system_clock::now().time_since_epoch();
+    return duration_cast<milliseconds>(since_epoch).count();
+}
+
+// Returns the full content of `path`; an empty string when the file cannot be
+// opened (indistinguishable from an empty file).
+std::string read_file(const std::string& path) {
+    std::ifstream input(path);
+    if (!input.is_open()) {
+        return "";
+    }
+
+    std::ostringstream buffer;
+    buffer << input.rdbuf();
+    return buffer.str();
+}
+
+// Writes `content` to `path`, replacing the file. A path that cannot be opened
+// for writing is silently ignored.
+void write_file(const std::string& path, const std::string& content) {
+    std::ofstream output(path);
+    if (!output.is_open()) {
+        return;
+    }
+    output << content;
+}
+
+// Splits `content` at '\n'. The separators are dropped and a trailing newline
+// does not produce an extra empty element.
+std::vector<std::string> split_lines(const std::string& content) {
+    std::istringstream reader(content);
+    std::vector<std::string> result;
+    for (std::string current; std::getline(reader, current);) {
+        result.push_back(current);
+    }
+    return result;
+}
+
+// Concatenates `lines` with a single '\n' between consecutive elements
+// (no trailing newline).
+std::string join_lines(const std::vector<std::string>& lines) {
+    std::ostringstream joined;
+    bool is_first = true;
+    for (const std::string& line : lines) {
+        if (!is_first) {
+            joined << "\n";
+        }
+        joined << line;
+        is_first = false;
+    }
+    return joined.str();
+}
+
+// ============================================================================
+// UnitManager 实现
+// ============================================================================
+
+// Nothing to set up or tear down explicitly: the member containers manage
+// their own storage.
+UnitManager::UnitManager() = default;
+UnitManager::~UnitManager() = default;
+
+// Stores `unit` under `unit.id`, replacing any unit already registered with
+// that id, and lists the id in the index of `unit.file_path`.
+//
+// When the id is already known, its previous index entry is removed first.
+// Otherwise the id would be listed twice (get_units_by_file would then return
+// the same unit twice) or stay behind under a file path it no longer has.
+void UnitManager::add_unit(const CompilationUnit& unit) {
+    const auto existing = units_.find(unit.id);
+    if (existing != units_.end()) {
+        const auto indexed = file_units_.find(existing->second.file_path);
+        if (indexed != file_units_.end()) {
+            auto& ids = indexed->second;
+            ids.erase(std::remove(ids.begin(), ids.end(), unit.id), ids.end());
+        }
+    }
+
+    units_[unit.id] = unit;
+    file_units_[unit.file_path].push_back(unit.id);
+}
+
+// Overwrites the unit stored under `id` with `unit`. Unknown ids are ignored;
+// the per-file index is not touched.
+void UnitManager::update_unit(const std::string& id, const CompilationUnit& unit) {
+    const auto found = units_.find(id);
+    if (found == units_.end()) {
+        return;
+    }
+    found->second = unit;
+}
+
+// Removes the unit stored under `id`, if any: drops it from the per-file index
+// and from the `dependents` list of every unit it depended on, then erases it.
+void UnitManager::remove_unit(const std::string& id) {
+    const auto target = units_.find(id);
+    if (target == units_.end()) {
+        return;
+    }
+    const CompilationUnit& doomed = target->second;
+
+    // Drop the id from the index of its file.
+    std::vector<std::string>& indexed = file_units_[doomed.file_path];
+    indexed.erase(std::remove(indexed.begin(), indexed.end(), id), indexed.end());
+
+    // Unlink the back-references held by the units this one depended on.
+    for (const std::string& dependency_id : doomed.dependencies) {
+        const auto dependency = units_.find(dependency_id);
+        if (dependency == units_.end()) {
+            continue;
+        }
+        std::vector<std::string>& back_refs = dependency->second.dependents;
+        back_refs.erase(std::remove(back_refs.begin(), back_refs.end(), id),
+                        back_refs.end());
+    }
+
+    units_.erase(target);
+}
+
+// Returns a pointer to the unit stored under `id`, or nullptr when the id is
+// unknown. The pointer refers to the element held in `units_`.
+CompilationUnit* UnitManager::get_unit(const std::string& id) {
+    const auto found = units_.find(id);
+    if (found == units_.end()) {
+        return nullptr;
+    }
+    return &found->second;
+}
+
+// Read-only lookup: pointer to the unit stored under `id`, or nullptr when the
+// id is unknown.
+const CompilationUnit* UnitManager::get_unit(const std::string& id) const {
+    const auto found = units_.find(id);
+    if (found == units_.end()) {
+        return nullptr;
+    }
+    return &found->second;
+}
+
+// Returns the units indexed under `file_path`, ordered by ascending start line.
+// Ids in the index that no longer resolve to a unit are skipped.
+std::vector<CompilationUnit*> UnitManager::get_units_by_file(const std::string& file_path) {
+    std::vector<CompilationUnit*> matches;
+
+    const auto entry = file_units_.find(file_path);
+    if (entry != file_units_.end()) {
+        for (const std::string& unit_id : entry->second) {
+            CompilationUnit* unit = get_unit(unit_id);
+            if (unit != nullptr) {
+                matches.push_back(unit);
+            }
+        }
+    }
+
+    // Order by start line.
+    std::sort(matches.begin(), matches.end(),
+              [](const CompilationUnit* lhs, const CompilationUnit* rhs) {
+                  return lhs->start_line < rhs->start_line;
+              });
+    return matches;
+}
+
+// Returns the units of `file_path` whose [start_line, end_line] span overlaps
+// the inclusive line range [start, end], ordered by start line.
+std::vector<CompilationUnit*> UnitManager::get_units_in_range(
+    const std::string& file_path, int start, int end) {
+    std::vector<CompilationUnit*> overlapping;
+    for (CompilationUnit* candidate : get_units_by_file(file_path)) {
+        const bool starts_after_range = candidate->start_line > end;
+        const bool ends_before_range = candidate->end_line < start;
+        if (starts_after_range || ends_before_range) {
+            continue;  // no intersection
+        }
+        overlapping.push_back(candidate);
+    }
+    return overlapping;
+}
+
+// Records that unit `from_id` depends on unit `to_id`. Both directions of the
+// edge are stored, each at most once. Nothing happens unless both units exist.
+void UnitManager::add_dependency(const std::string& from_id, const std::string& to_id) {
+    CompilationUnit* const source = get_unit(from_id);
+    CompilationUnit* const target = get_unit(to_id);
+    if (source == nullptr || target == nullptr) {
+        return;
+    }
+
+    const auto append_unique = [](std::vector<std::string>& list, const std::string& value) {
+        if (std::find(list.begin(), list.end(), value) == list.end()) {
+            list.push_back(value);
+        }
+    };
+
+    append_unique(source->dependencies, to_id);   // from depends on to
+    append_unique(target->dependents, from_id);   // to is depended on by from
+}
+
+// Removes the "from depends on to" edge. Each side is cleaned up independently,
+// so the call still works when only one of the two units exists.
+void UnitManager::remove_dependency(const std::string& from_id, const std::string& to_id) {
+    const auto erase_all = [](std::vector<std::string>& list, const std::string& value) {
+        list.erase(std::remove(list.begin(), list.end(), value), list.end());
+    };
+
+    CompilationUnit* const source = get_unit(from_id);
+    CompilationUnit* const target = get_unit(to_id);
+
+    if (source != nullptr) {
+        erase_all(source->dependencies, to_id);
+    }
+    if (target != nullptr) {
+        erase_all(target->dependents, from_id);
+    }
+}
+
+// Copy of the ids unit `id` depends on; empty when the unit is unknown.
+std::vector<std::string> UnitManager::get_dependencies(const std::string& id) const {
+    if (const CompilationUnit* unit = get_unit(id)) {
+        return unit->dependencies;
+    }
+    return {};
+}
+
+// Copy of the ids that depend on unit `id`; empty when the unit is unknown.
+std::vector<std::string> UnitManager::get_dependents(const std::string& id) const {
+    if (const CompilationUnit* unit = get_unit(id)) {
+        return unit->dependents;
+    }
+    return {};
+}
+
+// Depth-first walk over the `dependents` edges starting at `id`. Every id
+// reached (including `id` itself, even if no such unit exists) is added to
+// `visited`, which also stops the walk on cycles.
+void UnitManager::collect_affected_recursive(const std::string& id,
+                                             std::unordered_set<std::string>& visited) const {
+    // insert() reports whether the id was new; a known id was already expanded.
+    const bool newly_seen = visited.insert(id).second;
+    if (!newly_seen) {
+        return;
+    }
+
+    const CompilationUnit* unit = get_unit(id);
+    if (unit == nullptr) {
+        return;
+    }
+
+    // Recurse into every unit that depends on this one.
+    for (const std::string& dependent_id : unit->dependents) {
+        collect_affected_recursive(dependent_id, visited);
+    }
+}
+
+// Ids of all units that transitively depend on `changed_id`, excluding
+// `changed_id` itself. The order follows the hash set's iteration order.
+std::vector<std::string> UnitManager::get_affected_units(const std::string& changed_id) const {
+    std::unordered_set<std::string> reached;
+    collect_affected_recursive(changed_id, reached);
+    reached.erase(changed_id);  // the changed unit is not "affected" by itself
+
+    std::vector<std::string> affected;
+    affected.assign(reached.begin(), reached.end());
+    return affected;
+}
+
+// Invokes `callback` once for every stored unit, in the map's iteration order.
+void UnitManager::for_each(std::function<void(CompilationUnit&)> callback) {
+    for (auto& entry : units_) {
+        CompilationUnit& unit = entry.second;
+        callback(unit);
+    }
+}
+
+// Forgets every unit together with the per-file index.
+void UnitManager::clear() {
+    file_units_.clear();
+    units_.clear();
+}
+
+// Text dump of all units. The first line holds the unit count; each following
+// line is one unit with tab-separated fields:
+//   id, file_path, start_line, end_line, type (as int), name, content_hash,
+//   dependency count, then one field per dependency id.
+// `dependents`, state and cached output are not written.
+std::string UnitManager::serialize() const {
+    std::ostringstream out;
+    out << units_.size() << '\n';
+
+    for (const auto& entry : units_) {
+        const CompilationUnit& unit = entry.second;
+        out << unit.id << '\t'
+            << unit.file_path << '\t'
+            << unit.start_line << '\t'
+            << unit.end_line << '\t'
+            << static_cast<int>(unit.type) << '\t'
+            << unit.name << '\t'
+            << unit.content_hash << '\t'
+            << unit.dependencies.size();
+        for (const std::string& dependency_id : unit.dependencies) {
+            out << '\t' << dependency_id;
+        }
+        out << '\n';
+    }
+    return out.str();
+}
+
+// Replaces the current content with the units described by `data`, which is
+// expected to be in the format produced by serialize().
+//
+// Parsing is defensive: a missing/invalid count header restores nothing, a
+// record list shorter than announced stops at the last complete line, records
+// without an id are skipped, and the dependency loop stops as soon as the line
+// is exhausted (so a corrupt dependency count cannot spin the loop).
+// `dependents` lists are rebuilt from the restored `dependencies`.
+void UnitManager::deserialize(const std::string& data) {
+    clear();
+    std::istringstream iss(data);
+    size_t count = 0;
+    if (!(iss >> count)) {
+        return;  // empty or malformed header
+    }
+    iss.ignore();
+
+    for (size_t i = 0; i < count; ++i) {
+        std::string line;
+        if (!std::getline(iss, line)) {
+            break;  // fewer records than announced (truncated data)
+        }
+        std::istringstream line_iss(line);
+
+        CompilationUnit u;
+        int type_int = 0;
+        size_t dep_count = 0;
+
+        std::getline(line_iss, u.id, '\t');
+        std::getline(line_iss, u.file_path, '\t');
+        line_iss >> u.start_line;
+        line_iss.ignore();
+        line_iss >> u.end_line;
+        line_iss.ignore();
+        line_iss >> type_int;
+        u.type = static_cast<UnitType>(type_int);
+        line_iss.ignore();
+        std::getline(line_iss, u.name, '\t');
+        std::getline(line_iss, u.content_hash, '\t');
+        line_iss >> dep_count;
+
+        if (u.id.empty()) {
+            continue;  // blank or malformed record: a unit without id is unusable
+        }
+
+        for (size_t j = 0; j < dep_count; ++j) {
+            std::string dep;
+            line_iss.ignore();  // skip the tab in front of the field
+            if (!std::getline(line_iss, dep, '\t')) {
+                break;  // line exhausted before dep_count fields were read
+            }
+            if (!dep.empty()) {
+                u.dependencies.push_back(dep);
+            }
+        }
+
+        add_unit(u);
+    }
+
+    // Rebuild the reverse edges.
+    for (auto& pair : units_) {
+        for (const auto& dep_id : pair.second.dependencies) {
+            auto* dep_unit = get_unit(dep_id);
+            if (dep_unit) {
+                dep_unit->dependents.push_back(pair.first);
+            }
+        }
+    }
+}
+
+// ============================================================================
+// ChangeDetector 实现
+// ============================================================================
+
+// No explicit construction or destruction work is performed.
+ChangeDetector::ChangeDetector() = default;
+ChangeDetector::~ChangeDetector() = default;
+
+// Hashes `content` with fnv1a_hash and returns the 64-bit value as 16
+// zero-padded lowercase hexadecimal digits.
+std::string ChangeDetector::compute_hash(const std::string& content) {
+    const uint64_t digest = fnv1a_hash(content.data(), content.size());
+
+    std::ostringstream hex_text;
+    hex_text << std::setw(16) << std::setfill('0') << std::hex << digest;
+    return hex_text.str();
+}
+
+// Hash of `line` with leading/trailing spaces, tabs, CR and LF removed.
+// A line consisting only of such characters (or nothing) yields the literal
+// "empty" instead of a hex digest.
+std::string ChangeDetector::compute_line_hash(const std::string& line) {
+    const char* const kBlank = " \t\r\n";
+
+    const size_t first = line.find_first_not_of(kBlank);
+    if (first == std::string::npos) {
+        return "empty";
+    }
+    const size_t last = line.find_last_not_of(kBlank);
+    return compute_hash(line.substr(first, last - first + 1));
+}
+
+// Captures `content` as a snapshot of `file_path`: whole-content hash, current
+// timestamp and one whitespace-trimmed hash per line. `units` is left empty.
+Snapshot ChangeDetector::create_snapshot(const std::string& file_path, 
+                                         const std::string& content) {
+    Snapshot snapshot;
+    snapshot.file_path = file_path;
+    snapshot.content_hash = compute_hash(content);
+    snapshot.timestamp = current_timestamp();
+
+    const std::vector<std::string> lines = split_lines(content);
+    snapshot.line_hashes.reserve(lines.size());
+    for (size_t index = 0; index < lines.size(); ++index) {
+        snapshot.line_hashes.push_back(compute_line_hash(lines[index]));
+    }
+    return snapshot;
+}
+
+// Returns the 1-based numbers of the lines in `new_snap` that are not part of
+// the longest common subsequence with `old_snap`, i.e. inserted or modified
+// lines, in ascending order.
+//
+// Only lines of the new snapshot are reported: a line that was merely deleted
+// has no counterpart in the new file and produces no entry.
+std::vector<int> ChangeDetector::get_changed_lines(const Snapshot& old_snap,
+                                                   const Snapshot& new_snap) {
+    const size_t new_size = new_snap.line_hashes.size();
+
+    // Flag every new line that the LCS pairs with an old line.
+    std::vector<char> matched(new_size, 0);
+    for (const auto& pair : compute_lcs(old_snap.line_hashes, new_snap.line_hashes)) {
+        matched[static_cast<size_t>(pair.second)] = 1;
+    }
+
+    // Everything left unflagged is a change.
+    std::vector<int> changed;
+    for (size_t i = 0; i < new_size; ++i) {
+        if (!matched[i]) {
+            changed.push_back(static_cast<int>(i) + 1);  // 1-based
+        }
+    }
+    return changed;
+}
+
+// Longest common subsequence of two line-hash sequences.
+// Returns (old index, new index) pairs, 0-based, in ascending order, for the
+// lines that are matched with each other. Uses a full
+// (old_count + 1) x (new_count + 1) dynamic-programming table.
+std::vector<std::pair<int, int>> ChangeDetector::compute_lcs(
+    const std::vector<std::string>& old_lines,
+    const std::vector<std::string>& new_lines) {
+
+    const int old_count = static_cast<int>(old_lines.size());
+    const int new_count = static_cast<int>(new_lines.size());
+
+    // table[r][c] = LCS length of the first r old lines and first c new lines.
+    std::vector<std::vector<int>> table(old_count + 1, std::vector<int>(new_count + 1, 0));
+    for (int row = 1; row <= old_count; ++row) {
+        for (int col = 1; col <= new_count; ++col) {
+            if (old_lines[row - 1] == new_lines[col - 1]) {
+                table[row][col] = table[row - 1][col - 1] + 1;
+            } else {
+                table[row][col] = std::max(table[row - 1][col], table[row][col - 1]);
+            }
+        }
+    }
+
+    // Walk back from the bottom-right corner to recover the matched pairs.
+    std::vector<std::pair<int, int>> matches;
+    int row = old_count;
+    int col = new_count;
+    while (row > 0 && col > 0) {
+        const int old_index = row - 1;
+        const int new_index = col - 1;
+        if (old_lines[old_index] == new_lines[new_index]) {
+            matches.push_back({old_index, new_index});
+            --row;
+            --col;
+        } else if (table[row - 1][col] > table[row][col - 1]) {
+            --row;
+        } else {
+            --col;  // ties move left, towards earlier new lines
+        }
+    }
+
+    // Pairs were collected back to front.
+    std::reverse(matches.begin(), matches.end());
+    return matches;
+}
+
+// Compares the compilation units stored in two snapshots by id.
+//
+// Returns one record per difference:
+//   - DELETED  : id present only in `old_snap` (old line range filled in)
+//   - ADDED    : id present only in `new_snap` (new line range filled in)
+//   - MODIFIED : id present in both with different content hashes
+//                (both line ranges filled in)
+// Units whose content hash is unchanged produce no record.
+//
+// Every record is value-initialized, so line fields that do not apply to a
+// change type are 0 rather than indeterminate.
+std::vector<ChangeRecord> ChangeDetector::detect_changes(const Snapshot& old_snap,
+                                                          const Snapshot& new_snap) {
+    std::vector<ChangeRecord> records;
+
+    // Deleted units: known before, missing now.
+    for (const auto& pair : old_snap.units) {
+        if (new_snap.units.find(pair.first) != new_snap.units.end()) {
+            continue;
+        }
+        const auto& old_unit = pair.second;
+        ChangeRecord rec{};
+        rec.unit_id = pair.first;
+        rec.change_type = UnitState::DELETED;
+        rec.old_start_line = old_unit.start_line;
+        rec.old_end_line = old_unit.end_line;
+        rec.reason = "unit deleted";
+        records.push_back(rec);
+    }
+
+    // Added and modified units.
+    for (const auto& pair : new_snap.units) {
+        const auto& new_unit = pair.second;
+        const auto old_it = old_snap.units.find(pair.first);
+
+        if (old_it == old_snap.units.end()) {
+            ChangeRecord rec{};
+            rec.unit_id = pair.first;
+            rec.change_type = UnitState::ADDED;
+            rec.new_start_line = new_unit.start_line;
+            rec.new_end_line = new_unit.end_line;
+            rec.reason = "unit added";
+            records.push_back(rec);
+            continue;
+        }
+
+        const auto& old_unit = old_it->second;
+        if (old_unit.content_hash != new_unit.content_hash) {
+            ChangeRecord rec{};
+            rec.unit_id = pair.first;
+            rec.change_type = UnitState::MODIFIED;
+            rec.old_start_line = old_unit.start_line;
+            rec.old_end_line = old_unit.end_line;
+            rec.new_start_line = new_unit.start_line;
+            rec.new_end_line = new_unit.end_line;
+            rec.reason = "content changed";
+            records.push_back(rec);
+        }
+    }
+
+    return records;
+}
+
+// ============================================================================
+// CompilationCache 实现
+// ============================================================================
+
+// Remembers `cache_dir` and starts with zeroed hit/miss counters. Nothing is
+// read from disk here; call load() for that.
+CompilationCache::CompilationCache(const std::string& cache_dir)
+    : cache_dir_(cache_dir),
+      hits_(0),
+      misses_(0) {
+}
+
+// Writes the cache to disk when the object goes away.
+CompilationCache::~CompilationCache() {
+    save();
+}
+
+// True when an entry exists for `unit_id`. Does not touch the hit/miss counters.
+bool CompilationCache::has(const std::string& unit_id) const {
+    return cache_.count(unit_id) != 0;
+}
+
+// Returns the cached output for `unit_id` and counts a hit; when there is no
+// entry, counts a miss and returns an empty string.
+std::string CompilationCache::get(const std::string& unit_id) const {
+    const auto entry = cache_.find(unit_id);
+    if (entry == cache_.end()) {
+        ++misses_;
+        return "";
+    }
+    ++hits_;
+    return entry->second.output;
+}
+
+// Stores (or replaces) the entry for `unit_id` with the given output and
+// content hash, stamped with the current time.
+void CompilationCache::put(const std::string& unit_id, const std::string& output,
+                           const std::string& content_hash) {
+    CacheEntry fresh;
+    fresh.timestamp = current_timestamp();
+    fresh.content_hash = content_hash;
+    fresh.output = output;
+
+    cache_[unit_id] = fresh;
+}
+
+// Drops the entry for `unit_id`, if there is one.
+void CompilationCache::invalidate(const std::string& unit_id) {
+    const auto entry = cache_.find(unit_id);
+    if (entry != cache_.end()) {
+        cache_.erase(entry);
+    }
+}
+
+// Drops every entry held in memory. The hit/miss counters and the file on
+// disk are left untouched.
+void CompilationCache::invalidate_all()
+{
+    cache_.clear();
+}
+
+// True when an entry exists for `unit_id` and was stored for exactly
+// `current_hash`. Does not touch the hit/miss counters.
+bool CompilationCache::is_valid(const std::string& unit_id, 
+                                const std::string& current_hash) const {
+    const auto entry = cache_.find(unit_id);
+    return entry != cache_.end() && entry->second.content_hash == current_hash;
+}
+
+// Writes all entries to "<cache_dir>/incremental_cache.dat"; does nothing when
+// the file cannot be opened.
+//
+// Layout: entry count on the first line, then per entry the unit id, content
+// hash, timestamp and output length on one line each, followed directly by
+// the raw output bytes (no separator after them).
+void CompilationCache::save() {
+    const std::string cache_file = cache_dir_ + "/incremental_cache.dat";
+    std::ofstream out(cache_file);
+    if (!out.is_open()) {
+        return;
+    }
+
+    out << cache_.size() << '\n';
+    for (const auto& item : cache_) {
+        const CacheEntry& entry = item.second;
+        out << item.first << '\n'
+            << entry.content_hash << '\n'
+            << entry.timestamp << '\n'
+            << entry.output.size() << '\n'
+            << entry.output;
+    }
+}
+
+// Reads entries from "<cache_dir>/incremental_cache.dat" (the layout written
+// by save()) and merges them into the in-memory cache. A missing file is not
+// an error.
+//
+// The stream state is checked after every step: as soon as the file turns out
+// to be truncated or malformed, loading stops and only the entries read
+// completely up to that point are kept, so no entry with a garbage id or a
+// partially filled output is inserted.
+// NOTE: a corrupt but syntactically valid output length is still trusted when
+// allocating the buffer.
+void CompilationCache::load() {
+    std::string cache_file = cache_dir_ + "/incremental_cache.dat";
+    std::ifstream file(cache_file);
+    if (!file.is_open()) return;
+
+    size_t count = 0;
+    if (!(file >> count)) {
+        return;  // empty file or invalid header
+    }
+    file.ignore();
+
+    for (size_t i = 0; i < count; ++i) {
+        std::string unit_id, content_hash;
+        int64_t timestamp = 0;
+        size_t output_size = 0;
+
+        if (!std::getline(file, unit_id) || !std::getline(file, content_hash)) {
+            break;
+        }
+        if (!(file >> timestamp >> output_size)) {
+            break;
+        }
+        file.ignore();  // newline after the length line
+
+        std::string output(output_size, '\0');
+        if (output_size > 0) {
+            file.read(&output[0], static_cast<std::streamsize>(output_size));
+            if (static_cast<size_t>(file.gcount()) != output_size) {
+                break;  // output cut short
+            }
+        }
+
+        CacheEntry entry;
+        entry.output = output;
+        entry.content_hash = content_hash;
+        entry.timestamp = timestamp;
+        cache_[unit_id] = entry;
+    }
+}
+
+// ============================================================================
+// IncrementalEngine 实现
+// ============================================================================
+
+// Creates the engine with a cache rooted at `cache_dir` and immediately loads
+// whatever cache file is stored there.
+IncrementalEngine::IncrementalEngine(const std::string& cache_dir) : cache_(cache_dir)
+{
+    cache_.load();
+}
+
+// Persists the engine state on destruction.
+IncrementalEngine::~IncrementalEngine()
+{
+    save_state();
+}
+
+// Replaces the units registered for `file_path` with `units`: every unit
+// currently indexed under the file is removed, then the new ones are added.
+void IncrementalEngine::register_units(const std::string& file_path,
+                                       const std::vector<CompilationUnit>& units) {
+    // Drop what was registered for this file before.
+    for (CompilationUnit* stale : units_.get_units_by_file(file_path)) {
+        units_.remove_unit(stale->id);
+    }
+
+    // Register the replacement set.
+    for (size_t index = 0; index < units.size(); ++index) {
+        units_.add_unit(units[index]);
+    }
+}
+
+// Re-evaluates `file_path` against `new_content` and reports what has to be
+// rebuilt.
+//
+// With a previous snapshot of the file:
+//   - every registered unit overlapping a changed line is marked MODIFIED,
+//   - all units transitively depending on it are marked AFFECTED,
+//   - enclosing functions/classes are pulled in (expand_to_boundaries),
+//   - the pending-compile list is replaced by the resulting id set.
+// Without one (file seen for the first time): every registered unit of the
+// file is marked ADDED and appended to the pending-compile list.
+// In both cases the stored snapshot of the file is replaced afterwards.
+//
+// Returned records are value-initialized, so fields this function does not
+// set (old line range, reason) are 0 / empty rather than indeterminate.
+std::vector<ChangeRecord> IncrementalEngine::update_source(
+    const std::string& file_path, const std::string& new_content) {
+
+    // Snapshot of the new content.
+    Snapshot new_snap = detector_.create_snapshot(file_path, new_content);
+
+    // Previous snapshot, if any.
+    auto old_it = snapshots_.find(file_path);
+
+    std::vector<ChangeRecord> changes;
+    if (old_it != snapshots_.end()) {
+        // Lines that differ from the previous snapshot.
+        auto changed_lines = detector_.get_changed_lines(old_it->second, new_snap);
+
+        // Units touched by those lines, plus everything depending on them.
+        std::unordered_set<std::string> affected_ids;
+        for (int line : changed_lines) {
+            auto units = units_.get_units_in_range(file_path, line, line);
+            for (auto* unit : units) {
+                affected_ids.insert(unit->id);
+                unit->state = UnitState::MODIFIED;
+                unit->cache_valid = false;
+
+                auto dependents = units_.get_affected_units(unit->id);
+                for (const auto& dep_id : dependents) {
+                    affected_ids.insert(dep_id);
+                    auto* dep_unit = units_.get_unit(dep_id);
+                    if (dep_unit) {
+                        dep_unit->state = UnitState::AFFECTED;
+                        dep_unit->cache_valid = false;
+                    }
+                }
+            }
+        }
+
+        // Widen to complete enclosing structures.
+        std::vector<std::string> ids_to_expand(affected_ids.begin(), affected_ids.end());
+        expand_to_boundaries(file_path, ids_to_expand);
+        affected_ids = std::unordered_set<std::string>(ids_to_expand.begin(), ids_to_expand.end());
+
+        // One change record per affected unit that still exists.
+        for (const auto& id : affected_ids) {
+            auto* unit = units_.get_unit(id);
+            if (unit) {
+                ChangeRecord rec{};
+                rec.unit_id = id;
+                rec.change_type = unit->state;
+                rec.new_start_line = unit->start_line;
+                rec.new_end_line = unit->end_line;
+                changes.push_back(rec);
+            }
+        }
+
+        // The affected set becomes the new pending-compile list.
+        units_to_compile_.clear();
+        for (const auto& id : affected_ids) {
+            units_to_compile_.push_back(id);
+        }
+    } else {
+        // First compilation of this file: every unit has to be compiled.
+        auto units = units_.get_units_by_file(file_path);
+        for (auto* unit : units) {
+            unit->state = UnitState::ADDED;
+            units_to_compile_.push_back(unit->id);
+
+            ChangeRecord rec{};
+            rec.unit_id = unit->id;
+            rec.change_type = UnitState::ADDED;
+            rec.new_start_line = unit->start_line;
+            rec.new_end_line = unit->end_line;
+            changes.push_back(rec);
+        }
+    }
+
+    // Store the new snapshot together with a copy of the file's current units.
+    new_snap.units = std::unordered_map<std::string, CompilationUnit>();
+    for (auto* unit : units_.get_units_by_file(file_path)) {
+        new_snap.units[unit->id] = *unit;
+    }
+    snapshots_[file_path] = new_snap;
+
+    return changes;
+}
+
+// Copy of the ids currently waiting to be compiled.
+std::vector<std::string> IncrementalEngine::get_units_to_compile() const {
+    std::vector<std::string> pending(units_to_compile_);
+    return pending;
+}
+
+// Records `output` as the compilation result of `unit_id`: the unit (if it
+// exists) becomes UNCHANGED with a valid cached output, the result is also
+// stored in the compilation cache, and the id leaves the pending list.
+void IncrementalEngine::mark_compiled(const std::string& unit_id, 
+                                      const std::string& output) {
+    if (auto* unit = units_.get_unit(unit_id)) {
+        unit->cached_output = output;
+        unit->cache_timestamp = current_timestamp();
+        unit->cache_valid = true;
+        unit->state = UnitState::UNCHANGED;
+
+        // Keep the cache in sync with the unit.
+        cache_.put(unit_id, output, unit->content_hash);
+    }
+
+    // The unit is no longer pending, whether or not it is still registered.
+    auto& pending = units_to_compile_;
+    pending.erase(std::remove(pending.begin(), pending.end(), unit_id), pending.end());
+}
+
+// Concatenates the available outputs of all units of `file_path` in start-line
+// order, separated by a single '\n'.
+//
+// For each unit the output stored on the unit is used when its cache is
+// valid; otherwise the compilation cache is consulted, provided its entry
+// matches the unit's content hash. Units without any output are skipped.
+//
+// The separator is emitted only between two outputs that were actually
+// written, so leading units without output no longer cause a stray newline at
+// the beginning of the result.
+std::string IncrementalEngine::get_combined_output(const std::string& file_path) const {
+    std::ostringstream oss;
+    // get_units_by_file is non-const; it only reads and sorts its own result.
+    auto units = const_cast<UnitManager&>(units_).get_units_by_file(file_path);
+
+    // Order by start line.
+    std::sort(units.begin(), units.end(),
+              [](const CompilationUnit* a, const CompilationUnit* b) {
+                  return a->start_line < b->start_line;
+              });
+
+    bool wrote_any = false;
+    for (const auto* unit : units) {
+        // Prefer the output cached on the unit itself.
+        std::string output;
+        if (unit->cache_valid) {
+            output = unit->cached_output;
+        } else if (cache_.is_valid(unit->id, unit->content_hash)) {
+            output = cache_.get(unit->id);
+        }
+
+        if (output.empty()) {
+            continue;
+        }
+        if (wrote_any) {
+            oss << "\n";
+        }
+        oss << output;
+        wrote_any = true;
+    }
+
+    return oss.str();
+}
+
+// Extends `unit_ids` so that a change inside a function or class also selects
+// that enclosing function/class.
+//
+// For every listed unit that is not itself a FUNCTION or CLASS, each other
+// FUNCTION/CLASS unit of `file_path` whose line span contains the unit's span
+// is added to the set, marked AFFECTED and has its cache invalidated.
+// On return `unit_ids` holds the widened set (order unspecified, no duplicates).
+//
+// The file's unit list does not change while the ids are processed, so it is
+// fetched (and sorted) once instead of once per id.
+void IncrementalEngine::expand_to_boundaries(const std::string& file_path,
+                                             std::vector<std::string>& unit_ids) {
+    std::unordered_set<std::string> expanded(unit_ids.begin(), unit_ids.end());
+
+    const auto is_structure = [](const CompilationUnit* u) {
+        return u->type == UnitType::FUNCTION || u->type == UnitType::CLASS;
+    };
+
+    const auto all_units = units_.get_units_by_file(file_path);
+
+    for (const auto& id : unit_ids) {
+        auto* unit = units_.get_unit(id);
+        if (!unit) continue;
+
+        // A function or class is already a complete structure.
+        if (is_structure(unit)) continue;
+
+        // Look for functions/classes that contain this unit.
+        for (auto* parent : all_units) {
+            if (parent->id == id) continue;
+
+            const bool encloses = parent->start_line <= unit->start_line &&
+                                  parent->end_line >= unit->end_line;
+            if (encloses && is_structure(parent)) {
+                // The whole enclosing structure has to be recompiled.
+                expanded.insert(parent->id);
+                parent->state = UnitState::AFFECTED;
+                parent->cache_valid = false;
+            }
+        }
+    }
+
+    unit_ids = std::vector<std::string>(expanded.begin(), expanded.end());
+}
+
+// Persists the engine state. At present this is only the compilation cache.
+//
+// The previous body also computed a state-file path through `cache_.cache()`
+// and `cache_dir_`. CompilationCache declares no `cache()` accessor and keeps
+// `cache_dir_` private, and the computed path was never used, so that dead
+// expression was removed.
+// TODO: persist unit state (e.g. UnitManager::serialize()) once the engine has
+// access to the cache directory path.
+void IncrementalEngine::save_state() {
+    cache_.save();
+}
+
+// Restores the engine state; currently this only reloads the compilation cache.
+void IncrementalEngine::load_state()
+{
+    cache_.load();
+}
+
+}  // namespace incremental
+}  // namespace sikuwa
--- a/incremental/cpp/incremental_core.h
+++ b/incremental/cpp/incremental_core.h
@@ -0,0 +1,283 @@
+// sikuwa/incremental/cpp/incremental_core.h
+// 减量编译核心 - C++ 实现高性能组件
+// 指哪编哪：只编译源码改变的部分
+
+#ifndef SIKUWA_INCREMENTAL_CORE_H
+#define SIKUWA_INCREMENTAL_CORE_H
+
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <unordered_set>
+#include <memory>
+#include <functional>
+#include <optional>
+#include <chrono>
+
+namespace sikuwa {
+namespace incremental {
+
+// ============================================================================
+// Compilation unit kinds (UnitManager::serialize stores the numeric value, so keep the order stable)
+// ============================================================================
+enum class UnitType {
+    LINE,           // single line
+    STATEMENT,      // statement
+    FUNCTION,       // function
+    CLASS,          // class
+    MODULE,         // module level
+    IMPORT,         // import statement
+    DECORATOR,      // decorator
+    BLOCK           // code block
+};
+
+// ============================================================================
+// Compilation unit states (also used as the change type of a ChangeRecord)
+// ============================================================================
+enum class UnitState {
+    UNKNOWN,        // not determined yet (state of a default-constructed unit)
+    UNCHANGED,      // not changed
+    MODIFIED,       // modified
+    ADDED,          // newly added
+    DELETED,        // deleted
+    AFFECTED        // affected (something it depends on changed)
+};
+
+// ============================================================================
+// Compilation unit - the smallest granularity that is compiled and cached
+// ============================================================================
+struct CompilationUnit {
+    std::string id;                     // unique identifier: file:start_line:end_line:hash
+    std::string file_path;              // source file path
+    int start_line;                     // first line (1-based)
+    int end_line;                       // last line (1-based)
+    UnitType type;                      // unit kind
+    std::string name;                   // name (function name / class name, etc.)
+    std::string content_hash;           // content hash
+    std::vector<std::string> dependencies;  // ids of the units this unit depends on
+    std::vector<std::string> dependents;    // ids of the units that depend on this unit
+    UnitState state;                    // current state
+    
+    // Cache-related fields
+    std::string cached_output;          // cached compilation output
+    int64_t cache_timestamp;            // time the output was cached
+    bool cache_valid;                   // whether the cached output may be used
+    
+    CompilationUnit() 
+        : start_line(0), end_line(0), type(UnitType::LINE), 
+          state(UnitState::UNKNOWN), cache_timestamp(0), cache_valid(false) {}
+};
+
+// ============================================================================
+// Version snapshot - the state of one file at a point in time, used for change detection
+// ============================================================================
+struct Snapshot {
+    std::string file_path;
+    std::string content_hash;           // hash of the whole content
+    std::vector<std::string> line_hashes;  // one hash per line
+    std::unordered_map<std::string, CompilationUnit> units;  // compilation units, keyed by unit id
+    int64_t timestamp;
+    
+    Snapshot() : timestamp(0) {}
+};
+
+// ============================================================================
+// Change record - one detected change of a compilation unit
+// ============================================================================
+// Every scalar member has a default, so a record created with `ChangeRecord rec;`
+// never carries indeterminate values in the fields that do not apply to its
+// change type (e.g. the old line range of an ADDED unit stays 0).
+struct ChangeRecord {
+    std::string unit_id;
+    UnitState change_type = UnitState::UNKNOWN;
+    int old_start_line = 0;
+    int old_end_line = 0;
+    int new_start_line = 0;
+    int new_end_line = 0;
+    std::string reason;                 // why the unit is reported
+};
+
+// ============================================================================
+// Unit manager - owns all compilation units, a per-file index and the dependency edges
+// ============================================================================
+class UnitManager {
+public:
+    UnitManager();
+    ~UnitManager();
+    
+    // Add / update / remove compilation units
+    void add_unit(const CompilationUnit& unit);
+    void update_unit(const std::string& id, const CompilationUnit& unit);
+    void remove_unit(const std::string& id);
+    
+    // Queries (get_unit returns nullptr for an unknown id; the file queries are ordered by start line)
+    CompilationUnit* get_unit(const std::string& id);
+    const CompilationUnit* get_unit(const std::string& id) const;
+    std::vector<CompilationUnit*> get_units_by_file(const std::string& file_path);
+    std::vector<CompilationUnit*> get_units_in_range(const std::string& file_path, int start, int end);
+    
+    // Dependency edges ("from depends on to"); get_affected_units follows dependents transitively
+    void add_dependency(const std::string& from_id, const std::string& to_id);
+    void remove_dependency(const std::string& from_id, const std::string& to_id);
+    std::vector<std::string> get_dependencies(const std::string& id) const;
+    std::vector<std::string> get_dependents(const std::string& id) const;
+    std::vector<std::string> get_affected_units(const std::string& changed_id) const;
+    
+    // Iteration
+    void for_each(std::function<void(CompilationUnit&)> callback);
+    size_t size() const { return units_.size(); }
+    void clear();
+    
+    // Serialization (tab-separated text, one unit per line)
+    std::string serialize() const;
+    void deserialize(const std::string& data);
+    
+private:
+    std::unordered_map<std::string, CompilationUnit> units_;
+    std::unordered_map<std::string, std::vector<std::string>> file_units_;  // file -> unit_ids
+    
+    // Recursively collects every unit reachable through `dependents` edges
+    void collect_affected_recursive(const std::string& id, 
+                                    std::unordered_set<std::string>& visited) const;
+};
+
+// ============================================================================
+// Change detector - compares snapshots to find source changes
+// ============================================================================
+class ChangeDetector {
+public:
+    ChangeDetector();
+    ~ChangeDetector();
+    
+    // Create a snapshot (content hash, per-line hashes, timestamp)
+    Snapshot create_snapshot(const std::string& file_path, const std::string& content);
+    
+    // Detect unit-level changes between two snapshots
+    std::vector<ChangeRecord> detect_changes(const Snapshot& old_snap, const Snapshot& new_snap);
+    
+    // Locate changed lines (1-based line numbers of the new snapshot)
+    std::vector<int> get_changed_lines(const Snapshot& old_snap, const Snapshot& new_snap);
+    
+    // Hashing (compute_line_hash ignores leading/trailing whitespace)
+    static std::string compute_hash(const std::string& content);
+    static std::string compute_line_hash(const std::string& line);
+    
+private:
+    // LCS algorithm: pairs of (old index, new index) of matching lines
+    std::vector<std::pair<int, int>> compute_lcs(const std::vector<std::string>& old_lines,
+                                                  const std::vector<std::string>& new_lines);
+};
+
+// ============================================================================
+// Compilation cache - caches compilation outputs
+// ============================================================================
+class CompilationCache {
+public:
+    CompilationCache(const std::string& cache_dir);  // not explicit: also acts as a converting constructor from std::string
+    ~CompilationCache();
+    
+    // Cache operations, keyed by unit id.
+    bool has(const std::string& unit_id) const;
+    std::string get(const std::string& unit_id) const;
+    void put(const std::string& unit_id, const std::string& output, const std::string& content_hash);
+    void invalidate(const std::string& unit_id);
+    void invalidate_all();
+    
+    // Validate a cached entry against the unit's current content hash.
+    bool is_valid(const std::string& unit_id, const std::string& current_hash) const;
+    
+    // Persistence (presumably under cache_dir_ - confirm in the .cpp).
+    void save();
+    void load();
+    
+    // Statistics.
+    size_t size() const { return cache_.size(); }  // number of cached entries
+    size_t hit_count() const { return hits_; }
+    size_t miss_count() const { return misses_; }
+    
+private:
+    struct CacheEntry {
+        std::string output;
+        std::string content_hash;
+        int64_t timestamp = 0;  // default-initialized so a default-constructed entry never holds an indeterminate value
+    };
+    
+    std::string cache_dir_;
+    std::unordered_map<std::string, CacheEntry> cache_;
+    mutable size_t hits_ = 0;    // mutable: may be updated from const member functions; zeroed here so hit_count() is always defined
+    mutable size_t misses_ = 0;  // mutable: may be updated from const member functions; zeroed here so miss_count() is always defined
+};
+
+// ============================================================================
+// Incremental compilation engine
+// ============================================================================
+class IncrementalEngine {
+public:
+    IncrementalEngine(const std::string& cache_dir);  // not explicit: also acts as a converting constructor from std::string
+    ~IncrementalEngine();
+    
+    // Register the compilation units that belong to a file.
+    void register_units(const std::string& file_path, 
+                       const std::vector<CompilationUnit>& units);
+    
+    // Update a file's source and detect the resulting changes.
+    std::vector<ChangeRecord> update_source(const std::string& file_path,
+                                            const std::string& new_content);
+    
+    // Get the ids of the units that need to be recompiled.
+    std::vector<std::string> get_units_to_compile() const;
+    
+    // Mark a unit as compiled and hand over its output.
+    void mark_compiled(const std::string& unit_id, const std::string& output);
+    
+    // Get the compilation result for a file (unit outputs concatenated in order).
+    std::string get_combined_output(const std::string& file_path) const;
+    
+    // Cache management: direct access to the owned CompilationCache.
+    CompilationCache& cache() { return cache_; }
+    const CompilationCache& cache() const { return cache_; }
+    
+    // Unit management: direct access to the owned UnitManager.
+    UnitManager& units() { return units_; }
+    const UnitManager& units() const { return units_; }
+    
+    // State persistence (what is saved and where is defined in the .cpp).
+    void save_state();
+    void load_state();
+    
+private:  // data members are constructed in the order declared below
+    UnitManager units_;
+    ChangeDetector detector_;
+    CompilationCache cache_;
+    std::unordered_map<std::string, Snapshot> snapshots_;  // file -> snapshot
+    std::vector<std::string> units_to_compile_;
+    
+    // Expand the compilation scope to complete structures; unit_ids is an in-out list of unit ids.
+    void expand_to_boundaries(const std::string& file_path, 
+                             std::vector<std::string>& unit_ids);
+};
+
+// ============================================================================
+// Utility functions
+// ============================================================================
+
+// Generate a unit id from the file path, line range, and content hash.
+std::string generate_unit_id(const std::string& file_path, int start_line, 
+                            int end_line, const std::string& content_hash);
+
+// Get the current timestamp (NOTE(review): unit and epoch are defined in the .cpp).
+int64_t current_timestamp();
+
+// Read a file's content (NOTE(review): failure behavior is defined in the .cpp).
+std::string read_file(const std::string& path);
+
+// Write content to a file.
+void write_file(const std::string& path, const std::string& content);
+
+// Split content into lines.
+std::vector<std::string> split_lines(const std::string& content);
+
+// Join lines back into a single string.
+std::string join_lines(const std::vector<std::string>& lines);
+
+}  // namespace incremental
+}  // namespace sikuwa
+
+#endif  // SIKUWA_INCREMENTAL_CORE_H
--- a/incremental/cpp/pybind_incremental.cpp
+++ b/incremental/cpp/pybind_incremental.cpp
@@ -0,0 +1,130 @@
+// sikuwa/incremental/cpp/pybind_incremental.cpp
+// Python bindings - built with pybind11
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include "incremental_core.h"
+
+namespace py = pybind11;
+using namespace sikuwa::incremental;
+
+PYBIND11_MODULE(incremental_engine, m) {  // defines the Python extension module `incremental_engine`
+    m.doc() = "Sikuwa 减量编译引擎 - 指哪编哪";
+    
+    // Enum types
+    py::enum_<UnitType>(m, "UnitType")
+        .value("LINE", UnitType::LINE)
+        .value("STATEMENT", UnitType::STATEMENT)
+        .value("FUNCTION", UnitType::FUNCTION)
+        .value("CLASS", UnitType::CLASS)
+        .value("MODULE", UnitType::MODULE)
+        .value("IMPORT", UnitType::IMPORT)
+        .value("DECORATOR", UnitType::DECORATOR)
+        .value("BLOCK", UnitType::BLOCK);
+    
+    py::enum_<UnitState>(m, "UnitState")
+        .value("UNKNOWN", UnitState::UNKNOWN)
+        .value("UNCHANGED", UnitState::UNCHANGED)
+        .value("MODIFIED", UnitState::MODIFIED)
+        .value("ADDED", UnitState::ADDED)
+        .value("DELETED", UnitState::DELETED)
+        .value("AFFECTED", UnitState::AFFECTED);
+    
+    // CompilationUnit: plain data object, every field is readable and writable from Python
+    py::class_<CompilationUnit>(m, "CompilationUnit")
+        .def(py::init<>())
+        .def_readwrite("id", &CompilationUnit::id)
+        .def_readwrite("file_path", &CompilationUnit::file_path)
+        .def_readwrite("start_line", &CompilationUnit::start_line)
+        .def_readwrite("end_line", &CompilationUnit::end_line)
+        .def_readwrite("type", &CompilationUnit::type)
+        .def_readwrite("name", &CompilationUnit::name)
+        .def_readwrite("content_hash", &CompilationUnit::content_hash)
+        .def_readwrite("dependencies", &CompilationUnit::dependencies)
+        .def_readwrite("dependents", &CompilationUnit::dependents)
+        .def_readwrite("state", &CompilationUnit::state)
+        .def_readwrite("cached_output", &CompilationUnit::cached_output)
+        .def_readwrite("cache_valid", &CompilationUnit::cache_valid);
+    
+    // ChangeRecord
+    py::class_<ChangeRecord>(m, "ChangeRecord")
+        .def(py::init<>())
+        .def_readwrite("unit_id", &ChangeRecord::unit_id)
+        .def_readwrite("change_type", &ChangeRecord::change_type)
+        .def_readwrite("old_start_line", &ChangeRecord::old_start_line)
+        .def_readwrite("old_end_line", &ChangeRecord::old_end_line)
+        .def_readwrite("new_start_line", &ChangeRecord::new_start_line)
+        .def_readwrite("new_end_line", &ChangeRecord::new_end_line)
+        .def_readwrite("reason", &ChangeRecord::reason);
+    
+    // Snapshot
+    py::class_<Snapshot>(m, "Snapshot")
+        .def(py::init<>())
+        .def_readwrite("file_path", &Snapshot::file_path)
+        .def_readwrite("content_hash", &Snapshot::content_hash)
+        .def_readwrite("line_hashes", &Snapshot::line_hashes)
+        .def_readwrite("timestamp", &Snapshot::timestamp);
+    
+    // UnitManager (for_each is not exposed). Unit accessors hand out non-owning references into the manager.
+    py::class_<UnitManager>(m, "UnitManager")
+        .def(py::init<>())
+        .def("add_unit", &UnitManager::add_unit)
+        .def("update_unit", &UnitManager::update_unit)
+        .def("remove_unit", &UnitManager::remove_unit)
+        .def("get_unit", py::overload_cast<const std::string&>(&UnitManager::get_unit),  // no py::const_ tag: binds the non-const overload
+             py::return_value_policy::reference_internal)  // keeps the manager alive while the returned unit is referenced
+        .def("get_units_by_file", &UnitManager::get_units_by_file,
+             py::return_value_policy::reference_internal)  // applied to each element of the returned list
+        .def("get_units_in_range", &UnitManager::get_units_in_range,
+             py::return_value_policy::reference_internal)  // applied to each element of the returned list
+        .def("add_dependency", &UnitManager::add_dependency)
+        .def("remove_dependency", &UnitManager::remove_dependency)
+        .def("get_dependencies", &UnitManager::get_dependencies)
+        .def("get_dependents", &UnitManager::get_dependents)
+        .def("get_affected_units", &UnitManager::get_affected_units)
+        .def("size", &UnitManager::size)
+        .def("clear", &UnitManager::clear)
+        .def("serialize", &UnitManager::serialize)
+        .def("deserialize", &UnitManager::deserialize);
+    
+    // ChangeDetector
+    py::class_<ChangeDetector>(m, "ChangeDetector")
+        .def(py::init<>())
+        .def("create_snapshot", &ChangeDetector::create_snapshot)
+        .def("detect_changes", &ChangeDetector::detect_changes)
+        .def("get_changed_lines", &ChangeDetector::get_changed_lines)
+        .def_static("compute_hash", &ChangeDetector::compute_hash)
+        .def_static("compute_line_hash", &ChangeDetector::compute_line_hash);
+    
+    // CompilationCache (constructed from the cache directory path)
+    py::class_<CompilationCache>(m, "CompilationCache")
+        .def(py::init<const std::string&>())
+        .def("has", &CompilationCache::has)
+        .def("get", &CompilationCache::get)
+        .def("put", &CompilationCache::put)
+        .def("invalidate", &CompilationCache::invalidate)
+        .def("invalidate_all", &CompilationCache::invalidate_all)
+        .def("is_valid", &CompilationCache::is_valid)
+        .def("save", &CompilationCache::save)
+        .def("load", &CompilationCache::load)
+        .def("size", &CompilationCache::size)
+        .def("hit_count", &CompilationCache::hit_count)
+        .def("miss_count", &CompilationCache::miss_count);
+    
+    // IncrementalEngine (the cache() and units() accessors are not exposed to Python)
+    py::class_<IncrementalEngine>(m, "IncrementalEngine")
+        .def(py::init<const std::string&>())
+        .def("register_units", &IncrementalEngine::register_units)
+        .def("update_source", &IncrementalEngine::update_source)
+        .def("get_units_to_compile", &IncrementalEngine::get_units_to_compile)
+        .def("mark_compiled", &IncrementalEngine::mark_compiled)
+        .def("get_combined_output", &IncrementalEngine::get_combined_output)
+        .def("save_state", &IncrementalEngine::save_state)
+        .def("load_state", &IncrementalEngine::load_state);
+    
+    // Utility functions (module-level; compute_hash forwards to ChangeDetector::compute_hash)
+    m.def("generate_unit_id", &generate_unit_id);
+    m.def("compute_hash", &ChangeDetector::compute_hash);
+    m.def("split_lines", &split_lines);
+    m.def("join_lines", &join_lines);
+}