Sikuwa first commit

so陈
2026-02-20 23:53:48 +08:00
commit 13a1072c6f
57 changed files with 13519 additions and 0 deletions

incremental/__init__.py Normal file

@@ -0,0 +1,84 @@
# sikuwa/incremental/__init__.py
"""
Incremental Compilation Module - Incremental Compilation System
Compile exactly what you point at: precise, targeted compilation.
Core features:
1. Single lines / minimal syntax blocks are the smallest compilation units
2. Every unit has a unique ID, a minimal dependency set, and cached artifacts
3. Changes are detected by comparing version snapshots
4. Only changed units and their dependency-affected neighbors are recompiled
5. Boundary triggers handle functions/classes
6. Artifacts are stitched together in original source order
Smart Cache V1.2
- Compile-is-cache: every compilation is recorded automatically and fully traceable
- Cache-is-compile: a cache hit is equivalent to a zero-cost compilation
- Predictive cache warming: predicts and precompiles based on access patterns
"""
from .core import (
IncrementalCompiler,
CompilationUnit,
Snapshot,
ChangeRecord,
ChangeDetector,
CompilationCache,
UnitType,
UnitState,
)
from .analyzer import (
PythonAnalyzer,
CodeBlock,
BlockType,
)
from .compiler_integration import (
IncrementalNativeCompiler,
IncrementalBuildResult,
create_incremental_native_compiler,
)
from .smart_cache import (
SmartCache,
CacheEntry,
CacheEvent,
CacheEventType,
get_smart_cache,
create_smart_cache,
)
__all__ = [
# Core classes
'IncrementalCompiler',
'CompilationUnit',
'Snapshot',
'ChangeRecord',
'ChangeDetector',
'CompilationCache',
# Enums
'UnitType',
'UnitState',
# Analyzer
'PythonAnalyzer',
'CodeBlock',
'BlockType',
# Integrated compiler
'IncrementalNativeCompiler',
'IncrementalBuildResult',
'create_incremental_native_compiler',
# Smart cache V1.2
'SmartCache',
'CacheEntry',
'CacheEvent',
'CacheEventType',
'get_smart_cache',
'create_smart_cache',
]
__version__ = '1.2.0'
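A minimal usage sketch of the public API re-exported above (editorial illustration, not part of the committed file; it assumes the package is importable as `sikuwa.incremental` and that `app.py` is a stand-in source file):

from sikuwa.incremental import IncrementalCompiler

compiler = IncrementalCompiler(cache_dir=".sikuwa_cache")
# first pass: every unit is ADDED, so everything compiles and gets cached
changes = compiler.update_source("app.py", "x = 1\nprint(x)\n")
outputs = compiler.compile_all_pending()
print(len(changes), "changes,", len(outputs), "units compiled")
compiler.save()  # persist the cache, history, and learned access patterns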

incremental/analyzer.py Normal file

@@ -0,0 +1,396 @@
# sikuwa/incremental/analyzer.py
"""
Python code analyzer - identifies code-block boundaries and dependency relations
AST analysis in support of incremental compilation
"""
import ast
import hashlib
from enum import Enum, auto
from dataclasses import dataclass, field
from typing import List, Dict, Set, Optional, Tuple
from pathlib import Path
class BlockType(Enum):
"""代码块类型"""
MODULE = auto() # 模块级
IMPORT = auto() # 导入语句
CLASS = auto() # 类定义
FUNCTION = auto() # 函数定义
METHOD = auto() # 方法定义
DECORATOR = auto() # 装饰器
STATEMENT = auto() # 普通语句
ASSIGNMENT = auto() # 赋值语句
EXPRESSION = auto() # 表达式
CONTROL = auto() # 控制流 (if/for/while/try)
WITH = auto() # with 语句
@dataclass
class CodeBlock:
"""代码块 - 最小编译单元"""
id: str = "" # 唯一标识
type: BlockType = BlockType.STATEMENT
name: str = "" # 名称(函数名/类名等)
start_line: int = 0 # 起始行 (1-based)
end_line: int = 0 # 结束行 (1-based)
start_col: int = 0 # 起始列
end_col: int = 0 # 结束列
content: str = "" # 源代码内容
content_hash: str = "" # 内容哈希
parent_id: str = "" # 父块ID
children: List[str] = field(default_factory=list) # 子块ID列表
# 依赖信息
imports: List[str] = field(default_factory=list) # 导入的模块/名称
references: List[str] = field(default_factory=list) # 引用的名称
definitions: List[str] = field(default_factory=list) # 定义的名称
dependencies: List[str] = field(default_factory=list) # 依赖的块ID
def compute_hash(self) -> str:
"""计算内容哈希"""
# 去除空白差异的影响
normalized = '\n'.join(line.strip() for line in self.content.splitlines())
self.content_hash = hashlib.sha256(normalized.encode()).hexdigest()[:16]
return self.content_hash
def generate_id(self, file_path: str) -> str:
"""生成唯一ID"""
if not self.content_hash:
self.compute_hash()
self.id = f"{file_path}:{self.start_line}:{self.end_line}:{self.content_hash[:8]}"
return self.id
class PythonAnalyzer:
"""
Python code analyzer
Walks the code structure to identify compilation-unit boundaries and dependencies
"""
def __init__(self):
self.blocks: List[CodeBlock] = []
self.block_map: Dict[str, CodeBlock] = {}
self.lines: List[str] = []
self.file_path: str = ""
def analyze(self, source: str, file_path: str = "<string>") -> List[CodeBlock]:
"""
Analyze Python source code and return a list of code blocks
Args:
source: Python source code
file_path: file path
Returns:
list of code blocks
"""
self.file_path = file_path
self.lines = source.splitlines()
self.blocks = []
self.block_map = {}
try:
tree = ast.parse(source)
self._analyze_module(tree, source)
except SyntaxError:
# fall back to line-level analysis on syntax errors
self._fallback_line_analysis(source)
# analyze dependency relations
self._analyze_dependencies()
return self.blocks
def _analyze_module(self, tree: ast.Module, source: str):
"""分析模块级 AST"""
for node in ast.iter_child_nodes(tree):
block = self._node_to_block(node, source)
if block:
self.blocks.append(block)
self.block_map[block.id] = block
def _node_to_block(self, node: ast.AST, source: str, parent_id: str = "") -> Optional[CodeBlock]:
"""将 AST 节点转换为代码块"""
block = CodeBlock()
block.parent_id = parent_id
# 获取行号范围
block.start_line = getattr(node, 'lineno', 0)
block.end_line = getattr(node, 'end_lineno', block.start_line)
block.start_col = getattr(node, 'col_offset', 0)
block.end_col = getattr(node, 'end_col_offset', 0)
# 提取源代码内容
if block.start_line > 0 and block.end_line > 0:
block.content = self._get_source_lines(block.start_line, block.end_line)
# 根据节点类型设置块类型和名称
if isinstance(node, ast.Import):
block.type = BlockType.IMPORT
block.name = "import"
block.imports = [alias.name for alias in node.names]
elif isinstance(node, ast.ImportFrom):
block.type = BlockType.IMPORT
block.name = f"from {node.module}"
block.imports = [node.module or ""] + [alias.name for alias in node.names]
elif isinstance(node, ast.ClassDef):
block.type = BlockType.CLASS
block.name = node.name
block.definitions = [node.name]
# include decorators in the block's range
if node.decorator_list:
block.start_line = node.decorator_list[0].lineno
block.content = self._get_source_lines(block.start_line, block.end_line)
# generate this block's ID before recursing so children get a valid parent_id
block.compute_hash()
block.generate_id(self.file_path)
# recurse into the class body
for child in node.body:
child_block = self._node_to_block(child, source, block.id)
if child_block:
block.children.append(child_block.id)
self.blocks.append(child_block)
self.block_map[child_block.id] = child_block
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
block.type = BlockType.FUNCTION if not parent_id else BlockType.METHOD
block.name = node.name
block.definitions = [node.name]
# include decorators in the block's range
if node.decorator_list:
block.start_line = node.decorator_list[0].lineno
block.content = self._get_source_lines(block.start_line, block.end_line)
# collect names referenced in the function body
block.references = self._extract_references(node)
elif isinstance(node, ast.Assign):
block.type = BlockType.ASSIGNMENT
block.definitions = self._extract_targets(node.targets)
block.references = self._extract_references(node.value)
elif isinstance(node, ast.AugAssign):
block.type = BlockType.ASSIGNMENT
block.definitions = self._extract_targets([node.target])
block.references = self._extract_references(node.value)
elif isinstance(node, ast.AnnAssign):
block.type = BlockType.ASSIGNMENT
if node.target:
block.definitions = self._extract_targets([node.target])
if node.value:
block.references = self._extract_references(node.value)
elif isinstance(node, (ast.If, ast.For, ast.While, ast.Try)):
block.type = BlockType.CONTROL
block.name = node.__class__.__name__.lower()
block.references = self._extract_references(node)
elif isinstance(node, ast.With):
block.type = BlockType.WITH
block.references = self._extract_references(node)
elif isinstance(node, ast.Expr):
block.type = BlockType.EXPRESSION
block.references = self._extract_references(node.value)
else:
block.type = BlockType.STATEMENT
block.references = self._extract_references(node)
# compute the hash and generate the ID
block.compute_hash()
block.generate_id(self.file_path)
return block
def _get_source_lines(self, start: int, end: int) -> str:
"""获取指定行范围的源代码"""
if start < 1 or end > len(self.lines):
return ""
return '\n'.join(self.lines[start-1:end])
def _extract_references(self, node: ast.AST) -> List[str]:
"""提取节点中引用的名称"""
refs = []
for child in ast.walk(node):
if isinstance(child, ast.Name):
refs.append(child.id)
elif isinstance(child, ast.Attribute):
# collect the root name of an attribute chain
current = child
while isinstance(current, ast.Attribute):
current = current.value
if isinstance(current, ast.Name):
refs.append(current.id)
return list(set(refs))
def _extract_targets(self, targets: List[ast.AST]) -> List[str]:
"""提取赋值目标的名称"""
names = []
for target in targets:
if isinstance(target, ast.Name):
names.append(target.id)
elif isinstance(target, (ast.Tuple, ast.List)):
for elt in target.elts:
if isinstance(elt, ast.Name):
names.append(elt.id)
return names
def _analyze_dependencies(self):
"""分析块之间的依赖关系"""
# 构建名称到块的映射
name_to_block: Dict[str, str] = {}
for block in self.blocks:
for name in block.definitions:
name_to_block[name] = block.id
# 分析每个块的依赖
for block in self.blocks:
for ref in block.references:
if ref in name_to_block and name_to_block[ref] != block.id:
dep_id = name_to_block[ref]
if dep_id not in block.dependencies:
block.dependencies.append(dep_id)
def _fallback_line_analysis(self, source: str):
"""回退到行级分析(用于语法错误的代码)"""
lines = source.splitlines()
current_block = None
indent_stack = [(0, None)] # (indent, block)
for i, line in enumerate(lines, 1):
stripped = line.lstrip()
if not stripped or stripped.startswith('#'):
continue
indent = len(line) - len(stripped)
# simple block detection
if stripped.startswith('def ') or stripped.startswith('async def '):
block = CodeBlock(
type=BlockType.FUNCTION,
name=stripped.split('(')[0].replace('def ', '').replace('async ', '').strip(),
start_line=i,
end_line=i,
content=line
)
current_block = block
elif stripped.startswith('class '):
block = CodeBlock(
type=BlockType.CLASS,
name=stripped.split('(')[0].split(':')[0].replace('class ', '').strip(),
start_line=i,
end_line=i,
content=line
)
current_block = block
elif stripped.startswith('import ') or stripped.startswith('from '):
block = CodeBlock(
type=BlockType.IMPORT,
start_line=i,
end_line=i,
content=line
)
block.compute_hash()
block.generate_id(self.file_path)
self.blocks.append(block)
self.block_map[block.id] = block
continue
else:
if current_block and indent > indent_stack[-1][0]:
# continue the current block
current_block.end_line = i
current_block.content += '\n' + line
else:
# close the current block
if current_block:
current_block.compute_hash()
current_block.generate_id(self.file_path)
self.blocks.append(current_block)
self.block_map[current_block.id] = current_block
current_block = None
# plain statement
block = CodeBlock(
type=BlockType.STATEMENT,
start_line=i,
end_line=i,
content=line
)
block.compute_hash()
block.generate_id(self.file_path)
self.blocks.append(block)
self.block_map[block.id] = block
# flush the final block
if current_block:
current_block.compute_hash()
current_block.generate_id(self.file_path)
self.blocks.append(current_block)
self.block_map[current_block.id] = current_block
def get_blocks_in_range(self, start_line: int, end_line: int) -> List[CodeBlock]:
"""获取指定行范围内的代码块"""
result = []
for block in self.blocks:
# 检查是否有交集
if block.start_line <= end_line and block.end_line >= start_line:
result.append(block)
return result
def get_affected_blocks(self, changed_block_ids: Set[str]) -> Set[str]:
"""获取受变更影响的所有块(包括依赖传播)"""
affected = set(changed_block_ids)
queue = list(changed_block_ids)
while queue:
block_id = queue.pop(0)
# find every block that depends on this one
for block in self.blocks:
if block_id in block.dependencies and block.id not in affected:
affected.add(block.id)
queue.append(block.id)
return affected
def expand_to_boundaries(self, block_ids: Set[str]) -> Set[str]:
"""扩展块ID集合确保完整结构被包含"""
expanded = set(block_ids)
for block_id in list(block_ids):
block = self.block_map.get(block_id)
if not block:
continue
# a block inside a function/class forces the whole structure to recompile
if block.parent_id:
parent = self.block_map.get(block.parent_id)
if parent and parent.type in (BlockType.CLASS, BlockType.FUNCTION):
expanded.add(parent.id)
# include all child blocks as well
for child_id in parent.children:
expanded.add(child_id)
# if the block itself is a function/class, include its children
if block.type in (BlockType.CLASS, BlockType.FUNCTION):
for child_id in block.children:
expanded.add(child_id)
return expanded
def analyze_python_file(file_path: str) -> List[CodeBlock]:
"""分析 Python 文件"""
with open(file_path, 'r', encoding='utf-8') as f:
source = f.read()
analyzer = PythonAnalyzer()
return analyzer.analyze(source, file_path)
def analyze_python_source(source: str, file_path: str = "<string>") -> List[CodeBlock]:
"""分析 Python 源代码"""
analyzer = PythonAnalyzer()
return analyzer.analyze(source, file_path)
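A quick sketch of how the analyzer slices a module into blocks and resolves dependencies (editorial illustration; the `demo` snippet is invented):

from sikuwa.incremental.analyzer import analyze_python_source

demo = (
    "import os\n"
    "X = 1\n"
    "def f(y):\n"
    "    return X + y\n"
)
for b in analyze_python_source(demo, "demo.py"):
    # each block carries its type, line span, and the IDs of blocks it depends on
    print(b.type.name, b.start_line, b.end_line, b.name, b.dependencies)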

incremental/compiler_integration.py Normal file

@@ -0,0 +1,322 @@
# sikuwa/incremental/compiler_integration.py
"""
Incremental compiler integration module
Wires the incremental compilation system into the Sikuwa compiler
"""
import os
import sys
import subprocess
import tempfile
from pathlib import Path
from typing import Dict, List, Optional, Callable, Any
from dataclasses import dataclass
from .core import (
IncrementalCompiler,
CompilationUnit,
ChangeRecord,
UnitState,
UnitType
)
from .analyzer import PythonAnalyzer, CodeBlock, BlockType
@dataclass
class IncrementalBuildResult:
"""减量编译结果"""
success: bool = False
compiled_units: int = 0
cached_units: int = 0
total_units: int = 0
output_files: Optional[Dict[str, str]] = None # unit_id -> output_path
combined_output: str = ""
errors: Optional[List[str]] = None
def __post_init__(self):
if self.output_files is None:
self.output_files = {}
if self.errors is None:
self.errors = []
class IncrementalNativeCompiler:
"""
Incremental native compiler
Integrates the incremental compilation system with the native C/C++ pipeline:
Python → C → GCC → dll/so
Features:
- compiles only the changed code blocks
- caches already-compiled object files
- smart linking (relinks only what is necessary)
"""
def __init__(self,
cache_dir: str = ".sikuwa_cache",
cc: str = "gcc",
cxx: str = "g++"):
self.incremental = IncrementalCompiler(cache_dir)
self.cache_dir = Path(cache_dir)
self.cache_dir.mkdir(parents=True, exist_ok=True)
self.cc = cc
self.cxx = cxx
# working directories
self.work_dir = self.cache_dir / "incremental_build"
self.c_dir = self.work_dir / "c_source"
self.obj_dir = self.work_dir / "obj"
for d in [self.work_dir, self.c_dir, self.obj_dir]:
d.mkdir(parents=True, exist_ok=True)
# register the compile callback
self.incremental.set_compiler(self._compile_unit)
# Cython availability
self._cython_available = self._check_cython()
def _check_cython(self) -> bool:
"""检查 Cython 是否可用"""
try:
import Cython
return True
except ImportError:
return False
def _compile_unit(self, unit: CompilationUnit) -> str:
"""
Compile a single unit
Pipeline: Python code → C code → object file
"""
# generate C code
c_code = self._python_to_c(unit)
# write out the C file
c_file = self.c_dir / f"unit_{unit.content_hash}.c"
c_file.write_text(c_code, encoding='utf-8')
# compile to an object file
obj_file = self.obj_dir / f"unit_{unit.content_hash}.o"
if not obj_file.exists():
self._compile_c_to_obj(c_file, obj_file)
# return the object file path as the "compiled artifact"
return str(obj_file)
def _python_to_c(self, unit: CompilationUnit) -> str:
"""
Convert Python code to C code
Uses Cython when possible, otherwise the built-in converter
"""
if self._cython_available and unit.type in (UnitType.FUNCTION, UnitType.CLASS):
return self._cython_convert(unit)
else:
return self._builtin_convert(unit)
def _cython_convert(self, unit: CompilationUnit) -> str:
"""使用 Cython 转换"""
# 创建临时 .pyx 文件
pyx_file = self.work_dir / f"temp_{unit.content_hash}.pyx"
pyx_file.write_text(unit.content, encoding='utf-8')
c_file = self.work_dir / f"temp_{unit.content_hash}.c"
try:
result = subprocess.run(
[sys.executable, "-m", "cython", "-3", str(pyx_file), "-o", str(c_file)],
capture_output=True,
text=True
)
if result.returncode == 0 and c_file.exists():
return c_file.read_text(encoding='utf-8')
except Exception:
pass
# fall back to the built-in converter
return self._builtin_convert(unit)
def _builtin_convert(self, unit: CompilationUnit) -> str:
"""内置转换器 - 将 Python 代码嵌入 C"""
escaped = unit.content.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
unit_name = unit.name or f"unit_{unit.content_hash[:8]}"
safe_name = ''.join(c if c.isalnum() else '_' for c in unit_name)
c_code = f'''
/* Auto-generated by Sikuwa Incremental Compiler */
/* Unit: {unit.id} */
/* Lines: {unit.start_line}-{unit.end_line} */
#define PY_SSIZE_T_CLEAN
#include <Python.h>
static const char* sikuwa_unit_{safe_name}_source = "{escaped}";
int sikuwa_exec_unit_{safe_name}(PyObject* globals, PyObject* locals) {{
PyObject* code = Py_CompileString(
sikuwa_unit_{safe_name}_source,
"{unit.file_path}",
Py_file_input
);
if (code == NULL) {{
return -1;
}}
PyObject* result = PyEval_EvalCode(code, globals, locals);
Py_DECREF(code);
if (result == NULL) {{
return -1;
}}
Py_DECREF(result);
return 0;
}}
'''
return c_code
def _compile_c_to_obj(self, c_file: Path, obj_file: Path):
"""编译 C 文件为目标文件"""
import sysconfig
# 获取 Python 头文件路径
include_dir = sysconfig.get_path('include')
cmd = [
self.cc,
"-c",
"-fPIC",
"-O2",
f"-I{include_dir}",
str(c_file),
"-o", str(obj_file)
]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"Compilation failed: {result.stderr}")
def build(self, file_path: str, content: str) -> IncrementalBuildResult:
"""
Run an incremental build
Args:
file_path: source file path
content: source code content
Returns:
the build result
"""
result = IncrementalBuildResult()
try:
# detect changes
changes = self.incremental.update_source(file_path, content)
# units that need compiling
units_to_compile = self.incremental.get_units_to_compile()
result.total_units = len(self.incremental._units)
# compile the changed units
compiled_outputs = self.incremental.compile_all_pending()
result.compiled_units = len(compiled_outputs)
result.cached_units = result.total_units - result.compiled_units
# collect the outputs
result.output_files = compiled_outputs
# combined output (all object file paths)
result.combined_output = self.incremental.get_combined_output(file_path)
result.success = True
except Exception as e:
result.success = False
result.errors.append(str(e))
return result
def link(self, output_path: str, file_paths: List[str]) -> bool:
"""
Link all of the object files
Args:
output_path: output file path
file_paths: list of source file paths
Returns:
whether linking succeeded
"""
import sysconfig
# collect all object files
obj_files = []
for fp in file_paths:
combined = self.incremental.get_combined_output(fp)
for line in combined.splitlines():
if line.strip() and line.endswith('.o'):
obj_files.append(line.strip())
if not obj_files:
return False
# locate the Python library directory
lib_dir = sysconfig.get_config_var('LIBDIR') or '/usr/lib'
# decide the output type
if output_path.endswith('.so') or output_path.endswith('.dll'):
link_flags = ["-shared"]
else:
link_flags = []
# the link command
cmd = [
self.cxx,
*link_flags,
*obj_files,
f"-L{lib_dir}",
f"-lpython{sys.version_info.major}.{sys.version_info.minor}",
"-o", output_path
]
result = subprocess.run(cmd, capture_output=True, text=True)
return result.returncode == 0
def get_stats(self) -> Dict[str, Any]:
"""获取统计信息"""
stats = self.incremental.get_stats()
stats['c_files'] = len(list(self.c_dir.glob('*.c')))
stats['obj_files'] = len(list(self.obj_dir.glob('*.o')))
return stats
def clean(self):
"""清理所有缓存和临时文件"""
import shutil
self.incremental.clear()
for d in [self.c_dir, self.obj_dir]:
if d.exists():
shutil.rmtree(d)
d.mkdir(parents=True, exist_ok=True)
def save(self):
"""保存状态"""
self.incremental.save()
def create_incremental_native_compiler(
cache_dir: str = ".sikuwa_cache",
cc: str = "gcc",
cxx: str = "g++"
) -> IncrementalNativeCompiler:
"""创建减量原生编译器"""
return IncrementalNativeCompiler(cache_dir, cc, cxx)
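A usage sketch for the native pipeline (editorial illustration; it assumes gcc and the CPython development headers are installed, and that `app.py` is a placeholder path):

from pathlib import Path
from sikuwa.incremental.compiler_integration import create_incremental_native_compiler

ncc = create_incremental_native_compiler(cache_dir=".sikuwa_cache")
result = ncc.build("app.py", Path("app.py").read_text(encoding="utf-8"))
if result.success:
    # unchanged units were served from the object-file cache
    print(f"{result.compiled_units} compiled, {result.cached_units} cached")
else:
    print("errors:", result.errors)
ncc.save()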

incremental/core.py Normal file

@@ -0,0 +1,778 @@
# sikuwa/incremental/core.py
"""
Incremental compilation core - Python implementation
Compile exactly what changed: only the modified parts of the source are rebuilt
Provides a pure-Python fallback for when the C++ extension is unavailable
"""
import hashlib
import json
import os
import time
from enum import Enum, auto
from dataclasses import dataclass, field
from typing import List, Dict, Set, Optional, Tuple, Callable, Any
from pathlib import Path
from .analyzer import PythonAnalyzer, CodeBlock, BlockType
class UnitType(Enum):
"""编译单元类型"""
LINE = auto()
STATEMENT = auto()
FUNCTION = auto()
CLASS = auto()
MODULE = auto()
IMPORT = auto()
DECORATOR = auto()
BLOCK = auto()
class UnitState(Enum):
"""编译单元状态"""
UNKNOWN = auto()
UNCHANGED = auto()
MODIFIED = auto()
ADDED = auto()
DELETED = auto()
AFFECTED = auto()
@dataclass
class CompilationUnit:
"""编译单元 - 最小编译粒度"""
id: str = ""
file_path: str = ""
start_line: int = 0
end_line: int = 0
type: UnitType = UnitType.LINE
name: str = ""
content: str = ""
content_hash: str = ""
dependencies: List[str] = field(default_factory=list)
dependents: List[str] = field(default_factory=list)
state: UnitState = UnitState.UNKNOWN
cached_output: str = ""
cache_timestamp: int = 0
cache_valid: bool = False
def compute_hash(self) -> str:
"""计算内容哈希"""
normalized = '\n'.join(line.strip() for line in self.content.splitlines())
self.content_hash = hashlib.sha256(normalized.encode()).hexdigest()[:16]
return self.content_hash
def generate_id(self) -> str:
"""生成唯一ID"""
if not self.content_hash:
self.compute_hash()
self.id = f"{self.file_path}:{self.start_line}:{self.end_line}:{self.content_hash[:8]}"
return self.id
@classmethod
def from_code_block(cls, block: CodeBlock) -> 'CompilationUnit':
"""从 CodeBlock 创建"""
unit = cls()
unit.id = block.id
unit.file_path = block.id.split(':')[0] if ':' in block.id else ""
unit.start_line = block.start_line
unit.end_line = block.end_line
unit.content = block.content
unit.content_hash = block.content_hash
unit.name = block.name
unit.dependencies = block.dependencies.copy()
# map block types onto unit types
type_map = {
BlockType.MODULE: UnitType.MODULE,
BlockType.IMPORT: UnitType.IMPORT,
BlockType.CLASS: UnitType.CLASS,
BlockType.FUNCTION: UnitType.FUNCTION,
BlockType.METHOD: UnitType.FUNCTION,
BlockType.DECORATOR: UnitType.DECORATOR,
BlockType.STATEMENT: UnitType.STATEMENT,
BlockType.ASSIGNMENT: UnitType.STATEMENT,
BlockType.EXPRESSION: UnitType.STATEMENT,
BlockType.CONTROL: UnitType.BLOCK,
BlockType.WITH: UnitType.BLOCK,
}
unit.type = type_map.get(block.type, UnitType.STATEMENT)
return unit
@dataclass
class Snapshot:
"""版本快照"""
file_path: str = ""
content_hash: str = ""
line_hashes: List[str] = field(default_factory=list)
units: Dict[str, CompilationUnit] = field(default_factory=dict)
timestamp: int = 0
@dataclass
class ChangeRecord:
"""变更记录"""
unit_id: str = ""
change_type: UnitState = UnitState.UNKNOWN
old_start_line: int = 0
old_end_line: int = 0
new_start_line: int = 0
new_end_line: int = 0
reason: str = ""
class ChangeDetector:
"""变更检测器"""
@staticmethod
def compute_hash(content: str) -> str:
"""计算内容哈希"""
return hashlib.sha256(content.encode()).hexdigest()[:16]
@staticmethod
def compute_line_hash(line: str) -> str:
"""计算行哈希(忽略首尾空白)"""
stripped = line.strip()
if not stripped:
return "empty"
return hashlib.sha256(stripped.encode()).hexdigest()[:16]
def create_snapshot(self, file_path: str, content: str) -> Snapshot:
"""创建快照"""
snap = Snapshot()
snap.file_path = file_path
snap.content_hash = self.compute_hash(content)
snap.timestamp = int(time.time() * 1000)
lines = content.splitlines()
snap.line_hashes = [self.compute_line_hash(line) for line in lines]
return snap
def get_changed_lines(self, old_snap: Snapshot, new_snap: Snapshot) -> List[int]:
"""获取变更的行号 (1-based)"""
# 使用 LCS 算法进行对比
lcs = self._compute_lcs(old_snap.line_hashes, new_snap.line_hashes)
# LCS 中新版本的行索引
lcs_new_indices = {pair[1] for pair in lcs}
# 不在 LCS 中的行即为变更的行
changed = []
for i in range(len(new_snap.line_hashes)):
if i not in lcs_new_indices:
changed.append(i + 1) # 1-based
return changed
def _compute_lcs(self, old_hashes: List[str], new_hashes: List[str]) -> List[Tuple[int, int]]:
"""计算最长公共子序列"""
m, n = len(old_hashes), len(new_hashes)
# DP table
dp = [[0] * (n + 1) for _ in range(m + 1)]
for i in range(1, m + 1):
for j in range(1, n + 1):
if old_hashes[i - 1] == new_hashes[j - 1]:
dp[i][j] = dp[i - 1][j - 1] + 1
else:
dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
# backtrack to recover the LCS pairing
lcs = []
i, j = m, n
while i > 0 and j > 0:
if old_hashes[i - 1] == new_hashes[j - 1]:
lcs.append((i - 1, j - 1))
i -= 1
j -= 1
elif dp[i - 1][j] > dp[i][j - 1]:
i -= 1
else:
j -= 1
lcs.reverse()
return lcs
def detect_changes(self, old_snap: Snapshot, new_snap: Snapshot) -> List[ChangeRecord]:
"""检测变更"""
records = []
old_ids = set(old_snap.units.keys())
new_ids = set(new_snap.units.keys())
# deleted units
for uid in old_ids - new_ids:
old_unit = old_snap.units[uid]
rec = ChangeRecord(
unit_id=uid,
change_type=UnitState.DELETED,
old_start_line=old_unit.start_line,
old_end_line=old_unit.end_line,
reason="unit deleted"
)
records.append(rec)
# added units
for uid in new_ids - old_ids:
new_unit = new_snap.units[uid]
rec = ChangeRecord(
unit_id=uid,
change_type=UnitState.ADDED,
new_start_line=new_unit.start_line,
new_end_line=new_unit.end_line,
reason="unit added"
)
records.append(rec)
# modified units
for uid in old_ids & new_ids:
old_unit = old_snap.units[uid]
new_unit = new_snap.units[uid]
if old_unit.content_hash != new_unit.content_hash:
rec = ChangeRecord(
unit_id=uid,
change_type=UnitState.MODIFIED,
old_start_line=old_unit.start_line,
old_end_line=old_unit.end_line,
new_start_line=new_unit.start_line,
new_end_line=new_unit.end_line,
reason="content changed"
)
records.append(rec)
return records
class CompilationCache:
"""
Compilation cache V1.2
Compile-is-cache, cache-is-compile
- every compilation is recorded automatically, with a fully traceable history
- a cache hit is equivalent to a zero-cost compilation
- integrates predictive warm-up
"""
def __init__(self, cache_dir: str):
self.cache_dir = Path(cache_dir)
self.cache_dir.mkdir(parents=True, exist_ok=True)
self._cache: Dict[str, Dict] = {}
self._hits = 0
self._misses = 0
self._compile_history: List[Dict] = [] # compile history
self._access_sequence: List[str] = [] # access sequence
self._predictions: Dict[str, List[str]] = {} # learned access patterns
self._load()
def _load(self):
"""加载缓存"""
cache_file = self.cache_dir / "incremental_cache.json"
history_file = self.cache_dir / "compile_history.json"
patterns_file = self.cache_dir / "prediction_patterns.json"
if cache_file.exists():
try:
with open(cache_file, 'r', encoding='utf-8') as f:
self._cache = json.load(f)
except (OSError, json.JSONDecodeError):
self._cache = {}
if history_file.exists():
try:
with open(history_file, 'r', encoding='utf-8') as f:
self._compile_history = json.load(f)
except (OSError, json.JSONDecodeError):
self._compile_history = []
if patterns_file.exists():
try:
with open(patterns_file, 'r', encoding='utf-8') as f:
self._predictions = json.load(f)
except (OSError, json.JSONDecodeError):
self._predictions = {}
def save(self):
"""保存缓存和历史"""
cache_file = self.cache_dir / "incremental_cache.json"
history_file = self.cache_dir / "compile_history.json"
patterns_file = self.cache_dir / "prediction_patterns.json"
with open(cache_file, 'w', encoding='utf-8') as f:
json.dump(self._cache, f, indent=2)
# keep only the most recent 10000 history entries
with open(history_file, 'w', encoding='utf-8') as f:
json.dump(self._compile_history[-10000:], f, indent=2)
with open(patterns_file, 'w', encoding='utf-8') as f:
json.dump(self._predictions, f, indent=2)
def has(self, unit_id: str) -> bool:
return unit_id in self._cache
def get(self, unit_id: str) -> str:
"""缓存即编译 - 命中即零成本获得编译结果"""
if unit_id in self._cache:
self._hits += 1
# 记录访问序列
self._record_access(unit_id)
# 更新访问时间
self._cache[unit_id]['last_access'] = int(time.time() * 1000)
self._cache[unit_id]['access_count'] = self._cache[unit_id].get('access_count', 0) + 1
return self._cache[unit_id].get('output', '')
self._misses += 1
return ""
def put(self, unit_id: str, output: str, content_hash: str,
compile_time_ms: int = 0, file_path: str = "",
start_line: int = 0, end_line: int = 0):
"""编译即缓存 - 每次编译自动记录"""
timestamp = int(time.time() * 1000)
self._cache[unit_id] = {
'output': output,
'content_hash': content_hash,
'timestamp': timestamp,
'last_access': timestamp,
'access_count': 1,
'compile_time_ms': compile_time_ms,
'file_path': file_path,
'line_range': [start_line, end_line],
'size_bytes': len(output.encode('utf-8')),
}
# record compile history
self._compile_history.append({
'unit_id': unit_id,
'content_hash': content_hash,
'timestamp': timestamp,
'compile_time_ms': compile_time_ms,
'file_path': file_path,
'action': 'compile'
})
# record the access sequence
self._record_access(unit_id)
def _record_access(self, unit_id: str):
"""记录访问序列,用于预测"""
# 更新访问序列
self._access_sequence.append(unit_id)
if len(self._access_sequence) > 1000:
self._access_sequence = self._access_sequence[-500:]
# learn access patterns
if len(self._access_sequence) >= 2:
prev_id = self._access_sequence[-2]
if prev_id != unit_id:
if prev_id not in self._predictions:
self._predictions[prev_id] = []
if unit_id not in self._predictions[prev_id]:
self._predictions[prev_id].append(unit_id)
# cap the prediction list length
self._predictions[prev_id] = self._predictions[prev_id][:10]
def get_predictions(self, unit_id: str) -> List[str]:
"""获取预测的下一个可能访问的单元"""
return self._predictions.get(unit_id, [])
def invalidate(self, unit_id: str):
self._cache.pop(unit_id, None)
# record the invalidation
self._compile_history.append({
'unit_id': unit_id,
'timestamp': int(time.time() * 1000),
'action': 'invalidate'
})
def invalidate_all(self):
self._cache.clear()
def is_valid(self, unit_id: str, current_hash: str) -> bool:
if unit_id not in self._cache:
return False
return self._cache[unit_id].get('content_hash') == current_hash
def get_compile_history(self, limit: int = 100) -> List[Dict]:
"""获取编译历史"""
return self._compile_history[-limit:]
def get_hot_units(self, limit: int = 20) -> List[Dict]:
"""获取热点单元(访问最频繁)"""
sorted_items = sorted(
self._cache.items(),
key=lambda x: x[1].get('access_count', 0),
reverse=True
)
return [
{'unit_id': k, 'access_count': v.get('access_count', 0),
'file': v.get('file_path', ''), 'lines': v.get('line_range', [])}
for k, v in sorted_items[:limit]
]
def get_stats(self) -> Dict[str, Any]:
"""获取统计信息"""
total_size = sum(e.get('size_bytes', 0) for e in self._cache.values())
total_compile_time = sum(e.get('compile_time_ms', 0) for e in self._cache.values())
return {
'version': '1.2',
'entries': len(self._cache),
'total_size_mb': total_size / (1024 * 1024),
'total_compile_time_ms': total_compile_time,
'hits': self._hits,
'misses': self._misses,
'hit_rate': self._hits / (self._hits + self._misses) if (self._hits + self._misses) > 0 else 0,
'history_count': len(self._compile_history),
'prediction_patterns': len(self._predictions),
}
@property
def hit_count(self) -> int:
return self._hits
@property
def miss_count(self) -> int:
return self._misses
class IncrementalCompiler:
"""
Incremental compiler - compile exactly what you point at
Core features:
1. Minimal syntax blocks serve as compilation units
2. Change detection - locates only the modified units and their affected neighbors
3. Recompiles only changed units; unchanged units reuse the cache
4. Boundary triggers - automatically expands to function/class boundaries
5. Artifacts are stitched together in original source order
"""
def __init__(self, cache_dir: str = ".sikuwa_cache"):
self.cache = CompilationCache(cache_dir)
self.detector = ChangeDetector()
self.analyzer = PythonAnalyzer()
self._units: Dict[str, CompilationUnit] = {}
self._file_units: Dict[str, List[str]] = {} # file -> unit_ids
self._snapshots: Dict[str, Snapshot] = {}
self._units_to_compile: List[str] = []
# compiler callback
self._compile_callback: Optional[Callable[[CompilationUnit], str]] = None
def set_compiler(self, callback: Callable[[CompilationUnit], str]):
"""设置编译器回调"""
self._compile_callback = callback
def analyze_source(self, file_path: str, content: str) -> List[CompilationUnit]:
"""分析源代码,返回编译单元列表"""
blocks = self.analyzer.analyze(content, file_path)
units = [CompilationUnit.from_code_block(b) for b in blocks]
return units
def register_units(self, file_path: str, units: List[CompilationUnit]):
"""注册编译单元"""
# 移除旧单元
if file_path in self._file_units:
for uid in self._file_units[file_path]:
self._units.pop(uid, None)
# 添加新单元
self._file_units[file_path] = []
for unit in units:
self._units[unit.id] = unit
self._file_units[file_path].append(unit.id)
def update_source(self, file_path: str, new_content: str) -> List[ChangeRecord]:
"""
Update the source code and detect changes
Returns a list of change records
"""
# analyze the new code
new_units = self.analyze_source(file_path, new_content)
# create a new snapshot
new_snap = self.detector.create_snapshot(file_path, new_content)
for unit in new_units:
new_snap.units[unit.id] = unit
changes = []
self._units_to_compile = []
# look up the previous snapshot
old_snap = self._snapshots.get(file_path)
if old_snap:
# the changed lines
changed_lines = self.detector.get_changed_lines(old_snap, new_snap)
# find the affected compilation units
affected_ids: Set[str] = set()
for line in changed_lines:
# units covering this line
for unit in new_units:
if unit.start_line <= line <= unit.end_line:
affected_ids.add(unit.id)
unit.state = UnitState.MODIFIED
unit.cache_valid = False
# propagate dependency effects
affected_ids = self._propagate_dependencies(affected_ids, new_units)
# expand to boundaries
affected_ids = self._expand_to_boundaries(affected_ids, new_units)
# emit change records
for uid in affected_ids:
unit = self._units.get(uid) or next((u for u in new_units if u.id == uid), None)
if unit:
rec = ChangeRecord(
unit_id=uid,
change_type=unit.state if unit.state != UnitState.UNKNOWN else UnitState.MODIFIED,
new_start_line=unit.start_line,
new_end_line=unit.end_line,
reason="content changed"
)
changes.append(rec)
self._units_to_compile.append(uid)
else:
# first analysis: every unit needs compiling
for unit in new_units:
unit.state = UnitState.ADDED
rec = ChangeRecord(
unit_id=unit.id,
change_type=UnitState.ADDED,
new_start_line=unit.start_line,
new_end_line=unit.end_line,
reason="first analysis"
)
changes.append(rec)
self._units_to_compile.append(unit.id)
# register the units and update the snapshot
self.register_units(file_path, new_units)
self._snapshots[file_path] = new_snap
return changes
def _propagate_dependencies(self, affected_ids: Set[str],
units: List[CompilationUnit]) -> Set[str]:
"""传播依赖影响"""
# 构建依赖图
dependents: Dict[str, List[str]] = {}
for unit in units:
for dep_id in unit.dependencies:
if dep_id not in dependents:
dependents[dep_id] = []
dependents[dep_id].append(unit.id)
# BFS propagation
queue = list(affected_ids)
visited = set(affected_ids)
while queue:
uid = queue.pop(0)
for dependent_id in dependents.get(uid, []):
if dependent_id not in visited:
visited.add(dependent_id)
queue.append(dependent_id)
# mark as affected
for unit in units:
if unit.id == dependent_id:
unit.state = UnitState.AFFECTED
unit.cache_valid = False
break
return visited
def _expand_to_boundaries(self, affected_ids: Set[str],
units: List[CompilationUnit]) -> Set[str]:
"""扩展到函数/类边界"""
expanded = set(affected_ids)
unit_map = {u.id: u for u in units}
for uid in list(affected_ids):
unit = unit_map.get(uid)
if not unit:
continue
# a change inside a function/class requires recompiling the whole structure
for other in units:
if other.id == uid:
continue
# containment check
if (other.type in (UnitType.FUNCTION, UnitType.CLASS) and
other.start_line <= unit.start_line and
other.end_line >= unit.end_line):
expanded.add(other.id)
other.state = UnitState.AFFECTED
other.cache_valid = False
return expanded
def get_units_to_compile(self) -> List[str]:
"""获取需要编译的单元ID列表"""
return self._units_to_compile.copy()
def compile_unit(self, unit_id: str) -> str:
"""
Compile a single unit
Cache-is-compile: a cache hit yields the compiled result at zero cost
"""
unit = self._units.get(unit_id)
if not unit:
return ""
# consult the cache - cache-is-compile
if unit.cache_valid or self.cache.is_valid(unit_id, unit.content_hash):
output = self.cache.get(unit_id)
if output:
unit.cached_output = output
unit.cache_valid = True
# trigger predictive warm-up
self._predictive_warmup(unit_id)
return output
# compile and time it
start_time = time.time()
if self._compile_callback:
output = self._compile_callback(unit)
else:
# default: return the source unchanged (useful for tests)
output = unit.content
compile_time_ms = int((time.time() - start_time) * 1000)
# compile-is-cache: record the result automatically
self.mark_compiled(unit_id, output, compile_time_ms)
return output
def _predictive_warmup(self, unit_id: str):
"""预测性缓存预热"""
# 获取预测的下一个访问单元
predictions = self.cache.get_predictions(unit_id)
for pred_id in predictions[:2]: # 最多预热2个
if pred_id in self._units and not self.cache.has(pred_id):
# 加入待编译队列
if pred_id not in self._units_to_compile:
self._units_to_compile.append(pred_id)
def mark_compiled(self, unit_id: str, output: str, compile_time_ms: int = 0):
"""标记单元编译完成 - 编译即缓存"""
unit = self._units.get(unit_id)
if unit:
unit.cached_output = output
unit.cache_timestamp = int(time.time() * 1000)
unit.cache_valid = True
unit.state = UnitState.UNCHANGED
# compile-is-cache: record the full details
self.cache.put(
unit_id, output, unit.content_hash,
compile_time_ms=compile_time_ms,
file_path=unit.file_path,
start_line=unit.start_line,
end_line=unit.end_line
)
# remove from the pending list
if unit_id in self._units_to_compile:
self._units_to_compile.remove(unit_id)
def compile_all_pending(self) -> Dict[str, str]:
"""编译所有待编译单元"""
results = {}
for uid in self._units_to_compile.copy():
output = self.compile_unit(uid)
results[uid] = output
return results
def get_combined_output(self, file_path: str) -> str:
"""获取合并后的编译输出(按原始顺序拼接)"""
if file_path not in self._file_units:
return ""
# sort by line number
unit_ids = self._file_units[file_path]
units = [self._units[uid] for uid in unit_ids if uid in self._units]
units.sort(key=lambda u: u.start_line)
# stitch the outputs together
outputs = []
for unit in units:
output = unit.cached_output
if not output and self.cache.has(unit.id):
output = self.cache.get(unit.id)
if output:
outputs.append(output)
return '\n'.join(outputs)
def get_stats(self) -> Dict[str, Any]:
"""获取统计信息"""
cache_stats = self.cache.get_stats()
return {
'total_units': len(self._units),
'pending_units': len(self._units_to_compile),
'files': len(self._file_units),
**cache_stats, # include detailed cache statistics
}
def get_compile_history(self, limit: int = 100) -> List[Dict]:
"""获取编译历史"""
return self.cache.get_compile_history(limit)
def get_hot_units(self, limit: int = 20) -> List[Dict]:
"""获取热点单元"""
return self.cache.get_hot_units(limit)
def get_predictions(self, unit_id: str) -> List[str]:
"""获取预测的下一个访问单元"""
return self.cache.get_predictions(unit_id)
def save(self):
"""保存状态"""
self.cache.save()
def clear(self):
"""清空所有状态"""
self._units.clear()
self._file_units.clear()
self._snapshots.clear()
self._units_to_compile.clear()
self.cache.invalidate_all()
# try to import the C++ extension
_cpp_available = False
try:
from .cpp import incremental_engine as _cpp_engine
_cpp_available = True
except ImportError:
pass
def create_incremental_compiler(cache_dir: str = ".sikuwa_cache",
prefer_cpp: bool = True) -> IncrementalCompiler:
"""
Create an incremental compiler instance
Args:
cache_dir: cache directory
prefer_cpp: whether to prefer the C++ implementation
Returns:
an IncrementalCompiler instance
"""
# currently always returns the Python implementation
# TODO: return a wrapper once the C++ extension is usable
return IncrementalCompiler(cache_dir)
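A sketch of the edit-recompile loop the core is built for (editorial illustration; the lambda stands in for a real compiler backend):

from sikuwa.incremental.core import IncrementalCompiler

ic = IncrementalCompiler(".sikuwa_cache")
ic.set_compiler(lambda unit: f"/* compiled {unit.name or unit.type.name} */")
ic.update_source("m.py", "def f():\n    return 1\n\ndef g():\n    return 2\n")
ic.compile_all_pending()  # first pass compiles both f and g
ic.update_source("m.py", "def f():\n    return 42\n\ndef g():\n    return 2\n")
ic.compile_all_pending()  # second pass recompiles only f; g is a cache hit
print(ic.get_combined_output("m.py"))  # artifacts stitched in source order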

incremental/cpp/CMakeLists.txt Normal file

@@ -0,0 +1,45 @@
# sikuwa/incremental/cpp/CMakeLists.txt
cmake_minimum_required(VERSION 3.14)
project(incremental_engine)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
# find Python and pybind11
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
find_package(pybind11 CONFIG QUIET)
if(NOT pybind11_FOUND)
# if pybind11 is not installed, fetch it with FetchContent
include(FetchContent)
FetchContent_Declare(
pybind11
GIT_REPOSITORY https://github.com/pybind/pybind11.git
GIT_TAG v2.11.1
)
FetchContent_MakeAvailable(pybind11)
endif()
# source files
set(SOURCES
incremental_core.cpp
pybind_incremental.cpp
)
set(HEADERS
incremental_core.h
)
# create the Python module
pybind11_add_module(incremental_engine ${SOURCES} ${HEADERS})
# optimization options
target_compile_options(incremental_engine PRIVATE
$<$<CXX_COMPILER_ID:GNU>:-O3 -Wall -Wextra>
$<$<CXX_COMPILER_ID:Clang>:-O3 -Wall -Wextra>
$<$<CXX_COMPILER_ID:MSVC>:/O2 /W4>
)
# install
install(TARGETS incremental_engine DESTINATION .)
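A build-driver sketch for the extension (editorial illustration; it assumes cmake ≥ 3.14 and a C++17 toolchain are on PATH, run from the repository root):

import subprocess

# configure and build the pybind11 module out-of-tree
subprocess.run(["cmake", "-S", "incremental/cpp", "-B", "build"], check=True)
subprocess.run(["cmake", "--build", "build", "--config", "Release"], check=True)
# the resulting incremental_engine module is what core.py's optional import picks up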

incremental/cpp/incremental_core.cpp Normal file

@@ -0,0 +1,777 @@
// sikuwa/incremental/cpp/incremental_core.cpp
// Incremental compilation core - C++ implementation
#include "incremental_core.h"
#include <fstream>
#include <sstream>
#include <algorithm>
#include <cstring>
#include <iomanip>
namespace sikuwa {
namespace incremental {
// ============================================================================
// Utility functions
// ============================================================================
// simple hash function (FNV-1a)
static uint64_t fnv1a_hash(const char* data, size_t len) {
uint64_t hash = 14695981039346656037ULL;
for (size_t i = 0; i < len; ++i) {
hash ^= static_cast<uint64_t>(data[i]);
hash *= 1099511628211ULL;
}
return hash;
}
std::string generate_unit_id(const std::string& file_path, int start_line,
int end_line, const std::string& content_hash) {
std::ostringstream oss;
oss << file_path << ":" << start_line << ":" << end_line << ":"
<< content_hash.substr(0, 8);
return oss.str();
}
int64_t current_timestamp() {
return std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now().time_since_epoch()
).count();
}
std::string read_file(const std::string& path) {
std::ifstream file(path);
if (!file.is_open()) return "";
std::ostringstream oss;
oss << file.rdbuf();
return oss.str();
}
void write_file(const std::string& path, const std::string& content) {
std::ofstream file(path);
if (file.is_open()) {
file << content;
}
}
std::vector<std::string> split_lines(const std::string& content) {
std::vector<std::string> lines;
std::istringstream iss(content);
std::string line;
while (std::getline(iss, line)) {
lines.push_back(line);
}
return lines;
}
std::string join_lines(const std::vector<std::string>& lines) {
std::ostringstream oss;
for (size_t i = 0; i < lines.size(); ++i) {
if (i > 0) oss << "\n";
oss << lines[i];
}
return oss.str();
}
// ============================================================================
// UnitManager implementation
// ============================================================================
UnitManager::UnitManager() {}
UnitManager::~UnitManager() {}
void UnitManager::add_unit(const CompilationUnit& unit) {
units_[unit.id] = unit;
file_units_[unit.file_path].push_back(unit.id);
}
void UnitManager::update_unit(const std::string& id, const CompilationUnit& unit) {
if (units_.find(id) != units_.end()) {
units_[id] = unit;
}
}
void UnitManager::remove_unit(const std::string& id) {
auto it = units_.find(id);
if (it != units_.end()) {
// remove from the file index
auto& file_ids = file_units_[it->second.file_path];
file_ids.erase(std::remove(file_ids.begin(), file_ids.end(), id), file_ids.end());
// remove from dependency relations
for (const auto& dep_id : it->second.dependencies) {
auto dep_it = units_.find(dep_id);
if (dep_it != units_.end()) {
auto& dependents = dep_it->second.dependents;
dependents.erase(std::remove(dependents.begin(), dependents.end(), id),
dependents.end());
}
}
units_.erase(it);
}
}
CompilationUnit* UnitManager::get_unit(const std::string& id) {
auto it = units_.find(id);
return it != units_.end() ? &it->second : nullptr;
}
const CompilationUnit* UnitManager::get_unit(const std::string& id) const {
auto it = units_.find(id);
return it != units_.end() ? &it->second : nullptr;
}
std::vector<CompilationUnit*> UnitManager::get_units_by_file(const std::string& file_path) {
std::vector<CompilationUnit*> result;
auto it = file_units_.find(file_path);
if (it != file_units_.end()) {
for (const auto& id : it->second) {
if (auto* unit = get_unit(id)) {
result.push_back(unit);
}
}
}
// sort by line number
std::sort(result.begin(), result.end(),
[](const CompilationUnit* a, const CompilationUnit* b) {
return a->start_line < b->start_line;
});
return result;
}
std::vector<CompilationUnit*> UnitManager::get_units_in_range(
const std::string& file_path, int start, int end) {
std::vector<CompilationUnit*> result;
auto units = get_units_by_file(file_path);
for (auto* unit : units) {
// check for overlap
if (unit->start_line <= end && unit->end_line >= start) {
result.push_back(unit);
}
}
return result;
}
void UnitManager::add_dependency(const std::string& from_id, const std::string& to_id) {
auto* from_unit = get_unit(from_id);
auto* to_unit = get_unit(to_id);
if (from_unit && to_unit) {
// `from` depends on `to`
if (std::find(from_unit->dependencies.begin(), from_unit->dependencies.end(), to_id)
== from_unit->dependencies.end()) {
from_unit->dependencies.push_back(to_id);
}
// `to` is depended on by `from`
if (std::find(to_unit->dependents.begin(), to_unit->dependents.end(), from_id)
== to_unit->dependents.end()) {
to_unit->dependents.push_back(from_id);
}
}
}
void UnitManager::remove_dependency(const std::string& from_id, const std::string& to_id) {
auto* from_unit = get_unit(from_id);
auto* to_unit = get_unit(to_id);
if (from_unit) {
auto& deps = from_unit->dependencies;
deps.erase(std::remove(deps.begin(), deps.end(), to_id), deps.end());
}
if (to_unit) {
auto& dependents = to_unit->dependents;
dependents.erase(std::remove(dependents.begin(), dependents.end(), from_id),
dependents.end());
}
}
std::vector<std::string> UnitManager::get_dependencies(const std::string& id) const {
const auto* unit = get_unit(id);
return unit ? unit->dependencies : std::vector<std::string>{};
}
std::vector<std::string> UnitManager::get_dependents(const std::string& id) const {
const auto* unit = get_unit(id);
return unit ? unit->dependents : std::vector<std::string>{};
}
void UnitManager::collect_affected_recursive(const std::string& id,
std::unordered_set<std::string>& visited) const {
if (visited.count(id)) return;
visited.insert(id);
const auto* unit = get_unit(id);
if (!unit) return;
// recursively collect every unit that depends on this one
for (const auto& dependent_id : unit->dependents) {
collect_affected_recursive(dependent_id, visited);
}
}
std::vector<std::string> UnitManager::get_affected_units(const std::string& changed_id) const {
std::unordered_set<std::string> visited;
collect_affected_recursive(changed_id, visited);
visited.erase(changed_id); // exclude the unit itself
return std::vector<std::string>(visited.begin(), visited.end());
}
void UnitManager::for_each(std::function<void(CompilationUnit&)> callback) {
for (auto& pair : units_) {
callback(pair.second);
}
}
void UnitManager::clear() {
units_.clear();
file_units_.clear();
}
std::string UnitManager::serialize() const {
std::ostringstream oss;
oss << units_.size() << "\n";
for (const auto& pair : units_) {
const auto& u = pair.second;
oss << u.id << "\t" << u.file_path << "\t" << u.start_line << "\t"
<< u.end_line << "\t" << static_cast<int>(u.type) << "\t"
<< u.name << "\t" << u.content_hash << "\t"
<< u.dependencies.size();
for (const auto& dep : u.dependencies) {
oss << "\t" << dep;
}
oss << "\n";
}
return oss.str();
}
void UnitManager::deserialize(const std::string& data) {
clear();
std::istringstream iss(data);
size_t count;
iss >> count;
iss.ignore();
for (size_t i = 0; i < count; ++i) {
std::string line;
std::getline(iss, line);
std::istringstream line_iss(line);
CompilationUnit u;
int type_int;
size_t dep_count;
std::getline(line_iss, u.id, '\t');
std::getline(line_iss, u.file_path, '\t');
line_iss >> u.start_line;
line_iss.ignore();
line_iss >> u.end_line;
line_iss.ignore();
line_iss >> type_int;
u.type = static_cast<UnitType>(type_int);
line_iss.ignore();
std::getline(line_iss, u.name, '\t');
std::getline(line_iss, u.content_hash, '\t');
line_iss >> dep_count;
for (size_t j = 0; j < dep_count; ++j) {
std::string dep;
line_iss.ignore();
std::getline(line_iss, dep, '\t');
if (!dep.empty()) {
u.dependencies.push_back(dep);
}
}
add_unit(u);
}
// rebuild dependency relations
for (auto& pair : units_) {
for (const auto& dep_id : pair.second.dependencies) {
auto* dep_unit = get_unit(dep_id);
if (dep_unit) {
dep_unit->dependents.push_back(pair.first);
}
}
}
}
// ============================================================================
// ChangeDetector implementation
// ============================================================================
ChangeDetector::ChangeDetector() {}
ChangeDetector::~ChangeDetector() {}
std::string ChangeDetector::compute_hash(const std::string& content) {
uint64_t hash = fnv1a_hash(content.c_str(), content.size());
std::ostringstream oss;
oss << std::hex << std::setfill('0') << std::setw(16) << hash;
return oss.str();
}
std::string ChangeDetector::compute_line_hash(const std::string& line) {
// hash after trimming leading/trailing whitespace
size_t start = line.find_first_not_of(" \t\r\n");
size_t end = line.find_last_not_of(" \t\r\n");
if (start == std::string::npos) {
return "empty";
}
std::string trimmed = line.substr(start, end - start + 1);
return compute_hash(trimmed);
}
Snapshot ChangeDetector::create_snapshot(const std::string& file_path,
const std::string& content) {
Snapshot snap;
snap.file_path = file_path;
snap.content_hash = compute_hash(content);
snap.timestamp = current_timestamp();
auto lines = split_lines(content);
snap.line_hashes.reserve(lines.size());
for (const auto& line : lines) {
snap.line_hashes.push_back(compute_line_hash(line));
}
return snap;
}
std::vector<int> ChangeDetector::get_changed_lines(const Snapshot& old_snap,
const Snapshot& new_snap) {
std::vector<int> changed;
size_t new_size = new_snap.line_hashes.size();
// precise diff via the LCS algorithm
auto lcs = compute_lcs(old_snap.line_hashes, new_snap.line_hashes);
// mark every line not in the LCS as changed
std::unordered_set<int> lcs_new_lines;
for (const auto& pair : lcs) {
lcs_new_lines.insert(pair.second);
}
for (size_t i = 0; i < new_size; ++i) {
if (lcs_new_lines.find(static_cast<int>(i)) == lcs_new_lines.end()) {
changed.push_back(static_cast<int>(i) + 1); // 1-based
}
}
return changed;
}
std::vector<std::pair<int, int>> ChangeDetector::compute_lcs(
const std::vector<std::string>& old_lines,
const std::vector<std::string>& new_lines) {
int m = static_cast<int>(old_lines.size());
int n = static_cast<int>(new_lines.size());
// DP table
std::vector<std::vector<int>> dp(m + 1, std::vector<int>(n + 1, 0));
for (int i = 1; i <= m; ++i) {
for (int j = 1; j <= n; ++j) {
if (old_lines[i - 1] == new_lines[j - 1]) {
dp[i][j] = dp[i - 1][j - 1] + 1;
} else {
dp[i][j] = std::max(dp[i - 1][j], dp[i][j - 1]);
}
}
}
// backtrack to recover the LCS pairing
std::vector<std::pair<int, int>> lcs;
int i = m, j = n;
while (i > 0 && j > 0) {
if (old_lines[i - 1] == new_lines[j - 1]) {
lcs.push_back({i - 1, j - 1});
--i; --j;
} else if (dp[i - 1][j] > dp[i][j - 1]) {
--i;
} else {
--j;
}
}
std::reverse(lcs.begin(), lcs.end());
return lcs;
}
std::vector<ChangeRecord> ChangeDetector::detect_changes(const Snapshot& old_snap,
const Snapshot& new_snap) {
std::vector<ChangeRecord> records;
// compare the compilation units of the two snapshots
std::unordered_set<std::string> old_ids, new_ids;
for (const auto& pair : old_snap.units) {
old_ids.insert(pair.first);
}
for (const auto& pair : new_snap.units) {
new_ids.insert(pair.first);
}
// detect deleted units
for (const auto& id : old_ids) {
if (new_ids.find(id) == new_ids.end()) {
ChangeRecord rec;
rec.unit_id = id;
rec.change_type = UnitState::DELETED;
const auto& old_unit = old_snap.units.at(id);
rec.old_start_line = old_unit.start_line;
rec.old_end_line = old_unit.end_line;
rec.reason = "unit deleted";
records.push_back(rec);
}
}
// detect added and modified units
for (const auto& pair : new_snap.units) {
const auto& new_unit = pair.second;
auto old_it = old_snap.units.find(pair.first);
if (old_it == old_snap.units.end()) {
// added
ChangeRecord rec;
rec.unit_id = pair.first;
rec.change_type = UnitState::ADDED;
rec.new_start_line = new_unit.start_line;
rec.new_end_line = new_unit.end_line;
rec.reason = "unit added";
records.push_back(rec);
} else {
// check for modification
const auto& old_unit = old_it->second;
if (old_unit.content_hash != new_unit.content_hash) {
ChangeRecord rec;
rec.unit_id = pair.first;
rec.change_type = UnitState::MODIFIED;
rec.old_start_line = old_unit.start_line;
rec.old_end_line = old_unit.end_line;
rec.new_start_line = new_unit.start_line;
rec.new_end_line = new_unit.end_line;
rec.reason = "content changed";
records.push_back(rec);
}
}
}
return records;
}
// ============================================================================
// CompilationCache implementation
// ============================================================================
CompilationCache::CompilationCache(const std::string& cache_dir)
: cache_dir_(cache_dir), hits_(0), misses_(0) {}
CompilationCache::~CompilationCache() {
save();
}
bool CompilationCache::has(const std::string& unit_id) const {
return cache_.find(unit_id) != cache_.end();
}
std::string CompilationCache::get(const std::string& unit_id) const {
auto it = cache_.find(unit_id);
if (it != cache_.end()) {
++hits_;
return it->second.output;
}
++misses_;
return "";
}
void CompilationCache::put(const std::string& unit_id, const std::string& output,
const std::string& content_hash) {
CacheEntry entry;
entry.output = output;
entry.content_hash = content_hash;
entry.timestamp = current_timestamp();
cache_[unit_id] = entry;
}
void CompilationCache::invalidate(const std::string& unit_id) {
cache_.erase(unit_id);
}
void CompilationCache::invalidate_all() {
cache_.clear();
}
bool CompilationCache::is_valid(const std::string& unit_id,
const std::string& current_hash) const {
auto it = cache_.find(unit_id);
if (it == cache_.end()) return false;
return it->second.content_hash == current_hash;
}
void CompilationCache::save() {
std::string cache_file = cache_dir_ + "/incremental_cache.dat";
std::ofstream file(cache_file);
if (!file.is_open()) return;
file << cache_.size() << "\n";
for (const auto& pair : cache_) {
file << pair.first << "\n";
file << pair.second.content_hash << "\n";
file << pair.second.timestamp << "\n";
file << pair.second.output.size() << "\n";
file << pair.second.output;
}
}
void CompilationCache::load() {
std::string cache_file = cache_dir_ + "/incremental_cache.dat";
std::ifstream file(cache_file);
if (!file.is_open()) return;
size_t count;
file >> count;
file.ignore();
for (size_t i = 0; i < count; ++i) {
std::string unit_id, content_hash;
int64_t timestamp;
size_t output_size;
std::getline(file, unit_id);
std::getline(file, content_hash);
file >> timestamp >> output_size;
file.ignore();
std::string output(output_size, '\0');
file.read(&output[0], output_size);
CacheEntry entry;
entry.output = output;
entry.content_hash = content_hash;
entry.timestamp = timestamp;
cache_[unit_id] = entry;
}
}
// ============================================================================
// IncrementalEngine implementation
// ============================================================================
IncrementalEngine::IncrementalEngine(const std::string& cache_dir)
: cache_(cache_dir) {
cache_.load();
}
IncrementalEngine::~IncrementalEngine() {
save_state();
}
void IncrementalEngine::register_units(const std::string& file_path,
const std::vector<CompilationUnit>& units) {
// remove the file's old units
auto old_units = units_.get_units_by_file(file_path);
for (auto* old_unit : old_units) {
units_.remove_unit(old_unit->id);
}
// add the new units
for (const auto& unit : units) {
units_.add_unit(unit);
}
}
std::vector<ChangeRecord> IncrementalEngine::update_source(
const std::string& file_path, const std::string& new_content) {
// create a new snapshot
Snapshot new_snap = detector_.create_snapshot(file_path, new_content);
// look up the old snapshot
auto old_it = snapshots_.find(file_path);
std::vector<ChangeRecord> changes;
if (old_it != snapshots_.end()) {
// the changed lines
auto changed_lines = detector_.get_changed_lines(old_it->second, new_snap);
// find the affected compilation units
std::unordered_set<std::string> affected_ids;
for (int line : changed_lines) {
auto units = units_.get_units_in_range(file_path, line, line);
for (auto* unit : units) {
affected_ids.insert(unit->id);
// mark as modified
unit->state = UnitState::MODIFIED;
unit->cache_valid = false;
// collect all affected dependent units
auto dependents = units_.get_affected_units(unit->id);
for (const auto& dep_id : dependents) {
affected_ids.insert(dep_id);
auto* dep_unit = units_.get_unit(dep_id);
if (dep_unit) {
dep_unit->state = UnitState::AFFECTED;
dep_unit->cache_valid = false;
}
}
}
}
// expand to complete boundaries
std::vector<std::string> ids_to_expand(affected_ids.begin(), affected_ids.end());
expand_to_boundaries(file_path, ids_to_expand);
affected_ids = std::unordered_set<std::string>(ids_to_expand.begin(), ids_to_expand.end());
// emit change records
for (const auto& id : affected_ids) {
auto* unit = units_.get_unit(id);
if (unit) {
ChangeRecord rec;
rec.unit_id = id;
rec.change_type = unit->state;
rec.new_start_line = unit->start_line;
rec.new_end_line = unit->end_line;
changes.push_back(rec);
}
}
// units that need recompiling
units_to_compile_.clear();
for (const auto& id : affected_ids) {
units_to_compile_.push_back(id);
}
} else {
// first build: every unit needs compiling
auto units = units_.get_units_by_file(file_path);
for (auto* unit : units) {
unit->state = UnitState::ADDED;
units_to_compile_.push_back(unit->id);
ChangeRecord rec;
rec.unit_id = unit->id;
rec.change_type = UnitState::ADDED;
rec.new_start_line = unit->start_line;
rec.new_end_line = unit->end_line;
changes.push_back(rec);
}
}
// update the snapshot
new_snap.units = std::unordered_map<std::string, CompilationUnit>();
for (auto* unit : units_.get_units_by_file(file_path)) {
new_snap.units[unit->id] = *unit;
}
snapshots_[file_path] = new_snap;
return changes;
}
std::vector<std::string> IncrementalEngine::get_units_to_compile() const {
return units_to_compile_;
}
void IncrementalEngine::mark_compiled(const std::string& unit_id,
const std::string& output) {
auto* unit = units_.get_unit(unit_id);
if (unit) {
unit->cached_output = output;
unit->cache_timestamp = current_timestamp();
unit->cache_valid = true;
unit->state = UnitState::UNCHANGED;
// update the cache
cache_.put(unit_id, output, unit->content_hash);
}
// remove from the pending list
units_to_compile_.erase(
std::remove(units_to_compile_.begin(), units_to_compile_.end(), unit_id),
units_to_compile_.end()
);
}
std::string IncrementalEngine::get_combined_output(const std::string& file_path) const {
std::ostringstream oss;
auto units = const_cast<UnitManager&>(units_).get_units_by_file(file_path);
// order by line number
std::sort(units.begin(), units.end(),
[](const CompilationUnit* a, const CompilationUnit* b) {
return a->start_line < b->start_line;
});
for (size_t i = 0; i < units.size(); ++i) {
const auto* unit = units[i];
// prefer the cache
std::string output;
if (unit->cache_valid) {
output = unit->cached_output;
} else if (cache_.is_valid(unit->id, unit->content_hash)) {
output = cache_.get(unit->id);
}
if (!output.empty()) {
if (i > 0) oss << "\n";
oss << output;
}
}
return oss.str();
}
void IncrementalEngine::expand_to_boundaries(const std::string& file_path,
std::vector<std::string>& unit_ids) {
std::unordered_set<std::string> expanded(unit_ids.begin(), unit_ids.end());
for (const auto& id : unit_ids) {
auto* unit = units_.get_unit(id);
if (!unit) continue;
        // Ensure whole structures (functions, classes) stay intact
        if (unit->type == UnitType::FUNCTION || unit->type == UnitType::CLASS) {
            // Already a complete structure; nothing to expand
            continue;
        }
        // Check whether this unit sits inside a larger structure
auto all_units = units_.get_units_by_file(file_path);
for (auto* parent : all_units) {
if (parent->id == id) continue;
            // The current unit lies within the parent's line range
            if (parent->start_line <= unit->start_line &&
                parent->end_line >= unit->end_line) {
                // A function or class parent must be recompiled as a whole
if (parent->type == UnitType::FUNCTION || parent->type == UnitType::CLASS) {
expanded.insert(parent->id);
parent->state = UnitState::AFFECTED;
parent->cache_valid = false;
}
}
}
}
unit_ids = std::vector<std::string>(expanded.begin(), expanded.end());
}
void IncrementalEngine::save_state() {
    // Persist the compilation cache; unit state can be saved separately
    // via units_.serialize() if needed
    cache_.save();
}
void IncrementalEngine::load_state() {
cache_.load();
}
} // namespace incremental
} // namespace sikuwa
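
The expand_to_boundaries pass above is the boundary trigger: an edited statement that sits inside a function or class promotes the whole enclosing structure into the recompile set. A minimal Python paraphrase of that containment rule, using a hypothetical Unit record (the names are illustrative, not the engine's API):

from dataclasses import dataclass

@dataclass
class Unit:
    # hypothetical stand-in for CompilationUnit
    id: str
    kind: str    # "function", "class", "statement", ...
    start: int   # 1-based first line
    end: int     # 1-based last line

def expand_to_boundaries(changed, all_units):
    """Promote changed statements to their enclosing function/class."""
    expanded = {u.id for u in changed}
    for u in changed:
        if u.kind in ("function", "class"):
            continue  # already a complete structure
        for parent in all_units:
            if (parent.id != u.id and parent.kind in ("function", "class")
                    and parent.start <= u.start and parent.end >= u.end):
                expanded.add(parent.id)
    return expanded

units = [Unit("f", "function", 1, 4), Unit("s", "statement", 2, 2)]
print(expand_to_boundaries([units[1]], units))  # {'f', 's'} (order may vary)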

283
incremental/cpp/incremental_core.h Normal file

@@ -0,0 +1,283 @@
// sikuwa/incremental/cpp/incremental_core.h
// Incremental compilation core - high-performance C++ components
// Compile what you point at: rebuild only the parts of the source that changed
#ifndef SIKUWA_INCREMENTAL_CORE_H
#define SIKUWA_INCREMENTAL_CORE_H
#include <string>
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <memory>
#include <functional>
#include <optional>
#include <chrono>
namespace sikuwa {
namespace incremental {
// ============================================================================
// Compilation unit types
// ============================================================================
enum class UnitType {
    LINE,       // a single line
    STATEMENT,  // a statement
    FUNCTION,   // a function
    CLASS,      // a class
    MODULE,     // module level
    IMPORT,     // an import statement
    DECORATOR,  // a decorator
    BLOCK       // a code block
};
// ============================================================================
// Compilation unit states
// ============================================================================
enum class UnitState {
    UNKNOWN,    // not yet classified
    UNCHANGED,  // unchanged
    MODIFIED,   // modified
    ADDED,      // newly added
    DELETED,    // deleted
    AFFECTED    // affected (a dependency changed)
};
// ============================================================================
// Compilation unit - the smallest compilation granularity
// ============================================================================
struct CompilationUnit {
    std::string id;                        // unique id: file:start_line:end_line:hash
    std::string file_path;                 // source file path
    int start_line;                        // first line (1-based)
    int end_line;                          // last line (1-based)
    UnitType type;                         // unit type
    std::string name;                      // name (function/class name, etc.)
    std::string content_hash;              // content hash
    std::vector<std::string> dependencies; // ids of units this unit depends on
    std::vector<std::string> dependents;   // ids of units that depend on this one
    UnitState state;                       // current state
    // Cache fields
    std::string cached_output;             // cached compilation output
    int64_t cache_timestamp;               // cache timestamp
    bool cache_valid;                      // whether the cached output is still valid
    CompilationUnit()
        : start_line(0), end_line(0), type(UnitType::LINE),
          state(UnitState::UNKNOWN), cache_timestamp(0), cache_valid(false) {}
};
// ============================================================================
// Version snapshot - used for change detection
// ============================================================================
struct Snapshot {
    std::string file_path;
    std::string content_hash;                               // hash of the full content
    std::vector<std::string> line_hashes;                   // per-line hashes
    std::unordered_map<std::string, CompilationUnit> units; // compilation units
    int64_t timestamp;
    Snapshot() : timestamp(0) {}
};
// ============================================================================
// Change record
// ============================================================================
struct ChangeRecord {
    std::string unit_id;
    UnitState change_type;
    int old_start_line;
    int old_end_line;
    int new_start_line;
    int new_end_line;
    std::string reason;  // why it changed
};
// ============================================================================
// Unit manager - owns every compilation unit
// ============================================================================
class UnitManager {
public:
    UnitManager();
    ~UnitManager();
    // Add / update compilation units
    void add_unit(const CompilationUnit& unit);
    void update_unit(const std::string& id, const CompilationUnit& unit);
    void remove_unit(const std::string& id);
    // Queries
    CompilationUnit* get_unit(const std::string& id);
    const CompilationUnit* get_unit(const std::string& id) const;
    std::vector<CompilationUnit*> get_units_by_file(const std::string& file_path);
    std::vector<CompilationUnit*> get_units_in_range(const std::string& file_path, int start, int end);
    // Dependency relations
    void add_dependency(const std::string& from_id, const std::string& to_id);
    void remove_dependency(const std::string& from_id, const std::string& to_id);
    std::vector<std::string> get_dependencies(const std::string& id) const;
    std::vector<std::string> get_dependents(const std::string& id) const;
    std::vector<std::string> get_affected_units(const std::string& changed_id) const;
    // Traversal
    void for_each(std::function<void(CompilationUnit&)> callback);
    size_t size() const { return units_.size(); }
    void clear();
    // Serialization
    std::string serialize() const;
    void deserialize(const std::string& data);
private:
    std::unordered_map<std::string, CompilationUnit> units_;
    std::unordered_map<std::string, std::vector<std::string>> file_units_; // file -> unit_ids
    // Recursively collect every affected unit
    void collect_affected_recursive(const std::string& id,
                                    std::unordered_set<std::string>& visited) const;
};
// ============================================================================
// Change detector - detects source modifications
// ============================================================================
class ChangeDetector {
public:
    ChangeDetector();
    ~ChangeDetector();
    // Build a snapshot
    Snapshot create_snapshot(const std::string& file_path, const std::string& content);
    // Detect changes
    std::vector<ChangeRecord> detect_changes(const Snapshot& old_snap, const Snapshot& new_snap);
    // Locate the changed lines
    std::vector<int> get_changed_lines(const Snapshot& old_snap, const Snapshot& new_snap);
    // Hashing
    static std::string compute_hash(const std::string& content);
    static std::string compute_line_hash(const std::string& line);
private:
    // LCS-based diff to find the changes
    std::vector<std::pair<int, int>> compute_lcs(const std::vector<std::string>& old_lines,
                                                 const std::vector<std::string>& new_lines);
};
// ============================================================================
// Compilation cache - stores compilation outputs
// ============================================================================
class CompilationCache {
public:
    CompilationCache(const std::string& cache_dir);
    ~CompilationCache();
    // Cache operations
    bool has(const std::string& unit_id) const;
    std::string get(const std::string& unit_id) const;
    void put(const std::string& unit_id, const std::string& output, const std::string& content_hash);
    void invalidate(const std::string& unit_id);
    void invalidate_all();
    // Cache validation
    bool is_valid(const std::string& unit_id, const std::string& current_hash) const;
    // Persistence
    void save();
    void load();
    // Statistics
    size_t size() const { return cache_.size(); }
    size_t hit_count() const { return hits_; }
    size_t miss_count() const { return misses_; }
private:
    struct CacheEntry {
        std::string output;
        std::string content_hash;
        int64_t timestamp;
    };
    std::string cache_dir_;
    std::unordered_map<std::string, CacheEntry> cache_;
    mutable size_t hits_;
    mutable size_t misses_;
};
// ============================================================================
// Incremental compilation engine
// ============================================================================
class IncrementalEngine {
public:
    IncrementalEngine(const std::string& cache_dir);
    ~IncrementalEngine();
    // Register compilation units
    void register_units(const std::string& file_path,
                        const std::vector<CompilationUnit>& units);
    // Update the source and detect changes
    std::vector<ChangeRecord> update_source(const std::string& file_path,
                                            const std::string& new_content);
    // Units that need recompiling
    std::vector<std::string> get_units_to_compile() const;
    // Mark a unit as compiled
    void mark_compiled(const std::string& unit_id, const std::string& output);
    // Combined output (units concatenated in source order)
    std::string get_combined_output(const std::string& file_path) const;
    // Cache access
    CompilationCache& cache() { return cache_; }
    const CompilationCache& cache() const { return cache_; }
    // Unit access
    UnitManager& units() { return units_; }
    const UnitManager& units() const { return units_; }
    // State persistence
    void save_state();
    void load_state();
private:
    UnitManager units_;
    ChangeDetector detector_;
    CompilationCache cache_;
    std::unordered_map<std::string, Snapshot> snapshots_; // file -> snapshot
    std::vector<std::string> units_to_compile_;
    // Expand the compile set to complete structures
    void expand_to_boundaries(const std::string& file_path,
                              std::vector<std::string>& unit_ids);
};
// ============================================================================
// Utility functions
// ============================================================================
// Build a unit id
std::string generate_unit_id(const std::string& file_path, int start_line,
                             int end_line, const std::string& content_hash);
// Current timestamp
int64_t current_timestamp();
// Read a file
std::string read_file(const std::string& path);
// Write a file
void write_file(const std::string& path, const std::string& content);
// Split content into lines
std::vector<std::string> split_lines(const std::string& content);
// Join lines back together
std::string join_lines(const std::vector<std::string>& lines);
} // namespace incremental
} // namespace sikuwa
#endif // SIKUWA_INCREMENTAL_CORE_H
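
The header fixes the unit-id convention (file:start_line:end_line:hash) and promises per-line hashing, but leaves the digest unspecified. A sketch of the two helpers in Python, assuming SHA-256 (the C++ implementation may well use a different or cheaper hash):

import hashlib

def compute_line_hash(line: str) -> str:
    # assumption: SHA-256 over UTF-8 bytes; the header only requires a stable hash
    return hashlib.sha256(line.encode("utf-8")).hexdigest()

def generate_unit_id(file_path: str, start_line: int, end_line: int, content_hash: str) -> str:
    # mirrors the documented id format: file:start_line:end_line:hash
    return f"{file_path}:{start_line}:{end_line}:{content_hash}"

print(generate_unit_id("demo.py", 1, 2, compute_line_hash("def hello(): ...")[:12]))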

130
incremental/cpp/pybind_incremental.cpp Normal file

@@ -0,0 +1,130 @@
// sikuwa/incremental/cpp/pybind_incremental.cpp
// Python bindings - built with pybind11
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "incremental_core.h"
namespace py = pybind11;
using namespace sikuwa::incremental;
PYBIND11_MODULE(incremental_engine, m) {
    m.doc() = "Sikuwa incremental compilation engine - compile what you point at";
    // Enums
py::enum_<UnitType>(m, "UnitType")
.value("LINE", UnitType::LINE)
.value("STATEMENT", UnitType::STATEMENT)
.value("FUNCTION", UnitType::FUNCTION)
.value("CLASS", UnitType::CLASS)
.value("MODULE", UnitType::MODULE)
.value("IMPORT", UnitType::IMPORT)
.value("DECORATOR", UnitType::DECORATOR)
.value("BLOCK", UnitType::BLOCK);
py::enum_<UnitState>(m, "UnitState")
.value("UNKNOWN", UnitState::UNKNOWN)
.value("UNCHANGED", UnitState::UNCHANGED)
.value("MODIFIED", UnitState::MODIFIED)
.value("ADDED", UnitState::ADDED)
.value("DELETED", UnitState::DELETED)
.value("AFFECTED", UnitState::AFFECTED);
// CompilationUnit
py::class_<CompilationUnit>(m, "CompilationUnit")
.def(py::init<>())
.def_readwrite("id", &CompilationUnit::id)
.def_readwrite("file_path", &CompilationUnit::file_path)
.def_readwrite("start_line", &CompilationUnit::start_line)
.def_readwrite("end_line", &CompilationUnit::end_line)
.def_readwrite("type", &CompilationUnit::type)
.def_readwrite("name", &CompilationUnit::name)
.def_readwrite("content_hash", &CompilationUnit::content_hash)
.def_readwrite("dependencies", &CompilationUnit::dependencies)
.def_readwrite("dependents", &CompilationUnit::dependents)
.def_readwrite("state", &CompilationUnit::state)
.def_readwrite("cached_output", &CompilationUnit::cached_output)
.def_readwrite("cache_valid", &CompilationUnit::cache_valid);
// ChangeRecord
py::class_<ChangeRecord>(m, "ChangeRecord")
.def(py::init<>())
.def_readwrite("unit_id", &ChangeRecord::unit_id)
.def_readwrite("change_type", &ChangeRecord::change_type)
.def_readwrite("old_start_line", &ChangeRecord::old_start_line)
.def_readwrite("old_end_line", &ChangeRecord::old_end_line)
.def_readwrite("new_start_line", &ChangeRecord::new_start_line)
.def_readwrite("new_end_line", &ChangeRecord::new_end_line)
.def_readwrite("reason", &ChangeRecord::reason);
// Snapshot
py::class_<Snapshot>(m, "Snapshot")
.def(py::init<>())
.def_readwrite("file_path", &Snapshot::file_path)
.def_readwrite("content_hash", &Snapshot::content_hash)
.def_readwrite("line_hashes", &Snapshot::line_hashes)
.def_readwrite("timestamp", &Snapshot::timestamp);
// UnitManager
py::class_<UnitManager>(m, "UnitManager")
.def(py::init<>())
.def("add_unit", &UnitManager::add_unit)
.def("update_unit", &UnitManager::update_unit)
.def("remove_unit", &UnitManager::remove_unit)
.def("get_unit", py::overload_cast<const std::string&>(&UnitManager::get_unit),
py::return_value_policy::reference)
.def("get_units_by_file", &UnitManager::get_units_by_file,
py::return_value_policy::reference)
.def("get_units_in_range", &UnitManager::get_units_in_range,
py::return_value_policy::reference)
.def("add_dependency", &UnitManager::add_dependency)
.def("remove_dependency", &UnitManager::remove_dependency)
.def("get_dependencies", &UnitManager::get_dependencies)
.def("get_dependents", &UnitManager::get_dependents)
.def("get_affected_units", &UnitManager::get_affected_units)
.def("size", &UnitManager::size)
.def("clear", &UnitManager::clear)
.def("serialize", &UnitManager::serialize)
.def("deserialize", &UnitManager::deserialize);
// ChangeDetector
py::class_<ChangeDetector>(m, "ChangeDetector")
.def(py::init<>())
.def("create_snapshot", &ChangeDetector::create_snapshot)
.def("detect_changes", &ChangeDetector::detect_changes)
.def("get_changed_lines", &ChangeDetector::get_changed_lines)
.def_static("compute_hash", &ChangeDetector::compute_hash)
.def_static("compute_line_hash", &ChangeDetector::compute_line_hash);
// CompilationCache
py::class_<CompilationCache>(m, "CompilationCache")
.def(py::init<const std::string&>())
.def("has", &CompilationCache::has)
.def("get", &CompilationCache::get)
.def("put", &CompilationCache::put)
.def("invalidate", &CompilationCache::invalidate)
.def("invalidate_all", &CompilationCache::invalidate_all)
.def("is_valid", &CompilationCache::is_valid)
.def("save", &CompilationCache::save)
.def("load", &CompilationCache::load)
.def("size", &CompilationCache::size)
.def("hit_count", &CompilationCache::hit_count)
.def("miss_count", &CompilationCache::miss_count);
// IncrementalEngine
py::class_<IncrementalEngine>(m, "IncrementalEngine")
.def(py::init<const std::string&>())
.def("register_units", &IncrementalEngine::register_units)
.def("update_source", &IncrementalEngine::update_source)
.def("get_units_to_compile", &IncrementalEngine::get_units_to_compile)
.def("mark_compiled", &IncrementalEngine::mark_compiled)
.def("get_combined_output", &IncrementalEngine::get_combined_output)
.def("save_state", &IncrementalEngine::save_state)
.def("load_state", &IncrementalEngine::load_state);
    // Utility functions
m.def("generate_unit_id", &generate_unit_id);
m.def("compute_hash", &ChangeDetector::compute_hash);
m.def("split_lines", &split_lines);
m.def("join_lines", &join_lines);
}
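
Assuming the extension builds to the incremental_engine module declared above, a minimal compile loop driven from Python could look like this (the stand-in "compiled" output is purely illustrative):

import incremental_engine as ie

engine = ie.IncrementalEngine(".sikuwa_cache")
src = 'def hello():\n    print("hi")\n'

unit = ie.CompilationUnit()
unit.file_path, unit.start_line, unit.end_line = "demo.py", 1, 2
unit.type = ie.UnitType.FUNCTION
unit.content_hash = ie.compute_hash(src)
unit.id = ie.generate_unit_id(unit.file_path, 1, 2, unit.content_hash)

engine.register_units("demo.py", [unit])
engine.update_source("demo.py", src)                 # first build: everything is ADDED
for uid in engine.get_units_to_compile():
    engine.mark_compiled(uid, f"// compiled {uid}")  # stand-in for a real backend
print(engine.get_combined_output("demo.py"))
engine.save_state()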

295
incremental/demo.py Normal file

@@ -0,0 +1,295 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
减量编译演示 - Sikuwa Incremental Compilation Demo
展示"指哪编哪"的精准编译能力
"""
import tempfile
from pathlib import Path
from incremental import (
IncrementalCompiler,
PythonAnalyzer,
BlockType
)
def demo_analyzer():
    """Demo: the code analyzer"""
    print("=" * 60)
    print("1. Code analyzer demo")
    print("=" * 60)
analyzer = PythonAnalyzer()
code = '''
import os
from pathlib import Path
x = 10
y = 20
def add(a, b):
"""加法"""
return a + b
def multiply(a, b):
"""乘法"""
return a * b
class Calculator:
"""计算器类"""
def __init__(self):
self.history = []
def calculate(self, op, a, b):
if op == '+':
result = add(a, b)
elif op == '*':
result = multiply(a, b)
self.history.append(result)
return result
'''
    blocks = analyzer.analyze(code, "demo.py")
    print(f"\nDetected {len(blocks)} code blocks:\n")
    for block in blocks:
        type_name = block.type.name.lower()
        deps = ', '.join(block.references[:5]) if block.references else ''
        print(f"  [{type_name:10}] {block.name:20} lines {block.start_line:2}-{block.end_line:2}  refs: {deps}")
def demo_change_detection():
    """Demo: change detection"""
    print("\n" + "=" * 60)
    print("2. Change detection demo")
    print("=" * 60)
    with tempfile.TemporaryDirectory() as tmpdir:
        compiler = IncrementalCompiler(tmpdir)
        # Mock compiler
        compile_count = [0]
        def mock_compile(unit):
            compile_count[0] += 1
            return f"COMPILED: {unit.name or 'unknown'}"
        compiler.set_compiler(mock_compile)
        # Initial code
code_v1 = '''
def hello():
print("Hello")
def world():
print("World")
def main():
hello()
world()
'''
print("\n[v1] 初始代码:")
compiler.analyze_source("demo.py", code_v1)
changes = compiler.update_source("demo.py", code_v1)
print(f" 检测到 {len(changes)} 个新增单元")
outputs = compiler.compile_all_pending()
print(f" 编译了 {compile_count[0]} 个单元")
# 修改一个函数
code_v2 = '''
def hello():
print("Hello, World!") # 修改了这行
def world():
print("World")
def main():
hello()
world()
'''
        compile_count[0] = 0
        print("\n[v2] Modified the hello function:")
        changes = compiler.update_source("demo.py", code_v2)
        print(f"  Detected {len(changes)} changed units")
        for ch in changes:
            print(f"    - {ch.unit_id[:40]}... ({ch.change_type.name})")
        outputs = compiler.compile_all_pending()
        print(f"  Recompiled only {compile_count[0]} units (the rest came from cache)")
        # Add a new function
code_v3 = '''
def hello():
print("Hello, World!")
def world():
print("World")
def greet(name):
print(f"Hi, {name}!")
def main():
hello()
world()
greet("Sikuwa")
'''
        compile_count[0] = 0
        print("\n[v3] Added the greet function:")
        changes = compiler.update_source("demo.py", code_v3)
        print(f"  Detected {len(changes)} changed units")
        outputs = compiler.compile_all_pending()
        print(f"  Compiled {compile_count[0]} new/changed units")
        # Stats
        stats = compiler.get_stats()
        print(f"\nStats: cache hits {stats.get('cache_hits', 0)}, total compiled {stats.get('total_compiled', 0)}")
def demo_dependency_tracking():
    """Demo: dependency tracking"""
    print("\n" + "=" * 60)
    print("3. Dependency tracking demo")
    print("=" * 60)
    with tempfile.TemporaryDirectory() as tmpdir:
        compiler = IncrementalCompiler(tmpdir)
        affected_units = []
        def mock_compile(unit):
            affected_units.append(unit.name or unit.id[:20])
            return "COMPILED"
        compiler.set_compiler(mock_compile)
code_v1 = '''
# base configuration
CONFIG = {"debug": False}
def get_config():
return CONFIG
def process():
cfg = get_config()
return cfg["debug"]
def main():
result = process()
print(result)
'''
print("\n初始编译...")
compiler.analyze_source("demo.py", code_v1)
compiler.update_source("demo.py", code_v1)
compiler.compile_all_pending()
        # Modify CONFIG
code_v2 = '''
# base configuration
CONFIG = {"debug": True}  # modified
def get_config():
return CONFIG
def process():
cfg = get_config()
return cfg["debug"]
def main():
result = process()
print(result)
'''
        affected_units.clear()
        print("\nAfter modifying CONFIG:")
        changes = compiler.update_source("demo.py", code_v2)
        # Show the dependency propagation
        print("  Affected unit chain:")
        print("    CONFIG (modified) → get_config (reads CONFIG) → process (calls get_config)")
        compiler.compile_all_pending()
        print(f"  Recompiled: {', '.join(affected_units) if affected_units else ''}")
def demo_output_combination():
    """Demo: combining outputs"""
    print("\n" + "=" * 60)
    print("4. Output combination demo")
    print("=" * 60)
with tempfile.TemporaryDirectory() as tmpdir:
compiler = IncrementalCompiler(tmpdir)
        # Translate each unit to C-style pseudocode
def to_pseudo_c(unit):
lines = unit.content.strip().split('\n')
result = []
for line in lines:
line = line.strip()
if line.startswith('def '):
# def func(): -> void func() {
name = line[4:line.index('(')]
result.append(f"void {name}() {{")
elif line.startswith('print('):
# print("x") -> printf("x");
content = line[6:-1]
result.append(f" printf({content});")
elif line == '':
continue
else:
result.append(f" // {line}")
if result and not result[-1].endswith('}'):
result.append("}")
return '\n'.join(result)
compiler.set_compiler(to_pseudo_c)
code = '''
def hello():
print("Hello")
def world():
print("World")
'''
compiler.analyze_source("demo.py", code)
compiler.update_source("demo.py", code)
compiler.compile_all_pending()
combined = compiler.get_combined_output("demo.py")
print("\n原始 Python 代码:")
print(code)
print("合并后的编译产物:")
print(combined)
def main():
    """Entry point"""
    print("\n" + "=" * 60)
    print("Sikuwa incremental compilation demo")
    print("Compile what you point at - precise builds, fast iteration")
    print("=" * 60)
    demo_analyzer()
    demo_change_detection()
    demo_dependency_tracking()
    demo_output_combination()
    print("\n" + "=" * 60)
    print("Demo complete!")
    print("=" * 60)
if __name__ == '__main__':
main()
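
Under the hood the C++ ChangeDetector compares per-line hashes with an LCS diff; the same idea in pure Python, using difflib.SequenceMatcher (an LCS-style matcher) purely as an illustration of what get_changed_lines computes:

import difflib

def changed_lines(old_src: str, new_src: str):
    """1-based line numbers in new_src that were not carried over unchanged."""
    old, new = old_src.splitlines(), new_src.splitlines()
    changed = []
    for tag, _i1, _i2, j1, j2 in difflib.SequenceMatcher(a=old, b=new).get_opcodes():
        if tag != "equal":
            changed.extend(range(j1 + 1, j2 + 1))  # replaced/inserted lines in new
    return changed

print(changed_lines("a = 1\nb = 2\n", "a = 1\nb = 3\nc = 4\n"))  # [2, 3]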

556
incremental/smart_cache.py Normal file

@@ -0,0 +1,556 @@
# sikuwa/incremental/smart_cache.py
"""
智能缓存系统 V1.2
编译即缓存,缓存即编译,预测缓存预热
深度集成减量编译引擎,实现:
1. 编译即缓存 - 每次编译自动持久化,全历史可追溯
2. 缓存即编译 - 缓存命中等同于零成本编译
3. 预测缓存预热 - 基于访问模式和依赖图预测并预编译
"""
import hashlib
import json
import os
import time
import threading
import queue
from enum import Enum, auto
from dataclasses import dataclass, field, asdict
from typing import Dict, List, Set, Optional, Tuple, Callable, Any
from pathlib import Path
from collections import OrderedDict
class CacheEventType(Enum):
    """Cache event types"""
    HIT = auto()      # cache hit
    MISS = auto()     # cache miss
    WRITE = auto()    # write
    EVICT = auto()    # eviction
    WARMUP = auto()   # warmup
    PREDICT = auto()  # prediction
@dataclass
class CacheEntry:
"""缓存条目"""
key: str = ""
content_hash: str = ""
output: str = ""
timestamp: int = 0
access_count: int = 0
last_access: int = 0
dependencies: List[str] = field(default_factory=list)
file_path: str = ""
line_range: Tuple[int, int] = (0, 0)
compile_time_ms: int = 0
size_bytes: int = 0
def touch(self):
"""更新访问信息"""
self.access_count += 1
self.last_access = int(time.time() * 1000)
def to_dict(self) -> dict:
return {
'key': self.key,
'content_hash': self.content_hash,
'output': self.output,
'timestamp': self.timestamp,
'access_count': self.access_count,
'last_access': self.last_access,
'dependencies': self.dependencies,
'file_path': self.file_path,
'line_range': list(self.line_range),
'compile_time_ms': self.compile_time_ms,
'size_bytes': self.size_bytes,
}
@classmethod
def from_dict(cls, data: dict) -> 'CacheEntry':
entry = cls()
entry.key = data.get('key', '')
entry.content_hash = data.get('content_hash', '')
entry.output = data.get('output', '')
entry.timestamp = data.get('timestamp', 0)
entry.access_count = data.get('access_count', 0)
entry.last_access = data.get('last_access', 0)
entry.dependencies = data.get('dependencies', [])
entry.file_path = data.get('file_path', '')
line_range = data.get('line_range', [0, 0])
entry.line_range = tuple(line_range) if isinstance(line_range, list) else line_range
entry.compile_time_ms = data.get('compile_time_ms', 0)
entry.size_bytes = data.get('size_bytes', 0)
return entry
@dataclass
class CacheEvent:
"""缓存事件记录"""
event_type: CacheEventType
key: str
timestamp: int
details: str = ""
@dataclass
class AccessPattern:
"""访问模式记录"""
key: str
access_sequence: List[str] = field(default_factory=list) # 之后访问的键
frequency: int = 0
def record_next(self, next_key: str):
"""记录后续访问"""
if next_key not in self.access_sequence:
self.access_sequence.append(next_key)
self.frequency += 1
class SmartCache:
"""
智能缓存系统 V1.2
核心特性:
- LRU 淘汰策略 + 访问频率权重
- 全历史编译记录持久化
- 基于访问模式的预测预热
- 依赖图感知的缓存失效
- 后台异步预热线程
"""
def __init__(self,
cache_dir: str = ".sikuwa_cache",
max_entries: int = 10000,
max_size_mb: int = 500,
enable_warmup: bool = True):
self.cache_dir = Path(cache_dir)
self.cache_dir.mkdir(parents=True, exist_ok=True)
self.max_entries = max_entries
self.max_size_bytes = max_size_mb * 1024 * 1024
self.enable_warmup = enable_warmup
        # Main store (LRU)
        self._cache: OrderedDict[str, CacheEntry] = OrderedDict()
        self._total_size = 0
        # Statistics
        self._hits = 0
        self._misses = 0
        self._evictions = 0
        self._warmups = 0
        # Event log
        self._events: List[CacheEvent] = []
        self._max_events = 10000
        # Access-pattern tracking
        self._last_accessed_key: Optional[str] = None
        self._access_patterns: Dict[str, AccessPattern] = {}
        # Compiler callback (used for warmup compilation)
        self._compiler_callback: Optional[Callable] = None
        # Warmup queue and thread
        self._warmup_queue: queue.Queue = queue.Queue()
        self._warmup_thread: Optional[threading.Thread] = None
        self._warmup_running = False
        # Load persisted data
        self._load()
        # Start the warmup thread
        if enable_warmup:
            self._start_warmup_thread()
def _load(self):
"""加载持久化缓存"""
cache_file = self.cache_dir / "smart_cache_v1.2.json"
patterns_file = self.cache_dir / "access_patterns.json"
if cache_file.exists():
try:
with open(cache_file, 'r', encoding='utf-8') as f:
data = json.load(f)
for entry_data in data.get('entries', []):
entry = CacheEntry.from_dict(entry_data)
self._cache[entry.key] = entry
self._total_size += entry.size_bytes
except Exception:
pass
if patterns_file.exists():
try:
with open(patterns_file, 'r', encoding='utf-8') as f:
data = json.load(f)
for key, pattern_data in data.items():
self._access_patterns[key] = AccessPattern(
key=key,
access_sequence=pattern_data.get('sequence', []),
frequency=pattern_data.get('frequency', 0)
)
except Exception:
pass
def save(self):
"""保存缓存到磁盘"""
cache_file = self.cache_dir / "smart_cache_v1.2.json"
patterns_file = self.cache_dir / "access_patterns.json"
events_file = self.cache_dir / "cache_events.json"
# 保存缓存条目
with open(cache_file, 'w', encoding='utf-8') as f:
json.dump({
'version': '1.2',
'entries': [entry.to_dict() for entry in self._cache.values()]
}, f, indent=2)
# 保存访问模式
with open(patterns_file, 'w', encoding='utf-8') as f:
patterns = {
k: {'sequence': p.access_sequence, 'frequency': p.frequency}
for k, p in self._access_patterns.items()
}
json.dump(patterns, f, indent=2)
# 保存事件日志(最近的)
with open(events_file, 'w', encoding='utf-8') as f:
events = [
{'type': e.event_type.name, 'key': e.key,
'timestamp': e.timestamp, 'details': e.details}
for e in self._events[-1000:] # 只保存最近1000条
]
json.dump(events, f, indent=2)
def set_compiler(self, callback: Callable):
"""设置编译器回调(用于预热编译)"""
self._compiler_callback = callback
    # ==================== Core cache operations ====================
def get(self, key: str, content_hash: str = "") -> Optional[str]:
"""
获取缓存 - 缓存即编译
缓存命中 = 零成本获得编译结果
"""
if key in self._cache:
entry = self._cache[key]
# 验证内容哈希(如果提供)
if content_hash and entry.content_hash != content_hash:
self._record_event(CacheEventType.MISS, key, "hash mismatch")
self._misses += 1
return None
# 命中移到末尾LRU
self._cache.move_to_end(key)
entry.touch()
self._record_event(CacheEventType.HIT, key)
self._hits += 1
# 记录访问模式
self._record_access_pattern(key)
# 触发预测预热
if self.enable_warmup:
self._trigger_predictive_warmup(key)
return entry.output
self._record_event(CacheEventType.MISS, key)
self._misses += 1
return None
    def put(self, key: str, output: str, content_hash: str,
            dependencies: Optional[List[str]] = None,
            file_path: str = "",
            line_range: Tuple[int, int] = (0, 0),
            compile_time_ms: int = 0) -> bool:
        """
        Write to the cache - compile-as-cache
        Every compile result is persisted automatically, so the full history is traceable
        """
        size_bytes = len(output.encode('utf-8'))
        # Evict until the new entry fits
        while (len(self._cache) >= self.max_entries or
               self._total_size + size_bytes > self.max_size_bytes):
            if not self._evict_one():
                break
        # Create or refresh the entry
        entry = CacheEntry(
            key=key,
            content_hash=content_hash,
            output=output,
            timestamp=int(time.time() * 1000),
            access_count=1,
            last_access=int(time.time() * 1000),
            dependencies=dependencies or [],
            file_path=file_path,
            line_range=line_range,
            compile_time_ms=compile_time_ms,
            size_bytes=size_bytes,
        )
        # Subtract the size of any entry being replaced
        if key in self._cache:
            self._total_size -= self._cache[key].size_bytes
        self._cache[key] = entry
        self._total_size += size_bytes
        self._record_event(CacheEventType.WRITE, key,
                           f"size={size_bytes}, compile_time={compile_time_ms}ms")
        # Track the access pattern
        self._record_access_pattern(key)
        return True
    def invalidate(self, key: str):
        """Invalidate a single entry"""
        if key in self._cache:
            self._total_size -= self._cache[key].size_bytes
            del self._cache[key]
            self._record_event(CacheEventType.EVICT, key, "manual invalidate")
    def invalidate_by_dependency(self, dep_key: str):
        """Invalidate every entry that depends on the given key"""
        to_invalidate = []
        for key, entry in self._cache.items():
            if dep_key in entry.dependencies:
                to_invalidate.append(key)
        for key in to_invalidate:
            self.invalidate(key)
    def _evict_one(self) -> bool:
        """Evict one entry (LRU weighted by access frequency)"""
        if not self._cache:
            return False
        # Eviction score (lower = evicted first):
        # score = access_count * 0.3 - seconds_since_last_access * 0.001
        now = int(time.time() * 1000)
        min_score = float('inf')
        evict_key = None
        for key, entry in self._cache.items():
            recency = (now - entry.last_access) / 1000  # seconds
            score = entry.access_count * 0.3 - recency * 0.001
            if score < min_score:
                min_score = score
                evict_key = key
        if evict_key:
            self._total_size -= self._cache[evict_key].size_bytes
            del self._cache[evict_key]
            self._evictions += 1
            self._record_event(CacheEventType.EVICT, evict_key, "LRU eviction")
            return True
        return False
    # ==================== Access-pattern tracking ====================
    def _record_access_pattern(self, key: str):
        """Track the access pattern"""
        if self._last_accessed_key and self._last_accessed_key != key:
            if self._last_accessed_key not in self._access_patterns:
                self._access_patterns[self._last_accessed_key] = AccessPattern(
                    key=self._last_accessed_key
                )
            self._access_patterns[self._last_accessed_key].record_next(key)
        self._last_accessed_key = key
    # ==================== Predictive cache warmup ====================
    def _start_warmup_thread(self):
        """Start the background warmup thread"""
        if self._warmup_thread and self._warmup_thread.is_alive():
            return
        self._warmup_running = True
        self._warmup_thread = threading.Thread(target=self._warmup_worker, daemon=True)
        self._warmup_thread.start()
    def _warmup_worker(self):
        """Warmup worker loop"""
        while self._warmup_running:
            try:
                # Wait for a warmup task
                task = self._warmup_queue.get(timeout=1.0)
                if task is None:
                    continue
                key, content, content_hash = task
                # Skip if already cached
                if key in self._cache:
                    continue
                # Run the warmup compile
                if self._compiler_callback:
                    try:
                        start = time.time()
                        output = self._compiler_callback(content)
                        compile_time = int((time.time() - start) * 1000)
                        self.put(key, output, content_hash,
                                 compile_time_ms=compile_time)
                        self._warmups += 1
                        self._record_event(CacheEventType.WARMUP, key,
                                           f"predictive warmup, time={compile_time}ms")
                    except Exception:
                        pass
            except queue.Empty:
                continue
    def _trigger_predictive_warmup(self, key: str):
        """Trigger predictive warmup"""
        if key not in self._access_patterns:
            return
        pattern = self._access_patterns[key]
        # Warm the keys most likely to be accessed next
        for next_key in pattern.access_sequence[:3]:  # warm at most 3
            if next_key not in self._cache:
                self._record_event(CacheEventType.PREDICT, next_key,
                                   f"predicted from {key}")
                # Only the prediction is recorded here; actual warmup needs the
                # unit's content, which arrives via warmup_unit
    def warmup_unit(self, key: str, content: str, content_hash: str):
        """Queue a warmup task manually"""
        if key not in self._cache:
            self._warmup_queue.put((key, content, content_hash))
    def warmup_dependencies(self, keys: List[str],
                            content_provider: Callable[[str], Tuple[str, str]]):
        """
        Warm a dependency chain
        content_provider: key -> (content, content_hash)
        """
        for key in keys:
            if key not in self._cache:
                try:
                    content, content_hash = content_provider(key)
                    self._warmup_queue.put((key, content, content_hash))
                except Exception:
                    pass
    def stop_warmup(self):
        """Stop the warmup thread"""
        self._warmup_running = False
        if self._warmup_thread:
            self._warmup_thread.join(timeout=2.0)
    # ==================== Event log ====================
    def _record_event(self, event_type: CacheEventType, key: str, details: str = ""):
        """Record a cache event"""
        event = CacheEvent(
            event_type=event_type,
            key=key,
            timestamp=int(time.time() * 1000),
            details=details
        )
        self._events.append(event)
        # Bound the event list
        if len(self._events) > self._max_events:
            self._events = self._events[-self._max_events//2:]
    def get_recent_events(self, count: int = 100) -> List[dict]:
        """Most recent events"""
return [
{'type': e.event_type.name, 'key': e.key,
'timestamp': e.timestamp, 'details': e.details}
for e in self._events[-count:]
]
    # ==================== Stats and diagnostics ====================
    def get_stats(self) -> Dict[str, Any]:
        """Cache statistics"""
return {
'version': '1.2',
'entries': len(self._cache),
'total_size_mb': self._total_size / (1024 * 1024),
'max_entries': self.max_entries,
'max_size_mb': self.max_size_bytes / (1024 * 1024),
'hits': self._hits,
'misses': self._misses,
'hit_rate': self._hits / (self._hits + self._misses) if (self._hits + self._misses) > 0 else 0,
'evictions': self._evictions,
'warmups': self._warmups,
'access_patterns': len(self._access_patterns),
}
def get_hot_entries(self, count: int = 10) -> List[Dict]:
"""获取最热门的缓存条目"""
sorted_entries = sorted(
self._cache.values(),
key=lambda e: e.access_count,
reverse=True
)
return [
{'key': e.key, 'access_count': e.access_count,
'file': e.file_path, 'lines': e.line_range}
for e in sorted_entries[:count]
]
def get_predicted_next(self, key: str, count: int = 5) -> List[str]:
"""获取预测的下一个访问键"""
if key not in self._access_patterns:
return []
return self._access_patterns[key].access_sequence[:count]
    def has(self, key: str) -> bool:
        """Whether the key is cached"""
        return key in self._cache
    def clear(self):
        """Clear the cache"""
        self._cache.clear()
        self._total_size = 0
        self._access_patterns.clear()
        self._events.clear()
    def __del__(self):
        """Stop the warmup thread and persist on destruction"""
self.stop_warmup()
try:
self.save()
except Exception:
pass
# ==================== Factory functions ====================
_global_cache: Optional[SmartCache] = None
def get_smart_cache(cache_dir: str = ".sikuwa_cache") -> SmartCache:
    """Return the global smart-cache instance"""
global _global_cache
if _global_cache is None:
_global_cache = SmartCache(cache_dir)
return _global_cache
def create_smart_cache(cache_dir: str = ".sikuwa_cache",
max_entries: int = 10000,
max_size_mb: int = 500,
enable_warmup: bool = True) -> SmartCache:
"""创建新的智能缓存实例"""
return SmartCache(cache_dir, max_entries, max_size_mb, enable_warmup)
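
A quick end-to-end check of the compile-as-cache / cache-as-compile flow (warmup disabled to keep it deterministic); the unit key and the uppercasing "compiler" are made up for the example:

import hashlib
from incremental.smart_cache import create_smart_cache

cache = create_smart_cache(".sikuwa_cache_demo", max_entries=64,
                           max_size_mb=4, enable_warmup=False)
src = "def f():\n    return 1\n"
key = "demo.py:1:2"                                  # hypothetical unit key
h = hashlib.sha256(src.encode("utf-8")).hexdigest()

out = cache.get(key, h)
if out is None:                    # miss: "compile" and record the artifact
    out = src.upper()              # stand-in for a real compiler
    cache.put(key, out, h, file_path="demo.py", line_range=(1, 2))

assert cache.get(key, h) == out    # hit: zero-cost compilation
print(cache.get_stats()["hit_rate"])
cache.save()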

2
incremental/tests/__init__.py Normal file

@@ -0,0 +1,2 @@
# sikuwa/incremental/tests/__init__.py
"""减量编译测试包"""

360
incremental/tests/test_incremental.py Normal file

@@ -0,0 +1,360 @@
# sikuwa/incremental/tests/test_incremental.py
"""
减量编译系统测试
"""
import sys
import os
import tempfile
import unittest
from pathlib import Path
# Put the package root on sys.path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from incremental.core import (
IncrementalCompiler,
CompilationUnit,
Snapshot,
ChangeDetector,
CompilationCache,
UnitType,
UnitState
)
from incremental.analyzer import PythonAnalyzer, BlockType
class TestPythonAnalyzer(unittest.TestCase):
    """Tests for the Python analyzer"""
    def setUp(self):
        self.analyzer = PythonAnalyzer()
    def test_analyze_function(self):
        """Function analysis"""
code = '''
def hello(name):
"""Say hello"""
print(f"Hello, {name}!")
'''
        blocks = self.analyzer.analyze(code, "test.py")
        # A function block should be detected
        func_blocks = [b for b in blocks if b.type == BlockType.FUNCTION]
        self.assertEqual(len(func_blocks), 1)
        self.assertEqual(func_blocks[0].name, "hello")
    def test_analyze_class(self):
        """Class analysis"""
code = '''
class MyClass:
def __init__(self):
self.value = 0
def increment(self):
self.value += 1
'''
        blocks = self.analyzer.analyze(code, "test.py")
        # A class block should be detected
        class_blocks = [b for b in blocks if b.type == BlockType.CLASS]
        self.assertEqual(len(class_blocks), 1)
        self.assertEqual(class_blocks[0].name, "MyClass")
    def test_analyze_import(self):
        """Import analysis"""
code = '''
import os
from sys import path
from pathlib import Path
'''
blocks = self.analyzer.analyze(code, "test.py")
import_blocks = [b for b in blocks if b.type == BlockType.IMPORT]
self.assertEqual(len(import_blocks), 3)
def test_dependency_extraction(self):
"""测试依赖提取"""
code = '''
def outer():
def inner():
return x
return inner()
'''
blocks = self.analyzer.analyze(code, "test.py")
        func_blocks = [b for b in blocks if b.type == BlockType.FUNCTION]
        # outer should reference x
        self.assertEqual(len(func_blocks), 1)
        self.assertIn('x', func_blocks[0].references)
class TestChangeDetector(unittest.TestCase):
    """Tests for the change detector"""
    def setUp(self):
        self.detector = ChangeDetector()
    def test_detect_addition(self):
        """Addition detection"""
old = Snapshot()
old.units = {}
new_unit = CompilationUnit(
id="u1", content="def foo(): pass",
start_line=1, end_line=1, file_path="test.py"
)
new_unit.compute_hash()
new = Snapshot()
new.units = {"u1": new_unit}
changes = self.detector.detect_changes(old, new)
self.assertEqual(len(changes), 1)
self.assertEqual(changes[0].unit_id, "u1")
self.assertEqual(changes[0].change_type, UnitState.ADDED)
def test_detect_modification(self):
"""测试修改检测"""
old_unit = CompilationUnit(
id="u1", content="def foo(): pass",
start_line=1, end_line=1, file_path="test.py"
)
old_unit.compute_hash()
old = Snapshot()
old.units = {"u1": old_unit}
new_unit = CompilationUnit(
id="u1", content="def foo(): return 1",
start_line=1, end_line=1, file_path="test.py"
)
new_unit.compute_hash()
new = Snapshot()
new.units = {"u1": new_unit}
changes = self.detector.detect_changes(old, new)
self.assertEqual(len(changes), 1)
self.assertEqual(changes[0].unit_id, "u1")
self.assertEqual(changes[0].change_type, UnitState.MODIFIED)
def test_detect_deletion(self):
"""测试删除检测"""
old_unit = CompilationUnit(
id="u1", content="def foo(): pass",
start_line=1, end_line=1, file_path="test.py"
)
old_unit.compute_hash()
old = Snapshot()
old.units = {"u1": old_unit}
new = Snapshot()
new.units = {}
changes = self.detector.detect_changes(old, new)
self.assertEqual(len(changes), 1)
self.assertEqual(changes[0].change_type, UnitState.DELETED)
class TestCompilationCache(unittest.TestCase):
    """Tests for the compilation cache"""
def setUp(self):
self.temp_dir = tempfile.mkdtemp()
self.cache = CompilationCache(self.temp_dir)
def tearDown(self):
import shutil
shutil.rmtree(self.temp_dir, ignore_errors=True)
    def test_put_get(self):
        """Put and get"""
self.cache.put("key1", "value1", "hash1")
result = self.cache.get("key1")
self.assertEqual(result, "value1")
    def test_get_nonexistent(self):
        """Getting a missing key"""
        result = self.cache.get("nonexistent")
        self.assertEqual(result, "")  # an empty string is returned
    def test_persistence(self):
        """Persistence"""
        self.cache.put("key1", "value1", "hash1")
        self.cache.save()
        # A fresh cache instance should see the saved entry
cache2 = CompilationCache(self.temp_dir)
result = cache2.get("key1")
self.assertEqual(result, "value1")
class TestIncrementalCompiler(unittest.TestCase):
    """Tests for the incremental compiler"""
    def setUp(self):
        self.temp_dir = tempfile.mkdtemp()
        self.compiler = IncrementalCompiler(self.temp_dir)
        # Trivial compiler: return the source uppercased
self.compiler.set_compiler(lambda unit: unit.content.upper())
def tearDown(self):
import shutil
shutil.rmtree(self.temp_dir, ignore_errors=True)
    def test_initial_compile(self):
        """Initial build"""
code = '''
def hello():
print("Hello")
def world():
print("World")
'''
        self.compiler.analyze_source("test.py", code)
        changes = self.compiler.update_source("test.py", code)
        # First build: every unit should be new
        self.assertGreater(len(changes), 0)
        # Compile everything
        outputs = self.compiler.compile_all_pending()
        self.assertGreater(len(outputs), 0)
    def test_incremental_compile(self):
        """Incremental rebuild"""
        # Initial code
code1 = '''
def hello():
print("Hello")
def world():
print("World")
'''
self.compiler.analyze_source("test.py", code1)
self.compiler.update_source("test.py", code1)
outputs1 = self.compiler.compile_all_pending()
        # Modify one function
code2 = '''
def hello():
print("Hello Modified")
def world():
print("World")
'''
        changes = self.compiler.update_source("test.py", code2)
        # Changes should be reported
        self.assertGreater(len(changes), 0)
        # Compile again
        outputs2 = self.compiler.compile_all_pending()
        # Both builds should have produced output
        self.assertGreater(len(outputs1) + len(outputs2), 0)
    def test_dependency_propagation(self):
        """Dependency propagation"""
code = '''
x = 10
def get_x():
return x
def double_x():
return get_x() * 2
'''
self.compiler.analyze_source("test.py", code)
self.compiler.update_source("test.py", code)
self.compiler.compile_all_pending()
        # Change the value of x
code2 = '''
x = 20
def get_x():
return x
def double_x():
return get_x() * 2
'''
        changes = self.compiler.update_source("test.py", code2)
        # A change should be detected (x changed, so its dependents get flagged too)
        self.assertGreater(len(changes), 0)
    def test_combined_output(self):
        """Combined output"""
code = '''
import os
def hello():
print("Hello")
def world():
print("World")
'''
self.compiler.analyze_source("test.py", code)
self.compiler.update_source("test.py", code)
self.compiler.compile_all_pending()
        combined = self.compiler.get_combined_output("test.py")
        # The combined output should contain every compiled artifact
        self.assertGreater(len(combined), 0)
class TestBlockBoundary(unittest.TestCase):
    """Tests for the boundary trigger"""
    def setUp(self):
        self.analyzer = PythonAnalyzer()
    def test_class_contains_methods(self):
        """A class block contains its methods"""
code = '''
class MyClass:
def method1(self):
pass
def method2(self):
pass
'''
        blocks = self.analyzer.analyze(code, "test.py")
        class_blocks = [b for b in blocks if b.type == BlockType.CLASS]
        self.assertEqual(len(class_blocks), 1)
        # The class block should span the entire class definition
        class_block = class_blocks[0]
        self.assertIn("method1", class_block.content)
        self.assertIn("method2", class_block.content)
def run_tests():
    """Run the whole suite"""
loader = unittest.TestLoader()
suite = unittest.TestSuite()
suite.addTests(loader.loadTestsFromTestCase(TestPythonAnalyzer))
suite.addTests(loader.loadTestsFromTestCase(TestChangeDetector))
suite.addTests(loader.loadTestsFromTestCase(TestCompilationCache))
suite.addTests(loader.loadTestsFromTestCase(TestIncrementalCompiler))
suite.addTests(loader.loadTestsFromTestCase(TestBlockBoundary))
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
return result.wasSuccessful()
if __name__ == '__main__':
success = run_tests()
sys.exit(0 if success else 1)