Sikuwa first commit
Some checks are pending
CI / Test (Python 3.10 on macos-latest) (push) Waiting to run
CI / Test (Python 3.11 on macos-latest) (push) Waiting to run
CI / Test (Python 3.12 on macos-latest) (push) Waiting to run
CI / Test (Python 3.8 on macos-latest) (push) Waiting to run
CI / Test (Python 3.9 on macos-latest) (push) Waiting to run
CI / Test (Python 3.10 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.11 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.12 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.8 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.9 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.10 on windows-latest) (push) Waiting to run
CI / Test (Python 3.11 on windows-latest) (push) Waiting to run
CI / Test (Python 3.12 on windows-latest) (push) Waiting to run
CI / Test (Python 3.8 on windows-latest) (push) Waiting to run
CI / Test (Python 3.9 on windows-latest) (push) Waiting to run
CI / Lint (push) Waiting to run
CI / Release (push) Blocked by required conditions
Documentation / Build Documentation (push) Waiting to run
Some checks are pending
CI / Test (Python 3.10 on macos-latest) (push) Waiting to run
CI / Test (Python 3.11 on macos-latest) (push) Waiting to run
CI / Test (Python 3.12 on macos-latest) (push) Waiting to run
CI / Test (Python 3.8 on macos-latest) (push) Waiting to run
CI / Test (Python 3.9 on macos-latest) (push) Waiting to run
CI / Test (Python 3.10 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.11 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.12 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.8 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.9 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.10 on windows-latest) (push) Waiting to run
CI / Test (Python 3.11 on windows-latest) (push) Waiting to run
CI / Test (Python 3.12 on windows-latest) (push) Waiting to run
CI / Test (Python 3.8 on windows-latest) (push) Waiting to run
CI / Test (Python 3.9 on windows-latest) (push) Waiting to run
CI / Lint (push) Waiting to run
CI / Release (push) Blocked by required conditions
Documentation / Build Documentation (push) Waiting to run
This commit is contained in:
84
incremental/__init__.py
Normal file
84
incremental/__init__.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# sikuwa/incremental/__init__.py
"""
Incremental Compilation System

"Compile exactly what you point at" -- precise, minimal recompilation.

Core features:
1. A single line / minimal syntactic block is the smallest compilation unit
2. Every unit has a unique id, a minimal dependency set, and a cached artifact
3. Changes are detected by comparing version snapshots
4. Only changed units, and units affected through dependencies, are recompiled
5. Boundary triggers handle functions/classes as whole structures
6. Artifacts are stitched back together in original source order

Smart cache V1.2:
- Compile-is-cache: every compilation is recorded automatically, full history traceable
- Cache-is-compile: a cache hit is equivalent to a zero-cost compilation
- Predictive warm-up: predicts access patterns and pre-compiles accordingly
"""

from .core import (
    IncrementalCompiler,
    CompilationUnit,
    Snapshot,
    ChangeRecord,
    ChangeDetector,
    CompilationCache,
    UnitType,
    UnitState,
)

from .analyzer import (
    PythonAnalyzer,
    CodeBlock,
    BlockType,
)

from .compiler_integration import (
    IncrementalNativeCompiler,
    IncrementalBuildResult,
    create_incremental_native_compiler,
)

from .smart_cache import (
    SmartCache,
    CacheEntry,
    CacheEvent,
    CacheEventType,
    get_smart_cache,
    create_smart_cache,
)

__all__ = [
    # Core classes
    'IncrementalCompiler',
    'CompilationUnit',
    'Snapshot',
    'ChangeRecord',
    'ChangeDetector',
    'CompilationCache',

    # Enums
    'UnitType',
    'UnitState',

    # Analyzer
    'PythonAnalyzer',
    'CodeBlock',
    'BlockType',

    # Integrated compiler
    'IncrementalNativeCompiler',
    'IncrementalBuildResult',
    'create_incremental_native_compiler',

    # Smart cache V1.2
    'SmartCache',
    'CacheEntry',
    'CacheEvent',
    'CacheEventType',
    'get_smart_cache',
    'create_smart_cache',
]

__version__ = '1.2.0'
|
||||
396
incremental/analyzer.py
Normal file
396
incremental/analyzer.py
Normal file
@@ -0,0 +1,396 @@
|
||||
# sikuwa/incremental/analyzer.py
"""
Python code analyzer - identifies code-block boundaries and dependency
relationships.

AST analysis used by the incremental compilation system.
"""

import ast
import hashlib
from enum import Enum, auto
from dataclasses import dataclass, field
from typing import List, Dict, Set, Optional, Tuple
from pathlib import Path
|
||||
|
||||
|
||||
class BlockType(Enum):
    """Kind of source code block (the smallest compilation unit)."""
    MODULE = auto()      # module level
    IMPORT = auto()      # import statement
    CLASS = auto()       # class definition
    FUNCTION = auto()    # function definition
    METHOD = auto()      # method definition
    DECORATOR = auto()   # decorator
    STATEMENT = auto()   # plain statement
    ASSIGNMENT = auto()  # assignment statement
    EXPRESSION = auto()  # expression statement
    CONTROL = auto()     # control flow (if/for/while/try)
    WITH = auto()        # with statement
|
||||
|
||||
|
||||
@dataclass
class CodeBlock:
    """A code block - the smallest compilation unit."""
    id: str = ""                      # unique id: "file:start:end:hash8"
    type: BlockType = BlockType.STATEMENT
    name: str = ""                    # name (function/class name, etc.)
    start_line: int = 0               # first line (1-based)
    end_line: int = 0                 # last line (1-based, inclusive)
    start_col: int = 0                # first column
    end_col: int = 0                  # last column
    content: str = ""                 # source text of the block
    content_hash: str = ""            # whitespace-insensitive content hash
    parent_id: str = ""               # id of the enclosing block, if any
    children: List[str] = field(default_factory=list)  # ids of nested blocks

    # Dependency information
    imports: List[str] = field(default_factory=list)       # imported modules/names
    references: List[str] = field(default_factory=list)    # names read by the block
    definitions: List[str] = field(default_factory=list)   # names defined by the block
    dependencies: List[str] = field(default_factory=list)  # ids of blocks this one depends on

    def compute_hash(self) -> str:
        """Compute and store the content hash.

        Each line is stripped first so pure indentation/whitespace edits
        do not invalidate the hash.
        """
        normalized = '\n'.join(line.strip() for line in self.content.splitlines())
        self.content_hash = hashlib.sha256(normalized.encode()).hexdigest()[:16]
        return self.content_hash

    def generate_id(self, file_path: str) -> str:
        """Generate the unique block id from location plus content hash."""
        if not self.content_hash:
            self.compute_hash()
        self.id = f"{file_path}:{self.start_line}:{self.end_line}:{self.content_hash[:8]}"
        return self.id
|
||||
|
||||
|
||||
class PythonAnalyzer:
    """
    Python source analyzer.

    Splits a module into :class:`CodeBlock` compilation units (one per
    top-level statement, recursing into class bodies) and resolves the
    dependency edges between units via defined/referenced names.
    """

    def __init__(self):
        self.blocks: List[CodeBlock] = []          # discovered blocks, in order
        self.block_map: Dict[str, CodeBlock] = {}  # block id -> block
        self.lines: List[str] = []                 # source split into lines
        self.file_path: str = ""                   # used when generating block ids

    def analyze(self, source: str, file_path: str = "<string>") -> List[CodeBlock]:
        """
        Analyze Python source code and return the list of code blocks.

        Args:
            source: Python source text.
            file_path: Path recorded in the generated block ids.

        Returns:
            The list of discovered code blocks.
        """
        self.file_path = file_path
        self.lines = source.splitlines()
        self.blocks = []
        self.block_map = {}

        try:
            tree = ast.parse(source)
            self._analyze_module(tree, source)
        except SyntaxError:
            # Code that does not parse falls back to a line-level scan.
            self._fallback_line_analysis(source)

        # Resolve inter-block dependencies.
        self._analyze_dependencies()

        return self.blocks

    def _analyze_module(self, tree: ast.Module, source: str):
        """Convert every top-level statement of the module into a block."""
        for node in ast.iter_child_nodes(tree):
            block = self._node_to_block(node, source)
            if block:
                self.blocks.append(block)
                self.block_map[block.id] = block

    def _node_to_block(self, node: ast.AST, source: str, parent_id: str = "") -> Optional[CodeBlock]:
        """Convert an AST node into a CodeBlock, recursing into class bodies."""
        block = CodeBlock()
        block.parent_id = parent_id

        # Location of the node.
        block.start_line = getattr(node, 'lineno', 0)
        block.end_line = getattr(node, 'end_lineno', block.start_line)
        block.start_col = getattr(node, 'col_offset', 0)
        block.end_col = getattr(node, 'end_col_offset', 0)

        # BUGFIX: widen the range to cover decorators BEFORE extracting the
        # content, so decorator edits change the block's text and hash.
        # (Previously start_line was adjusted after extraction, so the
        # content/hash silently excluded decorators.)
        decorator_list = getattr(node, 'decorator_list', None)
        if decorator_list:
            block.start_line = decorator_list[0].lineno

        # Extract the source text.
        if block.start_line > 0 and block.end_line > 0:
            block.content = self._get_source_lines(block.start_line, block.end_line)

        # BUGFIX: compute the hash and id up front. Child blocks created for
        # class bodies need a valid parent id; previously block.id was still
        # "" at recursion time, so parent/child links were always broken and
        # expand_to_boundaries() could never find the enclosing class.
        block.compute_hash()
        block.generate_id(self.file_path)

        # Classify the node and collect name information.
        if isinstance(node, ast.Import):
            block.type = BlockType.IMPORT
            block.name = "import"
            block.imports = [alias.name for alias in node.names]

        elif isinstance(node, ast.ImportFrom):
            block.type = BlockType.IMPORT
            block.name = f"from {node.module}"
            block.imports = [node.module or ""] + [alias.name for alias in node.names]

        elif isinstance(node, ast.ClassDef):
            block.type = BlockType.CLASS
            block.name = node.name
            block.definitions = [node.name]
            # Recurse into the class body; children carry this block's id.
            for child in node.body:
                child_block = self._node_to_block(child, source, block.id)
                if child_block:
                    block.children.append(child_block.id)
                    self.blocks.append(child_block)
                    self.block_map[child_block.id] = child_block

        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # Nested inside a class (parent_id set) -> method, else function.
            block.type = BlockType.FUNCTION if not parent_id else BlockType.METHOD
            block.name = node.name
            block.definitions = [node.name]
            # Names referenced in the body drive dependency resolution.
            block.references = self._extract_references(node)

        elif isinstance(node, ast.Assign):
            block.type = BlockType.ASSIGNMENT
            block.definitions = self._extract_targets(node.targets)
            block.references = self._extract_references(node.value)

        elif isinstance(node, ast.AugAssign):
            block.type = BlockType.ASSIGNMENT
            block.definitions = self._extract_targets([node.target])
            block.references = self._extract_references(node.value)

        elif isinstance(node, ast.AnnAssign):
            block.type = BlockType.ASSIGNMENT
            if node.target:
                block.definitions = self._extract_targets([node.target])
            if node.value:
                block.references = self._extract_references(node.value)

        elif isinstance(node, (ast.If, ast.For, ast.While, ast.Try)):
            block.type = BlockType.CONTROL
            block.name = node.__class__.__name__.lower()
            block.references = self._extract_references(node)

        elif isinstance(node, ast.With):
            block.type = BlockType.WITH
            block.references = self._extract_references(node)

        elif isinstance(node, ast.Expr):
            block.type = BlockType.EXPRESSION
            block.references = self._extract_references(node.value)

        else:
            block.type = BlockType.STATEMENT
            block.references = self._extract_references(node)

        return block

    def _get_source_lines(self, start: int, end: int) -> str:
        """Return the source text for an inclusive 1-based line range."""
        if start < 1 or end > len(self.lines):
            return ""
        return '\n'.join(self.lines[start - 1:end])

    def _extract_references(self, node: ast.AST) -> List[str]:
        """Collect the names a node reads (deduplicated, unordered)."""
        refs = []
        for child in ast.walk(node):
            if isinstance(child, ast.Name):
                refs.append(child.id)
            elif isinstance(child, ast.Attribute):
                # For an attribute chain record the root name (a.b.c -> a).
                current = child
                while isinstance(current, ast.Attribute):
                    current = current.value
                if isinstance(current, ast.Name):
                    refs.append(current.id)
        return list(set(refs))

    def _extract_targets(self, targets: List[ast.AST]) -> List[str]:
        """Collect the plain names bound by assignment targets."""
        names = []
        for target in targets:
            if isinstance(target, ast.Name):
                names.append(target.id)
            elif isinstance(target, (ast.Tuple, ast.List)):
                # Tuple/list unpacking: take the directly nested names.
                for elt in target.elts:
                    if isinstance(elt, ast.Name):
                        names.append(elt.id)
        return names

    def _analyze_dependencies(self):
        """Link each block to the blocks that define the names it references."""
        # Map defined name -> defining block id (last definition wins).
        name_to_block: Dict[str, str] = {}
        for block in self.blocks:
            for name in block.definitions:
                name_to_block[name] = block.id

        # Record a dependency for every resolvable reference.
        for block in self.blocks:
            for ref in block.references:
                if ref in name_to_block and name_to_block[ref] != block.id:
                    dep_id = name_to_block[ref]
                    if dep_id not in block.dependencies:
                        block.dependencies.append(dep_id)

    def _fallback_line_analysis(self, source: str):
        """Line-level fallback used when the source has syntax errors."""
        lines = source.splitlines()
        current_block = None
        indent_stack = [(0, None)]  # (indent, block)

        for i, line in enumerate(lines, 1):
            stripped = line.lstrip()
            if not stripped or stripped.startswith('#'):
                continue

            indent = len(line) - len(stripped)

            # Very rough block detection by leading keyword.
            if stripped.startswith('def ') or stripped.startswith('async def '):
                block = CodeBlock(
                    type=BlockType.FUNCTION,
                    name=stripped.split('(')[0].replace('def ', '').replace('async ', '').strip(),
                    start_line=i,
                    end_line=i,
                    content=line
                )
                current_block = block

            elif stripped.startswith('class '):
                block = CodeBlock(
                    type=BlockType.CLASS,
                    name=stripped.split('(')[0].split(':')[0].replace('class ', '').strip(),
                    start_line=i,
                    end_line=i,
                    content=line
                )
                current_block = block

            elif stripped.startswith('import ') or stripped.startswith('from '):
                block = CodeBlock(
                    type=BlockType.IMPORT,
                    start_line=i,
                    end_line=i,
                    content=line
                )
                block.compute_hash()
                block.generate_id(self.file_path)
                self.blocks.append(block)
                self.block_map[block.id] = block
                continue

            else:
                if current_block and indent > indent_stack[-1][0]:
                    # Indented line: still inside the current def/class.
                    current_block.end_line = i
                    current_block.content += '\n' + line
                else:
                    # Dedent: close the current block, if any.
                    if current_block:
                        current_block.compute_hash()
                        current_block.generate_id(self.file_path)
                        self.blocks.append(current_block)
                        self.block_map[current_block.id] = current_block
                        current_block = None

                    # A plain top-level statement becomes its own block.
                    block = CodeBlock(
                        type=BlockType.STATEMENT,
                        start_line=i,
                        end_line=i,
                        content=line
                    )
                    block.compute_hash()
                    block.generate_id(self.file_path)
                    self.blocks.append(block)
                    self.block_map[block.id] = block

        # Flush the trailing block.
        if current_block:
            current_block.compute_hash()
            current_block.generate_id(self.file_path)
            self.blocks.append(current_block)
            self.block_map[current_block.id] = current_block

    def get_blocks_in_range(self, start_line: int, end_line: int) -> List[CodeBlock]:
        """Return all blocks whose line range intersects [start_line, end_line]."""
        result = []
        for block in self.blocks:
            # Overlap test for inclusive ranges.
            if block.start_line <= end_line and block.end_line >= start_line:
                result.append(block)
        return result

    def get_affected_blocks(self, changed_block_ids: Set[str]) -> Set[str]:
        """Return the changed blocks plus everything reachable via reverse deps."""
        affected = set(changed_block_ids)
        queue = list(changed_block_ids)
        # BFS over reverse dependency edges; an index cursor avoids the
        # accidental O(n^2) of list.pop(0) on long queues.
        idx = 0
        while idx < len(queue):
            block_id = queue[idx]
            idx += 1
            # Every block that depends on block_id is affected too.
            for block in self.blocks:
                if block_id in block.dependencies and block.id not in affected:
                    affected.add(block.id)
                    queue.append(block.id)

        return affected

    def expand_to_boundaries(self, block_ids: Set[str]) -> Set[str]:
        """Expand a set of block ids so complete functions/classes are included."""
        expanded = set(block_ids)

        for block_id in list(block_ids):
            block = self.block_map.get(block_id)
            if not block:
                continue

            # A change inside a function/class requires recompiling the
            # whole enclosing structure.
            if block.parent_id:
                parent = self.block_map.get(block.parent_id)
                if parent and parent.type in (BlockType.CLASS, BlockType.FUNCTION):
                    expanded.add(parent.id)
                    # ...together with all of its children.
                    for child_id in parent.children:
                        expanded.add(child_id)

            # A changed function/class drags in all of its own children.
            if block.type in (BlockType.CLASS, BlockType.FUNCTION):
                for child_id in block.children:
                    expanded.add(child_id)

        return expanded
|
||||
|
||||
|
||||
def analyze_python_file(file_path: str) -> List[CodeBlock]:
    """Read a Python file and return its code blocks."""
    source = Path(file_path).read_text(encoding='utf-8')
    return PythonAnalyzer().analyze(source, file_path)
|
||||
|
||||
|
||||
def analyze_python_source(source: str, file_path: str = "<string>") -> List[CodeBlock]:
    """Return the code blocks of the given Python source text."""
    return PythonAnalyzer().analyze(source, file_path)
|
||||
322
incremental/compiler_integration.py
Normal file
322
incremental/compiler_integration.py
Normal file
@@ -0,0 +1,322 @@
|
||||
# sikuwa/incremental/compiler_integration.py
"""
Incremental compiler integration module.

Connects the incremental compilation system with the Sikuwa compiler.
"""

import os
import sys
import subprocess
import tempfile
from pathlib import Path
from typing import Dict, List, Optional, Callable, Any
from dataclasses import dataclass

from .core import (
    IncrementalCompiler,
    CompilationUnit,
    ChangeRecord,
    UnitState,
    UnitType
)
from .analyzer import PythonAnalyzer, CodeBlock, BlockType
|
||||
|
||||
|
||||
@dataclass
class IncrementalBuildResult:
    """Result of one incremental build."""
    success: bool = False
    compiled_units: int = 0                        # units actually compiled this run
    cached_units: int = 0                          # units served from cache
    total_units: int = 0                           # all units known for the file
    output_files: Optional[Dict[str, str]] = None  # unit_id -> output path (set in __post_init__)
    combined_output: str = ""                      # artifact paths, stitched in source order
    errors: Optional[List[str]] = None             # error messages (set in __post_init__)

    def __post_init__(self):
        # Mutable containers cannot be used as plain dataclass defaults,
        # so the None placeholders are replaced with fresh ones here.
        if self.output_files is None:
            self.output_files = {}
        if self.errors is None:
            self.errors = []
|
||||
|
||||
|
||||
class IncrementalNativeCompiler:
    """
    Incremental native compiler.

    Integrates the incremental compilation system with the native C/C++
    build pipeline: Python -> C -> GCC -> dll/so.

    Features:
    - Compiles only the changed code blocks
    - Caches compiled object files
    - Smart linking (relinks only what is necessary)
    """

    def __init__(self,
                 cache_dir: str = ".sikuwa_cache",
                 cc: str = "gcc",
                 cxx: str = "g++"):
        self.incremental = IncrementalCompiler(cache_dir)
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        self.cc = cc    # C compiler executable
        self.cxx = cxx  # C++ compiler executable (used as the linker driver)

        # Working directories
        self.work_dir = self.cache_dir / "incremental_build"
        self.c_dir = self.work_dir / "c_source"
        self.obj_dir = self.work_dir / "obj"

        for d in [self.work_dir, self.c_dir, self.obj_dir]:
            d.mkdir(parents=True, exist_ok=True)

        # Register the per-unit compile callback
        self.incremental.set_compiler(self._compile_unit)

        # Cython availability
        self._cython_available = self._check_cython()

    def _check_cython(self) -> bool:
        """Return True if Cython can be imported."""
        try:
            import Cython
            return True
        except ImportError:
            return False

    def _compile_unit(self, unit: CompilationUnit) -> str:
        """
        Compile a single unit.

        Pipeline: Python code -> C code -> object file.
        """
        # Generate C code
        c_code = self._python_to_c(unit)

        # Save the C file
        c_file = self.c_dir / f"unit_{unit.content_hash}.c"
        c_file.write_text(c_code, encoding='utf-8')

        # Compile to an object file (skipped if one already exists for
        # this content hash -- the hash-named file IS the cache key)
        obj_file = self.obj_dir / f"unit_{unit.content_hash}.o"

        if not obj_file.exists():
            self._compile_c_to_obj(c_file, obj_file)

        # The object file path is returned as the "compilation artifact"
        return str(obj_file)

    def _python_to_c(self, unit: CompilationUnit) -> str:
        """
        Translate Python code to C code.

        Uses Cython when available, otherwise the built-in converter.
        """
        if self._cython_available and unit.type in (UnitType.FUNCTION, UnitType.CLASS):
            return self._cython_convert(unit)
        else:
            return self._builtin_convert(unit)

    def _cython_convert(self, unit: CompilationUnit) -> str:
        """Convert via Cython; falls back to the built-in converter on failure."""
        # Write a temporary .pyx file
        pyx_file = self.work_dir / f"temp_{unit.content_hash}.pyx"
        pyx_file.write_text(unit.content, encoding='utf-8')

        c_file = self.work_dir / f"temp_{unit.content_hash}.c"

        try:
            result = subprocess.run(
                [sys.executable, "-m", "cython", "-3", str(pyx_file), "-o", str(c_file)],
                capture_output=True,
                text=True
            )

            if result.returncode == 0 and c_file.exists():
                return c_file.read_text(encoding='utf-8')
        except Exception:
            pass

        # Fall back to the built-in converter
        return self._builtin_convert(unit)

    def _builtin_convert(self, unit: CompilationUnit) -> str:
        """Built-in converter - embeds the Python source in a C wrapper."""
        # Escape backslashes first, then quotes and newlines, so the code
        # survives as a C string literal.
        escaped = unit.content.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')

        unit_name = unit.name or f"unit_{unit.content_hash[:8]}"
        # Keep only [A-Za-z0-9] so the name is a valid C identifier part.
        safe_name = ''.join(c if c.isalnum() else '_' for c in unit_name)

        c_code = f'''
/* Auto-generated by Sikuwa Incremental Compiler */
/* Unit: {unit.id} */
/* Lines: {unit.start_line}-{unit.end_line} */

#define PY_SSIZE_T_CLEAN
#include <Python.h>

static const char* sikuwa_unit_{safe_name}_source = "{escaped}";

int sikuwa_exec_unit_{safe_name}(PyObject* globals, PyObject* locals) {{
    PyObject* code = Py_CompileString(
        sikuwa_unit_{safe_name}_source,
        "{unit.file_path}",
        Py_file_input
    );

    if (code == NULL) {{
        return -1;
    }}

    PyObject* result = PyEval_EvalCode(code, globals, locals);
    Py_DECREF(code);

    if (result == NULL) {{
        return -1;
    }}

    Py_DECREF(result);
    return 0;
}}
'''
        return c_code

    def _compile_c_to_obj(self, c_file: Path, obj_file: Path):
        """Compile a C file into an object file; raises RuntimeError on failure."""
        import sysconfig

        # Locate the Python headers
        include_dir = sysconfig.get_path('include')

        cmd = [
            self.cc,
            "-c",
            "-fPIC",
            "-O2",
            f"-I{include_dir}",
            str(c_file),
            "-o", str(obj_file)
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Compilation failed: {result.stderr}")

    def build(self, file_path: str, content: str) -> IncrementalBuildResult:
        """
        Run an incremental build.

        Args:
            file_path: Source file path.
            content: Source code text.

        Returns:
            The build result.
        """
        result = IncrementalBuildResult()

        try:
            # Detect changes
            changes = self.incremental.update_source(file_path, content)

            # Units that need compiling
            units_to_compile = self.incremental.get_units_to_compile()
            result.total_units = len(self.incremental._units)

            # Compile the changed units
            compiled_outputs = self.incremental.compile_all_pending()
            result.compiled_units = len(compiled_outputs)
            result.cached_units = result.total_units - result.compiled_units

            # Collect outputs
            result.output_files = compiled_outputs

            # Combined output (all object file paths, in source order)
            result.combined_output = self.incremental.get_combined_output(file_path)

            result.success = True

        except Exception as e:
            result.success = False
            result.errors.append(str(e))

        return result

    def link(self, output_path: str, file_paths: List[str]) -> bool:
        """
        Link all object files.

        Args:
            output_path: Output file path.
            file_paths: Source file paths whose artifacts should be linked.

        Returns:
            True on success.
        """
        import sysconfig

        # Gather all object files from the combined outputs
        obj_files = []
        for fp in file_paths:
            combined = self.incremental.get_combined_output(fp)
            for line in combined.splitlines():
                if line.strip() and line.endswith('.o'):
                    obj_files.append(line.strip())

        if not obj_files:
            return False

        # Python library directory
        lib_dir = sysconfig.get_config_var('LIBDIR') or '/usr/lib'

        # Shared library vs. executable, chosen by output extension
        if output_path.endswith('.so') or output_path.endswith('.dll'):
            link_flags = ["-shared"]
        else:
            link_flags = []

        # Link command
        cmd = [
            self.cxx,
            *link_flags,
            *obj_files,
            f"-L{lib_dir}",
            f"-lpython{sys.version_info.major}.{sys.version_info.minor}",
            "-o", output_path
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)
        return result.returncode == 0

    def get_stats(self) -> Dict[str, Any]:
        """Return build statistics (incremental stats plus artifact counts)."""
        stats = self.incremental.get_stats()
        stats['c_files'] = len(list(self.c_dir.glob('*.c')))
        stats['obj_files'] = len(list(self.obj_dir.glob('*.o')))
        return stats

    def clean(self):
        """Remove all caches and temporary files, then recreate the directories."""
        import shutil

        self.incremental.clear()

        for d in [self.c_dir, self.obj_dir]:
            if d.exists():
                shutil.rmtree(d)
                d.mkdir(parents=True, exist_ok=True)

    def save(self):
        """Persist the incremental state."""
        self.incremental.save()
|
||||
|
||||
|
||||
def create_incremental_native_compiler(
    cache_dir: str = ".sikuwa_cache",
    cc: str = "gcc",
    cxx: str = "g++"
) -> IncrementalNativeCompiler:
    """Factory helper: construct an :class:`IncrementalNativeCompiler`."""
    return IncrementalNativeCompiler(cache_dir=cache_dir, cc=cc, cxx=cxx)
|
||||
778
incremental/core.py
Normal file
778
incremental/core.py
Normal file
@@ -0,0 +1,778 @@
|
||||
# sikuwa/incremental/core.py
"""
Incremental compilation core - pure Python implementation.

"Compile exactly what you point at": only the changed parts of the
source are recompiled.

Serves as the fallback implementation when the C++ extension is
unavailable.
"""

import hashlib
import json
import os
import time
from enum import Enum, auto
from dataclasses import dataclass, field
from typing import List, Dict, Set, Optional, Tuple, Callable, Any
from pathlib import Path

from .analyzer import PythonAnalyzer, CodeBlock, BlockType
|
||||
|
||||
|
||||
class UnitType(Enum):
    """Kind of compilation unit."""
    LINE = auto()       # single source line
    STATEMENT = auto()  # single statement
    FUNCTION = auto()   # function definition
    CLASS = auto()      # class definition
    MODULE = auto()     # whole module
    IMPORT = auto()     # import statement
    DECORATOR = auto()  # decorator
    BLOCK = auto()      # control-flow / with block
|
||||
|
||||
|
||||
class UnitState(Enum):
    """Change state of a compilation unit."""
    UNKNOWN = auto()    # not yet classified
    UNCHANGED = auto()  # identical to the previous snapshot
    MODIFIED = auto()   # content changed
    ADDED = auto()      # new in this snapshot
    DELETED = auto()    # removed in this snapshot
    AFFECTED = auto()   # unchanged itself, but depends on a changed unit
|
||||
|
||||
|
||||
@dataclass
class CompilationUnit:
    """A compilation unit - the smallest compilation granularity."""
    id: str = ""                    # unique id: "file:start:end:hash8"
    file_path: str = ""
    start_line: int = 0             # 1-based, inclusive
    end_line: int = 0
    type: UnitType = UnitType.LINE
    name: str = ""                  # function/class name when applicable
    content: str = ""               # source text
    content_hash: str = ""          # whitespace-insensitive content hash
    dependencies: List[str] = field(default_factory=list)  # ids this unit depends on
    dependents: List[str] = field(default_factory=list)    # ids depending on this unit
    state: UnitState = UnitState.UNKNOWN
    cached_output: str = ""         # last compilation artifact
    cache_timestamp: int = 0        # when the artifact was produced
    cache_valid: bool = False       # whether cached_output is usable

    def compute_hash(self) -> str:
        """Compute the content hash.

        Each line is stripped first so whitespace-only edits do not
        invalidate the hash.
        """
        normalized = '\n'.join(line.strip() for line in self.content.splitlines())
        self.content_hash = hashlib.sha256(normalized.encode()).hexdigest()[:16]
        return self.content_hash

    def generate_id(self) -> str:
        """Generate the unique unit id from location plus content hash."""
        if not self.content_hash:
            self.compute_hash()
        self.id = f"{self.file_path}:{self.start_line}:{self.end_line}:{self.content_hash[:8]}"
        return self.id

    @classmethod
    def from_code_block(cls, block: CodeBlock) -> 'CompilationUnit':
        """Build a unit from an analyzer CodeBlock."""
        unit = cls()
        unit.id = block.id
        # BUGFIX: the id format is "{path}:{start}:{end}:{hash8}", so the
        # path must be recovered by splitting off the LAST three fields.
        # The old split(':')[0] truncated Windows paths ("C:\x.py" -> "C").
        unit.file_path = block.id.rsplit(':', 3)[0] if ':' in block.id else ""
        unit.start_line = block.start_line
        unit.end_line = block.end_line
        unit.content = block.content
        unit.content_hash = block.content_hash
        unit.name = block.name
        unit.dependencies = block.dependencies.copy()

        # Map analyzer block types onto unit types.
        type_map = {
            BlockType.MODULE: UnitType.MODULE,
            BlockType.IMPORT: UnitType.IMPORT,
            BlockType.CLASS: UnitType.CLASS,
            BlockType.FUNCTION: UnitType.FUNCTION,
            BlockType.METHOD: UnitType.FUNCTION,
            BlockType.DECORATOR: UnitType.DECORATOR,
            BlockType.STATEMENT: UnitType.STATEMENT,
            BlockType.ASSIGNMENT: UnitType.STATEMENT,
            BlockType.EXPRESSION: UnitType.STATEMENT,
            BlockType.CONTROL: UnitType.BLOCK,
            BlockType.WITH: UnitType.BLOCK,
        }
        unit.type = type_map.get(block.type, UnitType.STATEMENT)

        return unit
|
||||
|
||||
|
||||
@dataclass
class Snapshot:
    """A version snapshot of one source file."""
    file_path: str = ""
    content_hash: str = ""                                  # hash of the whole file
    line_hashes: List[str] = field(default_factory=list)    # per-line hashes (stripped)
    units: Dict[str, CompilationUnit] = field(default_factory=dict)  # unit id -> unit
    timestamp: int = 0                                      # creation time, ms since epoch
|
||||
|
||||
|
||||
@dataclass
class ChangeRecord:
    """A single detected change between two snapshots."""
    unit_id: str = ""
    change_type: UnitState = UnitState.UNKNOWN
    old_start_line: int = 0   # location in the old snapshot (0 when ADDED)
    old_end_line: int = 0
    new_start_line: int = 0   # location in the new snapshot (0 when DELETED)
    new_end_line: int = 0
    reason: str = ""          # human-readable explanation
|
||||
|
||||
|
||||
class ChangeDetector:
    """Detects changes between two snapshots of a source file."""

    @staticmethod
    def compute_hash(content: str) -> str:
        """Hash of the full content."""
        return hashlib.sha256(content.encode()).hexdigest()[:16]

    @staticmethod
    def compute_line_hash(line: str) -> str:
        """Hash of a single line, ignoring leading/trailing whitespace."""
        stripped = line.strip()
        if not stripped:
            return "empty"
        return hashlib.sha256(stripped.encode()).hexdigest()[:16]

    def create_snapshot(self, file_path: str, content: str) -> Snapshot:
        """Create a snapshot: whole-file hash plus per-line hashes."""
        snap = Snapshot()
        snap.file_path = file_path
        snap.content_hash = self.compute_hash(content)
        snap.timestamp = int(time.time() * 1000)

        lines = content.splitlines()
        snap.line_hashes = [self.compute_line_hash(line) for line in lines]

        return snap

    def get_changed_lines(self, old_snap: Snapshot, new_snap: Snapshot) -> List[int]:
        """Return the changed line numbers in the NEW snapshot (1-based).

        Lines that exist only in the old snapshot (pure deletions) are not
        reported here; they surface through detect_changes() instead.
        """
        # Diff the per-line hashes via an LCS.
        lcs = self._compute_lcs(old_snap.line_hashes, new_snap.line_hashes)

        # New-snapshot indices that are part of the LCS (i.e. unchanged).
        lcs_new_indices = {pair[1] for pair in lcs}

        # Every new line not in the LCS is changed or added.
        changed = []
        for i in range(len(new_snap.line_hashes)):
            if i not in lcs_new_indices:
                changed.append(i + 1)  # 1-based

        return changed

    def _compute_lcs(self, old_hashes: List[str], new_hashes: List[str]) -> List[Tuple[int, int]]:
        """Longest common subsequence of the two hash lists.

        Classic O(m*n) dynamic programming with backtracking; returns the
        matched (old_index, new_index) pairs in ascending order.
        """
        m, n = len(old_hashes), len(new_hashes)

        # DP table: dp[i][j] = LCS length of old[:i] and new[:j].
        dp = [[0] * (n + 1) for _ in range(m + 1)]

        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if old_hashes[i - 1] == new_hashes[j - 1]:
                    dp[i][j] = dp[i - 1][j - 1] + 1
                else:
                    dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])

        # Backtrack to recover the matched index pairs.
        lcs = []
        i, j = m, n
        while i > 0 and j > 0:
            if old_hashes[i - 1] == new_hashes[j - 1]:
                lcs.append((i - 1, j - 1))
                i -= 1
                j -= 1
            elif dp[i - 1][j] > dp[i][j - 1]:
                i -= 1
            else:
                j -= 1

        lcs.reverse()
        return lcs

    def detect_changes(self, old_snap: Snapshot, new_snap: Snapshot) -> List[ChangeRecord]:
        """Compare the unit sets of two snapshots and record the differences."""
        records = []

        old_ids = set(old_snap.units.keys())
        new_ids = set(new_snap.units.keys())

        # Deleted units
        for uid in old_ids - new_ids:
            old_unit = old_snap.units[uid]
            rec = ChangeRecord(
                unit_id=uid,
                change_type=UnitState.DELETED,
                old_start_line=old_unit.start_line,
                old_end_line=old_unit.end_line,
                reason="unit deleted"
            )
            records.append(rec)

        # Added units
        for uid in new_ids - old_ids:
            new_unit = new_snap.units[uid]
            rec = ChangeRecord(
                unit_id=uid,
                change_type=UnitState.ADDED,
                new_start_line=new_unit.start_line,
                new_end_line=new_unit.end_line,
                reason="unit added"
            )
            records.append(rec)

        # Modified units
        # NOTE(review): unit ids embed the content hash, so a unit whose
        # content changed normally gets a NEW id and is reported as
        # DELETED + ADDED above; confirm whether this branch is reachable
        # with the current id scheme.
        for uid in old_ids & new_ids:
            old_unit = old_snap.units[uid]
            new_unit = new_snap.units[uid]
            if old_unit.content_hash != new_unit.content_hash:
                rec = ChangeRecord(
                    unit_id=uid,
                    change_type=UnitState.MODIFIED,
                    old_start_line=old_unit.start_line,
                    old_end_line=old_unit.end_line,
                    new_start_line=new_unit.start_line,
                    new_end_line=new_unit.end_line,
                    reason="content changed"
                )
                records.append(rec)

        return records
|
||||
|
||||
|
||||
class CompilationCache:
    """
    Compilation cache V1.2.

    "Compiling is caching, caching is compiling":
    - every compilation is recorded automatically, so the full history is
      traceable
    - a cache hit is equivalent to a zero-cost compilation
    - integrates predictive warm-up

    Fix: the loaders previously used bare ``except:`` clauses, which also
    swallow KeyboardInterrupt/SystemExit; they are narrowed here to I/O and
    JSON-parse errors only.
    """

    def __init__(self, cache_dir: str):
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self._cache: Dict[str, Dict] = {}
        self._hits = 0
        self._misses = 0
        self._compile_history: List[Dict] = []        # compile/invalidate log
        self._access_sequence: List[str] = []         # recent access order
        self._predictions: Dict[str, List[str]] = {}  # unit -> likely next units
        self._load()

    @staticmethod
    def _read_json(path: Path, default):
        """Load JSON from *path*; return *default* for a missing, unreadable,
        or corrupt file — the cache is best-effort and must never fail hard.
        """
        if not path.exists():
            return default
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError; OSError covers I/O.
            return default

    def _load(self):
        """Load cache entries, compile history, and prediction patterns."""
        self._cache = self._read_json(self.cache_dir / "incremental_cache.json", {})
        self._compile_history = self._read_json(self.cache_dir / "compile_history.json", [])
        self._predictions = self._read_json(self.cache_dir / "prediction_patterns.json", {})

    def save(self):
        """Persist cache entries, recent history, and prediction patterns."""
        with open(self.cache_dir / "incremental_cache.json", 'w', encoding='utf-8') as f:
            json.dump(self._cache, f, indent=2)

        # Keep only the most recent 10000 history entries on disk.
        with open(self.cache_dir / "compile_history.json", 'w', encoding='utf-8') as f:
            json.dump(self._compile_history[-10000:], f, indent=2)

        with open(self.cache_dir / "prediction_patterns.json", 'w', encoding='utf-8') as f:
            json.dump(self._predictions, f, indent=2)

    def has(self, unit_id: str) -> bool:
        """True if an entry exists for *unit_id* (no hit/miss accounting)."""
        return unit_id in self._cache

    def get(self, unit_id: str) -> str:
        """Caching is compiling: a hit returns the compiled output at zero cost.

        Returns "" on a miss.  A hit also updates the hit counter, the access
        statistics on the entry, and the prediction model.
        """
        entry = self._cache.get(unit_id)
        if entry is None:
            self._misses += 1
            return ""
        self._hits += 1
        # Feed the access-pattern learner.
        self._record_access(unit_id)
        # Refresh access statistics on the entry.
        entry['last_access'] = int(time.time() * 1000)
        entry['access_count'] = entry.get('access_count', 0) + 1
        return entry.get('output', '')

    def put(self, unit_id: str, output: str, content_hash: str,
            compile_time_ms: int = 0, file_path: str = "",
            start_line: int = 0, end_line: int = 0):
        """Compiling is caching: record the result plus full metadata."""
        timestamp = int(time.time() * 1000)

        self._cache[unit_id] = {
            'output': output,
            'content_hash': content_hash,
            'timestamp': timestamp,
            'last_access': timestamp,
            'access_count': 1,
            'compile_time_ms': compile_time_ms,
            'file_path': file_path,
            'line_range': [start_line, end_line],
            'size_bytes': len(output.encode('utf-8')),
        }

        # Append to the compile history log.
        self._compile_history.append({
            'unit_id': unit_id,
            'content_hash': content_hash,
            'timestamp': timestamp,
            'compile_time_ms': compile_time_ms,
            'file_path': file_path,
            'action': 'compile'
        })

        # Feed the access-pattern learner.
        self._record_access(unit_id)

    def _record_access(self, unit_id: str):
        """Track the access sequence and learn unit -> next-unit patterns."""
        self._access_sequence.append(unit_id)
        if len(self._access_sequence) > 1000:
            # Keep the sequence bounded; the recent half is enough to learn from.
            self._access_sequence = self._access_sequence[-500:]

        # Learn the "prev was followed by current" transition.
        if len(self._access_sequence) >= 2:
            prev_id = self._access_sequence[-2]
            if prev_id != unit_id:
                if prev_id not in self._predictions:
                    self._predictions[prev_id] = []
                if unit_id not in self._predictions[prev_id]:
                    self._predictions[prev_id].append(unit_id)
                # Cap the prediction list length.
                self._predictions[prev_id] = self._predictions[prev_id][:10]

    def get_predictions(self, unit_id: str) -> List[str]:
        """Units likely to be accessed right after *unit_id*."""
        return self._predictions.get(unit_id, [])

    def invalidate(self, unit_id: str):
        """Drop a single entry and log the invalidation in the history."""
        self._cache.pop(unit_id, None)
        self._compile_history.append({
            'unit_id': unit_id,
            'timestamp': int(time.time() * 1000),
            'action': 'invalidate'
        })

    def invalidate_all(self):
        """Drop every cache entry (history and predictions are kept)."""
        self._cache.clear()

    def is_valid(self, unit_id: str, current_hash: str) -> bool:
        """True if the cached entry for *unit_id* matches *current_hash*."""
        entry = self._cache.get(unit_id)
        return entry is not None and entry.get('content_hash') == current_hash

    def get_compile_history(self, limit: int = 100) -> List[Dict]:
        """Return the most recent *limit* history entries."""
        return self._compile_history[-limit:]

    def get_hot_units(self, limit: int = 20) -> List[Dict]:
        """Return the *limit* most frequently accessed cached units."""
        sorted_items = sorted(
            self._cache.items(),
            key=lambda x: x[1].get('access_count', 0),
            reverse=True
        )
        return [
            {'unit_id': k, 'access_count': v.get('access_count', 0),
             'file': v.get('file_path', ''), 'lines': v.get('line_range', [])}
            for k, v in sorted_items[:limit]
        ]

    def get_stats(self) -> Dict[str, Any]:
        """Aggregate cache statistics (entry count, sizes, hit rate, ...)."""
        total_size = sum(e.get('size_bytes', 0) for e in self._cache.values())
        total_compile_time = sum(e.get('compile_time_ms', 0) for e in self._cache.values())
        lookups = self._hits + self._misses
        return {
            'version': '1.2',
            'entries': len(self._cache),
            'total_size_mb': total_size / (1024 * 1024),
            'total_compile_time_ms': total_compile_time,
            'hits': self._hits,
            'misses': self._misses,
            'hit_rate': self._hits / lookups if lookups > 0 else 0,
            'history_count': len(self._compile_history),
            'prediction_patterns': len(self._predictions),
        }

    @property
    def hit_count(self) -> int:
        return self._hits

    @property
    def miss_count(self) -> int:
        return self._misses
|
||||
|
||||
|
||||
class IncrementalCompiler:
    """
    Incremental ("compile exactly what you point at") compiler.

    Core features:
    1. The compilation unit is the smallest syntactic block.
    2. Change detection - locate only the modified units plus the related
       units they affect.
    3. Recompile only changed units; unchanged units reuse the cache.
    4. Boundary trigger - automatically expand to function/class boundaries.
    5. Stitch outputs back together in original source order.
    """

    def __init__(self, cache_dir: str = ".sikuwa_cache"):
        self.cache = CompilationCache(cache_dir)
        self.detector = ChangeDetector()
        self.analyzer = PythonAnalyzer()

        self._units: Dict[str, CompilationUnit] = {}
        self._file_units: Dict[str, List[str]] = {}  # file -> unit_ids
        self._snapshots: Dict[str, Snapshot] = {}
        self._units_to_compile: List[str] = []

        # Compiler callback: maps a CompilationUnit to its compiled output.
        self._compile_callback: Optional[Callable[[CompilationUnit], str]] = None

    def set_compiler(self, callback: Callable[[CompilationUnit], str]):
        """Install the compiler callback used by compile_unit()."""
        self._compile_callback = callback

    def analyze_source(self, file_path: str, content: str) -> List[CompilationUnit]:
        """Analyze source code and return its list of compilation units."""
        blocks = self.analyzer.analyze(content, file_path)
        units = [CompilationUnit.from_code_block(b) for b in blocks]
        return units

    def register_units(self, file_path: str, units: List[CompilationUnit]):
        """Register compilation units, replacing any previously known for the file."""
        # Remove the old units for this file first.
        if file_path in self._file_units:
            for uid in self._file_units[file_path]:
                self._units.pop(uid, None)

        # Add the new units.
        self._file_units[file_path] = []
        for unit in units:
            self._units[unit.id] = unit
            self._file_units[file_path].append(unit.id)

    def update_source(self, file_path: str, new_content: str) -> List[ChangeRecord]:
        """
        Update the source code and detect changes.

        Returns the list of change records; also fills the pending-compile
        queue with the ids of every affected unit.
        """
        # Analyze the new code.
        new_units = self.analyze_source(file_path, new_content)

        # Create the new snapshot.
        new_snap = self.detector.create_snapshot(file_path, new_content)
        for unit in new_units:
            new_snap.units[unit.id] = unit

        changes = []
        self._units_to_compile = []

        # Check whether an old snapshot exists for this file.
        old_snap = self._snapshots.get(file_path)

        if old_snap:
            # Determine which lines changed.
            changed_lines = self.detector.get_changed_lines(old_snap, new_snap)

            # Find the compilation units affected by those lines.
            affected_ids: Set[str] = set()

            for line in changed_lines:
                # Any unit whose line span covers this line is affected.
                for unit in new_units:
                    if unit.start_line <= line <= unit.end_line:
                        affected_ids.add(unit.id)
                        unit.state = UnitState.MODIFIED
                        unit.cache_valid = False

            # Propagate the impact through the dependency graph.
            affected_ids = self._propagate_dependencies(affected_ids, new_units)

            # Expand to enclosing function/class boundaries.
            affected_ids = self._expand_to_boundaries(affected_ids, new_units)

            # Emit one change record per affected unit.
            for uid in affected_ids:
                unit = self._units.get(uid) or next((u for u in new_units if u.id == uid), None)
                if unit:
                    rec = ChangeRecord(
                        unit_id=uid,
                        change_type=unit.state if unit.state != UnitState.UNKNOWN else UnitState.MODIFIED,
                        new_start_line=unit.start_line,
                        new_end_line=unit.end_line,
                        reason="content changed"
                    )
                    changes.append(rec)
                    self._units_to_compile.append(uid)
        else:
            # First analysis: every unit needs to be compiled.
            for unit in new_units:
                unit.state = UnitState.ADDED
                rec = ChangeRecord(
                    unit_id=unit.id,
                    change_type=UnitState.ADDED,
                    new_start_line=unit.start_line,
                    new_end_line=unit.end_line,
                    reason="first analysis"
                )
                changes.append(rec)
                self._units_to_compile.append(unit.id)

        # Register the units and refresh the snapshot.
        self.register_units(file_path, new_units)
        self._snapshots[file_path] = new_snap

        return changes

    def _propagate_dependencies(self, affected_ids: Set[str],
                                units: List[CompilationUnit]) -> Set[str]:
        """Propagate change impact to transitive dependents (BFS)."""
        # Build the reverse dependency graph: dep -> [units depending on it].
        dependents: Dict[str, List[str]] = {}
        for unit in units:
            for dep_id in unit.dependencies:
                if dep_id not in dependents:
                    dependents[dep_id] = []
                dependents[dep_id].append(unit.id)

        # BFS over the reverse graph starting from every affected unit.
        queue = list(affected_ids)
        visited = set(affected_ids)

        while queue:
            uid = queue.pop(0)
            for dependent_id in dependents.get(uid, []):
                if dependent_id not in visited:
                    visited.add(dependent_id)
                    queue.append(dependent_id)
                    # Mark the dependent unit as affected and evict its cache.
                    for unit in units:
                        if unit.id == dependent_id:
                            unit.state = UnitState.AFFECTED
                            unit.cache_valid = False
                            break

        return visited

    def _expand_to_boundaries(self, affected_ids: Set[str],
                              units: List[CompilationUnit]) -> Set[str]:
        """Expand the affected set to enclosing function/class boundaries."""
        expanded = set(affected_ids)
        unit_map = {u.id: u for u in units}

        for uid in list(affected_ids):
            unit = unit_map.get(uid)
            if not unit:
                continue

            # A change inside a function/class requires recompiling the
            # entire enclosing structure.
            for other in units:
                if other.id == uid:
                    continue
                # Containment check: other fully encloses this unit's span.
                if (other.type in (UnitType.FUNCTION, UnitType.CLASS) and
                    other.start_line <= unit.start_line and
                    other.end_line >= unit.end_line):
                    expanded.add(other.id)
                    other.state = UnitState.AFFECTED
                    other.cache_valid = False

        return expanded

    def get_units_to_compile(self) -> List[str]:
        """Return a copy of the ids of units awaiting compilation."""
        return self._units_to_compile.copy()

    def compile_unit(self, unit_id: str) -> str:
        """
        Compile a single unit.

        "Caching is compiling": a cache hit yields the compiled result at
        zero cost.  Returns "" for an unknown unit id.
        """
        unit = self._units.get(unit_id)
        if not unit:
            return ""

        # Check the cache first - caching is compiling.
        if unit.cache_valid or self.cache.is_valid(unit_id, unit.content_hash):
            output = self.cache.get(unit_id)
            if output:
                unit.cached_output = output
                unit.cache_valid = True
                # Trigger predictive warm-up of likely-next units.
                self._predictive_warmup(unit_id)
                return output

        # Run the real compiler and time it.
        start_time = time.time()
        if self._compile_callback:
            output = self._compile_callback(unit)
        else:
            # Default: return the source unchanged (used for testing).
            output = unit.content
        compile_time_ms = int((time.time() - start_time) * 1000)

        # Compiling is caching: record the result automatically.
        self.mark_compiled(unit_id, output, compile_time_ms)

        return output

    def _predictive_warmup(self, unit_id: str):
        """Queue likely-next units for compilation (predictive cache warm-up)."""
        # Ask the cache which units tend to be accessed after this one.
        predictions = self.cache.get_predictions(unit_id)
        for pred_id in predictions[:2]:  # warm at most 2 units
            if pred_id in self._units and not self.cache.has(pred_id):
                # Add to the pending-compile queue if not already there.
                if pred_id not in self._units_to_compile:
                    self._units_to_compile.append(pred_id)

    def mark_compiled(self, unit_id: str, output: str, compile_time_ms: int = 0):
        """Mark a unit as compiled - compiling is caching."""
        unit = self._units.get(unit_id)
        if unit:
            unit.cached_output = output
            unit.cache_timestamp = int(time.time() * 1000)
            unit.cache_valid = True
            unit.state = UnitState.UNCHANGED

            # Compiling is caching: record the full entry.
            self.cache.put(
                unit_id, output, unit.content_hash,
                compile_time_ms=compile_time_ms,
                file_path=unit.file_path,
                start_line=unit.start_line,
                end_line=unit.end_line
            )

        # Remove from the pending-compile queue.
        if unit_id in self._units_to_compile:
            self._units_to_compile.remove(unit_id)

    def compile_all_pending(self) -> Dict[str, str]:
        """Compile every pending unit; returns unit_id -> output."""
        results = {}
        for uid in self._units_to_compile.copy():
            output = self.compile_unit(uid)
            results[uid] = output
        return results

    def get_combined_output(self, file_path: str) -> str:
        """Return the combined compiled output, stitched in original order."""
        if file_path not in self._file_units:
            return ""

        # Sort the file's units by source line number.
        unit_ids = self._file_units[file_path]
        units = [self._units[uid] for uid in unit_ids if uid in self._units]
        units.sort(key=lambda u: u.start_line)

        # Concatenate the outputs, falling back to the cache when the
        # in-memory copy is empty.
        outputs = []
        for unit in units:
            output = unit.cached_output
            if not output and self.cache.has(unit.id):
                output = self.cache.get(unit.id)
            if output:
                outputs.append(output)

        return '\n'.join(outputs)

    def get_stats(self) -> Dict[str, Any]:
        """Return compiler statistics merged with the cache statistics."""
        cache_stats = self.cache.get_stats()
        return {
            'total_units': len(self._units),
            'pending_units': len(self._units_to_compile),
            'files': len(self._file_units),
            **cache_stats,  # include the detailed cache statistics
        }

    def get_compile_history(self, limit: int = 100) -> List[Dict]:
        """Return the most recent compile-history entries."""
        return self.cache.get_compile_history(limit)

    def get_hot_units(self, limit: int = 20) -> List[Dict]:
        """Return the hottest (most frequently accessed) units."""
        return self.cache.get_hot_units(limit)

    def get_predictions(self, unit_id: str) -> List[str]:
        """Return the predicted next-access units for *unit_id*."""
        return self.cache.get_predictions(unit_id)

    def save(self):
        """Persist the cache state to disk."""
        self.cache.save()

    def clear(self):
        """Reset all in-memory state and invalidate the cache."""
        self._units.clear()
        self._file_units.clear()
        self._snapshots.clear()
        self._units_to_compile.clear()
        self.cache.invalidate_all()
|
||||
|
||||
|
||||
# Try to import the optional C++ extension; fall back to pure Python.
_cpp_available = False
try:
    from .cpp import incremental_engine as _cpp_engine
    _cpp_available = True
except ImportError:
    # Extension not built/installed - the Python implementation is used.
    pass
|
||||
|
||||
|
||||
def create_incremental_compiler(cache_dir: str = ".sikuwa_cache",
                                prefer_cpp: bool = True) -> IncrementalCompiler:
    """
    Create an incremental compiler instance.

    Args:
        cache_dir: directory used for the on-disk cache
        prefer_cpp: whether to prefer the C++ implementation
            (currently ignored - see TODO below)

    Returns:
        An IncrementalCompiler instance.
    """
    # For now the pure-Python implementation is always returned.
    # TODO: return a C++-backed wrapper once the extension is usable.
    return IncrementalCompiler(cache_dir)
|
||||
45
incremental/cpp/CMakeLists.txt
Normal file
45
incremental/cpp/CMakeLists.txt
Normal file
@@ -0,0 +1,45 @@
|
||||
# sikuwa/incremental/cpp/CMakeLists.txt
# Build script for the C++ incremental-compilation engine Python extension.
cmake_minimum_required(VERSION 3.14)
project(incremental_engine)

# C++17, position-independent code so the shared module links cleanly.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Locate Python and pybind11.
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
find_package(pybind11 CONFIG QUIET)

if(NOT pybind11_FOUND)
    # pybind11 is not installed; download it with FetchContent instead.
    include(FetchContent)
    FetchContent_Declare(
        pybind11
        GIT_REPOSITORY https://github.com/pybind/pybind11.git
        GIT_TAG v2.11.1
    )
    FetchContent_MakeAvailable(pybind11)
endif()

# Source files.
set(SOURCES
    incremental_core.cpp
    pybind_incremental.cpp
)

set(HEADERS
    incremental_core.h
)

# Create the Python extension module.
pybind11_add_module(incremental_engine ${SOURCES} ${HEADERS})

# Per-compiler optimization and warning flags.
target_compile_options(incremental_engine PRIVATE
    $<$<CXX_COMPILER_ID:GNU>:-O3 -Wall -Wextra>
    $<$<CXX_COMPILER_ID:Clang>:-O3 -Wall -Wextra>
    $<$<CXX_COMPILER_ID:MSVC>:/O2 /W4>
)

# Install the module next to the package.
install(TARGETS incremental_engine DESTINATION .)
|
||||
777
incremental/cpp/incremental_core.cpp
Normal file
777
incremental/cpp/incremental_core.cpp
Normal file
@@ -0,0 +1,777 @@
|
||||
// sikuwa/incremental/cpp/incremental_core.cpp
|
||||
// 减量编译核心 - C++ 实现
|
||||
|
||||
#include "incremental_core.h"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <iomanip>
|
||||
|
||||
namespace sikuwa {
|
||||
namespace incremental {
|
||||
|
||||
// ============================================================================
|
||||
// 工具函数实现
|
||||
// ============================================================================
|
||||
|
||||
// 简单的哈希函数 (FNV-1a)
|
||||
// 64-bit FNV-1a hash over a raw byte buffer.
static uint64_t fnv1a_hash(const char* data, size_t len) {
    const uint64_t kPrime = 1099511628211ULL;
    uint64_t h = 14695981039346656037ULL;  // FNV offset basis
    const char* end = data + len;
    for (const char* p = data; p != end; ++p) {
        h ^= static_cast<uint64_t>(*p);
        h *= kPrime;
    }
    return h;
}
|
||||
|
||||
// Build a stable unit identifier: "<file>:<start>:<end>:<8-char hash prefix>".
std::string generate_unit_id(const std::string& file_path, int start_line,
                             int end_line, const std::string& content_hash) {
    std::string id = file_path;
    id += ":";
    id += std::to_string(start_line);
    id += ":";
    id += std::to_string(end_line);
    id += ":";
    id += content_hash.substr(0, 8);  // substr clamps if the hash is shorter
    return id;
}
|
||||
|
||||
// Milliseconds since the Unix epoch.
int64_t current_timestamp() {
    using namespace std::chrono;
    auto since_epoch = system_clock::now().time_since_epoch();
    return duration_cast<milliseconds>(since_epoch).count();
}
|
||||
|
||||
// Slurp an entire file into a string; returns "" when it cannot be opened.
std::string read_file(const std::string& path) {
    std::ifstream in(path);
    if (!in.is_open()) {
        return "";
    }
    std::ostringstream buffer;
    buffer << in.rdbuf();
    return buffer.str();
}
|
||||
|
||||
// Write *content* to *path*, truncating any existing file.  Failure to open
// (missing directory, permissions, ...) is silently ignored, matching the
// best-effort style of the surrounding cache code.
void write_file(const std::string& path, const std::string& content) {
    std::ofstream file(path);
    if (file.is_open()) {
        file << content;
    }
}
|
||||
|
||||
// Split *content* into lines using getline semantics (a trailing newline
// produces no empty final element).
std::vector<std::string> split_lines(const std::string& content) {
    std::vector<std::string> result;
    std::istringstream stream(content);
    for (std::string current; std::getline(stream, current); ) {
        result.push_back(current);
    }
    return result;
}
|
||||
|
||||
// Inverse of split_lines: join with single '\n' separators, no trailing newline.
std::string join_lines(const std::vector<std::string>& lines) {
    std::string out;
    bool first = true;
    for (const auto& line : lines) {
        if (!first) {
            out += "\n";
        }
        out += line;
        first = false;
    }
    return out;
}
|
||||
|
||||
// ============================================================================
|
||||
// UnitManager 实现
|
||||
// ============================================================================
|
||||
|
||||
// UnitManager owns the unit table and the per-file index; nothing to set up
// or tear down beyond the default container lifetimes.
UnitManager::UnitManager() {}
UnitManager::~UnitManager() {}
|
||||
|
||||
// Register a unit and index it under its source file.
void UnitManager::add_unit(const CompilationUnit& unit) {
    file_units_[unit.file_path].push_back(unit.id);
    units_[unit.id] = unit;
}
|
||||
|
||||
void UnitManager::update_unit(const std::string& id, const CompilationUnit& unit) {
|
||||
if (units_.find(id) != units_.end()) {
|
||||
units_[id] = unit;
|
||||
}
|
||||
}
|
||||
|
||||
void UnitManager::remove_unit(const std::string& id) {
|
||||
auto it = units_.find(id);
|
||||
if (it != units_.end()) {
|
||||
// 从文件索引中移除
|
||||
auto& file_ids = file_units_[it->second.file_path];
|
||||
file_ids.erase(std::remove(file_ids.begin(), file_ids.end(), id), file_ids.end());
|
||||
|
||||
// 从依赖关系中移除
|
||||
for (const auto& dep_id : it->second.dependencies) {
|
||||
auto dep_it = units_.find(dep_id);
|
||||
if (dep_it != units_.end()) {
|
||||
auto& dependents = dep_it->second.dependents;
|
||||
dependents.erase(std::remove(dependents.begin(), dependents.end(), id),
|
||||
dependents.end());
|
||||
}
|
||||
}
|
||||
|
||||
units_.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
// Look up a unit by id; nullptr when absent.
CompilationUnit* UnitManager::get_unit(const std::string& id) {
    auto pos = units_.find(id);
    if (pos == units_.end()) {
        return nullptr;
    }
    return &pos->second;
}
|
||||
|
||||
// Const overload of the lookup above.
const CompilationUnit* UnitManager::get_unit(const std::string& id) const {
    auto pos = units_.find(id);
    if (pos == units_.end()) {
        return nullptr;
    }
    return &pos->second;
}
|
||||
|
||||
std::vector<CompilationUnit*> UnitManager::get_units_by_file(const std::string& file_path) {
|
||||
std::vector<CompilationUnit*> result;
|
||||
auto it = file_units_.find(file_path);
|
||||
if (it != file_units_.end()) {
|
||||
for (const auto& id : it->second) {
|
||||
if (auto* unit = get_unit(id)) {
|
||||
result.push_back(unit);
|
||||
}
|
||||
}
|
||||
}
|
||||
// 按行号排序
|
||||
std::sort(result.begin(), result.end(),
|
||||
[](const CompilationUnit* a, const CompilationUnit* b) {
|
||||
return a->start_line < b->start_line;
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<CompilationUnit*> UnitManager::get_units_in_range(
|
||||
const std::string& file_path, int start, int end) {
|
||||
std::vector<CompilationUnit*> result;
|
||||
auto units = get_units_by_file(file_path);
|
||||
for (auto* unit : units) {
|
||||
// 检查是否有交集
|
||||
if (unit->start_line <= end && unit->end_line >= start) {
|
||||
result.push_back(unit);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void UnitManager::add_dependency(const std::string& from_id, const std::string& to_id) {
|
||||
auto* from_unit = get_unit(from_id);
|
||||
auto* to_unit = get_unit(to_id);
|
||||
|
||||
if (from_unit && to_unit) {
|
||||
// from 依赖 to
|
||||
if (std::find(from_unit->dependencies.begin(), from_unit->dependencies.end(), to_id)
|
||||
== from_unit->dependencies.end()) {
|
||||
from_unit->dependencies.push_back(to_id);
|
||||
}
|
||||
// to 被 from 依赖
|
||||
if (std::find(to_unit->dependents.begin(), to_unit->dependents.end(), from_id)
|
||||
== to_unit->dependents.end()) {
|
||||
to_unit->dependents.push_back(from_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void UnitManager::remove_dependency(const std::string& from_id, const std::string& to_id) {
|
||||
auto* from_unit = get_unit(from_id);
|
||||
auto* to_unit = get_unit(to_id);
|
||||
|
||||
if (from_unit) {
|
||||
auto& deps = from_unit->dependencies;
|
||||
deps.erase(std::remove(deps.begin(), deps.end(), to_id), deps.end());
|
||||
}
|
||||
if (to_unit) {
|
||||
auto& dependents = to_unit->dependents;
|
||||
dependents.erase(std::remove(dependents.begin(), dependents.end(), from_id),
|
||||
dependents.end());
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> UnitManager::get_dependencies(const std::string& id) const {
|
||||
const auto* unit = get_unit(id);
|
||||
return unit ? unit->dependencies : std::vector<std::string>{};
|
||||
}
|
||||
|
||||
std::vector<std::string> UnitManager::get_dependents(const std::string& id) const {
|
||||
const auto* unit = get_unit(id);
|
||||
return unit ? unit->dependents : std::vector<std::string>{};
|
||||
}
|
||||
|
||||
void UnitManager::collect_affected_recursive(const std::string& id,
|
||||
std::unordered_set<std::string>& visited) const {
|
||||
if (visited.count(id)) return;
|
||||
visited.insert(id);
|
||||
|
||||
const auto* unit = get_unit(id);
|
||||
if (!unit) return;
|
||||
|
||||
// 递归收集所有依赖此单元的单元
|
||||
for (const auto& dependent_id : unit->dependents) {
|
||||
collect_affected_recursive(dependent_id, visited);
|
||||
}
|
||||
}
|
||||
|
||||
// Every transitive dependent of *changed_id*, excluding the unit itself.
std::vector<std::string> UnitManager::get_affected_units(const std::string& changed_id) const {
    std::unordered_set<std::string> reached;
    collect_affected_recursive(changed_id, reached);
    reached.erase(changed_id);  // the changed unit is not its own dependent
    std::vector<std::string> result(reached.begin(), reached.end());
    return result;
}
|
||||
|
||||
// Apply *callback* to every registered unit (iteration order unspecified).
void UnitManager::for_each(std::function<void(CompilationUnit&)> callback) {
    for (auto& entry : units_) {
        callback(entry.second);
    }
}
|
||||
|
||||
// Drop all units and the per-file index.
void UnitManager::clear() {
    file_units_.clear();
    units_.clear();
}
|
||||
|
||||
// Serialize the unit table to a line-oriented, tab-separated text format:
//   line 1: unit count
//   then per unit: id, file_path, start_line, end_line, type (int), name,
//                  content_hash, dependency count, dependency ids...
// NOTE(review): fields are tab-delimited, so ids/paths/names containing tabs
// or newlines would corrupt the stream — confirm inputs never contain them.
// Dependents are intentionally omitted; deserialize() rebuilds them.
std::string UnitManager::serialize() const {
    std::ostringstream oss;
    oss << units_.size() << "\n";
    for (const auto& pair : units_) {
        const auto& u = pair.second;
        oss << u.id << "\t" << u.file_path << "\t" << u.start_line << "\t"
            << u.end_line << "\t" << static_cast<int>(u.type) << "\t"
            << u.name << "\t" << u.content_hash << "\t"
            << u.dependencies.size();
        for (const auto& dep : u.dependencies) {
            oss << "\t" << dep;
        }
        oss << "\n";
    }
    return oss.str();
}
|
||||
|
||||
// Rebuild the unit table from the text produced by serialize().
// Format: first line is the unit count; each following line holds one unit's
// tab-separated fields (see serialize()).  The `dependents` lists are not
// stored — they are reconstructed from `dependencies` in the final pass.
void UnitManager::deserialize(const std::string& data) {
    clear();
    std::istringstream iss(data);
    size_t count;
    iss >> count;
    iss.ignore();  // skip the newline after the count

    for (size_t i = 0; i < count; ++i) {
        std::string line;
        std::getline(iss, line);
        std::istringstream line_iss(line);

        CompilationUnit u;
        int type_int;
        size_t dep_count;

        std::getline(line_iss, u.id, '\t');
        std::getline(line_iss, u.file_path, '\t');
        line_iss >> u.start_line;
        line_iss.ignore();  // consume the tab separator
        line_iss >> u.end_line;
        line_iss.ignore();
        line_iss >> type_int;
        u.type = static_cast<UnitType>(type_int);
        line_iss.ignore();
        std::getline(line_iss, u.name, '\t');
        std::getline(line_iss, u.content_hash, '\t');
        line_iss >> dep_count;

        for (size_t j = 0; j < dep_count; ++j) {
            std::string dep;
            line_iss.ignore();  // tab before each dependency id
            std::getline(line_iss, dep, '\t');
            if (!dep.empty()) {
                u.dependencies.push_back(dep);
            }
        }

        add_unit(u);
    }

    // Rebuild the reverse (dependents) edges from the dependency lists.
    for (auto& pair : units_) {
        for (const auto& dep_id : pair.second.dependencies) {
            auto* dep_unit = get_unit(dep_id);
            if (dep_unit) {
                dep_unit->dependents.push_back(pair.first);
            }
        }
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// ChangeDetector 实现
|
||||
// ============================================================================
|
||||
|
||||
// ChangeDetector holds no state; default construction/destruction suffices.
ChangeDetector::ChangeDetector() {}
ChangeDetector::~ChangeDetector() {}
|
||||
|
||||
std::string ChangeDetector::compute_hash(const std::string& content) {
|
||||
uint64_t hash = fnv1a_hash(content.c_str(), content.size());
|
||||
std::ostringstream oss;
|
||||
oss << std::hex << std::setfill('0') << std::setw(16) << hash;
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
std::string ChangeDetector::compute_line_hash(const std::string& line) {
|
||||
// 去除首尾空白后计算哈希
|
||||
size_t start = line.find_first_not_of(" \t\r\n");
|
||||
size_t end = line.find_last_not_of(" \t\r\n");
|
||||
if (start == std::string::npos) {
|
||||
return "empty";
|
||||
}
|
||||
std::string trimmed = line.substr(start, end - start + 1);
|
||||
return compute_hash(trimmed);
|
||||
}
|
||||
|
||||
// Capture the file's identity (whole-file hash, timestamp) plus a per-line
// hash fingerprint used later for LCS-based change detection.
Snapshot ChangeDetector::create_snapshot(const std::string& file_path,
                                         const std::string& content) {
    Snapshot snapshot;
    snapshot.file_path = file_path;
    snapshot.content_hash = compute_hash(content);
    snapshot.timestamp = current_timestamp();

    const auto lines = split_lines(content);
    snapshot.line_hashes.reserve(lines.size());
    for (const auto& line : lines) {
        snapshot.line_hashes.push_back(compute_line_hash(line));
    }
    return snapshot;
}
|
||||
|
||||
std::vector<int> ChangeDetector::get_changed_lines(const Snapshot& old_snap,
|
||||
const Snapshot& new_snap) {
|
||||
std::vector<int> changed;
|
||||
|
||||
size_t old_size = old_snap.line_hashes.size();
|
||||
size_t new_size = new_snap.line_hashes.size();
|
||||
size_t max_size = std::max(old_size, new_size);
|
||||
|
||||
// 使用 LCS 算法进行精确对比
|
||||
auto lcs = compute_lcs(old_snap.line_hashes, new_snap.line_hashes);
|
||||
|
||||
// 标记所有不在 LCS 中的行为变更
|
||||
std::unordered_set<int> lcs_new_lines;
|
||||
for (const auto& pair : lcs) {
|
||||
lcs_new_lines.insert(pair.second);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < new_size; ++i) {
|
||||
if (lcs_new_lines.find(static_cast<int>(i)) == lcs_new_lines.end()) {
|
||||
changed.push_back(static_cast<int>(i) + 1); // 1-based
|
||||
}
|
||||
}
|
||||
|
||||
return changed;
|
||||
}
|
||||
|
||||
// Classic O(m*n) longest-common-subsequence dynamic program followed by a
// backtrack that yields matched (old_index, new_index) pairs in ascending order.
std::vector<std::pair<int, int>> ChangeDetector::compute_lcs(
    const std::vector<std::string>& old_lines,
    const std::vector<std::string>& new_lines) {

    const int m = static_cast<int>(old_lines.size());
    const int n = static_cast<int>(new_lines.size());

    // table[r][c] = LCS length of prefixes old[:r], new[:c].
    std::vector<std::vector<int>> table(m + 1, std::vector<int>(n + 1, 0));
    for (int r = 1; r <= m; ++r) {
        for (int c = 1; c <= n; ++c) {
            if (old_lines[r - 1] == new_lines[c - 1]) {
                table[r][c] = table[r - 1][c - 1] + 1;
            } else {
                table[r][c] = std::max(table[r - 1][c], table[r][c - 1]);
            }
        }
    }

    // Backtrack from the bottom-right corner to recover the pairing.
    std::vector<std::pair<int, int>> matches;
    for (int r = m, c = n; r > 0 && c > 0; ) {
        if (old_lines[r - 1] == new_lines[c - 1]) {
            matches.emplace_back(r - 1, c - 1);
            --r;
            --c;
        } else if (table[r - 1][c] > table[r][c - 1]) {
            --r;
        } else {
            --c;
        }
    }
    std::reverse(matches.begin(), matches.end());
    return matches;
}
|
||||
|
||||
std::vector<ChangeRecord> ChangeDetector::detect_changes(const Snapshot& old_snap,
|
||||
const Snapshot& new_snap) {
|
||||
std::vector<ChangeRecord> records;
|
||||
|
||||
// 对比两个快照中的编译单元
|
||||
std::unordered_set<std::string> old_ids, new_ids;
|
||||
|
||||
for (const auto& pair : old_snap.units) {
|
||||
old_ids.insert(pair.first);
|
||||
}
|
||||
for (const auto& pair : new_snap.units) {
|
||||
new_ids.insert(pair.first);
|
||||
}
|
||||
|
||||
// 检测删除的单元
|
||||
for (const auto& id : old_ids) {
|
||||
if (new_ids.find(id) == new_ids.end()) {
|
||||
ChangeRecord rec;
|
||||
rec.unit_id = id;
|
||||
rec.change_type = UnitState::DELETED;
|
||||
const auto& old_unit = old_snap.units.at(id);
|
||||
rec.old_start_line = old_unit.start_line;
|
||||
rec.old_end_line = old_unit.end_line;
|
||||
rec.reason = "unit deleted";
|
||||
records.push_back(rec);
|
||||
}
|
||||
}
|
||||
|
||||
// 检测新增和修改的单元
|
||||
for (const auto& pair : new_snap.units) {
|
||||
const auto& new_unit = pair.second;
|
||||
auto old_it = old_snap.units.find(pair.first);
|
||||
|
||||
if (old_it == old_snap.units.end()) {
|
||||
// 新增
|
||||
ChangeRecord rec;
|
||||
rec.unit_id = pair.first;
|
||||
rec.change_type = UnitState::ADDED;
|
||||
rec.new_start_line = new_unit.start_line;
|
||||
rec.new_end_line = new_unit.end_line;
|
||||
rec.reason = "unit added";
|
||||
records.push_back(rec);
|
||||
} else {
|
||||
// 检查是否修改
|
||||
const auto& old_unit = old_it->second;
|
||||
if (old_unit.content_hash != new_unit.content_hash) {
|
||||
ChangeRecord rec;
|
||||
rec.unit_id = pair.first;
|
||||
rec.change_type = UnitState::MODIFIED;
|
||||
rec.old_start_line = old_unit.start_line;
|
||||
rec.old_end_line = old_unit.end_line;
|
||||
rec.new_start_line = new_unit.start_line;
|
||||
rec.new_end_line = new_unit.end_line;
|
||||
rec.reason = "content changed";
|
||||
records.push_back(rec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return records;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// CompilationCache 实现
|
||||
// ============================================================================
|
||||
|
||||
// Construct a cache rooted at `cache_dir`; hit/miss counters start at zero.
// NOTE: does not call load() — callers restore persisted state explicitly.
CompilationCache::CompilationCache(const std::string& cache_dir)
    : cache_dir_(cache_dir), hits_(0), misses_(0) {}
|
||||
|
||||
// Persist the cache on destruction (best effort: save() returns silently
// when the cache file cannot be opened).
CompilationCache::~CompilationCache() {
    save();
}
|
||||
|
||||
bool CompilationCache::has(const std::string& unit_id) const {
|
||||
return cache_.find(unit_id) != cache_.end();
|
||||
}
|
||||
|
||||
std::string CompilationCache::get(const std::string& unit_id) const {
|
||||
auto it = cache_.find(unit_id);
|
||||
if (it != cache_.end()) {
|
||||
++hits_;
|
||||
return it->second.output;
|
||||
}
|
||||
++misses_;
|
||||
return "";
|
||||
}
|
||||
|
||||
// Insert or overwrite the entry for `unit_id`, stamping it with the
// current time so staleness can be judged later.
void CompilationCache::put(const std::string& unit_id, const std::string& output,
                           const std::string& content_hash) {
    CacheEntry& entry = cache_[unit_id];
    entry.output = output;
    entry.content_hash = content_hash;
    entry.timestamp = current_timestamp();
}
|
||||
|
||||
// Drop the entry for `unit_id` (no-op if absent).
void CompilationCache::invalidate(const std::string& unit_id) {
    cache_.erase(unit_id);
}
|
||||
|
||||
// Drop every cached entry. Hit/miss counters are intentionally kept.
void CompilationCache::invalidate_all() {
    cache_.clear();
}
|
||||
|
||||
bool CompilationCache::is_valid(const std::string& unit_id,
|
||||
const std::string& current_hash) const {
|
||||
auto it = cache_.find(unit_id);
|
||||
if (it == cache_.end()) return false;
|
||||
return it->second.content_hash == current_hash;
|
||||
}
|
||||
|
||||
void CompilationCache::save() {
|
||||
std::string cache_file = cache_dir_ + "/incremental_cache.dat";
|
||||
std::ofstream file(cache_file);
|
||||
if (!file.is_open()) return;
|
||||
|
||||
file << cache_.size() << "\n";
|
||||
for (const auto& pair : cache_) {
|
||||
file << pair.first << "\n";
|
||||
file << pair.second.content_hash << "\n";
|
||||
file << pair.second.timestamp << "\n";
|
||||
file << pair.second.output.size() << "\n";
|
||||
file << pair.second.output;
|
||||
}
|
||||
}
|
||||
|
||||
void CompilationCache::load() {
|
||||
std::string cache_file = cache_dir_ + "/incremental_cache.dat";
|
||||
std::ifstream file(cache_file);
|
||||
if (!file.is_open()) return;
|
||||
|
||||
size_t count;
|
||||
file >> count;
|
||||
file.ignore();
|
||||
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
std::string unit_id, content_hash;
|
||||
int64_t timestamp;
|
||||
size_t output_size;
|
||||
|
||||
std::getline(file, unit_id);
|
||||
std::getline(file, content_hash);
|
||||
file >> timestamp >> output_size;
|
||||
file.ignore();
|
||||
|
||||
std::string output(output_size, '\0');
|
||||
file.read(&output[0], output_size);
|
||||
|
||||
CacheEntry entry;
|
||||
entry.output = output;
|
||||
entry.content_hash = content_hash;
|
||||
entry.timestamp = timestamp;
|
||||
cache_[unit_id] = entry;
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// IncrementalEngine 实现
|
||||
// ============================================================================
|
||||
|
||||
// Construct the engine and immediately restore any cache persisted under
// `cache_dir` by a previous run.
IncrementalEngine::IncrementalEngine(const std::string& cache_dir)
    : cache_(cache_dir) {
    cache_.load();
}
|
||||
|
||||
// Persist state on shutdown (currently delegates to the cache; see
// save_state()).
IncrementalEngine::~IncrementalEngine() {
    save_state();
}
|
||||
|
||||
void IncrementalEngine::register_units(const std::string& file_path,
|
||||
const std::vector<CompilationUnit>& units) {
|
||||
// 移除该文件的旧单元
|
||||
auto old_units = units_.get_units_by_file(file_path);
|
||||
for (auto* old_unit : old_units) {
|
||||
units_.remove_unit(old_unit->id);
|
||||
}
|
||||
|
||||
// 添加新单元
|
||||
for (const auto& unit : units) {
|
||||
units_.add_unit(unit);
|
||||
}
|
||||
}
|
||||
|
||||
// Snapshot `new_content`, diff it against the file's previous snapshot,
// mark every directly-edited and transitively-dependent unit dirty, queue
// them for recompilation, and return one ChangeRecord per affected unit.
// Side effects: mutates unit states/cache flags, rewrites units_to_compile_,
// and stores the new snapshot (with a copy of the file's current units).
std::vector<ChangeRecord> IncrementalEngine::update_source(
    const std::string& file_path, const std::string& new_content) {

    // Snapshot the new content.
    Snapshot new_snap = detector_.create_snapshot(file_path, new_content);

    // Previous snapshot, if this file has been seen before.
    auto old_it = snapshots_.find(file_path);

    std::vector<ChangeRecord> changes;
    if (old_it != snapshots_.end()) {
        // 1-based line numbers that differ between the snapshots.
        auto changed_lines = detector_.get_changed_lines(old_it->second, new_snap);

        // Units touched by those lines, plus their transitive dependents.
        std::unordered_set<std::string> affected_ids;
        for (int line : changed_lines) {
            auto units = units_.get_units_in_range(file_path, line, line);
            for (auto* unit : units) {
                affected_ids.insert(unit->id);
                // Directly edited: mark dirty and invalidate its cache.
                unit->state = UnitState::MODIFIED;
                unit->cache_valid = false;

                // Propagate to everything that depends on this unit.
                auto dependents = units_.get_affected_units(unit->id);
                for (const auto& dep_id : dependents) {
                    affected_ids.insert(dep_id);
                    auto* dep_unit = units_.get_unit(dep_id);
                    if (dep_unit) {
                        dep_unit->state = UnitState::AFFECTED;
                        dep_unit->cache_valid = false;
                    }
                }
            }
        }

        // Grow the set to whole function/class boundaries.
        std::vector<std::string> ids_to_expand(affected_ids.begin(), affected_ids.end());
        expand_to_boundaries(file_path, ids_to_expand);
        affected_ids = std::unordered_set<std::string>(ids_to_expand.begin(), ids_to_expand.end());

        // One change record per affected unit (line info from current state).
        for (const auto& id : affected_ids) {
            auto* unit = units_.get_unit(id);
            if (unit) {
                ChangeRecord rec;
                rec.unit_id = id;
                rec.change_type = unit->state;
                rec.new_start_line = unit->start_line;
                rec.new_end_line = unit->end_line;
                changes.push_back(rec);
            }
        }

        // Rebuild the pending-compile queue.
        // NOTE(review): this clear() also discards pending units belonging
        // to OTHER files, while the first-compile branch below only appends
        // — confirm whether the queue is meant to be per-file or global.
        units_to_compile_.clear();
        for (const auto& id : affected_ids) {
            units_to_compile_.push_back(id);
        }
    } else {
        // First time this file is compiled: every unit must be built.
        auto units = units_.get_units_by_file(file_path);
        for (auto* unit : units) {
            unit->state = UnitState::ADDED;
            units_to_compile_.push_back(unit->id);

            ChangeRecord rec;
            rec.unit_id = unit->id;
            rec.change_type = UnitState::ADDED;
            rec.new_start_line = unit->start_line;
            rec.new_end_line = unit->end_line;
            changes.push_back(rec);
        }
    }

    // Store the new snapshot, with a copy of the file's current units so
    // the next diff can compare per-unit content hashes.
    new_snap.units = std::unordered_map<std::string, CompilationUnit>();
    for (auto* unit : units_.get_units_by_file(file_path)) {
        new_snap.units[unit->id] = *unit;
    }
    snapshots_[file_path] = new_snap;

    return changes;
}
|
||||
|
||||
// Return (a copy of) the ids currently queued for recompilation.
std::vector<std::string> IncrementalEngine::get_units_to_compile() const {
    return units_to_compile_;
}
|
||||
|
||||
void IncrementalEngine::mark_compiled(const std::string& unit_id,
|
||||
const std::string& output) {
|
||||
auto* unit = units_.get_unit(unit_id);
|
||||
if (unit) {
|
||||
unit->cached_output = output;
|
||||
unit->cache_timestamp = current_timestamp();
|
||||
unit->cache_valid = true;
|
||||
unit->state = UnitState::UNCHANGED;
|
||||
|
||||
// 更新缓存
|
||||
cache_.put(unit_id, output, unit->content_hash);
|
||||
}
|
||||
|
||||
// 从待编译列表中移除
|
||||
units_to_compile_.erase(
|
||||
std::remove(units_to_compile_.begin(), units_to_compile_.end(), unit_id),
|
||||
units_to_compile_.end()
|
||||
);
|
||||
}
|
||||
|
||||
// Concatenate the cached outputs of all units in `file_path`, ordered by
// start line and separated by "\n". Units without a usable cached output
// are skipped silently.
// NOTE(review): the separator check uses the unit index (i > 0), not
// "something was already written" — if the first unit is skipped, the next
// fragment still gets a leading "\n". Confirm whether that is intended.
std::string IncrementalEngine::get_combined_output(const std::string& file_path) const {
    std::ostringstream oss;
    // HACK: UnitManager::get_units_by_file has no const overload, so const
    // is cast away. The list is only read here, but cache_.get() below does
    // bump the cache's mutable hit/miss counters.
    auto units = const_cast<UnitManager&>(units_).get_units_by_file(file_path);

    // Emit fragments in source order.
    std::sort(units.begin(), units.end(),
              [](const CompilationUnit* a, const CompilationUnit* b) {
                  return a->start_line < b->start_line;
              });

    for (size_t i = 0; i < units.size(); ++i) {
        const auto* unit = units[i];

        // Prefer the unit's in-memory output, then the persistent cache.
        std::string output;
        if (unit->cache_valid) {
            output = unit->cached_output;
        } else if (cache_.is_valid(unit->id, unit->content_hash)) {
            output = cache_.get(unit->id);
        }

        if (!output.empty()) {
            if (i > 0) oss << "\n";
            oss << output;
        }
    }

    return oss.str();
}
|
||||
|
||||
void IncrementalEngine::expand_to_boundaries(const std::string& file_path,
|
||||
std::vector<std::string>& unit_ids) {
|
||||
std::unordered_set<std::string> expanded(unit_ids.begin(), unit_ids.end());
|
||||
|
||||
for (const auto& id : unit_ids) {
|
||||
auto* unit = units_.get_unit(id);
|
||||
if (!unit) continue;
|
||||
|
||||
// 对于函数、类等结构,确保整个结构都被包含
|
||||
if (unit->type == UnitType::FUNCTION || unit->type == UnitType::CLASS) {
|
||||
// 已经是完整结构,不需要扩展
|
||||
continue;
|
||||
}
|
||||
|
||||
// 检查是否在某个大结构内
|
||||
auto all_units = units_.get_units_by_file(file_path);
|
||||
for (auto* parent : all_units) {
|
||||
if (parent->id == id) continue;
|
||||
|
||||
// 如果当前单元在父结构范围内
|
||||
if (parent->start_line <= unit->start_line &&
|
||||
parent->end_line >= unit->end_line) {
|
||||
// 父结构是函数或类,需要重新编译整个结构
|
||||
if (parent->type == UnitType::FUNCTION || parent->type == UnitType::CLASS) {
|
||||
expanded.insert(parent->id);
|
||||
parent->state = UnitState::AFFECTED;
|
||||
parent->cache_valid = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unit_ids = std::vector<std::string>(expanded.begin(), expanded.end());
|
||||
}
|
||||
|
||||
// Persist engine state. Currently only the compilation cache is saved.
// The previous implementation referenced CompilationCache::cache() (no such
// accessor exists) and a cache_dir_ member that IncrementalEngine does not
// have, so it did not compile; that dead code is removed.
void IncrementalEngine::save_state() {
    cache_.save();
    // TODO: also persist unit/snapshot state via UnitManager::serialize()
    // once a state-file location is exposed (the cache directory is private
    // to CompilationCache).
}
|
||||
|
||||
// Restore persisted state (currently just the compilation cache; unit and
// snapshot state is rebuilt by register_units/update_source).
void IncrementalEngine::load_state() {
    cache_.load();
}
|
||||
|
||||
} // namespace incremental
|
||||
} // namespace sikuwa
|
||||
283
incremental/cpp/incremental_core.h
Normal file
283
incremental/cpp/incremental_core.h
Normal file
@@ -0,0 +1,283 @@
|
||||
// sikuwa/incremental/cpp/incremental_core.h
|
||||
// 减量编译核心 - C++ 实现高性能组件
|
||||
// 指哪编哪:只编译源码改变的部分
|
||||
|
||||
#ifndef SIKUWA_INCREMENTAL_CORE_H
|
||||
#define SIKUWA_INCREMENTAL_CORE_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <memory>
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <chrono>
|
||||
|
||||
namespace sikuwa {
|
||||
namespace incremental {
|
||||
|
||||
// ============================================================================
// Compilation unit kinds (granularities the engine can recompile)
// ============================================================================
enum class UnitType {
    LINE,        // a single source line
    STATEMENT,   // one statement
    FUNCTION,    // a whole function
    CLASS,       // a whole class
    MODULE,      // module-level code
    IMPORT,      // an import statement
    DECORATOR,   // a decorator
    BLOCK        // a generic code block
};
|
||||
|
||||
// ============================================================================
// Change state of a compilation unit
// ============================================================================
enum class UnitState {
    UNKNOWN,     // not yet determined
    UNCHANGED,   // no change detected
    MODIFIED,    // source text was edited directly
    ADDED,       // newly added unit
    DELETED,     // unit was removed
    AFFECTED     // indirectly affected (one of its dependencies changed)
};
|
||||
|
||||
// ============================================================================
// Compilation unit — the smallest granularity of recompilation
// ============================================================================
struct CompilationUnit {
    std::string id;            // unique id: file:start_line:end_line:hash
    std::string file_path;     // source file path
    int start_line;            // first line (1-based)
    int end_line;              // last line (1-based)
    UnitType type;             // kind of unit
    std::string name;          // name (function/class name, etc.)
    std::string content_hash;  // hash of the unit's source text
    std::vector<std::string> dependencies;  // ids of units this one depends on
    std::vector<std::string> dependents;    // ids of units that depend on this one
    UnitState state;           // current change state

    // Cache bookkeeping
    std::string cached_output; // cached compiled output
    int64_t cache_timestamp;   // when cached_output was produced
    bool cache_valid;          // whether cached_output is usable

    CompilationUnit()
        : start_line(0), end_line(0), type(UnitType::LINE),
          state(UnitState::UNKNOWN), cache_timestamp(0), cache_valid(false) {}
};
|
||||
|
||||
// ============================================================================
// Version snapshot — the input to change detection
// ============================================================================
struct Snapshot {
    std::string file_path;
    std::string content_hash;              // hash of the whole file
    std::vector<std::string> line_hashes;  // one hash per line
    std::unordered_map<std::string, CompilationUnit> units;  // units by id
    int64_t timestamp;                     // when the snapshot was taken

    Snapshot() : timestamp(0) {}
};
|
||||
|
||||
// ============================================================================
|
||||
// 变更记录
|
||||
// ============================================================================
|
||||
struct ChangeRecord {
|
||||
std::string unit_id;
|
||||
UnitState change_type;
|
||||
int old_start_line;
|
||||
int old_end_line;
|
||||
int new_start_line;
|
||||
int new_end_line;
|
||||
std::string reason; // 变更原因
|
||||
};
|
||||
|
||||
// ============================================================================
// Unit manager — owns and indexes all compilation units
// ============================================================================
class UnitManager {
public:
    UnitManager();
    ~UnitManager();

    // Add / update / remove compilation units.
    void add_unit(const CompilationUnit& unit);
    void update_unit(const std::string& id, const CompilationUnit& unit);
    void remove_unit(const std::string& id);

    // Lookups. Returned pointers are non-owning and remain valid only while
    // the corresponding units stay registered.
    CompilationUnit* get_unit(const std::string& id);
    const CompilationUnit* get_unit(const std::string& id) const;
    std::vector<CompilationUnit*> get_units_by_file(const std::string& file_path);
    std::vector<CompilationUnit*> get_units_in_range(const std::string& file_path, int start, int end);

    // Dependency-graph edges.
    void add_dependency(const std::string& from_id, const std::string& to_id);
    void remove_dependency(const std::string& from_id, const std::string& to_id);
    std::vector<std::string> get_dependencies(const std::string& id) const;
    std::vector<std::string> get_dependents(const std::string& id) const;
    // Transitive set of units affected when `changed_id` changes.
    std::vector<std::string> get_affected_units(const std::string& changed_id) const;

    // Iteration / bookkeeping.
    void for_each(std::function<void(CompilationUnit&)> callback);
    size_t size() const { return units_.size(); }
    void clear();

    // Serialization.
    std::string serialize() const;
    void deserialize(const std::string& data);

private:
    std::unordered_map<std::string, CompilationUnit> units_;
    std::unordered_map<std::string, std::vector<std::string>> file_units_;  // file -> unit ids

    // Recursively collect all transitively affected units into `visited`.
    void collect_affected_recursive(const std::string& id,
                                    std::unordered_set<std::string>& visited) const;
};
|
||||
|
||||
// ============================================================================
// Change detector — detects source modifications between snapshots
// ============================================================================
class ChangeDetector {
public:
    ChangeDetector();
    ~ChangeDetector();

    // Snapshot the given file content (whole-file hash + per-line hashes).
    Snapshot create_snapshot(const std::string& file_path, const std::string& content);

    // Diff two snapshots at compilation-unit granularity.
    std::vector<ChangeRecord> detect_changes(const Snapshot& old_snap, const Snapshot& new_snap);

    // 1-based line numbers in new_snap that are not part of the line LCS.
    std::vector<int> get_changed_lines(const Snapshot& old_snap, const Snapshot& new_snap);

    // Hash helpers.
    static std::string compute_hash(const std::string& content);
    static std::string compute_line_hash(const std::string& line);

private:
    // Longest common subsequence over line hashes; returns matched
    // (old_index, new_index) pairs in ascending order.
    std::vector<std::pair<int, int>> compute_lcs(const std::vector<std::string>& old_lines,
                                                 const std::vector<std::string>& new_lines);
};
|
||||
|
||||
// ============================================================================
// Compilation cache — persists compiled output per unit
// ============================================================================
class CompilationCache {
public:
    CompilationCache(const std::string& cache_dir);
    ~CompilationCache();  // calls save() (see the .cpp implementation)

    // Basic cache operations. get() returns "" on a miss and updates the
    // hit/miss statistics; has() and is_valid() do not.
    bool has(const std::string& unit_id) const;
    std::string get(const std::string& unit_id) const;
    void put(const std::string& unit_id, const std::string& output, const std::string& content_hash);
    void invalidate(const std::string& unit_id);
    void invalidate_all();

    // True if a cached entry exists whose hash matches `current_hash`.
    bool is_valid(const std::string& unit_id, const std::string& current_hash) const;

    // Persistence to <cache_dir>/incremental_cache.dat.
    void save();
    void load();

    // Statistics.
    size_t size() const { return cache_.size(); }
    size_t hit_count() const { return hits_; }
    size_t miss_count() const { return misses_; }

private:
    struct CacheEntry {
        std::string output;        // compiled output
        std::string content_hash;  // hash of the source that produced it
        int64_t timestamp;         // when the entry was written
    };

    std::string cache_dir_;
    std::unordered_map<std::string, CacheEntry> cache_;
    // mutable so the const get() accessor can record statistics.
    mutable size_t hits_;
    mutable size_t misses_;
};
|
||||
|
||||
// ============================================================================
// Incremental compilation engine — recompile only what changed
// ============================================================================
class IncrementalEngine {
public:
    // Loads any cache persisted under `cache_dir` by a previous run.
    IncrementalEngine(const std::string& cache_dir);
    ~IncrementalEngine();  // persists state via save_state()

    // Register the compilation units of a file (replacing previous ones).
    void register_units(const std::string& file_path,
                        const std::vector<CompilationUnit>& units);

    // Snapshot new content, detect changes, and queue affected units.
    std::vector<ChangeRecord> update_source(const std::string& file_path,
                                            const std::string& new_content);

    // Units currently queued for (re)compilation.
    std::vector<std::string> get_units_to_compile() const;

    // Record a unit's compiled output and dequeue it.
    void mark_compiled(const std::string& unit_id, const std::string& output);

    // Concatenate cached outputs of a file's units in source order.
    std::string get_combined_output(const std::string& file_path) const;

    // Cache access.
    CompilationCache& cache() { return cache_; }
    const CompilationCache& cache() const { return cache_; }

    // Unit access.
    UnitManager& units() { return units_; }
    const UnitManager& units() const { return units_; }

    // Persistence.
    void save_state();
    void load_state();

private:
    UnitManager units_;
    ChangeDetector detector_;
    CompilationCache cache_;
    std::unordered_map<std::string, Snapshot> snapshots_;  // file -> last snapshot
    std::vector<std::string> units_to_compile_;

    // Grow a set of unit ids to whole function/class boundaries.
    void expand_to_boundaries(const std::string& file_path,
                              std::vector<std::string>& unit_ids);
};
|
||||
|
||||
// ============================================================================
// Utility functions
// ============================================================================

// Build a unit id from its location and content hash.
std::string generate_unit_id(const std::string& file_path, int start_line,
                             int end_line, const std::string& content_hash);

// Current timestamp (epoch-based; exact unit defined by the implementation).
int64_t current_timestamp();

// Read an entire file into a string.
std::string read_file(const std::string& path);

// Write `content` to `path`.
void write_file(const std::string& path, const std::string& content);

// Split `content` into individual lines.
std::vector<std::string> split_lines(const std::string& content);

// Join lines back into a single string.
std::string join_lines(const std::vector<std::string>& lines);
|
||||
|
||||
} // namespace incremental
|
||||
} // namespace sikuwa
|
||||
|
||||
#endif // SIKUWA_INCREMENTAL_CORE_H
|
||||
130
incremental/cpp/pybind_incremental.cpp
Normal file
130
incremental/cpp/pybind_incremental.cpp
Normal file
@@ -0,0 +1,130 @@
|
||||
// sikuwa/incremental/cpp/pybind_incremental.cpp
|
||||
// Python 绑定 - 使用 pybind11
|
||||
|
||||
#include <pybind11/pybind11.h>
|
||||
#include <pybind11/stl.h>
|
||||
#include "incremental_core.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
using namespace sikuwa::incremental;
|
||||
|
||||
// Python bindings for the incremental-compilation engine.
// Exposes the enums, data structs, and the four core classes.
PYBIND11_MODULE(incremental_engine, m) {
    m.doc() = "Sikuwa 减量编译引擎 - 指哪编哪";

    // Enum types
    py::enum_<UnitType>(m, "UnitType")
        .value("LINE", UnitType::LINE)
        .value("STATEMENT", UnitType::STATEMENT)
        .value("FUNCTION", UnitType::FUNCTION)
        .value("CLASS", UnitType::CLASS)
        .value("MODULE", UnitType::MODULE)
        .value("IMPORT", UnitType::IMPORT)
        .value("DECORATOR", UnitType::DECORATOR)
        .value("BLOCK", UnitType::BLOCK);

    py::enum_<UnitState>(m, "UnitState")
        .value("UNKNOWN", UnitState::UNKNOWN)
        .value("UNCHANGED", UnitState::UNCHANGED)
        .value("MODIFIED", UnitState::MODIFIED)
        .value("ADDED", UnitState::ADDED)
        .value("DELETED", UnitState::DELETED)
        .value("AFFECTED", UnitState::AFFECTED);

    // CompilationUnit
    py::class_<CompilationUnit>(m, "CompilationUnit")
        .def(py::init<>())
        .def_readwrite("id", &CompilationUnit::id)
        .def_readwrite("file_path", &CompilationUnit::file_path)
        .def_readwrite("start_line", &CompilationUnit::start_line)
        .def_readwrite("end_line", &CompilationUnit::end_line)
        .def_readwrite("type", &CompilationUnit::type)
        .def_readwrite("name", &CompilationUnit::name)
        .def_readwrite("content_hash", &CompilationUnit::content_hash)
        .def_readwrite("dependencies", &CompilationUnit::dependencies)
        .def_readwrite("dependents", &CompilationUnit::dependents)
        .def_readwrite("state", &CompilationUnit::state)
        .def_readwrite("cached_output", &CompilationUnit::cached_output)
        .def_readwrite("cache_valid", &CompilationUnit::cache_valid);

    // ChangeRecord
    py::class_<ChangeRecord>(m, "ChangeRecord")
        .def(py::init<>())
        .def_readwrite("unit_id", &ChangeRecord::unit_id)
        .def_readwrite("change_type", &ChangeRecord::change_type)
        .def_readwrite("old_start_line", &ChangeRecord::old_start_line)
        .def_readwrite("old_end_line", &ChangeRecord::old_end_line)
        .def_readwrite("new_start_line", &ChangeRecord::new_start_line)
        .def_readwrite("new_end_line", &ChangeRecord::new_end_line)
        .def_readwrite("reason", &ChangeRecord::reason);

    // Snapshot (the `units` map is intentionally not exposed)
    py::class_<Snapshot>(m, "Snapshot")
        .def(py::init<>())
        .def_readwrite("file_path", &Snapshot::file_path)
        .def_readwrite("content_hash", &Snapshot::content_hash)
        .def_readwrite("line_hashes", &Snapshot::line_hashes)
        .def_readwrite("timestamp", &Snapshot::timestamp);

    // UnitManager. Methods returning CompilationUnit* use
    // return_value_policy::reference: Python borrows the pointer, the
    // manager keeps ownership.
    py::class_<UnitManager>(m, "UnitManager")
        .def(py::init<>())
        .def("add_unit", &UnitManager::add_unit)
        .def("update_unit", &UnitManager::update_unit)
        .def("remove_unit", &UnitManager::remove_unit)
        .def("get_unit", py::overload_cast<const std::string&>(&UnitManager::get_unit),
             py::return_value_policy::reference)
        .def("get_units_by_file", &UnitManager::get_units_by_file,
             py::return_value_policy::reference)
        .def("get_units_in_range", &UnitManager::get_units_in_range,
             py::return_value_policy::reference)
        .def("add_dependency", &UnitManager::add_dependency)
        .def("remove_dependency", &UnitManager::remove_dependency)
        .def("get_dependencies", &UnitManager::get_dependencies)
        .def("get_dependents", &UnitManager::get_dependents)
        .def("get_affected_units", &UnitManager::get_affected_units)
        .def("size", &UnitManager::size)
        .def("clear", &UnitManager::clear)
        .def("serialize", &UnitManager::serialize)
        .def("deserialize", &UnitManager::deserialize);

    // ChangeDetector
    py::class_<ChangeDetector>(m, "ChangeDetector")
        .def(py::init<>())
        .def("create_snapshot", &ChangeDetector::create_snapshot)
        .def("detect_changes", &ChangeDetector::detect_changes)
        .def("get_changed_lines", &ChangeDetector::get_changed_lines)
        .def_static("compute_hash", &ChangeDetector::compute_hash)
        .def_static("compute_line_hash", &ChangeDetector::compute_line_hash);

    // CompilationCache
    py::class_<CompilationCache>(m, "CompilationCache")
        .def(py::init<const std::string&>())
        .def("has", &CompilationCache::has)
        .def("get", &CompilationCache::get)
        .def("put", &CompilationCache::put)
        .def("invalidate", &CompilationCache::invalidate)
        .def("invalidate_all", &CompilationCache::invalidate_all)
        .def("is_valid", &CompilationCache::is_valid)
        .def("save", &CompilationCache::save)
        .def("load", &CompilationCache::load)
        .def("size", &CompilationCache::size)
        .def("hit_count", &CompilationCache::hit_count)
        .def("miss_count", &CompilationCache::miss_count);

    // IncrementalEngine
    py::class_<IncrementalEngine>(m, "IncrementalEngine")
        .def(py::init<const std::string&>())
        .def("register_units", &IncrementalEngine::register_units)
        .def("update_source", &IncrementalEngine::update_source)
        .def("get_units_to_compile", &IncrementalEngine::get_units_to_compile)
        .def("mark_compiled", &IncrementalEngine::mark_compiled)
        .def("get_combined_output", &IncrementalEngine::get_combined_output)
        .def("save_state", &IncrementalEngine::save_state)
        .def("load_state", &IncrementalEngine::load_state);

    // Free utility functions
    m.def("generate_unit_id", &generate_unit_id);
    m.def("compute_hash", &ChangeDetector::compute_hash);
    m.def("split_lines", &split_lines);
    m.def("join_lines", &join_lines);
}
|
||||
295
incremental/demo.py
Normal file
295
incremental/demo.py
Normal file
@@ -0,0 +1,295 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
减量编译演示 - Sikuwa Incremental Compilation Demo
|
||||
展示"指哪编哪"的精准编译能力
|
||||
"""
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from incremental import (
|
||||
IncrementalCompiler,
|
||||
PythonAnalyzer,
|
||||
BlockType
|
||||
)
|
||||
|
||||
|
||||
def demo_analyzer():
    """Demonstrate the code analyzer: split a source string into blocks."""
    print("=" * 60)
    print("1. 代码分析器演示")
    print("=" * 60)

    analyzer = PythonAnalyzer()

    # Sample source fed to the analyzer (kept verbatim — it is data).
    code = '''
import os
from pathlib import Path

x = 10
y = 20

def add(a, b):
    """加法"""
    return a + b

def multiply(a, b):
    """乘法"""
    return a * b

class Calculator:
    """计算器类"""

    def __init__(self):
        self.history = []

    def calculate(self, op, a, b):
        if op == '+':
            result = add(a, b)
        elif op == '*':
            result = multiply(a, b)
        self.history.append(result)
        return result
'''

    blocks = analyzer.analyze(code, "demo.py")

    print(f"\n检测到 {len(blocks)} 个代码块:\n")

    # One line per detected block: kind, name, line range, dependencies.
    for block in blocks:
        type_name = block.type.name.lower()
        deps = ', '.join(block.references[:5]) if block.references else '无'
        print(f" [{type_name:10}] {block.name:20} 行 {block.start_line:2}-{block.end_line:2} 依赖: {deps}")
|
||||
|
||||
|
||||
def demo_change_detection():
    """Demonstrate change detection: only edited/added units recompile."""
    print("\n" + "=" * 60)
    print("2. 变更检测演示")
    print("=" * 60)

    with tempfile.TemporaryDirectory() as tmpdir:
        compiler = IncrementalCompiler(tmpdir)

        # Simulated compiler backend; counts how many units get compiled.
        compile_count = [0]
        def mock_compile(unit):
            compile_count[0] += 1
            return f"COMPILED: {unit.name or 'unknown'}"

        compiler.set_compiler(mock_compile)

        # Initial source (v1).
        code_v1 = '''
def hello():
    print("Hello")

def world():
    print("World")

def main():
    hello()
    world()
'''

        print("\n[v1] 初始代码:")
        compiler.analyze_source("demo.py", code_v1)
        changes = compiler.update_source("demo.py", code_v1)
        print(f" 检测到 {len(changes)} 个新增单元")

        outputs = compiler.compile_all_pending()
        print(f" 编译了 {compile_count[0]} 个单元")

        # v2: modify a single function body.
        code_v2 = '''
def hello():
    print("Hello, World!") # 修改了这行

def world():
    print("World")

def main():
    hello()
    world()
'''

        compile_count[0] = 0
        print("\n[v2] 修改 hello 函数:")
        changes = compiler.update_source("demo.py", code_v2)
        print(f" 检测到 {len(changes)} 个变更单元")
        for ch in changes:
            print(f" - {ch.unit_id[:40]}... ({ch.change_type.name})")

        outputs = compiler.compile_all_pending()
        print(f" 只编译了 {compile_count[0]} 个单元 (其他使用缓存)")

        # v3: add a brand-new function.
        code_v3 = '''
def hello():
    print("Hello, World!")

def world():
    print("World")

def greet(name):
    print(f"Hi, {name}!")

def main():
    hello()
    world()
    greet("Sikuwa")
'''

        compile_count[0] = 0
        print("\n[v3] 添加 greet 函数:")
        changes = compiler.update_source("demo.py", code_v3)
        print(f" 检测到 {len(changes)} 个变更单元")

        outputs = compiler.compile_all_pending()
        print(f" 编译了 {compile_count[0]} 个新/变更单元")

        # Cache statistics summary.
        stats = compiler.get_stats()
        print(f"\n统计: 缓存命中 {stats.get('cache_hits', 0)}, 总编译 {stats.get('total_compiled', 0)}")
|
||||
|
||||
|
||||
def demo_dependency_tracking():
    """Demonstrate how a change to one unit propagates through its dependents."""
    print("\n" + "=" * 60)
    print("3. 依赖追踪演示")
    print("=" * 60)

    with tempfile.TemporaryDirectory() as workdir:
        compiler = IncrementalCompiler(workdir)

        recompiled = []

        def record_compile(unit):
            # Remember which units actually get recompiled.
            recompiled.append(unit.name or unit.id[:20])
            return "COMPILED"

        compiler.set_compiler(record_compile)

        code_v1 = '''
# 基础配置
CONFIG = {"debug": False}

def get_config():
    return CONFIG

def process():
    cfg = get_config()
    return cfg["debug"]

def main():
    result = process()
    print(result)
'''

        print("\n初始编译...")
        compiler.analyze_source("demo.py", code_v1)
        compiler.update_source("demo.py", code_v1)
        compiler.compile_all_pending()

        # Same source with only the CONFIG constant changed.
        code_v2 = '''
# 基础配置
CONFIG = {"debug": True} # 修改

def get_config():
    return CONFIG

def process():
    cfg = get_config()
    return cfg["debug"]

def main():
    result = process()
    print(result)
'''

        recompiled.clear()
        print("\n修改 CONFIG 后:")
        changes = compiler.update_source("demo.py", code_v2)

        # Illustrate the propagation chain through the dependency graph.
        print(" 受影响的单元链:")
        print(" CONFIG (修改) → get_config (依赖CONFIG) → process (依赖get_config)")

        compiler.compile_all_pending()
        print(f" 重新编译: {', '.join(recompiled) if recompiled else '无'}")
||||
|
||||
def demo_output_combination():
    """Demonstrate merging per-unit compile outputs into one artifact."""
    print("\n" + "=" * 60)
    print("4. 输出合并演示")
    print("=" * 60)

    with tempfile.TemporaryDirectory() as workdir:
        compiler = IncrementalCompiler(workdir)

        def to_pseudo_c(unit):
            """Translate one Python unit into C-flavoured pseudo code."""
            out = []
            for raw in unit.content.strip().split('\n'):
                stripped = raw.strip()
                if not stripped:
                    continue
                if stripped.startswith('def '):
                    # def func(): -> void func() {
                    fn_name = stripped[4:stripped.index('(')]
                    out.append(f"void {fn_name}() {{")
                elif stripped.startswith('print('):
                    # print("x") -> printf("x");
                    arg = stripped[6:-1]
                    out.append(f" printf({arg});")
                else:
                    out.append(f" // {stripped}")
            # Close the function body if the last emitted line left it open.
            if out and not out[-1].endswith('}'):
                out.append("}")
            return '\n'.join(out)

        compiler.set_compiler(to_pseudo_c)

        code = '''
def hello():
    print("Hello")

def world():
    print("World")
'''

        compiler.analyze_source("demo.py", code)
        compiler.update_source("demo.py", code)
        compiler.compile_all_pending()

        combined = compiler.get_combined_output("demo.py")

        print("\n原始 Python 代码:")
        print(code)

        print("合并后的编译产物:")
        print(combined)
||||
|
||||
def main():
    """Run every demo in sequence."""
    banner = "=" * 60
    print("\n" + banner)
    print("Sikuwa 减量编译系统演示")
    print("指哪编哪 - 精准编译,高效开发")
    print(banner)

    # Each demo prints its own section header.
    for demo in (demo_analyzer,
                 demo_change_detection,
                 demo_dependency_tracking,
                 demo_output_combination):
        demo()

    print("\n" + banner)
    print("演示完成!")
    print(banner)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point.
    main()
|
||||
556
incremental/smart_cache.py
Normal file
556
incremental/smart_cache.py
Normal file
@@ -0,0 +1,556 @@
|
||||
# sikuwa/incremental/smart_cache.py
|
||||
"""
|
||||
智能缓存系统 V1.2
|
||||
编译即缓存,缓存即编译,预测缓存预热
|
||||
|
||||
深度集成减量编译引擎,实现:
|
||||
1. 编译即缓存 - 每次编译自动持久化,全历史可追溯
|
||||
2. 缓存即编译 - 缓存命中等同于零成本编译
|
||||
3. 预测缓存预热 - 基于访问模式和依赖图预测并预编译
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import threading
|
||||
import queue
|
||||
from enum import Enum, auto
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from typing import Dict, List, Set, Optional, Tuple, Callable, Any
|
||||
from pathlib import Path
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class CacheEventType(Enum):
    """Kinds of cache events recorded in the event log."""
    HIT = auto()      # cache hit
    MISS = auto()     # cache miss
    WRITE = auto()    # entry written
    EVICT = auto()    # entry evicted
    WARMUP = auto()   # warmed up in the background
    PREDICT = auto()  # predicted as a likely future access
|
||||
|
||||
|
||||
@dataclass
class CacheEntry:
    """One cached compilation result plus its access metadata."""
    key: str = ""
    content_hash: str = ""
    output: str = ""
    timestamp: int = 0
    access_count: int = 0
    last_access: int = 0
    dependencies: List[str] = field(default_factory=list)
    file_path: str = ""
    line_range: Tuple[int, int] = (0, 0)
    compile_time_ms: int = 0
    size_bytes: int = 0

    def touch(self):
        """Bump the access counter and refresh the last-access timestamp."""
        self.access_count += 1
        self.last_access = int(time.time() * 1000)

    def to_dict(self) -> dict:
        """Serialize to a JSON-friendly dict (line_range becomes a list)."""
        plain = ('key', 'content_hash', 'output', 'timestamp',
                 'access_count', 'last_access', 'dependencies', 'file_path')
        data = {name: getattr(self, name) for name in plain}
        data['line_range'] = list(self.line_range)
        data['compile_time_ms'] = self.compile_time_ms
        data['size_bytes'] = self.size_bytes
        return data

    @classmethod
    def from_dict(cls, data: dict) -> 'CacheEntry':
        """Rebuild an entry from a dict produced by ``to_dict``."""
        rng = data.get('line_range', [0, 0])
        return cls(
            key=data.get('key', ''),
            content_hash=data.get('content_hash', ''),
            output=data.get('output', ''),
            timestamp=data.get('timestamp', 0),
            access_count=data.get('access_count', 0),
            last_access=data.get('last_access', 0),
            dependencies=data.get('dependencies', []),
            file_path=data.get('file_path', ''),
            line_range=tuple(rng) if isinstance(rng, list) else rng,
            compile_time_ms=data.get('compile_time_ms', 0),
            size_bytes=data.get('size_bytes', 0),
        )
|
||||
|
||||
|
||||
@dataclass
class CacheEvent:
    """A single record in the cache's event log."""
    event_type: CacheEventType  # what happened
    key: str                    # cache key involved
    timestamp: int              # epoch milliseconds
    details: str = ""           # optional free-form context
|
||||
|
||||
|
||||
@dataclass
class AccessPattern:
    """Which keys tend to be accessed right after a given key."""
    key: str
    access_sequence: List[str] = field(default_factory=list)  # keys seen next
    frequency: int = 0  # number of recorded transitions

    def record_next(self, next_key: str):
        """Record that *next_key* was accessed immediately after this key."""
        if next_key not in self.access_sequence:
            self.access_sequence.append(next_key)
        self.frequency += 1
|
||||
|
||||
|
||||
class SmartCache:
    """
    Smart cache system V1.2.

    Core features:
    - LRU eviction weighted by access frequency
    - Full compile-history persistence to disk
    - Predictive warmup driven by observed access patterns
    - Dependency-aware cache invalidation
    - Background asynchronous warmup thread

    NOTE(review): ``_cache`` is mutated both by callers and by the warmup
    worker thread without any lock — confirm effectively single-threaded
    use, or guard the cache with ``threading.Lock``.
    """

    def __init__(self,
                 cache_dir: str = ".sikuwa_cache",
                 max_entries: int = 10000,
                 max_size_mb: int = 500,
                 enable_warmup: bool = True):
        """Create a cache rooted at *cache_dir* and restore persisted state.

        Args:
            cache_dir: directory for persisted cache / pattern / event files.
            max_entries: maximum number of in-memory entries before eviction.
            max_size_mb: maximum total cached-output size (MB) before eviction.
            enable_warmup: start the background predictive-warmup thread.
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        self.max_entries = max_entries
        self.max_size_bytes = max_size_mb * 1024 * 1024
        self.enable_warmup = enable_warmup

        # Primary store; insertion order doubles as LRU order.
        self._cache: OrderedDict[str, CacheEntry] = OrderedDict()
        self._total_size = 0

        # Statistics counters.
        self._hits = 0
        self._misses = 0
        self._evictions = 0
        self._warmups = 0

        # Bounded event log (see _record_event for trimming).
        self._events: List[CacheEvent] = []
        self._max_events = 10000

        # Access-pattern tracking used for prediction.
        self._last_accessed_key: Optional[str] = None
        self._access_patterns: Dict[str, AccessPattern] = {}

        # Compiler callback used for warmup compilation.
        self._compiler_callback: Optional[Callable] = None

        # Warmup task queue and worker thread.
        self._warmup_queue: queue.Queue = queue.Queue()
        self._warmup_thread: Optional[threading.Thread] = None
        self._warmup_running = False

        # Restore persisted state, then start the warmup worker.
        self._load()
        if enable_warmup:
            self._start_warmup_thread()

    def _load(self):
        """Restore persisted entries and access patterns (best-effort)."""
        cache_file = self.cache_dir / "smart_cache_v1.2.json"
        patterns_file = self.cache_dir / "access_patterns.json"

        if cache_file.exists():
            try:
                with open(cache_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                for entry_data in data.get('entries', []):
                    entry = CacheEntry.from_dict(entry_data)
                    self._cache[entry.key] = entry
                    self._total_size += entry.size_bytes
            except Exception:
                # Corrupt/unreadable persistence is deliberately ignored:
                # the cache simply starts cold.
                pass

        if patterns_file.exists():
            try:
                with open(patterns_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                for key, pattern_data in data.items():
                    self._access_patterns[key] = AccessPattern(
                        key=key,
                        access_sequence=pattern_data.get('sequence', []),
                        frequency=pattern_data.get('frequency', 0)
                    )
            except Exception:
                pass

    def save(self):
        """Persist entries, access patterns and recent events to disk."""
        cache_file = self.cache_dir / "smart_cache_v1.2.json"
        patterns_file = self.cache_dir / "access_patterns.json"
        events_file = self.cache_dir / "cache_events.json"

        # Cache entries.
        with open(cache_file, 'w', encoding='utf-8') as f:
            json.dump({
                'version': '1.2',
                'entries': [entry.to_dict() for entry in self._cache.values()]
            }, f, indent=2)

        # Access patterns.
        with open(patterns_file, 'w', encoding='utf-8') as f:
            patterns = {
                k: {'sequence': p.access_sequence, 'frequency': p.frequency}
                for k, p in self._access_patterns.items()
            }
            json.dump(patterns, f, indent=2)

        # Event log — only the most recent 1000 records are kept on disk.
        with open(events_file, 'w', encoding='utf-8') as f:
            events = [
                {'type': e.event_type.name, 'key': e.key,
                 'timestamp': e.timestamp, 'details': e.details}
                for e in self._events[-1000:]
            ]
            json.dump(events, f, indent=2)

    def set_compiler(self, callback: Callable):
        """Set the compiler callback used for warmup compilation."""
        self._compiler_callback = callback

    # ==================== Core cache operations ====================

    def get(self, key: str, content_hash: str = "") -> Optional[str]:
        """Look up a cached compile result — "cache is compilation".

        A hit is equivalent to a zero-cost compilation. When *content_hash*
        is supplied it must match the stored hash, otherwise the entry is
        treated as stale and the lookup counts as a miss.

        Returns:
            The cached output string, or None on a miss.
        """
        if key in self._cache:
            entry = self._cache[key]

            # Validate the content hash when the caller provides one.
            if content_hash and entry.content_hash != content_hash:
                self._record_event(CacheEventType.MISS, key, "hash mismatch")
                self._misses += 1
                return None

            # Hit: move to the MRU end and update access metadata.
            self._cache.move_to_end(key)
            entry.touch()

            self._record_event(CacheEventType.HIT, key)
            self._hits += 1

            # Feed the access-pattern tracker.
            self._record_access_pattern(key)

            # Kick off predictive warmup for likely follow-up keys.
            if self.enable_warmup:
                self._trigger_predictive_warmup(key)

            return entry.output

        self._record_event(CacheEventType.MISS, key)
        self._misses += 1
        return None

    def put(self, key: str, output: str, content_hash: str,
            dependencies: Optional[List[str]] = None,
            file_path: str = "",
            line_range: Tuple[int, int] = (0, 0),
            compile_time_ms: int = 0) -> bool:
        """Store a compile result — "compilation is caching".

        Every compilation result is recorded so the full history stays
        traceable. Evicts existing entries as needed to respect the
        entry-count and total-size limits.

        Returns:
            True (the write always succeeds once space is made).
        """
        size_bytes = len(output.encode('utf-8'))

        # Evict until the new entry fits; stop if nothing is left to evict.
        while (len(self._cache) >= self.max_entries or
               self._total_size + size_bytes > self.max_size_bytes):
            if not self._evict_one():
                break

        now_ms = int(time.time() * 1000)
        entry = CacheEntry(
            key=key,
            content_hash=content_hash,
            output=output,
            timestamp=now_ms,
            access_count=1,
            last_access=now_ms,
            dependencies=dependencies or [],
            file_path=file_path,
            line_range=line_range,
            compile_time_ms=compile_time_ms,
            size_bytes=size_bytes,
        )

        # Replacing an existing entry: retire its accounted size first.
        if key in self._cache:
            self._total_size -= self._cache[key].size_bytes

        self._cache[key] = entry
        self._total_size += size_bytes

        self._record_event(CacheEventType.WRITE, key,
                           f"size={size_bytes}, compile_time={compile_time_ms}ms")

        # Writes participate in access-pattern tracking too.
        self._record_access_pattern(key)

        return True

    def invalidate(self, key: str):
        """Drop a single cache entry, if present."""
        if key in self._cache:
            self._total_size -= self._cache[key].size_bytes
            del self._cache[key]
            self._record_event(CacheEventType.EVICT, key, "manual invalidate")

    def invalidate_by_dependency(self, dep_key: str):
        """Drop every entry that lists *dep_key* among its dependencies."""
        to_invalidate = [key for key, entry in self._cache.items()
                         if dep_key in entry.dependencies]
        for key in to_invalidate:
            self.invalidate(key)

    def _evict_one(self) -> bool:
        """Evict the single lowest-scoring entry (LRU + frequency weight).

        Score = access_count * 0.3 - recency_seconds * 0.001, i.e. rarely
        used and long-untouched entries score lowest and go first.

        Returns:
            True if an entry was evicted, False if the cache was empty.
        """
        if not self._cache:
            return False

        now = int(time.time() * 1000)
        min_score = float('inf')
        evict_key = None

        for key, entry in self._cache.items():
            recency = (now - entry.last_access) / 1000  # seconds since use
            score = entry.access_count * 0.3 - recency * 0.001
            if score < min_score:
                min_score = score
                evict_key = key

        if evict_key:
            self._total_size -= self._cache[evict_key].size_bytes
            del self._cache[evict_key]
            self._evictions += 1
            self._record_event(CacheEventType.EVICT, evict_key, "LRU eviction")
            return True

        return False

    # ==================== Access-pattern tracking ====================

    def _record_access_pattern(self, key: str):
        """Record that *key* followed the previously accessed key."""
        if self._last_accessed_key and self._last_accessed_key != key:
            if self._last_accessed_key not in self._access_patterns:
                self._access_patterns[self._last_accessed_key] = AccessPattern(
                    key=self._last_accessed_key
                )
            self._access_patterns[self._last_accessed_key].record_next(key)

        self._last_accessed_key = key

    # ==================== Predictive warmup ====================

    def _start_warmup_thread(self):
        """Start the background warmup worker (idempotent)."""
        if self._warmup_thread and self._warmup_thread.is_alive():
            return

        self._warmup_running = True
        self._warmup_thread = threading.Thread(target=self._warmup_worker, daemon=True)
        self._warmup_thread.start()

    def _warmup_worker(self):
        """Worker loop: compile queued warmup tasks in the background."""
        while self._warmup_running:
            try:
                # Block briefly so the loop can notice shutdown requests.
                task = self._warmup_queue.get(timeout=1.0)
                if task is None:
                    continue

                key, content, content_hash = task

                # Another path may have cached it since the task was queued.
                if key in self._cache:
                    continue

                if self._compiler_callback:
                    try:
                        start = time.time()
                        output = self._compiler_callback(content)
                        compile_time = int((time.time() - start) * 1000)

                        self.put(key, output, content_hash,
                                 compile_time_ms=compile_time)
                        self._warmups += 1
                        self._record_event(CacheEventType.WARMUP, key,
                                           f"predictive warmup, time={compile_time}ms")
                    except Exception:
                        # A failed warmup compile is silently skipped —
                        # it is strictly an optimization.
                        pass

            except queue.Empty:
                continue

    def _trigger_predictive_warmup(self, key: str):
        """Mark keys likely to be accessed after *key* as warmup candidates."""
        if key not in self._access_patterns:
            return

        pattern = self._access_patterns[key]

        # Only the first few predicted keys are considered.
        for next_key in pattern.access_sequence[:3]:
            if next_key not in self._cache:
                self._record_event(CacheEventType.PREDICT, next_key,
                                   f"predicted from {key}")
                # This only records the prediction; the actual warmup
                # happens via warmup_unit, which supplies the content.

    def warmup_unit(self, key: str, content: str, content_hash: str):
        """Queue a warmup compilation for *key* unless already cached."""
        if key not in self._cache:
            self._warmup_queue.put((key, content, content_hash))

    def warmup_dependencies(self, keys: List[str],
                            content_provider: Callable[[str], Tuple[str, str]]):
        """Queue warmup tasks for a dependency chain.

        Args:
            keys: cache keys to warm, in order.
            content_provider: maps key -> (content, content_hash); a failing
                provider call simply skips that key.
        """
        for key in keys:
            if key not in self._cache:
                try:
                    content, content_hash = content_provider(key)
                    self._warmup_queue.put((key, content, content_hash))
                except Exception:
                    pass

    def stop_warmup(self):
        """Signal the warmup worker to stop and wait briefly for it."""
        self._warmup_running = False
        if self._warmup_thread:
            self._warmup_thread.join(timeout=2.0)

    # ==================== Event log ====================

    def _record_event(self, event_type: CacheEventType, key: str, details: str = ""):
        """Append an event to the in-memory log, trimming when oversized."""
        event = CacheEvent(
            event_type=event_type,
            key=key,
            timestamp=int(time.time() * 1000),
            details=details
        )
        self._events.append(event)

        # Halve the log once it exceeds the cap, keeping the newest half.
        if len(self._events) > self._max_events:
            self._events = self._events[-self._max_events // 2:]

    def get_recent_events(self, count: int = 100) -> List[dict]:
        """Return the most recent *count* events as plain dicts."""
        return [
            {'type': e.event_type.name, 'key': e.key,
             'timestamp': e.timestamp, 'details': e.details}
            for e in self._events[-count:]
        ]

    # ==================== Statistics and diagnostics ====================

    def get_stats(self) -> Dict[str, Any]:
        """Return a snapshot of cache counters and capacity usage."""
        lookups = self._hits + self._misses
        return {
            'version': '1.2',
            'entries': len(self._cache),
            'total_size_mb': self._total_size / (1024 * 1024),
            'max_entries': self.max_entries,
            'max_size_mb': self.max_size_bytes / (1024 * 1024),
            'hits': self._hits,
            'misses': self._misses,
            'hit_rate': self._hits / lookups if lookups > 0 else 0,
            'evictions': self._evictions,
            'warmups': self._warmups,
            'access_patterns': len(self._access_patterns),
        }

    def get_hot_entries(self, count: int = 10) -> List[Dict]:
        """Return the *count* most frequently accessed entries."""
        sorted_entries = sorted(
            self._cache.values(),
            key=lambda e: e.access_count,
            reverse=True
        )
        return [
            {'key': e.key, 'access_count': e.access_count,
             'file': e.file_path, 'lines': e.line_range}
            for e in sorted_entries[:count]
        ]

    def get_predicted_next(self, key: str, count: int = 5) -> List[str]:
        """Return up to *count* keys predicted to be accessed after *key*."""
        if key not in self._access_patterns:
            return []
        return self._access_patterns[key].access_sequence[:count]

    def has(self, key: str) -> bool:
        """Return True if *key* is currently cached."""
        return key in self._cache

    def clear(self):
        """Drop all entries, patterns and events."""
        self._cache.clear()
        self._total_size = 0
        self._access_patterns.clear()
        self._events.clear()

    def __del__(self):
        """Best-effort shutdown: stop the warmup thread and persist state.

        Everything is wrapped in try/except because __del__ may run during
        interpreter teardown, when modules and globals are already gone —
        the original called stop_warmup() unguarded, which could raise there.
        """
        try:
            self.stop_warmup()
            self.save()
        except Exception:
            pass
|
||||
|
||||
|
||||
# ==================== 工厂函数 ====================
|
||||
|
||||
_global_cache: Optional[SmartCache] = None  # lazily created process-wide singleton


def get_smart_cache(cache_dir: str = ".sikuwa_cache") -> SmartCache:
    """Return the global SmartCache, creating it on first use."""
    global _global_cache
    if _global_cache is None:
        _global_cache = SmartCache(cache_dir)
    return _global_cache
|
||||
|
||||
|
||||
def create_smart_cache(cache_dir: str = ".sikuwa_cache",
                       max_entries: int = 10000,
                       max_size_mb: int = 500,
                       enable_warmup: bool = True) -> SmartCache:
    """Build a fresh, independent SmartCache instance."""
    return SmartCache(cache_dir=cache_dir,
                      max_entries=max_entries,
                      max_size_mb=max_size_mb,
                      enable_warmup=enable_warmup)
|
||||
2
incremental/tests/__init__.py
Normal file
2
incremental/tests/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
# sikuwa/incremental/tests/__init__.py
|
||||
"""减量编译测试包"""
|
||||
360
incremental/tests/test_incremental.py
Normal file
360
incremental/tests/test_incremental.py
Normal file
@@ -0,0 +1,360 @@
|
||||
# sikuwa/incremental/tests/test_incremental.py
|
||||
"""
|
||||
减量编译系统测试
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
# 添加父目录到路径
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
from incremental.core import (
|
||||
IncrementalCompiler,
|
||||
CompilationUnit,
|
||||
Snapshot,
|
||||
ChangeDetector,
|
||||
CompilationCache,
|
||||
UnitType,
|
||||
UnitState
|
||||
)
|
||||
from incremental.analyzer import PythonAnalyzer, BlockType
|
||||
|
||||
|
||||
class TestPythonAnalyzer(unittest.TestCase):
    """Tests for the Python source analyzer."""

    def setUp(self):
        self.analyzer = PythonAnalyzer()

    def _blocks_of(self, code, block_type):
        # Helper: analyze code and keep only blocks of the given type.
        return [b for b in self.analyzer.analyze(code, "test.py")
                if b.type == block_type]

    def test_analyze_function(self):
        """A top-level function is detected as a FUNCTION block."""
        code = '''
def hello(name):
    """Say hello"""
    print(f"Hello, {name}!")
'''
        funcs = self._blocks_of(code, BlockType.FUNCTION)
        self.assertEqual(len(funcs), 1)
        self.assertEqual(funcs[0].name, "hello")

    def test_analyze_class(self):
        """A class definition is detected as a CLASS block."""
        code = '''
class MyClass:
    def __init__(self):
        self.value = 0

    def increment(self):
        self.value += 1
'''
        classes = self._blocks_of(code, BlockType.CLASS)
        self.assertEqual(len(classes), 1)
        self.assertEqual(classes[0].name, "MyClass")

    def test_analyze_import(self):
        """Each import statement yields its own IMPORT block."""
        code = '''
import os
from sys import path
from pathlib import Path
'''
        imports = self._blocks_of(code, BlockType.IMPORT)
        self.assertEqual(len(imports), 3)

    def test_dependency_extraction(self):
        """A function referencing a free name lists it in its references."""
        code = '''
def outer():
    def inner():
        return x
    return inner()
'''
        funcs = self._blocks_of(code, BlockType.FUNCTION)
        # outer should depend on the free variable x.
        self.assertEqual(len(funcs), 1)
        self.assertIn('x', funcs[0].references)
|
||||
|
||||
|
||||
class TestChangeDetector(unittest.TestCase):
    """Tests for the snapshot change detector."""

    def setUp(self):
        self.detector = ChangeDetector()

    @staticmethod
    def _unit(content):
        # Helper: build a single-line unit with its hash computed.
        unit = CompilationUnit(
            id="u1", content=content,
            start_line=1, end_line=1, file_path="test.py"
        )
        unit.compute_hash()
        return unit

    @staticmethod
    def _snapshot(units):
        # Helper: wrap a units dict in a Snapshot.
        snap = Snapshot()
        snap.units = units
        return snap

    def test_detect_addition(self):
        """A unit present only in the new snapshot is reported as ADDED."""
        old = self._snapshot({})
        new = self._snapshot({"u1": self._unit("def foo(): pass")})

        changes = self.detector.detect_changes(old, new)

        self.assertEqual(len(changes), 1)
        self.assertEqual(changes[0].unit_id, "u1")
        self.assertEqual(changes[0].change_type, UnitState.ADDED)

    def test_detect_modification(self):
        """A unit whose content hash changed is reported as MODIFIED."""
        old = self._snapshot({"u1": self._unit("def foo(): pass")})
        new = self._snapshot({"u1": self._unit("def foo(): return 1")})

        changes = self.detector.detect_changes(old, new)

        self.assertEqual(len(changes), 1)
        self.assertEqual(changes[0].unit_id, "u1")
        self.assertEqual(changes[0].change_type, UnitState.MODIFIED)

    def test_detect_deletion(self):
        """A unit present only in the old snapshot is reported as DELETED."""
        old = self._snapshot({"u1": self._unit("def foo(): pass")})
        new = self._snapshot({})

        changes = self.detector.detect_changes(old, new)

        self.assertEqual(len(changes), 1)
        self.assertEqual(changes[0].change_type, UnitState.DELETED)
|
||||
|
||||
|
||||
class TestCompilationCache(unittest.TestCase):
    """Tests for the compilation cache."""

    def setUp(self):
        self.temp_dir = tempfile.mkdtemp()
        self.cache = CompilationCache(self.temp_dir)

    def tearDown(self):
        import shutil
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_put_get(self):
        """A stored value is returned by a subsequent get."""
        self.cache.put("key1", "value1", "hash1")
        self.assertEqual(self.cache.get("key1"), "value1")

    def test_get_nonexistent(self):
        """Missing keys yield the empty string, not an error."""
        self.assertEqual(self.cache.get("nonexistent"), "")

    def test_persistence(self):
        """Saved entries survive into a freshly constructed cache."""
        self.cache.put("key1", "value1", "hash1")
        self.cache.save()

        reloaded = CompilationCache(self.temp_dir)
        self.assertEqual(reloaded.get("key1"), "value1")
|
||||
|
||||
|
||||
class TestIncrementalCompiler(unittest.TestCase):
    """End-to-end tests for the incremental compiler."""

    def setUp(self):
        self.temp_dir = tempfile.mkdtemp()
        self.compiler = IncrementalCompiler(self.temp_dir)
        # Trivial "compiler": upper-case the unit's source.
        self.compiler.set_compiler(lambda unit: unit.content.upper())

    def tearDown(self):
        import shutil
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def _feed(self, code):
        # Helper: analyze + update a single source file.
        self.compiler.analyze_source("test.py", code)
        return self.compiler.update_source("test.py", code)

    def test_initial_compile(self):
        """The first compile sees every unit as new and produces output."""
        code = '''
def hello():
    print("Hello")

def world():
    print("World")
'''
        changes = self._feed(code)
        self.assertGreater(len(changes), 0)

        outputs = self.compiler.compile_all_pending()
        self.assertGreater(len(outputs), 0)

    def test_incremental_compile(self):
        """Editing one function is detected and recompiled on the next pass."""
        code1 = '''
def hello():
    print("Hello")

def world():
    print("World")
'''
        self._feed(code1)
        outputs1 = self.compiler.compile_all_pending()

        code2 = '''
def hello():
    print("Hello Modified")

def world():
    print("World")
'''
        changes = self.compiler.update_source("test.py", code2)
        self.assertGreater(len(changes), 0)

        outputs2 = self.compiler.compile_all_pending()
        self.assertGreater(len(outputs1) + len(outputs2), 0)

    def test_dependency_propagation(self):
        """Changing a value marks its dependents as changed too."""
        code = '''
x = 10

def get_x():
    return x

def double_x():
    return get_x() * 2
'''
        self._feed(code)
        self.compiler.compile_all_pending()

        code2 = '''
x = 20

def get_x():
    return x

def double_x():
    return get_x() * 2
'''
        changes = self.compiler.update_source("test.py", code2)
        # x changed, so its dependents should be flagged as well.
        self.assertGreater(len(changes), 0)

    def test_combined_output(self):
        """The combined output aggregates every compiled unit."""
        code = '''
import os

def hello():
    print("Hello")

def world():
    print("World")
'''
        self._feed(code)
        self.compiler.compile_all_pending()

        combined = self.compiler.get_combined_output("test.py")
        self.assertGreater(len(combined), 0)
|
||||
|
||||
|
||||
class TestBlockBoundary(unittest.TestCase):
    """Tests that block boundaries enclose whole definitions."""

    def setUp(self):
        self.analyzer = PythonAnalyzer()

    def test_class_contains_methods(self):
        """A class block spans the entire class body, methods included."""
        code = '''
class MyClass:
    def method1(self):
        pass

    def method2(self):
        pass
'''
        blocks = self.analyzer.analyze(code, "test.py")
        classes = [b for b in blocks if b.type == BlockType.CLASS]
        self.assertEqual(len(classes), 1)

        # Both methods must fall inside the single class block.
        body = classes[0].content
        self.assertIn("method1", body)
        self.assertIn("method2", body)
|
||||
|
||||
|
||||
def run_tests():
    """Run the full test suite; return True when everything passed."""
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()

    for case in (TestPythonAnalyzer,
                 TestChangeDetector,
                 TestCompilationCache,
                 TestIncrementalCompiler,
                 TestBlockBoundary):
        suite.addTests(loader.loadTestsFromTestCase(case))

    result = unittest.TextTestRunner(verbosity=2).run(suite)
    return result.wasSuccessful()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Exit non-zero when any test failed, so CI can gate on the result.
    sys.exit(0 if run_tests() else 1)
|
||||
Reference in New Issue
Block a user