# Sikuwa/incremental/compiler_integration.py

# sikuwa/incremental/compiler_integration.py
"""
减量编译器集成模块
将减量编译系统与 Sikuwa 编译器集成
"""

import os
import sys
import subprocess
import tempfile
from pathlib import Path
from typing import Dict, List, Optional, Callable, Any
from dataclasses import dataclass

from .core import (
    IncrementalCompiler,
    CompilationUnit,
    ChangeRecord,
    UnitState,
    UnitType
)
from .analyzer import PythonAnalyzer, CodeBlock, BlockType


@dataclass
class IncrementalBuildResult:
    """Outcome of a single incremental build.

    The two container fields use ``None`` as a placeholder default and are
    replaced by fresh, per-instance containers in ``__post_init__`` so that
    instances never share a mutable default.
    """
    success: bool = False
    compiled_units: int = 0
    cached_units: int = 0
    total_units: int = 0
    output_files: Dict[str, str] = None  # unit_id -> output_path
    combined_output: str = ""
    errors: List[str] = None

    def __post_init__(self):
        # Swap the None placeholders for new empty containers; values passed
        # in by the caller are kept as-is (same object, no copy).
        self.output_files = {} if self.output_files is None else self.output_files
        self.errors = [] if self.errors is None else self.errors


class IncrementalNativeCompiler:
    """
    Incremental native compiler.

    Connects the incremental compilation system to the native C/C++ build
    pipeline: Python -> C -> GCC -> dll/so.

    Features:
    - Only changed code units are recompiled.
    - Object files are cached on disk, keyed by the unit's content hash.
    - Linking collects the object files recorded for the requested sources.
    """

    # Output suffixes that are linked with ``-shared`` (tuple for str.endswith).
    _SHARED_LIB_SUFFIXES = ('.so', '.dll', '.dylib', '.pyd')

    # str.translate table that turns arbitrary text into the body of a C
    # string literal. Control characters become fixed-width (3 digit) octal
    # escapes so a following digit can never be absorbed into the escape.
    _C_ESCAPE_TABLE = {code: "\\%03o" % code for code in range(0x20)}
    _C_ESCAPE_TABLE[0x7F] = "\\177"
    _C_ESCAPE_TABLE.update({
        ord('\\'): '\\\\',
        ord('"'): '\\"',
        ord('\n'): '\\n',
        ord('\r'): '\\r',
        ord('\t'): '\\t',
    })

    def __init__(self,
                 cache_dir: str = ".sikuwa_cache",
                 cc: str = "gcc",
                 cxx: str = "g++"):
        """
        Set up the cache/work directories and register the compile callback.

        Args:
            cache_dir: Root directory for the incremental cache.
            cc: C compiler executable used to build object files.
            cxx: C++ compiler driver used for the final link step.
        """
        self.incremental = IncrementalCompiler(cache_dir)
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        self.cc = cc
        self.cxx = cxx

        # Working directories
        self.work_dir = self.cache_dir / "incremental_build"
        self.c_dir = self.work_dir / "c_source"
        self.obj_dir = self.work_dir / "obj"

        for d in [self.work_dir, self.c_dir, self.obj_dir]:
            d.mkdir(parents=True, exist_ok=True)

        # Register the per-unit compile callback
        self.incremental.set_compiler(self._compile_unit)

        # Whether Cython can be imported in this interpreter
        self._cython_available = self._check_cython()

    def _check_cython(self) -> bool:
        """Return True if the ``Cython`` package can be imported."""
        try:
            import Cython
            return True
        except ImportError:
            return False

    def _compile_unit(self, unit: CompilationUnit) -> str:
        """
        Compile a single unit: Python code -> C code -> object file.

        The C file is (re)written on every call; the object file is only
        built when no file for this content hash exists yet.

        Returns:
            Path of the object file, as a string.

        Raises:
            RuntimeError: If the C compiler exits with a non-zero status.
        """
        # Generate the C code
        c_code = self._python_to_c(unit)

        # Save the C file
        c_file = self.c_dir / f"unit_{unit.content_hash}.c"
        c_file.write_text(c_code, encoding='utf-8')

        # Compile to an object file, reusing a cached one when present
        obj_file = self.obj_dir / f"unit_{unit.content_hash}.o"

        if not obj_file.exists():
            self._compile_c_to_obj(c_file, obj_file)

        # The object file path is the "compiled product" of the unit
        return str(obj_file)

    def _python_to_c(self, unit: CompilationUnit) -> str:
        """
        Convert a unit's Python code to C code.

        Cython is used for function and class units when it is available;
        everything else goes through the built-in converter.
        """
        if self._cython_available and unit.type in (UnitType.FUNCTION, UnitType.CLASS):
            return self._cython_convert(unit)
        else:
            return self._builtin_convert(unit)

    def _cython_convert(self, unit: CompilationUnit) -> str:
        """
        Convert a unit with Cython, falling back to the built-in converter.

        The unit is written to a temporary ``.pyx`` file and translated by
        running ``python -m cython`` in a subprocess. The temporary ``.pyx``
        and ``.c`` files are removed afterwards in every case.

        Returns:
            The generated C source, or the built-in converter's output when
            Cython fails for any reason (best effort).
        """
        # Temporary input/output files for the Cython run
        pyx_file = self.work_dir / f"temp_{unit.content_hash}.pyx"
        pyx_file.write_text(unit.content, encoding='utf-8')

        c_file = self.work_dir / f"temp_{unit.content_hash}.c"

        try:
            result = subprocess.run(
                [sys.executable, "-m", "cython", "-3", str(pyx_file), "-o", str(c_file)],
                capture_output=True,
                text=True
            )

            if result.returncode == 0 and c_file.exists():
                return c_file.read_text(encoding='utf-8')
        except (OSError, subprocess.SubprocessError, ValueError):
            # Launch, I/O and decoding problems are not fatal: the unit is
            # still convertible by the built-in converter below.
            pass
        finally:
            # The generated text has been read (or is unusable); do not let
            # temporary files pile up in the work directory.
            for leftover in (pyx_file, c_file):
                try:
                    leftover.unlink()
                except OSError:
                    pass

        # Fall back to the built-in conversion
        return self._builtin_convert(unit)

    def _builtin_convert(self, unit: CompilationUnit) -> str:
        """
        Built-in converter: embed the unit's Python source in a C file.

        The generated C defines a string constant holding the source and a
        function ``sikuwa_exec_unit_<name>(globals, locals)`` that compiles
        and evaluates it, returning 0 on success and -1 on failure.

        Both the source text and the file path are escaped for use inside a
        C string literal (backslashes, quotes, newlines, carriage returns and
        other control characters).
        """
        escaped = unit.content.translate(self._C_ESCAPE_TABLE)
        # Paths may contain backslashes (e.g. on Windows) and must be escaped
        # just like the source text.
        escaped_path = str(unit.file_path).translate(self._C_ESCAPE_TABLE)
        # Keep the unit id from terminating the surrounding C comment early.
        comment_id = str(unit.id).replace('*/', '* /')

        unit_name = unit.name or f"unit_{unit.content_hash[:8]}"
        # NOTE(review): symbol names are derived from the unit name only, so
        # two units with the same name would define the same C symbols —
        # confirm whether callers guarantee unique names.
        safe_name = ''.join(c if c.isalnum() else '_' for c in unit_name)

        c_code = f'''
/* Auto-generated by Sikuwa Incremental Compiler */
/* Unit: {comment_id} */
/* Lines: {unit.start_line}-{unit.end_line} */

#define PY_SSIZE_T_CLEAN
#include <Python.h>

static const char* sikuwa_unit_{safe_name}_source = "{escaped}";

int sikuwa_exec_unit_{safe_name}(PyObject* globals, PyObject* locals) {{
    PyObject* code = Py_CompileString(
        sikuwa_unit_{safe_name}_source,
        "{escaped_path}",
        Py_file_input
    );

    if (code == NULL) {{
        return -1;
    }}

    PyObject* result = PyEval_EvalCode(code, globals, locals);
    Py_DECREF(code);

    if (result == NULL) {{
        return -1;
    }}

    Py_DECREF(result);
    return 0;
}}
'''
        return c_code

    def _compile_c_to_obj(self, c_file: Path, obj_file: Path):
        """
        Compile a C file into an object file with ``self.cc``.

        Raises:
            RuntimeError: If the compiler exits with a non-zero status; the
                message carries the compiler's stderr output.
        """
        import sysconfig

        # Directory containing Python.h
        include_dir = sysconfig.get_path('include')

        cmd = [
            self.cc,
            "-c",
            "-fPIC",
            "-O2",
            f"-I{include_dir}",
            str(c_file),
            "-o", str(obj_file)
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Compilation failed: {result.stderr}")

    def build(self, file_path: str, content: str) -> IncrementalBuildResult:
        """
        Run an incremental build for one source file.

        Args:
            file_path: Path of the source file.
            content: Source code of the file.

        Returns:
            The build result. Any exception raised during the build is
            caught and reported through ``success=False`` and ``errors``.
        """
        result = IncrementalBuildResult()

        try:
            # Register the new source so changes are detected
            self.incremental.update_source(file_path, content)

            # NOTE(review): the returned list is not needed here; the call is
            # kept in case it updates internal state — confirm and drop it if
            # it is a pure query.
            self.incremental.get_units_to_compile()
            result.total_units = len(self.incremental._units)

            # Compile the changed units
            compiled_outputs = self.incremental.compile_all_pending()
            result.compiled_units = len(compiled_outputs)
            result.cached_units = result.total_units - result.compiled_units

            # Collect the outputs
            result.output_files = compiled_outputs

            # Combined output for the file (the object file paths)
            result.combined_output = self.incremental.get_combined_output(file_path)

            result.success = True

        except Exception as e:
            result.success = False
            result.errors.append(str(e))

        return result

    def link(self, output_path: str, file_paths: List[str]) -> bool:
        """
        Link the object files recorded for the given source files.

        Args:
            output_path: Path of the file to produce. A ``.so``, ``.dll``,
                ``.dylib`` or ``.pyd`` suffix selects a shared-library link.
            file_paths: Source file paths whose object files are linked.

        Returns:
            True if the linker succeeded; False if there was nothing to link
            or the linker failed (its stderr is logged in that case).
        """
        import logging
        import sysconfig

        # Collect the object files; each non-empty line ending in ".o" of the
        # combined output is treated as one object file path.
        obj_files = []
        for fp in file_paths:
            combined = self.incremental.get_combined_output(fp)
            for line in combined.splitlines():
                candidate = line.strip()
                if candidate.endswith('.o'):
                    obj_files.append(candidate)

        # Units with identical content share one object file; passing it to
        # the linker twice would produce duplicate-symbol errors.
        obj_files = list(dict.fromkeys(obj_files))

        if not obj_files:
            return False

        # Directory containing the Python library
        lib_dir = sysconfig.get_config_var('LIBDIR') or '/usr/lib'

        # LDVERSION includes the ABI flags (e.g. "3.13t"); it is unset on some
        # platforms, in which case plain "major.minor" is used.
        py_lib_version = (
            sysconfig.get_config_var('LDVERSION')
            or f"{sys.version_info.major}.{sys.version_info.minor}"
        )

        # Decide the output type
        if output_path.endswith(self._SHARED_LIB_SUFFIXES):
            link_flags = ["-shared"]
        else:
            link_flags = []

        # Link command
        cmd = [
            self.cxx,
            *link_flags,
            *obj_files,
            f"-L{lib_dir}",
            f"-lpython{py_lib_version}",
            "-o", output_path
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            # The boolean return carries no detail, so surface the reason.
            logging.getLogger(__name__).error("Link failed: %s", result.stderr)
            return False
        return True

    def get_stats(self) -> Dict[str, Any]:
        """Return the incremental compiler's stats plus C/object file counts."""
        stats = self.incremental.get_stats()
        stats['c_files'] = len(list(self.c_dir.glob('*.c')))
        stats['obj_files'] = len(list(self.obj_dir.glob('*.o')))
        return stats

    def clean(self):
        """Clear the incremental state and remove generated and temporary files."""
        import shutil

        self.incremental.clear()

        for d in [self.c_dir, self.obj_dir]:
            if d.exists():
                shutil.rmtree(d)
            # Always leave the directory in place so later builds can write.
            d.mkdir(parents=True, exist_ok=True)

        # Remove Cython temporaries that an interrupted run may have left.
        for pattern in ('temp_*.pyx', 'temp_*.c'):
            for stray in self.work_dir.glob(pattern):
                try:
                    stray.unlink()
                except OSError:
                    pass

    def save(self):
        """Persist the incremental compiler's state."""
        self.incremental.save()


def create_incremental_native_compiler(
    cache_dir: str = ".sikuwa_cache",
    cc: str = "gcc",
    cxx: str = "g++"
) -> IncrementalNativeCompiler:
    """Build and return an ``IncrementalNativeCompiler``.

    Args:
        cache_dir: Root directory for the incremental cache.
        cc: C compiler executable.
        cxx: C++ compiler driver used for linking.
    """
    compiler = IncrementalNativeCompiler(cache_dir=cache_dir, cc=cc, cxx=cxx)
    return compiler