Sikuwa first commit
Some checks are pending
CI / Test (Python 3.10 on macos-latest) (push) Waiting to run
CI / Test (Python 3.11 on macos-latest) (push) Waiting to run
CI / Test (Python 3.12 on macos-latest) (push) Waiting to run
CI / Test (Python 3.8 on macos-latest) (push) Waiting to run
CI / Test (Python 3.9 on macos-latest) (push) Waiting to run
CI / Test (Python 3.10 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.11 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.12 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.8 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.9 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.10 on windows-latest) (push) Waiting to run
CI / Test (Python 3.11 on windows-latest) (push) Waiting to run
CI / Test (Python 3.12 on windows-latest) (push) Waiting to run
CI / Test (Python 3.8 on windows-latest) (push) Waiting to run
CI / Test (Python 3.9 on windows-latest) (push) Waiting to run
CI / Lint (push) Waiting to run
CI / Release (push) Blocked by required conditions
Documentation / Build Documentation (push) Waiting to run
Some checks are pending
CI / Test (Python 3.10 on macos-latest) (push) Waiting to run
CI / Test (Python 3.11 on macos-latest) (push) Waiting to run
CI / Test (Python 3.12 on macos-latest) (push) Waiting to run
CI / Test (Python 3.8 on macos-latest) (push) Waiting to run
CI / Test (Python 3.9 on macos-latest) (push) Waiting to run
CI / Test (Python 3.10 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.11 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.12 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.8 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.9 on ubuntu-latest) (push) Waiting to run
CI / Test (Python 3.10 on windows-latest) (push) Waiting to run
CI / Test (Python 3.11 on windows-latest) (push) Waiting to run
CI / Test (Python 3.12 on windows-latest) (push) Waiting to run
CI / Test (Python 3.8 on windows-latest) (push) Waiting to run
CI / Test (Python 3.9 on windows-latest) (push) Waiting to run
CI / Lint (push) Waiting to run
CI / Release (push) Blocked by required conditions
Documentation / Build Documentation (push) Waiting to run
This commit is contained in:
556
incremental/smart_cache.py
Normal file
556
incremental/smart_cache.py
Normal file
@@ -0,0 +1,556 @@
|
||||
# sikuwa/incremental/smart_cache.py
|
||||
"""
|
||||
智能缓存系统 V1.2
|
||||
编译即缓存,缓存即编译,预测缓存预热
|
||||
|
||||
深度集成减量编译引擎,实现:
|
||||
1. 编译即缓存 - 每次编译自动持久化,全历史可追溯
|
||||
2. 缓存即编译 - 缓存命中等同于零成本编译
|
||||
3. 预测缓存预热 - 基于访问模式和依赖图预测并预编译
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import threading
|
||||
import queue
|
||||
from enum import Enum, auto
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from typing import Dict, List, Set, Optional, Tuple, Callable, Any
|
||||
from pathlib import Path
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class CacheEventType(Enum):
    """Kinds of events that can appear in the cache event log."""

    # The numbers are written out explicitly; they are the same values that
    # ``auto()`` hands out (1, 2, 3, ... in declaration order).
    HIT = 1      # cache hit
    MISS = 2     # cache miss
    WRITE = 3    # entry written
    EVICT = 4    # entry evicted
    WARMUP = 5   # entry produced by warm-up
    PREDICT = 6  # upcoming access predicted
|
||||
|
||||
|
||||
@dataclass
class CacheEntry:
    """A single cached output together with its bookkeeping metadata."""

    key: str = ""
    content_hash: str = ""
    output: str = ""
    timestamp: int = 0
    access_count: int = 0
    last_access: int = 0  # set by touch() to the current time in milliseconds
    dependencies: List[str] = field(default_factory=list)
    file_path: str = ""
    line_range: Tuple[int, int] = (0, 0)
    compile_time_ms: int = 0
    size_bytes: int = 0

    def touch(self):
        """Count one more access and stamp it with the current time (ms)."""
        self.access_count = self.access_count + 1
        now_ms = int(time.time() * 1000)
        self.last_access = now_ms

    def to_dict(self) -> dict:
        """Return the entry as a plain dict; ``line_range`` is emitted as a list."""
        payload = {}
        for name in ('key', 'content_hash', 'output', 'timestamp',
                     'access_count', 'last_access', 'dependencies',
                     'file_path', 'line_range', 'compile_time_ms',
                     'size_bytes'):
            value = getattr(self, name)
            # Only line_range is converted; every other field is passed through.
            payload[name] = list(value) if name == 'line_range' else value
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> 'CacheEntry':
        """Build an entry from a dict; missing keys fall back to the field defaults."""
        entry = cls()

        # Scalar fields share one code path: (dict key / attribute, fallback).
        scalar_fields = (
            ('key', ''),
            ('content_hash', ''),
            ('output', ''),
            ('timestamp', 0),
            ('access_count', 0),
            ('last_access', 0),
            ('file_path', ''),
            ('compile_time_ms', 0),
            ('size_bytes', 0),
        )
        for name, fallback in scalar_fields:
            setattr(entry, name, data.get(name, fallback))

        # A fresh list is created per call so entries never share a default.
        entry.dependencies = data.get('dependencies', [])

        # Lists become tuples; any other value is stored unchanged.
        raw_range = data.get('line_range', [0, 0])
        if isinstance(raw_range, list):
            raw_range = tuple(raw_range)
        entry.line_range = raw_range

        return entry
|
||||
|
||||
|
||||
@dataclass
class CacheEvent:
    """One record in the cache event log."""

    event_type: CacheEventType   # what kind of event this is
    key: str                     # cache key the event refers to
    timestamp: int               # when the event was recorded
    details: str = field(default="")  # optional free-form description
|
||||
|
||||
|
||||
@dataclass
class AccessPattern:
    """What has been observed to be accessed right after a given key."""

    key: str
    # Distinct follow-up keys, kept in the order they were first seen.
    access_sequence: List[str] = field(default_factory=list)
    # Number of follow-ups recorded so far, repeats included.
    frequency: int = 0

    def record_next(self, next_key: str):
        """Note that ``next_key`` was accessed after this pattern's key."""
        already_known = next_key in self.access_sequence
        if not already_known:
            self.access_sequence.append(next_key)
        self.frequency = self.frequency + 1
|
||||
|
||||
|
||||
class SmartCache:
    """
    Smart cache V1.2 for compilation results.

    Core features:
    - bounded by entry count and by total output size; when a bound is hit
      the entry with the lowest score (access count weighed against idle
      time) is evicted
    - entries, access patterns and the most recent events are persisted as
      JSON files inside ``cache_dir``
    - records which key is accessed after which, to predict upcoming lookups
    - dependency-aware invalidation
    - optional background thread that pre-compiles queued units

    The warm-up worker thread writes to the cache while callers read and
    write it, so all shared state is guarded by one re-entrant lock.

    NOTE: the worker thread is started with a bound method as its target and
    therefore keeps the instance alive; ``__del__`` cannot run while the
    worker is alive. Call ``stop_warmup()`` and ``save()`` explicitly when
    persistence matters.
    """

    # File names used inside ``cache_dir``.
    _CACHE_FILE = "smart_cache_v1.2.json"
    _PATTERNS_FILE = "access_patterns.json"
    _EVENTS_FILE = "cache_events.json"

    def __init__(self,
                 cache_dir: str = ".sikuwa_cache",
                 max_entries: int = 10000,
                 max_size_mb: int = 500,
                 enable_warmup: bool = True):
        """
        Create the cache, load persisted state and optionally start warm-up.

        Args:
            cache_dir: directory holding the persisted JSON files (created
                if missing).
            max_entries: maximum number of entries kept in memory.
            max_size_mb: maximum total size of cached outputs, in MiB.
            enable_warmup: start the background warm-up thread.
        """
        # Thread-related attributes come first so stop_warmup() / __del__
        # stay safe even if a later step (e.g. mkdir) raises.
        self._lock = threading.RLock()
        self._warmup_queue: queue.Queue = queue.Queue()
        self._warmup_thread: Optional[threading.Thread] = None
        self._warmup_running = False

        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        self.max_entries = max_entries
        self.max_size_bytes = max_size_mb * 1024 * 1024
        self.enable_warmup = enable_warmup

        # Main store.
        self._cache: OrderedDict[str, CacheEntry] = OrderedDict()
        self._total_size = 0

        # Statistics.
        self._hits = 0
        self._misses = 0
        self._evictions = 0
        self._warmups = 0

        # Event log.
        self._events: List[CacheEvent] = []
        self._max_events = 10000

        # Access-pattern tracking.
        self._last_accessed_key: Optional[str] = None
        self._access_patterns: Dict[str, AccessPattern] = {}

        # Compiler callback used by the warm-up worker.
        self._compiler_callback: Optional[Callable] = None

        self._load()

        if enable_warmup:
            self._start_warmup_thread()

    # ==================== Persistence ====================

    def _load(self):
        """Load persisted entries and access patterns (best effort)."""
        cache_file = self.cache_dir / self._CACHE_FILE
        patterns_file = self.cache_dir / self._PATTERNS_FILE

        with self._lock:
            if cache_file.exists():
                try:
                    with open(cache_file, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                    for entry_data in data.get('entries', []):
                        entry = CacheEntry.from_dict(entry_data)
                        # A duplicated key in the file must not be counted
                        # twice in the size total.
                        previous = self._cache.get(entry.key)
                        if previous is not None:
                            self._total_size -= previous.size_bytes
                        self._cache[entry.key] = entry
                        self._total_size += entry.size_bytes
                except Exception:
                    # Deliberately best effort: an unreadable or malformed
                    # file must not prevent start-up. Entries loaded before
                    # the failure are kept.
                    pass

            if patterns_file.exists():
                try:
                    with open(patterns_file, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                    for key, pattern_data in data.items():
                        self._access_patterns[key] = AccessPattern(
                            key=key,
                            access_sequence=pattern_data.get('sequence', []),
                            frequency=pattern_data.get('frequency', 0)
                        )
                except Exception:
                    # Same best-effort policy as for the entries file.
                    pass

    @staticmethod
    def _write_json(path: Path, payload: Any) -> None:
        """Write ``payload`` to ``path`` as JSON without risking truncation.

        The data goes to a sibling temporary file first and is then moved
        over the target with ``os.replace``, so an interrupted write leaves
        the previous file intact.
        """
        tmp_path = path.with_name(path.name + ".tmp")
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(payload, f, indent=2)
            os.replace(tmp_path, path)
        finally:
            # Only still present when dumping or replacing failed.
            if tmp_path.exists():
                tmp_path.unlink()

    @staticmethod
    def _event_to_dict(event: 'CacheEvent') -> dict:
        """Convert an event record to its JSON-friendly form."""
        return {'type': event.event_type.name, 'key': event.key,
                'timestamp': event.timestamp, 'details': event.details}

    def save(self):
        """Persist entries, access patterns and the last 1000 events to disk.

        Raises whatever the underlying file or JSON operations raise.
        """
        # The lock is held for the whole save so the worker thread cannot
        # mutate the containers while they are being serialised, and so two
        # concurrent saves cannot fight over the temporary files.
        with self._lock:
            self._write_json(self.cache_dir / self._CACHE_FILE, {
                'version': '1.2',
                'entries': [entry.to_dict() for entry in self._cache.values()]
            })

            self._write_json(self.cache_dir / self._PATTERNS_FILE, {
                k: {'sequence': p.access_sequence, 'frequency': p.frequency}
                for k, p in self._access_patterns.items()
            })

            self._write_json(
                self.cache_dir / self._EVENTS_FILE,
                [self._event_to_dict(e) for e in self._events[-1000:]]
            )

    def set_compiler(self, callback: Callable):
        """Set the callback the warm-up worker uses to compile content.

        The worker calls it as ``callback(content)`` and stores the result.
        """
        self._compiler_callback = callback

    # ==================== Core cache operations ====================

    def get(self, key: str, content_hash: str = "") -> Optional[str]:
        """
        Look up ``key`` and return its cached output, or ``None`` on a miss.

        When ``content_hash`` is non-empty it must equal the stored hash,
        otherwise the lookup counts as a miss (the stale entry is left in
        place). A hit refreshes the entry's access data, records the access
        pattern and, if warm-up is enabled, logs predicted follow-up keys.
        """
        with self._lock:
            entry = self._cache.get(key)
            if entry is None:
                self._record_event(CacheEventType.MISS, key)
                self._misses += 1
                return None

            if content_hash and entry.content_hash != content_hash:
                self._record_event(CacheEventType.MISS, key, "hash mismatch")
                self._misses += 1
                return None

            self._cache.move_to_end(key)
            entry.touch()

            self._record_event(CacheEventType.HIT, key)
            self._hits += 1

            self._record_access_pattern(key)

            if self.enable_warmup:
                self._trigger_predictive_warmup(key)

            return entry.output

    def put(self, key: str, output: str, content_hash: str,
            dependencies: Optional[List[str]] = None,
            file_path: str = "",
            line_range: Tuple[int, int] = (0, 0),
            compile_time_ms: int = 0) -> bool:
        """
        Store a compilation result under ``key``.

        Args:
            key: cache key.
            output: compiled output; its UTF-8 length is the entry size.
            content_hash: hash of the source the output was produced from.
            dependencies: keys this entry depends on (used by
                ``invalidate_by_dependency``).
            file_path: source file the unit came from.
            line_range: line span of the unit inside ``file_path``.
            compile_time_ms: how long the compilation took.

        Returns:
            True when the entry was stored; False when ``output`` alone is
            larger than the configured size limit and was therefore rejected.
        """
        return self._store(key, output, content_hash, dependencies,
                           file_path, line_range, compile_time_ms,
                           track_access=True)

    def _store(self, key: str, output: str, content_hash: str,
               dependencies: Optional[List[str]],
               file_path: str,
               line_range: Tuple[int, int],
               compile_time_ms: int,
               track_access: bool) -> bool:
        """Insert or replace an entry; shared by put() and the warm-up worker.

        ``track_access`` is False for background warm-up writes so that they
        do not show up as user accesses in the access-pattern data.
        """
        size_bytes = len(output.encode('utf-8'))

        with self._lock:
            # Take the entry being replaced out first: it must neither count
            # towards the limits nor cause an unrelated entry to be evicted.
            previous = self._cache.pop(key, None)
            if previous is not None:
                self._total_size -= previous.size_bytes

            if size_bytes > self.max_size_bytes:
                # It can never fit; evicting everything else would not help.
                if previous is not None:
                    self._record_event(CacheEventType.EVICT, key,
                                       "replacement exceeds max cache size")
                return False

            while (len(self._cache) >= self.max_entries or
                   self._total_size + size_bytes > self.max_size_bytes):
                if not self._evict_one():
                    break

            now_ms = int(time.time() * 1000)
            self._cache[key] = CacheEntry(
                key=key,
                content_hash=content_hash,
                output=output,
                timestamp=now_ms,
                access_count=1,
                last_access=now_ms,
                dependencies=dependencies or [],
                file_path=file_path,
                line_range=line_range,
                compile_time_ms=compile_time_ms,
                size_bytes=size_bytes,
            )
            self._total_size += size_bytes

            self._record_event(CacheEventType.WRITE, key,
                               f"size={size_bytes}, compile_time={compile_time_ms}ms")

            if track_access:
                self._record_access_pattern(key)

            return True

    def invalidate(self, key: str):
        """Remove ``key`` from the cache if present."""
        with self._lock:
            entry = self._cache.pop(key, None)
            if entry is not None:
                self._total_size -= entry.size_bytes
                self._record_event(CacheEventType.EVICT, key, "manual invalidate")

    def invalidate_by_dependency(self, dep_key: str):
        """Remove every entry that lists ``dep_key`` among its dependencies."""
        with self._lock:
            # Collect first: the dict must not change while it is iterated.
            dependents = [key for key, entry in self._cache.items()
                          if dep_key in entry.dependencies]
            for key in dependents:
                self.invalidate(key)

    def _evict_one(self) -> bool:
        """Evict the lowest-scoring entry; return False if the cache is empty.

        score = access_count * 0.3 - idle_seconds * 0.001, so rarely used and
        long-idle entries go first. Ties go to the entry that comes first in
        the store's iteration order.
        """
        with self._lock:
            if not self._cache:
                return False

            now = int(time.time() * 1000)

            def score(item):
                entry = item[1]
                idle_seconds = (now - entry.last_access) / 1000
                return entry.access_count * 0.3 - idle_seconds * 0.001

            # Picking the (key, entry) pair directly also works for falsy
            # keys such as "".
            evict_key, victim = min(self._cache.items(), key=score)

            del self._cache[evict_key]
            self._total_size -= victim.size_bytes
            self._evictions += 1
            self._record_event(CacheEventType.EVICT, evict_key, "LRU eviction")
            return True

    # ==================== Access-pattern tracking ====================

    def _record_access_pattern(self, key: str):
        """Record that ``key`` was accessed after the previously accessed key."""
        with self._lock:
            previous_key = self._last_accessed_key
            # Compared against None so that an empty-string key is tracked too.
            if previous_key is not None and previous_key != key:
                pattern = self._access_patterns.get(previous_key)
                if pattern is None:
                    pattern = AccessPattern(key=previous_key)
                    self._access_patterns[previous_key] = pattern
                pattern.record_next(key)

            self._last_accessed_key = key

    # ==================== Predictive warm-up ====================

    def _start_warmup_thread(self):
        """Start the background warm-up thread unless one is already alive."""
        if self._warmup_thread and self._warmup_thread.is_alive():
            return

        self._warmup_running = True
        self._warmup_thread = threading.Thread(target=self._warmup_worker, daemon=True)
        self._warmup_thread.start()

    def _warmup_worker(self):
        """Worker loop: compile queued units and store the results."""
        while self._warmup_running:
            try:
                # The timeout lets the loop re-check the running flag.
                task = self._warmup_queue.get(timeout=1.0)
            except queue.Empty:
                continue

            if task is None:
                # Wake-up sentinel sent by stop_warmup().
                continue

            key, content, content_hash = task

            if self.has(key):
                continue

            compiler = self._compiler_callback
            if compiler is None:
                continue

            try:
                start = time.perf_counter()
                output = compiler(content)
                compile_time = int((time.perf_counter() - start) * 1000)

                stored = self._store(key, output, content_hash, None, "",
                                     (0, 0), compile_time, track_access=False)
                if stored:
                    with self._lock:
                        self._warmups += 1
                        self._record_event(CacheEventType.WARMUP, key,
                                           f"predictive warmup, time={compile_time}ms")
            except Exception:
                # A failing warm-up must never kill the worker; the unit is
                # simply not pre-cached.
                pass

    def _trigger_predictive_warmup(self, key: str):
        """Log the keys predicted to follow ``key`` that are not cached yet.

        Only PREDICT events are recorded here (at most three per call); the
        actual warm-up needs the unit's content and is queued through
        ``warmup_unit`` / ``warmup_dependencies``.
        """
        with self._lock:
            pattern = self._access_patterns.get(key)
            if pattern is None:
                return

            for next_key in pattern.access_sequence[:3]:
                if next_key not in self._cache:
                    self._record_event(CacheEventType.PREDICT, next_key,
                                       f"predicted from {key}")

    def warmup_unit(self, key: str, content: str, content_hash: str):
        """Queue one unit for background compilation unless it is cached."""
        if not self.has(key):
            self._warmup_queue.put((key, content, content_hash))

    def warmup_dependencies(self, keys: List[str],
                            content_provider: Callable[[str], Tuple[str, str]]):
        """
        Queue every uncached key in ``keys`` for background compilation.

        content_provider: key -> (content, content_hash). Keys for which the
        provider raises are skipped.
        """
        for key in keys:
            if self.has(key):
                continue
            try:
                content, content_hash = content_provider(key)
            except Exception:
                # Best effort: one unavailable unit must not stop the rest.
                continue
            self._warmup_queue.put((key, content, content_hash))

    def stop_warmup(self):
        """Ask the warm-up thread to stop and wait up to two seconds for it."""
        self._warmup_running = False
        thread = self._warmup_thread
        if thread is None or not thread.is_alive():
            return

        # Wake the worker so it notices the flag without waiting out its
        # queue timeout.
        self._warmup_queue.put(None)

        # A thread cannot join itself (possible when called from __del__ on
        # the worker thread).
        if thread is not threading.current_thread():
            thread.join(timeout=2.0)

    # ==================== Event log ====================

    def _record_event(self, event_type: CacheEventType, key: str, details: str = ""):
        """Append an event to the in-memory log, trimming it when it overflows."""
        with self._lock:
            self._events.append(CacheEvent(
                event_type=event_type,
                key=key,
                timestamp=int(time.time() * 1000),
                details=details
            ))

            # On overflow keep only the newest half. Unary minus binds
            # tighter than //, so the slice start is floor(-max_events / 2).
            if len(self._events) > self._max_events:
                self._events = self._events[-self._max_events // 2:]

    def get_recent_events(self, count: int = 100) -> List[dict]:
        """Return up to ``count`` of the most recent events, oldest first."""
        # A [-0:] slice would return the whole log, so handle it explicitly.
        if count <= 0:
            return []
        with self._lock:
            return [self._event_to_dict(e) for e in self._events[-count:]]

    # ==================== Statistics and diagnostics ====================

    def get_stats(self) -> Dict[str, Any]:
        """Return counters and size information about the cache."""
        with self._lock:
            lookups = self._hits + self._misses
            return {
                'version': '1.2',
                'entries': len(self._cache),
                'total_size_mb': self._total_size / (1024 * 1024),
                'max_entries': self.max_entries,
                'max_size_mb': self.max_size_bytes / (1024 * 1024),
                'hits': self._hits,
                'misses': self._misses,
                'hit_rate': self._hits / lookups if lookups > 0 else 0,
                'evictions': self._evictions,
                'warmups': self._warmups,
                'access_patterns': len(self._access_patterns),
            }

    def get_hot_entries(self, count: int = 10) -> List[Dict]:
        """Return the ``count`` entries with the highest access counts."""
        with self._lock:
            ranked = sorted(
                self._cache.values(),
                key=lambda e: e.access_count,
                reverse=True
            )
            return [
                {'key': e.key, 'access_count': e.access_count,
                 'file': e.file_path, 'lines': e.line_range}
                for e in ranked[:count]
            ]

    def get_predicted_next(self, key: str, count: int = 5) -> List[str]:
        """Return up to ``count`` keys that have been seen right after ``key``."""
        with self._lock:
            pattern = self._access_patterns.get(key)
            if pattern is None:
                return []
            return pattern.access_sequence[:count]

    def has(self, key: str) -> bool:
        """Return True if ``key`` is currently cached."""
        with self._lock:
            return key in self._cache

    def clear(self):
        """Drop all entries, access patterns and logged events."""
        with self._lock:
            self._cache.clear()
            self._total_size = 0
            self._access_patterns.clear()
            self._events.clear()
            # Otherwise the next access would be linked to a key that no
            # longer has any pattern data.
            self._last_accessed_key = None

    def __del__(self):
        """Stop the warm-up thread and save, ignoring any failure."""
        # Finalizers may run on a partially initialised instance or during
        # interpreter shutdown; each step is isolated so one failure does
        # not skip the other.
        for step in (self.stop_warmup, self.save):
            try:
                step()
            except Exception:
                pass
|
||||
|
||||
|
||||
# ==================== 工厂函数 ====================
|
||||
|
||||
# Process-wide instance, created on the first get_smart_cache() call.
_global_cache: Optional[SmartCache] = None


def get_smart_cache(cache_dir: str = ".sikuwa_cache") -> SmartCache:
    """Return the shared SmartCache, creating it on first use.

    ``cache_dir`` is only used by the call that creates the instance; once
    the instance exists it is returned as-is and the argument is ignored.
    """
    global _global_cache

    if _global_cache is not None:
        return _global_cache

    _global_cache = SmartCache(cache_dir)
    return _global_cache
|
||||
|
||||
|
||||
def create_smart_cache(cache_dir: str = ".sikuwa_cache",
                       max_entries: int = 10000,
                       max_size_mb: int = 500,
                       enable_warmup: bool = True) -> SmartCache:
    """Build and return a new SmartCache; the shared instance is not touched."""
    fresh_cache = SmartCache(
        cache_dir=cache_dir,
        max_entries=max_entries,
        max_size_mb=max_size_mb,
        enable_warmup=enable_warmup,
    )
    return fresh_cache
|
||||
Reference in New Issue
Block a user