Files
office-data-matcher/cache.go
sakuradairong 2b17760fbd Merge remote changes, split app.go, remove V1 dead code, fix AICache (#2)
- Merge remote improvements: generic AI API, row-level cache,
  CSV export, matchPrep, prompt truncation, O(1) cache index
- Split app.go (1645 -> 5 files: app.go, cache.go, ai.go,
  matcher.go, export.go)
- Remove V1 dead code: 6 methods, 4 helpers, ~300 lines
- Fix AICache 3 bugs: TOCTOU saveToFile, silent loadFromFile,
  full-sort put
- Extract 8 named constants (threshold, time window, batch size...)
- Frontend: isRunning guard, buildMatchConfig dedup, CSS variables
- Upgrade Go to 1.24.0
2026-06-05 14:46:55 +08:00

222 lines
5.7 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package main
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"sort"
"sync"
"time"
)
// ---------- AI cache ----------
// AICacheEntry is a single batch-level cache record: one stored AI response
// keyed by PromptHash.
type AICacheEntry struct {
// PromptHash is the lookup key for this entry. Presumably a hash of the
// prompt text computed by the caller; the hashing itself is not in this file.
PromptHash string `json:"promptHash"`
// Response is the cached response text returned on a hit.
Response string `json:"response"`
// CreatedAt is the Unix time in seconds at which the entry was stored or
// last overwritten; it decides which entries are evicted first.
CreatedAt int64 `json:"createdAt"`
}
// AIRowCacheEntry is a single row-level AI match cache record, kept so that
// results can be reused across batches.
type AIRowCacheEntry struct {
// Key is the lookup key for the row; how it is derived is up to the caller.
Key string `json:"key"`
// Value is the cached result returned on a hit.
Value string `json:"value"`
// CreatedAt is the Unix time in seconds at which the entry was stored or
// last overwritten; it decides which entries are evicted first.
CreatedAt int64 `json:"createdAt"`
}
// AICache is the AI response cache. It keeps two independent collections
// (batch-level entries and row-level entries) in memory and persists them as
// JSON to the file named by filePath.
//
// The slices are the source of truth; the two index maps only map a key to
// its position in the corresponding slice and must be rebuilt whenever a
// slice is reordered or truncated.
type AICache struct {
// Entries holds the batch-level records (persisted under "entries").
Entries []AICacheEntry `json:"entries"`
// RowEntries holds the row-level records (persisted under "rowEntries").
RowEntries []AIRowCacheEntry `json:"rowEntries"`
entriesIdx map[string]int // promptHash → index in Entries (O(1) lookup)
rowEntriesIdx map[string]int // key → index in RowEntries (O(1) lookup)
mu sync.RWMutex // taken by the methods of this type around access to the fields above
filePath string // location of the persisted JSON cache file
maxSize int // maximum number of batch-level cache entries
maxRowSize int // maximum number of row-level cache entries
}
// cacheFileName is the base name of the on-disk cache file; newAICache joins
// it with the OS temp directory to form the full path.
const cacheFileName = "data-matcher-ai-cache.json"
// newAICache creates a cache backed by a JSON file in the OS temp directory
// and loads any previously persisted entries from it.
//
// Both index maps are allocated up front so the returned cache is always
// ready for inserts: inserting into a nil map panics, and loading does not
// populate the maps when there is no usable cache file.
func newAICache() *AICache {
	c := &AICache{
		filePath:      filepath.Join(os.TempDir(), cacheFileName),
		maxSize:       cacheMaxSize,
		maxRowSize:    5000,
		entriesIdx:    make(map[string]int),
		rowEntriesIdx: make(map[string]int),
	}
	c.loadFromFile()
	return c
}
// loadFromFile loads persisted cache entries from c.filePath into memory.
//
// A missing file is treated as an empty cache and is not reported. Read
// errors, JSON parse errors and files containing an entry with an empty key
// are logged and leave the in-memory cache untouched. A file that parses but
// contains no entries is ignored silently. On success both entry slices are
// replaced and the lookup indexes are rebuilt.
func (c *AICache) loadFromFile() {
	data, err := os.ReadFile(c.filePath)
	if err != nil {
		if !os.IsNotExist(err) {
			fmt.Printf("[CACHE] 读取缓存文件失败: %v\n", err)
		}
		return
	}

	var loaded struct {
		Entries    []AICacheEntry    `json:"entries"`
		RowEntries []AIRowCacheEntry `json:"rowEntries"`
	}
	if err := json.Unmarshal(data, &loaded); err != nil {
		// A corrupt file must be visible in the log; otherwise it looks
		// exactly like an empty cache.
		fmt.Printf("[CACHE] 解析缓存文件失败: %v\n", err)
		return
	}
	if len(loaded.Entries) == 0 && len(loaded.RowEntries) == 0 {
		return // nothing persisted; keep the current cache
	}

	// Reject the whole file if any entry lacks its key: such an entry could
	// never be looked up and indicates a damaged or foreign file.
	for i := range loaded.Entries {
		if loaded.Entries[i].PromptHash == "" {
			fmt.Printf("[CACHE] 缓存文件含无效条目, 已忽略: entries[%d]\n", i)
			return
		}
	}
	for i := range loaded.RowEntries {
		if loaded.RowEntries[i].Key == "" {
			fmt.Printf("[CACHE] 缓存文件含无效条目, 已忽略: rowEntries[%d]\n", i)
			return
		}
	}

	// Only the state swap needs the lock; parsing and validation above work
	// on local data.
	c.mu.Lock()
	defer c.mu.Unlock()
	c.Entries = loaded.Entries
	c.RowEntries = loaded.RowEntries
	c.rebuildIndexes()
}
// rebuildIndexes regenerates both lookup maps from the current contents of
// Entries and RowEntries. It is meant to run after the slices were replaced
// (deserialization) or reordered/truncated (eviction). The method takes no
// lock itself.
func (c *AICache) rebuildIndexes() {
	byHash := make(map[string]int, len(c.Entries))
	for pos, entry := range c.Entries {
		byHash[entry.PromptHash] = pos
	}
	c.entriesIdx = byHash

	byKey := make(map[string]int, len(c.RowEntries))
	for pos, row := range c.RowEntries {
		byKey[row.Key] = pos
	}
	c.rowEntriesIdx = byKey
}
// saveToFile persists the cache to c.filePath as JSON.
//
// The entries are copied under the read lock so that marshaling and disk I/O
// happen without holding it. The data is written to a temporary file in the
// same directory and then renamed over the target, so a concurrent
// saveToFile or a crash mid-write cannot leave a truncated or interleaved
// cache file behind (rename is atomic on POSIX systems). Failures are logged
// and otherwise ignored; persistence is best-effort.
func (c *AICache) saveToFile() {
	c.mu.RLock()
	// Non-nil empty slices keep the JSON output as [] rather than null.
	snapshot := struct {
		Entries    []AICacheEntry    `json:"entries"`
		RowEntries []AIRowCacheEntry `json:"rowEntries"`
	}{
		Entries:    append([]AICacheEntry{}, c.Entries...),
		RowEntries: append([]AIRowCacheEntry{}, c.RowEntries...),
	}
	c.mu.RUnlock()

	data, err := json.Marshal(snapshot)
	if err != nil {
		fmt.Printf("[CACHE] 序列化缓存失败: %v\n", err)
		return
	}

	// os.CreateTemp creates the file with mode 0600, matching the permission
	// the cache file is expected to have.
	tmp, err := os.CreateTemp(filepath.Dir(c.filePath), filepath.Base(c.filePath)+".*.tmp")
	if err != nil {
		fmt.Printf("[CACHE] 写入缓存文件失败: %v\n", err)
		return
	}
	tmpPath := tmp.Name()

	_, err = tmp.Write(data)
	if closeErr := tmp.Close(); err == nil {
		err = closeErr
	}
	if err == nil {
		err = os.Rename(tmpPath, c.filePath)
	}
	if err != nil {
		_ = os.Remove(tmpPath) // best-effort cleanup of the orphaned temp file
		fmt.Printf("[CACHE] 写入缓存文件失败: %v\n", err)
	}
}
// get looks up the batch-level cache by hash. It returns the cached response
// and true on a hit, or "" and false on a miss. The lookup runs under the
// read lock.
func (c *AICache) get(hash string) (string, bool) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	pos, found := c.entriesIdx[hash]
	if !found || pos >= len(c.Entries) {
		return "", false
	}
	// Double-check the slot: the index is only trusted if the entry it points
	// at really carries the requested hash.
	if entry := &c.Entries[pos]; entry.PromptHash == hash {
		return entry.Response, true
	}
	return "", false
}
// put stores response under hash in the batch-level cache.
//
// If hash is already cached, its response and timestamp are refreshed in
// place. Otherwise a new entry is appended and, once the cache holds more
// than c.maxSize entries, the entries with the oldest CreatedAt are dropped.
// The whole operation runs under the write lock.
func (c *AICache) put(hash, response string) {
	c.mu.Lock()
	defer c.mu.Unlock()

	// The index may be nil (for example on a zero-value AICache). Assigning
	// into a nil map panics, so build it from the slice before using it.
	if c.entriesIdx == nil {
		c.entriesIdx = make(map[string]int, len(c.Entries)+1)
		for i := range c.Entries {
			c.entriesIdx[c.Entries[i].PromptHash] = i
		}
	}

	now := time.Now().Unix()

	// Dedupe: overwrite an existing entry instead of appending a second one.
	if idx, ok := c.entriesIdx[hash]; ok && idx < len(c.Entries) && c.Entries[idx].PromptHash == hash {
		c.Entries[idx].Response = response
		c.Entries[idx].CreatedAt = now
		return
	}

	// New entry: it lands at the current end of the slice.
	c.entriesIdx[hash] = len(c.Entries)
	c.Entries = append(c.Entries, AICacheEntry{
		PromptHash: hash,
		Response:   response,
		CreatedAt:  now,
	})

	// Over the limit: order newest first, cut off the oldest tail, and
	// rebuild the index because every position may have changed.
	if len(c.Entries) > c.maxSize {
		sort.Slice(c.Entries, func(i, j int) bool {
			return c.Entries[i].CreatedAt > c.Entries[j].CreatedAt
		})
		c.Entries = c.Entries[:c.maxSize]
		c.rebuildIndexes()
	}
}
// getRow looks up the row-level cache by key. It returns the cached value and
// true on a hit, or "" and false on a miss. The lookup runs under the read
// lock.
func (c *AICache) getRow(key string) (string, bool) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	pos, found := c.rowEntriesIdx[key]
	if !found || pos >= len(c.RowEntries) {
		return "", false
	}
	// Double-check the slot: the index is only trusted if the entry it points
	// at really carries the requested key.
	if row := &c.RowEntries[pos]; row.Key == key {
		return row.Value, true
	}
	return "", false
}
// putRow stores value under key in the row-level cache.
//
// If key is already cached, its value and timestamp are refreshed in place.
// Otherwise a new entry is appended and, once the cache holds more than
// c.maxRowSize entries, the entries with the oldest CreatedAt are dropped.
// The whole operation runs under the write lock.
func (c *AICache) putRow(key, value string) {
	c.mu.Lock()
	defer c.mu.Unlock()

	// The index may be nil (for example on a zero-value AICache). Assigning
	// into a nil map panics, so build it from the slice before using it.
	if c.rowEntriesIdx == nil {
		c.rowEntriesIdx = make(map[string]int, len(c.RowEntries)+1)
		for i := range c.RowEntries {
			c.rowEntriesIdx[c.RowEntries[i].Key] = i
		}
	}

	now := time.Now().Unix()

	// Dedupe: update an existing entry instead of appending a second one.
	if idx, ok := c.rowEntriesIdx[key]; ok && idx < len(c.RowEntries) && c.RowEntries[idx].Key == key {
		c.RowEntries[idx].Value = value
		c.RowEntries[idx].CreatedAt = now
		return
	}

	// New entry: it lands at the current end of the slice.
	c.rowEntriesIdx[key] = len(c.RowEntries)
	c.RowEntries = append(c.RowEntries, AIRowCacheEntry{
		Key:       key,
		Value:     value,
		CreatedAt: now,
	})

	// Over the limit: order newest first, cut off the oldest tail, and
	// rebuild the index because every position may have changed.
	if len(c.RowEntries) > c.maxRowSize {
		sort.Slice(c.RowEntries, func(i, j int) bool {
			return c.RowEntries[i].CreatedAt > c.RowEntries[j].CreatedAt
		})
		c.RowEntries = c.RowEntries[:c.maxRowSize]
		c.rebuildIndexes()
	}
}
// clear drops every cached entry from memory and removes the cache file.
//
// The index maps are reset to empty maps rather than nil, so the cache stays
// usable for inserts afterwards (assigning into a nil map panics).
func (c *AICache) clear() {
	c.mu.Lock()
	defer c.mu.Unlock()

	c.Entries = nil
	c.RowEntries = nil
	c.entriesIdx = make(map[string]int)
	c.rowEntriesIdx = make(map[string]int)

	// Removal is best-effort; a file that does not exist is already cleared,
	// any other failure is worth a log line.
	if err := os.Remove(c.filePath); err != nil && !os.IsNotExist(err) {
		fmt.Printf("[CACHE] 删除缓存文件失败: %v\n", err)
	}
}
// stat reports cache statistics: count is the combined number of batch-level
// and row-level entries, path is the location of the cache file. Both values
// are read under the read lock.
func (c *AICache) stat() (count int, path string) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	count = len(c.Entries)
	count += len(c.RowEntries)
	path = c.filePath
	return count, path
}