- Merge remote improvements: generic AI API, row-level cache, CSV export, matchPrep, prompt truncation, O(1) cache index
- Split app.go (1645 -> 5 files: app.go, cache.go, ai.go, matcher.go, export.go)
- Remove V1 dead code: 6 methods, 4 helpers, ~300 lines
- Fix 3 AICache bugs: TOCTOU in saveToFile, silent loadFromFile, full-sort put
- Extract 8 named constants (threshold, time window, batch size...)
- Frontend: isRunning guard, buildMatchConfig dedup, CSS variables
- Upgrade Go to 1.24.0
222 lines
5.7 KiB
Go
222 lines
5.7 KiB
Go
package main
|
||
|
||
import (
|
||
"encoding/json"
|
||
"fmt"
|
||
"os"
|
||
"path/filepath"
|
||
"sort"
|
||
"sync"
|
||
"time"
|
||
)
|
||
|
||
// ---------- AI cache ----------
|
||
|
||
// AICacheEntry is a single record of the batch-level cache: one response
// stored under the prompt hash that is used as its lookup key.
type AICacheEntry struct {
	// PromptHash is the lookup key of the entry.
	PromptHash string `json:"promptHash"`
	// Response is the cached text handed back on a hit.
	Response string `json:"response"`
	// CreatedAt is the Unix time, in seconds, of the last write to the entry.
	CreatedAt int64 `json:"createdAt"`
}
|
||
|
||
// AIRowCacheEntry is a single record of the row-level cache, which keeps
// per-row AI match results so they can be reused across batches.
type AIRowCacheEntry struct {
	// Key is the lookup key of the row.
	Key string `json:"key"`
	// Value is the cached result handed back on a hit.
	Value string `json:"value"`
	// CreatedAt is the Unix time, in seconds, of the last write to the entry.
	CreatedAt int64 `json:"createdAt"`
}
|
||
|
||
// AICache AI 响应缓存(持久化到临时文件)
|
||
type AICache struct {
|
||
Entries []AICacheEntry `json:"entries"`
|
||
RowEntries []AIRowCacheEntry `json:"rowEntries"`
|
||
entriesIdx map[string]int // promptHash → index in Entries (O(1) lookup)
|
||
rowEntriesIdx map[string]int // key → index in RowEntries (O(1) lookup)
|
||
mu sync.RWMutex
|
||
filePath string
|
||
maxSize int // 批量缓存最大条目数
|
||
maxRowSize int // 行级缓存最大条目数
|
||
}
|
||
|
||
// cacheFileName is the base name of the file the cache is persisted to.
const cacheFileName = "data-matcher-ai-cache.json"
|
||
|
||
// newAICache 创建缓存实例并加载已有数据
|
||
func newAICache() *AICache {
|
||
c := &AICache{
|
||
filePath: filepath.Join(os.TempDir(), cacheFileName),
|
||
maxSize: cacheMaxSize,
|
||
maxRowSize: 5000,
|
||
}
|
||
c.loadFromFile()
|
||
return c
|
||
}
|
||
|
||
// loadFromFile 从磁盘加载缓存
|
||
func (c *AICache) loadFromFile() {
|
||
data, err := os.ReadFile(c.filePath)
|
||
if err != nil {
|
||
if !os.IsNotExist(err) {
|
||
fmt.Printf("[CACHE] 读取缓存文件失败: %v\n", err)
|
||
}
|
||
return
|
||
}
|
||
c.mu.Lock()
|
||
defer c.mu.Unlock()
|
||
var loaded struct {
|
||
Entries []AICacheEntry `json:"entries"`
|
||
RowEntries []AIRowCacheEntry `json:"rowEntries"`
|
||
}
|
||
if err := json.Unmarshal(data, &loaded); err != nil || (len(loaded.Entries) == 0 && len(loaded.RowEntries) == 0) {
|
||
return // 解析失败或无数据,保留当前缓存
|
||
}
|
||
// 验证每个条目字段完整性
|
||
for _, e := range loaded.Entries {
|
||
if e.PromptHash == "" {
|
||
return
|
||
}
|
||
}
|
||
for _, r := range loaded.RowEntries {
|
||
if r.Key == "" {
|
||
return
|
||
}
|
||
}
|
||
c.Entries = loaded.Entries
|
||
c.RowEntries = loaded.RowEntries
|
||
c.rebuildIndexes()
|
||
}
|
||
|
||
// rebuildIndexes 从切片重建索引 map(反序列化或裁剪后调用)
|
||
func (c *AICache) rebuildIndexes() {
|
||
c.entriesIdx = make(map[string]int, len(c.Entries))
|
||
for i := range c.Entries {
|
||
c.entriesIdx[c.Entries[i].PromptHash] = i
|
||
}
|
||
c.rowEntriesIdx = make(map[string]int, len(c.RowEntries))
|
||
for i := range c.RowEntries {
|
||
c.rowEntriesIdx[c.RowEntries[i].Key] = i
|
||
}
|
||
}
|
||
|
||
// saveToFile 将缓存写入磁盘(线程安全)
|
||
func (c *AICache) saveToFile() {
|
||
c.mu.RLock()
|
||
entries := make([]AICacheEntry, len(c.Entries))
|
||
copy(entries, c.Entries)
|
||
rowEntries := make([]AIRowCacheEntry, len(c.RowEntries))
|
||
copy(rowEntries, c.RowEntries)
|
||
c.mu.RUnlock()
|
||
|
||
data, err := json.Marshal(map[string]interface{}{
|
||
"entries": entries,
|
||
"rowEntries": rowEntries,
|
||
})
|
||
if err != nil {
|
||
fmt.Printf("[CACHE] 序列化缓存失败: %v\n", err)
|
||
return
|
||
}
|
||
if err := os.WriteFile(c.filePath, data, 0600); err != nil {
|
||
fmt.Printf("[CACHE] 写入缓存文件失败: %v\n", err)
|
||
}
|
||
}
|
||
|
||
// get 根据 hash 查找缓存,命中返回响应,否则返回空
|
||
func (c *AICache) get(hash string) (string, bool) {
|
||
c.mu.RLock()
|
||
defer c.mu.RUnlock()
|
||
if idx, ok := c.entriesIdx[hash]; ok && idx < len(c.Entries) && c.Entries[idx].PromptHash == hash {
|
||
return c.Entries[idx].Response, true
|
||
}
|
||
return "", false
|
||
}
|
||
|
||
// put 存入一条缓存(线程安全 + 自动裁剪)
|
||
func (c *AICache) put(hash, response string) {
|
||
c.mu.Lock()
|
||
defer c.mu.Unlock()
|
||
|
||
// 去重:如果已存在则覆盖
|
||
if idx, ok := c.entriesIdx[hash]; ok && idx < len(c.Entries) && c.Entries[idx].PromptHash == hash {
|
||
c.Entries[idx].Response = response
|
||
c.Entries[idx].CreatedAt = time.Now().Unix()
|
||
return
|
||
}
|
||
|
||
// 新增条目
|
||
idx := len(c.Entries)
|
||
c.Entries = append(c.Entries, AICacheEntry{
|
||
PromptHash: hash,
|
||
Response: response,
|
||
CreatedAt: time.Now().Unix(),
|
||
})
|
||
c.entriesIdx[hash] = idx
|
||
|
||
// 超过上限则删除最旧的条目
|
||
if len(c.Entries) > c.maxSize {
|
||
sort.Slice(c.Entries, func(i, j int) bool {
|
||
return c.Entries[i].CreatedAt > c.Entries[j].CreatedAt
|
||
})
|
||
c.Entries = c.Entries[:c.maxSize]
|
||
c.rebuildIndexes()
|
||
}
|
||
}
|
||
|
||
// getRow 查找行级缓存
|
||
func (c *AICache) getRow(key string) (string, bool) {
|
||
c.mu.RLock()
|
||
defer c.mu.RUnlock()
|
||
if idx, ok := c.rowEntriesIdx[key]; ok && idx < len(c.RowEntries) && c.RowEntries[idx].Key == key {
|
||
return c.RowEntries[idx].Value, true
|
||
}
|
||
return "", false
|
||
}
|
||
|
||
// putRow 存入行级缓存(线程安全 + 自动裁剪)
|
||
func (c *AICache) putRow(key, value string) {
|
||
c.mu.Lock()
|
||
defer c.mu.Unlock()
|
||
|
||
// 去重:更新已存在的条目
|
||
if idx, ok := c.rowEntriesIdx[key]; ok && idx < len(c.RowEntries) && c.RowEntries[idx].Key == key {
|
||
c.RowEntries[idx].Value = value
|
||
c.RowEntries[idx].CreatedAt = time.Now().Unix()
|
||
return
|
||
}
|
||
|
||
// 新增条目
|
||
idx := len(c.RowEntries)
|
||
c.RowEntries = append(c.RowEntries, AIRowCacheEntry{
|
||
Key: key,
|
||
Value: value,
|
||
CreatedAt: time.Now().Unix(),
|
||
})
|
||
c.rowEntriesIdx[key] = idx
|
||
|
||
// 超过上限则删除最旧的条目
|
||
if len(c.RowEntries) > c.maxRowSize {
|
||
sort.Slice(c.RowEntries, func(i, j int) bool {
|
||
return c.RowEntries[i].CreatedAt > c.RowEntries[j].CreatedAt
|
||
})
|
||
c.RowEntries = c.RowEntries[:c.maxRowSize]
|
||
c.rebuildIndexes()
|
||
}
|
||
}
|
||
|
||
// clear 清空所有缓存
|
||
func (c *AICache) clear() {
|
||
c.mu.Lock()
|
||
defer c.mu.Unlock()
|
||
c.Entries = nil
|
||
c.RowEntries = nil
|
||
c.entriesIdx = nil
|
||
c.rowEntriesIdx = nil
|
||
_ = os.Remove(c.filePath)
|
||
}
|
||
|
||
// stat 返回缓存统计
|
||
func (c *AICache) stat() (count int, path string) {
|
||
c.mu.RLock()
|
||
defer c.mu.RUnlock()
|
||
return len(c.Entries) + len(c.RowEntries), c.filePath
|
||
}
|