Merge remote changes, split app.go, remove V1 dead code, fix AICache (#2)

- Merge remote improvements: generic AI API, row-level cache,
  CSV export, matchPrep, prompt truncation, O(1) cache index
- Split app.go (1645 -> 5 files: app.go, cache.go, ai.go,
  matcher.go, export.go)
- Remove V1 dead code: 6 methods, 4 helpers, ~300 lines
- Fix 3 AICache bugs: TOCTOU in saveToFile, silent failure in
  loadFromFile, full sort in put
- Extract 8 named constants (threshold, time window, batch size...)
- Frontend: isRunning guard, buildMatchConfig dedup, CSS variables
- Upgrade Go to 1.24.0
This commit is contained in:
sakuradairong
2026-06-05 14:46:55 +08:00
10 changed files with 752 additions and 370 deletions

322
app.go
View File

@@ -6,9 +6,9 @@ import (
"fmt"
"math"
"path/filepath"
"regexp"
"sort"
"strings"
"sync"
"time"
"github.com/wailsapp/wails/v2/pkg/runtime"
@@ -97,12 +97,26 @@ type MatchConfig struct {
IncludeHeader bool `json:"includeHeader"` // 导出时是否包含表头行
}
// AICacheInfo describes the state of the AI cache as reported to callers.
type AICacheInfo struct {
	// Count is the number of cache entries; serialized as "count".
	Count int `json:"count"`
	// FilePath is the cache file path; serialized as "filePath".
	FilePath string `json:"filePath"`
}
// ---------- App struct ----------
// App holds the application context, the AI API settings, the AI cache,
// and the configuration/headers of the most recent match.
type App struct {
ctx context.Context
// NOTE(review): this view looks like a flattened diff; deepseekKey appears to
// be the older field that apiKey replaces — confirm which one is live.
deepseekKey string
apiKey string // AI API key (compatible with OpenAI / Deepseek / local models)
apiEndpoint string // API endpoint (default: https://api.deepseek.com/v1/chat/completions)
apiModel string // model name (default: deepseek-chat)
aiCache *AICache
// Config and headers of the most recent match (used by export).
// NOTE(review): dataMu presumably guards the three fields below — confirm against their readers/writers.
dataMu sync.RWMutex
lastConfig MatchConfig
headersA []string
headersB []string
}
// NewApp 创建 App 实例
@@ -132,18 +146,52 @@ func (a *App) emitProgress(current, total int, message, phase string) {
})
}
// ---------- AI configuration ----------
// SetDeepseekAPIKey stores the whitespace-trimmed Deepseek API key in memory
// only (kept for backward compatibility). It returns a status message: one
// text when the trimmed key is empty (key cleared), another when a key is set.
// NOTE(review): this span looks like a flattened diff — the deepseekKey lines
// and the apiKey lines appear to be the old and new versions of the same two
// statements; confirm which pair is live before editing.
func (a *App) SetDeepseekAPIKey(key string) string {
a.deepseekKey = strings.TrimSpace(key)
if a.deepseekKey == "" {
a.apiKey = strings.TrimSpace(key)
if a.apiKey == "" {
return "已清除 Deepseek API 密钥"
}
return "Deepseek API 密钥已设置"
}
// SetAIConfig updates the AI API configuration (endpoint, model, key) in one call.
//
// Each argument is trimmed of surrounding whitespace before it is examined.
// An argument that is empty after trimming leaves the corresponding setting
// unchanged, so a blank or whitespace-only value can never wipe an existing
// endpoint, model, or key. (Previously the emptiness check ran before the
// trim, so an input such as " " overwrote the stored value with "".)
//
// It returns a status message containing the endpoint and model now in
// effect; the key is never included in the message.
func (a *App) SetAIConfig(endpoint, model, key string) string {
	if v := strings.TrimSpace(endpoint); v != "" {
		a.apiEndpoint = v
	}
	if v := strings.TrimSpace(model); v != "" {
		a.apiModel = v
	}
	if v := strings.TrimSpace(key); v != "" {
		a.apiKey = v
	}
	return fmt.Sprintf("AI 配置已更新 (端点=%s, 模型=%s)", a.apiEndpoint, a.apiModel)
}
// SetAPIKey stores the AI API key in memory only, after trimming surrounding
// whitespace. The returned status message tells the caller whether a key is
// now set or whether the key was cleared (trimmed input was empty).
func (a *App) SetAPIKey(key string) string {
	trimmed := strings.TrimSpace(key)
	a.apiKey = trimmed
	if trimmed != "" {
		return "AI API 密钥已设置"
	}
	return "已清除 AI API 密钥"
}
// GetDeepseekStatus reports whether an API key has been configured
// (i.e. the stored key is non-empty).
// NOTE(review): this span looks like a flattened diff — the two return
// statements appear to be the old (deepseekKey) and new (apiKey) versions of
// the same line; as written only the first is reachable. Confirm which is live.
func (a *App) GetDeepseekStatus() bool {
return a.deepseekKey != ""
return a.apiKey != ""
}
// GetAIStatus reports the current AI API configuration as a string map:
//   - "ready":    "true" when an API key is set, otherwise "false"
//   - "endpoint": the configured API endpoint
//   - "model":    the configured model name
func (a *App) GetAIStatus() map[string]string {
	status := make(map[string]string, 3)
	status["ready"] = fmt.Sprint(a.apiKey != "")
	status["endpoint"] = a.apiEndpoint
	status["model"] = a.apiModel
	return status
}
// ClearAICache 清除所有 AI 缓存
@@ -154,12 +202,9 @@ func (a *App) ClearAICache() string {
}
// GetAICacheInfo returns AI cache information (entry count and file path),
// both obtained from a.aiCache.stat().
// NOTE(review): this span looks like a flattened diff — the
// map[string]interface{} signature/return and the AICacheInfo signature/return
// appear to be the old and new versions of the same method; confirm which is live.
func (a *App) GetAICacheInfo() map[string]interface{} {
func (a *App) GetAICacheInfo() AICacheInfo {
count, path := a.aiCache.stat()
return map[string]interface{}{
"count": count,
"filePath": path,
}
return AICacheInfo{Count: count, FilePath: path}
}
// ---------- 文件选择对话框 ----------
@@ -212,54 +257,17 @@ func (a *App) ParseHeaders(filePath string) ([]string, error) {
// RunMatch 接收完整 MatchConfig按列索引执行通用匹配
func (a *App) RunMatch(config MatchConfig) ([]MatchResult, error) {
// 1. 编译正则
var reg *regexp.Regexp
if config.RegexPattern != "" {
var err error
reg, err = regexp.Compile(config.RegexPattern)
if err != nil {
return nil, fmt.Errorf("正则表达式格式错误,请检查: %v", err)
}
fmt.Printf("[DEBUG] RunMatch 使用正则: '%s'\n", config.RegexPattern)
} else {
fmt.Printf("[DEBUG] RunMatch 跳过清洗(正则为空)\n")
prep, err := a.prepareMatch(config)
if err != nil {
return nil, err
}
return a.runMatchOnData(prep, config)
}
// 2. 默认值兜底
timeWindow := config.TimeWindow
if timeWindow <= 0 {
timeWindow = DefaultTimeWindowHours
}
threshold := config.Threshold
if threshold <= 0 {
threshold = DefaultThreshold
}
// runMatchOnData 在已读取的数据上执行匹配
func (a *App) runMatchOnData(prep *matchPrep, config MatchConfig) ([]MatchResult, error) {
useTime := config.ColATimeIndex >= 0 && config.ColBTimeIndex >= 0
// 3. 读取原始数据
a.emitProgress(0, 100, "正在读取 A 表...", "reading")
rowsA, err := a.readRawRows(config.FileAPath)
if err != nil {
return nil, fmt.Errorf("读取 A 表失败: %v", err)
}
a.emitProgress(0, 100, "正在读取 B 表...", "reading")
rowsB, err := a.readRawRows(config.FileBPath)
if err != nil {
return nil, fmt.Errorf("读取 B 表失败: %v", err)
}
if len(rowsA) < 2 {
return nil, fmt.Errorf("A 表无有效数据行")
}
if len(rowsB) < 2 {
return nil, fmt.Errorf("B 表无有效数据行")
}
aHeaders := rowsA[0]
_ = aHeaders // 保留表头引用(将来导出时可能用到)
dataA := rowsA[1:]
dataB := rowsB[1:]
windowDuration := time.Duration(timeWindow * float64(time.Hour))
totalA := len(dataA)
totalA := len(prep.dataA)
var results []MatchResult
useAllMatches := config.AllMatches
@@ -268,7 +276,32 @@ func (a *App) RunMatch(config MatchConfig) ([]MatchResult, error) {
maxPreview = DefaultMaxPreview
}
for i, rowA := range dataA {
// 预计算 B 表清洗后的匹配值,避免内层循环中重复 regex 替换
totalB := len(prep.dataB)
cleanedBMatch := make([]string, totalB)
origBMatch := make([]string, totalB)
parsedBTime := make([]time.Time, totalB)
hasBTime := make([]bool, totalB)
bExtractVal := make([]string, totalB)
for bIdx, rowB := range prep.dataB {
matchStrB := getCell(rowB, config.ColBMatchIndex)
origBMatch[bIdx] = matchStrB
if matchStrB == "" {
cleanedBMatch[bIdx] = ""
} else {
cleanedBMatch[bIdx] = cleanWithRegex(matchStrB, prep.reg)
}
if useTime {
t, err := parseTimeFlexible(getCell(rowB, config.ColBTimeIndex))
if err == nil {
parsedBTime[bIdx] = t
hasBTime[bIdx] = true
}
}
bExtractVal[bIdx] = getCell(rowB, config.ColBExtractIndex)
}
for i, rowA := range prep.dataA {
if i%10 == 0 || i == totalA-1 {
pct := (i + 1) * 100 / totalA
a.emitProgress(i+1, totalA,
@@ -287,38 +320,35 @@ func (a *App) RunMatch(config MatchConfig) ([]MatchResult, error) {
if err == nil { timeA = t; hasTimeA = true }
}
cleanA := cleanWithRegex(matchStrA, reg)
cleanA := cleanWithRegex(matchStrA, prep.reg)
// 收集该 A 行的所有候选匹配
var candidates []MatchResult
for _, rowB := range dataB {
matchStrB := getCell(rowB, config.ColBMatchIndex)
if matchStrB == "" { continue }
for bIdx := range prep.dataB {
if cleanedBMatch[bIdx] == "" { continue }
var timeDiff time.Duration
if hasTimeA && useTime {
tB, err := parseTimeFlexible(getCell(rowB, config.ColBTimeIndex))
if err != nil { continue }
td := timeA.Sub(tB)
if td < -windowDuration || td > windowDuration { continue }
timeDiff = td
if hasTimeA && useTime && hasBTime[bIdx] {
td := timeA.Sub(parsedBTime[bIdx])
if td < -prep.windowDuration || td > prep.windowDuration { continue }
}
cleanB := cleanWithRegex(matchStrB, reg)
if cleanA == "" || cleanB == "" { continue }
similarity := calcSimilarity(matchStrA, matchStrB, reg, config.CaseSensitive)
similarity := similarityFromCleaned(cleanA, cleanedBMatch[bIdx], config.CaseSensitive)
if i < maxPreview {
fmt.Printf("[DEBUG] | A[%d]='%s'→'%s' | B='%s'→'%s' | 相似度=%.4f\n",
i, matchStrA, cleanA, matchStrB, cleanB, similarity)
i, matchStrA, cleanA, origBMatch[bIdx], cleanedBMatch[bIdx], similarity)
}
if similarity >= threshold {
if similarity >= prep.threshold {
var timeDiff time.Duration
if hasTimeA && useTime && hasBTime[bIdx] {
timeDiff = timeA.Sub(parsedBTime[bIdx])
}
mr := MatchResult{
RowAData: rowA,
RowBKey: matchStrB,
ExtractValue: getCell(rowB, config.ColBExtractIndex),
RowBKey: origBMatch[bIdx],
ExtractValue: bExtractVal[bIdx],
TimeDiff: formatTimeDiff(timeDiff),
SimilarityScore: math.Round(similarity*10000) / 10000,
AIMatched: false,
@@ -343,13 +373,14 @@ func (a *App) RunMatch(config MatchConfig) ([]MatchResult, error) {
}
// 结果排序
if config.SortBy == "similarity" {
switch config.SortBy {
case "similarity":
sort.Slice(results, func(i, j int) bool {
return results[i].SimilarityScore > results[j].SimilarityScore
})
} else if config.SortBy == "timeDiff" {
case "timeDiff":
sort.Slice(results, func(i, j int) bool {
return results[i].TimeDiff < results[j].TimeDiff
return parseTimeDiffDuration(results[i].TimeDiff) < parseTimeDiffDuration(results[j].TimeDiff)
})
}
@@ -359,42 +390,31 @@ func (a *App) RunMatch(config MatchConfig) ([]MatchResult, error) {
return results, nil
}
// RunMatchWithAI 执行基础匹配 + Deepseek AI 增强匹配(配置驱动)
// RunMatchWithAI 执行基础匹配 + AI 增强匹配(配置驱动)
func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
if a.deepseekKey == "" {
return nil, fmt.Errorf("请先设置 Deepseek API 密钥")
if a.apiKey == "" {
return nil, fmt.Errorf("请先设置 AI API 密钥")
}
// 1. 先执行基础匹配
results, err := a.RunMatch(config)
prep, err := a.prepareMatch(config)
if err != nil {
return nil, err
}
// 2. 重新读取数据,找出未被基础匹配覆盖的 A 表行
rowsA, err := a.readRawRows(config.FileAPath)
// 1. 先执行基础匹配
results, err := a.runMatchOnData(prep, config)
if err != nil {
return nil, fmt.Errorf("读取 A 表失败: %v", err)
}
rowsB, err := a.readRawRows(config.FileBPath)
if err != nil {
return nil, fmt.Errorf("读取 B 表失败: %v", err)
}
if len(rowsA) < 2 || len(rowsB) < 2 {
return results, nil
return nil, err
}
dataA := rowsA[1:]
dataB := rowsB[1:]
// 用 RowAData 快速判断哪些 A 行已经被匹配
// 2. 找出未被基础匹配覆盖的 A 表行
matchedSet := make(map[string]bool)
for _, r := range results {
matchedSet[strings.Join(r.RowAData, "\x00")] = true
}
var unmatchedA [][]string
for _, row := range dataA {
for _, row := range prep.dataA {
if !matchedSet[strings.Join(row, "\x00")] {
unmatchedA = append(unmatchedA, row)
}
@@ -405,33 +425,61 @@ func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
return results, nil
}
// 3. AI 增强匹配
timeWindow := config.TimeWindow
if timeWindow <= 0 {
timeWindow = DefaultTimeWindowHours
}
windowDuration := time.Duration(timeWindow * float64(time.Hour))
// 3. AI 增强匹配(先查行级缓存,减少 API 调用)
useTime := config.ColATimeIndex >= 0 && config.ColBTimeIndex >= 0
totalUnmatched := len(unmatchedA)
aiMatched := 0
var failedBatches []int
// 3a. 检查行级缓存,命中则直接加入结果
var uncachedA [][]string
cacheHits := 0
for _, row := range unmatchedA {
matchVal := getCell(row, config.ColAMatchIndex)
timeStr := ""
if useTime {
timeStr = getCell(row, config.ColATimeIndex)
}
cacheKey := a.buildRowCacheKey(matchVal, timeStr, config)
if cachedVal, ok := a.aiCache.getRow(cacheKey); ok {
results = append(results, MatchResult{
RowAData: row,
RowBKey: "",
ExtractValue: cachedVal,
SimilarityScore: 0,
AIMatched: true,
})
aiMatched++
cacheHits++
} else {
uncachedA = append(uncachedA, row)
}
}
if cacheHits > 0 {
fmt.Printf("[CACHE] ✓ 行级缓存命中 %d 条,剩余 %d 条需 AI 处理\n", cacheHits, len(uncachedA))
}
if len(uncachedA) == 0 {
a.emitProgress(1, 1,
fmt.Sprintf("AI 增强完成!全部 %d 条命中缓存", cacheHits), "done")
return results, nil
}
totalUnmatched := len(uncachedA)
a.emitProgress(0, totalUnmatched,
fmt.Sprintf("AI 增强匹配:还有 %d 条未匹配记录,正在调用 Deepseek...", totalUnmatched),
fmt.Sprintf("AI 增强匹配:%d 条命中缓存,%d 条需调用 AI...", cacheHits, totalUnmatched),
"ai-enhancing")
batchSize := DefaultBatchSize
aiMatched := 0
for batchStart := 0; batchStart < totalUnmatched; batchStart += batchSize {
end := batchStart + batchSize
if end > totalUnmatched {
end = totalUnmatched
}
for batchStart := 0; batchStart < totalUnmatched; batchStart += DefaultBatchSize {
end := min(batchStart+DefaultBatchSize, totalUnmatched)
batchNum := (batchStart / DefaultBatchSize) + 1
a.emitProgress(batchStart+1, totalUnmatched,
fmt.Sprintf("AI 分析中 %d/%d (第 %d 批)...", end, totalUnmatched, (batchStart/batchSize)+1),
fmt.Sprintf("AI 分析中 %d/%d (第 %d 批)...", end, totalUnmatched, batchNum),
"ai-enhancing")
batch := unmatchedA[batchStart:end]
batch := uncachedA[batchStart:end]
// 计算本批 A 表的时间范围
var minTime, maxTime time.Time
@@ -459,10 +507,10 @@ func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
// 过滤 B 表在时间窗口内的行(用户配置时间窗口 + 额外余量覆盖批次跨度)
var relevantB [][]string
if hasBatchTime && useTime {
padding := windowDuration + 3*time.Hour
padding := prep.windowDuration + time.Duration(defaultAIWindowPadH)*time.Hour
ws := minTime.Add(-padding)
we := maxTime.Add(padding)
for _, row := range dataB {
for _, row := range prep.dataB {
t, err := parseTimeFlexible(getCell(row, config.ColBTimeIndex))
if err != nil || t.Before(ws) || t.After(we) {
continue
@@ -471,17 +519,16 @@ func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
}
} else {
// 无时间列时限制 B 表条数以控制 token 消耗
maxB := 200
if len(dataB) < maxB {
maxB = len(dataB)
}
relevantB = dataB[:maxB]
maxB := min(defaultMaxBNoTime, len(prep.dataB))
relevantB = prep.dataB[:maxB]
}
// 构建 AI 提示
prompt := a.buildGenericAIPrompt(batch, relevantB, config, windowDuration, hasBatchTime)
aiResp, err := a.callDeepseekAPI(prompt)
prompt := a.buildGenericAIPrompt(batch, relevantB, config, prep.windowDuration, hasBatchTime)
aiResp, err := a.callAIAPI(prompt)
if err != nil {
fmt.Printf("[AI-WARN] 第 %d 批 API 调用失败: %v\n", batchNum, err)
failedBatches = append(failedBatches, batchNum)
continue
}
@@ -502,7 +549,8 @@ func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
}
if parseErr != nil {
fmt.Printf("[AI-WARN] 响应解析失败 (第 %d 批): %s\n 原始响应: %.200s\n",
(batchStart/batchSize)+1, parseErr.Error(), aiResp)
batchNum, parseErr.Error(), aiResp)
failedBatches = append(failedBatches, batchNum)
continue
}
@@ -522,12 +570,26 @@ func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
}
results = append(results, mr)
aiMatched++
// 写入行级缓存
matchVal := getCell(rowA, config.ColAMatchIndex)
timeStr := ""
if useTime {
timeStr = getCell(rowA, config.ColATimeIndex)
}
cacheKey := a.buildRowCacheKey(matchVal, timeStr, config)
a.aiCache.putRow(cacheKey, val)
}
}
a.aiCache.saveToFile()
a.emitProgress(totalUnmatched, totalUnmatched,
fmt.Sprintf("AI 增强完成!基础匹配 %d 条 + AI 补充 %d 条 = 共 %d 条",
len(results)-aiMatched, aiMatched, len(results)), "done")
// 构建完成消息
msg := fmt.Sprintf("AI 增强完成!基础匹配 %d 条 + AI 补充 %d 条 = 共 %d 条",
len(results)-aiMatched, aiMatched, len(results))
if len(failedBatches) > 0 {
msg += fmt.Sprintf("(警告:第 %v 批失败)", failedBatches)
}
a.emitProgress(totalUnmatched, totalUnmatched, msg, "done")
return results, nil
}