Merge remote changes, split app.go, remove V1 dead code, fix AICache (#2)

- Merge remote improvements: generic AI API, row-level cache, CSV export, matchPrep, prompt truncation, O(1) cache index
- Split app.go (1645 -> 5 files: app.go, cache.go, ai.go, matcher.go, export.go)
- Remove V1 dead code: 6 methods, 4 helpers, ~300 lines
- Fix 3 AICache bugs: TOCTOU saveToFile, silent loadFromFile, full-sort put
- Extract 8 named constants (threshold, time window, batch size, ...)
- Frontend: isRunning guard, buildMatchConfig dedup, CSS variables
- Upgrade Go to 1.24.0
2026-06-05 14:46:55 +08:00
parent 40745f5632 5163dc6408
commit 2b17760fbd
10 changed files with 752 additions and 370 deletions
--- a/app.go
+++ b/app.go
@@ -6,9 +6,9 @@ import (
 	"fmt"
 	"math"
 	"path/filepath"
-	"regexp"
 	"sort"
 	"strings"
+	"sync"
 	"time"

 	"github.com/wailsapp/wails/v2/pkg/runtime"
@@ -97,12 +97,26 @@ type MatchConfig struct {
 	IncludeHeader bool   `json:"includeHeader"` // 导出时是否包含表头行
 }

+// AICacheInfo carries the AI cache status returned by GetAICacheInfo.
+type AICacheInfo struct {
+	Count    int    `json:"count"`    // entry count reported by the cache
+	FilePath string `json:"filePath"` // path of the cache file
+}
+
 // ---------- App 结构体 ----------

 type App struct {
 	ctx         context.Context
-	deepseekKey string
+	apiKey      string // AI API 密钥（兼容 OpenAI/Deepseek/本地模型）
+	apiEndpoint string // API 端点（默认 https://api.deepseek.com/v1/chat/completions）
+	apiModel    string // 模型名称（默认 deepseek-chat）
 	aiCache     *AICache
+
+	// 最近一次匹配的配置和表头（供导出使用）
+	dataMu      sync.RWMutex
+	lastConfig  MatchConfig
+	headersA    []string
+	headersB    []string
 }

 // NewApp 创建 App 实例
@@ -132,18 +146,52 @@ func (a *App) emitProgress(current, total int, message, phase string) {
 	})
 }

-// SetDeepseekAPIKey 设置 Deepseek API 密钥（仅保存在内存中）
+// ---------- AI 配置 ----------
+
+// SetDeepseekAPIKey 设置 Deepseek API 密钥（仅保存在内存中，向后兼容）
 func (a *App) SetDeepseekAPIKey(key string) string {
-	a.deepseekKey = strings.TrimSpace(key)
-	if a.deepseekKey == "" {
+	a.apiKey = strings.TrimSpace(key)
+	if a.apiKey == "" {
 		return "已清除 Deepseek API 密钥"
 	}
 	return "Deepseek API 密钥已设置"
 }

+// SetAIConfig sets the AI API endpoint, model and key in one call; an empty argument leaves that field unchanged, and the returned status message echoes the endpoint and model.
+func (a *App) SetAIConfig(endpoint, model, key string) string {
+	if endpoint != "" { // NOTE(review): tested before TrimSpace, so whitespace-only input stores ""
+		a.apiEndpoint = strings.TrimSpace(endpoint)
+	}
+	if model != "" { // same pre-trim emptiness check as endpoint
+		a.apiModel = strings.TrimSpace(model)
+	}
+	if key != "" { // an empty key does not clear the stored key here
+		a.apiKey = strings.TrimSpace(key)
+	}
+	return fmt.Sprintf("AI 配置已更新 (端点=%s, 模型=%s)", a.apiEndpoint, a.apiModel)
+}
+
+// SetAPIKey stores the whitespace-trimmed AI API key in memory only; a key that trims to empty clears it. Returns a status message.
+func (a *App) SetAPIKey(key string) string {
+	a.apiKey = strings.TrimSpace(key)
+	if a.apiKey == "" { // nothing left after trimming: report the key as cleared
+		return "已清除 AI API 密钥"
+	}
+	return "AI API 密钥已设置"
+}
+
 // GetDeepseekStatus 返回是否已配置 Deepseek API 密钥
 func (a *App) GetDeepseekStatus() bool {
-	return a.deepseekKey != ""
+	return a.apiKey != ""
+}
+
+// GetAIStatus returns the AI API configuration status as a string map with the keys "ready", "endpoint" and "model".
+func (a *App) GetAIStatus() map[string]string {
+	return map[string]string{
+		"ready":    fmt.Sprintf("%v", a.apiKey != ""), // "true" when an API key is set, otherwise "false"
+		"endpoint": a.apiEndpoint,
+		"model":    a.apiModel,
+	}
 }

 // ClearAICache 清除所有 AI 缓存
@@ -154,12 +202,9 @@ func (a *App) ClearAICache() string {
 }

 // GetAICacheInfo 返回 AI 缓存信息（条目数、文件路径）
-func (a *App) GetAICacheInfo() map[string]interface{} {
+func (a *App) GetAICacheInfo() AICacheInfo {
 	count, path := a.aiCache.stat()
-	return map[string]interface{}{
-		"count":    count,
-		"filePath": path,
-	}
+	return AICacheInfo{Count: count, FilePath: path}
 }

 // ---------- 文件选择对话框 ----------
@@ -212,54 +257,17 @@ func (a *App) ParseHeaders(filePath string) ([]string, error) {

 // RunMatch 接收完整 MatchConfig，按列索引执行通用匹配
 func (a *App) RunMatch(config MatchConfig) ([]MatchResult, error) {
-	// 1. 编译正则
-	var reg *regexp.Regexp
-	if config.RegexPattern != "" {
-		var err error
-		reg, err = regexp.Compile(config.RegexPattern)
-		if err != nil {
-			return nil, fmt.Errorf("正则表达式格式错误，请检查: %v", err)
-		}
-		fmt.Printf("[DEBUG] RunMatch 使用正则: '%s'\n", config.RegexPattern)
-	} else {
-		fmt.Printf("[DEBUG] RunMatch 跳过清洗（正则为空）\n")
+	prep, err := a.prepareMatch(config)
+	if err != nil {
+		return nil, err
 	}
+	return a.runMatchOnData(prep, config)
+}

-	// 2. 默认值兜底
-	timeWindow := config.TimeWindow
-	if timeWindow <= 0 {
-		timeWindow = DefaultTimeWindowHours
-	}
-	threshold := config.Threshold
-	if threshold <= 0 {
-		threshold = DefaultThreshold
-	}
+// runMatchOnData 在已读取的数据上执行匹配
+func (a *App) runMatchOnData(prep *matchPrep, config MatchConfig) ([]MatchResult, error) {
 	useTime := config.ColATimeIndex >= 0 && config.ColBTimeIndex >= 0
-
-	// 3. 读取原始数据
-	a.emitProgress(0, 100, "正在读取 A 表...", "reading")
-	rowsA, err := a.readRawRows(config.FileAPath)
-	if err != nil {
-		return nil, fmt.Errorf("读取 A 表失败: %v", err)
-	}
-	a.emitProgress(0, 100, "正在读取 B 表...", "reading")
-	rowsB, err := a.readRawRows(config.FileBPath)
-	if err != nil {
-		return nil, fmt.Errorf("读取 B 表失败: %v", err)
-	}
-	if len(rowsA) < 2 {
-		return nil, fmt.Errorf("A 表无有效数据行")
-	}
-	if len(rowsB) < 2 {
-		return nil, fmt.Errorf("B 表无有效数据行")
-	}
-
-	aHeaders := rowsA[0]
-	_ = aHeaders // 保留表头引用（将来导出时可能用到）
-	dataA := rowsA[1:]
-	dataB := rowsB[1:]
-	windowDuration := time.Duration(timeWindow * float64(time.Hour))
-	totalA := len(dataA)
+	totalA := len(prep.dataA)
 	var results []MatchResult

 	useAllMatches := config.AllMatches
@@ -268,7 +276,32 @@ func (a *App) RunMatch(config MatchConfig) ([]MatchResult, error) {
 		maxPreview = DefaultMaxPreview
 	}

-	for i, rowA := range dataA {
+	// 预计算 B 表清洗后的匹配值，避免内层循环中重复 regex 替换
+	totalB := len(prep.dataB)
+	cleanedBMatch := make([]string, totalB)
+	origBMatch := make([]string, totalB)
+	parsedBTime := make([]time.Time, totalB)
+	hasBTime := make([]bool, totalB)
+	bExtractVal := make([]string, totalB)
+	for bIdx, rowB := range prep.dataB {
+		matchStrB := getCell(rowB, config.ColBMatchIndex)
+		origBMatch[bIdx] = matchStrB
+		if matchStrB == "" {
+			cleanedBMatch[bIdx] = ""
+		} else {
+			cleanedBMatch[bIdx] = cleanWithRegex(matchStrB, prep.reg)
+		}
+		if useTime {
+			t, err := parseTimeFlexible(getCell(rowB, config.ColBTimeIndex))
+			if err == nil {
+				parsedBTime[bIdx] = t
+				hasBTime[bIdx] = true
+			}
+		}
+		bExtractVal[bIdx] = getCell(rowB, config.ColBExtractIndex)
+	}
+
+	for i, rowA := range prep.dataA {
 		if i%10 == 0 || i == totalA-1 {
 			pct := (i + 1) * 100 / totalA
 			a.emitProgress(i+1, totalA,
@@ -287,38 +320,35 @@ func (a *App) RunMatch(config MatchConfig) ([]MatchResult, error) {
 			if err == nil { timeA = t; hasTimeA = true }
 		}

-		cleanA := cleanWithRegex(matchStrA, reg)
+		cleanA := cleanWithRegex(matchStrA, prep.reg)
+
 		// 收集该 A 行的所有候选匹配
 		var candidates []MatchResult

-		for _, rowB := range dataB {
-			matchStrB := getCell(rowB, config.ColBMatchIndex)
-			if matchStrB == "" { continue }
+		for bIdx := range prep.dataB {
+			if cleanedBMatch[bIdx] == "" { continue }

-			var timeDiff time.Duration
-			if hasTimeA && useTime {
-				tB, err := parseTimeFlexible(getCell(rowB, config.ColBTimeIndex))
-				if err != nil { continue }
-				td := timeA.Sub(tB)
-				if td < -windowDuration || td > windowDuration { continue }
-				timeDiff = td
+			if hasTimeA && useTime && hasBTime[bIdx] {
+				td := timeA.Sub(parsedBTime[bIdx])
+				if td < -prep.windowDuration || td > prep.windowDuration { continue }
 			}

-			cleanB := cleanWithRegex(matchStrB, reg)
-			if cleanA == "" || cleanB == "" { continue }
-
-			similarity := calcSimilarity(matchStrA, matchStrB, reg, config.CaseSensitive)
+			similarity := similarityFromCleaned(cleanA, cleanedBMatch[bIdx], config.CaseSensitive)

 			if i < maxPreview {
 				fmt.Printf("[DEBUG] | A[%d]='%s'→'%s' | B='%s'→'%s' | 相似度=%.4f\n",
-					i, matchStrA, cleanA, matchStrB, cleanB, similarity)
+					i, matchStrA, cleanA, origBMatch[bIdx], cleanedBMatch[bIdx], similarity)
 			}

-			if similarity >= threshold {
+			if similarity >= prep.threshold {
+				var timeDiff time.Duration
+				if hasTimeA && useTime && hasBTime[bIdx] {
+					timeDiff = timeA.Sub(parsedBTime[bIdx])
+				}
 				mr := MatchResult{
 					RowAData:        rowA,
-					RowBKey:         matchStrB,
-					ExtractValue:    getCell(rowB, config.ColBExtractIndex),
+					RowBKey:         origBMatch[bIdx],
+					ExtractValue:    bExtractVal[bIdx],
 					TimeDiff:        formatTimeDiff(timeDiff),
 					SimilarityScore: math.Round(similarity*10000) / 10000,
 					AIMatched:       false,
@@ -343,13 +373,14 @@ func (a *App) RunMatch(config MatchConfig) ([]MatchResult, error) {
 	}

 	// 结果排序
-	if config.SortBy == "similarity" {
+	switch config.SortBy {
+	case "similarity":
 		sort.Slice(results, func(i, j int) bool {
 			return results[i].SimilarityScore > results[j].SimilarityScore
 		})
-	} else if config.SortBy == "timeDiff" {
+	case "timeDiff":
 		sort.Slice(results, func(i, j int) bool {
-			return results[i].TimeDiff < results[j].TimeDiff
+			return parseTimeDiffDuration(results[i].TimeDiff) < parseTimeDiffDuration(results[j].TimeDiff)
 		})
 	}

@@ -359,42 +390,31 @@ func (a *App) RunMatch(config MatchConfig) ([]MatchResult, error) {
 	return results, nil
 }

-// RunMatchWithAI 执行基础匹配 + Deepseek AI 增强匹配（配置驱动）
+// RunMatchWithAI 执行基础匹配 + AI 增强匹配（配置驱动）
 func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
-	if a.deepseekKey == "" {
-		return nil, fmt.Errorf("请先设置 Deepseek API 密钥")
+	if a.apiKey == "" {
+		return nil, fmt.Errorf("请先设置 AI API 密钥")
 	}

-	// 1. 先执行基础匹配
-	results, err := a.RunMatch(config)
+	prep, err := a.prepareMatch(config)
 	if err != nil {
 		return nil, err
 	}

-	// 2. 重新读取数据，找出未被基础匹配覆盖的 A 表行
-	rowsA, err := a.readRawRows(config.FileAPath)
+	// 1. 先执行基础匹配
+	results, err := a.runMatchOnData(prep, config)
 	if err != nil {
-		return nil, fmt.Errorf("读取 A 表失败: %v", err)
-	}
-	rowsB, err := a.readRawRows(config.FileBPath)
-	if err != nil {
-		return nil, fmt.Errorf("读取 B 表失败: %v", err)
-	}
-	if len(rowsA) < 2 || len(rowsB) < 2 {
-		return results, nil
+		return nil, err
 	}

-	dataA := rowsA[1:]
-	dataB := rowsB[1:]
-
-	// 用 RowAData 快速判断哪些 A 行已经被匹配
+	// 2. 找出未被基础匹配覆盖的 A 表行
 	matchedSet := make(map[string]bool)
 	for _, r := range results {
 		matchedSet[strings.Join(r.RowAData, "\x00")] = true
 	}

 	var unmatchedA [][]string
-	for _, row := range dataA {
+	for _, row := range prep.dataA {
 		if !matchedSet[strings.Join(row, "\x00")] {
 			unmatchedA = append(unmatchedA, row)
 		}
@@ -405,33 +425,61 @@ func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
 		return results, nil
 	}

-	// 3. AI 增强匹配
-	timeWindow := config.TimeWindow
-	if timeWindow <= 0 {
-		timeWindow = DefaultTimeWindowHours
-	}
-	windowDuration := time.Duration(timeWindow * float64(time.Hour))
+	// 3. AI 增强匹配（先查行级缓存，减少 API 调用）
 	useTime := config.ColATimeIndex >= 0 && config.ColBTimeIndex >= 0

-	totalUnmatched := len(unmatchedA)
+	aiMatched := 0
+	var failedBatches []int
+
+	// 3a. 检查行级缓存，命中则直接加入结果
+	var uncachedA [][]string
+	cacheHits := 0
+	for _, row := range unmatchedA {
+		matchVal := getCell(row, config.ColAMatchIndex)
+		timeStr := ""
+		if useTime {
+			timeStr = getCell(row, config.ColATimeIndex)
+		}
+		cacheKey := a.buildRowCacheKey(matchVal, timeStr, config)
+		if cachedVal, ok := a.aiCache.getRow(cacheKey); ok {
+			results = append(results, MatchResult{
+				RowAData:        row,
+				RowBKey:         "",
+				ExtractValue:    cachedVal,
+				SimilarityScore: 0,
+				AIMatched:       true,
+			})
+			aiMatched++
+			cacheHits++
+		} else {
+			uncachedA = append(uncachedA, row)
+		}
+	}
+
+	if cacheHits > 0 {
+		fmt.Printf("[CACHE] ✓ 行级缓存命中 %d 条，剩余 %d 条需 AI 处理\n", cacheHits, len(uncachedA))
+	}
+
+	if len(uncachedA) == 0 {
+		a.emitProgress(1, 1,
+			fmt.Sprintf("AI 增强完成！全部 %d 条命中缓存", cacheHits), "done")
+		return results, nil
+	}
+
+	totalUnmatched := len(uncachedA)
 	a.emitProgress(0, totalUnmatched,
-		fmt.Sprintf("AI 增强匹配：还有 %d 条未匹配记录，正在调用 Deepseek...", totalUnmatched),
+		fmt.Sprintf("AI 增强匹配：%d 条命中缓存，%d 条需调用 AI...", cacheHits, totalUnmatched),
 		"ai-enhancing")

-	batchSize := DefaultBatchSize
-	aiMatched := 0
-
-	for batchStart := 0; batchStart < totalUnmatched; batchStart += batchSize {
-		end := batchStart + batchSize
-		if end > totalUnmatched {
-			end = totalUnmatched
-		}
+	for batchStart := 0; batchStart < totalUnmatched; batchStart += DefaultBatchSize {
+		end := min(batchStart+DefaultBatchSize, totalUnmatched)
+		batchNum := (batchStart / DefaultBatchSize) + 1

 		a.emitProgress(batchStart+1, totalUnmatched,
-			fmt.Sprintf("AI 分析中 %d/%d (第 %d 批)...", end, totalUnmatched, (batchStart/batchSize)+1),
+			fmt.Sprintf("AI 分析中 %d/%d (第 %d 批)...", end, totalUnmatched, batchNum),
 			"ai-enhancing")

-		batch := unmatchedA[batchStart:end]
+		batch := uncachedA[batchStart:end]

 		// 计算本批 A 表的时间范围
 		var minTime, maxTime time.Time
@@ -459,10 +507,10 @@ func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
 		// 过滤 B 表在时间窗口内的行（用户配置时间窗口 + 额外余量覆盖批次跨度）
 		var relevantB [][]string
 		if hasBatchTime && useTime {
-			padding := windowDuration + 3*time.Hour
+			padding := prep.windowDuration + time.Duration(defaultAIWindowPadH)*time.Hour
 			ws := minTime.Add(-padding)
 			we := maxTime.Add(padding)
-			for _, row := range dataB {
+			for _, row := range prep.dataB {
 				t, err := parseTimeFlexible(getCell(row, config.ColBTimeIndex))
 				if err != nil || t.Before(ws) || t.After(we) {
 					continue
@@ -471,17 +519,16 @@ func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
 			}
 		} else {
 			// 无时间列时限制 B 表条数以控制 token 消耗
-			maxB := 200
-			if len(dataB) < maxB {
-				maxB = len(dataB)
-			}
-			relevantB = dataB[:maxB]
+			maxB := min(defaultMaxBNoTime, len(prep.dataB))
+			relevantB = prep.dataB[:maxB]
 		}

 		// 构建 AI 提示
-		prompt := a.buildGenericAIPrompt(batch, relevantB, config, windowDuration, hasBatchTime)
-		aiResp, err := a.callDeepseekAPI(prompt)
+		prompt := a.buildGenericAIPrompt(batch, relevantB, config, prep.windowDuration, hasBatchTime)
+		aiResp, err := a.callAIAPI(prompt)
 		if err != nil {
+			fmt.Printf("[AI-WARN] 第 %d 批 API 调用失败: %v\n", batchNum, err)
+			failedBatches = append(failedBatches, batchNum)
 			continue
 		}

@@ -502,7 +549,8 @@ func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
 		}
 		if parseErr != nil {
 			fmt.Printf("[AI-WARN] 响应解析失败 (第 %d 批): %s\n   原始响应: %.200s\n",
-				(batchStart/batchSize)+1, parseErr.Error(), aiResp)
+				batchNum, parseErr.Error(), aiResp)
+			failedBatches = append(failedBatches, batchNum)
 			continue
 		}

@@ -522,12 +570,26 @@ func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
 			}
 			results = append(results, mr)
 			aiMatched++
+
+			// 写入行级缓存
+			matchVal := getCell(rowA, config.ColAMatchIndex)
+			timeStr := ""
+			if useTime {
+				timeStr = getCell(rowA, config.ColATimeIndex)
+			}
+			cacheKey := a.buildRowCacheKey(matchVal, timeStr, config)
+			a.aiCache.putRow(cacheKey, val)
 		}
 	}
+	a.aiCache.saveToFile()

-	a.emitProgress(totalUnmatched, totalUnmatched,
-		fmt.Sprintf("AI 增强完成！基础匹配 %d 条 + AI 补充 %d 条 = 共 %d 条",
-			len(results)-aiMatched, aiMatched, len(results)), "done")
+	// 构建完成消息
+	msg := fmt.Sprintf("AI 增强完成！基础匹配 %d 条 + AI 补充 %d 条 = 共 %d 条",
+		len(results)-aiMatched, aiMatched, len(results))
+	if len(failedBatches) > 0 {
+		msg += fmt.Sprintf("（警告：第 %v 批失败）", failedBatches)
+	}
+	a.emitProgress(totalUnmatched, totalUnmatched, msg, "done")

 	return results, nil
 }