- 修复 MaxPreview=0 仍被覆盖为默认值的 bug
- 修复 API Endpoint 自动补全逻辑(避免 /v1/v1/chat/completions)
- 为 AI 配置与匹配状态字段增加并发锁
- AI 增强未匹配行改为按索引跟踪,避免重复行误判
- 无时间列时 AI 匹配 B 表行数可配置并增加截断警告
- 导出时防御参差不齐行导致的数组越界 panic
- Excel 读取时对单元格统一 TrimSpace
- 删除未使用的 minInt 函数
- 修复 wails.json 开发服务器地址为 http://localhost:5173
- 重新生成 Wails 前端绑定
- 新增 ai_test.go / export_test.go 单元测试
616 lines
18 KiB
Go
616 lines
18 KiB
Go
package main
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"fmt"
|
||
"math"
|
||
"path/filepath"
|
||
"sort"
|
||
"strings"
|
||
"sync"
|
||
"time"
|
||
|
||
"github.com/wailsapp/wails/v2/pkg/runtime"
|
||
)
|
||
|
||
// ---------- 常量 ----------
|
||
|
||
// Matching defaults and fixed AI request parameters.
const (
	// DefaultThreshold is the default similarity threshold for a match.
	DefaultThreshold = 0.65

	// DefaultTimeWindowHours is the default time window, in hours.
	DefaultTimeWindowHours = 12.0

	// DefaultBatchSize is the number of rows sent in each batched AI call.
	DefaultBatchSize = 8

	// DefaultMaxPreview is the number of entries previewed in the debug log.
	DefaultMaxPreview = 3

	// deepseekModel is the Deepseek model name.
	deepseekModel = "deepseek-chat"

	// deepseekTemperature is the AI temperature parameter.
	deepseekTemperature = 0.05

	// deepseekMaxTokens is the AI maximum token count.
	deepseekMaxTokens = 2048

	// cacheMaxSize is the maximum number of entries in the AI cache.
	cacheMaxSize = 500
)
|
||
|
||
// ---------- 数据结构 ----------
|
||
|
||
// MatchResult is a single match record. It carries both the newer generic
// fields and the older fields that are kept for backward compatibility.
type MatchResult struct {
	// Generic fields (new).
	RowAData     []string `json:"rowAData"`     // all original columns of the table A row
	RowBKey      string   `json:"rowBKey"`      // value of the table B match column
	ExtractValue string   `json:"extractValue"` // target column value extracted from table B

	// Legacy fields (backward compatibility).
	MonthlyCellName string `json:"monthlyCellName"`
	DailyCellID     string `json:"dailyCellId"`
	InterruptReason string `json:"interruptReason"`

	// Fields shared by both generations.
	TimeDiff        string  `json:"timeDiff"`
	SimilarityScore float64 `json:"similarityScore"`
	AIMatched       bool    `json:"aiMatched"`
}
|
||
|
||
// ProgressPayload describes the progress of a running operation.
type ProgressPayload struct {
	Current int    `json:"current"`
	Total   int    `json:"total"`
	Message string `json:"message"`
	// Phase is one of: reading / matching / ai-enhancing / done.
	Phase string `json:"phase"`
}
|
||
|
||
// MatchConfig is the complete matching configuration passed in by the frontend.
type MatchConfig struct {
	// Input file paths.
	FileAPath string `json:"fileAPath"`
	FileBPath string `json:"fileBPath"`

	// Table A column indices (-1 means "not used").
	ColAMatchIndex int `json:"colAMatchIndex"` // table A match column
	ColATimeIndex  int `json:"colATimeIndex"`  // table A time column (optional; -1 skips time pruning)

	// Table B column indices.
	ColBMatchIndex   int `json:"colBMatchIndex"`   // table B match column
	ColBTimeIndex    int `json:"colBTimeIndex"`    // table B time column (optional; -1 skips time pruning)
	ColBExtractIndex int `json:"colBExtractIndex"` // table B target column to extract

	// Cleaning and matching parameters.
	RegexPattern string  `json:"regexPattern"` // empty string = skip cleaning
	TimeWindow   float64 `json:"timeWindow"`   // hours
	Threshold    float64 `json:"threshold"`    // 0.0 - 1.0

	// Extended options.
	AllMatches     bool   `json:"allMatches"`     // true = return every match (>= threshold) for an A row, not only the best
	CaseSensitive  bool   `json:"caseSensitive"`  // true = case-sensitive matching
	SortBy         string `json:"sortBy"`         // "similarity" / "timeDiff" / "" = no sorting
	MaxPreview     int    `json:"maxPreview"`     // first N comparison details printed to the debug log; 0 = print none
	MaxBRowsNoTime int    `json:"maxBRowsNoTime"` // max table B rows used for AI matching without a time column (0 = default of 200)
	ExportFormat   string `json:"exportFormat"`   // "xlsx" (default) / "csv"
	IncludeHeader  bool   `json:"includeHeader"`  // whether the export includes the header row
}
|
||
|
||
// AICacheInfo reports the state of the AI cache: how many entries it holds
// and the path of its backing file.
type AICacheInfo struct {
	Count    int    `json:"count"`
	FilePath string `json:"filePath"`
}
|
||
|
||
// ---------- App 结构体 ----------
|
||
|
||
type App struct {
|
||
ctx context.Context
|
||
aiCache *AICache
|
||
|
||
// AI API 配置(并发访问需要加锁)
|
||
aiMu sync.RWMutex
|
||
apiKey string // AI API 密钥(兼容 OpenAI/Deepseek/本地模型)
|
||
apiEndpoint string // API 端点(默认 https://api.deepseek.com/v1/chat/completions)
|
||
apiModel string // 模型名称(默认 deepseek-chat)
|
||
|
||
// 最近一次匹配的配置和表头(供导出使用)
|
||
dataMu sync.RWMutex
|
||
lastConfig MatchConfig
|
||
headersA []string
|
||
headersB []string
|
||
}
|
||
|
||
// NewApp 创建 App 实例
|
||
func NewApp() *App {
|
||
return &App{
|
||
aiCache: newAICache(),
|
||
}
|
||
}
|
||
|
||
// startup 保存上下文
|
||
func (a *App) startup(ctx context.Context) {
|
||
a.ctx = ctx
|
||
count, path := a.aiCache.stat()
|
||
fmt.Printf("[CACHE] AI 缓存已加载,当前 %d 条缓存记录 (文件: %s)\n", count, path)
|
||
}
|
||
|
||
// emitProgress 向前端发送进度事件
|
||
func (a *App) emitProgress(current, total int, message, phase string) {
|
||
if a.ctx == nil {
|
||
return
|
||
}
|
||
runtime.EventsEmit(a.ctx, "match-progress", ProgressPayload{
|
||
Current: current,
|
||
Total: total,
|
||
Message: message,
|
||
Phase: phase,
|
||
})
|
||
}
|
||
|
||
// ---------- AI 配置 ----------
|
||
|
||
// SetDeepseekAPIKey 设置 Deepseek API 密钥(仅保存在内存中,向后兼容)
|
||
func (a *App) SetDeepseekAPIKey(key string) string {
|
||
a.aiMu.Lock()
|
||
defer a.aiMu.Unlock()
|
||
a.apiKey = strings.TrimSpace(key)
|
||
if a.apiKey == "" {
|
||
return "已清除 Deepseek API 密钥"
|
||
}
|
||
return "Deepseek API 密钥已设置"
|
||
}
|
||
|
||
// SetAIConfig 统一设置 AI API 配置(端点、模型、密钥)
|
||
func (a *App) SetAIConfig(endpoint, model, key string) string {
|
||
a.aiMu.Lock()
|
||
defer a.aiMu.Unlock()
|
||
if endpoint != "" {
|
||
a.apiEndpoint = strings.TrimSpace(endpoint)
|
||
}
|
||
if model != "" {
|
||
a.apiModel = strings.TrimSpace(model)
|
||
}
|
||
if key != "" {
|
||
a.apiKey = strings.TrimSpace(key)
|
||
}
|
||
return fmt.Sprintf("AI 配置已更新 (端点=%s, 模型=%s)", a.apiEndpoint, a.apiModel)
|
||
}
|
||
|
||
// SetAPIKey 设置 AI API 密钥(仅保存在内存中)
|
||
func (a *App) SetAPIKey(key string) string {
|
||
a.aiMu.Lock()
|
||
defer a.aiMu.Unlock()
|
||
a.apiKey = strings.TrimSpace(key)
|
||
if a.apiKey == "" {
|
||
return "已清除 AI API 密钥"
|
||
}
|
||
return "AI API 密钥已设置"
|
||
}
|
||
|
||
// GetDeepseekStatus 返回是否已配置 Deepseek API 密钥
|
||
func (a *App) GetDeepseekStatus() bool {
|
||
a.aiMu.RLock()
|
||
defer a.aiMu.RUnlock()
|
||
return a.apiKey != ""
|
||
}
|
||
|
||
// GetAIStatus 返回 AI API 配置状态
|
||
func (a *App) GetAIStatus() map[string]string {
|
||
a.aiMu.RLock()
|
||
defer a.aiMu.RUnlock()
|
||
return map[string]string{
|
||
"ready": fmt.Sprintf("%v", a.apiKey != ""),
|
||
"endpoint": a.apiEndpoint,
|
||
"model": a.apiModel,
|
||
}
|
||
}
|
||
|
||
// ClearAICache 清除所有 AI 缓存
|
||
func (a *App) ClearAICache() string {
|
||
before, _ := a.aiCache.stat()
|
||
a.aiCache.clear()
|
||
return fmt.Sprintf("已清除 %d 条 AI 缓存记录", before)
|
||
}
|
||
|
||
// GetAICacheInfo 返回 AI 缓存信息(条目数、文件路径)
|
||
func (a *App) GetAICacheInfo() AICacheInfo {
|
||
count, path := a.aiCache.stat()
|
||
return AICacheInfo{Count: count, FilePath: path}
|
||
}
|
||
|
||
// ---------- 文件选择对话框 ----------
|
||
|
||
// OpenFileA 打开文件对话框选择 A 表(基准表)
|
||
func (a *App) OpenFileA() (string, error) {
|
||
return a.openFileDialog("选择 A 表文件(基准表)")
|
||
}
|
||
|
||
// OpenFileB 打开文件对话框选择 B 表(数据源表)
|
||
func (a *App) OpenFileB() (string, error) {
|
||
return a.openFileDialog("选择 B 表文件(数据源表)")
|
||
}
|
||
|
||
func (a *App) openFileDialog(title string) (string, error) {
|
||
file, err := runtime.OpenFileDialog(a.ctx, runtime.OpenDialogOptions{
|
||
Title: title,
|
||
Filters: []runtime.FileFilter{
|
||
{DisplayName: "Excel / CSV 文件 (*.xlsx, *.xls, *.csv)", Pattern: "*.xlsx;*.xls;*.csv"},
|
||
},
|
||
})
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
return file, nil
|
||
}
|
||
|
||
// ParseHeaders 读取文件第一行作为表头数组返回给前端,用于动态渲染列映射下拉框
|
||
func (a *App) ParseHeaders(filePath string) ([]string, error) {
|
||
if filePath == "" {
|
||
return nil, fmt.Errorf("文件路径为空")
|
||
}
|
||
allRows, err := a.readRawRows(filePath)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if len(allRows) == 0 {
|
||
return nil, fmt.Errorf("文件为空,无表头")
|
||
}
|
||
headers := allRows[0]
|
||
// TrimSpace 每个表头
|
||
for i := range headers {
|
||
headers[i] = strings.TrimSpace(headers[i])
|
||
}
|
||
fmt.Printf("[DEBUG] ParseHeaders: '%s' → %d 列 %v\n", filepath.Base(filePath), len(headers), headers)
|
||
return headers, nil
|
||
}
|
||
|
||
// ---------- 通用匹配引擎 ----------
|
||
|
||
// RunMatch 接收完整 MatchConfig,按列索引执行通用匹配
|
||
func (a *App) RunMatch(config MatchConfig) ([]MatchResult, error) {
|
||
prep, err := a.prepareMatch(config)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
results, _, err := a.runMatchOnData(prep, config)
|
||
return results, err
|
||
}
|
||
|
||
// runMatchOnData 在已读取的数据上执行匹配
|
||
// 返回匹配结果,以及被匹配到的 A 表行索引集合(供 AI 增强阶段判断未匹配行)
|
||
func (a *App) runMatchOnData(prep *matchPrep, config MatchConfig) ([]MatchResult, map[int]bool, error) {
|
||
useTime := config.ColATimeIndex >= 0 && config.ColBTimeIndex >= 0
|
||
totalA := len(prep.dataA)
|
||
var results []MatchResult
|
||
matchedAIndices := make(map[int]bool)
|
||
|
||
useAllMatches := config.AllMatches
|
||
maxPreview := config.MaxPreview
|
||
if maxPreview < 0 {
|
||
maxPreview = DefaultMaxPreview
|
||
}
|
||
|
||
// 预计算 B 表清洗后的匹配值,避免内层循环中重复 regex 替换
|
||
totalB := len(prep.dataB)
|
||
cleanedBMatch := make([]string, totalB)
|
||
origBMatch := make([]string, totalB)
|
||
parsedBTime := make([]time.Time, totalB)
|
||
hasBTime := make([]bool, totalB)
|
||
bExtractVal := make([]string, totalB)
|
||
for bIdx, rowB := range prep.dataB {
|
||
matchStrB := getCell(rowB, config.ColBMatchIndex)
|
||
origBMatch[bIdx] = matchStrB
|
||
if matchStrB == "" {
|
||
cleanedBMatch[bIdx] = ""
|
||
} else {
|
||
cleanedBMatch[bIdx] = cleanWithRegex(matchStrB, prep.reg)
|
||
}
|
||
if useTime {
|
||
t, err := parseTimeFlexible(getCell(rowB, config.ColBTimeIndex))
|
||
if err == nil {
|
||
parsedBTime[bIdx] = t
|
||
hasBTime[bIdx] = true
|
||
}
|
||
}
|
||
bExtractVal[bIdx] = getCell(rowB, config.ColBExtractIndex)
|
||
}
|
||
|
||
for i, rowA := range prep.dataA {
|
||
if i%10 == 0 || i == totalA-1 {
|
||
pct := (i + 1) * 100 / totalA
|
||
a.emitProgress(i+1, totalA,
|
||
fmt.Sprintf("匹配中 %d/%d (%d%%)...", i+1, totalA, pct), "matching")
|
||
}
|
||
|
||
matchStrA := getCell(rowA, config.ColAMatchIndex)
|
||
if matchStrA == "" {
|
||
continue
|
||
}
|
||
|
||
var timeA time.Time
|
||
var hasTimeA bool
|
||
if useTime {
|
||
t, err := parseTimeFlexible(getCell(rowA, config.ColATimeIndex))
|
||
if err == nil { timeA = t; hasTimeA = true }
|
||
}
|
||
|
||
cleanA := cleanWithRegex(matchStrA, prep.reg)
|
||
|
||
// 收集该 A 行的所有候选匹配
|
||
var candidates []MatchResult
|
||
|
||
for bIdx := range prep.dataB {
|
||
if cleanedBMatch[bIdx] == "" { continue }
|
||
|
||
if hasTimeA && useTime && hasBTime[bIdx] {
|
||
td := timeA.Sub(parsedBTime[bIdx])
|
||
if td < -prep.windowDuration || td > prep.windowDuration { continue }
|
||
}
|
||
|
||
similarity := similarityFromCleaned(cleanA, cleanedBMatch[bIdx], config.CaseSensitive)
|
||
|
||
if i < maxPreview {
|
||
fmt.Printf("[DEBUG] | A[%d]='%s'→'%s' | B='%s'→'%s' | 相似度=%.4f\n",
|
||
i, matchStrA, cleanA, origBMatch[bIdx], cleanedBMatch[bIdx], similarity)
|
||
}
|
||
|
||
if similarity >= prep.threshold {
|
||
var timeDiff time.Duration
|
||
if hasTimeA && useTime && hasBTime[bIdx] {
|
||
timeDiff = timeA.Sub(parsedBTime[bIdx])
|
||
}
|
||
mr := MatchResult{
|
||
RowAData: rowA,
|
||
RowBKey: origBMatch[bIdx],
|
||
ExtractValue: bExtractVal[bIdx],
|
||
TimeDiff: formatTimeDiff(timeDiff),
|
||
SimilarityScore: math.Round(similarity*10000) / 10000,
|
||
AIMatched: false,
|
||
}
|
||
if useAllMatches {
|
||
candidates = append(candidates, mr)
|
||
} else if len(candidates) == 0 || similarity > candidates[0].SimilarityScore {
|
||
candidates = []MatchResult{mr}
|
||
}
|
||
}
|
||
}
|
||
|
||
if len(candidates) > 0 {
|
||
if i < maxPreview {
|
||
for _, c := range candidates {
|
||
fmt.Printf("[DEBUG] ✓ 命中 | A='%s'→B='%s' | 相似度=%.4f\n",
|
||
matchStrA, c.RowBKey, c.SimilarityScore)
|
||
}
|
||
}
|
||
results = append(results, candidates...)
|
||
matchedAIndices[i] = true
|
||
}
|
||
}
|
||
|
||
// 结果排序
|
||
switch config.SortBy {
|
||
case "similarity":
|
||
sort.Slice(results, func(i, j int) bool {
|
||
return results[i].SimilarityScore > results[j].SimilarityScore
|
||
})
|
||
case "timeDiff":
|
||
sort.Slice(results, func(i, j int) bool {
|
||
return parseTimeDiffDuration(results[i].TimeDiff) < parseTimeDiffDuration(results[j].TimeDiff)
|
||
})
|
||
}
|
||
|
||
a.emitProgress(totalA, totalA,
|
||
fmt.Sprintf("匹配完成!共匹配成功 %d 条记录", len(results)), "done")
|
||
|
||
return results, matchedAIndices, nil
|
||
}
|
||
|
||
// RunMatchWithAI 执行基础匹配 + AI 增强匹配(配置驱动)
|
||
func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
|
||
a.aiMu.RLock()
|
||
if a.apiKey == "" {
|
||
a.aiMu.RUnlock()
|
||
return nil, fmt.Errorf("请先设置 AI API 密钥")
|
||
}
|
||
a.aiMu.RUnlock()
|
||
|
||
prep, err := a.prepareMatch(config)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
// 1. 先执行基础匹配
|
||
results, matchedAIndices, err := a.runMatchOnData(prep, config)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
// 2. 找出未被基础匹配覆盖的 A 表行(按索引,避免重复行内容相同导致误判)
|
||
var unmatchedA [][]string
|
||
for i, row := range prep.dataA {
|
||
if !matchedAIndices[i] {
|
||
unmatchedA = append(unmatchedA, row)
|
||
}
|
||
}
|
||
|
||
if len(unmatchedA) == 0 {
|
||
a.emitProgress(1, 1, "全部已匹配,无需 AI 增强", "done")
|
||
return results, nil
|
||
}
|
||
|
||
// 3. AI 增强匹配(先查行级缓存,减少 API 调用)
|
||
useTime := config.ColATimeIndex >= 0 && config.ColBTimeIndex >= 0
|
||
|
||
aiMatched := 0
|
||
var failedBatches []int
|
||
|
||
// 3a. 检查行级缓存,命中则直接加入结果
|
||
var uncachedA [][]string
|
||
cacheHits := 0
|
||
for _, row := range unmatchedA {
|
||
matchVal := getCell(row, config.ColAMatchIndex)
|
||
timeStr := ""
|
||
if useTime {
|
||
timeStr = getCell(row, config.ColATimeIndex)
|
||
}
|
||
cacheKey := a.buildRowCacheKey(matchVal, timeStr, config)
|
||
if cachedVal, ok := a.aiCache.getRow(cacheKey); ok {
|
||
results = append(results, MatchResult{
|
||
RowAData: row,
|
||
RowBKey: "",
|
||
ExtractValue: cachedVal,
|
||
SimilarityScore: 0,
|
||
AIMatched: true,
|
||
})
|
||
aiMatched++
|
||
cacheHits++
|
||
} else {
|
||
uncachedA = append(uncachedA, row)
|
||
}
|
||
}
|
||
|
||
if cacheHits > 0 {
|
||
fmt.Printf("[CACHE] ✓ 行级缓存命中 %d 条,剩余 %d 条需 AI 处理\n", cacheHits, len(uncachedA))
|
||
}
|
||
|
||
if len(uncachedA) == 0 {
|
||
a.emitProgress(1, 1,
|
||
fmt.Sprintf("AI 增强完成!全部 %d 条命中缓存", cacheHits), "done")
|
||
return results, nil
|
||
}
|
||
|
||
totalUnmatched := len(uncachedA)
|
||
a.emitProgress(0, totalUnmatched,
|
||
fmt.Sprintf("AI 增强匹配:%d 条命中缓存,%d 条需调用 AI...", cacheHits, totalUnmatched),
|
||
"ai-enhancing")
|
||
|
||
for batchStart := 0; batchStart < totalUnmatched; batchStart += DefaultBatchSize {
|
||
end := min(batchStart+DefaultBatchSize, totalUnmatched)
|
||
batchNum := (batchStart / DefaultBatchSize) + 1
|
||
|
||
a.emitProgress(batchStart+1, totalUnmatched,
|
||
fmt.Sprintf("AI 分析中 %d/%d (第 %d 批)...", end, totalUnmatched, batchNum),
|
||
"ai-enhancing")
|
||
|
||
batch := uncachedA[batchStart:end]
|
||
|
||
// 计算本批 A 表的时间范围
|
||
var minTime, maxTime time.Time
|
||
hasBatchTime := false
|
||
if useTime {
|
||
for _, row := range batch {
|
||
t, err := parseTimeFlexible(getCell(row, config.ColATimeIndex))
|
||
if err != nil {
|
||
continue
|
||
}
|
||
if !hasBatchTime {
|
||
minTime, maxTime = t, t
|
||
hasBatchTime = true
|
||
} else {
|
||
if t.Before(minTime) {
|
||
minTime = t
|
||
}
|
||
if t.After(maxTime) {
|
||
maxTime = t
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// 过滤 B 表在时间窗口内的行(用户配置时间窗口 + 额外余量覆盖批次跨度)
|
||
var relevantB [][]string
|
||
if hasBatchTime && useTime {
|
||
padding := prep.windowDuration + time.Duration(defaultAIWindowPadH)*time.Hour
|
||
ws := minTime.Add(-padding)
|
||
we := maxTime.Add(padding)
|
||
for _, row := range prep.dataB {
|
||
t, err := parseTimeFlexible(getCell(row, config.ColBTimeIndex))
|
||
if err != nil || t.Before(ws) || t.After(we) {
|
||
continue
|
||
}
|
||
relevantB = append(relevantB, row)
|
||
}
|
||
} else {
|
||
// 无时间列时限制 B 表条数以控制 token 消耗
|
||
if len(prep.dataB) > prep.maxBRowsNoTime {
|
||
fmt.Printf("[AI-WARN] 无时间列,B 表共 %d 行,AI 匹配仅取前 %d 行(可在高级设置调整)\n",
|
||
len(prep.dataB), prep.maxBRowsNoTime)
|
||
}
|
||
maxB := min(prep.maxBRowsNoTime, len(prep.dataB))
|
||
relevantB = prep.dataB[:maxB]
|
||
}
|
||
|
||
// 构建 AI 提示
|
||
prompt := a.buildGenericAIPrompt(batch, relevantB, config, prep.windowDuration, hasBatchTime)
|
||
aiResp, err := a.callAIAPI(prompt)
|
||
if err != nil {
|
||
fmt.Printf("[AI-WARN] 第 %d 批 API 调用失败: %v\n", batchNum, err)
|
||
failedBatches = append(failedBatches, batchNum)
|
||
continue
|
||
}
|
||
|
||
// 解析 AI 返回
|
||
var matchResp struct {
|
||
Matches []struct {
|
||
Index int `json:"index"`
|
||
Value string `json:"value"`
|
||
} `json:"matches"`
|
||
}
|
||
parseErr := json.Unmarshal([]byte(aiResp), &matchResp)
|
||
if parseErr != nil {
|
||
if idx := strings.Index(aiResp, "{"); idx >= 0 {
|
||
if endIdx := strings.LastIndex(aiResp, "}"); endIdx > idx {
|
||
parseErr = json.Unmarshal([]byte(aiResp[idx:endIdx+1]), &matchResp)
|
||
}
|
||
}
|
||
}
|
||
if parseErr != nil {
|
||
fmt.Printf("[AI-WARN] 响应解析失败 (第 %d 批): %s\n 原始响应: %.200s\n",
|
||
batchNum, parseErr.Error(), aiResp)
|
||
failedBatches = append(failedBatches, batchNum)
|
||
continue
|
||
}
|
||
|
||
for _, item := range matchResp.Matches {
|
||
idx := item.Index
|
||
val := strings.TrimSpace(item.Value)
|
||
if idx < 0 || idx >= len(batch) || val == "" {
|
||
continue
|
||
}
|
||
rowA := batch[idx]
|
||
mr := MatchResult{
|
||
RowAData: rowA,
|
||
RowBKey: "",
|
||
ExtractValue: val,
|
||
SimilarityScore: 0,
|
||
AIMatched: true,
|
||
}
|
||
results = append(results, mr)
|
||
aiMatched++
|
||
|
||
// 写入行级缓存
|
||
matchVal := getCell(rowA, config.ColAMatchIndex)
|
||
timeStr := ""
|
||
if useTime {
|
||
timeStr = getCell(rowA, config.ColATimeIndex)
|
||
}
|
||
cacheKey := a.buildRowCacheKey(matchVal, timeStr, config)
|
||
a.aiCache.putRow(cacheKey, val)
|
||
}
|
||
}
|
||
a.aiCache.saveToFile()
|
||
|
||
// 构建完成消息
|
||
msg := fmt.Sprintf("AI 增强完成!基础匹配 %d 条 + AI 补充 %d 条 = 共 %d 条",
|
||
len(results)-aiMatched, aiMatched, len(results))
|
||
if len(failedBatches) > 0 {
|
||
msg += fmt.Sprintf("(警告:第 %v 批失败)", failedBatches)
|
||
}
|
||
a.emitProgress(totalUnmatched, totalUnmatched, msg, "done")
|
||
|
||
return results, nil
|
||
}
|