A级(严重): - ExportResults 支持 CSV 格式导出和 IncludeHeader 配置,使用实际表头名 - RunMatchWithAI 消除重复文件读取,提取 runMatchOnData() 内部函数 - AI 缓存文件权限收紧至 0600 B级(中等): - 移除废弃代码约400行 (MonthlyReport/DailyReport/StartMatching/DeepseekEnhanceMatching) - 替换自定义 parseCSVLine 为标准 encoding/csv - GetAICacheInfo 返回命名结构体 AICacheInfo - 时间差排序改为数值比较 - App.vue 提取 buildMatchConfig() 工厂函数消除配置重复 - AllMatches=false 时命中 1.0 相似度可提前结束 B 表循环 C级(轻微): - 魔法数字提取为命名常量 - main.go 替换 println 为 log.Fatalf - 清理未使用变量 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1239 lines
33 KiB
Go
1239 lines
33 KiB
Go
package main
|
||
|
||
import (
|
||
"bytes"
|
||
"context"
|
||
"crypto/sha256"
|
||
"encoding/csv"
|
||
"encoding/hex"
|
||
"encoding/json"
|
||
"fmt"
|
||
"io"
|
||
"math"
|
||
"net/http"
|
||
"os"
|
||
"path/filepath"
|
||
"regexp"
|
||
"sort"
|
||
"strings"
|
||
"sync"
|
||
"time"
|
||
|
||
"github.com/wailsapp/wails/v2/pkg/runtime"
|
||
"github.com/xuri/excelize/v2"
|
||
)
|
||
|
||
// ---------- 数据结构 ----------
|
||
|
||
// MatchResult 匹配结果
|
||
type MatchResult struct {
|
||
// 新字段(通用化)
|
||
RowAData []string `json:"rowAData"` // A 表原始所有列(新)
|
||
RowBKey string `json:"rowBKey"` // B 表匹配列的值(新)
|
||
ExtractValue string `json:"extractValue"` // 从 B 表提取的目标列值(新)
|
||
|
||
// 旧字段(向后兼容)
|
||
MonthlyCellName string `json:"monthlyCellName"`
|
||
DailyCellID string `json:"dailyCellId"`
|
||
InterruptReason string `json:"interruptReason"`
|
||
|
||
// 公共字段
|
||
TimeDiff string `json:"timeDiff"`
|
||
SimilarityScore float64 `json:"similarityScore"`
|
||
AIMatched bool `json:"aiMatched"`
|
||
}
|
||
|
||
// ProgressPayload 进度信息
|
||
type ProgressPayload struct {
|
||
Current int `json:"current"`
|
||
Total int `json:"total"`
|
||
Message string `json:"message"`
|
||
Phase string `json:"phase"` // reading / matching / ai-enhancing / done
|
||
}
|
||
|
||
// MatchConfig 前端传递的完整匹配配置
|
||
type MatchConfig struct {
|
||
// 文件路径
|
||
FileAPath string `json:"fileAPath"`
|
||
FileBPath string `json:"fileBPath"`
|
||
|
||
// A 表列索引(-1 表示不使用)
|
||
ColAMatchIndex int `json:"colAMatchIndex"` // A 表匹配列
|
||
ColATimeIndex int `json:"colATimeIndex"` // A 表时间列(可选,-1 跳过时间剪枝)
|
||
|
||
// B 表列索引
|
||
ColBMatchIndex int `json:"colBMatchIndex"` // B 表匹配列
|
||
ColBTimeIndex int `json:"colBTimeIndex"` // B 表时间列(可选,-1 跳过时间剪枝)
|
||
ColBExtractIndex int `json:"colBExtractIndex"` // B 表要提取的目标列
|
||
|
||
// 清洗与匹配参数
|
||
RegexPattern string `json:"regexPattern"` // 空字符串 = 跳过清洗
|
||
TimeWindow float64 `json:"timeWindow"` // 小时
|
||
Threshold float64 `json:"threshold"` // 0.0 - 1.0
|
||
|
||
// 扩展选项
|
||
AllMatches bool `json:"allMatches"` // true=返回该A行所有匹配(>=阈值)而非仅最佳
|
||
CaseSensitive bool `json:"caseSensitive"` // true=大小写敏感匹配
|
||
SortBy string `json:"sortBy"` // "similarity" / "timeDiff" / ""=不排序
|
||
MaxPreview int `json:"maxPreview"` // 调试日志中打印的前 N 条比对详情,0=不打印
|
||
ExportFormat string `json:"exportFormat"` // "xlsx"(默认) / "csv"
|
||
IncludeHeader bool `json:"includeHeader"` // 导出时是否包含表头行
|
||
}
|
||
|
||
// ---------- Deepseek API 类型 ----------
|
||
|
||
type deepseekMessage struct {
|
||
Role string `json:"role"`
|
||
Content string `json:"content"`
|
||
}
|
||
|
||
type deepseekRequest struct {
|
||
Model string `json:"model"`
|
||
Messages []deepseekMessage `json:"messages"`
|
||
Temperature float64 `json:"temperature"`
|
||
MaxTokens int `json:"max_tokens,omitempty"`
|
||
}
|
||
|
||
type deepseekResponse struct {
|
||
Choices []struct {
|
||
Message struct {
|
||
Content string `json:"content"`
|
||
} `json:"message"`
|
||
} `json:"choices"`
|
||
Error *struct {
|
||
Message string `json:"message"`
|
||
} `json:"error,omitempty"`
|
||
}
|
||
|
||
// ---------- AI 缓存 ----------
|
||
|
||
// AICacheInfo 缓存信息(前端展示用)
|
||
type AICacheInfo struct {
|
||
Count int `json:"count"`
|
||
FilePath string `json:"filePath"`
|
||
}
|
||
|
||
// AICacheEntry 单条缓存记录
|
||
type AICacheEntry struct {
|
||
PromptHash string `json:"promptHash"`
|
||
Response string `json:"response"`
|
||
CreatedAt int64 `json:"createdAt"`
|
||
}
|
||
|
||
// AICache AI 响应缓存(持久化到临时文件)
|
||
type AICache struct {
|
||
Entries []AICacheEntry `json:"entries"`
|
||
mu sync.RWMutex // 小写,必须保持非导出以兼容 JSON 序列化
|
||
filePath string
|
||
maxSize int // 最大缓存条目数
|
||
}
|
||
|
||
// cacheFileName 缓存文件名
|
||
const cacheFileName = "data-matcher-ai-cache.json"
|
||
|
||
// 默认常量
|
||
const (
|
||
defaultThreshold = 0.65
|
||
defaultTimeWindowH = 12.0
|
||
defaultBatchSize = 8
|
||
defaultMaxPreview = 3
|
||
defaultMaxBNoTime = 200
|
||
defaultAIWindowPadH = 3.0
|
||
)
|
||
|
||
// newAICache 创建缓存实例并加载已有数据
|
||
func newAICache() *AICache {
|
||
c := &AICache{
|
||
filePath: filepath.Join(os.TempDir(), cacheFileName),
|
||
maxSize: 500,
|
||
}
|
||
c.loadFromFile()
|
||
return c
|
||
}
|
||
|
||
// loadFromFile 从磁盘加载缓存
|
||
func (c *AICache) loadFromFile() {
|
||
data, err := os.ReadFile(c.filePath)
|
||
if err != nil {
|
||
return // 文件不存在或无法读取,从空缓存开始
|
||
}
|
||
c.mu.Lock()
|
||
defer c.mu.Unlock()
|
||
_ = json.Unmarshal(data, c) // 忽略解析错误,重置为 entries
|
||
}
|
||
|
||
// saveToFile 将缓存写入磁盘
|
||
func (c *AICache) saveToFile() {
|
||
c.mu.RLock()
|
||
data, err := json.Marshal(c)
|
||
c.mu.RUnlock()
|
||
if err != nil {
|
||
return
|
||
}
|
||
_ = os.WriteFile(c.filePath, data, 0600)
|
||
}
|
||
|
||
// get 根据 hash 查找缓存,命中返回响应,否则返回空
|
||
func (c *AICache) get(hash string) (string, bool) {
|
||
c.mu.RLock()
|
||
defer c.mu.RUnlock()
|
||
for i := range c.Entries {
|
||
if c.Entries[i].PromptHash == hash {
|
||
return c.Entries[i].Response, true
|
||
}
|
||
}
|
||
return "", false
|
||
}
|
||
|
||
// put 存入一条缓存(线程安全 + 自动裁剪)
|
||
func (c *AICache) put(hash, response string) {
|
||
c.mu.Lock()
|
||
defer c.mu.Unlock()
|
||
|
||
// 去重:如果已存在则覆盖
|
||
for i := range c.Entries {
|
||
if c.Entries[i].PromptHash == hash {
|
||
c.Entries[i].Response = response
|
||
c.Entries[i].CreatedAt = time.Now().Unix()
|
||
return
|
||
}
|
||
}
|
||
|
||
c.Entries = append(c.Entries, AICacheEntry{
|
||
PromptHash: hash,
|
||
Response: response,
|
||
CreatedAt: time.Now().Unix(),
|
||
})
|
||
|
||
// 超过上限则删除最旧的条目
|
||
if len(c.Entries) > c.maxSize {
|
||
// 按 CreatedAt 排序保留最新的
|
||
sort.Slice(c.Entries, func(i, j int) bool {
|
||
return c.Entries[i].CreatedAt > c.Entries[j].CreatedAt
|
||
})
|
||
c.Entries = c.Entries[:c.maxSize]
|
||
}
|
||
}
|
||
|
||
// clear 清空所有缓存
|
||
func (c *AICache) clear() {
|
||
c.mu.Lock()
|
||
defer c.mu.Unlock()
|
||
c.Entries = nil
|
||
_ = os.Remove(c.filePath)
|
||
}
|
||
|
||
// stat 返回缓存统计
|
||
func (c *AICache) stat() (count int, path string) {
|
||
c.mu.RLock()
|
||
defer c.mu.RUnlock()
|
||
return len(c.Entries), c.filePath
|
||
}
|
||
|
||
// ---------- App 结构体 ----------
|
||
|
||
type App struct {
|
||
ctx context.Context
|
||
deepseekKey string
|
||
aiCache *AICache
|
||
|
||
// 最近一次匹配的配置和表头(供导出使用)
|
||
lastConfig MatchConfig
|
||
headersA []string
|
||
headersB []string
|
||
}
|
||
|
||
// NewApp 创建 App 实例
|
||
func NewApp() *App {
|
||
return &App{
|
||
aiCache: newAICache(),
|
||
}
|
||
}
|
||
|
||
// startup 保存上下文
|
||
func (a *App) startup(ctx context.Context) {
|
||
a.ctx = ctx
|
||
count, path := a.aiCache.stat()
|
||
fmt.Printf("[CACHE] AI 缓存已加载,当前 %d 条缓存记录 (文件: %s)\n", count, path)
|
||
}
|
||
|
||
// emitProgress 向前端发送进度事件
|
||
func (a *App) emitProgress(current, total int, message, phase string) {
|
||
if a.ctx == nil {
|
||
return
|
||
}
|
||
runtime.EventsEmit(a.ctx, "match-progress", ProgressPayload{
|
||
Current: current,
|
||
Total: total,
|
||
Message: message,
|
||
Phase: phase,
|
||
})
|
||
}
|
||
|
||
// SetDeepseekAPIKey 设置 Deepseek API 密钥(仅保存在内存中)
|
||
func (a *App) SetDeepseekAPIKey(key string) string {
|
||
a.deepseekKey = strings.TrimSpace(key)
|
||
if a.deepseekKey == "" {
|
||
return "已清除 Deepseek API 密钥"
|
||
}
|
||
return "Deepseek API 密钥已设置"
|
||
}
|
||
|
||
// GetDeepseekStatus 返回是否已配置 Deepseek API 密钥
|
||
func (a *App) GetDeepseekStatus() bool {
|
||
return a.deepseekKey != ""
|
||
}
|
||
|
||
// ClearAICache 清除所有 AI 缓存
|
||
func (a *App) ClearAICache() string {
|
||
before, _ := a.aiCache.stat()
|
||
a.aiCache.clear()
|
||
return fmt.Sprintf("已清除 %d 条 AI 缓存记录", before)
|
||
}
|
||
|
||
// GetAICacheInfo 返回 AI 缓存信息(条目数、文件路径)
|
||
func (a *App) GetAICacheInfo() AICacheInfo {
|
||
count, path := a.aiCache.stat()
|
||
return AICacheInfo{Count: count, FilePath: path}
|
||
}
|
||
|
||
// ---------- 文件选择对话框 ----------
|
||
|
||
// OpenMonthlyReport 打开文件对话框选择月报
|
||
func (a *App) OpenMonthlyReport() (string, error) {
|
||
file, err := runtime.OpenFileDialog(a.ctx, runtime.OpenDialogOptions{
|
||
Title: "选择月报文件",
|
||
Filters: []runtime.FileFilter{
|
||
{DisplayName: "Excel / CSV 文件 (*.xlsx, *.xls, *.csv)", Pattern: "*.xlsx;*.xls;*.csv"},
|
||
},
|
||
})
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
return file, nil
|
||
}
|
||
|
||
// OpenDailyReport 打开文件对话框选择日报
|
||
func (a *App) OpenDailyReport() (string, error) {
|
||
file, err := runtime.OpenFileDialog(a.ctx, runtime.OpenDialogOptions{
|
||
Title: "选择日报文件",
|
||
Filters: []runtime.FileFilter{
|
||
{DisplayName: "Excel / CSV 文件 (*.xlsx, *.xls, *.csv)", Pattern: "*.xlsx;*.xls;*.csv"},
|
||
},
|
||
})
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
return file, nil
|
||
}
|
||
|
||
// OpenFileA 打开文件对话框选择 A 表(基准表)
|
||
func (a *App) OpenFileA() (string, error) {
|
||
return a.openFileDialog("选择 A 表文件(基准表)")
|
||
}
|
||
|
||
// OpenFileB 打开文件对话框选择 B 表(数据源表)
|
||
func (a *App) OpenFileB() (string, error) {
|
||
return a.openFileDialog("选择 B 表文件(数据源表)")
|
||
}
|
||
|
||
func (a *App) openFileDialog(title string) (string, error) {
|
||
file, err := runtime.OpenFileDialog(a.ctx, runtime.OpenDialogOptions{
|
||
Title: title,
|
||
Filters: []runtime.FileFilter{
|
||
{DisplayName: "Excel / CSV 文件 (*.xlsx, *.xls, *.csv)", Pattern: "*.xlsx;*.xls;*.csv"},
|
||
},
|
||
})
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
return file, nil
|
||
}
|
||
|
||
// ParseHeaders 读取文件第一行作为表头数组返回给前端,用于动态渲染列映射下拉框
|
||
func (a *App) ParseHeaders(filePath string) ([]string, error) {
|
||
if filePath == "" {
|
||
return nil, fmt.Errorf("文件路径为空")
|
||
}
|
||
allRows, err := a.readRawRows(filePath)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if len(allRows) == 0 {
|
||
return nil, fmt.Errorf("文件为空,无表头")
|
||
}
|
||
headers := allRows[0]
|
||
// TrimSpace 每个表头
|
||
for i := range headers {
|
||
headers[i] = strings.TrimSpace(headers[i])
|
||
}
|
||
fmt.Printf("[DEBUG] ParseHeaders: '%s' → %d 列 %v\n", filepath.Base(filePath), len(headers), headers)
|
||
return headers, nil
|
||
}
|
||
|
||
var nonChineseRegex = regexp.MustCompile(`[^\p{Han}]+`)
|
||
|
||
// CleanString 剔除字符串中的所有非中文字符,仅保留纯中文字符
|
||
func (a *App) CleanString(input string) string {
|
||
return nonChineseRegex.ReplaceAllString(input, "")
|
||
}
|
||
|
||
// ---------- 健壮的时间解析 ----------
|
||
|
||
// 多种时间格式,覆盖月报和日报的不同格式
|
||
var timeFormats = []string{
|
||
"2006-01-02 15:04:05",
|
||
"2006-01-02 15:04",
|
||
"2006/01/02 15:04:05",
|
||
"2006/01/02 15:04",
|
||
"2006-1-2 15:04:05",
|
||
"2006-1-2 15:04",
|
||
"2006/1/2 15:04:05",
|
||
"2006/1/2 15:04",
|
||
"2006-01-02T15:04:05",
|
||
"2006/01/02T15:04:05",
|
||
"01/02/2006 15:04",
|
||
"1/2/2006 15:04",
|
||
"2006-01-02",
|
||
"2006/01/02",
|
||
}
|
||
|
||
// parseTimeFlexible 使用多种格式尝试解析时间字符串
|
||
func parseTimeFlexible(timeStr string) (time.Time, error) {
|
||
timeStr = strings.TrimSpace(timeStr)
|
||
for _, format := range timeFormats {
|
||
if t, err := time.Parse(format, timeStr); err == nil {
|
||
return t, nil
|
||
}
|
||
}
|
||
return time.Time{}, fmt.Errorf("无法解析时间格式: %s", timeStr)
|
||
}
|
||
|
||
// ---------- Levenshtein 距离算法 ----------
|
||
|
||
func levenshteinDistance(s1, s2 string) int {
|
||
runes1 := []rune(s1)
|
||
runes2 := []rune(s2)
|
||
m, n := len(runes1), len(runes2)
|
||
|
||
// 使用一维数组优化空间复杂度
|
||
dp := make([]int, n+1)
|
||
for j := range dp {
|
||
dp[j] = j
|
||
}
|
||
|
||
for i := 1; i <= m; i++ {
|
||
prev := dp[0]
|
||
dp[0] = i
|
||
for j := 1; j <= n; j++ {
|
||
temp := dp[j]
|
||
cost := 1
|
||
if runes1[i-1] == runes2[j-1] {
|
||
cost = 0
|
||
}
|
||
dp[j] = min(dp[j]+1, min(dp[j-1]+1, prev+cost))
|
||
prev = temp
|
||
}
|
||
}
|
||
return dp[n]
|
||
}
|
||
|
||
func min(a, b int) int {
|
||
if a < b {
|
||
return a
|
||
}
|
||
return b
|
||
}
|
||
|
||
// CalculateSimilarity 计算清洗后中文名称的相似度(基于 Levenshtein 距离归一化)
|
||
func (a *App) CalculateSimilarity(s1, s2 string) float64 {
|
||
return calcSimilarity(s1, s2, nonChineseRegex, false)
|
||
}
|
||
|
||
// calcSimilarity 带自定义正则的相似度计算;reg 为 nil 时不做清洗直接比对
|
||
func calcSimilarity(s1, s2 string, reg *regexp.Regexp, caseSensitive bool) float64 {
|
||
clean1 := s1
|
||
clean2 := s2
|
||
if reg != nil {
|
||
clean1 = reg.ReplaceAllString(s1, "")
|
||
clean2 = reg.ReplaceAllString(s2, "")
|
||
}
|
||
if !caseSensitive {
|
||
clean1 = strings.ToLower(clean1)
|
||
clean2 = strings.ToLower(clean2)
|
||
}
|
||
|
||
r1 := []rune(clean1)
|
||
r2 := []rune(clean2)
|
||
|
||
if len(r1) == 0 && len(r2) == 0 {
|
||
return 1.0
|
||
}
|
||
if len(r1) == 0 || len(r2) == 0 {
|
||
return 0.0
|
||
}
|
||
|
||
dist := levenshteinDistance(clean1, clean2)
|
||
maxLen := math.Max(float64(len(r1)), float64(len(r2)))
|
||
return 1.0 - float64(dist)/maxLen
|
||
}
|
||
|
||
// cleanWithRegex 使用自定义正则清洗字符串;reg 为 nil 时返回原文
|
||
func cleanWithRegex(input string, reg *regexp.Regexp) string {
|
||
if reg == nil {
|
||
return input
|
||
}
|
||
return reg.ReplaceAllString(input, "")
|
||
}
|
||
|
||
// ---------- 文件读取(通用)----------
|
||
|
||
// readRawRows 读取 Excel/CSV 文件,返回原始二维字符串切片(row[0] = 表头)
|
||
func (a *App) readRawRows(path string) ([][]string, error) {
|
||
ext := strings.ToLower(filepath.Ext(path))
|
||
switch ext {
|
||
case ".csv":
|
||
return a.readCSVRaw(path)
|
||
default:
|
||
return a.readExcelRaw(path)
|
||
}
|
||
}
|
||
|
||
func (a *App) readCSVRaw(path string) ([][]string, error) {
|
||
f, err := os.Open(path)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("打开 CSV 文件失败: %v", err)
|
||
}
|
||
defer f.Close()
|
||
|
||
reader := csv.NewReader(f)
|
||
reader.LazyQuotes = true
|
||
reader.TrimLeadingSpace = true
|
||
allRows, err := reader.ReadAll()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("读取 CSV 文件失败: %v", err)
|
||
}
|
||
if len(allRows) < 2 {
|
||
return nil, fmt.Errorf("CSV 文件至少需要标题行和一条数据")
|
||
}
|
||
for i := range allRows {
|
||
for j := range allRows[i] {
|
||
allRows[i][j] = strings.TrimSpace(allRows[i][j])
|
||
}
|
||
}
|
||
return allRows, nil
|
||
}
|
||
|
||
func (a *App) readExcelRaw(path string) ([][]string, error) {
|
||
f, err := excelize.OpenFile(path)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("打开 Excel 文件失败: %v", err)
|
||
}
|
||
defer f.Close()
|
||
|
||
sheetName := f.GetSheetName(0)
|
||
allRows, err := f.GetRows(sheetName)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("读取工作表失败: %v", err)
|
||
}
|
||
if len(allRows) < 2 {
|
||
return nil, fmt.Errorf("Excel 文件至少需要标题行和一条数据")
|
||
}
|
||
return allRows, nil
|
||
}
|
||
|
||
// getCell 安全获取行中指定索引的单元格值,越界返回空字符串
|
||
func getCell(row []string, idx int) string {
|
||
if idx < 0 || idx >= len(row) {
|
||
return ""
|
||
}
|
||
return strings.TrimSpace(row[idx])
|
||
}
|
||
|
||
|
||
// RunMatch 接收完整 MatchConfig,按列索引执行通用匹配
|
||
func (a *App) RunMatch(config MatchConfig) ([]MatchResult, error) {
|
||
// 1. 编译正则
|
||
reg, err := compileRegex(config.RegexPattern)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
// 2. 默认值兜底
|
||
timeWindow := config.TimeWindow
|
||
if timeWindow <= 0 {
|
||
timeWindow = defaultTimeWindowH
|
||
}
|
||
threshold := config.Threshold
|
||
if threshold <= 0 {
|
||
threshold = defaultThreshold
|
||
}
|
||
|
||
// 3. 读取原始数据
|
||
a.emitProgress(0, 100, "正在读取 A 表...", "reading")
|
||
rowsA, err := a.readRawRows(config.FileAPath)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("读取 A 表失败: %v", err)
|
||
}
|
||
a.emitProgress(0, 100, "正在读取 B 表...", "reading")
|
||
rowsB, err := a.readRawRows(config.FileBPath)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("读取 B 表失败: %v", err)
|
||
}
|
||
if len(rowsA) < 2 {
|
||
return nil, fmt.Errorf("A 表无有效数据行")
|
||
}
|
||
if len(rowsB) < 2 {
|
||
return nil, fmt.Errorf("B 表无有效数据行")
|
||
}
|
||
|
||
// 保存表头供导出使用
|
||
a.headersA = rowsA[0]
|
||
a.headersB = rowsB[0]
|
||
a.lastConfig = config
|
||
|
||
dataA := rowsA[1:]
|
||
dataB := rowsB[1:]
|
||
windowDuration := time.Duration(timeWindow * float64(time.Hour))
|
||
|
||
return a.runMatchOnData(dataA, dataB, reg, config, timeWindow, threshold, windowDuration)
|
||
}
|
||
|
||
// runMatchOnData 在已读取的数据上执行匹配(内部使用,避免重复 I/O)
|
||
func (a *App) runMatchOnData(dataA, dataB [][]string, reg *regexp.Regexp, config MatchConfig, _, threshold float64, windowDuration time.Duration) ([]MatchResult, error) {
|
||
useTime := config.ColATimeIndex >= 0 && config.ColBTimeIndex >= 0
|
||
totalA := len(dataA)
|
||
var results []MatchResult
|
||
|
||
useAllMatches := config.AllMatches
|
||
maxPreview := config.MaxPreview
|
||
if maxPreview <= 0 {
|
||
maxPreview = defaultMaxPreview
|
||
}
|
||
|
||
for i, rowA := range dataA {
|
||
if i%10 == 0 || i == totalA-1 {
|
||
pct := (i + 1) * 100 / totalA
|
||
a.emitProgress(i+1, totalA,
|
||
fmt.Sprintf("匹配中 %d/%d (%d%%)...", i+1, totalA, pct), "matching")
|
||
}
|
||
|
||
matchStrA := getCell(rowA, config.ColAMatchIndex)
|
||
if matchStrA == "" {
|
||
continue
|
||
}
|
||
|
||
var timeA time.Time
|
||
var hasTimeA bool
|
||
if useTime {
|
||
t, err := parseTimeFlexible(getCell(rowA, config.ColATimeIndex))
|
||
if err == nil {
|
||
timeA = t
|
||
hasTimeA = true
|
||
}
|
||
}
|
||
|
||
cleanA := cleanWithRegex(matchStrA, reg)
|
||
// 收集该 A 行的所有候选匹配
|
||
var candidates []MatchResult
|
||
|
||
for _, rowB := range dataB {
|
||
matchStrB := getCell(rowB, config.ColBMatchIndex)
|
||
if matchStrB == "" {
|
||
continue
|
||
}
|
||
|
||
var timeDiff time.Duration
|
||
if hasTimeA && useTime {
|
||
tB, err := parseTimeFlexible(getCell(rowB, config.ColBTimeIndex))
|
||
if err != nil {
|
||
continue
|
||
}
|
||
td := timeA.Sub(tB)
|
||
if td < -windowDuration || td > windowDuration {
|
||
continue
|
||
}
|
||
timeDiff = td
|
||
}
|
||
|
||
cleanB := cleanWithRegex(matchStrB, reg)
|
||
if cleanA == "" || cleanB == "" {
|
||
continue
|
||
}
|
||
|
||
similarity := calcSimilarity(matchStrA, matchStrB, reg, config.CaseSensitive)
|
||
|
||
if i < maxPreview {
|
||
fmt.Printf("[DEBUG] | A[%d]='%s'→'%s' | B='%s'→'%s' | 相似度=%.4f\n",
|
||
i, matchStrA, cleanA, matchStrB, cleanB, similarity)
|
||
}
|
||
|
||
if similarity >= threshold {
|
||
mr := MatchResult{
|
||
RowAData: rowA,
|
||
RowBKey: matchStrB,
|
||
ExtractValue: getCell(rowB, config.ColBExtractIndex),
|
||
TimeDiff: formatTimeDiff(timeDiff),
|
||
SimilarityScore: math.Round(similarity*10000) / 10000,
|
||
AIMatched: false,
|
||
}
|
||
if useAllMatches {
|
||
candidates = append(candidates, mr)
|
||
} else {
|
||
if len(candidates) == 0 || similarity > candidates[0].SimilarityScore {
|
||
candidates = []MatchResult{mr}
|
||
}
|
||
// B7: 完美匹配时提前退出
|
||
if similarity == 1.0 {
|
||
break
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if len(candidates) > 0 {
|
||
if i < maxPreview {
|
||
for _, c := range candidates {
|
||
fmt.Printf("[DEBUG] ✓ 命中 | A='%s'→B='%s' | 相似度=%.4f\n",
|
||
matchStrA, c.RowBKey, c.SimilarityScore)
|
||
}
|
||
}
|
||
results = append(results, candidates...)
|
||
}
|
||
}
|
||
|
||
// 结果排序
|
||
switch config.SortBy {
|
||
case "similarity":
|
||
sort.Slice(results, func(i, j int) bool {
|
||
return results[i].SimilarityScore > results[j].SimilarityScore
|
||
})
|
||
case "timeDiff":
|
||
// B5: 使用原始数据重新解析时间差做数值排序
|
||
sort.Slice(results, func(i, j int) bool {
|
||
return parseTimeDiffDuration(results[i].TimeDiff) < parseTimeDiffDuration(results[j].TimeDiff)
|
||
})
|
||
}
|
||
|
||
a.emitProgress(totalA, totalA,
|
||
fmt.Sprintf("匹配完成!共匹配成功 %d 条记录", len(results)), "done")
|
||
|
||
return results, nil
|
||
}
|
||
|
||
// compileRegex 编译正则,nil 表示跳过清洗
|
||
func compileRegex(pattern string) (*regexp.Regexp, error) {
|
||
if pattern == "" {
|
||
return nil, nil
|
||
}
|
||
reg, err := regexp.Compile(pattern)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("正则表达式格式错误,请检查: %v", err)
|
||
}
|
||
fmt.Printf("[DEBUG] 使用正则: '%s'\n", pattern)
|
||
return reg, nil
|
||
}
|
||
|
||
// parseTimeDiffDuration 将 TimeDiff 字符串(如 "1h30m")解析为 time.Duration(用于排序)
|
||
func parseTimeDiffDuration(s string) time.Duration {
|
||
if s == "" {
|
||
return 0
|
||
}
|
||
sign := time.Duration(1)
|
||
if s[0] == '-' {
|
||
sign = -1
|
||
s = s[1:]
|
||
}
|
||
d, err := time.ParseDuration(s)
|
||
if err != nil {
|
||
return 0
|
||
}
|
||
return sign * d
|
||
}
|
||
|
||
// RunMatchWithAI 执行基础匹配 + Deepseek AI 增强匹配(配置驱动)
|
||
func (a *App) RunMatchWithAI(config MatchConfig) ([]MatchResult, error) {
|
||
if a.deepseekKey == "" {
|
||
return nil, fmt.Errorf("请先设置 Deepseek API 密钥")
|
||
}
|
||
|
||
// 1. 编译正则
|
||
reg, err := compileRegex(config.RegexPattern)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
// 2. 默认值兜底
|
||
timeWindow := config.TimeWindow
|
||
if timeWindow <= 0 {
|
||
timeWindow = defaultTimeWindowH
|
||
}
|
||
threshold := config.Threshold
|
||
if threshold <= 0 {
|
||
threshold = defaultThreshold
|
||
}
|
||
windowDuration := time.Duration(timeWindow * float64(time.Hour))
|
||
|
||
// 3. 一次读取数据,供匹配和 AI 使用
|
||
a.emitProgress(0, 100, "正在读取 A 表...", "reading")
|
||
rowsA, err := a.readRawRows(config.FileAPath)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("读取 A 表失败: %v", err)
|
||
}
|
||
a.emitProgress(0, 100, "正在读取 B 表...", "reading")
|
||
rowsB, err := a.readRawRows(config.FileBPath)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("读取 B 表失败: %v", err)
|
||
}
|
||
if len(rowsA) < 2 || len(rowsB) < 2 {
|
||
return nil, fmt.Errorf("数据表无有效数据行")
|
||
}
|
||
|
||
// 保存表头供导出使用
|
||
a.headersA = rowsA[0]
|
||
a.headersB = rowsB[0]
|
||
a.lastConfig = config
|
||
|
||
dataA := rowsA[1:]
|
||
dataB := rowsB[1:]
|
||
|
||
// 4. 先执行基础匹配(使用已读取的数据,避免重复 I/O)
|
||
results, err := a.runMatchOnData(dataA, dataB, reg, config, timeWindow, threshold, windowDuration)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
// 5. 找出未被基础匹配覆盖的 A 表行
|
||
matchedSet := make(map[string]bool)
|
||
for _, r := range results {
|
||
matchedSet[strings.Join(r.RowAData, "\x00")] = true
|
||
}
|
||
|
||
var unmatchedA [][]string
|
||
for _, row := range dataA {
|
||
if !matchedSet[strings.Join(row, "\x00")] {
|
||
unmatchedA = append(unmatchedA, row)
|
||
}
|
||
}
|
||
|
||
if len(unmatchedA) == 0 {
|
||
a.emitProgress(1, 1, "全部已匹配,无需 AI 增强", "done")
|
||
return results, nil
|
||
}
|
||
|
||
// 6. AI 增强匹配
|
||
useTime := config.ColATimeIndex >= 0 && config.ColBTimeIndex >= 0
|
||
totalUnmatched := len(unmatchedA)
|
||
a.emitProgress(0, totalUnmatched,
|
||
fmt.Sprintf("AI 增强匹配:还有 %d 条未匹配记录,正在调用 Deepseek...", totalUnmatched),
|
||
"ai-enhancing")
|
||
|
||
aiMatched := 0
|
||
|
||
for batchStart := 0; batchStart < totalUnmatched; batchStart += defaultBatchSize {
|
||
end := batchStart + defaultBatchSize
|
||
if end > totalUnmatched {
|
||
end = totalUnmatched
|
||
}
|
||
|
||
a.emitProgress(batchStart+1, totalUnmatched,
|
||
fmt.Sprintf("AI 分析中 %d/%d (第 %d 批)...", end, totalUnmatched, (batchStart/defaultBatchSize)+1),
|
||
"ai-enhancing")
|
||
|
||
batch := unmatchedA[batchStart:end]
|
||
|
||
// 计算本批 A 表的时间范围
|
||
var minTime, maxTime time.Time
|
||
hasBatchTime := false
|
||
if useTime {
|
||
for _, row := range batch {
|
||
t, err := parseTimeFlexible(getCell(row, config.ColATimeIndex))
|
||
if err != nil {
|
||
continue
|
||
}
|
||
if !hasBatchTime {
|
||
minTime, maxTime = t, t
|
||
hasBatchTime = true
|
||
} else {
|
||
if t.Before(minTime) {
|
||
minTime = t
|
||
}
|
||
if t.After(maxTime) {
|
||
maxTime = t
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// 过滤 B 表在时间窗口内的行(用户配置时间窗口 + 额外余量覆盖批次跨度)
|
||
var relevantB [][]string
|
||
if hasBatchTime && useTime {
|
||
padding := windowDuration + time.Duration(defaultAIWindowPadH)*time.Hour
|
||
ws := minTime.Add(-padding)
|
||
we := maxTime.Add(padding)
|
||
for _, row := range dataB {
|
||
t, err := parseTimeFlexible(getCell(row, config.ColBTimeIndex))
|
||
if err != nil || t.Before(ws) || t.After(we) {
|
||
continue
|
||
}
|
||
relevantB = append(relevantB, row)
|
||
}
|
||
} else {
|
||
// 无时间列时限制 B 表条数以控制 token 消耗
|
||
maxB := defaultMaxBNoTime
|
||
if len(dataB) < maxB {
|
||
maxB = len(dataB)
|
||
}
|
||
relevantB = dataB[:maxB]
|
||
}
|
||
|
||
// 构建 AI 提示
|
||
prompt := a.buildGenericAIPrompt(batch, relevantB, config, windowDuration, hasBatchTime)
|
||
aiResp, err := a.callDeepseekAPI(prompt)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
|
||
// 解析 AI 返回
|
||
var matchResp struct {
|
||
Matches []struct {
|
||
Index int `json:"index"`
|
||
Value string `json:"value"`
|
||
} `json:"matches"`
|
||
}
|
||
parseErr := json.Unmarshal([]byte(aiResp), &matchResp)
|
||
if parseErr != nil {
|
||
if idx := strings.Index(aiResp, "{"); idx >= 0 {
|
||
if endIdx := strings.LastIndex(aiResp, "}"); endIdx > idx {
|
||
parseErr = json.Unmarshal([]byte(aiResp[idx:endIdx+1]), &matchResp)
|
||
}
|
||
}
|
||
}
|
||
if parseErr != nil {
|
||
fmt.Printf("[AI-WARN] 响应解析失败 (第 %d 批): %s\n 原始响应: %.200s\n",
|
||
(batchStart/defaultBatchSize)+1, parseErr.Error(), aiResp)
|
||
continue
|
||
}
|
||
|
||
for _, item := range matchResp.Matches {
|
||
idx := item.Index
|
||
val := strings.TrimSpace(item.Value)
|
||
if idx < 0 || idx >= len(batch) || val == "" {
|
||
continue
|
||
}
|
||
rowA := batch[idx]
|
||
mr := MatchResult{
|
||
RowAData: rowA,
|
||
RowBKey: "",
|
||
ExtractValue: val,
|
||
SimilarityScore: 0,
|
||
AIMatched: true,
|
||
}
|
||
results = append(results, mr)
|
||
aiMatched++
|
||
}
|
||
}
|
||
|
||
a.emitProgress(totalUnmatched, totalUnmatched,
|
||
fmt.Sprintf("AI 增强完成!基础匹配 %d 条 + AI 补充 %d 条 = 共 %d 条",
|
||
len(results)-aiMatched, aiMatched, len(results)), "done")
|
||
|
||
return results, nil
|
||
}
|
||
|
||
// buildGenericAIPrompt 构建通用 AI 匹配提示词
|
||
func (a *App) buildGenericAIPrompt(unmatched, bRows [][]string, config MatchConfig, windowDuration time.Duration, hasTime bool) []deepseekMessage {
|
||
var sb strings.Builder
|
||
sb.WriteString("你是一个数据匹配专家。请根据以下 A 表记录,从 B 表数据中找出最匹配的记录。\n\n")
|
||
sb.WriteString("匹配规则:\n")
|
||
sb.WriteString("1. 根据文本相似度匹配(注意中文字段的核心含义,忽略字母数字前缀后缀)\n")
|
||
if hasTime {
|
||
sb.WriteString(fmt.Sprintf("2. 时间差应在 %.0f 小时内\n", windowDuration.Hours()))
|
||
}
|
||
sb.WriteString(fmt.Sprintf("3. 返回匹配到的 B 表记录的目标列值(第 %d 列)\n\n", config.ColBExtractIndex+1))
|
||
|
||
sb.WriteString("请严格按照以下 JSON 格式返回结果:\n")
|
||
sb.WriteString(`{"matches":[{"index":0,"value":"匹配到的目标列值"},{"index":1,"value":""}]}` + "\n")
|
||
sb.WriteString("如果某条无法匹配,value 设为空字符串。\n\n")
|
||
|
||
sb.WriteString(fmt.Sprintf("A 表记录(需要匹配,共 %d 条):\n", len(unmatched)))
|
||
for i, row := range unmatched {
|
||
matchVal := getCell(row, config.ColAMatchIndex)
|
||
sb.WriteString(fmt.Sprintf("- 索引 %d: 「%s」", i, matchVal))
|
||
if hasTime {
|
||
sb.WriteString(fmt.Sprintf(", 时间=%s", getCell(row, config.ColATimeIndex)))
|
||
}
|
||
sb.WriteString("\n")
|
||
}
|
||
|
||
sb.WriteString(fmt.Sprintf("\nB 表参考数据(共 %d 条):\n", len(bRows)))
|
||
for _, row := range bRows {
|
||
matchVal := getCell(row, config.ColBMatchIndex)
|
||
extractVal := getCell(row, config.ColBExtractIndex)
|
||
sb.WriteString(fmt.Sprintf(" 「%s」 → 目标列值: 「%s」", matchVal, extractVal))
|
||
if hasTime {
|
||
sb.WriteString(fmt.Sprintf(", 时间=%s", getCell(row, config.ColBTimeIndex)))
|
||
}
|
||
sb.WriteString("\n")
|
||
}
|
||
|
||
sb.WriteString("\n请返回 JSON 格式的匹配结果。")
|
||
|
||
return []deepseekMessage{
|
||
{Role: "system", Content: "你是一个数据匹配专家。请严格按照 JSON 格式返回结果,不要添加额外说明。"},
|
||
{Role: "user", Content: sb.String()},
|
||
}
|
||
}
|
||
|
||
// ---------- Deepseek AI 增强匹配 ----------
|
||
|
||
// hashPrompt 对 prompt 消息计算 SHA256(用于缓存键)
|
||
func hashPrompt(messages []deepseekMessage) string {
|
||
h := sha256.New()
|
||
for _, m := range messages {
|
||
h.Write([]byte(m.Role))
|
||
h.Write([]byte{0})
|
||
h.Write([]byte(m.Content))
|
||
h.Write([]byte{0})
|
||
}
|
||
return hex.EncodeToString(h.Sum(nil))
|
||
}
|
||
|
||
// callDeepseekAPI 调用 Deepseek Chat API(带缓存)
|
||
func (a *App) callDeepseekAPI(messages []deepseekMessage) (string, error) {
|
||
if a.deepseekKey == "" {
|
||
return "", fmt.Errorf("请先设置 Deepseek API 密钥")
|
||
}
|
||
|
||
hash := hashPrompt(messages)
|
||
|
||
// 先查缓存
|
||
if cached, ok := a.aiCache.get(hash); ok {
|
||
fmt.Printf("[CACHE] ✓ 命中 AI 缓存 (hash=%s)\n", hash[:12])
|
||
return cached, nil
|
||
}
|
||
fmt.Printf("[CACHE] ✗ 缓存未命中 (hash=%s),调用 API...\n", hash[:12])
|
||
|
||
reqBody := deepseekRequest{
|
||
Model: "deepseek-chat",
|
||
Messages: messages,
|
||
Temperature: 0.05,
|
||
MaxTokens: 2048,
|
||
}
|
||
|
||
bodyBytes, _ := json.Marshal(reqBody)
|
||
httpReq, err := http.NewRequest("POST", "https://api.deepseek.com/v1/chat/completions",
|
||
bytes.NewReader(bodyBytes))
|
||
if err != nil {
|
||
return "", fmt.Errorf("创建请求失败: %v", err)
|
||
}
|
||
httpReq.Header.Set("Content-Type", "application/json")
|
||
httpReq.Header.Set("Authorization", "Bearer "+a.deepseekKey)
|
||
|
||
client := &http.Client{Timeout: 60 * time.Second}
|
||
resp, err := client.Do(httpReq)
|
||
if err != nil {
|
||
return "", fmt.Errorf("调用 Deepseek API 失败: %v", err)
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
respBytes, _ := io.ReadAll(resp.Body)
|
||
var dr deepseekResponse
|
||
if err := json.Unmarshal(respBytes, &dr); err != nil {
|
||
return "", fmt.Errorf("解析 Deepseek 响应失败: %v", err)
|
||
}
|
||
|
||
if dr.Error != nil {
|
||
return "", fmt.Errorf("Deepseek API 错误: %s", dr.Error.Message)
|
||
}
|
||
|
||
if len(dr.Choices) == 0 {
|
||
return "", fmt.Errorf("Deepseek 未返回有效结果")
|
||
}
|
||
|
||
result := strings.TrimSpace(dr.Choices[0].Message.Content)
|
||
|
||
// 写入缓存并持久化
|
||
a.aiCache.put(hash, result)
|
||
a.aiCache.saveToFile()
|
||
|
||
return result, nil
|
||
}
|
||
|
||
|
||
// formatTimeDiff 格式化时间差为可读字符串
|
||
func formatTimeDiff(d time.Duration) string {
|
||
abs := d
|
||
if abs < 0 {
|
||
abs = -abs
|
||
}
|
||
hours := int(abs.Hours())
|
||
mins := int(abs.Minutes()) % 60
|
||
secs := int(abs.Seconds()) % 60
|
||
|
||
sign := ""
|
||
if d < 0 {
|
||
sign = "-"
|
||
}
|
||
if hours > 0 {
|
||
return fmt.Sprintf("%s%dh%dm%ds", sign, hours, mins, secs)
|
||
} else if mins > 0 {
|
||
return fmt.Sprintf("%s%dm%ds", sign, mins, secs)
|
||
}
|
||
return fmt.Sprintf("%s%ds", sign, secs)
|
||
}
|
||
|
||
// ---------- 导出结果 ----------
|
||
|
||
// ExportResults 将匹配结果导出为 Excel 或 CSV 文件
|
||
func (a *App) ExportResults(results []MatchResult) (string, error) {
|
||
if len(results) == 0 {
|
||
return "", fmt.Errorf("没有匹配结果可以导出")
|
||
}
|
||
|
||
isCSV := a.lastConfig.ExportFormat == "csv"
|
||
ext := ".xlsx"
|
||
filterDisplay := "Excel 文件 (*.xlsx)"
|
||
filterPattern := "*.xlsx"
|
||
if isCSV {
|
||
ext = ".csv"
|
||
filterDisplay = "CSV 文件 (*.csv)"
|
||
filterPattern = "*.csv"
|
||
}
|
||
|
||
savePath, err := runtime.SaveFileDialog(a.ctx, runtime.SaveDialogOptions{
|
||
Title: "导出匹配结果",
|
||
DefaultFilename: fmt.Sprintf("匹配结果_%s%s", time.Now().Format("20060102_150405"), ext),
|
||
Filters: []runtime.FileFilter{
|
||
{DisplayName: filterDisplay, Pattern: filterPattern},
|
||
},
|
||
})
|
||
if err != nil {
|
||
return "", fmt.Errorf("打开保存对话框失败: %v", err)
|
||
}
|
||
if savePath == "" {
|
||
return "", nil
|
||
}
|
||
if !strings.HasSuffix(strings.ToLower(savePath), ext) {
|
||
savePath += ext
|
||
}
|
||
|
||
if isCSV {
|
||
return a.exportResultsCSV(results, savePath)
|
||
}
|
||
return a.exportResultsXLSX(results, savePath)
|
||
}
|
||
|
||
// exportHeaders 构建导出表头行(使用真实表头或回退默认)
|
||
func (a *App) exportHeaders(numACols int) []string {
|
||
headers := make([]string, 0, numACols+1)
|
||
if len(a.headersA) >= numACols {
|
||
for _, h := range a.headersA[:numACols] {
|
||
n := h
|
||
if n == "" {
|
||
n = fmt.Sprintf("Col%d", len(headers)+1)
|
||
}
|
||
headers = append(headers, n)
|
||
}
|
||
} else {
|
||
for i := 0; i < numACols; i++ {
|
||
headers = append(headers, fmt.Sprintf("A-Col%d", i+1))
|
||
}
|
||
}
|
||
headers = append(headers, "匹配结果(由B表提取)")
|
||
return headers
|
||
}
|
||
|
||
func (a *App) exportResultsXLSX(results []MatchResult, savePath string) (string, error) {
|
||
f := excelize.NewFile()
|
||
defer f.Close()
|
||
sheetName := "匹配结果"
|
||
f.SetSheetName("Sheet1", sheetName)
|
||
|
||
numACols := len(results[0].RowAData)
|
||
colLetter := func(n int) string { c, _ := excelize.ColumnNumberToName(n + 1); return c }
|
||
|
||
headers := a.exportHeaders(numACols)
|
||
extractCol := numACols
|
||
|
||
// 表头
|
||
if a.lastConfig.IncludeHeader {
|
||
for i, h := range headers {
|
||
f.SetCellValue(sheetName, fmt.Sprintf("%s1", colLetter(i)), h)
|
||
}
|
||
headerStyle, _ := f.NewStyle(&excelize.Style{
|
||
Font: &excelize.Font{Bold: true, Size: 12, Color: "FFFFFF"},
|
||
Fill: excelize.Fill{Type: "pattern", Pattern: 1, Color: []string{"4472C4"}},
|
||
})
|
||
f.SetCellStyle(sheetName, "A1", fmt.Sprintf("%s1", colLetter(extractCol)), headerStyle)
|
||
}
|
||
|
||
// 数据行
|
||
for i, r := range results {
|
||
rowNum := i + 2
|
||
if !a.lastConfig.IncludeHeader {
|
||
rowNum = i + 1
|
||
}
|
||
for ci := 0; ci < numACols; ci++ {
|
||
f.SetCellValue(sheetName, fmt.Sprintf("%s%d", colLetter(ci), rowNum), r.RowAData[ci])
|
||
}
|
||
f.SetCellValue(sheetName, fmt.Sprintf("%s%d", colLetter(extractCol), rowNum), r.ExtractValue)
|
||
}
|
||
|
||
// 列宽
|
||
for ci := 0; ci <= numACols; ci++ {
|
||
f.SetColWidth(sheetName, colLetter(ci), colLetter(ci), 22)
|
||
}
|
||
|
||
if err := f.SaveAs(savePath); err != nil {
|
||
return "", fmt.Errorf("保存文件失败: %v", err)
|
||
}
|
||
return savePath, nil
|
||
}
|
||
|
||
func (a *App) exportResultsCSV(results []MatchResult, savePath string) (string, error) {
|
||
var buf bytes.Buffer
|
||
// 使用 UTF-8 BOM 帮助 Excel 正确识别编码
|
||
buf.Write([]byte{0xEF, 0xBB, 0xBF})
|
||
|
||
numACols := len(results[0].RowAData)
|
||
headers := a.exportHeaders(numACols)
|
||
|
||
// 表头行
|
||
if a.lastConfig.IncludeHeader {
|
||
for i, h := range headers {
|
||
if i > 0 {
|
||
buf.WriteByte(',')
|
||
}
|
||
buf.WriteString(csvEscape(h))
|
||
}
|
||
buf.WriteByte('\n')
|
||
}
|
||
|
||
// 数据行
|
||
for _, r := range results {
|
||
for ci := 0; ci < numACols; ci++ {
|
||
if ci > 0 {
|
||
buf.WriteByte(',')
|
||
}
|
||
buf.WriteString(csvEscape(r.RowAData[ci]))
|
||
}
|
||
buf.WriteByte(',')
|
||
buf.WriteString(csvEscape(r.ExtractValue))
|
||
buf.WriteByte('\n')
|
||
}
|
||
|
||
if err := os.WriteFile(savePath, buf.Bytes(), 0600); err != nil {
|
||
return "", fmt.Errorf("保存 CSV 文件失败: %v", err)
|
||
}
|
||
return savePath, nil
|
||
}
|
||
|
||
// csvEscape 对 CSV 字段进行转义(含逗号或引号时包裹双引号)
|
||
func csvEscape(s string) string {
|
||
if strings.ContainsAny(s, "\",\n\r") {
|
||
return `"` + strings.ReplaceAll(s, `"`, `""`) + `"`
|
||
}
|
||
return s
|
||
}
|