659 lines
17 KiB
Go
659 lines
17 KiB
Go
|
|
package pdf
|
|||
|
|
|
|||
|
|
import (
	"encoding/json"
	"fmt"
	"regexp"
	"sort"
	"strings"
)
|
|||
|
|
|
|||
|
|
// MarkdownConverter Markdown转换器 - 将各种格式的markdown内容标准化
|
|||
|
|
type MarkdownConverter struct {
|
|||
|
|
textProcessor *TextProcessor
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// NewMarkdownConverter 创建Markdown转换器
|
|||
|
|
func NewMarkdownConverter(textProcessor *TextProcessor) *MarkdownConverter {
|
|||
|
|
return &MarkdownConverter{
|
|||
|
|
textProcessor: textProcessor,
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ConvertToStandardMarkdown 将各种格式的内容转换为标准的markdown格式
|
|||
|
|
// 这是第一步:预处理和标准化
|
|||
|
|
func (mc *MarkdownConverter) ConvertToStandardMarkdown(content string) string {
|
|||
|
|
if strings.TrimSpace(content) == "" {
|
|||
|
|
return content
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 1. 先清理HTML标签(保留内容)
|
|||
|
|
content = mc.textProcessor.StripHTML(content)
|
|||
|
|
|
|||
|
|
// 2. 处理代码块 - 确保代码块格式正确
|
|||
|
|
content = mc.normalizeCodeBlocks(content)
|
|||
|
|
|
|||
|
|
// 3. 处理表格 - 确保表格格式正确
|
|||
|
|
content = mc.normalizeTables(content)
|
|||
|
|
|
|||
|
|
// 4. 处理列表 - 统一列表格式
|
|||
|
|
content = mc.normalizeLists(content)
|
|||
|
|
|
|||
|
|
// 5. 处理JSON内容 - 尝试识别并格式化JSON
|
|||
|
|
content = mc.normalizeJSONContent(content)
|
|||
|
|
|
|||
|
|
// 6. 处理链接和图片 - 转换为文本
|
|||
|
|
content = mc.convertLinksToText(content)
|
|||
|
|
content = mc.convertImagesToText(content)
|
|||
|
|
|
|||
|
|
// 7. 处理引用块
|
|||
|
|
content = mc.normalizeBlockquotes(content)
|
|||
|
|
|
|||
|
|
// 8. 处理水平线
|
|||
|
|
content = mc.normalizeHorizontalRules(content)
|
|||
|
|
|
|||
|
|
// 9. 清理多余空行(保留代码块内的空行)
|
|||
|
|
content = mc.cleanupExtraBlankLines(content)
|
|||
|
|
|
|||
|
|
return content
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// normalizeCodeBlocks 规范化代码块
|
|||
|
|
func (mc *MarkdownConverter) normalizeCodeBlocks(content string) string {
|
|||
|
|
lines := strings.Split(content, "\n")
|
|||
|
|
var result []string
|
|||
|
|
inCodeBlock := false
|
|||
|
|
codeBlockLang := ""
|
|||
|
|
|
|||
|
|
for i, line := range lines {
|
|||
|
|
trimmed := strings.TrimSpace(line)
|
|||
|
|
|
|||
|
|
// 检查是否是代码块开始
|
|||
|
|
if strings.HasPrefix(trimmed, "```") {
|
|||
|
|
if inCodeBlock {
|
|||
|
|
// 代码块结束
|
|||
|
|
result = append(result, line)
|
|||
|
|
inCodeBlock = false
|
|||
|
|
codeBlockLang = ""
|
|||
|
|
} else {
|
|||
|
|
// 代码块开始
|
|||
|
|
inCodeBlock = true
|
|||
|
|
// 提取语言标识
|
|||
|
|
if len(trimmed) > 3 {
|
|||
|
|
codeBlockLang = strings.TrimSpace(trimmed[3:])
|
|||
|
|
if codeBlockLang != "" {
|
|||
|
|
result = append(result, fmt.Sprintf("```%s", codeBlockLang))
|
|||
|
|
} else {
|
|||
|
|
result = append(result, "```")
|
|||
|
|
}
|
|||
|
|
} else {
|
|||
|
|
result = append(result, "```")
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
} else if inCodeBlock {
|
|||
|
|
// 在代码块中,保留原样
|
|||
|
|
result = append(result, line)
|
|||
|
|
} else {
|
|||
|
|
// 不在代码块中,处理其他内容
|
|||
|
|
result = append(result, line)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 如果代码块没有正确关闭,在文件末尾自动关闭
|
|||
|
|
if i == len(lines)-1 && inCodeBlock {
|
|||
|
|
result = append(result, "```")
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return strings.Join(result, "\n")
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// normalizeTables 规范化表格格式
|
|||
|
|
func (mc *MarkdownConverter) normalizeTables(content string) string {
|
|||
|
|
lines := strings.Split(content, "\n")
|
|||
|
|
var result []string
|
|||
|
|
inCodeBlock := false
|
|||
|
|
|
|||
|
|
for _, line := range lines {
|
|||
|
|
trimmed := strings.TrimSpace(line)
|
|||
|
|
|
|||
|
|
// 检查是否在代码块中
|
|||
|
|
if strings.HasPrefix(trimmed, "```") {
|
|||
|
|
inCodeBlock = !inCodeBlock
|
|||
|
|
result = append(result, line)
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if inCodeBlock {
|
|||
|
|
// 代码块中的内容不处理
|
|||
|
|
result = append(result, line)
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 检查是否是表格行
|
|||
|
|
if strings.Contains(trimmed, "|") {
|
|||
|
|
// 检查是否是分隔行
|
|||
|
|
isSeparator := mc.isTableSeparator(trimmed)
|
|||
|
|
if isSeparator {
|
|||
|
|
// 确保分隔行格式正确
|
|||
|
|
cells := strings.Split(trimmed, "|")
|
|||
|
|
// 清理首尾空元素
|
|||
|
|
if len(cells) > 0 && cells[0] == "" {
|
|||
|
|
cells = cells[1:]
|
|||
|
|
}
|
|||
|
|
if len(cells) > 0 && cells[len(cells)-1] == "" {
|
|||
|
|
cells = cells[:len(cells)-1]
|
|||
|
|
}
|
|||
|
|
// 构建标准分隔行
|
|||
|
|
separator := "|"
|
|||
|
|
for range cells {
|
|||
|
|
separator += " --- |"
|
|||
|
|
}
|
|||
|
|
result = append(result, separator)
|
|||
|
|
} else {
|
|||
|
|
// 普通表格行,确保格式正确
|
|||
|
|
normalizedLine := mc.normalizeTableRow(line)
|
|||
|
|
result = append(result, normalizedLine)
|
|||
|
|
}
|
|||
|
|
} else {
|
|||
|
|
result = append(result, line)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return strings.Join(result, "\n")
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// isTableSeparator 检查是否是表格分隔行
|
|||
|
|
func (mc *MarkdownConverter) isTableSeparator(line string) bool {
|
|||
|
|
trimmed := strings.TrimSpace(line)
|
|||
|
|
if !strings.Contains(trimmed, "-") {
|
|||
|
|
return false
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 检查是否只包含 |、-、:、空格
|
|||
|
|
for _, r := range trimmed {
|
|||
|
|
if r != '|' && r != '-' && r != ':' && r != ' ' {
|
|||
|
|
return false
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
return true
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// normalizeTableRow 规范化表格行
|
|||
|
|
func (mc *MarkdownConverter) normalizeTableRow(line string) string {
|
|||
|
|
trimmed := strings.TrimSpace(line)
|
|||
|
|
if !strings.Contains(trimmed, "|") {
|
|||
|
|
return line
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
cells := strings.Split(trimmed, "|")
|
|||
|
|
// 清理首尾空元素
|
|||
|
|
if len(cells) > 0 && cells[0] == "" {
|
|||
|
|
cells = cells[1:]
|
|||
|
|
}
|
|||
|
|
if len(cells) > 0 && cells[len(cells)-1] == "" {
|
|||
|
|
cells = cells[:len(cells)-1]
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 清理每个单元格
|
|||
|
|
normalizedCells := make([]string, 0, len(cells))
|
|||
|
|
for _, cell := range cells {
|
|||
|
|
cell = strings.TrimSpace(cell)
|
|||
|
|
// 移除markdown格式但保留内容
|
|||
|
|
cell = mc.textProcessor.RemoveMarkdownSyntax(cell)
|
|||
|
|
normalizedCells = append(normalizedCells, cell)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 重新构建表格行
|
|||
|
|
return "| " + strings.Join(normalizedCells, " | ") + " |"
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// normalizeLists 规范化列表格式
|
|||
|
|
func (mc *MarkdownConverter) normalizeLists(content string) string {
|
|||
|
|
lines := strings.Split(content, "\n")
|
|||
|
|
var result []string
|
|||
|
|
inCodeBlock := false
|
|||
|
|
|
|||
|
|
for _, line := range lines {
|
|||
|
|
trimmed := strings.TrimSpace(line)
|
|||
|
|
|
|||
|
|
// 检查是否在代码块中
|
|||
|
|
if strings.HasPrefix(trimmed, "```") {
|
|||
|
|
inCodeBlock = !inCodeBlock
|
|||
|
|
result = append(result, line)
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if inCodeBlock {
|
|||
|
|
result = append(result, line)
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 处理有序列表
|
|||
|
|
if matched, _ := regexp.MatchString(`^\d+\.\s+`, trimmed); matched {
|
|||
|
|
// 确保格式统一:数字. 空格
|
|||
|
|
re := regexp.MustCompile(`^(\d+)\.\s*`)
|
|||
|
|
trimmed = re.ReplaceAllString(trimmed, "$1. ")
|
|||
|
|
result = append(result, trimmed)
|
|||
|
|
} else if strings.HasPrefix(trimmed, "- ") || strings.HasPrefix(trimmed, "* ") || strings.HasPrefix(trimmed, "+ ") {
|
|||
|
|
// 处理无序列表,统一使用 -
|
|||
|
|
re := regexp.MustCompile(`^[-*+]\s*`)
|
|||
|
|
trimmed = re.ReplaceAllString(trimmed, "- ")
|
|||
|
|
result = append(result, trimmed)
|
|||
|
|
} else {
|
|||
|
|
result = append(result, line)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return strings.Join(result, "\n")
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// normalizeJSONContent 规范化JSON内容
|
|||
|
|
func (mc *MarkdownConverter) normalizeJSONContent(content string) string {
|
|||
|
|
// 尝试识别并格式化JSON代码块
|
|||
|
|
jsonBlockRegex := regexp.MustCompile("(?s)```(?:json)?\\s*\n(.*?)\n```")
|
|||
|
|
content = jsonBlockRegex.ReplaceAllStringFunc(content, func(match string) string {
|
|||
|
|
// 提取JSON内容
|
|||
|
|
submatch := jsonBlockRegex.FindStringSubmatch(match)
|
|||
|
|
if len(submatch) < 2 {
|
|||
|
|
return match
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
jsonStr := strings.TrimSpace(submatch[1])
|
|||
|
|
// 尝试格式化JSON
|
|||
|
|
var jsonObj interface{}
|
|||
|
|
if err := json.Unmarshal([]byte(jsonStr), &jsonObj); err == nil {
|
|||
|
|
// 格式化成功
|
|||
|
|
formatted, err := json.MarshalIndent(jsonObj, "", " ")
|
|||
|
|
if err == nil {
|
|||
|
|
return fmt.Sprintf("```json\n%s\n```", string(formatted))
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
return match
|
|||
|
|
})
|
|||
|
|
|
|||
|
|
return content
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// convertLinksToText 将链接转换为文本
|
|||
|
|
func (mc *MarkdownConverter) convertLinksToText(content string) string {
|
|||
|
|
// [text](url) -> text (url)
|
|||
|
|
linkRegex := regexp.MustCompile(`\[([^\]]+)\]\(([^\)]+)\)`)
|
|||
|
|
content = linkRegex.ReplaceAllString(content, "$1 ($2)")
|
|||
|
|
|
|||
|
|
// [text][ref] -> text
|
|||
|
|
refLinkRegex := regexp.MustCompile(`\[([^\]]+)\]\[[^\]]+\]`)
|
|||
|
|
content = refLinkRegex.ReplaceAllString(content, "$1")
|
|||
|
|
|
|||
|
|
return content
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// convertImagesToText 将图片转换为文本
|
|||
|
|
func (mc *MarkdownConverter) convertImagesToText(content string) string {
|
|||
|
|
//  -> [图片: alt]
|
|||
|
|
imageRegex := regexp.MustCompile(`!\[([^\]]*)\]\([^\)]+\)`)
|
|||
|
|
content = imageRegex.ReplaceAllString(content, "[图片: $1]")
|
|||
|
|
|
|||
|
|
return content
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// normalizeBlockquotes 规范化引用块
|
|||
|
|
func (mc *MarkdownConverter) normalizeBlockquotes(content string) string {
|
|||
|
|
lines := strings.Split(content, "\n")
|
|||
|
|
var result []string
|
|||
|
|
inCodeBlock := false
|
|||
|
|
|
|||
|
|
for _, line := range lines {
|
|||
|
|
trimmed := strings.TrimSpace(line)
|
|||
|
|
|
|||
|
|
// 检查是否在代码块中
|
|||
|
|
if strings.HasPrefix(trimmed, "```") {
|
|||
|
|
inCodeBlock = !inCodeBlock
|
|||
|
|
result = append(result, line)
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if inCodeBlock {
|
|||
|
|
result = append(result, line)
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 处理引用块 > text -> > text
|
|||
|
|
if strings.HasPrefix(trimmed, ">") {
|
|||
|
|
// 确保格式统一
|
|||
|
|
quoteText := strings.TrimSpace(trimmed[1:])
|
|||
|
|
if quoteText != "" {
|
|||
|
|
result = append(result, "> "+quoteText)
|
|||
|
|
} else {
|
|||
|
|
result = append(result, ">")
|
|||
|
|
}
|
|||
|
|
} else {
|
|||
|
|
result = append(result, line)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return strings.Join(result, "\n")
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// normalizeHorizontalRules 规范化水平线
|
|||
|
|
func (mc *MarkdownConverter) normalizeHorizontalRules(content string) string {
|
|||
|
|
// 统一水平线格式为 ---
|
|||
|
|
hrRegex := regexp.MustCompile(`^[-*_]{3,}\s*$`)
|
|||
|
|
lines := strings.Split(content, "\n")
|
|||
|
|
var result []string
|
|||
|
|
inCodeBlock := false
|
|||
|
|
|
|||
|
|
for _, line := range lines {
|
|||
|
|
trimmed := strings.TrimSpace(line)
|
|||
|
|
|
|||
|
|
// 检查是否在代码块中
|
|||
|
|
if strings.HasPrefix(trimmed, "```") {
|
|||
|
|
inCodeBlock = !inCodeBlock
|
|||
|
|
result = append(result, line)
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if inCodeBlock {
|
|||
|
|
result = append(result, line)
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 如果是水平线,统一格式
|
|||
|
|
if hrRegex.MatchString(trimmed) {
|
|||
|
|
result = append(result, "---")
|
|||
|
|
} else {
|
|||
|
|
result = append(result, line)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return strings.Join(result, "\n")
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// cleanupExtraBlankLines 清理多余空行(保留代码块内的空行)
|
|||
|
|
func (mc *MarkdownConverter) cleanupExtraBlankLines(content string) string {
|
|||
|
|
lines := strings.Split(content, "\n")
|
|||
|
|
var result []string
|
|||
|
|
inCodeBlock := false
|
|||
|
|
lastWasBlank := false
|
|||
|
|
|
|||
|
|
for _, line := range lines {
|
|||
|
|
trimmed := strings.TrimSpace(line)
|
|||
|
|
|
|||
|
|
// 检查是否在代码块中
|
|||
|
|
if strings.HasPrefix(trimmed, "```") {
|
|||
|
|
inCodeBlock = !inCodeBlock
|
|||
|
|
result = append(result, line)
|
|||
|
|
lastWasBlank = false
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if inCodeBlock {
|
|||
|
|
// 代码块中的内容全部保留
|
|||
|
|
result = append(result, line)
|
|||
|
|
lastWasBlank = (trimmed == "")
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 不在代码块中
|
|||
|
|
if trimmed == "" {
|
|||
|
|
// 空行:最多保留一个连续空行
|
|||
|
|
if !lastWasBlank {
|
|||
|
|
result = append(result, "")
|
|||
|
|
lastWasBlank = true
|
|||
|
|
}
|
|||
|
|
} else {
|
|||
|
|
result = append(result, line)
|
|||
|
|
lastWasBlank = false
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return strings.Join(result, "\n")
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// PreprocessContent 预处理内容 - 这是主要的转换入口
|
|||
|
|
// 先转换,再解析
|
|||
|
|
func (mc *MarkdownConverter) PreprocessContent(content string) string {
|
|||
|
|
if strings.TrimSpace(content) == "" {
|
|||
|
|
return content
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 第一步:转换为标准markdown
|
|||
|
|
content = mc.ConvertToStandardMarkdown(content)
|
|||
|
|
|
|||
|
|
// 第二步:尝试识别并转换JSON数组为表格
|
|||
|
|
content = mc.convertJSONArrayToTable(content)
|
|||
|
|
|
|||
|
|
// 第三步:确保所有表格都有正确的分隔行
|
|||
|
|
content = mc.ensureTableSeparators(content)
|
|||
|
|
|
|||
|
|
return content
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// convertJSONArrayToTable 将JSON数组转换为markdown表格
|
|||
|
|
func (mc *MarkdownConverter) convertJSONArrayToTable(content string) string {
|
|||
|
|
// 如果内容已经是表格格式,不处理
|
|||
|
|
if strings.Contains(content, "|") {
|
|||
|
|
lines := strings.Split(content, "\n")
|
|||
|
|
for _, line := range lines {
|
|||
|
|
trimmed := strings.TrimSpace(line)
|
|||
|
|
if strings.Contains(trimmed, "|") && !strings.HasPrefix(trimmed, "```") {
|
|||
|
|
// 已经有表格,不转换
|
|||
|
|
return content
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 尝试解析为JSON数组
|
|||
|
|
trimmedContent := strings.TrimSpace(content)
|
|||
|
|
if strings.HasPrefix(trimmedContent, "[") {
|
|||
|
|
var jsonArray []map[string]interface{}
|
|||
|
|
if err := json.Unmarshal([]byte(trimmedContent), &jsonArray); err == nil && len(jsonArray) > 0 {
|
|||
|
|
// 转换为markdown表格
|
|||
|
|
return mc.jsonArrayToMarkdownTable(jsonArray)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 尝试解析为JSON对象(包含params或fields字段)
|
|||
|
|
if strings.HasPrefix(trimmedContent, "{") {
|
|||
|
|
var jsonObj map[string]interface{}
|
|||
|
|
if err := json.Unmarshal([]byte(trimmedContent), &jsonObj); err == nil {
|
|||
|
|
// 检查是否有params字段
|
|||
|
|
if params, ok := jsonObj["params"].([]interface{}); ok {
|
|||
|
|
paramMaps := make([]map[string]interface{}, 0, len(params))
|
|||
|
|
for _, p := range params {
|
|||
|
|
if pm, ok := p.(map[string]interface{}); ok {
|
|||
|
|
paramMaps = append(paramMaps, pm)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
if len(paramMaps) > 0 {
|
|||
|
|
return mc.jsonArrayToMarkdownTable(paramMaps)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
// 检查是否有fields字段
|
|||
|
|
if fields, ok := jsonObj["fields"].([]interface{}); ok {
|
|||
|
|
fieldMaps := make([]map[string]interface{}, 0, len(fields))
|
|||
|
|
for _, f := range fields {
|
|||
|
|
if fm, ok := f.(map[string]interface{}); ok {
|
|||
|
|
fieldMaps = append(fieldMaps, fm)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
if len(fieldMaps) > 0 {
|
|||
|
|
return mc.jsonArrayToMarkdownTable(fieldMaps)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return content
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// jsonArrayToMarkdownTable 将JSON数组转换为markdown表格
|
|||
|
|
func (mc *MarkdownConverter) jsonArrayToMarkdownTable(data []map[string]interface{}) string {
|
|||
|
|
if len(data) == 0 {
|
|||
|
|
return ""
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
var result strings.Builder
|
|||
|
|
|
|||
|
|
// 收集所有可能的列名(保持原始顺序)
|
|||
|
|
// 使用map记录是否已添加,使用slice保持顺序
|
|||
|
|
columnSet := make(map[string]bool)
|
|||
|
|
columns := make([]string, 0)
|
|||
|
|
|
|||
|
|
// 遍历所有数据行,按第一次出现的顺序收集列名
|
|||
|
|
for _, row := range data {
|
|||
|
|
for key := range row {
|
|||
|
|
if !columnSet[key] {
|
|||
|
|
columns = append(columns, key)
|
|||
|
|
columnSet[key] = true
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if len(columns) == 0 {
|
|||
|
|
return ""
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 构建表头(直接使用原始列名,不做映射)
|
|||
|
|
result.WriteString("|")
|
|||
|
|
for _, col := range columns {
|
|||
|
|
result.WriteString(" ")
|
|||
|
|
result.WriteString(col) // 直接使用原始列名
|
|||
|
|
result.WriteString(" |")
|
|||
|
|
}
|
|||
|
|
result.WriteString("\n")
|
|||
|
|
|
|||
|
|
// 构建分隔行
|
|||
|
|
result.WriteString("|")
|
|||
|
|
for range columns {
|
|||
|
|
result.WriteString(" --- |")
|
|||
|
|
}
|
|||
|
|
result.WriteString("\n")
|
|||
|
|
|
|||
|
|
// 构建数据行
|
|||
|
|
for _, row := range data {
|
|||
|
|
result.WriteString("|")
|
|||
|
|
for _, col := range columns {
|
|||
|
|
result.WriteString(" ")
|
|||
|
|
value := mc.formatCellValue(row[col])
|
|||
|
|
result.WriteString(value)
|
|||
|
|
result.WriteString(" |")
|
|||
|
|
}
|
|||
|
|
result.WriteString("\n")
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return result.String()
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// formatColumnName 格式化列名(直接返回原始列名,不做映射)
|
|||
|
|
// 保持数据库原始数据的列名,不进行转换
|
|||
|
|
func (mc *MarkdownConverter) formatColumnName(name string) string {
|
|||
|
|
// 直接返回原始列名,保持数据库数据的原始格式
|
|||
|
|
return name
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// formatCellValue 格式化单元格值
|
|||
|
|
func (mc *MarkdownConverter) formatCellValue(value interface{}) string {
|
|||
|
|
if value == nil {
|
|||
|
|
return ""
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
switch v := value.(type) {
|
|||
|
|
case string:
|
|||
|
|
v = strings.ReplaceAll(v, "\n", " ")
|
|||
|
|
v = strings.ReplaceAll(v, "\r", " ")
|
|||
|
|
v = strings.TrimSpace(v)
|
|||
|
|
v = strings.ReplaceAll(v, "|", "\\|")
|
|||
|
|
return v
|
|||
|
|
case bool:
|
|||
|
|
if v {
|
|||
|
|
return "是"
|
|||
|
|
}
|
|||
|
|
return "否"
|
|||
|
|
case float64:
|
|||
|
|
if v == float64(int64(v)) {
|
|||
|
|
return fmt.Sprintf("%.0f", v)
|
|||
|
|
}
|
|||
|
|
return fmt.Sprintf("%g", v)
|
|||
|
|
case int, int8, int16, int32, int64:
|
|||
|
|
return fmt.Sprintf("%d", v)
|
|||
|
|
case uint, uint8, uint16, uint32, uint64:
|
|||
|
|
return fmt.Sprintf("%d", v)
|
|||
|
|
default:
|
|||
|
|
str := fmt.Sprintf("%v", v)
|
|||
|
|
str = strings.ReplaceAll(str, "\n", " ")
|
|||
|
|
str = strings.ReplaceAll(str, "\r", " ")
|
|||
|
|
str = strings.ReplaceAll(str, "|", "\\|")
|
|||
|
|
return strings.TrimSpace(str)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ensureTableSeparators 确保所有表格都有正确的分隔行
|
|||
|
|
func (mc *MarkdownConverter) ensureTableSeparators(content string) string {
|
|||
|
|
lines := strings.Split(content, "\n")
|
|||
|
|
var result []string
|
|||
|
|
inCodeBlock := false
|
|||
|
|
lastLineWasTableHeader := false
|
|||
|
|
|
|||
|
|
for i, line := range lines {
|
|||
|
|
trimmed := strings.TrimSpace(line)
|
|||
|
|
|
|||
|
|
// 检查是否在代码块中
|
|||
|
|
if strings.HasPrefix(trimmed, "```") {
|
|||
|
|
inCodeBlock = !inCodeBlock
|
|||
|
|
result = append(result, line)
|
|||
|
|
lastLineWasTableHeader = false
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if inCodeBlock {
|
|||
|
|
result = append(result, line)
|
|||
|
|
lastLineWasTableHeader = false
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 检查是否是表格行
|
|||
|
|
if strings.Contains(trimmed, "|") {
|
|||
|
|
// 检查是否是分隔行
|
|||
|
|
if mc.isTableSeparator(trimmed) {
|
|||
|
|
result = append(result, line)
|
|||
|
|
lastLineWasTableHeader = false
|
|||
|
|
} else {
|
|||
|
|
// 普通表格行
|
|||
|
|
result = append(result, line)
|
|||
|
|
// 检查上一行是否是表头
|
|||
|
|
if lastLineWasTableHeader {
|
|||
|
|
// 在表头后插入分隔行
|
|||
|
|
cells := strings.Split(trimmed, "|")
|
|||
|
|
if len(cells) > 0 && cells[0] == "" {
|
|||
|
|
cells = cells[1:]
|
|||
|
|
}
|
|||
|
|
if len(cells) > 0 && cells[len(cells)-1] == "" {
|
|||
|
|
cells = cells[:len(cells)-1]
|
|||
|
|
}
|
|||
|
|
separator := "|"
|
|||
|
|
for range cells {
|
|||
|
|
separator += " --- |"
|
|||
|
|
}
|
|||
|
|
// 在当前位置插入分隔行
|
|||
|
|
result = append(result[:len(result)-1], separator, line)
|
|||
|
|
} else {
|
|||
|
|
// 检查是否是表头(第一行表格)
|
|||
|
|
if i > 0 {
|
|||
|
|
prevLine := strings.TrimSpace(lines[i-1])
|
|||
|
|
if !strings.Contains(prevLine, "|") || mc.isTableSeparator(prevLine) {
|
|||
|
|
// 这可能是表头
|
|||
|
|
lastLineWasTableHeader = true
|
|||
|
|
}
|
|||
|
|
} else {
|
|||
|
|
lastLineWasTableHeader = true
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
} else {
|
|||
|
|
result = append(result, line)
|
|||
|
|
lastLineWasTableHeader = false
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return strings.Join(result, "\n")
|
|||
|
|
}
|