add seo

2026-02-28 16:10:29 +08:00
parent 2c97f724f5
commit ced2cd04db
26 changed files with 1950 additions and 82 deletions
--- a/server/crawler-detector.js
+++ b/server/crawler-detector.js
@@ -0,0 +1,170 @@
+/**
+ * Crawler detection module.
+ * Recognises search-engine crawlers and social-media link-preview fetchers
+ * from the headers of an incoming HTTP request.
+ */
+
+class CrawlerDetector {
+    constructor() {
+        // Lower-case substrings searched for in the User-Agent header.
+        // Matching is a plain substring test, so every entry must be specific
+        // enough not to appear in ordinary browser User-Agents.
+        this.crawlerPatterns = [
+            // Baidu
+            'baiduspider',
+            'baiduspider-mobile',
+            'baiduspider-image',
+            'baiduspider-video',
+            'baiduspider-news',
+            'baiduboxapp',
+
+            // Google
+            'googlebot',
+            'googlebot-image',
+            'googlebot-news',
+            'googlebot-mobile',
+            'googlebot-video',
+            'google-web-snippet',
+
+            // 360 Search
+            '360spider',
+            'soha-agent',
+            'haosouspider',
+
+            // Sogou
+            'sogou spider',
+            'sogou news spider',
+            'sogou orion spider',
+            'sogou-blog',
+
+            // Bing
+            'bingbot',
+            'msnbot',
+
+            // Yahoo
+            'slurp',
+
+            // Soso
+            'sosospider',
+            'sosoimagespider',
+
+            // Youdao
+            'youdaobot',
+            'yodaobot',
+
+            // Toutiao Search
+            'bytedance-spider',
+            'toutiaospider',
+
+            // Social-media link-preview fetchers
+            'facebookexternalhit',
+            'facebookcatalog',
+            'twitterbot',
+            'linkedinbot',
+            'whatsapp',
+            'telegrambot',
+            'viber',
+            // The bare substring 'line' also matches the LINE in-app browser
+            // ("... Safari Line/13.0.0"), i.e. real users. Match the preview
+            // fetcher token instead.
+            // NOTE(review): LINE's fetcher is believed to identify itself as
+            // "facebookexternalhit/1.1;line-poker/1.0" - confirm against LINE docs.
+            'line-poker',
+
+            // Other common crawlers, followed by generic catch-all tokens
+            'applebot',
+            'semrushbot',
+            'ahrefsbot',
+            'mj12bot',
+            'dotbot',
+            'crawler',
+            'spider',
+            'bot'
+        ]
+
+        // Request headers whose value is scanned for "bot" / "crawler".
+        // NOTE(review): x-forwarded-for normally carries IP addresses, so it is
+        // unlikely to ever match; kept for backward compatibility.
+        this.crawlerHeaders = ['x-bot', 'x-crawler', 'x-forwarded-for']
+    }
+
+    /**
+     * Decide whether a request comes from a crawler.
+     * The User-Agent is checked first, then the headers listed in
+     * `crawlerHeaders`. A positive result is logged to the console.
+     * @param {Object} req - HTTP request object exposing a `headers` map
+     * @returns {Boolean} true when the request looks like a crawler
+     */
+    isCrawler(req) {
+        // Tolerate a request without a headers map instead of throwing.
+        const headers = (req && req.headers) || {}
+        const rawUserAgent = headers['user-agent']
+        const userAgent = typeof rawUserAgent === 'string' ? rawUserAgent.toLowerCase() : ''
+
+        // 1. User-Agent based detection
+        if (this.checkUserAgent(userAgent)) {
+            console.log(`[CrawlerDetector] 检测到爬虫 UA: ${userAgent}`)
+            return true
+        }
+
+        // 2. Detection through dedicated request headers
+        if (this.checkHeaders(headers)) {
+            console.log(`[CrawlerDetector] 检测到爬虫 Headers`)
+            return true
+        }
+
+        // 3. IP based detection is not wired in; checkIP() is a stub.
+
+        return false
+    }
+
+    /**
+     * Test a User-Agent string against the known crawler substrings.
+     * The comparison is case-insensitive on both sides, so the method can be
+     * called directly with a raw (mixed-case) header value.
+     * @param {String} userAgent
+     * @returns {Boolean}
+     */
+    checkUserAgent(userAgent) {
+        if (typeof userAgent !== 'string' || userAgent === '') return false
+
+        const ua = userAgent.toLowerCase()
+        return this.crawlerPatterns.some(pattern => ua.includes(pattern.toLowerCase()))
+    }
+
+    /**
+     * Scan the headers named in `crawlerHeaders` for "bot" or "crawler".
+     * @param {Object} headers - request header map (lower-case keys)
+     * @returns {Boolean}
+     */
+    checkHeaders(headers) {
+        if (!headers) return false
+
+        return this.crawlerHeaders.some(name => {
+            const raw = headers[name]
+            if (raw === undefined || raw === null) return false
+
+            // String() also copes with array-valued headers.
+            const value = String(raw).toLowerCase()
+            return value.includes('bot') || value.includes('crawler')
+        })
+    }
+
+    /**
+     * Check whether an IP address belongs to a known crawler range.
+     * Stub: no ranges are configured, so this always returns false.
+     * @param {String} ip
+     * @returns {Boolean}
+     */
+    checkIP(ip) {
+        // A maintained list of crawler IP ranges would be consulted here.
+        return false
+    }
+
+    /**
+     * Classify a User-Agent into a crawler family.
+     * @param {String} userAgent
+     * @returns {String} 'baidu' | 'google' | 'bing' | '360' | 'sogou' |
+     *                   'facebook' | 'twitter' | 'linkedin' | 'unknown'
+     */
+    getCrawlerType(userAgent) {
+        // A missing User-Agent is simply unclassifiable, not an error.
+        if (typeof userAgent !== 'string') return 'unknown'
+
+        const ua = userAgent.toLowerCase()
+
+        if (ua.includes('baiduspider')) return 'baidu'
+        if (ua.includes('googlebot')) return 'google'
+        if (ua.includes('bingbot') || ua.includes('msnbot')) return 'bing'
+        if (ua.includes('360spider')) return '360'
+        if (ua.includes('sogou spider')) return 'sogou'
+        if (ua.includes('facebookexternalhit')) return 'facebook'
+        if (ua.includes('twitterbot')) return 'twitter'
+        if (ua.includes('linkedinbot')) return 'linkedin'
+
+        return 'unknown'
+    }
+}
+
+module.exports = CrawlerDetector
--- a/server/generate-seo-templates.cjs
+++ b/server/generate-seo-templates.cjs
@@ -0,0 +1,242 @@
+/**
+ * SEO模板生成器
+ * 根据路由配置自动生成静态HTML模板
+ */
+
+const fs = require('fs')
+const path = require('path')
+
+// Per-page SEO configuration (keep in sync with useSEO.js).
+// Each key is the file name main() writes into the template directory; each
+// value supplies the fields read by generateHTMLTemplate():
+//   title       - used for <title>, og:title, twitter:title and the <h1>
+//   description - used for the description meta tags and the intro paragraph
+//   keywords    - used for the keywords meta tag
+//   url         - used for og:url, twitter:url, the canonical link and the
+//                 "click here" anchor
+const pageSEOConfigs = {
+    'index.html': {
+        title: '真爱查官网_婚姻状态核验_婚前背景互信平台',
+        description: '真爱查致力于维护婚姻家庭安全。提供基于合法公开数据的婚姻状态报告与综合风险检测。核心排查历史婚姻涉诉记录、失信记录及潜在情感隐患。拒绝盲目信任，一键生成婚恋状态评估，为爱护航。',
+        keywords: '真爱查, 婚姻状态查询, 婚姻历史核验, 再婚背景调查, 情感风险评估, 婚恋互信工具',
+        url: 'https://www.zhenaicha.com'
+    },
+    'agent.html': {
+        title: '真爱查代理 - 免费开通代理权限 | 大数据风险报告代理',
+        description: '真爱查代理平台，免费开通代理权限，享受大数据风险报告查询服务代理收益。专业的大数据风险报告、婚姻查询、个人信用评估等服务的代理合作。',
+        keywords: '真爱查代理, 免费代理, 大数据风险报告代理, 代理权限, 代理收益',
+        url: 'https://www.zhenaicha.com/agent'
+    },
+    'help.html': {
+        title: '帮助中心 - 真爱查使用指南 | 常见问题解答',
+        description: '真爱查帮助中心，提供详细的使用指南、常见问题解答、操作教程等，帮助用户更好地使用大数据风险报告查询服务。',
+        keywords: '真爱查帮助, 使用指南, 常见问题, 操作教程, 客服支持',
+        url: 'https://www.zhenaicha.com/help'
+    },
+    'help-guide.html': {
+        title: '使用指南 - 真爱查操作教程 | 功能说明',
+        description: '真爱查详细使用指南，包含各功能模块的操作教程、功能说明、注意事项等，让用户快速上手使用。',
+        keywords: '使用指南, 操作教程, 功能说明, 快速上手, 真爱查教程',
+        url: 'https://www.zhenaicha.com/help/guide'
+    },
+    'example.html': {
+        title: '示例报告 - 真爱查报告展示 | 大数据风险报告样例',
+        description: '真爱查示例报告展示，包含大数据风险报告、婚姻状况查询、个人信用评估等服务的报告样例，让用户了解报告内容和格式。',
+        keywords: '示例报告, 报告展示, 报告样例, 大数据风险报告, 婚姻查询报告',
+        url: 'https://www.zhenaicha.com/example'
+    },
+    'service.html': {
+        title: '客服中心 - 真爱查在线客服 | 技术支持',
+        description: '真爱查客服中心，提供在线客服支持、技术咨询、问题反馈等服务，确保用户获得及时有效的帮助。',
+        keywords: '客服中心, 在线客服, 技术支持, 问题反馈, 真爱查客服',
+        url: 'https://www.zhenaicha.com/service'
+    },
+    'inquire-riskassessment.html': {
+        title: '个人综合履约能力画像_多维风险指数检测_信用健康度_真爱查',
+        description: '深度解析个人履约能力综合状况,辅助用户进行风险自查。报告维度包含历史履约趋势、多维生活经营风险指数及关联负面标签。数据客观中立,实时更新,帮助用户优化个人资信档案,提升生活经营能力。',
+        keywords: '个人风险画像,履约能力评估,综合风险指数,信用健康体检,个人数据分析',
+        url: 'https://www.zhenaicha.com/inquire/riskassessment'
+    },
+    'inquire-companyinfo.html': {
+        title: '商业背景真实性核验_企业主经营实力与司法风险_真爱查',
+        description: '真爱查企业版助您鉴别商业背景的真实性。一键核验目标对象的名下关联企业、工商变更记录、司法被执行信息及股权冻结状况。全方位评估经营实力与法律风险,规避商业合作或家庭资产联保隐患。',
+        keywords: '商业背景核验,企业主信用评估,公司经营风险,工商信息核验,资产风险评估',
+        url: 'https://www.zhenaicha.com/inquire/companyinfo'
+    },
+    'inquire-marriage.html': {
+        title: '婚前综合背景了解_涉婚司法风险评估_情感诚意度报告_真爱查',
+        description: '真爱查婚恋报告基于公开司法大数据,提供客观的婚前背景参考。深度评估对象的涉婚法律诉讼、失信被执行历史及社会不良标签。旨在消除信息不对称,辅助用户建立透明、安全的婚姻基础。',
+        keywords: '婚前背景核验,恋爱对象风险,婚姻司法记录,情感互信报告,家庭履约风险',
+        url: 'https://www.zhenaicha.com/inquire/marriage'
+    },
+    'promote.html': {
+        title: '真爱查合作伙伴计划_婚恋行业数字化风控解决方案_渠道招募',
+        description: '真爱查开放全国渠道合作,为婚介机构及情感咨询师提供专业的数据化风控工具。一键接入婚恋风险评估系统,支持推广海报生成与多级数据管理。正规项目,赋能合作伙伴,共同挖掘婚恋市场的合规商业价值。',
+        keywords: '婚恋行业合作,情感咨询工具,风控系统代理,渠道合作伙伴,婚介数据服务',
+        url: 'https://www.zhenaicha.com/promote'
+    }
+}
+
+/**
+ * Clean up copy text before it is written into a template.
+ * Drops U+FFFD replacement characters (left behind by broken decoding) and
+ * converts half-width ideographic punctuation to the full-width Chinese form.
+ * Non-string input is handed back untouched.
+ */
+function normalizeText(str) {
+    if (typeof str === 'string') {
+        // Step 1: strip replacement characters
+        const withoutReplacementChars = str.replace(/\uFFFD/g, '')
+        // Step 2: half-width full stop -> full-width full stop
+        const withFullStops = withoutReplacementChars.replace(/｡/g, '。')
+        // Step 3: half-width comma -> full-width enumeration comma
+        return withFullStops.replace(/､/g, '、')
+    }
+
+    return str
+}
+
+/**
+ * Escape a value for use in HTML text content or a quoted attribute.
+ * The value is stringified first, then the five markup-significant
+ * characters are replaced by entities ("&" first, so entities produced by
+ * the later replacements are not escaped twice).
+ */
+function escapeHtml(value) {
+    return String(value)
+        .replace(/&/g, '&amp;')
+        .replace(/</g, '&lt;')
+        .replace(/>/g, '&gt;')
+        .replace(/"/g, '&quot;')
+        .replace(/'/g, '&#39;')
+}
+
+/**
+ * Build the static HTML document for one page.
+ *
+ * Text fields are passed through normalizeText() and then HTML-escaped
+ * wherever they land in markup, so a quote, angle bracket or ampersand in the
+ * copy cannot break a tag or an attribute. The JSON-LD block receives the
+ * unescaped text (JSON.stringify does its own quoting) with "<" rewritten to
+ * "\u003c" so that a "</script>" sequence inside a value cannot close the
+ * script element; JSON parsers read the value back unchanged.
+ *
+ * @param {Object} config - page settings
+ * @param {String} config.title - page title
+ * @param {String} config.description - page description
+ * @param {String} config.keywords - comma separated keywords
+ * @param {String} config.url - canonical URL of the page
+ * @returns {String} complete HTML document
+ */
+function generateHTMLTemplate(config) {
+    const title = normalizeText(config.title)
+    const description = normalizeText(config.description)
+    const keywords = normalizeText(config.keywords)
+    const structuredData = {
+        "@context": "https://schema.org",
+        "@type": "WebPage",
+        "name": title,
+        "description": description,
+        "url": config.url,
+        "mainEntity": {
+            "@type": "Organization",
+            "name": "真爱查",
+            "url": "https://www.zhenaicha.com/",
+            "description": "专业大数据风险报告查询与代理平台，支持个人和企业多场景风控应用"
+        }
+    }
+
+    // Markup-safe variants for text and attribute positions.
+    const safeTitle = escapeHtml(title)
+    const safeDescription = escapeHtml(description)
+    const safeKeywords = escapeHtml(keywords)
+    const safeUrl = escapeHtml(config.url)
+
+    // Script content is not entity-decoded, so "<" is neutralised with a JSON
+    // unicode escape instead of an HTML entity.
+    const jsonLd = JSON.stringify(structuredData, null, 8).replace(/</g, '\\u003c')
+
+    return `<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    
+    <!-- 页面标题 -->
+    <title>${safeTitle}</title>
+    
+    <!-- SEO Meta标签 -->
+    <meta name="description" content="${safeDescription}">
+    <meta name="keywords" content="${safeKeywords}">
+    
+    <!-- Open Graph标签 -->
+    <meta property="og:title" content="${safeTitle}">
+    <meta property="og:description" content="${safeDescription}">
+    <meta property="og:url" content="${safeUrl}">
+    <meta property="og:type" content="website">
+    <meta property="og:site_name" content="真爱查">
+    <meta property="og:locale" content="zh_CN">
+    
+    <!-- Twitter Cards -->
+    <meta name="twitter:card" content="summary">
+    <meta name="twitter:title" content="${safeTitle}">
+    <meta name="twitter:description" content="${safeDescription}">
+    <meta name="twitter:url" content="${safeUrl}">
+    
+    <!-- Canonical URL -->
+    <link rel="canonical" href="${safeUrl}">
+    
+    <!-- 结构化数据 -->
+    <script type="application/ld+json">
+${jsonLd}
+    </script>
+    
+    <!-- 其他Meta标签 -->
+    <meta name="robots" content="index, follow">
+    <meta name="googlebot" content="index, follow">
+    <meta name="baiduspider" content="index, follow">
+    
+    <style>
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
+            margin: 0;
+            padding: 0;
+            line-height: 1.6;
+        }
+        .seo-content {
+            max-width: 1200px;
+            margin: 0 auto;
+            padding: 20px;
+        }
+        h1 { color: #333; }
+        p { color: #666; }
+        .redirect-notice {
+            background: #fff3cd;
+            border: 1px solid #ffc107;
+            color: #856404;
+            padding: 10px;
+            margin: 20px 0;
+            border-radius: 4px;
+        }
+    </style>
+</head>
+<body>
+    <div class="seo-content">
+        <h1>${safeTitle}</h1>
+        
+        <div class="redirect-notice">
+            <p>正在跳转到完整版网站...</p>
+            <p>如果浏览器没有自动跳转，请 <a href="${safeUrl}">点击这里</a></p>
+        </div>
+        
+        <p>${safeDescription}</p>
+        
+        <!-- 这里可以添加更多SEO友好的静态内容 -->
+        <section>
+            <h2>关于真爱查</h2>
+            <p>真爱查是专业的婚姻状态核验与婚前背景互信平台，提供全方位的风险查询服务。我们的核心使命是帮助用户建立安全、可信赖的婚姻基础。</p>
+        </section>
+        
+        <section>
+            <h2>核心服务</h2>
+            <ul>
+                <li>个人综合履约能力画像</li>
+                <li>商业背景真实性核验</li>
+                <li>婚前综合背景了解</li>
+                <li>涉婚司法风险评估</li>
+                <li>情感诚意度报告</li>
+            </ul>
+        </section>
+        
+        <section>
+            <h2>为什么选择真爱查</h2>
+            <p>真爱查依托大数据技术，为用户提供准确、及时的婚恋风险评估报告，帮助您做出明智决策。</p>
+        </section>
+    </div>
+    
+    <script>
+        // 可选：自动跳转到SPA应用
+        // setTimeout(function() {
+        //     window.location.href = '${config.url}';
+        // }, 2000);
+    </script>
+</body>
+</html>`
+}
+
+/**
+ * Entry point: renders every entry of pageSEOConfigs with
+ * generateHTMLTemplate() and writes the result, UTF-8 encoded, into
+ * ../public/seo-templates (relative to this script), creating the directory
+ * when it does not exist yet. Progress is reported on the console.
+ */
+function main() {
+    const outputDir = path.join(__dirname, '../public/seo-templates')
+
+    // Make sure the target directory is there before writing into it
+    const directoryMissing = !fs.existsSync(outputDir)
+    if (directoryMissing) {
+        fs.mkdirSync(outputDir, { recursive: true })
+        console.log(`✓ 创建模板目录: ${outputDir}`)
+    }
+
+    // Render and write one file per configured page
+    let successCount = 0
+    for (const [filename, config] of Object.entries(pageSEOConfigs)) {
+        const htmlContent = generateHTMLTemplate(config)
+        const filePath = path.join(outputDir, filename)
+
+        fs.writeFileSync(filePath, htmlContent, 'utf-8')
+        console.log(`✓ 生成模板: ${filename}`)
+        successCount += 1
+    }
+
+    console.log(`\n✓ 成功生成 ${successCount} 个SEO模板文件`)
+    console.log(`📁 模板目录: ${outputDir}`)
+}
+
+// Run the generator
+main()
--- a/server/middleware.js
+++ b/server/middleware.js
@@ -0,0 +1,174 @@
+/**
+ * SEO中间件
+ * 用于在Node.js服务器中检测爬虫并返回静态HTML
+ */
+
+const fs = require('fs')
+const path = require('path')
+const CrawlerDetector = require('./crawler-detector')
+
+/**
+ * Serves pre-generated static HTML templates to crawlers and lets every other
+ * request fall through to the SPA.
+ *
+ * Options:
+ *   templateDir     - directory holding the templates
+ *                     (default: ../public/seo-templates next to this file)
+ *   defaultTemplate - template used for page routes without a mapping
+ *                     (default: 'index.html')
+ *   fallbackToSPA   - stored on the instance; not consulted by this class
+ *   debug           - enables verbose logging
+ */
+class SEOMiddleware {
+    constructor(options = {}) {
+        this.detector = new CrawlerDetector()
+        this.templateDir = options.templateDir || path.join(__dirname, '../public/seo-templates')
+        this.defaultTemplate = options.defaultTemplate || 'index.html'
+        this.fallbackToSPA = options.fallbackToSPA !== false
+        this.debug = options.debug || false
+
+        // Route -> template file name (keep in sync with useSEO.js)
+        this.routeTemplateMap = {
+            '/': 'index.html',
+            '/agent': 'agent.html',
+            '/help': 'help.html',
+            '/help/guide': 'help-guide.html',
+            '/example': 'example.html',
+            '/service': 'service.html',
+            '/inquire/riskassessment': 'inquire-riskassessment.html',
+            '/inquire/companyinfo': 'inquire-companyinfo.html',
+            '/inquire/marriage': 'inquire-marriage.html',
+            '/promote': 'promote.html'
+        }
+
+        // Template contents keyed by file name, filled eagerly
+        this.templateCache = new Map()
+        this.cacheTemplates()
+    }
+
+    /**
+     * Read every regular file in the template directory into the cache.
+     * A missing directory or a read error is logged and otherwise ignored, so
+     * construction never fails because of the file system.
+     */
+    cacheTemplates() {
+        try {
+            if (!fs.existsSync(this.templateDir)) {
+                console.warn(`[SEOMiddleware] 模板目录不存在: ${this.templateDir}`)
+                return
+            }
+
+            const files = fs.readdirSync(this.templateDir)
+            files.forEach(file => {
+                const filePath = path.join(this.templateDir, file)
+                if (fs.statSync(filePath).isFile()) {
+                    this.templateCache.set(file, fs.readFileSync(filePath, 'utf-8'))
+                    if (this.debug) {
+                        console.log(`[SEOMiddleware] 已缓存模板: ${file}`)
+                    }
+                }
+            })
+
+            console.log(`[SEOMiddleware] 已缓存 ${this.templateCache.size} 个模板文件`)
+        } catch (error) {
+            console.error('[SEOMiddleware] 缓存模板失败:', error)
+        }
+    }
+
+    /**
+     * Bring a request path into the form used as key of routeTemplateMap:
+     * trailing slashes are removed and anything empty or non-string becomes '/'.
+     * @param {String} requestPath
+     * @returns {String}
+     */
+    normalizeRequestPath(requestPath) {
+        if (typeof requestPath !== 'string') return '/'
+        return requestPath.replace(/\/+$/, '') || '/'
+    }
+
+    /**
+     * Decide whether a request is a page request this middleware may answer.
+     * Only GET/HEAD qualify (a missing method is accepted), and paths ending
+     * in a file extension are left alone unless they are a mapped route. This
+     * keeps /robots.txt, /sitemap.xml and the JS/CSS/image assets crawlers
+     * fetch flowing to the static file handler instead of receiving HTML.
+     * @param {String} method - HTTP method
+     * @param {String} requestPath - request path without query string
+     * @returns {Boolean}
+     */
+    shouldHandleRequest(method, requestPath) {
+        if (method && method !== 'GET' && method !== 'HEAD') return false
+
+        const normalized = this.normalizeRequestPath(requestPath)
+        if (Object.prototype.hasOwnProperty.call(this.routeTemplateMap, normalized)) return true
+
+        // URL paths always use "/", hence the posix flavour
+        return path.posix.extname(normalized) === ''
+    }
+
+    /**
+     * Resolve the template file name for a request path.
+     * An exact route match wins. Otherwise the longest mapped route that is a
+     * whole-segment prefix of the path is used (so /help/guide/42 resolves to
+     * the /help/guide template and /helpful does not resolve to /help). The
+     * root route is not a prefix candidate, because every path starts with
+     * "/"; unmatched paths get the default template.
+     * @param {String} requestPath - request path
+     * @returns {String} template file name
+     */
+    getTemplatePath(requestPath) {
+        const normalized = this.normalizeRequestPath(requestPath)
+
+        // Exact match
+        if (Object.prototype.hasOwnProperty.call(this.routeTemplateMap, normalized)) {
+            return this.routeTemplateMap[normalized]
+        }
+
+        // Longest whole-segment prefix (dynamic sub-routes)
+        let bestRoute = null
+        for (const route of Object.keys(this.routeTemplateMap)) {
+            if (route === '/') continue
+            const isPrefix = normalized.startsWith(`${route}/`)
+            if (isPrefix && (bestRoute === null || route.length > bestRoute.length)) {
+                bestRoute = route
+            }
+        }
+
+        return bestRoute !== null ? this.routeTemplateMap[bestRoute] : this.defaultTemplate
+    }
+
+    /**
+     * Fetch a template's content, from the cache when possible and from disk
+     * otherwise (a successful disk read is cached).
+     * @param {String} templateName - template file name
+     * @returns {String|null} template content, or null when unavailable
+     */
+    getTemplate(templateName) {
+        // Cache first
+        let content = this.templateCache.get(templateName)
+
+        // Cache miss: try the file system
+        if (!content) {
+            try {
+                const filePath = path.join(this.templateDir, templateName)
+                if (fs.existsSync(filePath)) {
+                    content = fs.readFileSync(filePath, 'utf-8')
+                    this.templateCache.set(templateName, content)
+                }
+            } catch (error) {
+                console.error(`[SEOMiddleware] 读取模板失败: ${templateName}`, error)
+            }
+        }
+
+        return content || null
+    }
+
+    /**
+     * Express middleware factory.
+     * @returns {Function} (req, res, next) handler
+     */
+    express() {
+        return (req, res, next) => {
+            // Only page requests from crawlers are answered here
+            if (this.shouldHandleRequest(req.method, req.path) && this.detector.isCrawler(req)) {
+                const templateName = this.getTemplatePath(req.path)
+                const template = this.getTemplate(templateName)
+
+                if (template) {
+                    res.setHeader('Content-Type', 'text/html; charset=utf-8')
+                    res.setHeader('X-SEOMiddleware', 'prerendered')
+                    // The body depends on the User-Agent; tell shared caches
+                    // not to hand this variant to ordinary browsers.
+                    res.setHeader('Vary', 'User-Agent')
+
+                    if (this.debug) {
+                        console.log(`[SEOMiddleware] 返回SEO模板: ${templateName} for ${req.path}`)
+                    }
+
+                    return res.send(template)
+                }
+            }
+
+            // Not a crawler page request, or no template: continue to the SPA
+            next()
+        }
+    }
+
+    /**
+     * Koa middleware factory.
+     * @returns {Function} async (ctx, next) handler
+     */
+    koa() {
+        return async (ctx, next) => {
+            // Only page requests from crawlers are answered here
+            if (this.shouldHandleRequest(ctx.method, ctx.path) && this.detector.isCrawler(ctx.req)) {
+                const templateName = this.getTemplatePath(ctx.path)
+                const template = this.getTemplate(templateName)
+
+                if (template) {
+                    ctx.type = 'text/html; charset=utf-8'
+                    ctx.set('X-SEOMiddleware', 'prerendered')
+                    // See express(): the response varies by User-Agent
+                    ctx.set('Vary', 'User-Agent')
+
+                    if (this.debug) {
+                        console.log(`[SEOMiddleware] 返回SEO模板: ${templateName} for ${ctx.path}`)
+                    }
+
+                    ctx.body = template
+                    return
+                }
+            }
+
+            await next()
+        }
+    }
+
+    /**
+     * Drop all cached templates and read the template directory again.
+     */
+    reloadCache() {
+        this.templateCache.clear()
+        this.cacheTemplates()
+        console.log('[SEOMiddleware] 模板缓存已重新加载')
+    }
+}
+
+module.exports = SEOMiddleware
--- a/server/package.json
+++ b/server/package.json
@@ -0,0 +1,28 @@
+{
+  "name": "xfc-seo-server",
+  "version": "1.0.0",
+  "description": "幸福查SEO优化服务器 - 爬虫检测与静态HTML回退",
+  "main": "server-example-express.js",
+  "scripts": {
+    "start": "node server-example-express.js",
+    "dev": "node server-example-express.js",
+    "generate": "node generate-seo-templates.cjs",
+    "test": "node test-seo.js",
+    "test:crawler": "node test-crawler-detection.js"
+  },
+  "keywords": [
+    "seo",
+    "crawler",
+    "spa",
+    "prerender"
+  ],
+  "author": "xingfucha",
+  "license": "MIT",
+  "dependencies": {
+    "express": "^4.18.2",
+    "compression": "^1.7.4"
+  },
+  "devDependencies": {
+    "nodemon": "^3.0.1"
+  }
+}
--- a/server/server-example-express.js
+++ b/server/server-example-express.js
@@ -0,0 +1,36 @@
+/**
+ * Express服务器示例
+ * 展示如何集成SEO中间件
+ */
+
+const express = require('express')
+const path = require('path')
+const SEOMiddleware = require('./middleware')
+
+const app = express()
+const listenPort = process.env.PORT || 3000
+
+// Locations resolved once, relative to this file
+const seoTemplateDir = path.join(__dirname, '../public/seo-templates')
+const staticRoot = path.join(__dirname, '../dist')
+const spaEntryFile = path.join(__dirname, '../dist/index.html')
+
+// SEO middleware; verbose logging only in development
+const verboseLogging = process.env.NODE_ENV === 'development'
+const seoMiddleware = new SEOMiddleware({
+    templateDir: seoTemplateDir,
+    debug: verboseLogging
+})
+
+// Order matters: crawler handling first, then static files, then the SPA shell
+app.use(seoMiddleware.express())
+app.use(express.static(staticRoot))
+
+// Any GET not answered above receives the SPA entry document
+app.get('*', (req, res) => {
+    res.sendFile(spaEntryFile)
+})
+
+// Start listening and report the address
+app.listen(listenPort, () => {
+    console.log(`🚀 服务器运行在 http://localhost:${listenPort}`)
+    console.log(`🔍 SEO中间件已启用`)
+})
+
+module.exports = app