add seo
This commit is contained in:
112
server/test-crawler-detection.js
Normal file
112
server/test-crawler-detection.js
Normal file
@@ -0,0 +1,112 @@
|
||||
/**
 * Crawler detection test script.
 * Verifies that the CrawlerDetector correctly classifies crawler and
 * regular-browser User-Agent strings, and identifies crawler types.
 */

const CrawlerDetector = require('./crawler-detector')

// Single shared detector instance exercised by every test case below.
const detector = new CrawlerDetector()
// Test cases for detector.isCrawler().
// Each entry pairs a User-Agent string with the expected detection result.
const makeCase = (userAgent, expected, description) => ({ userAgent, expected, description })

const testCases = [
  // Known crawler User-Agents — all expected to be detected.
  makeCase('Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)', true, '百度爬虫'),
  makeCase('Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', true, 'Google爬虫'),
  makeCase('Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)', true, '必应爬虫'),
  makeCase('Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)', true, '搜狗爬虫'),
  makeCase('360Spider', true, '360爬虫'),
  makeCase('facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)', true, 'Facebook爬虫'),
  makeCase('Twitterbot/1.0', true, 'Twitter爬虫'),
  makeCase('LinkedInBot/1.0 (compatible; Mozilla/5.0; +https://www.linkedin.com/help/linkedin/answer/8665)', true, 'LinkedIn爬虫'),

  // Regular browser User-Agents — none should be flagged.
  makeCase('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', false, 'Chrome浏览器'),
  makeCase('Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0', false, 'Firefox浏览器'),
  makeCase('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15', false, 'Safari浏览器'),
  makeCase('Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1', false, 'iPhone Safari'),
  makeCase('Mozilla/5.0 (Linux; Android 13; SM-S908B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36', false, 'Android Chrome'),
  makeCase('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0', false, 'Edge浏览器'),

  // Edge cases — empty header and generic bot/spider/crawler keywords.
  makeCase('', false, '空User-Agent'),
  makeCase('Mozilla/5.0 (compatible; MyBot/1.0)', true, '包含bot关键词'),
  makeCase('Mozilla/5.0 (compatible; Spider/1.0)', true, '包含spider关键词'),
  makeCase('Mozilla/5.0 (compatible; Crawler/1.0)', true, '包含crawler关键词'),
]
// Banner for the detection test run.
console.log('='.repeat(70))
console.log('爬虫检测测试')
console.log('='.repeat(70))
console.log()

let passed = 0
let failed = 0

for (const [index, testCase] of testCases.entries()) {
  // Minimal request stub — the detector reads the 'user-agent' header.
  const req = { headers: { 'user-agent': testCase.userAgent } }

  const result = detector.isCrawler(req)
  const success = result === testCase.expected
  const status = success ? '✓ 通过' : '✗ 失败'
  const crawlerType = result ? detector.getCrawlerType(testCase.userAgent) : 'N/A'

  if (success) {
    passed++
    console.log(`${status} 测试 ${index + 1}: ${testCase.description}`)
  } else {
    failed++
    console.error(`${status} 测试 ${index + 1}: ${testCase.description}`)
    console.error(`  User-Agent: ${testCase.userAgent.substring(0, 80)}...`)
    console.error(`  预期: ${testCase.expected}, 实际: ${result}`)
  }

  // When detected as a crawler, report which crawler type was identified.
  if (result) {
    console.log(`  识别为: ${crawlerType} 爬虫`)
  }
}

// Summary of the detection test run.
console.log()
console.log('='.repeat(70))
console.log(`测试结果: ${passed} 通过, ${failed} 失败, 共 ${testCases.length} 个测试`)
console.log('='.repeat(70))
console.log()
// --- Crawler-type identification tests ---
console.log('爬虫类型识别测试:')
console.log('-'.repeat(70))

// Each entry pairs a crawler User-Agent fragment with the type label
// detector.getCrawlerType() is expected to return for it.
const crawlerTypes = [
  { userAgent: 'Baiduspider', expected: 'baidu', description: '百度爬虫' },
  { userAgent: 'Googlebot', expected: 'google', description: 'Google爬虫' },
  { userAgent: 'bingbot', expected: 'bing', description: '必应爬虫' },
  { userAgent: '360spider', expected: '360', description: '360爬虫' },
  { userAgent: 'sogou spider', expected: 'sogou', description: '搜狗爬虫' },
  { userAgent: 'facebookexternalhit', expected: 'facebook', description: 'Facebook爬虫' },
  { userAgent: 'Twitterbot', expected: 'twitter', description: 'Twitter爬虫' },
  { userAgent: 'linkedinbot', expected: 'linkedin', description: 'LinkedIn爬虫' },
]
// Run the crawler-type identification tests and report per-case results.
let typePassed = 0
let typeFailed = 0

crawlerTypes.forEach(test => {
  const result = detector.getCrawlerType(test.userAgent)
  const success = result === test.expected

  if (success) {
    typePassed++
    console.log(`✓ ${test.description}: ${result}`)
  } else {
    typeFailed++
    console.error(`✗ ${test.description}: 预期 ${test.expected}, 实际 ${result}`)
  }
})

// Summary of the type-identification run.
console.log()
console.log('='.repeat(70))
console.log(`爬虫类型识别: ${typePassed}/${crawlerTypes.length} 正确`)
console.log('='.repeat(70))

// Exit code: non-zero when ANY test failed.
// Fix: the original only checked `failed` (detection tests), so failures in
// the type-identification tests above were silently ignored by CI.
process.exit(failed === 0 && typeFailed === 0 ? 0 : 1)
Reference in New Issue
Block a user