Files
tyc-webview-v2/server/test-crawler-detection.js

113 lines
5.3 KiB
JavaScript
Raw Normal View History

2026-02-28 12:45:13 +08:00
/**
 * Crawler detection test script.
 * Used to verify that the crawler detection feature works correctly.
 */
const CrawlerDetector = require('./crawler-detector')
// Single detector instance shared by every check in this script.
const detector = new CrawlerDetector()
// Test cases for the isCrawler checks.
// Each row is [userAgent, expected isCrawler result, description]; the rows are
// expanded into { userAgent, expected, description } objects at the end.
const testCases = [
  // Crawler User-Agents
  ['Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)', true, '百度爬虫'],
  ['Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', true, 'Google爬虫'],
  ['Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)', true, '必应爬虫'],
  ['Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)', true, '搜狗爬虫'],
  ['360Spider', true, '360爬虫'],
  ['facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)', true, 'Facebook爬虫'],
  ['Twitterbot/1.0', true, 'Twitter爬虫'],
  ['LinkedInBot/1.0 (compatible; Mozilla/5.0; +https://www.linkedin.com/help/linkedin/answer/8665)', true, 'LinkedIn爬虫'],

  // Regular browser User-Agents
  ['Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', false, 'Chrome浏览器'],
  ['Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0', false, 'Firefox浏览器'],
  ['Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15', false, 'Safari浏览器'],
  ['Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1', false, 'iPhone Safari'],
  ['Mozilla/5.0 (Linux; Android 13; SM-S908B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36', false, 'Android Chrome'],
  ['Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0', false, 'Edge浏览器'],

  // Edge cases
  ['', false, '空User-Agent'],
  ['Mozilla/5.0 (compatible; MyBot/1.0)', true, '包含bot关键词'],
  ['Mozilla/5.0 (compatible; Spider/1.0)', true, '包含spider关键词'],
  ['Mozilla/5.0 (compatible; Crawler/1.0)', true, '包含crawler关键词']
].map(([userAgent, expected, description]) => ({ userAgent, expected, description }))
// Report header for the isCrawler checks.
console.log('='.repeat(70))
console.log('爬虫检测测试')
console.log('='.repeat(70))
console.log()

// Maximum number of User-Agent characters echoed in a failure report.
const MAX_UA_PREVIEW_LENGTH = 80

// Pass/fail counters for the isCrawler checks; `failed` feeds the exit code.
let passed = 0
let failed = 0

testCases.forEach((testCase, index) => {
  // Minimal request stub: only the User-Agent header is populated.
  const req = {
    headers: {
      'user-agent': testCase.userAgent
    }
  }
  const result = detector.isCrawler(req)
  const success = result === testCase.expected
  const status = success ? '✓ 通过' : '✗ 失败'

  if (success) {
    passed++
    console.log(`${status} 测试 ${index + 1}: ${testCase.description}`)
  } else {
    failed++
    const { userAgent } = testCase
    // Only append an ellipsis when the User-Agent was actually truncated, so
    // short (or empty) values are reported exactly as they were sent.
    const preview = userAgent.length > MAX_UA_PREVIEW_LENGTH
      ? `${userAgent.substring(0, MAX_UA_PREVIEW_LENGTH)}...`
      : userAgent
    console.error(`${status} 测试 ${index + 1}: ${testCase.description}`)
    console.error(` User-Agent: ${preview}`)
    console.error(` 预期: ${testCase.expected}, 实际: ${result}`)
  }

  // The crawler type is only looked up when the detector flagged the request.
  if (result) {
    console.log(` 识别为: ${detector.getCrawlerType(testCase.userAgent)} 爬虫`)
  }
})

// Summary of the isCrawler checks.
console.log()
console.log('='.repeat(70))
console.log(`测试结果: ${passed} 通过, ${failed} 失败, 共 ${testCases.length} 个测试`)
console.log('='.repeat(70))
console.log()
// Crawler type identification tests
console.log('爬虫类型识别测试:')
console.log('-'.repeat(70))

// Each row is [userAgent, expected getCrawlerType result, description]; the
// rows are expanded into { userAgent, expected, description } objects.
const crawlerTypes = [
  ['Baiduspider', 'baidu', '百度爬虫'],
  ['Googlebot', 'google', 'Google爬虫'],
  ['bingbot', 'bing', '必应爬虫'],
  ['360spider', '360', '360爬虫'],
  ['sogou spider', 'sogou', '搜狗爬虫'],
  ['facebookexternalhit', 'facebook', 'Facebook爬虫'],
  ['Twitterbot', 'twitter', 'Twitter爬虫'],
  ['linkedinbot', 'linkedin', 'LinkedIn爬虫']
].map(([userAgent, expected, description]) => ({ userAgent, expected, description }))
// Number of crawler-type checks whose result matched the expected type.
let typePassed = 0

for (const { userAgent, expected, description } of crawlerTypes) {
  const actual = detector.getCrawlerType(userAgent)

  // Mismatch: report expected vs. actual on stderr and move on.
  if (actual !== expected) {
    console.error(`${description}: 预期 ${expected}, 实际 ${actual}`)
    continue
  }

  typePassed += 1
  console.log(`${description}: ${actual}`)
}

// Summary of the crawler-type checks, framed by horizontal rules.
const typeReportRule = '='.repeat(70)
console.log()
console.log(typeReportRule)
console.log(`爬虫类型识别: ${typePassed}/${crawlerTypes.length} 正确`)
console.log(typeReportRule)
// Exit code: 0 only when every isCrawler check AND every crawler-type check
// passed. A mismatch in either group must surface as a non-zero status so
// that callers such as CI do not treat a failing run as green.
const allTypeChecksPassed = typePassed === crawlerTypes.length
process.exit(failed === 0 && allTypeChecksPassed ? 0 : 1)