import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/** JSON files (resolved relative to this script's directory) to desensitize. */
const INPUT_FILES = ['DWBG9FB2.json', 'DWBG9FB3.json'];

/**
 * Organisation-type suffixes that are kept verbatim when masking a company
 * name. Order matters: the first suffix the name ends with wins, so longer /
 * more specific suffixes are listed before the generic ones.
 */
const ORG_SUFFIXES = [
  '股份有限公司城区支行',
  '农村商业银行股份有限公司',
  '物业服务有限公司南宁分公司',
  '物业服务有限公司',
  '房地产开发有限公司',
  '国际大酒店有限公司',
  '生态旅游家园开发有限公司',
  '农业发展有限公司',
  '农资有限公司',
  '贸易有限公司',
  '发展有限公司',
  '开发有限公司',
  '股份有限公司',
  '农村合作银行',
  '信用合作联社',
  '有限公司',
  '分公司',
];

/** Region prefixes that stay visible in a masked company name (checked in order). */
const KEPT_REGION_PREFIXES = ['广西', '湖北', '桂林市', '兴安县', '资源县'];

/** A name containing any of these keywords is treated as an organisation. */
const COMPANY_KEYWORDS = /银行|公司|联社|集团|酒店|有限|股份/;

/** Strings the name-extraction patterns pick up that must not be treated as person names. */
const NAME_BLACKLIST = new Set([
  '当事人',
  '法律关系',
  '法律关',
  '案涉房',
  '案涉',
  '房屋',
  '所有权',
  '合同法律',
  '侵权法律',
  '物业服务',
  '诉讼请求',
  '正当理由',
  '民发物业',
  '民发实',
  '民发',
  '广西广为',
  '广西中硕',
  '广西',
]);

/**
 * Court-name masking rules, most specific first. Exactly one rule is applied
 * per court name (see desensitizeCourt).
 */
const COURT_RULES = [
  [/^.+?市.+?区/, '**市**区'],
  [/^.+?市.+?县/, '**市**县'],
  [/^.+?县/, '**县'],
  [/^.+?市/, '**市'],
];

/**
 * Ordered [pattern, replacement] rules applied to free text by
 * desensitizeAddressText. Each rule runs on the output of the previous one,
 * so the order must not be changed.
 */
const ADDRESS_RULES = [
  // Unified social credit code: keep the label, mask the code itself.
  [
    /统一社会信用代码[::]?\s*([0-9A-Z]{15,18})/g,
    (_whole, code) => '统一社会信用代码:' + desensitizeIdCard(code),
  ],
  [/账号[::]?\s*[\d×]{10,}/g, '账号:********'],
  [/[\u4e00-\u9fa5]{2,6}路\d+号[^,,。;;]*/g, '**路**号****'],
  [/民发[·・][\u4e00-\u9fa5A-Za-z0-9]{2,15}?(?:小区|会所)/g, '****小区'],
  [/民发物业服务有限公司南宁分公司/g, '****物业服务有限公司南宁分公司'],
  [/民发物业服务有限公司/g, '****物业服务有限公司'],
  [/民发物业南宁分公司/g, '****物业南宁分公司'],
  [/民发物业公司/g, '****物业公司'],
  [/广西中硕资产评估有限责任公司/g, '广西****资产评估有限责任公司'],
  [/民发实业集团\(广西\)房地产开发有限公司/g, '****实业集团(广西)****开发有限公司'],
  [/[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区人民法院/g, '**市**区人民法院'],
  [/上诉于([\u4e00-\u9fa5]{2,4})市中级人民法院/g, '上诉于**市中级人民法院'],
  [/开户名称[::]([\u4e00-\u9fa5]{2,4})市中级人民法院/g, '开户名称:**市中级人民法院'],
  [/([\u4e00-\u9fa5]{2,4})县人民法院/g, '**县人民法院'],
  [/住广西壮族自治区[^,,。;;]{2,20}?[区县]/g, '住广西壮族自治区**市**区'],
  [/住所地广西壮族自治区[^,,。;;]{2,30}/g, '住所地广西壮族自治区**市**区****'],
  [/住所地湖北省[^,,。;;]{2,30}/g, '住所地**省**市**区****'],
  [/位于[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区/g, '位于**市**区'],
  [/坐落于[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区/g, '坐落于**市**区'],
  [/系[\u4e00-\u9fa5]{2,5}市[\u4e00-\u9fa5]{2,6}区/g, '系**市**区'],
  [/[\u4e00-\u9fa5·・A-Za-z0-9]+栋\d+单元\d+号/g, '****栋**单元**号'],
];

/** Patterns whose first capture group is a candidate person name in legal text. */
const ROLE_PATTERNS = [
  /(?:原告|被告|上诉人|被上诉人|原审被告人|被告人|负责人|法定代表人|案外人|委托诉讼代理人|代理人|承租人|出租人)[::]([\u4e00-\u9fa5]{2,4})/g,
  /与案外人([\u4e00-\u9fa5]{2,4})签/g,
  /([\u4e00-\u9fa5]{2,4})所有的/g,
  /向([\u4e00-\u9fa5]{2,4})转账/g,
];

/** Pattern whose first capture group is a candidate company name in legal text. */
const COMPANY_PATTERN =
  /([\u4e00-\u9fa5()()·・]{4,40}?(?:有限公司|股份有限公司|合作银行|信用合作联社))/g;

// ── Basic desensitization helpers ─────────────────────────

/**
 * Mask a person's name: keep the first character, replace the rest with "某某".
 * Names that look like organisations are delegated to desensitizeCompany.
 *
 * @param {*} name - Value to mask; non-string / empty values are returned unchanged.
 * @returns {*} Masked name, or the input when it is not a maskable string.
 */
function desensitizePersonName(name) {
  if (!name || typeof name !== 'string') return name;
  if (COMPANY_KEYWORDS.test(name)) return desensitizeCompany(name);
  if (name.length <= 1) return name;
  return name[0] + '某某';
}

/**
 * Mask a company name: keep a region prefix and the organisation suffix,
 * replace everything in between with "****".
 *
 * @param {*} name - Value to mask; non-string / empty values are returned unchanged.
 * @returns {*} Masked company name.
 */
function desensitizeCompany(name) {
  if (!name || typeof name !== 'string') return name;

  const suffix = ORG_SUFFIXES.find((candidate) => name.endsWith(candidate));
  if (suffix === undefined) {
    // Unknown organisation type: keep only the first two characters.
    return name.slice(0, 2) + '****';
  }

  const core = name.slice(0, -suffix.length);
  const keptPrefix = KEPT_REGION_PREFIXES.find((prefix) => core.startsWith(prefix));
  if (keptPrefix !== undefined) return `${keptPrefix}****${suffix}`;
  // Any other county / city prefix is masked as well.
  if (/^.+县/.test(core)) return `**县****${suffix}`;
  if (/^.+市/.test(core)) return `**市****${suffix}`;
  return `${core.slice(0, 2)}****${suffix}`;
}

/**
 * Mask the city / district / county part at the start of a court name.
 *
 * Only the first matching rule in COURT_RULES is applied. Applying the rules
 * one after another would let a later rule re-match the already masked
 * output (e.g. "**市**县…" being collapsed to "**县…").
 *
 * @param {*} court - Court name; non-string / empty values are returned unchanged.
 * @returns {*} Masked court name.
 */
function desensitizeCourt(court) {
  if (!court || typeof court !== 'string') return court;
  for (const [pattern, mask] of COURT_RULES) {
    if (pattern.test(court)) return court.replace(pattern, mask);
  }
  return court;
}

/**
 * Replace every occurrence of the masked provinces (湖北省, 辽宁省) with "**省".
 *
 * @param {*} text - Text to process; non-string / empty values are returned unchanged.
 * @returns {*} Text with those province names masked.
 */
function desensitizeProvince(text) {
  if (!text || typeof text !== 'string') return text;
  return text.replace(/湖北省|辽宁省/g, '**省');
}

/**
 * Mask province names inside an area-statistics string.
 *
 * This is exactly the province masking: once every "辽宁省" has been
 * replaced there is nothing left for a "辽宁省(" specific rule to match.
 *
 * @param {*} text - Text to process; non-string / empty values are returned unchanged.
 * @returns {*} Text with province names masked.
 */
function desensitizeAreaStat(text) {
  return desensitizeProvince(text);
}

/**
 * Mask an identity number or credit code.
 * - 18 characters: keep the first 6 and last 4, mask the 8 in between.
 * - 15-17 characters of [0-9A-Z]: keep the first 4 and last 4.
 * - anything else is returned unchanged.
 *
 * @param {*} idCard - Value to mask; non-string / empty values are returned unchanged.
 * @returns {*} Masked value.
 */
function desensitizeIdCard(idCard) {
  if (!idCard || typeof idCard !== 'string') return idCard;
  if (idCard.length === 18) {
    return idCard.substring(0, 6) + '********' + idCard.substring(14);
  }
  if (/^[0-9A-Z]{15,18}$/.test(idCard)) {
    return idCard.substring(0, 4) + '**********' + idCard.substring(idCard.length - 4);
  }
  return idCard;
}

/**
 * Mask the middle four characters of an 11-character mobile number.
 *
 * @param {*} mobile - Value to mask; anything that is not an 11-character string is returned unchanged.
 * @returns {*} Masked mobile number.
 */
function desensitizeMobile(mobile) {
  if (!mobile || typeof mobile !== 'string' || mobile.length !== 11) return mobile;
  return mobile.substring(0, 3) + '****' + mobile.substring(7);
}

/**
 * Apply all ADDRESS_RULES, in order, to a piece of free text (credit codes,
 * account numbers, street addresses, known organisation names, courts,
 * domiciles, building/unit numbers).
 *
 * @param {*} text - Text to process; non-string / empty values are returned unchanged.
 * @returns {*} Text with the matched fragments masked.
 */
function desensitizeAddressText(text) {
  if (!text || typeof text !== 'string') return text;
  return ADDRESS_RULES.reduce(
    (current, [pattern, replacement]) => current.replace(pattern, replacement),
    text,
  );
}

// ── Collecting replacement mappings from the data ─────────

/**
 * Decide whether a string is accepted as a person name: 2-4 characters in the
 * \u4e00-\u9fa5 range, no company keyword, not blacklisted.
 *
 * @param {*} name - Candidate name.
 * @returns {*} Truthy when the candidate is accepted, falsy otherwise.
 */
function isValidPersonName(name) {
  return (
    name &&
    name.length >= 2 &&
    name.length <= 4 &&
    !COMPANY_KEYWORDS.test(name) &&
    !NAME_BLACKLIST.has(name) &&
    /^[\u4e00-\u9fa5]+$/.test(name)
  );
}

/**
 * Recursively walk the parsed JSON and collect every person, company and
 * court name that should later be replaced inside free-text fields.
 *
 * @param {*} data - Parsed JSON document.
 * @returns {{personNames: Set, companyNames: Set, courtNames: Set}} Collected names.
 */
function collectMappings(data) {
  const personNames = new Set();
  const companyNames = new Set();
  const courtNames = new Set();

  function walk(node) {
    if (!node || typeof node !== 'object') return;
    if (Array.isArray(node)) {
      for (const item of node) walk(item);
      return;
    }

    if (node.c_mc) {
      if (node.n_dsrlx === '企业组织' || COMPANY_KEYWORDS.test(node.c_mc)) {
        companyNames.add(node.c_mc);
      } else if (isValidPersonName(node.c_mc)) {
        personNames.add(node.c_mc);
      }
    }
    if (node.n_jbfy) courtNames.add(node.n_jbfy);

    for (const field of ['c_gkws_dsr', 'c_gkws_pjjg']) {
      if (node[field]) extractNamesFromLegalText(node[field], personNames, companyNames);
    }

    for (const child of Object.values(node)) walk(child);
  }

  walk(data);
  return { personNames, companyNames, courtNames };
}

/**
 * Scan legal text for person and company names and add them to the given sets.
 *
 * @param {*} text - Text to scan; non-string / empty values are ignored.
 * @param {Set} personNames - Receives accepted person names (mutated).
 * @param {Set} companyNames - Receives matched company names (mutated).
 * @returns {void}
 */
function extractNamesFromLegalText(text, personNames, companyNames) {
  if (!text || typeof text !== 'string') return;

  // matchAll iterates on a private copy of each regex, so the shared
  // module-level patterns never carry lastIndex state between calls.
  for (const rolePattern of ROLE_PATTERNS) {
    for (const [, candidate] of text.matchAll(rolePattern)) {
      if (isValidPersonName(candidate)) personNames.add(candidate);
    }
  }
  for (const [, company] of text.matchAll(COMPANY_PATTERN)) {
    companyNames.add(company);
  }
}

/**
 * Build the list of literal replacements used on free text, longest source
 * string first so that a long name is replaced before any shorter name it
 * contains.
 *
 * @param {Iterable} personNames - Person names to mask.
 * @param {Iterable} companyNames - Company names to mask.
 * @param {Iterable} courtNames - Court names to mask.
 * @returns {Array<{from: *, to: *}>} Replacements sorted by descending `from` length.
 */
function buildReplacementList(personNames, companyNames, courtNames) {
  const replacements = [
    ...Array.from(personNames, (name) => ({ from: name, to: desensitizePersonName(name) })),
    ...Array.from(companyNames, (name) => ({ from: name, to: desensitizeCompany(name) })),
    ...Array.from(courtNames, (court) => ({ from: court, to: desensitizeCourt(court) })),
  ];
  replacements.sort((a, b) => b.from.length - a.from.length);
  return replacements;
}

/**
 * Desensitize a free-text field: apply the literal replacements, then the
 * address rules, then the province masking.
 *
 * @param {*} text - Text to process; non-string / empty values are returned unchanged.
 * @param {Array<{from: *, to: *}>} replacements - Output of buildReplacementList.
 * @returns {*} Desensitized text.
 */
function desensitizeText(text, replacements) {
  if (!text || typeof text !== 'string') return text;

  let result = text;
  for (const { from, to } of replacements) {
    if (from && to && from !== to) {
      // split/join replaces every literal occurrence and, unlike
      // String.prototype.replace, never interprets "$" in the replacement.
      result = result.split(from).join(to);
    }
  }
  return desensitizeProvince(desensitizeAddressText(result));
}

// ── Recursive desensitization ─────────────────────────────

/**
 * Return a desensitized deep copy of a parsed JSON value. Known keys are
 * masked with their dedicated helper; every other value is processed
 * recursively.
 *
 * @param {*} obj - Parsed JSON value.
 * @param {Array<{from: *, to: *}>} replacements - Output of buildReplacementList.
 * @returns {*} Desensitized copy (primitives are returned as they are).
 */
function desensitizeObject(obj, replacements) {
  if (obj === null || typeof obj !== 'object') return obj;
  if (Array.isArray(obj)) {
    return obj.map((item) => desensitizeObject(item, replacements));
  }

  const result = {};
  for (const [key, value] of Object.entries(obj)) {
    switch (key) {
      case 'name':
        result[key] = desensitizePersonName(value);
        break;
      case 'id_card':
        result[key] = desensitizeIdCard(value);
        break;
      case 'mobile':
        result[key] = desensitizeMobile(value);
        break;
      case 'c_mc': {
        const isCompany = obj.n_dsrlx === '企业组织' || COMPANY_KEYWORDS.test(value);
        result[key] = isCompany ? desensitizeCompany(value) : desensitizePersonName(value);
        break;
      }
      case 'c_gkws_dsr':
      case 'c_gkws_pjjg':
        result[key] = desensitizeText(value, replacements);
        break;
      case 'n_jbfy':
        result[key] = desensitizeCourt(value);
        break;
      case 'c_ssdy':
        // Copied through untouched (not masked and not recursed into).
        result[key] = value;
        break;
      case 'area_stat':
        result[key] = desensitizeAreaStat(value);
        break;
      default:
        result[key] = desensitizeObject(value, replacements);
        break;
    }
  }
  return result;
}

/**
 * Derive the output file name by replacing a trailing ".json" with
 * "_desensitized.json". Only the extension at the very end is stripped, so a
 * ".json" elsewhere in the name is left alone.
 *
 * @param {string} filename - Input file name.
 * @returns {string} Output file name.
 */
function buildOutputName(filename) {
  return `${filename.replace(/\.json$/, '')}_desensitized.json`;
}

/**
 * Desensitize one JSON file located next to this script and write the result
 * to "<name>_desensitized.json" in the same directory.
 *
 * @param {string} filename - Input file name, relative to this script's directory.
 * @returns {{personNames: Set, companyNames: Set, courtNames: Set}} Names collected from the file.
 * @throws {Error} When the file cannot be read, parsed or written.
 */
function processFile(filename) {
  const inputFile = path.join(__dirname, filename);
  const outputFile = path.join(__dirname, buildOutputName(filename));

  const data = JSON.parse(fs.readFileSync(inputFile, 'utf8'));
  const { personNames, companyNames, courtNames } = collectMappings(data);
  const replacements = buildReplacementList(personNames, companyNames, courtNames);
  const desensitizedData = desensitizeObject(data, replacements);
  fs.writeFileSync(outputFile, JSON.stringify(desensitizedData, null, 2), 'utf8');

  console.log(`\n✓ ${filename} 脱敏完成`);
  console.log(`  原始文件:${inputFile}`);
  console.log(`  输出文件:${outputFile}`);
  console.log(`  姓名 ${personNames.size} 个,公司 ${companyNames.size} 个,法院 ${courtNames.size} 个`);

  return { personNames, companyNames, courtNames };
}

// ── Entry point ───────────────────────────────────────────

/**
 * Process every file in INPUT_FILES and print a summary.
 *
 * Files are independent of each other, so a missing or invalid file is
 * reported on stderr and the remaining files are still processed; the
 * process exit code is set to 1 when any file failed.
 *
 * @returns {void}
 */
function main() {
  console.log('开始脱敏处理...');

  const summary = { person: 0, company: 0, court: 0 };
  for (const file of INPUT_FILES) {
    try {
      const stats = processFile(file);
      summary.person += stats.personNames.size;
      summary.company += stats.companyNames.size;
      summary.court += stats.courtNames.size;
    } catch (err) {
      console.error(`\n✗ ${file} 脱敏失败:${err.message}`);
      process.exitCode = 1;
    }
  }

  console.log('\n脱敏摘要:');
  console.log('- 姓名:保留姓氏,名字替换为「某某」');
  console.log('- 公司/机构:保留地区前缀与组织类型,中间替换为「****」');
  console.log('- 法院:市/县/区名称替换为「**」');
  console.log('- 省份/地区:次要省份(湖北、辽宁等)脱敏,广西自治区保留');
  console.log('- 判决书文本:地址、信用代码、路名等同步脱敏');
  console.log('- 身份证号/手机号:按字段规则脱敏');
  console.log(`- 合计处理:姓名 ${summary.person} 个,公司 ${summary.company} 个,法院 ${summary.court} 个`);
}

main();