// report_viewer/public/desensitize.js
// Masks personal, company, court and address data in the report JSON files.
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
// ES modules do not provide __filename / __dirname, so both are derived from
// this module's own URL. __dirname is the directory the input files are read
// from and the output files are written to.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// ── Configuration ─────────────────────────────────────────
// JSON files to process; each is resolved relative to this script's directory.
const INPUT_FILES = ['DWBG9FB2.json', 'DWBG9FB3.json'];
// Organisation-type suffixes that are kept verbatim when a company name is masked.
// ORDER MATTERS: desensitizeCompany uses the first entry the name ends with, so a
// longer, more specific suffix must come before any shorter suffix it ends with
// (e.g. '贸易有限公司' before '有限公司').
const ORG_SUFFIXES = [
'股份有限公司城区支行',
'农村商业银行股份有限公司',
'物业服务有限公司南宁分公司',
'物业服务有限公司',
'房地产开发有限公司',
'国际大酒店有限公司',
'生态旅游家园开发有限公司',
'农业发展有限公司',
'农资有限公司',
'贸易有限公司',
'发展有限公司',
'开发有限公司',
'股份有限公司',
'农村合作银行',
'信用合作联社',
'有限公司',
'分公司',
];
// A name containing any of these keywords is treated as an organisation, not a person.
const COMPANY_KEYWORDS = /银行|公司|联社|集团|酒店|有限|股份/;
// Short strings that the name-extraction regexes can capture but that are not
// personal names; isValidPersonName rejects every entry in this set.
const NAME_BLACKLIST = new Set([
'当事人', '法律关系', '法律关', '案涉房', '案涉', '房屋', '所有权',
'合同法律', '侵权法律', '物业服务', '诉讼请求', '正当理由',
'民发物业', '民发实', '民发', '广西广为', '广西中硕','广西',
]);
// ── 基础脱敏函数 ──────────────────────────────────────────
/**
 * Masks a personal name: the first character (the surname) is kept and the
 * rest is replaced by "某某".
 *
 * Values that look like an organisation (they match COMPANY_KEYWORDS) are
 * delegated to desensitizeCompany instead.
 *
 * @param {*} name - Candidate name; non-string or empty values are returned unchanged.
 * @returns {*} The masked name, or the input itself when there is nothing to mask.
 */
function desensitizePersonName(name) {
  if (!name || typeof name !== 'string') return name;
  if (COMPANY_KEYWORDS.test(name)) return desensitizeCompany(name);

  // Iterate by code point rather than UTF-16 unit: a surname written with a
  // character outside the BMP would otherwise be cut in half by name[0],
  // leaving a lone surrogate in the output.
  const characters = Array.from(name);
  if (characters.length <= 1) return name;
  return `${characters[0]}某某`;
}
/**
 * Masks a company / organisation name while keeping its region prefix and
 * its organisation-type suffix, e.g. "广西大发贸易有限公司" -> "广西****贸易有限公司".
 *
 * The suffix is the first entry of ORG_SUFFIXES that the name ends with. The
 * part in front of it (the "core") is masked as follows:
 *   - a known region prefix is kept verbatim;
 *   - otherwise a core containing a county (县) or city (市) name is replaced
 *     by a generic "**县" / "**市" marker;
 *   - otherwise only the first two characters are kept.
 *
 * @param {*} name - Organisation name; non-string or empty values are returned unchanged.
 * @returns {*} The masked name, or the input itself when there is nothing to mask.
 */
function desensitizeCompany(name) {
  if (!name || typeof name !== 'string') return name;

  // Region prefixes that stay readable in the masked output.
  const keptRegionPrefixes = ['广西', '湖北', '桂林市', '兴安县', '资源县'];

  const suffix = ORG_SUFFIXES.find((candidate) => name.endsWith(candidate));
  if (suffix === undefined) {
    // No recognised organisation type. Never echo the whole name back: a name
    // of two characters or fewer would otherwise pass through fully readable.
    return name.length > 2 ? `${name.slice(0, 2)}****` : '****';
  }

  const core = name.slice(0, -suffix.length);
  const region = keptRegionPrefixes.find((prefix) => core.startsWith(prefix));
  if (region !== undefined) return `${region}****${suffix}`;
  if (/^.+县/.test(core)) return `**县****${suffix}`;
  if (/^.+市/.test(core)) return `**市****${suffix}`;

  // A core of at most two characters is the entire distinctive name (e.g. a
  // brand), so keeping "the first two characters" would disclose it in full.
  const visiblePrefix = core.length > 2 ? core.slice(0, 2) : '';
  return `${visiblePrefix}****${suffix}`;
}
/**
 * Masks the place names at the start of a court name, keeping only the
 * administrative-level markers (市 / 区 / 县).
 *
 * Exactly one rule is applied: the first one that matches, tried from the
 * most specific to the least specific. Applying every rule in sequence would
 * let a later rule re-match text produced by an earlier one; in particular
 * the city+county result "**市**县" would be collapsed to "**县".
 *
 * @param {*} court - Court name; non-string or empty values are returned unchanged.
 * @returns {*} The masked court name, or the input when no rule applies.
 */
function desensitizeCourt(court) {
  if (!court || typeof court !== 'string') return court;

  const rules = [
    [/^.+?市.+?区/, '**市**区'],
    [/^.+?市.+?县/, '**市**县'],
    [/^.+?县/, '**县'],
    [/^.+?市/, '**市'],
  ];
  for (const [pattern, mask] of rules) {
    if (pattern.test(court)) return court.replace(pattern, mask);
  }
  return court;
}
/**
 * Replaces every occurrence of the masked provinces (湖北省, 辽宁省) with "**省".
 *
 * @param {*} text - Text to scan; non-string or empty values are returned unchanged.
 * @returns {*} The text with those province names masked.
 */
function desensitizeProvince(text) {
  const isMaskable = Boolean(text) && typeof text === 'string';
  if (!isMaskable) {
    return text;
  }
  const maskedProvinces = /湖北省|辽宁省/g;
  return text.replace(maskedProvinces, '**省');
}
/**
 * Masks province names inside an area-statistics string.
 *
 * This delegates to desensitizeProvince. The former extra replacement of
 * "辽宁省(" could never match, because desensitizeProvince has already replaced
 * every "辽宁省" by the time it ran, so it has been dropped.
 *
 * @param {*} text - Area statistic text; non-string or empty values are returned unchanged.
 * @returns {*} The text with province names masked.
 */
function desensitizeAreaStat(text) {
  if (!text || typeof text !== 'string') return text;
  return desensitizeProvince(text);
}
/**
 * Masks an identity-card number or a similar registration code.
 *
 *   - Any 18-character value keeps its first 6 and last 4 characters; the
 *     8 characters in between become "*".
 *   - Any other value of 15-18 digits / upper-case letters keeps its first 4
 *     and last 4 characters around a fixed run of ten "*".
 *   - Everything else is returned unchanged.
 *
 * @param {*} idCard - Value to mask; non-string or empty values are returned unchanged.
 * @returns {*} The masked value.
 */
function desensitizeIdCard(idCard) {
  if (!idCard || typeof idCard !== 'string') return idCard;

  if (idCard.length === 18) {
    return `${idCard.substring(0, 6)}********${idCard.substring(14)}`;
  }
  if (/^[0-9A-Z]{15,18}$/.test(idCard)) {
    return `${idCard.substring(0, 4)}**********${idCard.substring(idCard.length - 4)}`;
  }
  return idCard;
}
/**
 * Masks the middle four digits of an 11-character mobile number,
 * e.g. "13812345678" -> "138****5678".
 *
 * @param {*} mobile - Value to mask; anything that is not an 11-character
 *   string is returned unchanged.
 * @returns {*} The masked number.
 */
function desensitizeMobile(mobile) {
  const isMobileLike = Boolean(mobile) && typeof mobile === 'string' && mobile.length === 11;
  if (!isMobileLike) return mobile;
  return `${mobile.substring(0, 3)}****${mobile.substring(7)}`;
}
/**
 * Applies the pattern-based masking rules to free text: credit codes, account
 * numbers, street addresses, a few hard-coded organisation names, court names
 * and domicile / location phrases.
 *
 * Rules run in the listed order, each on the output of the previous one.
 *
 * Separators are matched in both their ASCII and full-width forms (colon,
 * comma, semicolon, parentheses). With ASCII-only classes a full-width colon
 * prevented the credit-code and account rules from matching at all, and a
 * full-width comma did not stop the "up to the next punctuation" rules, which
 * then swallowed the following clause. Full-width characters are written as
 * \uXXXX escapes so they survive tools that normalise punctuation.
 *
 * @param {*} text - Text to mask; non-string or empty values are returned unchanged.
 * @returns {*} The masked text.
 */
function desensitizeAddressText(text) {
  if (!text || typeof text !== 'string') return text;

  const rules = [
    // Unified social credit code: mask the code itself, normalise the label.
    [
      /统一社会信用代码[:\uFF1A]?\s*([0-9A-Z]{15,18})/g,
      (_match, code) => '统一社会信用代码:' + desensitizeIdCard(code),
    ],
    [/账号[:\uFF1A]?\s*[\d×]{10,}/g, '账号:********'],
    // Street address, up to the next clause-ending punctuation mark.
    [/[\u4e00-\u9fa5]{2,6}路\d+号[^,\uFF0C。;\uFF1B]*/g, '**路**号****'],
    // Hard-coded organisation / estate names (longer names first).
    [/民发[·・][\u4e00-\u9fa5A-Za-z0-9]{2,15}?(?:小区|会所)/g, '****小区'],
    [/民发物业服务有限公司南宁分公司/g, '****物业服务有限公司南宁分公司'],
    [/民发物业服务有限公司/g, '****物业服务有限公司'],
    [/民发物业南宁分公司/g, '****物业南宁分公司'],
    [/民发物业公司/g, '****物业公司'],
    [/广西中硕资产评估有限责任公司/g, '广西****资产评估有限责任公司'],
    [
      /民发实业集团[(\uFF08]广西[)\uFF09]房地产开发有限公司/g,
      '****实业集团(广西)****开发有限公司',
    ],
    // Courts.
    [/[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区人民法院/g, '**市**区人民法院'],
    [/上诉于([\u4e00-\u9fa5]{2,4})市中级人民法院/g, '上诉于**市中级人民法院'],
    [/开户名称[:\uFF1A]([\u4e00-\u9fa5]{2,4})市中级人民法院/g, '开户名称:**市中级人民法院'],
    [/([\u4e00-\u9fa5]{2,4})县人民法院/g, '**县人民法院'],
    // Domicile and location phrases.
    [/住广西壮族自治区[^,\uFF0C。;\uFF1B]{2,20}?[区县]/g, '住广西壮族自治区**市**区'],
    [/住所地广西壮族自治区[^,\uFF0C。;\uFF1B]{2,30}/g, '住所地广西壮族自治区**市**区****'],
    [/住所地湖北省[^,\uFF0C。;\uFF1B]{2,30}/g, '住所地**省**市**区****'],
    [/位于[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区/g, '位于**市**区'],
    [/坐落于[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区/g, '坐落于**市**区'],
    [/系[\u4e00-\u9fa5]{2,5}市[\u4e00-\u9fa5]{2,6}区/g, '系**市**区'],
    // Building / unit / room numbers.
    [/[\u4e00-\u9fa5·・A-Za-z0-9]+栋\d+单元\d+号/g, '****栋**单元**号'],
  ];

  return rules.reduce((masked, [pattern, replacement]) => masked.replace(pattern, replacement), text);
}
// ── 从数据中收集替换映射 ──────────────────────────────────
/**
 * Decides whether a captured string is plausibly a personal name: 2-4 Chinese
 * characters, not an organisation keyword match and not on the blacklist.
 *
 * @param {*} name - Candidate string.
 * @returns {*} `true` / `false`; a falsy `name` is returned as-is (callers
 *   only rely on truthiness).
 */
function isValidPersonName(name) {
  if (!name) return name;

  const length = name.length;
  if (!(length >= 2)) return false;
  if (!(length <= 4)) return false;
  if (COMPANY_KEYWORDS.test(name)) return false;
  if (NAME_BLACKLIST.has(name)) return false;

  // Only strings made up entirely of CJK unified ideographs qualify.
  return /^[\u4e00-\u9fa5]+$/.test(name);
}
/**
 * Walks the whole data tree and gathers the names that later have to be
 * replaced inside free text.
 *
 * For every plain object encountered, in this order:
 *   1. `c_mc` is classified as a company (party type '企业组织' or a
 *      COMPANY_KEYWORDS match) or, if it passes isValidPersonName, as a person;
 *   2. `n_jbfy` is recorded as a court name;
 *   3. the legal-text fields are scanned with extractNamesFromLegalText;
 *   4. every property value is visited recursively.
 *
 * @param {*} data - Parsed JSON document.
 * @returns {{personNames: Set, companyNames: Set, courtNames: Set}} The
 *   collected names, in discovery order.
 */
function collectMappings(data) {
  const personNames = new Set();
  const companyNames = new Set();
  const courtNames = new Set();
  const legalTextFields = ['c_gkws_dsr', 'c_gkws_pjjg'];

  const visit = (node) => {
    if (!node || typeof node !== 'object') return;

    if (Array.isArray(node)) {
      for (const item of node) visit(item);
      return;
    }

    const partyName = node.c_mc;
    if (partyName) {
      const isOrganisation = node.n_dsrlx === '企业组织' || COMPANY_KEYWORDS.test(partyName);
      if (isOrganisation) {
        companyNames.add(partyName);
      } else if (isValidPersonName(partyName)) {
        personNames.add(partyName);
      }
    }

    if (node.n_jbfy) courtNames.add(node.n_jbfy);

    for (const field of legalTextFields) {
      const legalText = node[field];
      if (legalText) extractNamesFromLegalText(legalText, personNames, companyNames);
    }

    for (const child of Object.values(node)) visit(child);
  };

  visit(data);
  return { personNames, companyNames, courtNames };
}
function extractNamesFromLegalText(text, personNames, companyNames) {
if (!text || typeof text !== 'string') return;
const rolePatterns = [
/(?:原告|被告|上诉人|被上诉人|原审被告人|被告人|负责人|法定代表人|案外人|委托诉讼代理人|代理人|承租人|出租人)[:]([\u4e00-\u9fa5]{2,4})/g,
/与案外人([\u4e00-\u9fa5]{2,4})签/g,
/([\u4e00-\u9fa5]{2,4})所有的/g,
/向([\u4e00-\u9fa5]{2,4})转账/g,
];
let match;
for (const rolePattern of rolePatterns) {
while ((match = rolePattern.exec(text)) !== null) {
if (isValidPersonName(match[1])) personNames.add(match[1]);
}
}
const companyPattern =
/([\u4e00-\u9fa5()·・]{4,40}?(?:有限公司|股份有限公司|合作银行|信用合作联社))/g;
while ((match = companyPattern.exec(text)) !== null) {
companyNames.add(match[1]);
}
}
/**
 * Turns the collected names into `{ from, to }` replacement pairs.
 *
 * Pairs are produced for persons, then companies, then courts, and finally
 * sorted by descending length of `from`, so that a longer name is replaced
 * before any shorter name it contains.
 *
 * @param {Iterable<string>} personNames - Person names to mask.
 * @param {Iterable<string>} companyNames - Company names to mask.
 * @param {Iterable<string>} courtNames - Court names to mask.
 * @returns {Array<{from: string, to: string}>} Replacement pairs, longest `from` first.
 */
function buildReplacementList(personNames, companyNames, courtNames) {
  const pairsFor = (names, mask) => Array.from(names, (from) => ({ from, to: mask(from) }));

  const replacements = [
    ...pairsFor(personNames, (name) => desensitizePersonName(name)),
    ...pairsFor(companyNames, (name) => desensitizeCompany(name)),
    ...pairsFor(courtNames, (court) => desensitizeCourt(court)),
  ];

  const byLongestSourceFirst = (left, right) => right.from.length - left.from.length;
  replacements.sort(byLongestSourceFirst);
  return replacements;
}
/**
 * Masks a free-text field.
 *
 * First every known name is replaced by its masked form, in the order given
 * by `replacements`. Then the pattern-based address rules and the province
 * rule are applied to the result.
 *
 * @param {*} text - Text to mask; non-string or empty values are returned unchanged.
 * @param {Array<{from: string, to: string}>} replacements - Literal replacement pairs.
 * @returns {*} The masked text.
 */
function desensitizeText(text, replacements) {
  if (!text || typeof text !== 'string') return text;

  let result = text;
  for (const { from, to } of replacements) {
    // Skip empty pairs and no-op pairs (name already equal to its masked form).
    if (!from || !to || from === to) continue;
    // split/join replaces every literal occurrence without interpreting
    // regex or "$" replacement syntax.
    result = result.split(from).join(to);
  }

  return desensitizeProvince(desensitizeAddressText(result));
}
// ── Recursive desensitization ─────────────────────────────
/**
 * Returns a masked deep copy of a parsed JSON value.
 *
 * Arrays and plain objects are rebuilt recursively. Inside objects, the keys
 * below are masked with their dedicated rule; every other key is recursed into.
 *
 *   name                      -> desensitizePersonName
 *   id_card                   -> desensitizeIdCard
 *   mobile                    -> desensitizeMobile
 *   c_mc                      -> company or person rule, chosen from the
 *                                sibling `n_dsrlx` / COMPANY_KEYWORDS
 *   c_gkws_dsr, c_gkws_pjjg   -> desensitizeText (uses `replacements`)
 *   n_jbfy                    -> desensitizeCourt
 *   c_ssdy                    -> copied unchanged
 *   area_stat                 -> desensitizeAreaStat
 *
 * @param {*} obj - Value to mask; primitives and null are returned as-is.
 * @param {Array<{from: string, to: string}>} replacements - Literal name
 *   replacements applied to the free-text fields.
 * @returns {*} The masked copy.
 */
function desensitizeObject(obj, replacements) {
  if (obj === null || typeof obj !== 'object') return obj;
  if (Array.isArray(obj)) {
    return obj.map((item) => desensitizeObject(item, replacements));
  }

  const result = {};
  for (const [key, value] of Object.entries(obj)) {
    switch (key) {
      case 'name':
        result[key] = desensitizePersonName(value);
        break;
      case 'id_card':
        result[key] = desensitizeIdCard(value);
        break;
      case 'mobile':
        result[key] = desensitizeMobile(value);
        break;
      case 'c_mc':
        // Same classification as collectMappings: explicit party type first,
        // keyword match as the fallback.
        result[key] =
          obj.n_dsrlx === '企业组织' || COMPANY_KEYWORDS.test(value)
            ? desensitizeCompany(value)
            : desensitizePersonName(value);
        break;
      case 'c_gkws_dsr':
      case 'c_gkws_pjjg':
        result[key] = desensitizeText(value, replacements);
        break;
      case 'n_jbfy':
        result[key] = desensitizeCourt(value);
        break;
      case 'c_ssdy':
        // Deliberately passed through without masking or recursion.
        result[key] = value;
        break;
      case 'area_stat':
        result[key] = desensitizeAreaStat(value);
        break;
      default:
        result[key] = desensitizeObject(value, replacements);
        break;
    }
  }
  return result;
}
/**
 * Desensitizes one JSON file located next to this script and writes the
 * result to `<name>_desensitized.json` in the same directory.
 *
 * @param {string} filename - Input file name, relative to this script's directory.
 * @returns {{personNames: Set, companyNames: Set, courtNames: Set}} The names
 *   collected from the file (used by the caller for the summary counts).
 * @throws If the file cannot be read, is not valid JSON, or the output cannot be written.
 */
function processFile(filename) {
  const inputFile = path.join(__dirname, filename);
  // Strip only a trailing ".json"; a plain string replace would remove the
  // first ".json" found anywhere in the name.
  const baseName = filename.replace(/\.json$/, '');
  const outputFile = path.join(__dirname, `${baseName}_desensitized.json`);

  const data = JSON.parse(fs.readFileSync(inputFile, 'utf8'));

  // Pass 1 collects every name to replace; pass 2 rebuilds the masked document.
  const { personNames, companyNames, courtNames } = collectMappings(data);
  const replacements = buildReplacementList(personNames, companyNames, courtNames);
  const desensitizedData = desensitizeObject(data, replacements);

  fs.writeFileSync(outputFile, JSON.stringify(desensitizedData, null, 2), 'utf8');

  console.log(`\n${filename} 脱敏完成`);
  console.log(` 原始文件:${inputFile}`);
  console.log(` 输出文件:${outputFile}`);
  console.log(` 姓名 ${personNames.size} 个,公司 ${companyNames.size} 个,法院 ${courtNames.size}`);
  return { personNames, companyNames, courtNames };
}
// ── 执行 ──────────────────────────────────────────────────
// Script entry point: process every configured input file, accumulate the
// per-file name counts, then print a summary of the masking rules applied.
console.log('开始脱敏处理...');
const summary = { person: 0, company: 0, court: 0 };
for (const file of INPUT_FILES) {
  const stats = processFile(file);
  summary.person += stats.personNames.size;
  summary.company += stats.companyNames.size;
  summary.court += stats.courtNames.size;
}
console.log('\n脱敏摘要');
console.log('- 姓名:保留姓氏,名字替换为「某某」');
console.log('- 公司/机构:保留地区前缀与组织类型,中间替换为「****」');
console.log('- 法院:市/县/区名称替换为「**」');
console.log('- 省份/地区:次要省份(湖北、辽宁等)脱敏,广西自治区保留');
console.log('- 判决书文本:地址、信用代码、路名等同步脱敏');
console.log('- 身份证号/手机号:按字段规则脱敏');
console.log(`- 合计处理:姓名 ${summary.person} 个,公司 ${summary.company} 个,法院 ${summary.court}`);