Files
report_viewer/public/desensitize.js
2026-06-12 14:25:45 +08:00

332 lines
12 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
// ES modules have no built-in __filename/__dirname, so both are derived from
// import.meta.url. __dirname is the directory containing this script.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// JSON files to desensitize; processFile() resolves each name against __dirname.
const INPUT_FILES = ['DWBG9FB2.json', 'DWBG9FB3.json'];
// Organisation-name endings that desensitizeCompany() keeps verbatim in its output.
// Order matters: desensitizeCompany() stops at the first entry the name ends with,
// so a more specific ending must appear before any shorter ending it finishes with
// (for example '物业服务有限公司' before '有限公司').
const ORG_SUFFIXES = [
'股份有限公司城区支行',
'农村商业银行股份有限公司',
'物业服务有限公司南宁分公司',
'物业服务有限公司',
'房地产开发有限公司',
'国际大酒店有限公司',
'生态旅游家园开发有限公司',
'农业发展有限公司',
'农资有限公司',
'贸易有限公司',
'发展有限公司',
'开发有限公司',
'股份有限公司',
'农村合作银行',
'信用合作联社',
'有限公司',
'分公司',
];
// A name containing any of these substrings is handled as an organisation, not a person
// (checked by desensitizePersonName, isValidPersonName, collectMappings and desensitizeObject).
const COMPANY_KEYWORDS = /银行|公司|联社|集团|酒店|有限|股份/;
// Strings that isValidPersonName() rejects even though they otherwise look like
// 2-4 character Chinese names (legal terms and fragments of organisation/place names).
const NAME_BLACKLIST = new Set([
'当事人', '法律关系', '法律关', '案涉房', '案涉', '房屋', '所有权',
'合同法律', '侵权法律', '物业服务', '诉讼请求', '正当理由',
'民发物业', '民发实', '民发', '广西广为', '广西中硕','广西',
]);
// ── 基础脱敏函数 ──────────────────────────────────────────
/**
 * Mask a personal name by keeping its first character and replacing the rest
 * with the fixed placeholder "某某".
 *
 * Names that look like organisations (they match COMPANY_KEYWORDS) are handed
 * to desensitizeCompany() instead.
 *
 * @param {*} name - Candidate name; non-string and empty values are returned as-is.
 * @returns {*} The masked name, or the input unchanged when it is not a string,
 *   is empty, or is a single character.
 */
function desensitizePersonName(name) {
  const isMaskable = typeof name === 'string' && name !== '';
  if (!isMaskable) {
    return name;
  }
  if (COMPANY_KEYWORDS.test(name)) {
    return desensitizeCompany(name);
  }
  // A one-character name has nothing left to hide once the first character is kept.
  return name.length > 1 ? `${name.charAt(0)}某某` : name;
}
/**
 * Mask an organisation name while keeping its organisation-type suffix.
 *
 * The first entry of ORG_SUFFIXES that the name ends with is preserved. The
 * part before it (the "core") is reduced to:
 *   - a known region prefix followed by "****", when the core starts with one;
 *   - "**县****" / "**市****" when the core contains a county / city marker
 *     after at least one other character;
 *   - otherwise its first two characters followed by "****".
 * Names without a recognised suffix keep their first two characters only.
 *
 * A core (or suffix-less name) of two characters or fewer is masked entirely:
 * keeping "the first two characters" of such a value would emit it unchanged,
 * e.g. "民发物业服务有限公司" used to become "民发****物业服务有限公司".
 *
 * @param {*} name - Organisation name; non-string and empty values are returned as-is.
 * @returns {*} The masked name, or the input unchanged when it is not a non-empty string.
 */
function desensitizeCompany(name) {
  if (!name || typeof name !== 'string') return name;

  const MASK = '****';
  const KEPT_PREFIX_LENGTH = 2;
  // Region prefixes that are kept verbatim; checked in this order.
  const knownRegions = ['广西', '湖北', '桂林市', '兴安县', '资源县'];

  // Keep a short prefix only when something is actually left to hide behind the mask.
  const maskHead = (head) =>
    head.length > KEPT_PREFIX_LENGTH ? head.slice(0, KEPT_PREFIX_LENGTH) + MASK : MASK;

  const suffix = ORG_SUFFIXES.find((candidate) => name.endsWith(candidate));
  if (suffix === undefined) {
    return maskHead(name);
  }

  const core = name.slice(0, -suffix.length);
  const region = knownRegions.find((prefix) => core.startsWith(prefix));
  if (region !== undefined) return `${region}${MASK}${suffix}`;
  if (/^.+县/.test(core)) return `**县${MASK}${suffix}`;
  if (/^.+市/.test(core)) return `**市${MASK}${suffix}`;
  return maskHead(core) + suffix;
}
/**
 * Mask the place names at the start of a court name.
 *
 * Rules are tried from most to least specific and only the first one that
 * matches is applied:
 *   "<...>市<...>区"  -> "**市**区"
 *   "<...>市<...>县"  -> "**市**县"
 *   "<...>县"         -> "**县"
 *   "<...>市"         -> "**市"
 *
 * Applying every rule in sequence (as a chain of replaces) is incorrect: the
 * output of the city+county rule, "**市**县", itself matches the county rule
 * and would be collapsed to "**县".
 *
 * @param {*} court - Court name; non-string and empty values are returned as-is.
 * @returns {*} The court name with its leading place names masked, or the
 *   input unchanged when no rule matches.
 */
function desensitizeCourt(court) {
  if (!court || typeof court !== 'string') return court;

  // Non-global patterns: .test() keeps no state between calls.
  const rules = [
    [/^(.+?市)(.+?区)/, '**市**区'],
    [/^(.+?市)(.+?县)/, '**市**县'],
    [/^(.+?县)/, '**县'],
    [/^(.+?市)/, '**市'],
  ];

  for (const [pattern, mask] of rules) {
    if (pattern.test(court)) {
      return court.replace(pattern, mask);
    }
  }
  return court;
}
/**
 * Replace every occurrence of the province names 湖北省 and 辽宁省 with "**省".
 *
 * @param {*} text - Text to scan; non-string and empty values are returned as-is.
 * @returns {*} The text with both province names masked.
 */
function desensitizeProvince(text) {
  if (typeof text !== 'string' || text === '') {
    return text;
  }
  // The replacement never produces either province name, so one alternation
  // pass gives the same result as two consecutive passes.
  const maskedProvinces = /(?:湖北|辽宁)省/g;
  return text.replace(maskedProvinces, '**省');
}
/**
 * Mask province names in an area-statistics string.
 *
 * The text is first passed through desensitizeProvince(); any "辽宁省("
 * still present afterwards is then rewritten to "**省(".
 *
 * @param {*} text - Text to mask; non-string and empty values are returned as-is.
 * @returns {*} The masked text.
 */
function desensitizeAreaStat(text) {
  const isUsable = typeof text === 'string' && text.length > 0;
  if (!isUsable) {
    return text;
  }
  const provinceMasked = desensitizeProvince(text);
  return provinceMasked.replace(/辽宁省\(/g, '**省(');
}
/**
 * Mask the middle of an identity number or registration code.
 *
 * - Any 18-character string keeps its first 6 and last 4 characters, with the
 *   8 characters in between replaced by asterisks.
 * - Otherwise, a string of 15-18 digits / upper-case letters keeps its first 4
 *   and last 4 characters around a fixed run of 10 asterisks.
 * - Anything else is returned unchanged.
 *
 * @param {*} idCard - Value to mask; non-string and empty values are returned as-is.
 * @returns {*} The masked value.
 */
function desensitizeIdCard(idCard) {
  if (typeof idCard !== 'string' || idCard === '') {
    return idCard;
  }

  if (idCard.length === 18) {
    const head = idCard.slice(0, 6);
    const tail = idCard.slice(14);
    return `${head}${'*'.repeat(8)}${tail}`;
  }

  const looksLikeCode = /^[0-9A-Z]{15,18}$/.test(idCard);
  if (!looksLikeCode) {
    return idCard;
  }
  return `${idCard.slice(0, 4)}${'*'.repeat(10)}${idCard.slice(-4)}`;
}
/**
 * Mask characters 4-7 of an 11-character mobile number.
 *
 * @param {*} mobile - Value to mask; anything that is not an 11-character
 *   string is returned unchanged.
 * @returns {*} The number with its first 3 and last 4 characters kept around "****".
 */
function desensitizeMobile(mobile) {
  const isElevenCharString = typeof mobile === 'string' && mobile.length === 11;
  if (!isElevenCharString) {
    return mobile;
  }
  const prefix = mobile.slice(0, 3);
  const lastFour = mobile.slice(7);
  return `${prefix}****${lastFour}`;
}
/**
 * Mask addresses, account numbers, registration codes and a fixed set of
 * organisation / court names inside free text.
 *
 * The rules below are applied one after another, in order, each to the output
 * of the previous one. Punctuation is matched in both its ASCII and its
 * full-width form (colon U+FF1A, comma U+FF0C, semicolon U+FF1B, parentheses
 * U+FF08/U+FF09): Chinese text normally uses the full-width characters, and
 * matching only the ASCII ones leaves such text unmasked and lets the
 * "up to the next delimiter" rules run past full-width commas.
 *
 * @param {*} text - Text to mask; non-string and empty values are returned as-is.
 * @returns {*} The masked text.
 */
function desensitizeAddressText(text) {
  if (!text || typeof text !== 'string') return text;

  // [pattern, replacement] pairs; order is significant.
  const rules = [
    // Unified social credit code: keep the label, mask the code itself.
    [
      /统一社会信用代码[:\uFF1A]?\s*([0-9A-Z]{15,18})/g,
      (_match, code) => '统一社会信用代码:' + desensitizeIdCard(code),
    ],
    // Bank account numbers (possibly already partly hidden with ×).
    [/账号[:\uFF1A]?\s*[\d×]{10,}/g, '账号:********'],
    // Street address: "<road>路<number>号" plus everything up to the next delimiter.
    [/[\u4e00-\u9fa5]{2,6}路\d+号[^,\uFF0C。;\uFF1B]*/g, '**路**号****'],
    // Specific estate and organisation names.
    [/民发[·・][\u4e00-\u9fa5A-Za-z0-9]{2,15}?(?:小区|会所)/g, '****小区'],
    [/民发物业服务有限公司南宁分公司/g, '****物业服务有限公司南宁分公司'],
    [/民发物业服务有限公司/g, '****物业服务有限公司'],
    [/民发物业南宁分公司/g, '****物业南宁分公司'],
    [/民发物业公司/g, '****物业公司'],
    [/广西中硕资产评估有限责任公司/g, '广西****资产评估有限责任公司'],
    [
      /民发实业集团[(\uFF08]广西[)\uFF09]房地产开发有限公司/g,
      '****实业集团(广西)****开发有限公司',
    ],
    // Court names.
    [/[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区人民法院/g, '**市**区人民法院'],
    [/上诉于([\u4e00-\u9fa5]{2,4})市中级人民法院/g, '上诉于**市中级人民法院'],
    [/开户名称[:\uFF1A]([\u4e00-\u9fa5]{2,4})市中级人民法院/g, '开户名称:**市中级人民法院'],
    [/([\u4e00-\u9fa5]{2,4})县人民法院/g, '**县人民法院'],
    // Residence / registered-office addresses.
    [/住广西壮族自治区[^,\uFF0C。;\uFF1B]{2,20}?[区县]/g, '住广西壮族自治区**市**区'],
    [/住所地广西壮族自治区[^,\uFF0C。;\uFF1B]{2,30}/g, '住所地广西壮族自治区**市**区****'],
    [/住所地湖北省[^,\uFF0C。;\uFF1B]{2,30}/g, '住所地**省**市**区****'],
    // Property locations.
    [/位于[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区/g, '位于**市**区'],
    [/坐落于[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区/g, '坐落于**市**区'],
    [/系[\u4e00-\u9fa5]{2,5}市[\u4e00-\u9fa5]{2,6}区/g, '系**市**区'],
    [/[\u4e00-\u9fa5·・A-Za-z0-9]+栋\d+单元\d+号/g, '****栋**单元**号'],
  ];

  return rules.reduce(
    (masked, [pattern, replacement]) => masked.replace(pattern, replacement),
    text,
  );
}
// ── 从数据中收集替换映射 ──────────────────────────────────
/**
 * Decide whether a value looks like a Chinese personal name worth masking.
 *
 * A candidate qualifies when it is 2-4 characters long, consists only of
 * characters in the range U+4E00-U+9FA5, does not match COMPANY_KEYWORDS and
 * is not listed in NAME_BLACKLIST.
 *
 * @param {*} name - Candidate name.
 * @returns {*} `true` / `false` for truthy input; a falsy input is returned
 *   unchanged (callers only use the result as a condition).
 */
function isValidPersonName(name) {
  if (!name) {
    return name;
  }
  const hasPlausibleLength = name.length >= 2 && name.length <= 4;
  if (!hasPlausibleLength) {
    return false;
  }
  if (COMPANY_KEYWORDS.test(name) || NAME_BLACKLIST.has(name)) {
    return false;
  }
  return /^[\u4e00-\u9fa5]+$/.test(name);
}
/**
 * Walk a parsed JSON value depth-first and collect every person, organisation
 * and court name that should later be replaced inside free text.
 *
 * For each (non-array) object visited:
 *   - `c_mc` goes to the organisation set when `n_dsrlx` is '企业组织' or the
 *     value matches COMPANY_KEYWORDS, otherwise to the person set if
 *     isValidPersonName() accepts it;
 *   - a truthy `n_jbfy` goes to the court set;
 *   - truthy `c_gkws_dsr` / `c_gkws_pjjg` values are scanned with
 *     extractNamesFromLegalText();
 *   - every property value is then visited in turn.
 *
 * @param {*} data - Parsed JSON value (object, array or primitive).
 * @returns {{personNames: Set, companyNames: Set, courtNames: Set}} The collected names.
 */
function collectMappings(data) {
  const personNames = new Set();
  const companyNames = new Set();
  const courtNames = new Set();
  const legalTextFields = ['c_gkws_dsr', 'c_gkws_pjjg'];

  // Classify the party name carried by a single record, if it has one.
  const recordParty = (node) => {
    const partyName = node.c_mc;
    if (!partyName) return;
    const isOrganisation =
      node.n_dsrlx === '企业组织' || COMPANY_KEYWORDS.test(partyName);
    if (isOrganisation) {
      companyNames.add(partyName);
      return;
    }
    if (isValidPersonName(partyName)) {
      personNames.add(partyName);
    }
  };

  const visit = (node) => {
    if (!node || typeof node !== 'object') return;

    if (Array.isArray(node)) {
      for (const item of node) visit(item);
      return;
    }

    recordParty(node);
    if (node.n_jbfy) {
      courtNames.add(node.n_jbfy);
    }
    for (const field of legalTextFields) {
      const legalText = node[field];
      if (legalText) {
        extractNamesFromLegalText(legalText, personNames, companyNames);
      }
    }
    for (const child of Object.values(node)) visit(child);
  };

  visit(data);
  return { personNames, companyNames, courtNames };
}
/**
 * Scan free legal text for person and organisation names and add them to the
 * supplied sets.
 *
 * Person names are taken from the 2-4 Chinese characters that follow a role
 * label and a colon, or that appear in a few fixed phrases ("与案外人…签",
 * "…所有的", "向…转账"); each candidate must also pass isValidPersonName().
 * Organisation names are the shortest runs of 4-40 allowed characters that end
 * in one of the recognised organisation endings.
 *
 * The colon after a role label and the parentheses inside organisation names
 * are accepted in both ASCII and full-width form (U+FF1A, U+FF08, U+FF09);
 * Chinese text normally uses the full-width characters, which the ASCII-only
 * patterns never matched.
 *
 * @param {*} text - Text to scan; non-string and empty values are ignored.
 * @param {Set} personNames - Receives the person names found (mutated).
 * @param {Set} companyNames - Receives the organisation names found (mutated).
 * @returns {void}
 */
function extractNamesFromLegalText(text, personNames, companyNames) {
  if (!text || typeof text !== 'string') return;

  const personPatterns = [
    /(?:原告|被告|上诉人|被上诉人|原审被告人|被告人|负责人|法定代表人|案外人|委托诉讼代理人|代理人|承租人|出租人)[:\uFF1A]([\u4e00-\u9fa5]{2,4})/g,
    /与案外人([\u4e00-\u9fa5]{2,4})签/g,
    /([\u4e00-\u9fa5]{2,4})所有的/g,
    /向([\u4e00-\u9fa5]{2,4})转账/g,
  ];
  // matchAll iterates on a private copy of each pattern, so no lastIndex
  // bookkeeping is needed.
  for (const pattern of personPatterns) {
    for (const [, candidate] of text.matchAll(pattern)) {
      if (isValidPersonName(candidate)) personNames.add(candidate);
    }
  }

  const companyPattern =
    /([\u4e00-\u9fa5()\uFF08\uFF09·・]{4,40}?(?:有限公司|股份有限公司|合作银行|信用合作联社))/g;
  for (const [, company] of text.matchAll(companyPattern)) {
    companyNames.add(company);
  }
}
/**
 * Turn the collected names into an ordered list of literal text replacements.
 *
 * Every person, organisation and court name is paired with its masked form
 * (from desensitizePersonName / desensitizeCompany / desensitizeCourt). The
 * list is sorted by the length of the original text, longest first, so that a
 * longer name is replaced before any shorter name contained in it.
 *
 * @param {Iterable} personNames - Person names to mask.
 * @param {Iterable} companyNames - Organisation names to mask.
 * @param {Iterable} courtNames - Court names to mask.
 * @returns {Array<{from: *, to: *}>} Replacement pairs, longest `from` first.
 */
function buildReplacementList(personNames, companyNames, courtNames) {
  const pairUp = (names, mask) => Array.from(names, (from) => ({ from, to: mask(from) }));

  const replacements = [
    ...pairUp(personNames, (name) => desensitizePersonName(name)),
    ...pairUp(companyNames, (name) => desensitizeCompany(name)),
    ...pairUp(courtNames, (court) => desensitizeCourt(court)),
  ];

  const byLengthDescending = (left, right) => right.from.length - left.from.length;
  replacements.sort(byLengthDescending);
  return replacements;
}
/**
 * Mask a block of free text.
 *
 * First every `from` string in `replacements` is replaced by its `to` string
 * (pairs with an empty side, or whose two sides are equal, are skipped), in
 * list order; the result is then passed through desensitizeAddressText() and
 * desensitizeProvince().
 *
 * @param {*} text - Text to mask; non-string and empty values are returned as-is.
 * @param {Iterable<{from: *, to: *}>} replacements - Literal replacement pairs.
 * @returns {*} The masked text.
 */
function desensitizeText(text, replacements) {
  if (typeof text !== 'string' || text === '') {
    return text;
  }

  let masked = text;
  for (const { from, to } of replacements) {
    const shouldSwap = Boolean(from) && Boolean(to) && from !== to;
    if (!shouldSwap) continue;
    // split/join replaces every literal occurrence without regex escaping.
    masked = masked.split(from).join(to);
  }

  return desensitizeProvince(desensitizeAddressText(masked));
}
// ── 递归脱敏 ──────────────────────────────────────────────
/**
 * Per-key masking rules used by desensitizeObject().
 *
 * Each handler receives the property value, the object that owns it and the
 * replacement list, and returns the value to store in the output. Keys that
 * are not listed here are processed recursively instead.
 */
const FIELD_MASKERS = new Map([
  ['name', (value) => desensitizePersonName(value)],
  ['id_card', (value) => desensitizeIdCard(value)],
  ['mobile', (value) => desensitizeMobile(value)],
  [
    'c_mc',
    (value, owner) => {
      const isOrganisation = owner.n_dsrlx === '企业组织' || COMPANY_KEYWORDS.test(value);
      return isOrganisation ? desensitizeCompany(value) : desensitizePersonName(value);
    },
  ],
  ['c_gkws_dsr', (value, _owner, replacements) => desensitizeText(value, replacements)],
  ['c_gkws_pjjg', (value, _owner, replacements) => desensitizeText(value, replacements)],
  ['n_jbfy', (value) => desensitizeCourt(value)],
  // Copied through untouched (not masked and not recursed into).
  ['c_ssdy', (value) => value],
  ['area_stat', (value) => desensitizeAreaStat(value)],
]);

/**
 * Recursively build a desensitized copy of a parsed JSON value.
 *
 * Primitives and null are returned as-is, arrays are mapped element by
 * element, and objects are rebuilt key by key: keys listed in FIELD_MASKERS
 * are masked by their handler, every other value is processed recursively.
 *
 * @param {*} obj - Parsed JSON value.
 * @param {Array<{from: *, to: *}>} replacements - Literal replacements passed
 *   on to desensitizeText() for the free-text fields.
 * @returns {*} The desensitized copy.
 */
function desensitizeObject(obj, replacements) {
  const isContainer = obj !== null && typeof obj === 'object';
  if (!isContainer) {
    return obj;
  }
  if (Array.isArray(obj)) {
    return obj.map((item) => desensitizeObject(item, replacements));
  }

  const masked = {};
  for (const key of Object.keys(obj)) {
    const value = obj[key];
    const maskField = FIELD_MASKERS.get(key);
    masked[key] = maskField
      ? maskField(value, obj, replacements)
      : desensitizeObject(value, replacements);
  }
  return masked;
}
/**
 * Desensitize one JSON file that sits next to this script.
 *
 * Reads `<__dirname>/<filename>`, collects the names it contains, writes the
 * masked copy to `<__dirname>/<name without ".json">_desensitized.json`
 * (pretty-printed with two-space indentation) and logs a short report.
 *
 * @param {string} filename - Name of the input file, relative to this script's directory.
 * @returns {{personNames: Set, companyNames: Set, courtNames: Set}} The names
 *   collected from the file.
 * @throws Whatever fs.readFileSync, JSON.parse or fs.writeFileSync throw
 *   (missing file, invalid JSON, unwritable target).
 */
function processFile(filename) {
  const stem = filename.replace('.json', '');
  const inputFile = path.join(__dirname, filename);
  const outputFile = path.join(__dirname, `${stem}_desensitized.json`);

  const rawJson = fs.readFileSync(inputFile, 'utf8');
  const data = JSON.parse(rawJson);

  const collected = collectMappings(data);
  const { personNames, companyNames, courtNames } = collected;
  const replacements = buildReplacementList(personNames, companyNames, courtNames);

  const serialized = JSON.stringify(desensitizeObject(data, replacements), null, 2);
  fs.writeFileSync(outputFile, serialized, 'utf8');

  const reportLines = [
    `\n${filename} 脱敏完成`,
    ` 原始文件:${inputFile}`,
    ` 输出文件:${outputFile}`,
    ` 姓名 ${personNames.size} 个,公司 ${companyNames.size} 个,法院 ${courtNames.size}`,
  ];
  for (const line of reportLines) {
    console.log(line);
  }

  return { personNames, companyNames, courtNames };
}
// ── 执行 ──────────────────────────────────────────────────
// Script entry point: process every input file, accumulate the per-file
// counts, then print a legend of the masking rules and the grand totals.
console.log('开始脱敏处理...');
const summary = { person: 0, company: 0, court: 0 };
INPUT_FILES.forEach((file) => {
  const { personNames, companyNames, courtNames } = processFile(file);
  summary.person += personNames.size;
  summary.company += companyNames.size;
  summary.court += courtNames.size;
});
[
  '\n脱敏摘要',
  '- 姓名:保留姓氏,名字替换为「某某」',
  '- 公司/机构:保留地区前缀与组织类型,中间替换为「****」',
  '- 法院:市/县/区名称替换为「**」',
  '- 省份/地区:次要省份(湖北、辽宁等)脱敏,广西自治区保留',
  '- 判决书文本:地址、信用代码、路名等同步脱敏',
  '- 身份证号/手机号:按字段规则脱敏',
  `- 合计处理:姓名 ${summary.person} 个,公司 ${summary.company} 个,法院 ${summary.court}`,
].forEach((line) => {
  console.log(line);
});