f
This commit is contained in:
1648
public/DWBG9FB2.json
Normal file
1648
public/DWBG9FB2.json
Normal file
File diff suppressed because one or more lines are too long
1648
public/DWBG9FB2_desensitized.json
Normal file
1648
public/DWBG9FB2_desensitized.json
Normal file
File diff suppressed because one or more lines are too long
1648
public/DWBG9FB2hcl.json
Normal file
1648
public/DWBG9FB2hcl.json
Normal file
File diff suppressed because one or more lines are too long
1184
public/DWBG9FB2hzy.json
Normal file
1184
public/DWBG9FB2hzy.json
Normal file
File diff suppressed because it is too large
Load Diff
2175
public/DWBG9FB3.json
2175
public/DWBG9FB3.json
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1253
public/DWBG9FB3hcl.json
Normal file
1253
public/DWBG9FB3hcl.json
Normal file
File diff suppressed because one or more lines are too long
1029
public/DWBG9FB3hzy.json
Normal file
1029
public/DWBG9FB3hzy.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -5,75 +5,256 @@ import { fileURLToPath } from 'url';
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
// 读取JSON文件
|
||||
// 读取JSON文件
|
||||
const inputFile = path.join(__dirname, 'DWBG9FB3.json');
|
||||
const data = JSON.parse(fs.readFileSync(inputFile, 'utf8'));
|
||||
const INPUT_FILES = ['DWBG9FB2.json', 'DWBG9FB3.json'];
|
||||
|
||||
// 姓名映射表(保持同一姓名脱敏后一致)
|
||||
const nameMap = {
|
||||
'何志勇': '何某某',
|
||||
'覃圣有': '覃某',
|
||||
'刘飞': '刘某某',
|
||||
'陈波': '陈某某',
|
||||
'覃小群': '覃某某',
|
||||
'陈观海': '陈某某',
|
||||
'刘国富': '刘某某'
|
||||
};
|
||||
const ORG_SUFFIXES = [
|
||||
'股份有限公司城区支行',
|
||||
'农村商业银行股份有限公司',
|
||||
'物业服务有限公司南宁分公司',
|
||||
'物业服务有限公司',
|
||||
'房地产开发有限公司',
|
||||
'国际大酒店有限公司',
|
||||
'生态旅游家园开发有限公司',
|
||||
'农业发展有限公司',
|
||||
'农资有限公司',
|
||||
'贸易有限公司',
|
||||
'发展有限公司',
|
||||
'开发有限公司',
|
||||
'股份有限公司',
|
||||
'农村合作银行',
|
||||
'信用合作联社',
|
||||
'有限公司',
|
||||
'分公司',
|
||||
|
||||
// 脱敏函数
|
||||
function desensitizeName(name) {
|
||||
if (nameMap[name]) {
|
||||
return nameMap[name];
|
||||
];
|
||||
|
||||
const COMPANY_KEYWORDS = /银行|公司|联社|集团|酒店|有限|股份/;
|
||||
|
||||
const NAME_BLACKLIST = new Set([
|
||||
'当事人', '法律关系', '法律关', '案涉房', '案涉', '房屋', '所有权',
|
||||
'合同法律', '侵权法律', '物业服务', '诉讼请求', '正当理由',
|
||||
'民发物业', '民发实', '民发', '广西广为', '广西中硕','广西',
|
||||
]);
|
||||
|
||||
// ── 基础脱敏函数 ──────────────────────────────────────────
|
||||
|
||||
/**
 * Mask a party name so that only its first character stays readable.
 *
 * - Falsy or non-string values are handed back unchanged.
 * - Names that contain an organisation keyword (COMPANY_KEYWORDS) are
 *   delegated to desensitizeCompany instead of being treated as a person.
 * - Single-character names are returned as-is.
 * - Anything else becomes "<first character>某某".
 *
 * @param {*} name - Candidate name.
 * @returns {*} The masked name, or the input itself when it is not maskable.
 */
function desensitizePersonName(name) {
  const isNonEmptyString = typeof name === 'string' && name !== '';
  if (!isNonEmptyString) {
    return name;
  }

  // Organisation-looking names follow the company masking rules.
  if (COMPANY_KEYWORDS.test(name)) {
    return desensitizeCompany(name);
  }

  return name.length > 1 ? name[0] + '某某' : name;
}
|
||||
|
||||
function desensitizeCompany(name) {
|
||||
if (!name || typeof name !== 'string') return name;
|
||||
|
||||
for (const suffix of ORG_SUFFIXES) {
|
||||
if (!name.endsWith(suffix)) continue;
|
||||
const core = name.slice(0, -suffix.length);
|
||||
|
||||
if (core.startsWith('广西')) return '广西****' + suffix;
|
||||
if (core.startsWith('湖北')) return '湖北****' + suffix;
|
||||
if (core.startsWith('桂林市')) return '桂林市****' + suffix;
|
||||
if (core.startsWith('兴安县')) return '兴安县****' + suffix;
|
||||
if (core.startsWith('资源县')) return '资源县****' + suffix;
|
||||
if (/^.+县/.test(core)) return '**县****' + suffix;
|
||||
if (/^.+市/.test(core)) return '**市****' + suffix;
|
||||
|
||||
return core.slice(0, 2) + '****' + suffix;
|
||||
}
|
||||
// 对于未知的姓名,保留姓氏,名字用星号代替
|
||||
if (name && name.length > 0) {
|
||||
const surname = name[0];
|
||||
return surname + '某某';
|
||||
}
|
||||
return name;
|
||||
|
||||
return name.slice(0, 2) + '****';
|
||||
}
|
||||
|
||||
/**
 * Mask the locality at the start of a court name.
 *
 * The rules are tried from most to least specific and only the FIRST rule
 * that matches is applied:
 *   "<…市><…区>…" -> "**市**区…"
 *   "<…市><…县>…" -> "**市**县…"
 *   "<…县>…"      -> "**县…"
 *   "<…市>…"      -> "**市…"
 *
 * Applying the rules as a chain (each one on the previous result) lets a
 * later rule re-match already masked text: "**市**县…" starts with something
 * ending in "县", so the county rule would collapse it to "**县…". Stopping
 * at the first match keeps the city/county output intact.
 *
 * @param {*} court - Court name.
 * @returns {*} The masked court name; falsy or non-string input and names
 *   that match no rule are returned unchanged.
 */
function desensitizeCourt(court) {
  if (!court || typeof court !== 'string') return court;

  const rules = [
    [/^(.+?市)(.+?区)/, '**市**区'],
    [/^(.+?市)(.+?县)/, '**市**县'],
    [/^(.+?县)/, '**县'],
    [/^(.+?市)/, '**市'],
  ];

  for (const [pattern, mask] of rules) {
    if (pattern.test(court)) {
      return court.replace(pattern, mask);
    }
  }
  return court;
}
|
||||
|
||||
/**
 * Replace every occurrence of the province names "湖北省" and "辽宁省"
 * with "**省". No other province name is touched.
 *
 * @param {*} text - Text to scrub.
 * @returns {*} The scrubbed text; falsy or non-string input is returned as-is.
 */
function desensitizeProvince(text) {
  if (typeof text !== 'string' || text.length === 0) {
    return text;
  }
  // One pass over both names; the mask itself contains neither name,
  // so this is equivalent to replacing them one after the other.
  return text.replace(/湖北省|辽宁省/g, '**省');
}
|
||||
|
||||
/**
 * Scrub an area-statistics string: first the generic province masking
 * (desensitizeProvince), then an explicit pass that turns "辽宁省(" into
 * "**省(".
 *
 * @param {*} text - Area statistics text.
 * @returns {*} The scrubbed text; falsy or non-string input is returned as-is.
 */
function desensitizeAreaStat(text) {
  const isText = typeof text === 'string' && text.length > 0;
  if (!isText) {
    return text;
  }

  const provincesMasked = desensitizeProvince(text);
  // NOTE(review): as long as desensitizeProvince already masks every "辽宁省",
  // this second pass finds nothing; it only matters if that helper changes.
  return provincesMasked.replace(/辽宁省\(/g, '**省(');
}
|
||||
|
||||
function desensitizeIdCard(idCard) {
|
||||
if (!idCard || idCard.length !== 18) return idCard;
|
||||
return idCard.substring(0, 6) + '********' + idCard.substring(14);
|
||||
if (!idCard || typeof idCard !== 'string') return idCard;
|
||||
if (idCard.length === 18) {
|
||||
return idCard.substring(0, 6) + '********' + idCard.substring(14);
|
||||
}
|
||||
if (/^[0-9A-Z]{15,18}$/.test(idCard)) {
|
||||
return idCard.substring(0, 4) + '**********' + idCard.substring(idCard.length - 4);
|
||||
}
|
||||
return idCard;
|
||||
}
|
||||
|
||||
function desensitizeMobile(mobile) {
|
||||
if (!mobile || mobile.length !== 11) return mobile;
|
||||
if (!mobile || typeof mobile !== 'string' || mobile.length !== 11) return mobile;
|
||||
return mobile.substring(0, 3) + '****' + mobile.substring(7);
|
||||
}
|
||||
|
||||
function desensitizeText(text) {
|
||||
function desensitizeAddressText(text) {
|
||||
if (!text || typeof text !== 'string') return text;
|
||||
let result = text;
|
||||
// 替换所有出现的人名
|
||||
for (const [realName, maskedName] of Object.entries(nameMap)) {
|
||||
// 替换姓名
|
||||
const regex1 = new RegExp(realName, 'g');
|
||||
result = result.replace(regex1, maskedName);
|
||||
// 替换姓名+某的形式(如:何志某 -> 何某某某)
|
||||
const regex2 = new RegExp(realName.substring(0, realName.length - 1) + '某', 'g');
|
||||
result = result.replace(regex2, maskedName);
|
||||
}
|
||||
|
||||
result = result.replace(/统一社会信用代码[::]?\s*[0-9A-Z]{15,18}/g, (m) => {
|
||||
const code = m.replace(/统一社会信用代码[::]?\s*/, '');
|
||||
return '统一社会信用代码:' + desensitizeIdCard(code);
|
||||
});
|
||||
|
||||
result = result.replace(/账号[::]?\s*[\d×]{10,}/g, '账号:********');
|
||||
result = result.replace(/[\u4e00-\u9fa5]{2,6}路\d+号[^,,。;;]*/g, '**路**号****');
|
||||
result = result.replace(/民发[·・][\u4e00-\u9fa5A-Za-z0-9]{2,15}?(?:小区|会所)/g, '****小区');
|
||||
result = result.replace(/民发物业服务有限公司南宁分公司/g, '****物业服务有限公司南宁分公司');
|
||||
result = result.replace(/民发物业服务有限公司/g, '****物业服务有限公司');
|
||||
result = result.replace(/民发物业南宁分公司/g, '****物业南宁分公司');
|
||||
result = result.replace(/民发物业公司/g, '****物业公司');
|
||||
result = result.replace(/广西中硕资产评估有限责任公司/g, '广西****资产评估有限责任公司');
|
||||
result = result.replace(/民发实业集团\(广西\)房地产开发有限公司/g, '****实业集团(广西)****开发有限公司');
|
||||
result = result.replace(/[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区人民法院/g, '**市**区人民法院');
|
||||
result = result.replace(/上诉于([\u4e00-\u9fa5]{2,4})市中级人民法院/g, '上诉于**市中级人民法院');
|
||||
result = result.replace(/开户名称[::]([\u4e00-\u9fa5]{2,4})市中级人民法院/g, '开户名称:**市中级人民法院');
|
||||
result = result.replace(/([\u4e00-\u9fa5]{2,4})县人民法院/g, '**县人民法院');
|
||||
|
||||
result = result.replace(/住广西壮族自治区[^,,。;;]{2,20}?[区县]/g, '住广西壮族自治区**市**区');
|
||||
result = result.replace(/住所地广西壮族自治区[^,,。;;]{2,30}/g, '住所地广西壮族自治区**市**区****');
|
||||
result = result.replace(/住所地湖北省[^,,。;;]{2,30}/g, '住所地**省**市**区****');
|
||||
result = result.replace(/位于[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区/g, '位于**市**区');
|
||||
result = result.replace(/坐落于[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区/g, '坐落于**市**区');
|
||||
result = result.replace(/系[\u4e00-\u9fa5]{2,5}市[\u4e00-\u9fa5]{2,6}区/g, '系**市**区');
|
||||
result = result.replace(/[\u4e00-\u9fa5·・A-Za-z0-9]+栋\d+单元\d+号/g, '****栋**单元**号');
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// 递归遍历对象进行脱敏
|
||||
function desensitizeObject(obj) {
|
||||
// ── 从数据中收集替换映射 ──────────────────────────────────
|
||||
|
||||
/**
 * Decide whether a string looks like a personal name that should be masked.
 *
 * A value qualifies when it is a string of 2 to 4 characters, consists only
 * of characters in the range U+4E00–U+9FA5, does not match COMPANY_KEYWORDS
 * and is not listed in NAME_BLACKLIST.
 *
 * Always returns a real boolean: non-string input (including null and
 * undefined) and the empty string yield `false` rather than leaking the
 * falsy input value back to the caller.
 *
 * @param {*} name - Candidate name.
 * @returns {boolean} `true` when the value passes every check.
 */
function isValidPersonName(name) {
  if (typeof name !== 'string') return false;

  return (
    name.length >= 2 &&
    name.length <= 4 &&
    !COMPANY_KEYWORDS.test(name) &&
    !NAME_BLACKLIST.has(name) &&
    /^[\u4e00-\u9fa5]+$/.test(name)
  );
}
|
||||
|
||||
/**
 * Walk a parsed JSON document and gather every name that has to be masked.
 *
 * For each plain object encountered (depth-first, in property order):
 *  - `c_mc` goes to the company set when `n_dsrlx` is '企业组织' or the value
 *    matches COMPANY_KEYWORDS; otherwise to the person set when it passes
 *    isValidPersonName.
 *  - `n_jbfy`, when truthy, goes to the court set.
 *  - `c_gkws_dsr` and `c_gkws_pjjg`, when truthy, are scanned with
 *    extractNamesFromLegalText, which adds to the person/company sets.
 *
 * @param {*} data - Parsed JSON value (object, array or primitive).
 * @returns {{personNames: Set, companyNames: Set, courtNames: Set}}
 */
function collectMappings(data) {
  const personNames = new Set();
  const companyNames = new Set();
  const courtNames = new Set();
  const legalTextFields = ['c_gkws_dsr', 'c_gkws_pjjg'];

  // Sort a node's party name into the company or person bucket.
  const classifyParty = (node) => {
    const partyName = node.c_mc;
    if (!partyName) {
      return;
    }
    const isOrganisation =
      node.n_dsrlx === '企业组织' || COMPANY_KEYWORDS.test(partyName);
    if (isOrganisation) {
      companyNames.add(partyName);
      return;
    }
    if (isValidPersonName(partyName)) {
      personNames.add(partyName);
    }
  };

  const visit = (node) => {
    if (!node || typeof node !== 'object') {
      return;
    }

    if (Array.isArray(node)) {
      for (const item of node) {
        visit(item);
      }
      return;
    }

    classifyParty(node);

    if (node.n_jbfy) {
      courtNames.add(node.n_jbfy);
    }

    for (const field of legalTextFields) {
      const legalText = node[field];
      if (legalText) {
        extractNamesFromLegalText(legalText, personNames, companyNames);
      }
    }

    // Descend only after the node itself has been inspected.
    for (const child of Object.values(node)) {
      visit(child);
    }
  };

  visit(data);
  return { personNames, companyNames, courtNames };
}
|
||||
|
||||
/**
 * Scan free-form judgment text for names and add them to the given sets.
 *
 * Person candidates are the 2–4 character captures of four patterns
 * (a role label followed by a colon, "与案外人…签", "…所有的", "向…转账");
 * a candidate is added to `personNames` only when isValidPersonName
 * accepts it. Every match of the company pattern (4–40 characters ending in
 * one of the listed organisation suffixes) is added to `companyNames`.
 *
 * @param {*} text - Judgment text; falsy or non-string input is ignored.
 * @param {Set} personNames - Receives person names (mutated).
 * @param {Set} companyNames - Receives company names (mutated).
 * @returns {void}
 */
function extractNamesFromLegalText(text, personNames, companyNames) {
  if (typeof text !== 'string' || text === '') {
    return;
  }

  const personPatterns = [
    /(?:原告|被告|上诉人|被上诉人|原审被告人|被告人|负责人|法定代表人|案外人|委托诉讼代理人|代理人|承租人|出租人)[::]([\u4e00-\u9fa5]{2,4})/g,
    /与案外人([\u4e00-\u9fa5]{2,4})签/g,
    /([\u4e00-\u9fa5]{2,4})所有的/g,
    /向([\u4e00-\u9fa5]{2,4})转账/g,
  ];

  for (const pattern of personPatterns) {
    for (const [, candidate] of text.matchAll(pattern)) {
      if (isValidPersonName(candidate)) {
        personNames.add(candidate);
      }
    }
  }

  const organisationPattern =
    /([\u4e00-\u9fa5()()·・]{4,40}?(?:有限公司|股份有限公司|合作银行|信用合作联社))/g;
  for (const [, organisation] of text.matchAll(organisationPattern)) {
    companyNames.add(organisation);
  }
}
|
||||
|
||||
/**
 * Turn the collected name sets into an ordered list of text replacements.
 *
 * Each entry is `{ from, to }`, where `to` comes from the masking function
 * for that category (person, company, court). Entries are sorted by
 * descending `from.length` so longer strings are replaced before any shorter
 * string they may contain.
 *
 * @param {Iterable<string>} personNames - Names masked with desensitizePersonName.
 * @param {Iterable<string>} companyNames - Names masked with desensitizeCompany.
 * @param {Iterable<string>} courtNames - Names masked with desensitizeCourt.
 * @returns {Array<{from: string, to: *}>} Replacement entries, longest first.
 */
function buildReplacementList(personNames, companyNames, courtNames) {
  const toEntries = (names, mask) =>
    Array.from(names, (from) => ({ from, to: mask(from) }));

  const replacements = [
    ...toEntries(personNames, (name) => desensitizePersonName(name)),
    ...toEntries(companyNames, (name) => desensitizeCompany(name)),
    ...toEntries(courtNames, (court) => desensitizeCourt(court)),
  ];

  replacements.sort((left, right) => right.from.length - left.from.length);
  return replacements;
}
|
||||
|
||||
/**
 * Scrub a block of judgment text.
 *
 * First every `{ from, to }` pair is applied as a literal, global
 * substitution in list order (pairs with an empty side, or whose two sides
 * are equal, are skipped). The result then goes through
 * desensitizeAddressText and finally desensitizeProvince.
 *
 * @param {*} text - Text to scrub; falsy or non-string input is returned as-is.
 * @param {Array<{from: string, to: string}>} replacements - Ordered substitutions.
 * @returns {*} The scrubbed text.
 */
function desensitizeText(text, replacements) {
  if (typeof text !== 'string' || text === '') {
    return text;
  }

  let scrubbed = text;
  for (const { from, to } of replacements) {
    if (!from || !to || from === to) {
      continue;
    }
    // split/join substitutes every literal occurrence without regex escaping.
    scrubbed = scrubbed.split(from).join(to);
  }

  return desensitizeProvince(desensitizeAddressText(scrubbed));
}
|
||||
|
||||
// ── 递归脱敏 ──────────────────────────────────────────────
|
||||
|
||||
function desensitizeObject(obj, replacements) {
|
||||
if (obj === null || typeof obj !== 'object') {
|
||||
return obj;
|
||||
}
|
||||
|
||||
if (Array.isArray(obj)) {
|
||||
return obj.map(item => desensitizeObject(item));
|
||||
return obj.map((item) => desensitizeObject(item, replacements));
|
||||
}
|
||||
|
||||
const result = {};
|
||||
for (const [key, value] of Object.entries(obj)) {
|
||||
switch (key) {
|
||||
case 'name':
|
||||
result[key] = desensitizeName(value);
|
||||
result[key] = desensitizePersonName(value);
|
||||
break;
|
||||
case 'id_card':
|
||||
result[key] = desensitizeIdCard(value);
|
||||
@@ -82,36 +263,69 @@ function desensitizeObject(obj) {
|
||||
result[key] = desensitizeMobile(value);
|
||||
break;
|
||||
case 'c_mc':
|
||||
// 当事人姓名
|
||||
result[key] = desensitizeName(value);
|
||||
result[key] =
|
||||
obj.n_dsrlx === '企业组织' || COMPANY_KEYWORDS.test(value)
|
||||
? desensitizeCompany(value)
|
||||
: desensitizePersonName(value);
|
||||
break;
|
||||
case 'c_gkws_dsr':
|
||||
case 'c_gkws_pjjg':
|
||||
// 判决书内容中的文本
|
||||
result[key] = desensitizeText(value);
|
||||
result[key] = desensitizeText(value, replacements);
|
||||
break;
|
||||
case 'n_jbfy':
|
||||
result[key] = desensitizeCourt(value);
|
||||
break;
|
||||
case 'c_ssdy':
|
||||
result[key] = value;
|
||||
break;
|
||||
case 'area_stat':
|
||||
result[key] = desensitizeAreaStat(value);
|
||||
break;
|
||||
default:
|
||||
result[key] = desensitizeObject(value);
|
||||
result[key] = desensitizeObject(value, replacements);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// 执行脱敏
|
||||
const desensitizedData = desensitizeObject(data);
|
||||
function processFile(filename) {
|
||||
const inputFile = path.join(__dirname, filename);
|
||||
const baseName = filename.replace('.json', '');
|
||||
const outputFile = path.join(__dirname, `${baseName}_desensitized.json`);
|
||||
|
||||
// 保存脱敏后的文件
|
||||
const outputFile = path.join(__dirname, 'DWBG9FB3_desensitized.json');
|
||||
fs.writeFileSync(outputFile, JSON.stringify(desensitizedData, null, 2), 'utf8');
|
||||
const data = JSON.parse(fs.readFileSync(inputFile, 'utf8'));
|
||||
const { personNames, companyNames, courtNames } = collectMappings(data);
|
||||
const replacements = buildReplacementList(personNames, companyNames, courtNames);
|
||||
const desensitizedData = desensitizeObject(data, replacements);
|
||||
|
||||
console.log('脱敏完成!');
|
||||
console.log('原始文件:', inputFile);
|
||||
console.log('脱敏后文件:', outputFile);
|
||||
fs.writeFileSync(outputFile, JSON.stringify(desensitizedData, null, 2), 'utf8');
|
||||
|
||||
console.log(`\n✓ ${filename} 脱敏完成`);
|
||||
console.log(` 原始文件:${inputFile}`);
|
||||
console.log(` 输出文件:${outputFile}`);
|
||||
console.log(` 姓名 ${personNames.size} 个,公司 ${companyNames.size} 个,法院 ${courtNames.size} 个`);
|
||||
|
||||
return { personNames, companyNames, courtNames };
|
||||
}
|
||||
|
||||
// ── 执行 ──────────────────────────────────────────────────
|
||||
|
||||
console.log('开始脱敏处理...');
|
||||
const summary = { person: 0, company: 0, court: 0 };
|
||||
|
||||
for (const file of INPUT_FILES) {
|
||||
const stats = processFile(file);
|
||||
summary.person += stats.personNames.size;
|
||||
summary.company += stats.companyNames.size;
|
||||
summary.court += stats.courtNames.size;
|
||||
}
|
||||
|
||||
// 显示脱敏摘要
|
||||
console.log('\n脱敏摘要:');
|
||||
console.log('- 姓名:已脱敏(保留姓氏)');
|
||||
console.log('- 身份证号:已脱敏(保留前6位和后4位)');
|
||||
console.log('- 手机号:已脱敏(保留前3位和后4位)');
|
||||
console.log('- 判决书文本中的姓名:已批量替换');
|
||||
console.log('- 姓名:保留姓氏,名字替换为「某某」');
|
||||
console.log('- 公司/机构:保留地区前缀与组织类型,中间替换为「****」');
|
||||
console.log('- 法院:市/县/区名称替换为「**」');
|
||||
console.log('- 省份/地区:次要省份(湖北、辽宁等)脱敏,广西自治区保留');
|
||||
console.log('- 判决书文本:地址、信用代码、路名等同步脱敏');
|
||||
console.log('- 身份证号/手机号:按字段规则脱敏');
|
||||
console.log(`- 合计处理:姓名 ${summary.person} 个,公司 ${summary.company} 个,法院 ${summary.court} 个`);
|
||||
|
||||
Reference in New Issue
Block a user