This commit is contained in:
Mrx
2026-06-12 14:25:45 +08:00
parent f069d93d84
commit bee67272bb
29 changed files with 13788 additions and 2040 deletions

1648
public/DWBG9FB2.json Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1648
public/DWBG9FB2hcl.json Normal file

File diff suppressed because one or more lines are too long

1184
public/DWBG9FB2hzy.json Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1253
public/DWBG9FB3hcl.json Normal file

File diff suppressed because one or more lines are too long

1029
public/DWBG9FB3hzy.json Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -5,75 +5,256 @@ import { fileURLToPath } from 'url';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Input JSON files to desensitize; processFile resolves each one against
// this script's directory.
const INPUT_FILES = ['DWBG9FB2.json', 'DWBG9FB3.json'];

// Organisation-type suffixes that desensitizeCompany keeps verbatim.
// Order matters: the first entry the name ends with wins, so the longer,
// more specific suffixes must stay ahead of the generic ones they end with
// (e.g. '物业服务有限公司南宁分公司' before '分公司').
const ORG_SUFFIXES = [
  '股份有限公司城区支行',
  '农村商业银行股份有限公司',
  '物业服务有限公司南宁分公司',
  '物业服务有限公司',
  '房地产开发有限公司',
  '国际大酒店有限公司',
  '生态旅游家园开发有限公司',
  '农业发展有限公司',
  '农资有限公司',
  '贸易有限公司',
  '发展有限公司',
  '开发有限公司',
  '股份有限公司',
  '农村合作银行',
  '信用合作联社',
  '有限公司',
  '分公司',
];

// A name containing any of these keywords is treated as an organisation
// rather than a person.
const COMPANY_KEYWORDS = /银行|公司|联社|集团|酒店|有限|股份/;

// Short strings that isValidPersonName must reject even though they have
// the length and character set of a personal name.
const NAME_BLACKLIST = new Set([
  '当事人', '法律关系', '法律关', '案涉房', '案涉', '房屋', '所有权',
  '合同法律', '侵权法律', '物业服务', '诉讼请求', '正当理由',
  '民发物业', '民发实', '民发', '广西广为', '广西中硕', '广西',
]);
// ── 基础脱敏函数 ──────────────────────────────────────────
/**
 * Mask a personal name by keeping its first character and replacing the
 * rest with '某某'.
 *
 * Names that contain an organisation keyword are handed to
 * desensitizeCompany instead. Non-strings, empty strings and
 * single-character names are returned unchanged.
 *
 * @param {*} name - Value of a name field.
 * @returns {*} The masked name, or the input itself when it is not maskable.
 */
function desensitizePersonName(name) {
  const isNonEmptyString = typeof name === 'string' && name !== '';
  if (!isNonEmptyString) {
    return name;
  }
  if (COMPANY_KEYWORDS.test(name)) {
    return desensitizeCompany(name);
  }
  return name.length > 1 ? name.charAt(0) + '某某' : name;
}
/**
 * Mask an organisation name, keeping a coarse region prefix and the
 * organisation-type suffix and replacing the middle with '****'.
 *
 * The suffix is the first entry of ORG_SUFFIXES that the name ends with.
 * When no suffix matches, only the first two characters are kept.
 *
 * Leftover lines from the former desensitizeName used to sit before the
 * final return; they made suffix-less organisations come back as
 * `<first char>某某` and left the intended fallback unreachable.
 *
 * @param {*} name - Organisation name.
 * @returns {*} The masked name, or the input itself when it is falsy or not a string.
 */
function desensitizeCompany(name) {
  if (!name || typeof name !== 'string') return name;

  const suffix = ORG_SUFFIXES.find((candidate) => name.endsWith(candidate));
  if (suffix === undefined) {
    // Unknown organisation type: nothing to preserve beyond the leading two characters.
    return name.slice(0, 2) + '****';
  }

  const core = name.slice(0, -suffix.length);

  // Region prefixes that are kept as-is in the masked output.
  const keptPrefixes = ['广西', '湖北', '桂林市', '兴安县', '资源县'];
  const kept = keptPrefixes.find((prefix) => core.startsWith(prefix));
  if (kept !== undefined) return kept + '****' + suffix;

  // Any other county or city is reduced to a generic placeholder; county is checked first.
  if (/^.+县/.test(core)) return '**县****' + suffix;
  if (/^.+市/.test(core)) return '**市****' + suffix;

  return core.slice(0, 2) + '****' + suffix;
}
/**
 * Mask the place names at the start of a court name.
 *
 * Rules are tried from most to least specific and only the first one that
 * matches is applied. Running every replace in sequence re-matched the
 * already-masked text: a city+county court such as '桂林市兴安县人民法院' was
 * first turned into '**市**县人民法院' and then collapsed to '**县人民法院' by
 * the county-only rule.
 *
 * @param {*} court - Court name.
 * @returns {*} The masked court name, or the input itself when it is falsy,
 *   not a string, or matches no rule.
 */
function desensitizeCourt(court) {
  if (!court || typeof court !== 'string') return court;

  const rules = [
    [/^.+?市.+?区/, '**市**区'],
    [/^.+?市.+?县/, '**市**县'],
    [/^.+?县/, '**县'],
    [/^.+?市/, '**市'],
  ];

  for (const [pattern, mask] of rules) {
    if (pattern.test(court)) {
      return court.replace(pattern, mask);
    }
  }
  return court;
}
/**
 * Replace every occurrence of '湖北省' and '辽宁省' in a string with '**省'.
 *
 * @param {*} text - Text to scan.
 * @returns {*} The masked text, or the input itself when it is falsy or not a string.
 */
function desensitizeProvince(text) {
  const isMaskable = typeof text === 'string' && text.length > 0;
  return isMaskable ? text.replace(/(?:湖北|辽宁)省/g, '**省') : text;
}
/**
 * Mask province names inside an `area_stat` value.
 *
 * This used to chain a second replace of '辽宁省(' after desensitizeProvince.
 * That step could never match, because desensitizeProvince has already
 * rewritten every '辽宁省', so it has been dropped; the output is unchanged.
 * NOTE(review): if the dead step was meant to do something extra for the
 * '辽宁省(' form, that intent is not recoverable from the code — confirm.
 *
 * @param {*} text - Value of the `area_stat` field.
 * @returns {*} The masked text, or the input itself when it is falsy or not a string.
 */
function desensitizeAreaStat(text) {
  if (!text || typeof text !== 'string') return text;
  return desensitizeProvince(text);
}
/**
 * Mask an identity-card number or a registration/credit code.
 *
 * - 18 characters: keep the first 6 and last 4, mask the middle with 8 stars.
 * - 15–17 characters of digits/upper-case letters: keep the first 4 and last 4,
 *   separated by 10 stars.
 * - Anything else (including non-strings) is returned unchanged.
 *
 * Two leftover lines from the previous implementation used to return before
 * the type guard and the 15–17 character branch could run.
 *
 * @param {*} idCard - Identifier to mask.
 * @returns {*} The masked identifier, or the input itself when no rule applies.
 */
function desensitizeIdCard(idCard) {
  if (!idCard || typeof idCard !== 'string') return idCard;

  if (idCard.length === 18) {
    return idCard.substring(0, 6) + '********' + idCard.substring(14);
  }
  // Length 18 is handled above, so only 15–17 characters can reach this branch.
  if (/^[0-9A-Z]{15,17}$/.test(idCard)) {
    return idCard.substring(0, 4) + '**********' + idCard.substring(idCard.length - 4);
  }
  return idCard;
}
/**
 * Mask an 11-character mobile number: keep the first 3 and last 4
 * characters and replace the 4 in between with stars.
 *
 * @param {*} mobile - Value of a mobile field.
 * @returns {*} The masked number, or the input itself when it is not an
 *   11-character string.
 */
function desensitizeMobile(mobile) {
  const isElevenCharString = typeof mobile === 'string' && mobile.length === 11;
  if (!isElevenCharString) {
    return mobile;
  }
  const head = mobile.slice(0, 3);
  const tail = mobile.slice(7);
  return head + '****' + tail;
}
/**
 * Mask addresses, account numbers, credit codes, court names and a few
 * hard-coded organisation names inside free text.
 *
 * The rules run in the order written and each one sees the output of the
 * previous one, so the order is part of the behaviour.
 *
 * Leftover lines from the former nameMap-based desensitizeText (its function
 * header and its name-replacement loop) used to be mixed into this body;
 * they referenced a constant that no longer exists and left the function
 * unclosed. Name replacement is not done here.
 *
 * @param {*} text - Text to scan.
 * @returns {*} The masked text, or the input itself when it is falsy or not a string.
 */
function desensitizeAddressText(text) {
  if (!text || typeof text !== 'string') return text;
  let result = text;

  // Credit codes and bank account numbers.
  result = result.replace(/统一社会信用代码[:]?\s*[0-9A-Z]{15,18}/g, (m) => {
    const code = m.replace(/统一社会信用代码[:]?\s*/, '');
    return '统一社会信用代码:' + desensitizeIdCard(code);
  });
  result = result.replace(/账号[:]?\s*[\d×]{10,}/g, '账号:********');

  // Street addresses: the road name, the number, and whatever follows up to
  // the next delimiter in the character class.
  result = result.replace(/[\u4e00-\u9fa5]{2,6}路\d+号[^,,。;;]*/g, '**路**号****');

  // Hard-coded estate and organisation names; longer names come before the
  // shorter names they contain.
  result = result.replace(/民发[·・][\u4e00-\u9fa5A-Za-z0-9]{2,15}?(?:小区|会所)/g, '****小区');
  result = result.replace(/民发物业服务有限公司南宁分公司/g, '****物业服务有限公司南宁分公司');
  result = result.replace(/民发物业服务有限公司/g, '****物业服务有限公司');
  result = result.replace(/民发物业南宁分公司/g, '****物业南宁分公司');
  result = result.replace(/民发物业公司/g, '****物业公司');
  result = result.replace(/广西中硕资产评估有限责任公司/g, '广西****资产评估有限责任公司');
  result = result.replace(/民发实业集团\(广西\)房地产开发有限公司/g, '****实业集团(广西)****开发有限公司');

  // Court names.
  result = result.replace(/[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区人民法院/g, '**市**区人民法院');
  result = result.replace(/上诉于([\u4e00-\u9fa5]{2,4})市中级人民法院/g, '上诉于**市中级人民法院');
  result = result.replace(/开户名称[:]([\u4e00-\u9fa5]{2,4})市中级人民法院/g, '开户名称:**市中级人民法院');
  result = result.replace(/([\u4e00-\u9fa5]{2,4})县人民法院/g, '**县人民法院');

  // Residence and location phrases.
  result = result.replace(/住广西壮族自治区[^,,。;;]{2,20}?[区县]/g, '住广西壮族自治区**市**区');
  result = result.replace(/住所地广西壮族自治区[^,,。;;]{2,30}/g, '住所地广西壮族自治区**市**区****');
  result = result.replace(/住所地湖北省[^,,。;;]{2,30}/g, '住所地**省**市**区****');
  result = result.replace(/位于[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区/g, '位于**市**区');
  result = result.replace(/坐落于[\u4e00-\u9fa5]{2,4}市[\u4e00-\u9fa5]{2,6}区/g, '坐落于**市**区');
  result = result.replace(/系[\u4e00-\u9fa5]{2,5}市[\u4e00-\u9fa5]{2,6}区/g, '系**市**区');

  // Building / unit / room numbers.
  result = result.replace(/[\u4e00-\u9fa5·・A-Za-z0-9]+栋\d+单元\d+号/g, '****栋**单元**号');
  return result;
}
// 递归遍历对象进行脱敏
function desensitizeObject(obj) {
// ── 从数据中收集替换映射 ──────────────────────────────────
/**
 * Decide whether a candidate looks like a personal name.
 *
 * A candidate qualifies when it is 2–4 characters long, consists only of
 * characters in U+4E00–U+9FA5, contains no organisation keyword and is not
 * on the blacklist.
 *
 * @param {*} name - Candidate name.
 * @returns {*} The input itself when it is falsy, otherwise `true` or `false`.
 */
function isValidPersonName(name) {
  if (!name) {
    return name;
  }
  const size = name.length;
  const hasNameLength = size >= 2 && size <= 4;
  if (!hasNameLength) {
    return false;
  }
  if (COMPANY_KEYWORDS.test(name) || NAME_BLACKLIST.has(name)) {
    return false;
  }
  return /^[\u4e00-\u9fa5]+$/.test(name);
}
/**
 * Walk a parsed JSON value and collect the names that need masking.
 *
 * For every plain object encountered:
 * - `c_mc` goes to the company set when `n_dsrlx` is '企业组织' or the value
 *   contains an organisation keyword, otherwise to the person set when it
 *   passes isValidPersonName;
 * - `n_jbfy` goes to the court set;
 * - `c_gkws_dsr` and `c_gkws_pjjg` are scanned by extractNamesFromLegalText.
 * Every property value is then visited in turn, depth first.
 *
 * @param {*} data - Parsed JSON document.
 * @returns {{personNames: Set, companyNames: Set, courtNames: Set}} The collected names.
 */
function collectMappings(data) {
  const personNames = new Set();
  const companyNames = new Set();
  const courtNames = new Set();
  const legalTextFields = ['c_gkws_dsr', 'c_gkws_pjjg'];

  const visit = (node) => {
    if (!node || typeof node !== 'object') {
      return;
    }
    if (Array.isArray(node)) {
      for (const item of node) {
        visit(item);
      }
      return;
    }

    const partyName = node.c_mc;
    if (partyName) {
      const isOrganisation = node.n_dsrlx === '企业组织' || COMPANY_KEYWORDS.test(partyName);
      if (isOrganisation) {
        companyNames.add(partyName);
      } else if (isValidPersonName(partyName)) {
        personNames.add(partyName);
      }
    }

    const court = node.n_jbfy;
    if (court) {
      courtNames.add(court);
    }

    for (const field of legalTextFields) {
      const legalText = node[field];
      if (legalText) {
        extractNamesFromLegalText(legalText, personNames, companyNames);
      }
    }

    for (const child of Object.values(node)) {
      visit(child);
    }
  };

  visit(data);
  return { personNames, companyNames, courtNames };
}
/**
 * Scan legal text for personal and organisation names and add them to the
 * given sets.
 *
 * Personal names are the 2–4 character captures of the role/context patterns
 * below that pass isValidPersonName. Organisation names are every match of
 * the organisation pattern, added without further checks.
 *
 * @param {*} text - Text to scan; falsy or non-string input is ignored.
 * @param {Set} personNames - Receives the personal names found.
 * @param {Set} companyNames - Receives the organisation names found.
 * @returns {void}
 */
function extractNamesFromLegalText(text, personNames, companyNames) {
  if (!text || typeof text !== 'string') {
    return;
  }

  const personPatterns = [
    /(?:原告|被告|上诉人|被上诉人|原审被告人|被告人|负责人|法定代表人|案外人|委托诉讼代理人|代理人|承租人|出租人)[:]([\u4e00-\u9fa5]{2,4})/g,
    /与案外人([\u4e00-\u9fa5]{2,4})签/g,
    /([\u4e00-\u9fa5]{2,4})所有的/g,
    /向([\u4e00-\u9fa5]{2,4})转账/g,
  ];
  for (const pattern of personPatterns) {
    for (const [, candidate] of text.matchAll(pattern)) {
      if (isValidPersonName(candidate)) {
        personNames.add(candidate);
      }
    }
  }

  const organisationPattern =
    /([\u4e00-\u9fa5()·・]{4,40}?(?:有限公司|股份有限公司|合作银行|信用合作联社))/g;
  for (const [, organisation] of text.matchAll(organisationPattern)) {
    companyNames.add(organisation);
  }
}
/**
 * Turn the collected names into an ordered list of `{ from, to }` rules.
 *
 * Persons, companies and courts are masked with their respective functions,
 * concatenated in that order, and then sorted by descending `from` length so
 * that longer names are replaced before shorter names they may contain.
 *
 * @param {Iterable} personNames - Personal names.
 * @param {Iterable} companyNames - Organisation names.
 * @param {Iterable} courtNames - Court names.
 * @returns {Array<{from: *, to: *}>} Replacement rules, longest `from` first.
 */
function buildReplacementList(personNames, companyNames, courtNames) {
  const rules = [];
  const addRules = (names, mask) => {
    for (const from of names) {
      rules.push({ from, to: mask(from) });
    }
  };

  addRules(personNames, (name) => desensitizePersonName(name));
  addRules(companyNames, (name) => desensitizeCompany(name));
  addRules(courtNames, (court) => desensitizeCourt(court));

  rules.sort((left, right) => right.from.length - left.from.length);
  return rules;
}
/**
 * Mask free text: apply every literal `from` → `to` replacement in list
 * order, then run the address rules and the province rules on the result.
 *
 * Rules whose `from` or `to` is falsy, or whose `from` equals `to`, are
 * skipped.
 *
 * @param {*} text - Text to mask.
 * @param {Iterable<{from: *, to: *}>} replacements - Replacement rules.
 * @returns {*} The masked text, or the input itself when it is falsy or not a string.
 */
function desensitizeText(text, replacements) {
  if (!text || typeof text !== 'string') {
    return text;
  }

  let masked = text;
  for (const rule of replacements) {
    const { from, to } = rule;
    if (!from || !to || from === to) {
      continue;
    }
    // split/join replaces every occurrence literally, with no regex or `$` handling.
    masked = masked.split(from).join(to);
  }

  return desensitizeProvince(desensitizeAddressText(masked));
}
// ── 递归脱敏 ──────────────────────────────────────────────
function desensitizeObject(obj, replacements) {
if (obj === null || typeof obj !== 'object') {
return obj;
}
if (Array.isArray(obj)) {
return obj.map(item => desensitizeObject(item));
return obj.map((item) => desensitizeObject(item, replacements));
}
const result = {};
for (const [key, value] of Object.entries(obj)) {
switch (key) {
case 'name':
result[key] = desensitizeName(value);
result[key] = desensitizePersonName(value);
break;
case 'id_card':
result[key] = desensitizeIdCard(value);
@@ -82,36 +263,69 @@ function desensitizeObject(obj) {
result[key] = desensitizeMobile(value);
break;
case 'c_mc':
// 当事人姓名
result[key] = desensitizeName(value);
result[key] =
obj.n_dsrlx === '企业组织' || COMPANY_KEYWORDS.test(value)
? desensitizeCompany(value)
: desensitizePersonName(value);
break;
case 'c_gkws_dsr':
case 'c_gkws_pjjg':
// 判决书内容中的文本
result[key] = desensitizeText(value);
result[key] = desensitizeText(value, replacements);
break;
case 'n_jbfy':
result[key] = desensitizeCourt(value);
break;
case 'c_ssdy':
result[key] = value;
break;
case 'area_stat':
result[key] = desensitizeAreaStat(value);
break;
default:
result[key] = desensitizeObject(value);
result[key] = desensitizeObject(value, replacements);
break;
}
}
return result;
}
// 执行脱敏
const desensitizedData = desensitizeObject(data);
/**
 * Desensitize one JSON file that sits next to this script and write the
 * result alongside it as `<base>_desensitized.json`.
 *
 * Leftover lines from the single-file version used to be mixed into this
 * body: a second `const outputFile` declaration, a write that ran before
 * `desensitizedData` existed, and three duplicate log lines.
 *
 * @param {string} filename - Name of the input file, relative to this script's directory.
 * @returns {{personNames: Set, companyNames: Set, courtNames: Set}} The names
 *   collected from the file.
 */
function processFile(filename) {
  const inputFile = path.join(__dirname, filename);
  // Strip the extension only at the end of the name, not the first '.json' anywhere in it.
  const baseName = filename.replace(/\.json$/, '');
  const outputFile = path.join(__dirname, `${baseName}_desensitized.json`);

  const data = JSON.parse(fs.readFileSync(inputFile, 'utf8'));
  const { personNames, companyNames, courtNames } = collectMappings(data);
  const replacements = buildReplacementList(personNames, companyNames, courtNames);
  const desensitizedData = desensitizeObject(data, replacements);

  fs.writeFileSync(outputFile, JSON.stringify(desensitizedData, null, 2), 'utf8');

  console.log(`\n${filename} 脱敏完成`);
  console.log(` 原始文件:${inputFile}`);
  console.log(` 输出文件:${outputFile}`);
  console.log(` 姓名 ${personNames.size} 个,公司 ${companyNames.size} 个,法院 ${courtNames.size}`);
  return { personNames, companyNames, courtNames };
}
// ── Run ───────────────────────────────────────────────────
// Process every input file and accumulate per-file counts. The totals are
// sums over files, so a name that appears in two files is counted twice.
console.log('开始脱敏处理...');
const summary = { person: 0, company: 0, court: 0 };
for (const file of INPUT_FILES) {
  const stats = processFile(file);
  summary.person += stats.personNames.size;
  summary.company += stats.companyNames.size;
  summary.court += stats.courtNames.size;
}

// Print the desensitization summary. The bullets left over from the previous
// single-file version described rules that no longer apply and were removed.
console.log('\n脱敏摘要');
console.log('- 姓名:保留姓氏,名字替换为「某某」');
console.log('- 公司/机构:保留地区前缀与组织类型,中间替换为「****」');
console.log('- 法院:市/县/区名称替换为「**」');
console.log('- 省份/地区:次要省份(湖北、辽宁等)脱敏,广西自治区保留');
console.log('- 判决书文本:地址、信用代码、路名等同步脱敏');
console.log('- 身份证号/手机号:按字段规则脱敏');
console.log(`- 合计处理:姓名 ${summary.person} 个,公司 ${summary.company} 个,法院 ${summary.court}`);