Files
tyapi-server/internal/shared/ocr/baidu_ocr_service.go

549 lines
15 KiB
Go
Raw Normal View History

2025-07-11 21:05:58 +08:00
package ocr
import (
	"context"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"go.uber.org/zap"

	"tyapi-server/internal/domains/certification/dto"
)
// BaiduOCRService is a client for the Baidu OCR HTTP API.
type BaiduOCRService struct {
	// Credentials supplied at construction. apiKey and secretKey are sent as
	// client_id and client_secret when requesting an access token; appID is
	// stored as given.
	appID, apiKey, secretKey string

	// endpoint is the base URL that every API path is appended to.
	endpoint string

	// timeout is the per-request limit applied by sendRequest.
	timeout time.Duration

	// logger receives progress and failure messages.
	logger *zap.Logger
}
// NewBaiduOCRService builds a BaiduOCRService from the given credentials and
// logger. The endpoint is fixed to https://aip.baidubce.com and the request
// timeout to 30 seconds.
func NewBaiduOCRService(appID, apiKey, secretKey string, logger *zap.Logger) *BaiduOCRService {
	svc := new(BaiduOCRService)
	svc.appID, svc.apiKey, svc.secretKey = appID, apiKey, secretKey
	svc.endpoint = "https://aip.baidubce.com"
	svc.timeout = 30 * time.Second
	svc.logger = logger
	return svc
}
// RecognizeBusinessLicense runs business-license OCR on imageBytes.
//
// It obtains an access token, POSTs the base64-encoded image as a
// form-encoded request body, and converts the JSON reply into a
// dto.BusinessLicenseResult. An error is returned when the token cannot be
// obtained, the HTTP call fails, the reply is not valid JSON, or the reply
// carries a non-zero error_code.
func (s *BaiduOCRService) RecognizeBusinessLicense(ctx context.Context, imageBytes []byte) (*dto.BusinessLicenseResult, error) {
	s.logger.Info("开始识别营业执照", zap.Int("image_size", len(imageBytes)))

	accessToken, err := s.getAccessToken(ctx)
	if err != nil {
		return nil, fmt.Errorf("获取访问令牌失败: %w", err)
	}

	// Only the token belongs in the query string. The image travels in the
	// form-encoded body: a base64 image in the URL exceeds URL length limits
	// for any realistic picture and leaks into access logs.
	query := url.Values{}
	query.Set("access_token", accessToken)
	form := url.Values{}
	form.Set("image", base64.StdEncoding.EncodeToString(imageBytes))

	apiURL := fmt.Sprintf("%s/rest/2.0/ocr/v1/business_license?%s", s.endpoint, query.Encode())
	resp, err := s.sendRequest(ctx, "POST", apiURL, strings.NewReader(form.Encode()))
	if err != nil {
		return nil, fmt.Errorf("营业执照识别请求失败: %w", err)
	}

	var result map[string]interface{}
	if err := json.Unmarshal(resp, &result); err != nil {
		return nil, fmt.Errorf("解析响应失败: %w", err)
	}

	if errCode, ok := result["error_code"].(float64); ok && errCode != 0 {
		// error_msg may be absent or not a string; a checked assertion keeps
		// a malformed error reply from panicking.
		errorMsg, ok := result["error_msg"].(string)
		if !ok {
			errorMsg = fmt.Sprintf("error_code %d", int(errCode))
		}
		return nil, fmt.Errorf("OCR识别失败: %s", errorMsg)
	}

	licenseResult := s.parseBusinessLicenseResult(result)
	s.logger.Info("营业执照识别成功",
		zap.String("company_name", licenseResult.CompanyName),
		zap.String("legal_representative", licenseResult.LegalRepresentative),
		zap.String("registered_capital", licenseResult.RegisteredCapital),
	)
	return licenseResult, nil
}
// RecognizeIDCard runs ID-card OCR on imageBytes.
//
// side selects which face of the card the image shows; RecognizeFromURL passes
// "front" or "back". The image and side are POSTed as a form-encoded body and
// the JSON reply is converted into a dto.IDCardResult. An error is returned
// when the token cannot be obtained, the HTTP call fails, the reply is not
// valid JSON, or the reply carries a non-zero error_code.
func (s *BaiduOCRService) RecognizeIDCard(ctx context.Context, imageBytes []byte, side string) (*dto.IDCardResult, error) {
	s.logger.Info("开始识别身份证", zap.String("side", side), zap.Int("image_size", len(imageBytes)))

	accessToken, err := s.getAccessToken(ctx)
	if err != nil {
		return nil, fmt.Errorf("获取访问令牌失败: %w", err)
	}

	// Only the token belongs in the query string; the payload goes in the
	// form-encoded body so large images do not overflow the URL.
	query := url.Values{}
	query.Set("access_token", accessToken)
	form := url.Values{}
	form.Set("image", base64.StdEncoding.EncodeToString(imageBytes))
	// The idcard endpoint names this parameter id_card_side, not side.
	form.Set("id_card_side", side)

	apiURL := fmt.Sprintf("%s/rest/2.0/ocr/v1/idcard?%s", s.endpoint, query.Encode())
	resp, err := s.sendRequest(ctx, "POST", apiURL, strings.NewReader(form.Encode()))
	if err != nil {
		return nil, fmt.Errorf("身份证识别请求失败: %w", err)
	}

	var result map[string]interface{}
	if err := json.Unmarshal(resp, &result); err != nil {
		return nil, fmt.Errorf("解析响应失败: %w", err)
	}

	if errCode, ok := result["error_code"].(float64); ok && errCode != 0 {
		// error_msg may be absent or not a string; a checked assertion keeps
		// a malformed error reply from panicking.
		errorMsg, ok := result["error_msg"].(string)
		if !ok {
			errorMsg = fmt.Sprintf("error_code %d", int(errCode))
		}
		return nil, fmt.Errorf("OCR识别失败: %s", errorMsg)
	}

	idCardResult := s.parseIDCardResult(result, side)
	// NOTE(review): this writes the holder's name and full ID number to the
	// log at Info level; consider masking or dropping these fields.
	s.logger.Info("身份证识别成功",
		zap.String("name", idCardResult.Name),
		zap.String("id_number", idCardResult.IDNumber),
		zap.String("side", side),
	)
	return idCardResult, nil
}
// RecognizeGeneralText runs general-purpose text OCR on imageBytes.
//
// It obtains an access token, POSTs the base64-encoded image as a
// form-encoded request body, and converts the JSON reply into a
// dto.GeneralTextResult. An error is returned when the token cannot be
// obtained, the HTTP call fails, the reply is not valid JSON, or the reply
// carries a non-zero error_code.
func (s *BaiduOCRService) RecognizeGeneralText(ctx context.Context, imageBytes []byte) (*dto.GeneralTextResult, error) {
	s.logger.Info("开始通用文字识别", zap.Int("image_size", len(imageBytes)))

	accessToken, err := s.getAccessToken(ctx)
	if err != nil {
		return nil, fmt.Errorf("获取访问令牌失败: %w", err)
	}

	// Only the token belongs in the query string; the image goes in the
	// form-encoded body so large pictures do not overflow the URL.
	query := url.Values{}
	query.Set("access_token", accessToken)
	form := url.Values{}
	form.Set("image", base64.StdEncoding.EncodeToString(imageBytes))

	apiURL := fmt.Sprintf("%s/rest/2.0/ocr/v1/general_basic?%s", s.endpoint, query.Encode())
	resp, err := s.sendRequest(ctx, "POST", apiURL, strings.NewReader(form.Encode()))
	if err != nil {
		return nil, fmt.Errorf("通用文字识别请求失败: %w", err)
	}

	var result map[string]interface{}
	if err := json.Unmarshal(resp, &result); err != nil {
		return nil, fmt.Errorf("解析响应失败: %w", err)
	}

	if errCode, ok := result["error_code"].(float64); ok && errCode != 0 {
		// error_msg may be absent or not a string; a checked assertion keeps
		// a malformed error reply from panicking.
		errorMsg, ok := result["error_msg"].(string)
		if !ok {
			errorMsg = fmt.Sprintf("error_code %d", int(errCode))
		}
		return nil, fmt.Errorf("OCR识别失败: %s", errorMsg)
	}

	textResult := s.parseGeneralTextResult(result)
	s.logger.Info("通用文字识别成功",
		zap.Int("word_count", len(textResult.Words)),
		zap.Float64("confidence", textResult.Confidence),
	)
	return textResult, nil
}
// RecognizeFromURL downloads the image at imageURL and runs the recognizer
// selected by ocrType on it.
//
// Supported ocrType values are "business_license", "idcard_front",
// "idcard_back" and "general_text". On success the returned value is the
// result pointer produced by the matching Recognize* method. On any failure
// the returned value is an untyped nil, so callers may safely test it
// against nil.
func (s *BaiduOCRService) RecognizeFromURL(ctx context.Context, imageURL string, ocrType string) (interface{}, error) {
	s.logger.Info("从URL识别图片", zap.String("url", imageURL), zap.String("type", ocrType))

	// Choose the recognizer before downloading so an unsupported type fails
	// without any network traffic. Each closure returns a literal nil on
	// error: returning the typed nil pointer directly would wrap it in a
	// non-nil interface value.
	var recognize func(img []byte) (interface{}, error)
	switch ocrType {
	case "business_license":
		recognize = func(img []byte) (interface{}, error) {
			res, err := s.RecognizeBusinessLicense(ctx, img)
			if err != nil {
				return nil, err
			}
			return res, nil
		}
	case "idcard_front":
		recognize = func(img []byte) (interface{}, error) {
			res, err := s.RecognizeIDCard(ctx, img, "front")
			if err != nil {
				return nil, err
			}
			return res, nil
		}
	case "idcard_back":
		recognize = func(img []byte) (interface{}, error) {
			res, err := s.RecognizeIDCard(ctx, img, "back")
			if err != nil {
				return nil, err
			}
			return res, nil
		}
	case "general_text":
		recognize = func(img []byte) (interface{}, error) {
			res, err := s.RecognizeGeneralText(ctx, img)
			if err != nil {
				return nil, err
			}
			return res, nil
		}
	default:
		return nil, fmt.Errorf("不支持的OCR类型: %s", ocrType)
	}

	imageBytes, err := s.downloadImage(ctx, imageURL)
	if err != nil {
		s.logger.Error("下载图片失败", zap.Error(err))
		return nil, fmt.Errorf("下载图片失败: %w", err)
	}

	return recognize(imageBytes)
}
// getAccessToken requests an API access token using the client-credentials
// grant, sending apiKey as client_id and secretKey as client_secret.
//
// It returns the token string, or an error when the request fails, the reply
// is not valid JSON, the reply carries an "error" field, or no non-empty
// access_token is present. A new token is requested on every call; nothing
// is cached here.
func (s *BaiduOCRService) getAccessToken(ctx context.Context) (string, error) {
	// Build the query with url.Values so credentials containing reserved
	// characters (&, =, +, ...) are escaped instead of corrupting the URL.
	// NOTE(review): the secret still travels in the query string, and
	// transport errors from net/http include the request URL in their text;
	// confirm that such errors are not logged verbatim.
	query := url.Values{}
	query.Set("grant_type", "client_credentials")
	query.Set("client_id", s.apiKey)
	query.Set("client_secret", s.secretKey)
	tokenURL := fmt.Sprintf("%s/oauth/2.0/token?%s", s.endpoint, query.Encode())

	resp, err := s.sendRequest(ctx, "POST", tokenURL, nil)
	if err != nil {
		return "", fmt.Errorf("获取访问令牌请求失败: %w", err)
	}

	var result map[string]interface{}
	if err := json.Unmarshal(resp, &result); err != nil {
		return "", fmt.Errorf("解析访问令牌响应失败: %w", err)
	}

	if errCode, ok := result["error"].(string); ok && errCode != "" {
		// error_description is optional; a checked assertion avoids a panic
		// when it is missing or not a string.
		errorDesc, _ := result["error_description"].(string)
		return "", fmt.Errorf("获取访问令牌失败: %s - %s", errCode, errorDesc)
	}

	// An empty token is as unusable as a missing one.
	accessToken, ok := result["access_token"].(string)
	if !ok || accessToken == "" {
		return "", fmt.Errorf("响应中未找到访问令牌")
	}
	return accessToken, nil
}
// sendRequest performs one HTTP request and returns the response body.
//
// The request is bound to ctx and limited by s.timeout. Content-Type is set
// to application/x-www-form-urlencoded and User-Agent to tyapi-server/1.0.
// body may be nil. Any status other than 200 is reported as an error that
// includes the status code and the first bytes of the response body, so the
// server's explanation is not lost.
func (s *BaiduOCRService) sendRequest(ctx context.Context, method, url string, body io.Reader) ([]byte, error) {
	// maxErrorSnippet bounds how much of a failed response is copied into the
	// returned error.
	const maxErrorSnippet = 512

	client := &http.Client{
		Timeout: s.timeout,
	}

	req, err := http.NewRequestWithContext(ctx, method, url, body)
	if err != nil {
		return nil, fmt.Errorf("创建请求失败: %w", err)
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("User-Agent", "tyapi-server/1.0")

	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("发送请求失败: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: a read failure here only shortens the diagnostic, so
		// the read error is deliberately ignored.
		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorSnippet))
		if len(snippet) == 0 {
			return nil, fmt.Errorf("请求失败,状态码: %d", resp.StatusCode)
		}
		return nil, fmt.Errorf("请求失败,状态码: %d, 响应: %s", resp.StatusCode, snippet)
	}

	responseBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("读取响应内容失败: %w", err)
	}
	return responseBody, nil
}
// parseBusinessLicenseResult converts a decoded business-license OCR reply
// into a dto.BusinessLicenseResult.
//
// Confidence and Words are filled via extractConfidence and extractWords.
// The named fields are read from result["words_result"], which is expected to
// be an object whose values are objects with a string "words" entry. A field
// that is missing or has an unexpected type is left empty instead of
// panicking, so a partial or malformed reply still yields a result.
func (s *BaiduOCRService) parseBusinessLicenseResult(result map[string]interface{}) *dto.BusinessLicenseResult {
	licenseResult := &dto.BusinessLicenseResult{
		Confidence: s.extractConfidence(result),
		Words:      s.extractWords(result),
	}

	wordsResult, ok := result["words_result"].(map[string]interface{})
	if !ok {
		return licenseResult
	}

	// field returns the recognized text stored under key, or "" when the key
	// is absent or its value does not have the expected shape.
	field := func(key string) string {
		entry, ok := wordsResult[key].(map[string]interface{})
		if !ok {
			return ""
		}
		text, _ := entry["words"].(string)
		return text
	}

	licenseResult.CompanyName = field("单位名称")
	licenseResult.LegalRepresentative = field("法人")
	licenseResult.RegisteredCapital = field("注册资本")
	licenseResult.RegisteredAddress = field("地址")
	licenseResult.RegistrationNumber = field("社会信用代码")
	licenseResult.BusinessScope = field("经营范围")
	licenseResult.RegistrationDate = field("成立日期")
	// NOTE(review): verify this key against the provider's response schema;
	// the validity period may be published under a different name.
	licenseResult.ValidDate = field("营业期限")

	return licenseResult
}
// parseIDCardResult converts a decoded ID-card OCR reply into a
// dto.IDCardResult.
//
// Side is copied from the argument; Confidence and Words are filled via
// extractConfidence and extractWords. When side is "front" the personal
// fields are read; for any other value the issuing authority and validity
// period are read. Fields come from result["words_result"], expected to be an
// object whose values are objects with a string "words" entry. A field that
// is missing or has an unexpected type is left empty instead of panicking.
func (s *BaiduOCRService) parseIDCardResult(result map[string]interface{}, side string) *dto.IDCardResult {
	idCardResult := &dto.IDCardResult{
		Side:       side,
		Confidence: s.extractConfidence(result),
		Words:      s.extractWords(result),
	}

	wordsResult, ok := result["words_result"].(map[string]interface{})
	if !ok {
		return idCardResult
	}

	// field returns the recognized text stored under key, or "" when the key
	// is absent or its value does not have the expected shape.
	field := func(key string) string {
		entry, ok := wordsResult[key].(map[string]interface{})
		if !ok {
			return ""
		}
		text, _ := entry["words"].(string)
		return text
	}

	if side == "front" {
		idCardResult.Name = field("姓名")
		idCardResult.Sex = field("性别")
		idCardResult.Nation = field("民族")
		idCardResult.BirthDate = field("出生")
		idCardResult.Address = field("住址")
		idCardResult.IDNumber = field("公民身份号码")
		return idCardResult
	}

	idCardResult.IssuingAuthority = field("签发机关")
	// NOTE(review): verify this key against the provider's response schema;
	// the back side may report issue and expiry dates under separate keys.
	idCardResult.ValidDate = field("有效期限")
	return idCardResult
}
// parseGeneralTextResult converts a decoded general-text OCR reply into a
// dto.GeneralTextResult.
//
// Confidence is filled via extractConfidence. Words collects, in order, the
// string "words" entry of every object in the result["words_result"] array;
// elements of any other shape are skipped. When words_result is missing or is
// not an array, Words is an empty, non-nil slice.
func (s *BaiduOCRService) parseGeneralTextResult(result map[string]interface{}) *dto.GeneralTextResult {
	// The checked assertion yields a nil slice on a missing or mistyped
	// value; len and range both handle nil, so no panic is possible.
	wordsResult, _ := result["words_result"].([]interface{})

	textResult := &dto.GeneralTextResult{
		Confidence: s.extractConfidence(result),
		Words:      make([]string, 0, len(wordsResult)),
	}

	for _, word := range wordsResult {
		wordMap, ok := word.(map[string]interface{})
		if !ok {
			continue
		}
		if words, ok := wordMap["words"].(string); ok {
			textResult.Words = append(textResult.Words, words)
		}
	}
	return textResult
}
// extractConfidence returns the top-level "confidence" value of result when
// it is a float64, and 0 when the key is absent or holds any other type.
func (s *BaiduOCRService) extractConfidence(result map[string]interface{}) float64 {
	score, isFloat := result["confidence"].(float64)
	if !isFloat {
		return 0.0
	}
	return score
}
// extractWords gathers every recognized text string from
// result["words_result"].
//
// words_result may be an object (structured documents such as a business
// license) or an array (general text). In both shapes each element that is an
// object with a string "words" entry contributes that string; anything else
// is skipped. For the array shape the order follows the array; for the object
// shape it follows Go map iteration, which is unspecified. The returned slice
// is never nil.
func (s *BaiduOCRService) extractWords(result map[string]interface{}) []string {
	collected := make([]string, 0)

	// appendText adds entry's "words" string to collected when entry has the
	// expected shape.
	appendText := func(entry interface{}) {
		fields, isMap := entry.(map[string]interface{})
		if !isMap {
			return
		}
		if text, isString := fields["words"].(string); isString {
			collected = append(collected, text)
		}
	}

	raw, present := result["words_result"]
	if !present {
		return collected
	}

	if structured, isMap := raw.(map[string]interface{}); isMap {
		for _, entry := range structured {
			appendText(entry)
		}
	} else if list, isList := raw.([]interface{}); isList {
		for _, entry := range list {
			appendText(entry)
		}
	}
	return collected
}
// downloadImage fetches imageURL with an HTTP GET bound to ctx and returns
// the response body.
//
// The request uses a 30-second client timeout. An error is returned when the
// request cannot be built or sent, the status is not 200, the body cannot be
// read, or the body is larger than 10 MiB.
//
// NOTE(review): imageURL is fetched exactly as given. If it can originate
// from untrusted callers, restrict the allowed schemes and hosts to prevent
// server-side request forgery.
func (s *BaiduOCRService) downloadImage(ctx context.Context, imageURL string) ([]byte, error) {
	// maxImageBytes caps how much of a remote body is buffered, so an
	// arbitrary URL cannot exhaust memory.
	const maxImageBytes = 10 << 20

	client := &http.Client{
		Timeout: 30 * time.Second,
	}

	req, err := http.NewRequestWithContext(ctx, "GET", imageURL, nil)
	if err != nil {
		return nil, fmt.Errorf("创建请求失败: %w", err)
	}

	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("下载图片失败: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("下载图片失败,状态码: %d", resp.StatusCode)
	}

	// Reject early when the server announces an oversized body.
	if resp.ContentLength > maxImageBytes {
		return nil, fmt.Errorf("图片过大: 超过 %d 字节", maxImageBytes)
	}

	// Read one byte past the cap so an oversized body is distinguishable
	// from one that is exactly at the limit.
	imageBytes, err := io.ReadAll(io.LimitReader(resp.Body, maxImageBytes+1))
	if err != nil {
		return nil, fmt.Errorf("读取图片内容失败: %w", err)
	}
	if len(imageBytes) > maxImageBytes {
		return nil, fmt.Errorf("图片过大: 超过 %d 字节", maxImageBytes)
	}
	return imageBytes, nil
}
// ValidateBusinessLicense checks that a business-license result is usable.
//
// It returns an error when result is nil, when Confidence is below 0.8, or
// when CompanyName, LegalRepresentative or RegistrationNumber is empty. The
// checks run in that order and the first failure is reported; nil means all
// checks passed.
//
// NOTE(review): Confidence is populated from a top-level "confidence" field
// of the OCR reply and is 0 when that field is absent; confirm the provider
// actually returns it, otherwise every result fails this check.
func (s *BaiduOCRService) ValidateBusinessLicense(result *dto.BusinessLicenseResult) error {
	// Guard against a nil pointer, which would otherwise panic on the first
	// field access.
	if result == nil {
		return fmt.Errorf("识别结果为空")
	}

	switch {
	case result.Confidence < 0.8:
		return fmt.Errorf("识别置信度过低: %.2f", result.Confidence)
	case result.CompanyName == "":
		return fmt.Errorf("未能识别公司名称")
	case result.LegalRepresentative == "":
		return fmt.Errorf("未能识别法定代表人")
	case result.RegistrationNumber == "":
		return fmt.Errorf("未能识别统一社会信用代码")
	}
	return nil
}
// ValidateIDCard checks that an ID-card result is usable.
//
// It returns an error when result is nil or when Confidence is below 0.8.
// For Side == "front" it then requires Name and IDNumber to be non-empty; for
// any other Side it requires IssuingAuthority and ValidDate. The first
// failing check is reported; nil means all checks passed.
//
// NOTE(review): Confidence is populated from a top-level "confidence" field
// of the OCR reply and is 0 when that field is absent; confirm the provider
// actually returns it, otherwise every result fails this check.
func (s *BaiduOCRService) ValidateIDCard(result *dto.IDCardResult) error {
	// Guard against a nil pointer, which would otherwise panic on the first
	// field access.
	if result == nil {
		return fmt.Errorf("识别结果为空")
	}
	if result.Confidence < 0.8 {
		return fmt.Errorf("识别置信度过低: %.2f", result.Confidence)
	}

	if result.Side == "front" {
		switch {
		case result.Name == "":
			return fmt.Errorf("未能识别姓名")
		case result.IDNumber == "":
			return fmt.Errorf("未能识别身份证号码")
		}
		return nil
	}

	switch {
	case result.IssuingAuthority == "":
		return fmt.Errorf("未能识别签发机关")
	case result.ValidDate == "":
		return fmt.Errorf("未能识别有效期限")
	}
	return nil
}