feat(架构): 完善基础架构设计
This commit is contained in:
292
internal/shared/tracing/decorators.go
Normal file
292
internal/shared/tracing/decorators.go
Normal file
@@ -0,0 +1,292 @@
|
||||
package tracing
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// TracableService is the interface a service exposes so that it can be
// identified by name.
type TracableService interface {
	// Name reports the service's name.
	Name() string
}
|
||||
|
||||
// ServiceDecorator 服务装饰器
|
||||
type ServiceDecorator struct {
|
||||
tracer *Tracer
|
||||
logger *zap.Logger
|
||||
config DecoratorConfig
|
||||
}
|
||||
|
||||
// DecoratorConfig holds the settings of a ServiceDecorator.
type DecoratorConfig struct {
	// EnableMethodTracing switches method tracing on; when false no method
	// is selected for tracing.
	EnableMethodTracing bool
	// ExcludePatterns lists substrings; a method whose name contains any of
	// them is not traced.
	ExcludePatterns []string
	// IncludeArguments records call arguments as span attributes.
	IncludeArguments bool
	// IncludeResults records return values as span attributes on success.
	IncludeResults bool
	// SlowMethodThreshold is the duration above which a call is flagged and
	// logged as slow.
	SlowMethodThreshold time.Duration
}
|
||||
|
||||
// DefaultDecoratorConfig 默认装饰器配置
|
||||
func DefaultDecoratorConfig() DecoratorConfig {
|
||||
return DecoratorConfig{
|
||||
EnableMethodTracing: true,
|
||||
ExcludePatterns: []string{"Health", "Ping", "Name"},
|
||||
IncludeArguments: true,
|
||||
IncludeResults: false,
|
||||
SlowMethodThreshold: 100 * time.Millisecond,
|
||||
}
|
||||
}
|
||||
|
||||
// NewServiceDecorator 创建服务装饰器
|
||||
func NewServiceDecorator(tracer *Tracer, logger *zap.Logger) *ServiceDecorator {
|
||||
return &ServiceDecorator{
|
||||
tracer: tracer,
|
||||
logger: logger,
|
||||
config: DefaultDecoratorConfig(),
|
||||
}
|
||||
}
|
||||
|
||||
// WrapService 自动包装服务,为所有方法添加链路追踪
|
||||
func (d *ServiceDecorator) WrapService(service interface{}) interface{} {
|
||||
serviceValue := reflect.ValueOf(service)
|
||||
serviceType := reflect.TypeOf(service)
|
||||
|
||||
if serviceType.Kind() == reflect.Ptr {
|
||||
serviceType = serviceType.Elem()
|
||||
serviceValue = serviceValue.Elem()
|
||||
}
|
||||
|
||||
// 创建代理结构
|
||||
proxyType := d.createProxyType(serviceType)
|
||||
proxyValue := reflect.New(proxyType).Elem()
|
||||
|
||||
// 设置原始服务字段
|
||||
proxyValue.FieldByName("target").Set(reflect.ValueOf(service))
|
||||
proxyValue.FieldByName("decorator").Set(reflect.ValueOf(d))
|
||||
|
||||
return proxyValue.Addr().Interface()
|
||||
}
|
||||
|
||||
// createProxyType 创建代理类型
|
||||
func (d *ServiceDecorator) createProxyType(serviceType reflect.Type) reflect.Type {
|
||||
// 获取服务名称
|
||||
serviceName := d.getServiceName(serviceType)
|
||||
|
||||
// 创建代理结构字段
|
||||
fields := []reflect.StructField{
|
||||
{
|
||||
Name: "target",
|
||||
Type: reflect.PtrTo(serviceType),
|
||||
},
|
||||
{
|
||||
Name: "decorator",
|
||||
Type: reflect.TypeOf(d),
|
||||
},
|
||||
}
|
||||
|
||||
// 为每个方法创建包装器方法
|
||||
for i := 0; i < serviceType.NumMethod(); i++ {
|
||||
method := serviceType.Method(i)
|
||||
if d.shouldTraceMethod(method.Name) {
|
||||
// 创建方法字段(用于存储方法实现)
|
||||
fields = append(fields, reflect.StructField{
|
||||
Name: method.Name,
|
||||
Type: method.Type,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// 创建新的结构类型
|
||||
proxyType := reflect.StructOf(fields)
|
||||
|
||||
// 实现接口方法
|
||||
d.implementMethods(proxyType, serviceType, serviceName)
|
||||
|
||||
return proxyType
|
||||
}
|
||||
|
||||
// shouldTraceMethod 判断是否应该追踪方法
|
||||
func (d *ServiceDecorator) shouldTraceMethod(methodName string) bool {
|
||||
if !d.config.EnableMethodTracing {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, pattern := range d.config.ExcludePatterns {
|
||||
if strings.Contains(methodName, pattern) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// getServiceName 获取服务名称
|
||||
func (d *ServiceDecorator) getServiceName(serviceType reflect.Type) string {
|
||||
serviceName := serviceType.Name()
|
||||
// 移除Service后缀
|
||||
if strings.HasSuffix(serviceName, "Service") {
|
||||
serviceName = strings.TrimSuffix(serviceName, "Service")
|
||||
}
|
||||
return strings.ToLower(serviceName)
|
||||
}
|
||||
|
||||
// TraceMethodCall 追踪方法调用
|
||||
func (d *ServiceDecorator) TraceMethodCall(
|
||||
ctx context.Context,
|
||||
serviceName, methodName string,
|
||||
fn func(context.Context) ([]reflect.Value, error),
|
||||
args []reflect.Value,
|
||||
) ([]reflect.Value, error) {
|
||||
// 创建span名称
|
||||
spanName := fmt.Sprintf("%s.%s", serviceName, methodName)
|
||||
|
||||
// 开始追踪
|
||||
ctx, span := d.tracer.StartSpan(ctx, spanName)
|
||||
defer span.End()
|
||||
|
||||
// 添加基础属性
|
||||
d.tracer.AddSpanAttributes(span,
|
||||
attribute.String("service.name", serviceName),
|
||||
attribute.String("service.method", methodName),
|
||||
attribute.String("service.type", "business"),
|
||||
)
|
||||
|
||||
// 添加参数信息(如果启用)
|
||||
if d.config.IncludeArguments {
|
||||
d.addArgumentAttributes(span, args)
|
||||
}
|
||||
|
||||
// 记录开始时间
|
||||
startTime := time.Now()
|
||||
|
||||
// 执行原始方法
|
||||
results, err := fn(ctx)
|
||||
|
||||
// 计算执行时间
|
||||
duration := time.Since(startTime)
|
||||
d.tracer.AddSpanAttributes(span,
|
||||
attribute.Int64("service.duration_ms", duration.Milliseconds()),
|
||||
)
|
||||
|
||||
// 标记慢方法
|
||||
if duration > d.config.SlowMethodThreshold {
|
||||
d.tracer.AddSpanAttributes(span,
|
||||
attribute.Bool("service.slow_method", true),
|
||||
)
|
||||
d.logger.Warn("慢方法检测",
|
||||
zap.String("service", serviceName),
|
||||
zap.String("method", methodName),
|
||||
zap.Duration("duration", duration),
|
||||
zap.String("trace_id", d.tracer.GetTraceID(ctx)),
|
||||
)
|
||||
}
|
||||
|
||||
// 处理错误
|
||||
if err != nil {
|
||||
d.tracer.SetSpanError(span, err)
|
||||
d.logger.Error("服务方法执行失败",
|
||||
zap.String("service", serviceName),
|
||||
zap.String("method", methodName),
|
||||
zap.Error(err),
|
||||
zap.String("trace_id", d.tracer.GetTraceID(ctx)),
|
||||
)
|
||||
} else {
|
||||
d.tracer.SetSpanSuccess(span)
|
||||
|
||||
// 添加结果信息(如果启用)
|
||||
if d.config.IncludeResults {
|
||||
d.addResultAttributes(span, results)
|
||||
}
|
||||
}
|
||||
|
||||
return results, err
|
||||
}
|
||||
|
||||
// addArgumentAttributes 添加参数属性
|
||||
func (d *ServiceDecorator) addArgumentAttributes(span trace.Span, args []reflect.Value) {
|
||||
for i, arg := range args {
|
||||
if i == 0 && arg.Type().String() == "context.Context" {
|
||||
continue // 跳过context参数
|
||||
}
|
||||
|
||||
argName := fmt.Sprintf("service.arg_%d", i)
|
||||
argValue := d.extractValue(arg)
|
||||
|
||||
if argValue != "" && len(argValue) < 1000 { // 限制长度避免性能问题
|
||||
d.tracer.AddSpanAttributes(span,
|
||||
attribute.String(argName, argValue),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// addResultAttributes 添加结果属性
|
||||
func (d *ServiceDecorator) addResultAttributes(span trace.Span, results []reflect.Value) {
|
||||
for i, result := range results {
|
||||
if result.Type().String() == "error" {
|
||||
continue // 错误在其他地方处理
|
||||
}
|
||||
|
||||
resultName := fmt.Sprintf("service.result_%d", i)
|
||||
resultValue := d.extractValue(result)
|
||||
|
||||
if resultValue != "" && len(resultValue) < 1000 {
|
||||
d.tracer.AddSpanAttributes(span,
|
||||
attribute.String(resultName, resultValue),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// extractValue 提取值的字符串表示
|
||||
func (d *ServiceDecorator) extractValue(value reflect.Value) string {
|
||||
if !value.IsValid() {
|
||||
return ""
|
||||
}
|
||||
|
||||
switch value.Kind() {
|
||||
case reflect.String:
|
||||
return value.String()
|
||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
||||
return fmt.Sprintf("%d", value.Int())
|
||||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
||||
return fmt.Sprintf("%d", value.Uint())
|
||||
case reflect.Float32, reflect.Float64:
|
||||
return fmt.Sprintf("%.2f", value.Float())
|
||||
case reflect.Bool:
|
||||
return fmt.Sprintf("%t", value.Bool())
|
||||
case reflect.Ptr:
|
||||
if value.IsNil() {
|
||||
return "nil"
|
||||
}
|
||||
return d.extractValue(value.Elem())
|
||||
case reflect.Struct:
|
||||
// 对于结构体,只返回类型名
|
||||
return value.Type().Name()
|
||||
case reflect.Slice, reflect.Array:
|
||||
return fmt.Sprintf("[%d items]", value.Len())
|
||||
default:
|
||||
return value.Type().Name()
|
||||
}
|
||||
}
|
||||
|
||||
// implementMethods 实现接口方法(占位符,实际需要运行时代理)
|
||||
func (d *ServiceDecorator) implementMethods(proxyType, serviceType reflect.Type, serviceName string) {
|
||||
// 这里是运行时方法实现的占位符
|
||||
// 实际实现需要使用reflect.MakeFunc或其他运行时代理技术
|
||||
}
|
||||
|
||||
// GetFunctionName returns the short name of the function value fn: the part
// of its runtime symbol name after the last ".".
//
// The "-fm" suffix that the runtime appends to method values is removed, so a
// bound method reports the plain method name. It returns "" when fn is nil or
// is not a function, instead of panicking.
func GetFunctionName(fn interface{}) string {
	v := reflect.ValueOf(fn)
	if v.Kind() != reflect.Func || v.IsNil() {
		return ""
	}

	// (*runtime.Func).Name tolerates a nil receiver and returns "".
	name := runtime.FuncForPC(v.Pointer()).Name()
	if dot := strings.LastIndex(name, "."); dot >= 0 {
		name = name[dot+1:]
	}
	return strings.TrimSuffix(name, "-fm")
}
|
||||
320
internal/shared/tracing/gorm_plugin.go
Normal file
320
internal/shared/tracing/gorm_plugin.go
Normal file
@@ -0,0 +1,320 @@
|
||||
package tracing
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"go.uber.org/zap"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
// Keys under which the plugin stores per-operation tracing state in the GORM
// settings between the before and after callbacks.
const (
	// gormSpanKey holds the span opened for the operation.
	gormSpanKey = "otel:span"
	// gormOperationKey holds the detected operation type.
	gormOperationKey = "otel:operation"
	// gormTableNameKey holds the resolved table name.
	gormTableNameKey = "otel:table_name"
	// gormStartTimeKey holds the time the span was opened.
	gormStartTimeKey = "otel:start_time"
)
|
||||
|
||||
// GormTracingPlugin GORM链路追踪插件
|
||||
type GormTracingPlugin struct {
|
||||
tracer *Tracer
|
||||
logger *zap.Logger
|
||||
config GormPluginConfig
|
||||
}
|
||||
|
||||
// GormPluginConfig holds the settings of GormTracingPlugin.
type GormPluginConfig struct {
	// IncludeSQL records the executed statement on the span.
	IncludeSQL bool
	// IncludeValues is not read by the code visible in this file.
	IncludeValues bool
	// SlowThreshold is the duration above which a successful operation is
	// flagged and logged as slow.
	SlowThreshold time.Duration
	// ExcludeTables lists table names that are not traced.
	ExcludeTables []string
	// SanitizeSQL passes the statement through sanitizeSQL before it is
	// recorded on the span.
	SanitizeSQL bool
}
|
||||
|
||||
// DefaultGormPluginConfig 默认GORM插件配置
|
||||
func DefaultGormPluginConfig() GormPluginConfig {
|
||||
return GormPluginConfig{
|
||||
IncludeSQL: true,
|
||||
IncludeValues: false, // 生产环境建议设为false避免记录敏感数据
|
||||
SlowThreshold: 200 * time.Millisecond,
|
||||
ExcludeTables: []string{"migrations", "schema_migrations"},
|
||||
SanitizeSQL: true,
|
||||
}
|
||||
}
|
||||
|
||||
// NewGormTracingPlugin 创建GORM追踪插件
|
||||
func NewGormTracingPlugin(tracer *Tracer, logger *zap.Logger) *GormTracingPlugin {
|
||||
return &GormTracingPlugin{
|
||||
tracer: tracer,
|
||||
logger: logger,
|
||||
config: DefaultGormPluginConfig(),
|
||||
}
|
||||
}
|
||||
|
||||
// Name 返回插件名称
|
||||
func (p *GormTracingPlugin) Name() string {
|
||||
return "gorm-otel-tracing"
|
||||
}
|
||||
|
||||
// Initialize 初始化插件
|
||||
func (p *GormTracingPlugin) Initialize(db *gorm.DB) error {
|
||||
// 注册各种操作的回调
|
||||
callbacks := []string{"create", "query", "update", "delete", "raw"}
|
||||
|
||||
for _, operation := range callbacks {
|
||||
switch operation {
|
||||
case "create":
|
||||
err := db.Callback().Create().Before("gorm:create").
|
||||
Register(p.Name()+":before_create", p.beforeOperation)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to register before create callback: %w", err)
|
||||
}
|
||||
err = db.Callback().Create().After("gorm:create").
|
||||
Register(p.Name()+":after_create", p.afterOperation)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to register after create callback: %w", err)
|
||||
}
|
||||
case "query":
|
||||
err := db.Callback().Query().Before("gorm:query").
|
||||
Register(p.Name()+":before_query", p.beforeOperation)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to register before query callback: %w", err)
|
||||
}
|
||||
err = db.Callback().Query().After("gorm:query").
|
||||
Register(p.Name()+":after_query", p.afterOperation)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to register after query callback: %w", err)
|
||||
}
|
||||
case "update":
|
||||
err := db.Callback().Update().Before("gorm:update").
|
||||
Register(p.Name()+":before_update", p.beforeOperation)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to register before update callback: %w", err)
|
||||
}
|
||||
err = db.Callback().Update().After("gorm:update").
|
||||
Register(p.Name()+":after_update", p.afterOperation)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to register after update callback: %w", err)
|
||||
}
|
||||
case "delete":
|
||||
err := db.Callback().Delete().Before("gorm:delete").
|
||||
Register(p.Name()+":before_delete", p.beforeOperation)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to register before delete callback: %w", err)
|
||||
}
|
||||
err = db.Callback().Delete().After("gorm:delete").
|
||||
Register(p.Name()+":after_delete", p.afterOperation)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to register after delete callback: %w", err)
|
||||
}
|
||||
case "raw":
|
||||
err := db.Callback().Raw().Before("gorm:raw").
|
||||
Register(p.Name()+":before_raw", p.beforeOperation)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to register before raw callback: %w", err)
|
||||
}
|
||||
err = db.Callback().Raw().After("gorm:raw").
|
||||
Register(p.Name()+":after_raw", p.afterOperation)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to register after raw callback: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
p.logger.Info("GORM追踪插件已初始化")
|
||||
return nil
|
||||
}
|
||||
|
||||
// beforeOperation 操作前回调
|
||||
func (p *GormTracingPlugin) beforeOperation(db *gorm.DB) {
|
||||
// 检查是否应该跳过追踪
|
||||
if p.shouldSkipTracing(db) {
|
||||
return
|
||||
}
|
||||
|
||||
ctx := db.Statement.Context
|
||||
if ctx == nil {
|
||||
ctx = context.Background()
|
||||
}
|
||||
|
||||
// 获取操作信息
|
||||
operation := p.getOperationType(db)
|
||||
tableName := p.getTableName(db)
|
||||
|
||||
// 检查是否应该排除此表
|
||||
if p.isExcludedTable(tableName) {
|
||||
return
|
||||
}
|
||||
|
||||
// 开始追踪
|
||||
ctx, span := p.tracer.StartDBSpan(ctx, operation, tableName)
|
||||
|
||||
// 添加基础属性
|
||||
p.tracer.AddSpanAttributes(span,
|
||||
attribute.String("db.system", "postgresql"),
|
||||
attribute.String("db.operation", operation),
|
||||
)
|
||||
|
||||
if tableName != "" {
|
||||
p.tracer.AddSpanAttributes(span, attribute.String("db.table", tableName))
|
||||
}
|
||||
|
||||
// 保存追踪信息到GORM context
|
||||
db.Set(gormSpanKey, span)
|
||||
db.Set(gormOperationKey, operation)
|
||||
db.Set(gormTableNameKey, tableName)
|
||||
db.Set(gormStartTimeKey, time.Now())
|
||||
|
||||
// 更新statement context
|
||||
db.Statement.Context = ctx
|
||||
}
|
||||
|
||||
// afterOperation 操作后回调
|
||||
func (p *GormTracingPlugin) afterOperation(db *gorm.DB) {
|
||||
// 获取span
|
||||
spanValue, exists := db.Get(gormSpanKey)
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
|
||||
span, ok := spanValue.(trace.Span)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
defer span.End()
|
||||
|
||||
// 获取操作信息
|
||||
operation, _ := db.Get(gormOperationKey)
|
||||
tableName, _ := db.Get(gormTableNameKey)
|
||||
startTime, _ := db.Get(gormStartTimeKey)
|
||||
|
||||
// 计算执行时间
|
||||
var duration time.Duration
|
||||
if st, ok := startTime.(time.Time); ok {
|
||||
duration = time.Since(st)
|
||||
p.tracer.AddSpanAttributes(span,
|
||||
attribute.Int64("db.duration_ms", duration.Milliseconds()),
|
||||
)
|
||||
}
|
||||
|
||||
// 添加SQL信息
|
||||
if p.config.IncludeSQL && db.Statement.SQL.String() != "" {
|
||||
sql := db.Statement.SQL.String()
|
||||
if p.config.SanitizeSQL {
|
||||
sql = p.sanitizeSQL(sql)
|
||||
}
|
||||
p.tracer.AddSpanAttributes(span, attribute.String("db.statement", sql))
|
||||
}
|
||||
|
||||
// 添加影响行数
|
||||
if db.Statement.RowsAffected >= 0 {
|
||||
p.tracer.AddSpanAttributes(span,
|
||||
attribute.Int64("db.rows_affected", db.Statement.RowsAffected),
|
||||
)
|
||||
}
|
||||
|
||||
// 处理错误
|
||||
if db.Error != nil {
|
||||
p.tracer.SetSpanError(span, db.Error)
|
||||
span.SetStatus(codes.Error, db.Error.Error())
|
||||
|
||||
p.logger.Error("数据库操作失败",
|
||||
zap.String("operation", fmt.Sprintf("%v", operation)),
|
||||
zap.String("table", fmt.Sprintf("%v", tableName)),
|
||||
zap.Error(db.Error),
|
||||
zap.String("trace_id", p.tracer.GetTraceID(db.Statement.Context)),
|
||||
)
|
||||
} else {
|
||||
p.tracer.SetSpanSuccess(span)
|
||||
span.SetStatus(codes.Ok, "success")
|
||||
|
||||
// 检查慢查询
|
||||
if duration > p.config.SlowThreshold {
|
||||
p.tracer.AddSpanAttributes(span,
|
||||
attribute.Bool("db.slow_query", true),
|
||||
)
|
||||
|
||||
p.logger.Warn("慢SQL查询检测",
|
||||
zap.String("operation", fmt.Sprintf("%v", operation)),
|
||||
zap.String("table", fmt.Sprintf("%v", tableName)),
|
||||
zap.Duration("duration", duration),
|
||||
zap.String("sql", db.Statement.SQL.String()),
|
||||
zap.String("trace_id", p.tracer.GetTraceID(db.Statement.Context)),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// shouldSkipTracing 检查是否应该跳过追踪
|
||||
func (p *GormTracingPlugin) shouldSkipTracing(db *gorm.DB) bool {
|
||||
// 检查是否已有span(避免重复追踪)
|
||||
if _, exists := db.Get(gormSpanKey); exists {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// getOperationType 获取操作类型
|
||||
func (p *GormTracingPlugin) getOperationType(db *gorm.DB) string {
|
||||
switch db.Statement.ReflectValue.Kind() {
|
||||
default:
|
||||
sql := strings.ToUpper(strings.TrimSpace(db.Statement.SQL.String()))
|
||||
if sql == "" {
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
if strings.HasPrefix(sql, "SELECT") {
|
||||
return "select"
|
||||
} else if strings.HasPrefix(sql, "INSERT") {
|
||||
return "insert"
|
||||
} else if strings.HasPrefix(sql, "UPDATE") {
|
||||
return "update"
|
||||
} else if strings.HasPrefix(sql, "DELETE") {
|
||||
return "delete"
|
||||
} else if strings.HasPrefix(sql, "CREATE") {
|
||||
return "create"
|
||||
} else if strings.HasPrefix(sql, "DROP") {
|
||||
return "drop"
|
||||
} else if strings.HasPrefix(sql, "ALTER") {
|
||||
return "alter"
|
||||
}
|
||||
|
||||
return "query"
|
||||
}
|
||||
}
|
||||
|
||||
// getTableName 获取表名
|
||||
func (p *GormTracingPlugin) getTableName(db *gorm.DB) string {
|
||||
if db.Statement.Table != "" {
|
||||
return db.Statement.Table
|
||||
}
|
||||
|
||||
if db.Statement.Schema != nil && db.Statement.Schema.Table != "" {
|
||||
return db.Statement.Schema.Table
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// isExcludedTable 检查是否为排除的表
|
||||
func (p *GormTracingPlugin) isExcludedTable(tableName string) bool {
|
||||
for _, excluded := range p.config.ExcludeTables {
|
||||
if tableName == excluded {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// sanitizeSQL 清理SQL语句,移除敏感信息
|
||||
func (p *GormTracingPlugin) sanitizeSQL(sql string) string {
|
||||
// 简单的SQL清理,将参数替换为占位符
|
||||
// 在生产环境中,您可能需要更复杂的清理逻辑
|
||||
return strings.ReplaceAll(sql, "'", "?")
|
||||
}
|
||||
407
internal/shared/tracing/redis_wrapper.go
Normal file
407
internal/shared/tracing/redis_wrapper.go
Normal file
@@ -0,0 +1,407 @@
|
||||
package tracing
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"fmt"
	"strings"
	"time"

	"github.com/redis/go-redis/v9"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/trace"
	"go.uber.org/zap"

	"tyapi-server/internal/shared/interfaces"
)
|
||||
|
||||
// TracedRedisCache Redis缓存自动追踪包装器
|
||||
type TracedRedisCache struct {
|
||||
client redis.UniversalClient
|
||||
tracer *Tracer
|
||||
logger *zap.Logger
|
||||
prefix string
|
||||
config RedisTracingConfig
|
||||
}
|
||||
|
||||
// RedisTracingConfig holds the tracing settings of TracedRedisCache.
type RedisTracingConfig struct {
	// IncludeKeys records the cache key on the span.
	IncludeKeys bool
	// IncludeValues is not read by the code visible in this file.
	IncludeValues bool
	// MaxKeyLength is the key length, in bytes, above which keys are
	// shortened before being logged or recorded.
	MaxKeyLength int
	// MaxValueLength is not read by the code visible in this file.
	MaxValueLength int
	// SlowThreshold is the duration above which an operation is flagged and
	// logged as slow.
	SlowThreshold time.Duration
	// SanitizeValues is not read by the code visible in this file.
	SanitizeValues bool
}
|
||||
|
||||
// DefaultRedisTracingConfig 默认Redis追踪配置
|
||||
func DefaultRedisTracingConfig() RedisTracingConfig {
|
||||
return RedisTracingConfig{
|
||||
IncludeKeys: true,
|
||||
IncludeValues: false, // 生产环境建议设为false保护敏感数据
|
||||
MaxKeyLength: 100,
|
||||
MaxValueLength: 1000,
|
||||
SlowThreshold: 50 * time.Millisecond,
|
||||
SanitizeValues: true,
|
||||
}
|
||||
}
|
||||
|
||||
// NewTracedRedisCache 创建带追踪的Redis缓存
|
||||
func NewTracedRedisCache(client redis.UniversalClient, tracer *Tracer, logger *zap.Logger, prefix string) interfaces.CacheService {
|
||||
return &TracedRedisCache{
|
||||
client: client,
|
||||
tracer: tracer,
|
||||
logger: logger,
|
||||
prefix: prefix,
|
||||
config: DefaultRedisTracingConfig(),
|
||||
}
|
||||
}
|
||||
|
||||
// Name 返回服务名称
|
||||
func (c *TracedRedisCache) Name() string {
|
||||
return "redis-cache"
|
||||
}
|
||||
|
||||
// Initialize 初始化服务
|
||||
func (c *TracedRedisCache) Initialize(ctx context.Context) error {
|
||||
c.logger.Info("Redis缓存服务已初始化")
|
||||
return nil
|
||||
}
|
||||
|
||||
// HealthCheck 健康检查
|
||||
func (c *TracedRedisCache) HealthCheck(ctx context.Context) error {
|
||||
_, err := c.client.Ping(ctx).Result()
|
||||
return err
|
||||
}
|
||||
|
||||
// Shutdown 关闭服务
|
||||
func (c *TracedRedisCache) Shutdown(ctx context.Context) error {
|
||||
c.logger.Info("Redis缓存服务已关闭")
|
||||
return c.client.Close()
|
||||
}
|
||||
|
||||
// Get 获取缓存值
|
||||
func (c *TracedRedisCache) Get(ctx context.Context, key string, dest interface{}) error {
|
||||
// 开始追踪
|
||||
ctx, span := c.tracer.StartCacheSpan(ctx, "get", key)
|
||||
defer span.End()
|
||||
|
||||
// 添加基础属性
|
||||
c.addBaseAttributes(span, "get", key)
|
||||
|
||||
// 记录开始时间
|
||||
startTime := time.Now()
|
||||
|
||||
// 构建完整键名
|
||||
fullKey := c.buildKey(key)
|
||||
|
||||
// 执行Redis操作
|
||||
result, err := c.client.Get(ctx, fullKey).Result()
|
||||
|
||||
// 计算执行时间
|
||||
duration := time.Since(startTime)
|
||||
c.tracer.AddSpanAttributes(span,
|
||||
attribute.Int64("redis.duration_ms", duration.Milliseconds()),
|
||||
)
|
||||
|
||||
// 检查慢操作
|
||||
if duration > c.config.SlowThreshold {
|
||||
c.tracer.AddSpanAttributes(span,
|
||||
attribute.Bool("redis.slow_operation", true),
|
||||
)
|
||||
c.logger.Warn("Redis慢操作检测",
|
||||
zap.String("operation", "get"),
|
||||
zap.String("key", c.sanitizeKey(key)),
|
||||
zap.Duration("duration", duration),
|
||||
zap.String("trace_id", c.tracer.GetTraceID(ctx)),
|
||||
)
|
||||
}
|
||||
|
||||
// 处理结果
|
||||
if err != nil {
|
||||
if err == redis.Nil {
|
||||
// 缓存未命中
|
||||
c.tracer.AddSpanAttributes(span,
|
||||
attribute.Bool("redis.hit", false),
|
||||
attribute.String("redis.result", "miss"),
|
||||
)
|
||||
c.tracer.SetSpanSuccess(span)
|
||||
return interfaces.ErrCacheMiss
|
||||
} else {
|
||||
// Redis错误
|
||||
c.tracer.SetSpanError(span, err)
|
||||
c.logger.Error("Redis GET操作失败",
|
||||
zap.String("key", c.sanitizeKey(key)),
|
||||
zap.Error(err),
|
||||
zap.String("trace_id", c.tracer.GetTraceID(ctx)),
|
||||
)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// 缓存命中
|
||||
c.tracer.AddSpanAttributes(span,
|
||||
attribute.Bool("redis.hit", true),
|
||||
attribute.String("redis.result", "hit"),
|
||||
attribute.Int("redis.value_size", len(result)),
|
||||
)
|
||||
|
||||
// 反序列化
|
||||
if err := c.deserialize(result, dest); err != nil {
|
||||
c.tracer.SetSpanError(span, err)
|
||||
return err
|
||||
}
|
||||
|
||||
c.tracer.SetSpanSuccess(span)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Set 设置缓存值
|
||||
func (c *TracedRedisCache) Set(ctx context.Context, key string, value interface{}, ttl ...interface{}) error {
|
||||
// 开始追踪
|
||||
ctx, span := c.tracer.StartCacheSpan(ctx, "set", key)
|
||||
defer span.End()
|
||||
|
||||
// 添加基础属性
|
||||
c.addBaseAttributes(span, "set", key)
|
||||
|
||||
// 处理TTL
|
||||
var expiration time.Duration
|
||||
if len(ttl) > 0 {
|
||||
if duration, ok := ttl[0].(time.Duration); ok {
|
||||
expiration = duration
|
||||
c.tracer.AddSpanAttributes(span,
|
||||
attribute.Int64("redis.ttl_seconds", int64(expiration.Seconds())),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// 记录开始时间
|
||||
startTime := time.Now()
|
||||
|
||||
// 序列化值
|
||||
serialized, err := c.serialize(value)
|
||||
if err != nil {
|
||||
c.tracer.SetSpanError(span, err)
|
||||
return err
|
||||
}
|
||||
|
||||
// 构建完整键名
|
||||
fullKey := c.buildKey(key)
|
||||
|
||||
// 执行Redis操作
|
||||
err = c.client.Set(ctx, fullKey, serialized, expiration).Err()
|
||||
|
||||
// 计算执行时间
|
||||
duration := time.Since(startTime)
|
||||
c.tracer.AddSpanAttributes(span,
|
||||
attribute.Int64("redis.duration_ms", duration.Milliseconds()),
|
||||
attribute.Int("redis.value_size", len(serialized)),
|
||||
)
|
||||
|
||||
// 检查慢操作
|
||||
if duration > c.config.SlowThreshold {
|
||||
c.tracer.AddSpanAttributes(span,
|
||||
attribute.Bool("redis.slow_operation", true),
|
||||
)
|
||||
c.logger.Warn("Redis慢操作检测",
|
||||
zap.String("operation", "set"),
|
||||
zap.String("key", c.sanitizeKey(key)),
|
||||
zap.Duration("duration", duration),
|
||||
zap.String("trace_id", c.tracer.GetTraceID(ctx)),
|
||||
)
|
||||
}
|
||||
|
||||
// 处理错误
|
||||
if err != nil {
|
||||
c.tracer.SetSpanError(span, err)
|
||||
c.logger.Error("Redis SET操作失败",
|
||||
zap.String("key", c.sanitizeKey(key)),
|
||||
zap.Error(err),
|
||||
zap.String("trace_id", c.tracer.GetTraceID(ctx)),
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
c.tracer.SetSpanSuccess(span)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete 删除缓存
|
||||
func (c *TracedRedisCache) Delete(ctx context.Context, keys ...string) error {
|
||||
// 开始追踪
|
||||
ctx, span := c.tracer.StartCacheSpan(ctx, "delete", strings.Join(keys, ","))
|
||||
defer span.End()
|
||||
|
||||
// 添加基础属性
|
||||
c.tracer.AddSpanAttributes(span,
|
||||
attribute.String("redis.operation", "delete"),
|
||||
attribute.Int("redis.key_count", len(keys)),
|
||||
)
|
||||
|
||||
// 记录开始时间
|
||||
startTime := time.Now()
|
||||
|
||||
// 构建完整键名
|
||||
fullKeys := make([]string, len(keys))
|
||||
for i, key := range keys {
|
||||
fullKeys[i] = c.buildKey(key)
|
||||
}
|
||||
|
||||
// 执行Redis操作
|
||||
deleted, err := c.client.Del(ctx, fullKeys...).Result()
|
||||
|
||||
// 计算执行时间
|
||||
duration := time.Since(startTime)
|
||||
c.tracer.AddSpanAttributes(span,
|
||||
attribute.Int64("redis.duration_ms", duration.Milliseconds()),
|
||||
attribute.Int64("redis.deleted_count", deleted),
|
||||
)
|
||||
|
||||
// 处理错误
|
||||
if err != nil {
|
||||
c.tracer.SetSpanError(span, err)
|
||||
c.logger.Error("Redis DELETE操作失败",
|
||||
zap.Strings("keys", c.sanitizeKeys(keys)),
|
||||
zap.Error(err),
|
||||
zap.String("trace_id", c.tracer.GetTraceID(ctx)),
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
c.tracer.SetSpanSuccess(span)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Exists 检查键是否存在
|
||||
func (c *TracedRedisCache) Exists(ctx context.Context, key string) (bool, error) {
|
||||
// 开始追踪
|
||||
ctx, span := c.tracer.StartCacheSpan(ctx, "exists", key)
|
||||
defer span.End()
|
||||
|
||||
// 添加基础属性
|
||||
c.addBaseAttributes(span, "exists", key)
|
||||
|
||||
// 记录开始时间
|
||||
startTime := time.Now()
|
||||
|
||||
// 构建完整键名
|
||||
fullKey := c.buildKey(key)
|
||||
|
||||
// 执行Redis操作
|
||||
count, err := c.client.Exists(ctx, fullKey).Result()
|
||||
|
||||
// 计算执行时间
|
||||
duration := time.Since(startTime)
|
||||
c.tracer.AddSpanAttributes(span,
|
||||
attribute.Int64("redis.duration_ms", duration.Milliseconds()),
|
||||
attribute.Bool("redis.exists", count > 0),
|
||||
)
|
||||
|
||||
// 处理错误
|
||||
if err != nil {
|
||||
c.tracer.SetSpanError(span, err)
|
||||
return false, err
|
||||
}
|
||||
|
||||
c.tracer.SetSpanSuccess(span)
|
||||
return count > 0, nil
|
||||
}
|
||||
|
||||
// GetMultiple 批量获取(基础实现)
|
||||
func (c *TracedRedisCache) GetMultiple(ctx context.Context, keys []string) (map[string]interface{}, error) {
|
||||
result := make(map[string]interface{})
|
||||
|
||||
// 简单实现:逐个获取(实际应用中可以使用MGET优化)
|
||||
for _, key := range keys {
|
||||
var value interface{}
|
||||
if err := c.Get(ctx, key, &value); err == nil {
|
||||
result[key] = value
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// SetMultiple 批量设置(基础实现)
|
||||
func (c *TracedRedisCache) SetMultiple(ctx context.Context, data map[string]interface{}, ttl ...interface{}) error {
|
||||
// 简单实现:逐个设置(实际应用中可以使用pipeline优化)
|
||||
for key, value := range data {
|
||||
if err := c.Set(ctx, key, value, ttl...); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeletePattern 按模式删除(基础实现)
|
||||
func (c *TracedRedisCache) DeletePattern(ctx context.Context, pattern string) error {
|
||||
// 这里需要实现模式删除逻辑
|
||||
return fmt.Errorf("DeletePattern not implemented")
|
||||
}
|
||||
|
||||
// Keys 获取匹配的键(基础实现)
|
||||
func (c *TracedRedisCache) Keys(ctx context.Context, pattern string) ([]string, error) {
|
||||
// 这里需要实现键匹配逻辑
|
||||
return nil, fmt.Errorf("Keys not implemented")
|
||||
}
|
||||
|
||||
// Stats 获取缓存统计(基础实现)
|
||||
func (c *TracedRedisCache) Stats(ctx context.Context) (interfaces.CacheStats, error) {
|
||||
return interfaces.CacheStats{}, fmt.Errorf("Stats not implemented")
|
||||
}
|
||||
|
||||
// Helper methods
|
||||
|
||||
// addBaseAttributes 添加基础属性
|
||||
func (c *TracedRedisCache) addBaseAttributes(span trace.Span, operation, key string) {
|
||||
c.tracer.AddSpanAttributes(span,
|
||||
attribute.String("redis.operation", operation),
|
||||
attribute.String("db.system", "redis"),
|
||||
)
|
||||
|
||||
if c.config.IncludeKeys {
|
||||
sanitizedKey := c.sanitizeKey(key)
|
||||
if len(sanitizedKey) <= c.config.MaxKeyLength {
|
||||
c.tracer.AddSpanAttributes(span,
|
||||
attribute.String("redis.key", sanitizedKey),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// buildKey 构建完整的Redis键名
|
||||
func (c *TracedRedisCache) buildKey(key string) string {
|
||||
if c.prefix == "" {
|
||||
return key
|
||||
}
|
||||
return fmt.Sprintf("%s:%s", c.prefix, key)
|
||||
}
|
||||
|
||||
// sanitizeKey 清理键名用于日志记录
|
||||
func (c *TracedRedisCache) sanitizeKey(key string) string {
|
||||
if len(key) <= c.config.MaxKeyLength {
|
||||
return key
|
||||
}
|
||||
return key[:c.config.MaxKeyLength] + "..."
|
||||
}
|
||||
|
||||
// sanitizeKeys 批量清理键名
|
||||
func (c *TracedRedisCache) sanitizeKeys(keys []string) []string {
|
||||
result := make([]string, len(keys))
|
||||
for i, key := range keys {
|
||||
result[i] = c.sanitizeKey(key)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// serialize 序列化值(简单实现)
|
||||
func (c *TracedRedisCache) serialize(value interface{}) (string, error) {
|
||||
// 这里应该使用JSON或其他序列化方法
|
||||
return fmt.Sprintf("%v", value), nil
|
||||
}
|
||||
|
||||
// deserialize 反序列化值(简单实现)
|
||||
func (c *TracedRedisCache) deserialize(data string, dest interface{}) error {
|
||||
// 这里应该实现真正的反序列化逻辑
|
||||
return fmt.Errorf("deserialize not fully implemented")
|
||||
}
|
||||
189
internal/shared/tracing/service_wrapper.go
Normal file
189
internal/shared/tracing/service_wrapper.go
Normal file
@@ -0,0 +1,189 @@
|
||||
package tracing
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"tyapi-server/internal/domains/user/dto"
|
||||
"tyapi-server/internal/domains/user/entities"
|
||||
"tyapi-server/internal/shared/interfaces"
|
||||
)
|
||||
|
||||
// ServiceWrapper 服务包装器,提供自动追踪能力
|
||||
type ServiceWrapper struct {
|
||||
tracer *Tracer
|
||||
logger *zap.Logger
|
||||
}
|
||||
|
||||
// NewServiceWrapper 创建服务包装器
|
||||
func NewServiceWrapper(tracer *Tracer, logger *zap.Logger) *ServiceWrapper {
|
||||
return &ServiceWrapper{
|
||||
tracer: tracer,
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
// TraceServiceCall 追踪服务调用的通用方法
|
||||
func (w *ServiceWrapper) TraceServiceCall(
|
||||
ctx context.Context,
|
||||
serviceName, methodName string,
|
||||
fn func(context.Context) error,
|
||||
) error {
|
||||
// 创建span名称
|
||||
spanName := fmt.Sprintf("%s.%s", serviceName, methodName)
|
||||
|
||||
// 开始追踪
|
||||
ctx, span := w.tracer.StartSpan(ctx, spanName)
|
||||
defer span.End()
|
||||
|
||||
// 添加基础属性
|
||||
w.tracer.AddSpanAttributes(span,
|
||||
attribute.String("service.name", serviceName),
|
||||
attribute.String("service.method", methodName),
|
||||
attribute.String("service.type", "business"),
|
||||
)
|
||||
|
||||
// 记录开始时间
|
||||
startTime := time.Now()
|
||||
|
||||
// 执行原始方法
|
||||
err := fn(ctx)
|
||||
|
||||
// 计算执行时间
|
||||
duration := time.Since(startTime)
|
||||
w.tracer.AddSpanAttributes(span,
|
||||
attribute.Int64("service.duration_ms", duration.Milliseconds()),
|
||||
)
|
||||
|
||||
// 标记慢方法
|
||||
if duration > 100*time.Millisecond {
|
||||
w.tracer.AddSpanAttributes(span,
|
||||
attribute.Bool("service.slow_method", true),
|
||||
)
|
||||
w.logger.Warn("慢方法检测",
|
||||
zap.String("service", serviceName),
|
||||
zap.String("method", methodName),
|
||||
zap.Duration("duration", duration),
|
||||
zap.String("trace_id", w.tracer.GetTraceID(ctx)),
|
||||
)
|
||||
}
|
||||
|
||||
// 处理错误
|
||||
if err != nil {
|
||||
w.tracer.SetSpanError(span, err)
|
||||
w.logger.Error("服务方法执行失败",
|
||||
zap.String("service", serviceName),
|
||||
zap.String("method", methodName),
|
||||
zap.Error(err),
|
||||
zap.String("trace_id", w.tracer.GetTraceID(ctx)),
|
||||
)
|
||||
} else {
|
||||
w.tracer.SetSpanSuccess(span)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// TracedUserService 自动追踪的用户服务包装器
|
||||
type TracedUserService struct {
|
||||
service interfaces.UserService
|
||||
wrapper *ServiceWrapper
|
||||
}
|
||||
|
||||
// NewTracedUserService 创建带追踪的用户服务
|
||||
func NewTracedUserService(service interfaces.UserService, wrapper *ServiceWrapper) interfaces.UserService {
|
||||
return &TracedUserService{
|
||||
service: service,
|
||||
wrapper: wrapper,
|
||||
}
|
||||
}
|
||||
|
||||
func (t *TracedUserService) Name() string {
|
||||
return "user-service"
|
||||
}
|
||||
|
||||
func (t *TracedUserService) Initialize(ctx context.Context) error {
|
||||
return t.wrapper.TraceServiceCall(ctx, "user", "initialize", t.service.Initialize)
|
||||
}
|
||||
|
||||
func (t *TracedUserService) HealthCheck(ctx context.Context) error {
|
||||
return t.service.HealthCheck(ctx) // 不追踪健康检查
|
||||
}
|
||||
|
||||
func (t *TracedUserService) Shutdown(ctx context.Context) error {
|
||||
return t.wrapper.TraceServiceCall(ctx, "user", "shutdown", t.service.Shutdown)
|
||||
}
|
||||
|
||||
func (t *TracedUserService) Register(ctx context.Context, req *dto.RegisterRequest) (*entities.User, error) {
|
||||
var result *entities.User
|
||||
var err error
|
||||
|
||||
traceErr := t.wrapper.TraceServiceCall(ctx, "user", "register", func(ctx context.Context) error {
|
||||
result, err = t.service.Register(ctx, req)
|
||||
return err
|
||||
})
|
||||
|
||||
if traceErr != nil {
|
||||
return nil, traceErr
|
||||
}
|
||||
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (t *TracedUserService) LoginWithPassword(ctx context.Context, req *dto.LoginWithPasswordRequest) (*entities.User, error) {
|
||||
var result *entities.User
|
||||
var err error
|
||||
|
||||
traceErr := t.wrapper.TraceServiceCall(ctx, "user", "login_password", func(ctx context.Context) error {
|
||||
result, err = t.service.LoginWithPassword(ctx, req)
|
||||
return err
|
||||
})
|
||||
|
||||
if traceErr != nil {
|
||||
return nil, traceErr
|
||||
}
|
||||
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (t *TracedUserService) LoginWithSMS(ctx context.Context, req *dto.LoginWithSMSRequest) (*entities.User, error) {
|
||||
var result *entities.User
|
||||
var err error
|
||||
|
||||
traceErr := t.wrapper.TraceServiceCall(ctx, "user", "login_sms", func(ctx context.Context) error {
|
||||
result, err = t.service.LoginWithSMS(ctx, req)
|
||||
return err
|
||||
})
|
||||
|
||||
if traceErr != nil {
|
||||
return nil, traceErr
|
||||
}
|
||||
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (t *TracedUserService) ChangePassword(ctx context.Context, userID string, req *dto.ChangePasswordRequest) error {
|
||||
return t.wrapper.TraceServiceCall(ctx, "user", "change_password", func(ctx context.Context) error {
|
||||
return t.service.ChangePassword(ctx, userID, req)
|
||||
})
|
||||
}
|
||||
|
||||
func (t *TracedUserService) GetByID(ctx context.Context, id string) (*entities.User, error) {
|
||||
var result *entities.User
|
||||
var err error
|
||||
|
||||
traceErr := t.wrapper.TraceServiceCall(ctx, "user", "get_by_id", func(ctx context.Context) error {
|
||||
result, err = t.service.GetByID(ctx, id)
|
||||
return err
|
||||
})
|
||||
|
||||
if traceErr != nil {
|
||||
return nil, traceErr
|
||||
}
|
||||
|
||||
return result, err
|
||||
}
|
||||
474
internal/shared/tracing/tracer.go
Normal file
474
internal/shared/tracing/tracer.go
Normal file
@@ -0,0 +1,474 @@
|
||||
package tracing
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// TracerConfig holds the settings used to build a Tracer.
type TracerConfig struct {
	// ServiceName is reported as the "service.name" resource attribute and
	// used as the name of the tracer obtained from the provider.
	ServiceName string
	// ServiceVersion is reported as the "service.version" resource attribute.
	ServiceVersion string
	// Environment is reported as the "environment" resource attribute.
	Environment string
	// Endpoint is handed to the OTLP gRPC exporter. When empty, spans are
	// sent to a no-op exporter instead.
	Endpoint string
	// SampleRate is the ratio passed to the trace-ID-ratio sampler.
	SampleRate float64
	// Enabled switches tracing on; when false Initialize does nothing.
	Enabled bool
}
|
||||
|
||||
// DefaultTracerConfig 默认追踪器配置
|
||||
func DefaultTracerConfig() TracerConfig {
|
||||
return TracerConfig{
|
||||
ServiceName: "tyapi-server",
|
||||
ServiceVersion: "1.0.0",
|
||||
Environment: "development",
|
||||
Endpoint: "http://localhost:4317",
|
||||
SampleRate: 0.1,
|
||||
Enabled: true,
|
||||
}
|
||||
}
|
||||
|
||||
// Tracer 链路追踪器
|
||||
type Tracer struct {
|
||||
config TracerConfig
|
||||
logger *zap.Logger
|
||||
provider *sdktrace.TracerProvider
|
||||
tracer trace.Tracer
|
||||
mutex sync.RWMutex
|
||||
initialized bool
|
||||
shutdown func(context.Context) error
|
||||
}
|
||||
|
||||
// NewTracer 创建链路追踪器
|
||||
func NewTracer(config TracerConfig, logger *zap.Logger) *Tracer {
|
||||
return &Tracer{
|
||||
config: config,
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize 初始化追踪器
|
||||
func (t *Tracer) Initialize(ctx context.Context) error {
|
||||
t.mutex.Lock()
|
||||
defer t.mutex.Unlock()
|
||||
|
||||
if t.initialized {
|
||||
return nil
|
||||
}
|
||||
|
||||
if !t.config.Enabled {
|
||||
t.logger.Info("Tracing is disabled")
|
||||
return nil
|
||||
}
|
||||
|
||||
// 创建资源
|
||||
res, err := resource.New(ctx,
|
||||
resource.WithAttributes(
|
||||
attribute.String("service.name", t.config.ServiceName),
|
||||
attribute.String("service.version", t.config.ServiceVersion),
|
||||
attribute.String("environment", t.config.Environment),
|
||||
),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create resource: %w", err)
|
||||
}
|
||||
|
||||
// 创建采样器
|
||||
sampler := sdktrace.TraceIDRatioBased(t.config.SampleRate)
|
||||
|
||||
// 创建导出器
|
||||
var spanProcessor sdktrace.SpanProcessor
|
||||
if t.config.Endpoint != "" {
|
||||
// 使用OTLP gRPC导出器(支持Jaeger、Tempo等)
|
||||
exporter, err := otlptracegrpc.New(ctx,
|
||||
otlptracegrpc.WithEndpoint(t.config.Endpoint),
|
||||
otlptracegrpc.WithInsecure(), // 开发环境使用,生产环境应配置TLS
|
||||
otlptracegrpc.WithTimeout(time.Second*10),
|
||||
otlptracegrpc.WithRetry(otlptracegrpc.RetryConfig{
|
||||
Enabled: true,
|
||||
InitialInterval: time.Millisecond * 100,
|
||||
MaxInterval: time.Second * 5,
|
||||
MaxElapsedTime: time.Second * 30,
|
||||
}),
|
||||
)
|
||||
if err != nil {
|
||||
t.logger.Warn("Failed to create OTLP exporter, using noop exporter",
|
||||
zap.Error(err),
|
||||
zap.String("endpoint", t.config.Endpoint))
|
||||
spanProcessor = sdktrace.NewSimpleSpanProcessor(&noopExporter{})
|
||||
} else {
|
||||
// 在生产环境中使用批处理器以提高性能
|
||||
spanProcessor = sdktrace.NewBatchSpanProcessor(exporter,
|
||||
sdktrace.WithBatchTimeout(time.Second*5),
|
||||
sdktrace.WithMaxExportBatchSize(512),
|
||||
sdktrace.WithMaxQueueSize(2048),
|
||||
sdktrace.WithExportTimeout(time.Second*30),
|
||||
)
|
||||
t.logger.Info("OTLP exporter initialized successfully",
|
||||
zap.String("endpoint", t.config.Endpoint))
|
||||
}
|
||||
} else {
|
||||
// 如果没有配置端点,使用空导出器
|
||||
spanProcessor = sdktrace.NewSimpleSpanProcessor(&noopExporter{})
|
||||
t.logger.Info("Using noop exporter (no endpoint configured)")
|
||||
}
|
||||
|
||||
// 创建TracerProvider
|
||||
provider := sdktrace.NewTracerProvider(
|
||||
sdktrace.WithResource(res),
|
||||
sdktrace.WithSampler(sampler),
|
||||
sdktrace.WithSpanProcessor(spanProcessor),
|
||||
)
|
||||
|
||||
// 设置全局TracerProvider
|
||||
otel.SetTracerProvider(provider)
|
||||
|
||||
// 创建Tracer
|
||||
tracer := provider.Tracer(t.config.ServiceName)
|
||||
|
||||
t.provider = provider
|
||||
t.tracer = tracer
|
||||
t.shutdown = func(ctx context.Context) error {
|
||||
return provider.Shutdown(ctx)
|
||||
}
|
||||
t.initialized = true
|
||||
|
||||
t.logger.Info("Tracing initialized successfully",
|
||||
zap.String("service", t.config.ServiceName),
|
||||
zap.Float64("sample_rate", t.config.SampleRate))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// StartSpan 开始一个新的span
|
||||
func (t *Tracer) StartSpan(ctx context.Context, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
|
||||
if !t.initialized || !t.config.Enabled {
|
||||
return ctx, trace.SpanFromContext(ctx)
|
||||
}
|
||||
|
||||
return t.tracer.Start(ctx, name, opts...)
|
||||
}
|
||||
|
||||
// StartHTTPSpan 开始一个HTTP span
|
||||
func (t *Tracer) StartHTTPSpan(ctx context.Context, method, path string) (context.Context, trace.Span) {
|
||||
spanName := fmt.Sprintf("%s %s", method, path)
|
||||
|
||||
// 检查是否已有错误标记,如果有则使用"error"作为操作名
|
||||
// 这样可以匹配Jaeger采样配置中的错误操作策略
|
||||
if ctx.Value("otel_error_request") != nil {
|
||||
spanName = "error"
|
||||
}
|
||||
|
||||
ctx, span := t.StartSpan(ctx, spanName,
|
||||
trace.WithSpanKind(trace.SpanKindServer),
|
||||
trace.WithAttributes(
|
||||
attribute.String("http.method", method),
|
||||
attribute.String("http.route", path),
|
||||
),
|
||||
)
|
||||
|
||||
// 保存原始操作名,以便在错误发生时可以更新
|
||||
if ctx.Value("otel_error_request") == nil {
|
||||
ctx = context.WithValue(ctx, "otel_original_operation", spanName)
|
||||
}
|
||||
|
||||
return ctx, span
|
||||
}
|
||||
|
||||
// StartDBSpan 开始一个数据库span
|
||||
func (t *Tracer) StartDBSpan(ctx context.Context, operation, table string) (context.Context, trace.Span) {
|
||||
spanName := fmt.Sprintf("db.%s.%s", operation, table)
|
||||
|
||||
return t.StartSpan(ctx, spanName,
|
||||
trace.WithSpanKind(trace.SpanKindClient),
|
||||
trace.WithAttributes(
|
||||
attribute.String("db.operation", operation),
|
||||
attribute.String("db.table", table),
|
||||
attribute.String("db.system", "postgresql"),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// StartCacheSpan 开始一个缓存span
|
||||
func (t *Tracer) StartCacheSpan(ctx context.Context, operation, key string) (context.Context, trace.Span) {
|
||||
spanName := fmt.Sprintf("cache.%s", operation)
|
||||
|
||||
return t.StartSpan(ctx, spanName,
|
||||
trace.WithSpanKind(trace.SpanKindClient),
|
||||
trace.WithAttributes(
|
||||
attribute.String("cache.operation", operation),
|
||||
attribute.String("cache.system", "redis"),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// StartExternalAPISpan 开始一个外部API调用span
|
||||
func (t *Tracer) StartExternalAPISpan(ctx context.Context, service, operation string) (context.Context, trace.Span) {
|
||||
spanName := fmt.Sprintf("api.%s.%s", service, operation)
|
||||
|
||||
return t.StartSpan(ctx, spanName,
|
||||
trace.WithSpanKind(trace.SpanKindClient),
|
||||
trace.WithAttributes(
|
||||
attribute.String("api.service", service),
|
||||
attribute.String("api.operation", operation),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// AddSpanAttributes 添加span属性
|
||||
func (t *Tracer) AddSpanAttributes(span trace.Span, attrs ...attribute.KeyValue) {
|
||||
if span.IsRecording() {
|
||||
span.SetAttributes(attrs...)
|
||||
}
|
||||
}
|
||||
|
||||
// SetSpanError 设置span错误
|
||||
func (t *Tracer) SetSpanError(span trace.Span, err error) {
|
||||
if span.IsRecording() {
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
span.RecordError(err)
|
||||
|
||||
// 将span操作名更新为"error",以匹配Jaeger采样配置
|
||||
// 注意:这是一种变通方法,因为OpenTelemetry不支持直接更改span名称
|
||||
// 我们通过添加特殊属性来标识这是一个错误span
|
||||
span.SetAttributes(
|
||||
attribute.String("error.operation", "true"),
|
||||
attribute.String("operation.type", "error"),
|
||||
)
|
||||
|
||||
// 记录错误日志,包含trace ID便于关联
|
||||
if t.logger != nil {
|
||||
ctx := trace.ContextWithSpan(context.Background(), span)
|
||||
t.logger.Error("操作发生错误",
|
||||
zap.Error(err),
|
||||
zap.String("trace_id", t.GetTraceID(ctx)),
|
||||
zap.String("span_id", t.GetSpanID(ctx)),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// SetSpanSuccess 设置span成功
|
||||
func (t *Tracer) SetSpanSuccess(span trace.Span) {
|
||||
if span.IsRecording() {
|
||||
span.SetStatus(codes.Ok, "success")
|
||||
}
|
||||
}
|
||||
|
||||
// SetHTTPStatus 根据HTTP状态码设置span状态
|
||||
func (t *Tracer) SetHTTPStatus(span trace.Span, statusCode int) {
|
||||
if !span.IsRecording() {
|
||||
return
|
||||
}
|
||||
|
||||
// 添加HTTP状态码属性
|
||||
span.SetAttributes(attribute.Int("http.status_code", statusCode))
|
||||
|
||||
// 对于4xx和5xx错误,标记为错误并应用错误采样策略
|
||||
if statusCode >= 400 {
|
||||
errorMsg := fmt.Sprintf("HTTP %d", statusCode)
|
||||
span.SetStatus(codes.Error, errorMsg)
|
||||
|
||||
// 添加错误操作标记,以匹配Jaeger采样配置
|
||||
span.SetAttributes(
|
||||
attribute.String("error.operation", "true"),
|
||||
attribute.String("operation.type", "error"),
|
||||
)
|
||||
|
||||
// 记录HTTP错误
|
||||
if t.logger != nil {
|
||||
ctx := trace.ContextWithSpan(context.Background(), span)
|
||||
t.logger.Warn("HTTP请求错误",
|
||||
zap.Int("status_code", statusCode),
|
||||
zap.String("trace_id", t.GetTraceID(ctx)),
|
||||
zap.String("span_id", t.GetSpanID(ctx)),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
span.SetStatus(codes.Ok, "success")
|
||||
}
|
||||
}
|
||||
|
||||
// GetTraceID 获取当前上下文的trace ID
|
||||
func (t *Tracer) GetTraceID(ctx context.Context) string {
|
||||
span := trace.SpanFromContext(ctx)
|
||||
if span.SpanContext().IsValid() {
|
||||
return span.SpanContext().TraceID().String()
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// GetSpanID 获取当前上下文的span ID
|
||||
func (t *Tracer) GetSpanID(ctx context.Context) string {
|
||||
span := trace.SpanFromContext(ctx)
|
||||
if span.SpanContext().IsValid() {
|
||||
return span.SpanContext().SpanID().String()
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// IsTracing 检查是否正在追踪
|
||||
func (t *Tracer) IsTracing(ctx context.Context) bool {
|
||||
span := trace.SpanFromContext(ctx)
|
||||
return span.SpanContext().IsValid() && span.IsRecording()
|
||||
}
|
||||
|
||||
// Shutdown 关闭追踪器
|
||||
func (t *Tracer) Shutdown(ctx context.Context) error {
|
||||
t.mutex.Lock()
|
||||
defer t.mutex.Unlock()
|
||||
|
||||
if !t.initialized || t.shutdown == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := t.shutdown(ctx)
|
||||
if err != nil {
|
||||
t.logger.Error("Failed to shutdown tracer", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
t.initialized = false
|
||||
t.logger.Info("Tracer shutdown successfully")
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetStats 获取追踪统计信息
|
||||
func (t *Tracer) GetStats() map[string]interface{} {
|
||||
t.mutex.RLock()
|
||||
defer t.mutex.RUnlock()
|
||||
|
||||
return map[string]interface{}{
|
||||
"initialized": t.initialized,
|
||||
"enabled": t.config.Enabled,
|
||||
"service_name": t.config.ServiceName,
|
||||
"service_version": t.config.ServiceVersion,
|
||||
"environment": t.config.Environment,
|
||||
"sample_rate": t.config.SampleRate,
|
||||
"endpoint": t.config.Endpoint,
|
||||
}
|
||||
}
|
||||
|
||||
// 实现Service接口
|
||||
|
||||
// Name 返回服务名称
|
||||
func (t *Tracer) Name() string {
|
||||
return "tracer"
|
||||
}
|
||||
|
||||
// HealthCheck 健康检查
|
||||
func (t *Tracer) HealthCheck(ctx context.Context) error {
|
||||
if !t.config.Enabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
if !t.initialized {
|
||||
return fmt.Errorf("tracer not initialized")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// noopExporter 简单的无操作导出器(用于演示)
|
||||
type noopExporter struct{}
|
||||
|
||||
func (e *noopExporter) ExportSpans(ctx context.Context, spans []sdktrace.ReadOnlySpan) error {
|
||||
// 在实际应用中,这里应该将spans发送到Jaeger或其他追踪系统
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *noopExporter) Shutdown(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// TraceMiddleware 追踪中间件工厂
|
||||
func (t *Tracer) TraceMiddleware() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
if !t.initialized || !t.config.Enabled {
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
|
||||
// 开始HTTP span
|
||||
ctx, span := t.StartHTTPSpan(c.Request.Context(), c.Request.Method, c.FullPath())
|
||||
defer span.End()
|
||||
|
||||
// 将trace ID添加到响应头
|
||||
traceID := t.GetTraceID(ctx)
|
||||
if traceID != "" {
|
||||
c.Header("X-Trace-ID", traceID)
|
||||
}
|
||||
|
||||
// 将span上下文存储到gin上下文
|
||||
c.Request = c.Request.WithContext(ctx)
|
||||
|
||||
// 处理请求
|
||||
c.Next()
|
||||
|
||||
// 设置HTTP状态码
|
||||
t.SetHTTPStatus(span, c.Writer.Status())
|
||||
|
||||
// 添加响应信息
|
||||
t.AddSpanAttributes(span,
|
||||
attribute.Int("http.status_code", c.Writer.Status()),
|
||||
attribute.Int("http.response_size", c.Writer.Size()),
|
||||
)
|
||||
|
||||
// 添加错误信息
|
||||
if len(c.Errors) > 0 {
|
||||
errMsg := c.Errors.String()
|
||||
t.SetSpanError(span, fmt.Errorf(errMsg))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// GinTraceMiddleware 兼容旧的方法名,保持向后兼容
|
||||
func (t *Tracer) GinTraceMiddleware() gin.HandlerFunc {
|
||||
return t.TraceMiddleware()
|
||||
}
|
||||
|
||||
// WithTracing 添加追踪到上下文的辅助函数
|
||||
func WithTracing(ctx context.Context, tracer *Tracer, name string) (context.Context, trace.Span) {
|
||||
return tracer.StartSpan(ctx, name)
|
||||
}
|
||||
|
||||
// TraceFunction 追踪函数执行的辅助函数
|
||||
func (t *Tracer) TraceFunction(ctx context.Context, name string, fn func(context.Context) error) error {
|
||||
ctx, span := t.StartSpan(ctx, name)
|
||||
defer span.End()
|
||||
|
||||
err := fn(ctx)
|
||||
if err != nil {
|
||||
t.SetSpanError(span, err)
|
||||
} else {
|
||||
t.SetSpanSuccess(span)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// TraceFunctionWithResult 追踪带返回值的函数执行
|
||||
func TraceFunctionWithResult[T any](ctx context.Context, tracer *Tracer, name string, fn func(context.Context) (T, error)) (T, error) {
|
||||
ctx, span := tracer.StartSpan(ctx, name)
|
||||
defer span.End()
|
||||
|
||||
result, err := fn(ctx)
|
||||
if err != nil {
|
||||
tracer.SetSpanError(span, err)
|
||||
} else {
|
||||
tracer.SetSpanSuccess(span)
|
||||
}
|
||||
|
||||
return result, err
|
||||
}
|
||||
Reference in New Issue
Block a user