feat(架构): 完善基础架构设计

This commit is contained in:
2025-07-02 16:17:59 +08:00
parent 03e615a8fd
commit 5b4392894f
89 changed files with 18555 additions and 3521 deletions

View File

@@ -0,0 +1,292 @@
package tracing
import (
"context"
"fmt"
"reflect"
"runtime"
"strings"
"time"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
)
// TracableService is the interface for services that can be traced.
type TracableService interface {
// Name returns the service's name.
Name() string
}
// ServiceDecorator wraps service method calls with tracing spans and logging.
type ServiceDecorator struct {
// tracer starts spans and records attributes, errors and success status.
tracer *Tracer
// logger receives slow-method warnings and method-failure errors.
logger *zap.Logger
// config selects which methods are traced and what is recorded on spans.
config DecoratorConfig
}
// DecoratorConfig holds the settings of a ServiceDecorator.
type DecoratorConfig struct {
// EnableMethodTracing is the master switch; when false no method is traced.
EnableMethodTracing bool
// ExcludePatterns lists substrings; a method whose name contains any of
// them is not traced.
ExcludePatterns []string
// IncludeArguments attaches call arguments to the span as attributes.
IncludeArguments bool
// IncludeResults attaches return values to the span on success.
IncludeResults bool
// SlowMethodThreshold is the duration above which a call is flagged as slow.
SlowMethodThreshold time.Duration
}
// DefaultDecoratorConfig returns the configuration used by NewServiceDecorator:
// method tracing on, methods containing "Health", "Ping" or "Name" excluded,
// arguments recorded, results not recorded, and a 100ms slow-call threshold.
func DefaultDecoratorConfig() DecoratorConfig {
	var cfg DecoratorConfig
	cfg.EnableMethodTracing = true
	cfg.ExcludePatterns = []string{"Health", "Ping", "Name"}
	cfg.IncludeArguments = true
	cfg.IncludeResults = false
	cfg.SlowMethodThreshold = 100 * time.Millisecond
	return cfg
}
// NewServiceDecorator builds a ServiceDecorator that uses the given tracer and
// logger together with DefaultDecoratorConfig.
func NewServiceDecorator(tracer *Tracer, logger *zap.Logger) *ServiceDecorator {
	decorator := new(ServiceDecorator)
	decorator.tracer = tracer
	decorator.logger = logger
	decorator.config = DefaultDecoratorConfig()
	return decorator
}
// WrapService builds a reflection-based proxy value around service.
//
// The result is a pointer to an anonymous struct holding the wrapped service
// (field "Target", always stored as a pointer), this decorator (field
// "Decorator") and one func-typed field per traced method. The method fields
// stay nil because implementMethods is still a placeholder, so the returned
// value does not yet implement the service's own methods.
//
// A nil service is returned unchanged. A service passed by value is copied
// into a freshly allocated pointer so that it fits the pointer-typed field.
func (d *ServiceDecorator) WrapService(service interface{}) interface{} {
	// reflect.TypeOf(nil) is nil; bail out before dereferencing it.
	if service == nil {
		return service
	}

	serviceValue := reflect.ValueOf(service)
	serviceType := serviceValue.Type()

	// target is what gets stored in the proxy; it must be a *T.
	target := serviceValue
	if serviceType.Kind() == reflect.Ptr {
		serviceType = serviceType.Elem()
	} else {
		target = reflect.New(serviceType)
		target.Elem().Set(serviceValue)
	}

	proxyType := d.createProxyType(serviceType)
	proxyValue := reflect.New(proxyType).Elem()

	// The bookkeeping fields are exported: reflect can neither declare (via
	// StructOf) nor set unexported fields of a runtime-built struct.
	proxyValue.FieldByName("Target").Set(target)
	proxyValue.FieldByName("Decorator").Set(reflect.ValueOf(d))

	return proxyValue.Addr().Interface()
}

// createProxyType builds the anonymous struct type used by WrapService.
//
// serviceType is the (non-pointer) type of the wrapped service. The struct
// gets a "Target" field of type *serviceType, a "Decorator" field, and one
// field per method of serviceType accepted by shouldTraceMethod, named after
// the method and typed with the method's func type.
func (d *ServiceDecorator) createProxyType(serviceType reflect.Type) reflect.Type {
	serviceName := d.getServiceName(serviceType)

	fields := []reflect.StructField{
		{Name: "Target", Type: reflect.PtrTo(serviceType)},
		{Name: "Decorator", Type: reflect.TypeOf(d)},
	}

	for i := 0; i < serviceType.NumMethod(); i++ {
		method := serviceType.Method(i)
		if !d.shouldTraceMethod(method.Name) {
			continue
		}
		// StructOf panics on duplicate field names, so a method that shares
		// a name with a bookkeeping field cannot get its own slot.
		if method.Name == "Target" || method.Name == "Decorator" {
			continue
		}
		fields = append(fields, reflect.StructField{
			Name: method.Name,
			Type: method.Type,
		})
	}

	proxyType := reflect.StructOf(fields)

	// Placeholder hook; see implementMethods.
	d.implementMethods(proxyType, serviceType, serviceName)
	return proxyType
}
// shouldTraceMethod reports whether the named method should be traced: method
// tracing must be enabled and the name must not contain any exclude pattern.
func (d *ServiceDecorator) shouldTraceMethod(methodName string) bool {
	traced := d.config.EnableMethodTracing
	patterns := d.config.ExcludePatterns
	// Stop scanning as soon as one pattern rules the method out.
	for i := 0; traced && i < len(patterns); i++ {
		traced = !strings.Contains(methodName, patterns[i])
	}
	return traced
}
// getServiceName derives the span service name from a type: the type's name
// with a trailing "Service" removed, lower-cased.
func (d *ServiceDecorator) getServiceName(serviceType reflect.Type) string {
	const suffix = "Service"
	// TrimSuffix is a no-op when the suffix is absent.
	trimmed := strings.TrimSuffix(serviceType.Name(), suffix)
	return strings.ToLower(trimmed)
}
// TraceMethodCall runs fn inside a span named "<serviceName>.<methodName>".
//
// The span is tagged with the service name, the method name and the fixed
// service type "business". When IncludeArguments is set, args are attached
// before fn runs. After fn returns, the elapsed time is recorded; calls slower
// than SlowMethodThreshold are flagged on the span and logged as warnings. A
// non-nil error from fn marks the span as failed and is logged; otherwise the
// span is marked successful and, when IncludeResults is set, the results are
// attached. The results and error of fn are returned unchanged.
func (d *ServiceDecorator) TraceMethodCall(
	ctx context.Context,
	serviceName, methodName string,
	fn func(context.Context) ([]reflect.Value, error),
	args []reflect.Value,
) ([]reflect.Value, error) {
	ctx, span := d.tracer.StartSpan(ctx, fmt.Sprintf("%s.%s", serviceName, methodName))
	defer span.End()

	d.tracer.AddSpanAttributes(span,
		attribute.String("service.name", serviceName),
		attribute.String("service.method", methodName),
		attribute.String("service.type", "business"),
	)
	if d.config.IncludeArguments {
		d.addArgumentAttributes(span, args)
	}

	// Only the wrapped call itself is timed.
	begin := time.Now()
	results, err := fn(ctx)
	elapsed := time.Since(begin)

	d.tracer.AddSpanAttributes(span,
		attribute.Int64("service.duration_ms", elapsed.Milliseconds()),
	)

	if elapsed > d.config.SlowMethodThreshold {
		d.tracer.AddSpanAttributes(span, attribute.Bool("service.slow_method", true))
		d.logger.Warn("慢方法检测",
			zap.String("service", serviceName),
			zap.String("method", methodName),
			zap.Duration("duration", elapsed),
			zap.String("trace_id", d.tracer.GetTraceID(ctx)),
		)
	}

	switch {
	case err != nil:
		d.tracer.SetSpanError(span, err)
		d.logger.Error("服务方法执行失败",
			zap.String("service", serviceName),
			zap.String("method", methodName),
			zap.Error(err),
			zap.String("trace_id", d.tracer.GetTraceID(ctx)),
		)
	default:
		d.tracer.SetSpanSuccess(span)
		if d.config.IncludeResults {
			d.addResultAttributes(span, results)
		}
	}

	return results, err
}
// addArgumentAttributes records call arguments on span as "service.arg_<i>"
// string attributes.
//
// Zero (invalid) reflect.Values are skipped because calling Type on them
// panics. A first argument whose type implements context.Context is skipped,
// whether the Value carries the interface type or a concrete context type.
// Values that render as an empty string, or as 1000 bytes or more, are left
// out to keep spans small.
func (d *ServiceDecorator) addArgumentAttributes(span trace.Span, args []reflect.Value) {
	contextType := reflect.TypeOf((*context.Context)(nil)).Elem()

	for i, arg := range args {
		if !arg.IsValid() {
			continue
		}
		if i == 0 && arg.Type().Implements(contextType) {
			continue // the context is not a business argument
		}

		argValue := d.extractValue(arg)
		if argValue == "" || len(argValue) >= 1000 {
			continue
		}
		d.tracer.AddSpanAttributes(span,
			attribute.String(fmt.Sprintf("service.arg_%d", i), argValue),
		)
	}
}
// addResultAttributes records return values on span as "service.result_<i>"
// string attributes.
//
// Zero (invalid) reflect.Values are skipped because calling Type on them
// panics; a nil error passed through reflect.ValueOf is one such value.
// Results whose type implements error are skipped, since errors are reported
// separately by the caller. Values that render as an empty string, or as 1000
// bytes or more, are left out.
func (d *ServiceDecorator) addResultAttributes(span trace.Span, results []reflect.Value) {
	errorType := reflect.TypeOf((*error)(nil)).Elem()

	for i, result := range results {
		if !result.IsValid() {
			continue
		}
		if result.Type().Implements(errorType) {
			continue
		}

		resultValue := d.extractValue(result)
		if resultValue == "" || len(resultValue) >= 1000 {
			continue
		}
		d.tracer.AddSpanAttributes(span,
			attribute.String(fmt.Sprintf("service.result_%d", i), resultValue),
		)
	}
}
// extractValue renders a reflected value as a short string for span attributes.
//
// Pointers are followed to their target ("nil" for a nil pointer). Strings are
// returned as-is, integers and booleans in their default form, floats with two
// decimals, slices and arrays as "[N items]". Structs and every other kind are
// represented by their type name only. An invalid value yields "".
func (d *ServiceDecorator) extractValue(value reflect.Value) string {
	// Walk pointer chains iteratively instead of recursing.
	for value.IsValid() && value.Kind() == reflect.Ptr {
		if value.IsNil() {
			return "nil"
		}
		value = value.Elem()
	}
	if !value.IsValid() {
		return ""
	}

	switch value.Kind() {
	case reflect.String:
		return value.String()
	case reflect.Bool:
		return fmt.Sprint(value.Bool())
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return fmt.Sprint(value.Int())
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		return fmt.Sprint(value.Uint())
	case reflect.Float32, reflect.Float64:
		return fmt.Sprintf("%.2f", value.Float())
	case reflect.Slice, reflect.Array:
		return fmt.Sprintf("[%d items]", value.Len())
	}

	// Structs and all remaining kinds: type name only.
	return value.Type().Name()
}
// implementMethods is a placeholder for installing the traced method
// implementations on the proxy type; it currently does nothing.
func (d *ServiceDecorator) implementMethods(proxyType, serviceType reflect.Type, serviceName string) {
// Placeholder for the runtime method implementation.
// A real implementation would need reflect.MakeFunc or another runtime
// proxying technique.
}
// GetFunctionName returns the unqualified name of the function fn, i.e. the
// part of the runtime symbol name after the last dot.
//
// It returns "" when fn is nil, is not a function, is a nil function value, or
// when the runtime has no symbol information for it.
func GetFunctionName(fn interface{}) string {
	value := reflect.ValueOf(fn)
	// Value.Pointer panics on kinds without a pointer representation, and
	// only a func pointer is a meaningful program counter.
	if !value.IsValid() || value.Kind() != reflect.Func {
		return ""
	}

	info := runtime.FuncForPC(value.Pointer())
	if info == nil {
		return ""
	}

	name := info.Name()
	if dot := strings.LastIndex(name, "."); dot >= 0 {
		return name[dot+1:]
	}
	return name
}

View File

@@ -0,0 +1,320 @@
package tracing
import (
"context"
"fmt"
"strings"
"time"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
"gorm.io/gorm"
)
// Keys under which the plugin stores per-operation tracing state with db.Set.
const (
// gormSpanKey holds the span started in beforeOperation.
gormSpanKey = "otel:span"
// gormOperationKey holds the operation type string.
gormOperationKey = "otel:operation"
// gormTableNameKey holds the resolved table name.
gormTableNameKey = "otel:table_name"
// gormStartTimeKey holds the time.Time at which the span was started.
gormStartTimeKey = "otel:start_time"
)
// GormTracingPlugin is a GORM plugin that traces database operations.
type GormTracingPlugin struct {
// tracer starts database spans and records attributes and status.
tracer *Tracer
// logger receives failure and slow-query logs.
logger *zap.Logger
// config controls what is recorded and which tables are skipped.
config GormPluginConfig
}
// GormPluginConfig holds the settings of GormTracingPlugin.
type GormPluginConfig struct {
// IncludeSQL attaches the SQL statement to the span as "db.statement".
IncludeSQL bool
// IncludeValues is not read by the plugin code shown in this file.
// NOTE(review): presumably meant to control recording of bound values — confirm.
IncludeValues bool
// SlowThreshold is the duration above which a successful operation is
// flagged and logged as slow.
SlowThreshold time.Duration
// ExcludeTables lists table names (exact match) that are not traced.
ExcludeTables []string
// SanitizeSQL passes the statement through sanitizeSQL before it is
// attached to the span.
SanitizeSQL bool
}
// DefaultGormPluginConfig returns the configuration used by
// NewGormTracingPlugin: SQL recorded and sanitized, values not recorded, a
// 200ms slow threshold, and the migration bookkeeping tables excluded.
func DefaultGormPluginConfig() GormPluginConfig {
	var cfg GormPluginConfig
	cfg.IncludeSQL = true
	// Keep false in production to avoid recording sensitive data.
	cfg.IncludeValues = false
	cfg.SlowThreshold = 200 * time.Millisecond
	cfg.ExcludeTables = []string{"migrations", "schema_migrations"}
	cfg.SanitizeSQL = true
	return cfg
}
// NewGormTracingPlugin builds a GormTracingPlugin that uses the given tracer
// and logger together with DefaultGormPluginConfig.
func NewGormTracingPlugin(tracer *Tracer, logger *zap.Logger) *GormTracingPlugin {
	plugin := new(GormTracingPlugin)
	plugin.tracer = tracer
	plugin.logger = logger
	plugin.config = DefaultGormPluginConfig()
	return plugin
}
// Name returns the plugin name; it is also used as the prefix of every
// callback name registered by Initialize.
func (p *GormTracingPlugin) Name() string {
	const pluginName = "gorm-otel-tracing"
	return pluginName
}
// Initialize registers the plugin's before/after callbacks around GORM's
// create, query, update, delete and raw processors, in that order.
//
// Each callback is named "<plugin name>:before_<op>" or
// "<plugin name>:after_<op>". The first registration failure aborts
// initialization and is returned wrapped.
func (p *GormTracingPlugin) Initialize(db *gorm.DB) error {
	// registrar is the part of GORM's callback handle needed here.
	type registrar interface {
		Register(name string, fn func(*gorm.DB)) error
	}

	attach := func(operation string, before, after registrar) error {
		if err := before.Register(p.Name()+":before_"+operation, p.beforeOperation); err != nil {
			return fmt.Errorf("failed to register before %s callback: %w", operation, err)
		}
		if err := after.Register(p.Name()+":after_"+operation, p.afterOperation); err != nil {
			return fmt.Errorf("failed to register after %s callback: %w", operation, err)
		}
		return nil
	}

	if err := attach("create",
		db.Callback().Create().Before("gorm:create"),
		db.Callback().Create().After("gorm:create"),
	); err != nil {
		return err
	}
	if err := attach("query",
		db.Callback().Query().Before("gorm:query"),
		db.Callback().Query().After("gorm:query"),
	); err != nil {
		return err
	}
	if err := attach("update",
		db.Callback().Update().Before("gorm:update"),
		db.Callback().Update().After("gorm:update"),
	); err != nil {
		return err
	}
	if err := attach("delete",
		db.Callback().Delete().Before("gorm:delete"),
		db.Callback().Delete().After("gorm:delete"),
	); err != nil {
		return err
	}
	if err := attach("raw",
		db.Callback().Raw().Before("gorm:raw"),
		db.Callback().Raw().After("gorm:raw"),
	); err != nil {
		return err
	}

	p.logger.Info("GORM追踪插件已初始化")
	return nil
}
// beforeOperation is the "before" callback: it starts a database span and
// stashes it, together with the operation, table name and start time, on db so
// that afterOperation can finish it.
//
// Nothing happens when a span is already stored on db or when the table is in
// the exclude list. The statement context is replaced with the span's context.
//
// NOTE(review): the operation comes from getOperationType, which inspects
// db.Statement.SQL; confirm the SQL is already populated when "before"
// callbacks run, otherwise the operation resolves to "unknown".
func (p *GormTracingPlugin) beforeOperation(db *gorm.DB) {
	if p.shouldSkipTracing(db) {
		return
	}

	op := p.getOperationType(db)
	table := p.getTableName(db)
	if p.isExcludedTable(table) {
		return
	}

	// Fall back to a background context when the statement carries none.
	parent := db.Statement.Context
	if parent == nil {
		parent = context.Background()
	}

	spanCtx, span := p.tracer.StartDBSpan(parent, op, table)

	p.tracer.AddSpanAttributes(span,
		attribute.String("db.system", "postgresql"),
		attribute.String("db.operation", op),
	)
	if table != "" {
		p.tracer.AddSpanAttributes(span, attribute.String("db.table", table))
	}

	// Hand the tracing state over to afterOperation.
	db.Set(gormSpanKey, span)
	db.Set(gormOperationKey, op)
	db.Set(gormTableNameKey, table)
	db.Set(gormStartTimeKey, time.Now())

	db.Statement.Context = spanCtx
}
// afterOperation is the "after" callback: it finishes the span started by
// beforeOperation.
//
// It records the elapsed time, the SQL statement (when IncludeSQL is set) and
// the affected row count, then marks the span failed or successful according
// to db.Error. Failures are logged as errors; successful operations slower
// than SlowThreshold are flagged on the span and logged as warnings. The
// statement text is sanitized once (when SanitizeSQL is set) and that same
// text is used for both the span attribute and the slow-query log, so the log
// never carries SQL the span would have hidden.
func (p *GormTracingPlugin) afterOperation(db *gorm.DB) {
	spanValue, exists := db.Get(gormSpanKey)
	if !exists {
		return
	}
	span, ok := spanValue.(trace.Span)
	if !ok {
		return
	}
	defer span.End()

	// Missing entries are tolerated: they come back as nil and are only
	// used for log fields (operation, table) or a type-checked duration.
	operation, _ := db.Get(gormOperationKey)
	tableName, _ := db.Get(gormTableNameKey)
	startTime, _ := db.Get(gormStartTimeKey)

	var duration time.Duration
	if st, ok := startTime.(time.Time); ok {
		duration = time.Since(st)
		p.tracer.AddSpanAttributes(span,
			attribute.Int64("db.duration_ms", duration.Milliseconds()),
		)
	}

	// Resolve the statement text once so the span and the log agree.
	statement := db.Statement.SQL.String()
	if p.config.SanitizeSQL {
		statement = p.sanitizeSQL(statement)
	}
	if p.config.IncludeSQL && statement != "" {
		p.tracer.AddSpanAttributes(span, attribute.String("db.statement", statement))
	}

	if db.Statement.RowsAffected >= 0 {
		p.tracer.AddSpanAttributes(span,
			attribute.Int64("db.rows_affected", db.Statement.RowsAffected),
		)
	}

	if db.Error != nil {
		p.tracer.SetSpanError(span, db.Error)
		span.SetStatus(codes.Error, db.Error.Error())

		p.logger.Error("数据库操作失败",
			zap.String("operation", fmt.Sprintf("%v", operation)),
			zap.String("table", fmt.Sprintf("%v", tableName)),
			zap.Error(db.Error),
			zap.String("trace_id", p.tracer.GetTraceID(db.Statement.Context)),
		)
		return
	}

	p.tracer.SetSpanSuccess(span)
	span.SetStatus(codes.Ok, "success")

	if duration > p.config.SlowThreshold {
		p.tracer.AddSpanAttributes(span,
			attribute.Bool("db.slow_query", true),
		)
		p.logger.Warn("慢SQL查询检测",
			zap.String("operation", fmt.Sprintf("%v", operation)),
			zap.String("table", fmt.Sprintf("%v", tableName)),
			zap.Duration("duration", duration),
			zap.String("sql", statement),
			zap.String("trace_id", p.tracer.GetTraceID(db.Statement.Context)),
		)
	}
}
// shouldSkipTracing reports whether db already carries a span, in which case
// the operation must not be traced a second time.
func (p *GormTracingPlugin) shouldSkipTracing(db *gorm.DB) bool {
	_, alreadyTraced := db.Get(gormSpanKey)
	return alreadyTraced
}
// getOperationType classifies the statement by the leading keyword of its SQL
// text (case-insensitive, leading whitespace ignored).
//
// It returns "unknown" when no SQL is present, the lower-case keyword for
// SELECT, INSERT, UPDATE, DELETE, CREATE, DROP and ALTER, and "query" for
// anything else.
func (p *GormTracingPlugin) getOperationType(db *gorm.DB) string {
	statement := strings.TrimSpace(db.Statement.SQL.String())
	if statement == "" {
		return "unknown"
	}
	statement = strings.ToUpper(statement)

	keywords := [...]struct {
		prefix    string
		operation string
	}{
		{"SELECT", "select"},
		{"INSERT", "insert"},
		{"UPDATE", "update"},
		{"DELETE", "delete"},
		{"CREATE", "create"},
		{"DROP", "drop"},
		{"ALTER", "alter"},
	}
	for _, kw := range keywords {
		if strings.HasPrefix(statement, kw.prefix) {
			return kw.operation
		}
	}
	return "query"
}
// getTableName resolves the table of the current statement: the explicit
// statement table first, then the parsed schema's table, else "".
func (p *GormTracingPlugin) getTableName(db *gorm.DB) string {
	stmt := db.Statement
	switch {
	case stmt.Table != "":
		return stmt.Table
	case stmt.Schema != nil && stmt.Schema.Table != "":
		return stmt.Schema.Table
	default:
		return ""
	}
}
// isExcludedTable reports whether tableName exactly matches an entry of the
// configured ExcludeTables list.
func (p *GormTracingPlugin) isExcludedTable(tableName string) bool {
	skipList := p.config.ExcludeTables
	for i := 0; i < len(skipList); i++ {
		if skipList[i] == tableName {
			return true
		}
	}
	return false
}
// sanitizeSQL replaces every single-quoted string literal in sql with a "?"
// placeholder so that literal values do not leak into spans and logs.
//
// A doubled quote ('') inside a literal is treated as an escaped quote. An
// unterminated literal is replaced up to the end of the input. Text outside
// quotes, including numeric literals, is returned unchanged.
// Backslash-escaped quotes are not recognised.
func (p *GormTracingPlugin) sanitizeSQL(sql string) string {
	if !strings.Contains(sql, "'") {
		return sql
	}

	var out strings.Builder
	out.Grow(len(sql))

	// Byte-wise scanning is safe: the quote is ASCII and can never occur
	// inside a multi-byte UTF-8 sequence.
	for i := 0; i < len(sql); {
		if sql[i] != '\'' {
			out.WriteByte(sql[i])
			i++
			continue
		}

		// Skip the whole literal, opening quote included.
		i++
		for i < len(sql) {
			if sql[i] != '\'' {
				i++
				continue
			}
			if i+1 < len(sql) && sql[i+1] == '\'' {
				i += 2 // escaped quote, still inside the literal
				continue
			}
			i++ // closing quote
			break
		}
		out.WriteByte('?')
	}
	return out.String()
}

View File

@@ -0,0 +1,407 @@
package tracing
import (
	"context"
	"encoding/json"
	"fmt"
	"strings"
	"time"

	"github.com/redis/go-redis/v9"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/trace"
	"go.uber.org/zap"

	"tyapi-server/internal/shared/interfaces"
)
// TracedRedisCache is a Redis-backed cache that traces every operation.
type TracedRedisCache struct {
// client is the Redis client all commands are issued on.
client redis.UniversalClient
// tracer starts cache spans and records attributes and status.
tracer *Tracer
// logger receives lifecycle, slow-operation and failure logs.
logger *zap.Logger
// prefix, when non-empty, is prepended to every key as "<prefix>:<key>".
prefix string
// config controls what is recorded on spans and the slow threshold.
config RedisTracingConfig
}
// RedisTracingConfig holds the tracing settings of TracedRedisCache.
type RedisTracingConfig struct {
// IncludeKeys attaches the key to the span as "redis.key".
IncludeKeys bool
// IncludeValues is not read by the cache code shown in this file.
// NOTE(review): presumably meant to control recording of values — confirm.
IncludeValues bool
// MaxKeyLength is the key length limit used when recording or logging keys.
MaxKeyLength int
// MaxValueLength is not read by the cache code shown in this file.
MaxValueLength int
// SlowThreshold is the duration above which Get and Set are flagged and
// logged as slow.
SlowThreshold time.Duration
// SanitizeValues is not read by the cache code shown in this file.
SanitizeValues bool
}
// DefaultRedisTracingConfig returns the configuration used by
// NewTracedRedisCache: keys recorded (up to 100 bytes), values not recorded,
// a 1000-byte value limit, a 50ms slow threshold and value sanitizing on.
func DefaultRedisTracingConfig() RedisTracingConfig {
	var cfg RedisTracingConfig
	cfg.IncludeKeys = true
	// Keep false in production to protect sensitive data.
	cfg.IncludeValues = false
	cfg.MaxKeyLength = 100
	cfg.MaxValueLength = 1000
	cfg.SlowThreshold = 50 * time.Millisecond
	cfg.SanitizeValues = true
	return cfg
}
// NewTracedRedisCache builds a traced cache on top of client. Keys are
// namespaced with prefix; tracing uses DefaultRedisTracingConfig.
func NewTracedRedisCache(client redis.UniversalClient, tracer *Tracer, logger *zap.Logger, prefix string) interfaces.CacheService {
	cache := new(TracedRedisCache)
	cache.client = client
	cache.tracer = tracer
	cache.logger = logger
	cache.prefix = prefix
	cache.config = DefaultRedisTracingConfig()
	return cache
}
// Name returns the service name of the cache.
func (c *TracedRedisCache) Name() string {
	const serviceName = "redis-cache"
	return serviceName
}
// Initialize logs that the cache service is ready; it performs no other work
// and always returns nil.
func (c *TracedRedisCache) Initialize(ctx context.Context) error {
	const readyMessage = "Redis缓存服务已初始化"
	c.logger.Info(readyMessage)
	return nil
}
// HealthCheck pings Redis and returns the ping error, if any.
func (c *TracedRedisCache) HealthCheck(ctx context.Context) error {
	// Only the error matters; the reply text is ignored.
	return c.client.Ping(ctx).Err()
}
// Shutdown logs the shutdown and closes the Redis client, returning the
// client's close error.
func (c *TracedRedisCache) Shutdown(ctx context.Context) error {
	const closedMessage = "Redis缓存服务已关闭"
	c.logger.Info(closedMessage)

	closeErr := c.client.Close()
	return closeErr
}
// Get reads key from Redis and deserializes the stored value into dest.
//
// The call runs inside a cache span that records duration, hit/miss and value
// size. A missing key is reported as interfaces.ErrCacheMiss with the span
// marked successful. Redis errors are logged and returned; a deserialization
// error is returned after marking the span failed. Calls slower than
// SlowThreshold are flagged on the span and logged as warnings.
func (c *TracedRedisCache) Get(ctx context.Context, key string, dest interface{}) error {
	const operation = "get"

	ctx, span := c.tracer.StartCacheSpan(ctx, operation, key)
	defer span.End()
	c.addBaseAttributes(span, operation, key)

	// Time the lookup, including building the prefixed key.
	began := time.Now()
	raw, err := c.client.Get(ctx, c.buildKey(key)).Result()
	elapsed := time.Since(began)

	c.tracer.AddSpanAttributes(span,
		attribute.Int64("redis.duration_ms", elapsed.Milliseconds()),
	)

	if elapsed > c.config.SlowThreshold {
		c.tracer.AddSpanAttributes(span, attribute.Bool("redis.slow_operation", true))
		c.logger.Warn("Redis慢操作检测",
			zap.String("operation", operation),
			zap.String("key", c.sanitizeKey(key)),
			zap.Duration("duration", elapsed),
			zap.String("trace_id", c.tracer.GetTraceID(ctx)),
		)
	}

	switch {
	case err == redis.Nil:
		// Cache miss: an expected outcome, not a span failure.
		c.tracer.AddSpanAttributes(span,
			attribute.Bool("redis.hit", false),
			attribute.String("redis.result", "miss"),
		)
		c.tracer.SetSpanSuccess(span)
		return interfaces.ErrCacheMiss
	case err != nil:
		c.tracer.SetSpanError(span, err)
		c.logger.Error("Redis GET操作失败",
			zap.String("key", c.sanitizeKey(key)),
			zap.Error(err),
			zap.String("trace_id", c.tracer.GetTraceID(ctx)),
		)
		return err
	}

	// Cache hit.
	c.tracer.AddSpanAttributes(span,
		attribute.Bool("redis.hit", true),
		attribute.String("redis.result", "hit"),
		attribute.Int("redis.value_size", len(raw)),
	)

	if decodeErr := c.deserialize(raw, dest); decodeErr != nil {
		c.tracer.SetSpanError(span, decodeErr)
		return decodeErr
	}

	c.tracer.SetSpanSuccess(span)
	return nil
}
// Set serializes value and stores it under key.
//
// If the first ttl argument is a time.Duration it is used as the expiration
// and recorded on the span; any other ttl value is ignored and the key is
// stored without expiration. The call runs inside a cache span that records
// duration and payload size. Serialization and Redis errors mark the span as
// failed and are returned; Redis errors are also logged. Calls slower than
// SlowThreshold are flagged on the span and logged as warnings.
func (c *TracedRedisCache) Set(ctx context.Context, key string, value interface{}, ttl ...interface{}) error {
	const operation = "set"

	ctx, span := c.tracer.StartCacheSpan(ctx, operation, key)
	defer span.End()
	c.addBaseAttributes(span, operation, key)

	var expiration time.Duration
	if len(ttl) > 0 {
		if requested, isDuration := ttl[0].(time.Duration); isDuration {
			expiration = requested
			c.tracer.AddSpanAttributes(span,
				attribute.Int64("redis.ttl_seconds", int64(expiration.Seconds())),
			)
		}
	}

	// The measured time covers serialization and the Redis round trip.
	began := time.Now()

	payload, err := c.serialize(value)
	if err != nil {
		c.tracer.SetSpanError(span, err)
		return err
	}

	err = c.client.Set(ctx, c.buildKey(key), payload, expiration).Err()
	elapsed := time.Since(began)

	c.tracer.AddSpanAttributes(span,
		attribute.Int64("redis.duration_ms", elapsed.Milliseconds()),
		attribute.Int("redis.value_size", len(payload)),
	)

	if elapsed > c.config.SlowThreshold {
		c.tracer.AddSpanAttributes(span, attribute.Bool("redis.slow_operation", true))
		c.logger.Warn("Redis慢操作检测",
			zap.String("operation", operation),
			zap.String("key", c.sanitizeKey(key)),
			zap.Duration("duration", elapsed),
			zap.String("trace_id", c.tracer.GetTraceID(ctx)),
		)
	}

	if err == nil {
		c.tracer.SetSpanSuccess(span)
		return nil
	}

	c.tracer.SetSpanError(span, err)
	c.logger.Error("Redis SET操作失败",
		zap.String("key", c.sanitizeKey(key)),
		zap.Error(err),
		zap.String("trace_id", c.tracer.GetTraceID(ctx)),
	)
	return err
}
// Delete removes the given keys from Redis.
//
// Calling it without keys is a no-op that returns nil: Redis rejects a DEL
// command that carries no key, so nothing is sent. Otherwise the call runs
// inside a cache span that records the key count, the duration and the number
// of keys actually deleted. Redis errors mark the span as failed, are logged
// and returned.
func (c *TracedRedisCache) Delete(ctx context.Context, keys ...string) error {
	if len(keys) == 0 {
		return nil
	}

	ctx, span := c.tracer.StartCacheSpan(ctx, "delete", strings.Join(keys, ","))
	defer span.End()

	c.tracer.AddSpanAttributes(span,
		attribute.String("redis.operation", "delete"),
		attribute.Int("redis.key_count", len(keys)),
	)

	startTime := time.Now()

	// Apply the cache prefix to every key.
	fullKeys := make([]string, len(keys))
	for i, key := range keys {
		fullKeys[i] = c.buildKey(key)
	}

	deleted, err := c.client.Del(ctx, fullKeys...).Result()

	duration := time.Since(startTime)
	c.tracer.AddSpanAttributes(span,
		attribute.Int64("redis.duration_ms", duration.Milliseconds()),
		attribute.Int64("redis.deleted_count", deleted),
	)

	if err != nil {
		c.tracer.SetSpanError(span, err)
		c.logger.Error("Redis DELETE操作失败",
			zap.Strings("keys", c.sanitizeKeys(keys)),
			zap.Error(err),
			zap.String("trace_id", c.tracer.GetTraceID(ctx)),
		)
		return err
	}

	c.tracer.SetSpanSuccess(span)
	return nil
}
// Exists reports whether key is present in Redis.
//
// The call runs inside a cache span that records the duration and the
// outcome. A Redis error marks the span as failed and is returned together
// with false.
func (c *TracedRedisCache) Exists(ctx context.Context, key string) (bool, error) {
	const operation = "exists"

	ctx, span := c.tracer.StartCacheSpan(ctx, operation, key)
	defer span.End()
	c.addBaseAttributes(span, operation, key)

	began := time.Now()
	count, err := c.client.Exists(ctx, c.buildKey(key)).Result()
	elapsed := time.Since(began)

	found := count > 0
	c.tracer.AddSpanAttributes(span,
		attribute.Int64("redis.duration_ms", elapsed.Milliseconds()),
		attribute.Bool("redis.exists", found),
	)

	if err != nil {
		c.tracer.SetSpanError(span, err)
		return false, err
	}

	c.tracer.SetSpanSuccess(span)
	return found, nil
}
// GetMultiple fetches keys one at a time through Get and returns the values
// that could be read, keyed by the requested (unprefixed) key.
//
// Keys whose Get fails for any reason are omitted and the returned error is
// always nil.
// NOTE(review): this also hides Redis failures, not only cache misses —
// confirm callers rely on this best-effort behaviour. MGET could replace the
// per-key loop.
func (c *TracedRedisCache) GetMultiple(ctx context.Context, keys []string) (map[string]interface{}, error) {
	found := make(map[string]interface{})

	for i := range keys {
		var value interface{}
		if err := c.Get(ctx, keys[i], &value); err != nil {
			continue
		}
		found[keys[i]] = value
	}

	return found, nil
}
// SetMultiple stores every entry of data through Set, passing ttl along.
//
// Entries are written one by one in map iteration order; the first failure
// stops the loop and is returned, leaving earlier entries written. A pipeline
// could replace the per-entry loop.
func (c *TracedRedisCache) SetMultiple(ctx context.Context, data map[string]interface{}, ttl ...interface{}) error {
	for key := range data {
		err := c.Set(ctx, key, data[key], ttl...)
		if err != nil {
			return err
		}
	}
	return nil
}
// DeletePattern is meant to delete all keys matching pattern. It is not
// implemented yet and always returns an error.
func (c *TracedRedisCache) DeletePattern(ctx context.Context, pattern string) error {
	const method = "DeletePattern"
	return fmt.Errorf("%s not implemented", method)
}
// Keys is meant to list the keys matching pattern. It is not implemented yet
// and always returns a nil slice and an error.
func (c *TracedRedisCache) Keys(ctx context.Context, pattern string) ([]string, error) {
	const method = "Keys"
	return nil, fmt.Errorf("%s not implemented", method)
}
// Stats is meant to report cache statistics. It is not implemented yet and
// always returns zero-valued stats and an error.
func (c *TracedRedisCache) Stats(ctx context.Context) (interfaces.CacheStats, error) {
	var none interfaces.CacheStats
	return none, fmt.Errorf("Stats not implemented")
}
// Helper methods.

// addBaseAttributes records the attributes shared by all single-key cache
// operations: the operation name, the database system ("redis") and, when
// IncludeKeys is set, the key.
//
// The key is always recorded in its sanitized form, so keys longer than
// MaxKeyLength appear truncated instead of being dropped from the span.
func (c *TracedRedisCache) addBaseAttributes(span trace.Span, operation, key string) {
	c.tracer.AddSpanAttributes(span,
		attribute.String("redis.operation", operation),
		attribute.String("db.system", "redis"),
	)

	if !c.config.IncludeKeys {
		return
	}
	c.tracer.AddSpanAttributes(span,
		attribute.String("redis.key", c.sanitizeKey(key)),
	)
}
// buildKey returns the key as stored in Redis: "<prefix>:<key>" when a prefix
// is configured, otherwise key unchanged.
func (c *TracedRedisCache) buildKey(key string) string {
	if prefix := c.prefix; prefix != "" {
		return fmt.Sprintf("%s:%s", prefix, key)
	}
	return key
}
// sanitizeKey shortens key for logs and span attributes.
//
// Keys of at most MaxKeyLength bytes are returned unchanged; longer keys are
// cut to at most MaxKeyLength bytes and suffixed with "...". The cut is moved
// back to a UTF-8 character boundary so that a multi-byte character is never
// split, and a negative MaxKeyLength is treated as zero.
func (c *TracedRedisCache) sanitizeKey(key string) string {
	limit := c.config.MaxKeyLength
	if limit < 0 {
		limit = 0
	}
	if len(key) <= limit {
		return key
	}

	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; back off
	// until the cut lands on the first byte of a character.
	cut := limit
	for cut > 0 && key[cut]&0xC0 == 0x80 {
		cut--
	}
	return key[:cut] + "..."
}
// sanitizeKeys applies sanitizeKey to every key and returns the results in
// the same order. The returned slice is never nil.
func (c *TracedRedisCache) sanitizeKeys(keys []string) []string {
	sanitized := make([]string, 0, len(keys))
	for _, key := range keys {
		sanitized = append(sanitized, c.sanitizeKey(key))
	}
	return sanitized
}
// serialize encodes value as JSON for storage in Redis.
//
// It returns an error when value cannot be represented as JSON (for example
// channels or functions).
func (c *TracedRedisCache) serialize(value interface{}) (string, error) {
	encoded, err := json.Marshal(value)
	if err != nil {
		return "", fmt.Errorf("serialize cache value: %w", err)
	}
	return string(encoded), nil
}

// deserialize decodes JSON data produced by serialize into dest.
//
// dest must be a non-nil pointer; an error is returned when it is not or when
// data is not valid JSON for the destination type.
func (c *TracedRedisCache) deserialize(data string, dest interface{}) error {
	if err := json.Unmarshal([]byte(data), dest); err != nil {
		return fmt.Errorf("deserialize cache value: %w", err)
	}
	return nil
}

View File

@@ -0,0 +1,189 @@
package tracing
import (
"context"
"fmt"
"time"
"go.opentelemetry.io/otel/attribute"
"go.uber.org/zap"
"tyapi-server/internal/domains/user/dto"
"tyapi-server/internal/domains/user/entities"
"tyapi-server/internal/shared/interfaces"
)
// ServiceWrapper provides tracing for service method calls.
type ServiceWrapper struct {
// tracer starts spans and records attributes, errors and success status.
tracer *Tracer
// logger receives slow-method warnings and method-failure errors.
logger *zap.Logger
}
// NewServiceWrapper builds a ServiceWrapper from the given tracer and logger.
func NewServiceWrapper(tracer *Tracer, logger *zap.Logger) *ServiceWrapper {
	wrapper := new(ServiceWrapper)
	wrapper.tracer = tracer
	wrapper.logger = logger
	return wrapper
}
// TraceServiceCall runs fn inside a span named "<serviceName>.<methodName>".
//
// The span is tagged with the service name, the method name and the fixed
// service type "business", and records the elapsed time of fn. Calls slower
// than 100ms are flagged on the span and logged as warnings. A non-nil error
// from fn marks the span as failed and is logged; otherwise the span is
// marked successful. The error of fn is returned unchanged.
func (w *ServiceWrapper) TraceServiceCall(
	ctx context.Context,
	serviceName, methodName string,
	fn func(context.Context) error,
) error {
	ctx, span := w.tracer.StartSpan(ctx, fmt.Sprintf("%s.%s", serviceName, methodName))
	defer span.End()

	w.tracer.AddSpanAttributes(span,
		attribute.String("service.name", serviceName),
		attribute.String("service.method", methodName),
		attribute.String("service.type", "business"),
	)

	// Only the wrapped call itself is timed.
	begin := time.Now()
	err := fn(ctx)
	elapsed := time.Since(begin)

	w.tracer.AddSpanAttributes(span,
		attribute.Int64("service.duration_ms", elapsed.Milliseconds()),
	)

	if elapsed > 100*time.Millisecond {
		w.tracer.AddSpanAttributes(span, attribute.Bool("service.slow_method", true))
		w.logger.Warn("慢方法检测",
			zap.String("service", serviceName),
			zap.String("method", methodName),
			zap.Duration("duration", elapsed),
			zap.String("trace_id", w.tracer.GetTraceID(ctx)),
		)
	}

	if err == nil {
		w.tracer.SetSpanSuccess(span)
		return err
	}

	w.tracer.SetSpanError(span, err)
	w.logger.Error("服务方法执行失败",
		zap.String("service", serviceName),
		zap.String("method", methodName),
		zap.Error(err),
		zap.String("trace_id", w.tracer.GetTraceID(ctx)),
	)
	return err
}
// TracedUserService wraps a user service so that its calls are traced.
type TracedUserService struct {
// service is the wrapped user service all calls are delegated to.
service interfaces.UserService
// wrapper runs the delegated calls inside tracing spans.
wrapper *ServiceWrapper
}
// NewTracedUserService wraps service so that its calls are traced through
// wrapper.
func NewTracedUserService(service interfaces.UserService, wrapper *ServiceWrapper) interfaces.UserService {
	traced := new(TracedUserService)
	traced.service = service
	traced.wrapper = wrapper
	return traced
}
// Name returns the fixed service name "user-service"; the wrapped service is
// not consulted.
func (t *TracedUserService) Name() string {
	const serviceName = "user-service"
	return serviceName
}
// Initialize initializes the wrapped service inside a "user.initialize" span.
func (t *TracedUserService) Initialize(ctx context.Context) error {
	initialize := t.service.Initialize
	return t.wrapper.TraceServiceCall(ctx, "user", "initialize", initialize)
}
// HealthCheck delegates directly to the wrapped service; health checks are
// deliberately not traced.
func (t *TracedUserService) HealthCheck(ctx context.Context) error {
	err := t.service.HealthCheck(ctx)
	return err
}
// Shutdown shuts the wrapped service down inside a "user.shutdown" span.
func (t *TracedUserService) Shutdown(ctx context.Context) error {
	shutdown := t.service.Shutdown
	return t.wrapper.TraceServiceCall(ctx, "user", "shutdown", shutdown)
}
// Register delegates to the wrapped service inside a "user.register" span.
// On error the returned user is nil.
func (t *TracedUserService) Register(ctx context.Context, req *dto.RegisterRequest) (*entities.User, error) {
	var user *entities.User

	err := t.wrapper.TraceServiceCall(ctx, "user", "register", func(ctx context.Context) error {
		var callErr error
		user, callErr = t.service.Register(ctx, req)
		return callErr
	})
	if err != nil {
		return nil, err
	}
	return user, nil
}
// LoginWithPassword delegates to the wrapped service inside a
// "user.login_password" span. On error the returned user is nil.
func (t *TracedUserService) LoginWithPassword(ctx context.Context, req *dto.LoginWithPasswordRequest) (*entities.User, error) {
	var user *entities.User

	err := t.wrapper.TraceServiceCall(ctx, "user", "login_password", func(ctx context.Context) error {
		var callErr error
		user, callErr = t.service.LoginWithPassword(ctx, req)
		return callErr
	})
	if err != nil {
		return nil, err
	}
	return user, nil
}
// LoginWithSMS delegates to the wrapped service inside a "user.login_sms"
// span. On error the returned user is nil.
func (t *TracedUserService) LoginWithSMS(ctx context.Context, req *dto.LoginWithSMSRequest) (*entities.User, error) {
	var user *entities.User

	err := t.wrapper.TraceServiceCall(ctx, "user", "login_sms", func(ctx context.Context) error {
		var callErr error
		user, callErr = t.service.LoginWithSMS(ctx, req)
		return callErr
	})
	if err != nil {
		return nil, err
	}
	return user, nil
}
// ChangePassword delegates to the wrapped service inside a
// "user.change_password" span.
func (t *TracedUserService) ChangePassword(ctx context.Context, userID string, req *dto.ChangePasswordRequest) error {
	change := func(ctx context.Context) error {
		return t.service.ChangePassword(ctx, userID, req)
	}
	return t.wrapper.TraceServiceCall(ctx, "user", "change_password", change)
}
// GetByID delegates to the wrapped service inside a "user.get_by_id" span.
// On error the returned user is nil.
func (t *TracedUserService) GetByID(ctx context.Context, id string) (*entities.User, error) {
	var user *entities.User

	err := t.wrapper.TraceServiceCall(ctx, "user", "get_by_id", func(ctx context.Context) error {
		var callErr error
		user, callErr = t.service.GetByID(ctx, id)
		return callErr
	})
	if err != nil {
		return nil, err
	}
	return user, nil
}

View File

@@ -0,0 +1,474 @@
package tracing
import (
"context"
"fmt"
"sync"
"time"
"github.com/gin-gonic/gin"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
)
// TracerConfig holds the settings of a Tracer.
type TracerConfig struct {
// ServiceName is reported as the "service.name" resource attribute and
// used as the tracer name.
ServiceName string
// ServiceVersion is reported as the "service.version" resource attribute.
ServiceVersion string
// Environment is reported as the "environment" resource attribute.
Environment string
// Endpoint is the OTLP gRPC endpoint; when empty a no-op exporter is used.
Endpoint string
// SampleRate is the trace-ID ratio handed to the sampler.
SampleRate float64
// Enabled switches tracing on; when false Initialize does nothing.
Enabled bool
}
// DefaultTracerConfig returns a development configuration: service
// "tyapi-server" version "1.0.0", a local OTLP gRPC collector, 10% sampling,
// tracing enabled.
//
// The endpoint is given as plain "host:port". Initialize hands it to
// otlptracegrpc.WithEndpoint, which expects an address without a URL scheme.
func DefaultTracerConfig() TracerConfig {
	return TracerConfig{
		ServiceName:    "tyapi-server",
		ServiceVersion: "1.0.0",
		Environment:    "development",
		Endpoint:       "localhost:4317",
		SampleRate:     0.1,
		Enabled:        true,
	}
}
// Tracer is the application's tracing facade.
type Tracer struct {
// config holds the settings the tracer was created with.
config TracerConfig
// logger receives initialization and error logs.
logger *zap.Logger
// provider is the tracer provider built by Initialize.
provider *sdktrace.TracerProvider
// tracer is the tracer obtained from provider; spans are started on it.
tracer trace.Tracer
// mutex is held by Initialize while it sets up the fields below.
mutex sync.RWMutex
// initialized is set once Initialize has completed successfully.
initialized bool
// shutdown shuts the provider down; set by Initialize.
shutdown func(context.Context) error
}
// NewTracer creates an uninitialized Tracer holding config and logger; call
// Initialize before starting spans.
func NewTracer(config TracerConfig, logger *zap.Logger) *Tracer {
	tracer := new(Tracer)
	tracer.config = config
	tracer.logger = logger
	return tracer
}
// Initialize sets up the tracer provider, exporter and tracer.
//
// It is a no-op returning nil when the tracer is already initialized or when
// tracing is disabled in the configuration (in which case the tracer stays
// uninitialized). With an endpoint configured, an OTLP gRPC exporter behind a
// batch span processor is used; if the exporter cannot be created, or no
// endpoint is configured, a no-op exporter is used instead. The provider is
// also installed as the global OpenTelemetry tracer provider. An error is
// returned only when the resource cannot be created.
func (t *Tracer) Initialize(ctx context.Context) error {
t.mutex.Lock()
defer t.mutex.Unlock()
if t.initialized {
return nil
}
if !t.config.Enabled {
t.logger.Info("Tracing is disabled")
return nil
}
// Build the resource describing this service.
res, err := resource.New(ctx,
resource.WithAttributes(
attribute.String("service.name", t.config.ServiceName),
attribute.String("service.version", t.config.ServiceVersion),
attribute.String("environment", t.config.Environment),
),
)
if err != nil {
return fmt.Errorf("failed to create resource: %w", err)
}
// Build the sampler from the configured ratio.
sampler := sdktrace.TraceIDRatioBased(t.config.SampleRate)
// Build the exporter and its span processor.
var spanProcessor sdktrace.SpanProcessor
if t.config.Endpoint != "" {
// Use the OTLP gRPC exporter (works with Jaeger, Tempo, etc.).
// NOTE(review): Endpoint is passed to WithEndpoint verbatim — confirm that
// configured values have the form this option expects.
exporter, err := otlptracegrpc.New(ctx,
otlptracegrpc.WithEndpoint(t.config.Endpoint),
otlptracegrpc.WithInsecure(), // insecure transport for development; production should configure TLS
otlptracegrpc.WithTimeout(time.Second*10),
otlptracegrpc.WithRetry(otlptracegrpc.RetryConfig{
Enabled: true,
InitialInterval: time.Millisecond * 100,
MaxInterval: time.Second * 5,
MaxElapsedTime: time.Second * 30,
}),
)
if err != nil {
t.logger.Warn("Failed to create OTLP exporter, using noop exporter",
zap.Error(err),
zap.String("endpoint", t.config.Endpoint))
spanProcessor = sdktrace.NewSimpleSpanProcessor(&noopExporter{})
} else {
// Use a batch processor for better export performance.
spanProcessor = sdktrace.NewBatchSpanProcessor(exporter,
sdktrace.WithBatchTimeout(time.Second*5),
sdktrace.WithMaxExportBatchSize(512),
sdktrace.WithMaxQueueSize(2048),
sdktrace.WithExportTimeout(time.Second*30),
)
t.logger.Info("OTLP exporter initialized successfully",
zap.String("endpoint", t.config.Endpoint))
}
} else {
// No endpoint configured: fall back to the no-op exporter.
spanProcessor = sdktrace.NewSimpleSpanProcessor(&noopExporter{})
t.logger.Info("Using noop exporter (no endpoint configured)")
}
// Create the TracerProvider.
provider := sdktrace.NewTracerProvider(
sdktrace.WithResource(res),
sdktrace.WithSampler(sampler),
sdktrace.WithSpanProcessor(spanProcessor),
)
// Install it as the global TracerProvider.
otel.SetTracerProvider(provider)
// Create the Tracer.
tracer := provider.Tracer(t.config.ServiceName)
t.provider = provider
t.tracer = tracer
t.shutdown = func(ctx context.Context) error {
return provider.Shutdown(ctx)
}
t.initialized = true
t.logger.Info("Tracing initialized successfully",
zap.String("service", t.config.ServiceName),
zap.Float64("sample_rate", t.config.SampleRate))
return nil
}
// StartSpan starts a new span named name as a child of the span in ctx and
// returns the derived context together with the span.
//
// When the tracer is not initialized or tracing is disabled, ctx is returned
// unchanged together with a no-op span. The span already carried by ctx is
// deliberately not handed out: callers end and annotate whatever StartSpan
// returns, which would otherwise finish and modify a span they do not own.
func (t *Tracer) StartSpan(ctx context.Context, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
	if !t.initialized || !t.config.Enabled {
		// A context without a span yields the package's no-op span.
		return ctx, trace.SpanFromContext(context.Background())
	}

	return t.tracer.Start(ctx, name, opts...)
}
// StartHTTPSpan starts a server span for an HTTP request, named
// "<method> <path>" and tagged with the HTTP method and route.
//
// If ctx carries a value under "otel_error_request", the span is named
// "error" instead, so that it matches the error-operation strategy of the
// Jaeger sampling configuration. Otherwise the span name is stored in the
// returned context under "otel_original_operation" so it can be looked up
// when an error occurs later.
func (t *Tracer) StartHTTPSpan(ctx context.Context, method, path string) (context.Context, trace.Span) {
	spanName := "error"
	if ctx.Value("otel_error_request") == nil {
		spanName = fmt.Sprintf("%s %s", method, path)
	}

	startOpts := []trace.SpanStartOption{
		trace.WithSpanKind(trace.SpanKindServer),
		trace.WithAttributes(
			attribute.String("http.method", method),
			attribute.String("http.route", path),
		),
	}
	ctx, span := t.StartSpan(ctx, spanName, startOpts...)

	if ctx.Value("otel_error_request") != nil {
		return ctx, span
	}
	// Remember the original operation name for later error handling.
	return context.WithValue(ctx, "otel_original_operation", spanName), span
}
// StartDBSpan starts a client-kind span for a database call.
//
// The span is named "db.<operation>.<table>" and carries the attributes
// db.operation, db.table and db.system. db.system is always reported as
// "postgresql" by this implementation.
func (t *Tracer) StartDBSpan(ctx context.Context, operation, table string) (context.Context, trace.Span) {
	dbAttrs := trace.WithAttributes(
		attribute.String("db.operation", operation),
		attribute.String("db.table", table),
		attribute.String("db.system", "postgresql"),
	)
	name := fmt.Sprintf("db.%s.%s", operation, table)

	return t.StartSpan(ctx, name, trace.WithSpanKind(trace.SpanKindClient), dbAttrs)
}
// StartCacheSpan starts a client-kind span for a cache call.
//
// The span is named "cache.<operation>" and carries the attributes
// cache.operation and cache.system (always "redis" here). The key parameter
// is accepted but is not attached to the span by this implementation.
func (t *Tracer) StartCacheSpan(ctx context.Context, operation, key string) (context.Context, trace.Span) {
	cacheAttrs := trace.WithAttributes(
		attribute.String("cache.operation", operation),
		attribute.String("cache.system", "redis"),
	)
	name := fmt.Sprintf("cache.%s", operation)

	return t.StartSpan(ctx, name, trace.WithSpanKind(trace.SpanKindClient), cacheAttrs)
}
// StartExternalAPISpan starts a client-kind span for a call to an external
// API.
//
// The span is named "api.<service>.<operation>" and carries the attributes
// api.service and api.operation.
func (t *Tracer) StartExternalAPISpan(ctx context.Context, service, operation string) (context.Context, trace.Span) {
	apiAttrs := trace.WithAttributes(
		attribute.String("api.service", service),
		attribute.String("api.operation", operation),
	)
	name := fmt.Sprintf("api.%s.%s", service, operation)

	return t.StartSpan(ctx, name, trace.WithSpanKind(trace.SpanKindClient), apiAttrs)
}
// AddSpanAttributes attaches attrs to span. Spans that are not recording are
// left untouched.
func (t *Tracer) AddSpanAttributes(span trace.Span, attrs ...attribute.KeyValue) {
	if !span.IsRecording() {
		return
	}
	span.SetAttributes(attrs...)
}
// SetSpanError marks span as failed because of err.
//
// For a recording span it sets the status to Error with err's message,
// records err as a span event, and adds the marker attributes
// error.operation="true" and operation.type="error". The span name itself is
// not changed; according to the original author's note the marker attributes
// are a workaround so error spans can be matched by the Jaeger sampling
// configuration.
//
// If a logger is configured, the error is also logged together with the
// span's trace and span IDs (empty strings when the span context is invalid).
//
// A nil err or a non-recording span makes the call a no-op.
func (t *Tracer) SetSpanError(span trace.Span, err error) {
	// A nil error carries nothing to record, and err.Error() below would
	// panic on it.
	if err == nil || !span.IsRecording() {
		return
	}

	span.SetStatus(codes.Error, err.Error())
	span.RecordError(err)
	span.SetAttributes(
		attribute.String("error.operation", "true"),
		attribute.String("operation.type", "error"),
	)

	if t.logger == nil {
		return
	}

	// Read the IDs straight from the span so the log line can be correlated
	// with the trace.
	var traceID, spanID string
	if sc := span.SpanContext(); sc.IsValid() {
		traceID = sc.TraceID().String()
		spanID = sc.SpanID().String()
	}
	t.logger.Error("操作发生错误",
		zap.Error(err),
		zap.String("trace_id", traceID),
		zap.String("span_id", spanID),
	)
}
// SetSpanSuccess sets the status of span to Ok. Spans that are not recording
// are left untouched.
func (t *Tracer) SetSpanSuccess(span trace.Span) {
	if !span.IsRecording() {
		return
	}
	span.SetStatus(codes.Ok, "success")
}
// SetHTTPStatus records an HTTP response status code on span and derives the
// span status from it.
//
// The http.status_code attribute is always set on a recording span. Codes
// below 400 mark the span Ok. Codes of 400 and above (both 4xx and 5xx) mark
// the span Error with the description "HTTP <code>", add the marker
// attributes error.operation="true" and operation.type="error", and, when a
// logger is configured, emit a warning containing the trace and span IDs.
//
// Spans that are not recording are left untouched.
func (t *Tracer) SetHTTPStatus(span trace.Span, statusCode int) {
	if !span.IsRecording() {
		return
	}

	span.SetAttributes(attribute.Int("http.status_code", statusCode))

	// Success path: nothing more to do than flag the span as Ok.
	if statusCode < 400 {
		span.SetStatus(codes.Ok, "success")
		return
	}

	span.SetStatus(codes.Error, fmt.Sprintf("HTTP %d", statusCode))
	// Marker attributes; per the original author's note they exist so the
	// span can be matched by the Jaeger sampling configuration.
	span.SetAttributes(
		attribute.String("error.operation", "true"),
		attribute.String("operation.type", "error"),
	)

	if t.logger == nil {
		return
	}

	// Wrap the span in a fresh context so the ID helpers can be reused.
	spanCtx := trace.ContextWithSpan(context.Background(), span)
	t.logger.Warn("HTTP请求错误",
		zap.Int("status_code", statusCode),
		zap.String("trace_id", t.GetTraceID(spanCtx)),
		zap.String("span_id", t.GetSpanID(spanCtx)),
	)
}
// GetTraceID returns the trace ID of the span carried by ctx as a string, or
// the empty string when that span's context is not valid.
func (t *Tracer) GetTraceID(ctx context.Context) string {
	current := trace.SpanFromContext(ctx)
	if !current.SpanContext().IsValid() {
		return ""
	}
	return current.SpanContext().TraceID().String()
}
// GetSpanID returns the span ID of the span carried by ctx as a string, or
// the empty string when that span's context is not valid.
func (t *Tracer) GetSpanID(ctx context.Context) string {
	current := trace.SpanFromContext(ctx)
	if !current.SpanContext().IsValid() {
		return ""
	}
	return current.SpanContext().SpanID().String()
}
// IsTracing reports whether ctx carries a span that has a valid span context
// and is currently recording.
func (t *Tracer) IsTracing(ctx context.Context) bool {
	current := trace.SpanFromContext(ctx)
	if !current.SpanContext().IsValid() {
		return false
	}
	return current.IsRecording()
}
// Shutdown stops the tracer by invoking the stored shutdown function while
// holding the write lock.
//
// It is a no-op returning nil when the tracer is not initialized or no
// shutdown function has been stored. If the shutdown function fails, the
// error is logged and returned and the tracer stays marked as initialized;
// on success the initialized flag is cleared.
func (t *Tracer) Shutdown(ctx context.Context) error {
	t.mutex.Lock()
	defer t.mutex.Unlock()

	if t.initialized && t.shutdown != nil {
		if err := t.shutdown(ctx); err != nil {
			t.logger.Error("Failed to shutdown tracer", zap.Error(err))
			return err
		}

		t.initialized = false
		t.logger.Info("Tracer shutdown successfully")
	}
	return nil
}
// GetStats returns a snapshot of the tracer's state and configuration, taken
// under the read lock. The map contains the keys initialized, enabled,
// service_name, service_version, environment, sample_rate and endpoint.
func (t *Tracer) GetStats() map[string]interface{} {
	t.mutex.RLock()
	defer t.mutex.RUnlock()

	stats := make(map[string]interface{}, 7)
	stats["initialized"] = t.initialized
	stats["enabled"] = t.config.Enabled
	stats["service_name"] = t.config.ServiceName
	stats["service_version"] = t.config.ServiceVersion
	stats["environment"] = t.config.Environment
	stats["sample_rate"] = t.config.SampleRate
	stats["endpoint"] = t.config.Endpoint
	return stats
}
// Service interface implementation.

// Name returns the fixed service name of the tracer, "tracer".
func (t *Tracer) Name() string {
	const serviceName = "tracer"
	return serviceName
}
// HealthCheck reports whether the tracer is usable. It returns an error only
// when tracing is enabled in the configuration but the tracer has not been
// initialized; a disabled tracer is always considered healthy. ctx is not
// used.
func (t *Tracer) HealthCheck(ctx context.Context) error {
	if t.config.Enabled && !t.initialized {
		return fmt.Errorf("tracer not initialized")
	}
	return nil
}
// noopExporter is a span exporter that discards everything it is given. The
// original author describes it as being for demonstration purposes.
type noopExporter struct{}

// Compile-time check that noopExporter satisfies the SDK exporter interface.
var _ sdktrace.SpanExporter = (*noopExporter)(nil)

// ExportSpans drops the given spans and reports success. A real exporter
// would forward them to Jaeger or another tracing backend here.
func (*noopExporter) ExportSpans(context.Context, []sdktrace.ReadOnlySpan) error {
	return nil
}

// Shutdown has nothing to release and always reports success.
func (*noopExporter) Shutdown(context.Context) error {
	return nil
}
// TraceMiddleware returns a Gin middleware that wraps every request in an
// HTTP server span.
//
// For each request (when the tracer is initialized and enabled) it:
//   - starts a span via StartHTTPSpan using the request method and the
//     matched route pattern (c.FullPath());
//   - exposes the trace ID in the X-Trace-ID response header, if there is one;
//   - replaces the request's context with the span context so downstream
//     handlers can create child spans;
//   - after the handlers have run, records the status code and response
//     size, and marks the span as failed for status codes >= 400 or when
//     handlers attached errors to the Gin context.
//
// When the tracer is not initialized or tracing is disabled the request is
// passed through untouched.
func (t *Tracer) TraceMiddleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		if !t.initialized || !t.config.Enabled {
			c.Next()
			return
		}

		ctx, span := t.StartHTTPSpan(c.Request.Context(), c.Request.Method, c.FullPath())
		defer span.End()

		if traceID := t.GetTraceID(ctx); traceID != "" {
			c.Header("X-Trace-ID", traceID)
		}

		// Make the span visible to downstream handlers.
		c.Request = c.Request.WithContext(ctx)

		c.Next()

		status := c.Writer.Status()
		hasErrors := len(c.Errors) > 0

		// SetHTTPStatus marks the span Ok for codes below 400. In
		// OpenTelemetry an Ok status takes precedence over Error, so setting
		// Ok first would cause the Error status from SetSpanError below to be
		// discarded. Skip the Ok path when handler errors will be recorded.
		if status >= 400 || !hasErrors {
			t.SetHTTPStatus(span, status)
		}

		t.AddSpanAttributes(span,
			attribute.Int("http.status_code", status),
			attribute.Int("http.response_size", c.Writer.Size()),
		)

		if hasErrors {
			// Use an explicit "%s" verb: the error text is arbitrary and any
			// '%' in it must not be interpreted as a format directive.
			t.SetSpanError(span, fmt.Errorf("%s", c.Errors.String()))
		}
	}
}
// GinTraceMiddleware is the older name of TraceMiddleware, kept so existing
// callers keep working. It returns exactly what TraceMiddleware returns.
func (t *Tracer) GinTraceMiddleware() gin.HandlerFunc {
	handler := t.TraceMiddleware()
	return handler
}
// WithTracing is a package-level convenience wrapper around
// tracer.StartSpan: it starts a span called name on ctx and returns the
// derived context and the span. The caller is responsible for ending the
// span.
func WithTracing(ctx context.Context, tracer *Tracer, name string) (context.Context, trace.Span) {
	spanCtx, span := tracer.StartSpan(ctx, name)
	return spanCtx, span
}
// TraceFunction runs fn inside a span called name.
//
// fn receives the context returned by StartSpan. If fn returns an error the
// span is marked as failed via SetSpanError, otherwise it is marked
// successful via SetSpanSuccess. The span is ended before TraceFunction
// returns, and fn's error is returned unchanged.
func (t *Tracer) TraceFunction(ctx context.Context, name string, fn func(context.Context) error) error {
	spanCtx, span := t.StartSpan(ctx, name)
	defer span.End()

	if fnErr := fn(spanCtx); fnErr != nil {
		t.SetSpanError(span, fnErr)
		return fnErr
	}

	t.SetSpanSuccess(span)
	return nil
}
// TraceFunctionWithResult runs fn inside a span called name and passes
// through both of fn's return values.
//
// fn receives the context returned by tracer.StartSpan. If fn returns an
// error the span is marked as failed via SetSpanError, otherwise it is
// marked successful via SetSpanSuccess. The span is ended before the
// function returns.
func TraceFunctionWithResult[T any](ctx context.Context, tracer *Tracer, name string, fn func(context.Context) (T, error)) (T, error) {
	spanCtx, span := tracer.StartSpan(ctx, name)
	defer span.End()

	value, fnErr := fn(spanCtx)
	if fnErr == nil {
		tracer.SetSpanSuccess(span)
		return value, nil
	}

	tracer.SetSpanError(span, fnErr)
	return value, fnErr
}