package service

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"log"
	"regexp"
	"strings"
	"sync"
	"time"
	"unicode"
	"unicode/utf8"

	"carrot_bbs/internal/model"
	redisclient "carrot_bbs/internal/pkg/redis"
	"gorm.io/gorm"
)

// defaultReplaceStr is the mask used when no replacement string is supplied.
const defaultReplaceStr = "***"

// whitespaceRun matches one or more consecutive whitespace characters. It is
// compiled once at package scope so SanitizeText does not recompile it on
// every call.
var whitespaceRun = regexp.MustCompile(`\s+`)

// ==================== DFA sensitive-word filter ====================

// SensitiveNode is a single node of the sensitive-word trie.
type SensitiveNode struct {
	// Children maps the next (lower-cased) rune to its child node.
	Children map[rune]*SensitiveNode

	// IsEnd reports whether a sensitive word terminates at this node.
	IsEnd bool

	// Word, Level and Category describe the word that ends at this node.
	// They are only meaningful while IsEnd is true.
	Word     string
	Level    model.SensitiveWordLevel
	Category model.SensitiveWordCategory
}

// NewSensitiveNode returns an empty trie node with an initialized child map.
func NewSensitiveNode() *SensitiveNode {
	return &SensitiveNode{Children: make(map[rune]*SensitiveNode)}
}

// SensitiveWordTree is a trie of sensitive words keyed by lower-cased runes.
// All exported methods take the tree's own lock, so a single tree may be used
// from multiple goroutines.
type SensitiveWordTree struct {
	root       *SensitiveNode
	wordCount  int
	mu         sync.RWMutex
	lastReload time.Time
}

// NewSensitiveWordTree returns an empty tree whose reload timestamp is set to
// the current time.
func NewSensitiveWordTree() *SensitiveWordTree {
	return &SensitiveWordTree{
		root:       NewSensitiveNode(),
		lastReload: time.Now(),
	}
}

// lowerRunes converts s to runes and lower-cases each rune individually.
// Folding rune by rune guarantees the result has exactly as many elements as
// []rune(s), so an index into the folded slice is also a valid index into the
// original runes.
func lowerRunes(s string) []rune {
	runes := []rune(s)
	for i, r := range runes {
		runes[i] = unicode.ToLower(r)
	}
	return runes
}

// longestMatch walks the trie starting at runes[start] and returns the index
// of the last rune of the longest sensitive word that begins there, together
// with the node at which that word ends. It returns (-1, nil) when no word
// starts at that position. The caller must hold t.mu.
func (t *SensitiveWordTree) longestMatch(runes []rune, start int) (int, *SensitiveNode) {
	var hit *SensitiveNode
	end := -1
	node := t.root
	for j := start; j < len(runes); j++ {
		child, ok := node.Children[runes[j]]
		if !ok {
			break
		}
		node = child
		if node.IsEnd {
			end, hit = j, node
		}
	}
	return end, hit
}

// AddWord inserts word into the tree with the given level and category.
// Matching is case-insensitive: the trie path uses the lower-cased word while
// the node keeps the word exactly as supplied. Adding a word that is already
// present overwrites its metadata without changing the word count. An empty
// word is ignored.
func (t *SensitiveWordTree) AddWord(word string, level model.SensitiveWordLevel, category model.SensitiveWordCategory) {
	if word == "" {
		return
	}
	runes := lowerRunes(word)

	t.mu.Lock()
	defer t.mu.Unlock()

	node := t.root
	for _, r := range runes {
		child, ok := node.Children[r]
		if !ok {
			child = NewSensitiveNode()
			node.Children[r] = child
		}
		node = child
	}

	// Only count the word the first time it is registered.
	if !node.IsEnd {
		t.wordCount++
	}
	node.IsEnd = true
	node.Word = word
	node.Level = level
	node.Category = category
}

// RemoveWord unregisters word from the tree (case-insensitively). The trie
// nodes along its path are kept; only the end-of-word marker is cleared.
// Removing an unknown or empty word is a no-op.
func (t *SensitiveWordTree) RemoveWord(word string) {
	if word == "" {
		return
	}
	runes := lowerRunes(word)

	t.mu.Lock()
	defer t.mu.Unlock()

	node := t.root
	for _, r := range runes {
		child, ok := node.Children[r]
		if !ok {
			return // the word was never registered
		}
		node = child
	}

	if node.IsEnd {
		node.IsEnd = false
		node.Word = ""
		t.wordCount--
	}
}

// Check scans text for sensitive words, ignoring case. It returns whether any
// word was found and the list of matched words as they were registered. At
// each position the longest word starting there wins, and scanning resumes
// after that match, so reported matches never overlap.
func (t *SensitiveWordTree) Check(text string) (bool, []string) {
	if text == "" {
		return false, nil
	}
	runes := lowerRunes(text)

	t.mu.RLock()
	defer t.mu.RUnlock()

	var found []string
	for i := 0; i < len(runes); i++ {
		end, node := t.longestMatch(runes, i)
		if end < 0 {
			continue
		}
		found = append(found, node.Word)
		i = end // resume right after the matched word
	}
	return len(found) > 0, found
}

// Replace returns text with every sensitive word replaced by repl. Matching
// is case-insensitive, consistent with AddWord and Check; characters that are
// not part of a match are copied through unchanged, keeping their original
// case.
func (t *SensitiveWordTree) Replace(text string, repl string) string {
	if text == "" {
		return text
	}
	original := []rune(text)
	// The trie is keyed by lower-cased runes, so match on a folded copy while
	// emitting the original runes.
	folded := lowerRunes(text)
	mask := []rune(repl)

	t.mu.RLock()
	defer t.mu.RUnlock()

	out := make([]rune, 0, len(original))
	for i := 0; i < len(original); i++ {
		end, _ := t.longestMatch(folded, i)
		if end < 0 {
			out = append(out, original[i])
			continue
		}
		out = append(out, mask...)
		i = end // skip the runes covered by the match
	}
	return string(out)
}

// WordCount returns the number of sensitive words currently registered.
func (t *SensitiveWordTree) WordCount() int {
	t.mu.RLock()
	defer t.mu.RUnlock()
	return t.wordCount
}

// ==================== Sensitive-word service ====================

// SensitiveService is the sensitive-word filtering service.
type SensitiveService interface {
	// Check reports whether text contains sensitive words and lists them.
	Check(ctx context.Context, text string) (bool, []string)
	// Replace masks the sensitive words in text with repl.
	Replace(ctx context.Context, text string, repl string) string
	// AddWord registers a sensitive word.
	AddWord(ctx context.Context, word string, category string, level int) error
	// RemoveWord unregisters a sensitive word.
	RemoveWord(ctx context.Context, word string) error
	// Reload rebuilds the word set from the configured sources.
	Reload(ctx context.Context) error
	// GetWordCount returns the number of registered sensitive words.
	GetWordCount(ctx context.Context) int
}

// sensitiveServiceImpl implements SensitiveService on top of an in-memory
// SensitiveWordTree, optionally backed by a database and Redis. mu guards the
// tree pointer, which Reload replaces wholesale; the tree itself has its own
// lock for word-level operations.
type sensitiveServiceImpl struct {
	tree       *SensitiveWordTree
	db         *gorm.DB
	redis      *redisclient.Client
	config     *SensitiveConfig
	mu         sync.RWMutex
	replaceStr string
}

// Compile-time check that the implementation satisfies the interface.
var _ SensitiveService = (*sensitiveServiceImpl)(nil)

// SensitiveConfig configures the sensitive-word service.
type SensitiveConfig struct {
	// Enabled turns filtering on; when false Check and Replace are no-ops.
	Enabled bool `mapstructure:"enabled" yaml:"enabled"`

	// ReplaceStr is the default mask; "***" is used when it is empty.
	ReplaceStr string `mapstructure:"replace_str" yaml:"replace_str"`

	// MinMatchLen is the minimum match length.
	// NOTE(review): not read anywhere in this file — confirm whether it is
	// consumed elsewhere.
	MinMatchLen int `mapstructure:"min_match_len" yaml:"min_match_len"`

	// LoadFromDB enables loading words from the database.
	LoadFromDB bool `mapstructure:"load_from_db" yaml:"load_from_db"`

	// LoadFromRedis enables loading words from Redis.
	LoadFromRedis bool `mapstructure:"load_from_redis" yaml:"load_from_redis"`

	// RedisKeyPrefix is the key prefix under which words are stored in Redis.
	RedisKeyPrefix string `mapstructure:"redis_key_prefix" yaml:"redis_key_prefix"`
}

// NewSensitiveService builds the service and eagerly loads words from the
// sources enabled in config. Load failures are logged rather than returned,
// so the service always starts. A nil config is treated as an empty one,
// which leaves the service disabled.
func NewSensitiveService(db *gorm.DB, redisClient *redisclient.Client, config *SensitiveConfig) SensitiveService {
	if config == nil {
		config = &SensitiveConfig{}
	}

	s := &sensitiveServiceImpl{
		tree:       NewSensitiveWordTree(),
		db:         db,
		redis:      redisClient,
		config:     config,
		replaceStr: config.ReplaceStr,
	}
	if s.replaceStr == "" {
		s.replaceStr = defaultReplaceStr
	}

	ctx := context.Background()
	if config.LoadFromDB {
		if err := s.loadFromDB(ctx); err != nil {
			log.Printf("Failed to load sensitive words from database: %v", err)
		}
	}
	if config.LoadFromRedis && redisClient != nil {
		if err := s.loadFromRedis(ctx); err != nil {
			log.Printf("Failed to load sensitive words from redis: %v", err)
		}
	}
	return s
}

// currentTree returns the active tree. Reload may swap the pointer at any
// time, so every access goes through mu.
func (s *sensitiveServiceImpl) currentTree() *SensitiveWordTree {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.tree
}

// normalizeLevel maps levels outside the accepted 1..3 range to the low level.
func normalizeLevel(level model.SensitiveWordLevel) model.SensitiveWordLevel {
	if level < 1 || level > 3 {
		return model.SensitiveWordLevelLow
	}
	return level
}

// normalizeCategory maps an empty category to the "other" category.
func normalizeCategory(category model.SensitiveWordCategory) model.SensitiveWordCategory {
	if category == "" {
		return model.SensitiveWordCategoryOther
	}
	return category
}

// Check reports whether text contains sensitive words and lists them. It
// always returns (false, nil) when the service is disabled or text is empty.
func (s *sensitiveServiceImpl) Check(ctx context.Context, text string) (bool, []string) {
	if !s.config.Enabled || text == "" {
		return false, nil
	}
	return s.currentTree().Check(text)
}

// Replace masks sensitive words in text with repl, falling back to the
// configured default mask when repl is empty. The text is returned unchanged
// when the service is disabled or text is empty.
func (s *sensitiveServiceImpl) Replace(ctx context.Context, text string, repl string) string {
	if !s.config.Enabled || text == "" {
		return text
	}
	if repl == "" {
		repl = s.replaceStr
	}
	return s.currentTree().Replace(text, repl)
}

// AddWord registers word in memory and then, best-effort, persists it to the
// database and mirrors it to Redis. Out-of-range levels and empty categories
// are normalized first, and the same normalized values are written to all
// three places. Persistence failures are logged, not returned; the only error
// returned is for an empty word.
func (s *sensitiveServiceImpl) AddWord(ctx context.Context, word string, category string, level int) error {
	if word == "" {
		return errors.New("word cannot be empty")
	}

	wordLevel := normalizeLevel(model.SensitiveWordLevel(level))
	wordCategory := normalizeCategory(model.SensitiveWordCategory(category))

	s.currentTree().AddWord(word, wordLevel, wordCategory)

	if s.db != nil {
		s.upsertWord(ctx, word, wordLevel, wordCategory)
	}

	if s.redis != nil && s.config.RedisKeyPrefix != "" {
		key := fmt.Sprintf("%s:%s", s.config.RedisKeyPrefix, word)
		// Store the normalized values so a later load from Redis yields the
		// same metadata that was put into the tree and the database.
		payload, err := json.Marshal(map[string]interface{}{
			"word":     word,
			"category": string(wordCategory),
			"level":    int(wordLevel),
		})
		if err != nil {
			log.Printf("Failed to encode sensitive word for redis: %v", err)
		} else {
			// Best-effort mirror; the outcome is not inspected here.
			s.redis.Set(ctx, key, payload, 0)
		}
	}
	return nil
}

// upsertWord creates the database row for word, or re-activates and updates
// the existing one. Failures are logged because persistence is best-effort.
func (s *sensitiveServiceImpl) upsertWord(ctx context.Context, word string, level model.SensitiveWordLevel, category model.SensitiveWordCategory) {
	db := s.db.WithContext(ctx)

	var existing model.SensitiveWord
	err := db.Where("word = ?", word).First(&existing).Error
	switch {
	case errors.Is(err, gorm.ErrRecordNotFound):
		record := model.SensitiveWord{
			Word:     word,
			Category: category,
			Level:    level,
			IsActive: true,
		}
		if err := db.Create(&record).Error; err != nil {
			log.Printf("Failed to save sensitive word to database: %v", err)
		}
	case err != nil:
		// Any other lookup failure: report it instead of dropping it.
		log.Printf("Failed to look up sensitive word in database: %v", err)
	default:
		existing.Category = category
		existing.Level = level
		existing.IsActive = true
		if err := db.Save(&existing).Error; err != nil {
			log.Printf("Failed to update sensitive word in database: %v", err)
		}
	}
}

// RemoveWord unregisters word from memory, marks it inactive in the database
// and deletes its Redis key. Storage failures are logged, not returned; the
// only error returned is for an empty word.
func (s *sensitiveServiceImpl) RemoveWord(ctx context.Context, word string) error {
	if word == "" {
		return errors.New("word cannot be empty")
	}

	s.currentTree().RemoveWord(word)

	if s.db != nil {
		err := s.db.WithContext(ctx).
			Model(&model.SensitiveWord{}).
			Where("word = ?", word).
			Update("is_active", false).Error
		if err != nil {
			log.Printf("Failed to deactivate sensitive word in database: %v", err)
		}
	}

	if s.redis != nil && s.config.RedisKeyPrefix != "" {
		key := fmt.Sprintf("%s:%s", s.config.RedisKeyPrefix, word)
		s.redis.Del(ctx, key)
	}
	return nil
}

// Reload rebuilds the word set from the configured sources. The new tree is
// filled off to the side and swapped in only after every source has loaded,
// so concurrent Check and Replace calls keep using the previous words while
// the reload runs, and a failed reload leaves the previous words in place.
func (s *sensitiveServiceImpl) Reload(ctx context.Context) error {
	fresh := NewSensitiveWordTree()

	if s.config.LoadFromDB {
		if err := s.loadFromDBInto(ctx, fresh); err != nil {
			return fmt.Errorf("failed to load from database: %w", err)
		}
	}
	if s.config.LoadFromRedis && s.redis != nil {
		if err := s.loadFromRedisInto(ctx, fresh); err != nil {
			return fmt.Errorf("failed to load from redis: %w", err)
		}
	}

	s.mu.Lock()
	s.tree = fresh
	s.mu.Unlock()
	return nil
}

// GetWordCount returns the number of sensitive words in the active tree.
func (s *sensitiveServiceImpl) GetWordCount(ctx context.Context) int {
	return s.currentTree().WordCount()
}

// loadFromDB loads the active words from the database into the active tree.
func (s *sensitiveServiceImpl) loadFromDB(ctx context.Context) error {
	return s.loadFromDBInto(ctx, s.currentTree())
}

// loadFromDBInto adds every row with is_active = true to tree. It is a no-op
// when no database is configured.
func (s *sensitiveServiceImpl) loadFromDBInto(ctx context.Context, tree *SensitiveWordTree) error {
	if s.db == nil {
		return nil
	}

	var words []model.SensitiveWord
	if err := s.db.WithContext(ctx).Where("is_active = ?", true).Find(&words).Error; err != nil {
		return err
	}
	for i := range words {
		tree.AddWord(words[i].Word, words[i].Level, words[i].Category)
	}
	log.Printf("Loaded %d sensitive words from database", len(words))
	return nil
}

// loadFromRedis loads the words stored in Redis into the active tree.
func (s *sensitiveServiceImpl) loadFromRedis(ctx context.Context) error {
	return s.loadFromRedisInto(ctx, s.currentTree())
}

// loadFromRedisInto adds every word stored under the configured key prefix to
// tree. Keys are walked with SCAN rather than KEYS to avoid blocking Redis.
// Entries that cannot be read or decoded are skipped. It is a no-op when
// Redis or the key prefix is not configured.
func (s *sensitiveServiceImpl) loadFromRedisInto(ctx context.Context, tree *SensitiveWordTree) error {
	if s.redis == nil || s.config.RedisKeyPrefix == "" {
		return nil
	}

	pattern := fmt.Sprintf("%s:*", s.config.RedisKeyPrefix)
	var cursor uint64
	for {
		keys, next, err := s.redis.GetClient().Scan(ctx, cursor, pattern, 100).Result()
		if err != nil {
			return err
		}

		for _, key := range keys {
			raw, err := s.redis.Get(ctx, key)
			if err != nil {
				continue // best-effort: skip unreadable entries
			}
			var entry map[string]interface{}
			if err := json.Unmarshal([]byte(raw), &entry); err != nil {
				continue // best-effort: skip malformed entries
			}

			// Missing or mistyped fields fall back to their zero values.
			word, _ := entry["word"].(string)
			category, _ := entry["category"].(string)
			level, _ := entry["level"].(float64) // JSON numbers decode as float64
			if word == "" {
				continue
			}
			// Normalize like AddWord so stored entries carrying an empty
			// category or out-of-range level load with valid metadata.
			tree.AddWord(
				word,
				normalizeLevel(model.SensitiveWordLevel(int(level))),
				normalizeCategory(model.SensitiveWordCategory(category)),
			)
		}

		cursor = next
		if cursor == 0 {
			return nil
		}
	}
}

// ==================== Helper functions ====================

// ContainsSensitiveWord reports whether text contains any word from tree.
// A nil tree or empty text yields false.
func ContainsSensitiveWord(text string, tree *SensitiveWordTree) bool {
	if tree == nil || text == "" {
		return false
	}
	found, _ := tree.Check(text)
	return found
}

// FilterSensitiveWords returns text with the sensitive words from tree
// replaced by repl ("***" when repl is empty). A nil tree or empty text
// returns text unchanged.
func FilterSensitiveWords(text string, tree *SensitiveWordTree, repl string) string {
	if tree == nil || text == "" {
		return text
	}
	if repl == "" {
		repl = defaultReplaceStr
	}
	return tree.Replace(text, repl)
}

// ValidateTextLength reports whether the number of runes (not bytes) in text
// lies within [minLen, maxLen].
func ValidateTextLength(text string, minLen, maxLen int) bool {
	n := utf8.RuneCountInString(text)
	return n >= minLen && n <= maxLen
}

// SanitizeText collapses every run of whitespace into a single space and
// trims leading and trailing whitespace.
func SanitizeText(text string) string {
	return strings.TrimSpace(whitespaceRun.ReplaceAllString(text, " "))
}

// ==================== Default sensitive-word list ====================

// DefaultSensitiveWords returns a sample word set. These are placeholders;
// real words are expected to come from the database or configuration.
func DefaultSensitiveWords() map[string]struct{} {
	return map[string]struct{}{
		"测试敏感词1": {},
		"测试敏感词2": {},
		"测试敏感词3": {},
	}
}