Files
backend/internal/service/sensitive_service.go

560 lines
13 KiB
Go
Raw Permalink Normal View History

package service
import (
"context"
"encoding/json"
"fmt"
"log"
"regexp"
"strings"
"sync"
"time"
"unicode/utf8"
"carrot_bbs/internal/model"
redisclient "carrot_bbs/internal/pkg/redis"
"gorm.io/gorm"
)
// ==================== DFA sensitive-word filter implementation ====================
// SensitiveNode is one node of the sensitive-word trie. Each edge out of a
// node is labelled with a single rune.
type SensitiveNode struct {
// Children maps the next rune to the child node reached through it.
Children map[rune]*SensitiveNode
// IsEnd reports whether a sensitive word terminates at this node.
IsEnd bool
// Word, Level and Category describe the sensitive word that ends at this
// node; they are only meaningful while IsEnd is true.
Word string
Level model.SensitiveWordLevel
Category model.SensitiveWordCategory
}
// NewSensitiveNode returns an empty trie node: it has an initialized (empty)
// child map and does not mark the end of any word.
func NewSensitiveNode() *SensitiveNode {
	node := new(SensitiveNode)
	node.Children = map[rune]*SensitiveNode{}
	return node
}
// SensitiveWordTree is a trie of sensitive words. Its methods take mu, so a
// tree must be used through a pointer and never copied.
type SensitiveWordTree struct {
// root is the entry node of the trie; it never represents a word itself.
root *SensitiveNode
// wordCount is the number of nodes currently flagged as a word end; AddWord
// and RemoveWord keep it up to date.
wordCount int
// mu guards root's subtree and wordCount: writers take the write lock,
// Check/Replace/WordCount take the read lock.
mu sync.RWMutex
// lastReload is set to the construction time by NewSensitiveWordTree.
lastReload time.Time
}
// NewSensitiveWordTree returns an empty tree: a fresh root node, a word count
// of zero, and lastReload stamped with the current time.
func NewSensitiveWordTree() *SensitiveWordTree {
	tree := &SensitiveWordTree{root: NewSensitiveNode()}
	tree.lastReload = time.Now()
	return tree
}
// AddWord inserts word into the trie with the given level and category.
//
// The trie path is built from the lower-cased word so that lookups can be
// case-insensitive, while the node keeps the word exactly as it was passed in.
// Adding a word that is already present overwrites its metadata without
// changing the word count. An empty word is ignored.
func (t *SensitiveWordTree) AddWord(word string, level model.SensitiveWordLevel, category model.SensitiveWordCategory) {
	if len(word) == 0 {
		return
	}

	t.mu.Lock()
	defer t.mu.Unlock()

	// Walk the lower-cased word rune by rune, creating missing nodes on the way.
	cur := t.root
	for _, ch := range strings.ToLower(word) {
		next, ok := cur.Children[ch]
		if !ok {
			next = NewSensitiveNode()
			cur.Children[ch] = next
		}
		cur = next
	}

	// Only a node that was not already a word end counts as a new word.
	if !cur.IsEnd {
		cur.IsEnd = true
		t.wordCount++
	}
	cur.Word, cur.Level, cur.Category = word, level, category
}
// RemoveWord unflags word in the trie, matching case-insensitively.
//
// The node that ended the word is cleared (IsEnd reset, Word emptied) and the
// word count is decremented; the nodes themselves stay in place. Removing an
// empty word, or a word that is not in the trie, does nothing.
func (t *SensitiveWordTree) RemoveWord(word string) {
	if len(word) == 0 {
		return
	}

	t.mu.Lock()
	defer t.mu.Unlock()

	// Follow the lower-cased word down the trie; a missing edge means the
	// word was never added.
	cur := t.root
	for _, ch := range strings.ToLower(word) {
		next, ok := cur.Children[ch]
		if !ok {
			return
		}
		cur = next
	}

	if !cur.IsEnd {
		return
	}
	cur.IsEnd = false
	cur.Word = ""
	t.wordCount--
}
// Check scans text for sensitive words, case-insensitively.
//
// It returns whether anything matched and the matched words in order of
// appearance, each spelled as stored in the trie. At every position the
// longest word starting there wins; a position already covered by an earlier
// match does not start a new one. A word that occurs several times is
// reported once per occurrence. When nothing matches the slice is nil.
func (t *SensitiveWordTree) Check(text string) (bool, []string) {
	if text == "" {
		return false, nil
	}

	t.mu.RLock()
	defer t.mu.RUnlock()

	chars := []rune(strings.ToLower(text))
	// covered[i] is true once rune i belongs to a reported match.
	covered := make([]bool, len(chars))
	var hits []string

	for start := range chars {
		if covered[start] {
			continue
		}

		// Walk the trie from start, remembering the furthest word end seen.
		end, word := -1, ""
		cur := t.root
		for pos := start; pos < len(chars); pos++ {
			next, ok := cur.Children[chars[pos]]
			if !ok {
				break
			}
			cur = next
			if cur.IsEnd {
				end, word = pos, cur.Word
			}
		}
		if end < 0 {
			continue
		}

		for k := start; k <= end; k++ {
			covered[k] = true
		}
		hits = append(hits, word)
	}
	return len(hits) > 0, hits
}
// Replace returns text with every sensitive word replaced by repl.
//
// Matching is case-insensitive, consistent with AddWord (which stores the
// lower-cased word) and Check (which lower-cases the text): the trie is walked
// over a lower-cased copy of text while the output is assembled from the
// original runes, so everything that is not masked keeps its casing.
//
// The scan runs left to right. At each position the longest word starting
// there is replaced by one copy of repl and the scan resumes after it, so
// matches never overlap. An empty repl removes the matched words. An empty
// text is returned unchanged.
func (t *SensitiveWordTree) Replace(text string, repl string) string {
	if text == "" {
		return text
	}

	t.mu.RLock()
	defer t.mu.RUnlock()

	runes := []rune(text)
	folded := []rune(strings.ToLower(text))
	if len(folded) != len(runes) {
		// strings.ToLower maps rune for rune, so the two slices are expected
		// to line up; if they ever do not, fall back to exact matching rather
		// than index the original text with misaligned positions.
		folded = runes
	}
	replRunes := []rune(repl)
	result := make([]rune, 0, len(runes))

	for i := 0; i < len(runes); i++ {
		// Find the end of the longest word starting at i, if any.
		node := t.root
		matchEnd := -1
		for j := i; j < len(folded); j++ {
			child, ok := node.Children[folded[j]]
			if !ok {
				break
			}
			node = child
			if node.IsEnd {
				matchEnd = j
			}
		}

		if matchEnd < 0 {
			result = append(result, runes[i])
			continue
		}
		result = append(result, replRunes...)
		// Resume right after the match (the loop's i++ steps past matchEnd).
		i = matchEnd
	}
	return string(result)
}
// WordCount returns the number of sensitive words currently in the tree,
// read under the tree's read lock.
func (t *SensitiveWordTree) WordCount() int {
	t.mu.RLock()
	count := t.wordCount
	t.mu.RUnlock()
	return count
}
// ==================== Sensitive-word service implementation ====================
// SensitiveService is the sensitive-word service interface: text checking and
// masking plus management of the word list.
type SensitiveService interface {
// Check reports whether text contains sensitive words and returns the
// words that matched.
Check(ctx context.Context, text string) (bool, []string)
// Replace returns text with sensitive words replaced by repl.
Replace(ctx context.Context, text string, repl string) string
// AddWord adds a sensitive word with the given category and level.
AddWord(ctx context.Context, word string, category string, level int) error
// RemoveWord removes a sensitive word.
RemoveWord(ctx context.Context, word string) error
// Reload reloads the sensitive-word list from its configured sources.
Reload(ctx context.Context) error
// GetWordCount returns the number of sensitive words currently loaded.
GetWordCount(ctx context.Context) int
}
// sensitiveServiceImpl is the SensitiveService implementation returned by
// NewSensitiveService. It keeps the words in an in-memory trie and mirrors
// changes to the database and Redis when those clients are set.
type sensitiveServiceImpl struct {
// tree is the in-memory trie that Check and Replace run against.
tree *SensitiveWordTree
// db persists sensitive words; may be nil, in which case DB steps are skipped.
db *gorm.DB
// redis mirrors sensitive words; may be nil, in which case Redis steps are skipped.
redis *redisclient.Client
// config holds the service settings; methods dereference it without a nil check.
config *SensitiveConfig
// mu is presumably meant to guard service-level state such as the tree
// pointer — NOTE(review): confirm which methods are expected to take it.
mu sync.RWMutex
// replaceStr is the default replacement used by Replace when the caller
// passes an empty repl; it is config.ReplaceStr, or "***" when that is empty.
replaceStr string
}
// SensitiveConfig holds the settings of the sensitive-word service.
type SensitiveConfig struct {
// Enabled switches filtering on. When false, the service's Check reports no
// match and its Replace returns the text unchanged.
Enabled bool `mapstructure:"enabled" yaml:"enabled"`
// ReplaceStr is the default replacement for matched words; when empty the
// service falls back to "***".
ReplaceStr string `mapstructure:"replace_str" yaml:"replace_str"`
// MinMatchLen is the minimum match length.
// NOTE(review): not referenced by the code visible in this file — confirm it is honored.
MinMatchLen int `mapstructure:"min_match_len" yaml:"min_match_len"`
// LoadFromDB makes the service load active words from the database.
LoadFromDB bool `mapstructure:"load_from_db" yaml:"load_from_db"`
// LoadFromRedis makes the service load words from Redis.
LoadFromRedis bool `mapstructure:"load_from_redis" yaml:"load_from_redis"`
// RedisKeyPrefix is the Redis key prefix; entries are stored under
// "<prefix>:<word>". When empty, Redis reads and writes are skipped.
RedisKeyPrefix string `mapstructure:"redis_key_prefix" yaml:"redis_key_prefix"`
}
// NewSensitiveService builds the sensitive-word service and performs the
// initial word load.
//
// db and redisClient may be nil; config must not be nil. The default
// replacement is config.ReplaceStr, or "***" when that is empty. Words are
// loaded from the database when config.LoadFromDB is set, then from Redis when
// config.LoadFromRedis is set and a Redis client was supplied. A failed load is
// logged and does not prevent the service from being returned.
func NewSensitiveService(db *gorm.DB, redisClient *redisclient.Client, config *SensitiveConfig) SensitiveService {
	svc := &sensitiveServiceImpl{
		tree:       NewSensitiveWordTree(),
		db:         db,
		redis:      redisClient,
		config:     config,
		replaceStr: config.ReplaceStr,
	}
	if len(svc.replaceStr) == 0 {
		svc.replaceStr = "***"
	}

	// The constructor has no caller context, so the initial load runs on a
	// background one.
	ctx := context.Background()
	if config.LoadFromDB {
		err := svc.loadFromDB(ctx)
		if err != nil {
			log.Printf("Failed to load sensitive words from database: %v", err)
		}
	}
	if redisClient != nil && config.LoadFromRedis {
		err := svc.loadFromRedis(ctx)
		if err != nil {
			log.Printf("Failed to load sensitive words from redis: %v", err)
		}
	}
	return svc
}
// Check reports whether text contains sensitive words and returns the matches.
// It returns (false, nil) without consulting the tree when the service is
// disabled or text is empty.
func (s *sensitiveServiceImpl) Check(ctx context.Context, text string) (bool, []string) {
	if s.config.Enabled && text != "" {
		return s.tree.Check(text)
	}
	return false, nil
}
// Replace returns text with sensitive words replaced by repl. An empty repl
// selects the service's default replacement. The text is returned unchanged
// when the service is disabled or text is empty.
func (s *sensitiveServiceImpl) Replace(ctx context.Context, text string, repl string) string {
	if !s.config.Enabled || text == "" {
		return text
	}

	mask := repl
	if mask == "" {
		mask = s.replaceStr
	}
	return s.tree.Replace(text, mask)
}
// AddWord registers word as a sensitive word in the in-memory tree and mirrors
// it to the database and Redis when those are configured.
//
// level is normalized to model.SensitiveWordLevelLow when it is outside 1..3,
// and an empty category becomes model.SensitiveWordCategoryOther. The same
// normalized values are written to all three stores, so a later load from the
// database or Redis reproduces what the tree holds now.
//
// Only an empty word is reported as an error. Persistence is best effort:
// database and encoding failures are logged and the call still returns nil.
func (s *sensitiveServiceImpl) AddWord(ctx context.Context, word string, category string, level int) error {
	if word == "" {
		return fmt.Errorf("word cannot be empty")
	}

	wordLevel := model.SensitiveWordLevel(level)
	if wordLevel < 1 || wordLevel > 3 {
		wordLevel = model.SensitiveWordLevelLow
	}
	wordCategory := model.SensitiveWordCategory(category)
	if wordCategory == "" {
		wordCategory = model.SensitiveWordCategoryOther
	}

	// The tree is updated first so filtering takes effect even if
	// persistence fails below.
	s.tree.AddWord(word, wordLevel, wordCategory)

	// Upsert into the database: create the row, or refresh and re-activate
	// an existing one.
	if s.db != nil {
		var existing model.SensitiveWord
		lookupErr := s.db.Where("word = ?", word).First(&existing).Error
		switch {
		case lookupErr == nil:
			existing.Category = wordCategory
			existing.Level = wordLevel
			existing.IsActive = true
			if err := s.db.Save(&existing).Error; err != nil {
				log.Printf("Failed to update sensitive word in database: %v", err)
			}
		case lookupErr == gorm.ErrRecordNotFound:
			created := model.SensitiveWord{
				Word:     word,
				Category: wordCategory,
				Level:    wordLevel,
				IsActive: true,
			}
			if err := s.db.Create(&created).Error; err != nil {
				log.Printf("Failed to save sensitive word to database: %v", err)
			}
		default:
			// Any other lookup failure used to be dropped silently.
			log.Printf("Failed to look up sensitive word in database: %v", lookupErr)
		}
	}

	// Mirror to Redis under "<prefix>:<word>" with no expiry.
	if s.redis != nil && s.config.RedisKeyPrefix != "" {
		key := fmt.Sprintf("%s:%s", s.config.RedisKeyPrefix, word)
		payload, err := json.Marshal(map[string]interface{}{
			"word":     word,
			"category": string(wordCategory),
			"level":    int(wordLevel),
		})
		if err != nil {
			log.Printf("Failed to encode sensitive word for redis: %v", err)
		} else {
			s.redis.Set(ctx, key, payload, 0)
		}
	}
	return nil
}
// RemoveWord removes word from the in-memory tree, marks it inactive in the
// database, and deletes its Redis entry.
//
// Only an empty word is reported as an error; a database failure is logged and
// the call still returns nil.
func (s *sensitiveServiceImpl) RemoveWord(ctx context.Context, word string) error {
	if len(word) == 0 {
		return fmt.Errorf("word cannot be empty")
	}

	s.tree.RemoveWord(word)

	// The row is kept and only flagged inactive.
	if s.db != nil {
		err := s.db.Model(&model.SensitiveWord{}).Where("word = ?", word).Update("is_active", false).Error
		if err != nil {
			log.Printf("Failed to deactivate sensitive word in database: %v", err)
		}
	}

	if s.redis == nil || s.config.RedisKeyPrefix == "" {
		return nil
	}
	s.redis.Del(ctx, fmt.Sprintf("%s:%s", s.config.RedisKeyPrefix, word))
	return nil
}
// Reload rebuilds the sensitive-word tree from the configured sources (the
// database first, then Redis) and installs it in place of the current tree.
//
// The new tree is assembled off to the side and only swapped in once every
// configured source has loaded. While the reload is running, and when it
// fails, Check and Replace keep using the previous word list instead of an
// empty or half-loaded one. On failure the wrapped load error is returned and
// the current tree is left untouched.
func (s *sensitiveServiceImpl) Reload(ctx context.Context) error {
	// A throwaway service value sharing the same clients and config lets the
	// existing loaders fill a fresh tree without touching s.tree.
	staging := &sensitiveServiceImpl{
		tree:       NewSensitiveWordTree(),
		db:         s.db,
		redis:      s.redis,
		config:     s.config,
		replaceStr: s.replaceStr,
	}

	if s.config.LoadFromDB {
		if err := staging.loadFromDB(ctx); err != nil {
			return fmt.Errorf("failed to load from database: %w", err)
		}
	}
	if s.config.LoadFromRedis && s.redis != nil {
		if err := staging.loadFromRedis(ctx); err != nil {
			return fmt.Errorf("failed to load from redis: %w", err)
		}
	}

	// NOTE(review): this lock serializes concurrent Reload calls; for the swap
	// to be fully race-free, the methods that read s.tree need to take s.mu too.
	s.mu.Lock()
	s.tree = staging.tree
	s.mu.Unlock()
	return nil
}
// GetWordCount returns how many sensitive words the in-memory tree currently
// holds.
func (s *sensitiveServiceImpl) GetWordCount(ctx context.Context) int {
	count := s.tree.WordCount()
	return count
}
// loadFromDB adds every active sensitive word stored in the database to the
// tree and logs how many rows were read. It is a no-op when no database is
// configured and returns the query error, if any.
func (s *sensitiveServiceImpl) loadFromDB(ctx context.Context) error {
	if s.db == nil {
		return nil
	}

	var rows []model.SensitiveWord
	err := s.db.Where("is_active = ?", true).Find(&rows).Error
	if err != nil {
		return err
	}

	for i := range rows {
		s.tree.AddWord(rows[i].Word, rows[i].Level, rows[i].Category)
	}
	log.Printf("Loaded %d sensitive words from database", len(rows))
	return nil
}
// loadFromRedis adds the sensitive words stored in Redis under
// "<RedisKeyPrefix>:*" to the tree.
//
// Keys are enumerated with SCAN (in batches of 100) rather than KEYS so the
// Redis server is not blocked. Each value is expected to be a JSON object with
// "word", "category" and "level" fields. Level and category are normalized the
// same way AddWord does it — a level outside 1..3 becomes
// model.SensitiveWordLevelLow and an empty category becomes
// model.SensitiveWordCategoryOther — so a missing or stale field cannot put an
// invalid value into the tree.
//
// Entries that cannot be read or decoded, or that carry no word, are skipped
// and counted in the summary log line. Only a SCAN failure is returned as an
// error. The call is a no-op when Redis or the key prefix is not configured.
func (s *sensitiveServiceImpl) loadFromRedis(ctx context.Context) error {
	if s.redis == nil || s.config.RedisKeyPrefix == "" {
		return nil
	}

	pattern := fmt.Sprintf("%s:*", s.config.RedisKeyPrefix)
	loaded, skipped := 0, 0
	var cursor uint64
	for {
		keys, nextCursor, err := s.redis.GetClient().Scan(ctx, cursor, pattern, 100).Result()
		if err != nil {
			return err
		}

		for _, key := range keys {
			data, err := s.redis.Get(ctx, key)
			if err != nil {
				skipped++
				continue
			}
			var wordData map[string]interface{}
			if err := json.Unmarshal([]byte(data), &wordData); err != nil {
				skipped++
				continue
			}

			word, _ := wordData["word"].(string)
			if word == "" {
				skipped++
				continue
			}
			// A missing or mistyped field yields the zero value, which the
			// normalization below maps to the defaults.
			category, _ := wordData["category"].(string)
			level, _ := wordData["level"].(float64) // JSON numbers decode as float64

			wordLevel := model.SensitiveWordLevel(int(level))
			if wordLevel < 1 || wordLevel > 3 {
				wordLevel = model.SensitiveWordLevelLow
			}
			wordCategory := model.SensitiveWordCategory(category)
			if wordCategory == "" {
				wordCategory = model.SensitiveWordCategoryOther
			}

			s.tree.AddWord(word, wordLevel, wordCategory)
			loaded++
		}

		// SCAN signals the end of the iteration with a zero cursor.
		cursor = nextCursor
		if cursor == 0 {
			break
		}
	}

	log.Printf("Loaded %d sensitive word entries from redis (%d skipped)", loaded, skipped)
	return nil
}
// ==================== Helper functions ====================
// ContainsSensitiveWord reports whether text contains any word from tree.
// A nil tree or an empty text yields false.
func ContainsSensitiveWord(text string, tree *SensitiveWordTree) bool {
	if tree != nil && text != "" {
		found, _ := tree.Check(text)
		return found
	}
	return false
}
// FilterSensitiveWords returns text with the sensitive words known to tree
// replaced by repl; an empty repl means "***". The text is returned unchanged
// when tree is nil or text is empty.
func FilterSensitiveWords(text string, tree *SensitiveWordTree, repl string) string {
	switch {
	case tree == nil, text == "":
		return text
	case repl == "":
		return tree.Replace(text, "***")
	default:
		return tree.Replace(text, repl)
	}
}
// ValidateTextLength reports whether the length of text, counted in runes
// rather than bytes, lies within [minLen, maxLen] inclusive.
func ValidateTextLength(text string, minLen, maxLen int) bool {
	n := utf8.RuneCountInString(text)
	if n < minLen {
		return false
	}
	return n <= maxLen
}
// sanitizeSpaceRun matches a run of one or more whitespace characters. It is
// compiled once at package initialization instead of on every SanitizeText
// call.
var sanitizeSpaceRun = regexp.MustCompile(`\s+`)

// SanitizeText normalizes whitespace in text: every run of whitespace matched
// by `\s+` is collapsed into a single space, then leading and trailing
// whitespace is trimmed.
func SanitizeText(text string) string {
	return strings.TrimSpace(sanitizeSpaceRun.ReplaceAllString(text, " "))
}
// ==================== Default sensitive-word list ====================
// DefaultSensitiveWords returns a sample set of sensitive words. The entries
// are placeholders; real word lists are expected to come from the database or
// configuration. Every call returns a fresh map.
func DefaultSensitiveWords() map[string]struct{} {
	samples := []string{
		"测试敏感词1",
		"测试敏感词2",
		"测试敏感词3",
	}
	words := make(map[string]struct{}, len(samples))
	for _, w := range samples {
		words[w] = struct{}{}
	}
	return words
}