Files
backend/internal/service/sensitive_service.go
lan 4d8f2ec997 Initial backend repository commit.
Set up project files and add .gitignore to exclude local build/runtime artifacts.

Made-with: Cursor
2026-03-09 21:28:58 +08:00

560 lines
13 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package service
import (
"context"
"encoding/json"
"fmt"
"log"
"regexp"
"strings"
"sync"
"time"
"unicode/utf8"
"carrot_bbs/internal/model"
redisclient "carrot_bbs/internal/pkg/redis"
"gorm.io/gorm"
)
// ==================== DFA 敏感词过滤实现 ====================
// SensitiveNode is one node of the sensitive-word trie. Each edge to a child
// is labelled with a single rune.
type SensitiveNode struct {
// Children maps the next rune to the node reached by following it.
Children map[rune]*SensitiveNode
// IsEnd reports whether a sensitive word ends at this node.
IsEnd bool
// Word, Level and Category describe the sensitive word that ends at this
// node. They are only meaningful while IsEnd is true.
Word string
Level model.SensitiveWordLevel
Category model.SensitiveWordCategory
}
// NewSensitiveNode returns a trie node with an empty, ready-to-use child map
// that does not mark the end of any word.
func NewSensitiveNode() *SensitiveNode {
	node := new(SensitiveNode)
	node.Children = map[rune]*SensitiveNode{}
	return node
}
// SensitiveWordTree is a trie of sensitive words guarded by a read/write
// mutex.
type SensitiveWordTree struct {
// root is the entry node of the trie; it does not correspond to any rune.
root *SensitiveNode
// wordCount is the number of distinct words currently marked in the trie.
wordCount int
// mu guards root's subtree and wordCount.
mu sync.RWMutex
// lastReload is set to the construction time by NewSensitiveWordTree.
lastReload time.Time
}
// NewSensitiveWordTree returns an empty trie whose lastReload timestamp is the
// moment of construction.
func NewSensitiveWordTree() *SensitiveWordTree {
	tree := new(SensitiveWordTree)
	tree.root = NewSensitiveNode()
	tree.lastReload = time.Now()
	return tree
}
// AddWord inserts word into the trie together with its level and category.
//
// The path through the trie is built from the lowercased word so that matching
// is case-insensitive, while the node keeps the word as it was passed in.
// Adding a word that is already present overwrites its stored metadata without
// changing the word count. An empty word is ignored.
func (t *SensitiveWordTree) AddWord(word string, level model.SensitiveWordLevel, category model.SensitiveWordCategory) {
	if len(word) == 0 {
		return
	}

	t.mu.Lock()
	defer t.mu.Unlock()

	// Walk (and extend where necessary) the path spelled by the lowercased word.
	cur := t.root
	for _, r := range strings.ToLower(word) {
		next, ok := cur.Children[r]
		if !ok {
			next = NewSensitiveNode()
			cur.Children[r] = next
		}
		cur = next
	}

	// Only a word that was not already marked counts as new.
	if !cur.IsEnd {
		t.wordCount++
		cur.IsEnd = true
	}
	cur.Word, cur.Level, cur.Category = word, level, category
}
// RemoveWord unmarks word in the trie. The lookup uses the lowercased word, so
// removal is case-insensitive. Removing an empty or unknown word is a no-op.
// Nodes along the path are kept; only the end-of-word marker and the stored
// word are cleared.
func (t *SensitiveWordTree) RemoveWord(word string) {
	if len(word) == 0 {
		return
	}

	t.mu.Lock()
	defer t.mu.Unlock()

	cur := t.root
	for _, r := range strings.ToLower(word) {
		next, ok := cur.Children[r]
		if !ok {
			// The path does not exist, so the word was never added.
			return
		}
		cur = next
	}

	if !cur.IsEnd {
		return
	}
	cur.IsEnd = false
	cur.Word = ""
	t.wordCount--
}
// Check reports whether text contains any sensitive word and returns the words
// that were found.
//
// The text is lowercased before matching. At every start position that is not
// already covered by an earlier match, the longest word beginning there is
// taken and its span is marked as covered. A word is returned once per
// matched occurrence, using the spelling stored in the trie. An empty text
// yields (false, nil).
func (t *SensitiveWordTree) Check(text string) (bool, []string) {
	if len(text) == 0 {
		return false, nil
	}

	t.mu.RLock()
	defer t.mu.RUnlock()

	chars := []rune(strings.ToLower(text))
	// covered[i] is true once position i belongs to a reported match.
	covered := make([]bool, len(chars))
	var hits []string

	for start := range chars {
		if covered[start] {
			continue
		}

		// Follow the trie as far as the text allows, remembering the last
		// position at which a complete word ended (the longest match).
		last, word := -1, ""
		cur := t.root
		for pos := start; pos < len(chars); pos++ {
			next, ok := cur.Children[chars[pos]]
			if !ok {
				break
			}
			cur = next
			if cur.IsEnd {
				last, word = pos, cur.Word
			}
		}
		if last < 0 {
			continue
		}

		for k := start; k <= last; k++ {
			covered[k] = true
		}
		hits = append(hits, word)
	}

	return len(hits) > 0, hits
}
// Replace returns text with every sensitive word replaced by repl.
//
// Matching is case-insensitive, consistent with AddWord (which stores the
// lowercased spelling) and Check (which lowercases the text): the trie is
// walked with a lowercased copy of the text, while characters that are not
// part of a match are copied from the original text unchanged. At each
// position the longest word starting there is replaced and scanning resumes
// right after it. Each match is replaced by one copy of repl regardless of the
// length of the matched word. An empty text is returned as is.
func (t *SensitiveWordTree) Replace(text string, repl string) string {
	if text == "" {
		return text
	}

	t.mu.RLock()
	defer t.mu.RUnlock()

	runes := []rune(text)
	// folded is what gets matched against the trie; runes is what gets copied
	// to the output. strings.ToLower maps rune by rune, so both slices are
	// expected to have the same length; if they ever do not, fall back to
	// matching the original runes rather than indexing out of step.
	folded := []rune(strings.ToLower(text))
	if len(folded) != len(runes) {
		folded = runes
	}

	replRunes := []rune(repl)
	result := make([]rune, 0, len(runes))

	for i := 0; i < len(runes); {
		// Find the end of the longest sensitive word starting at i.
		matchEnd := -1
		node := t.root
		for j := i; j < len(folded); j++ {
			child, exists := node.Children[folded[j]]
			if !exists {
				break
			}
			node = child
			if node.IsEnd {
				matchEnd = j
			}
		}

		if matchEnd < 0 {
			// No word starts here: keep the original character.
			result = append(result, runes[i])
			i++
			continue
		}

		// Emit the replacement and skip past the matched characters.
		result = append(result, replRunes...)
		i = matchEnd + 1
	}

	return string(result)
}
// WordCount returns the number of sensitive words currently stored in the
// trie, read under the read lock.
func (t *SensitiveWordTree) WordCount() int {
	t.mu.RLock()
	n := t.wordCount
	t.mu.RUnlock()
	return n
}
// ==================== 敏感词服务实现 ====================
// SensitiveService is the interface of the sensitive-word service.
type SensitiveService interface {
// Check reports whether text contains sensitive words, together with the
// words that were found.
Check(ctx context.Context, text string) (bool, []string)
// Replace returns text with sensitive words replaced by repl.
Replace(ctx context.Context, text string, repl string) string
// AddWord adds a sensitive word with the given category and level.
AddWord(ctx context.Context, word string, category string, level int) error
// RemoveWord removes a sensitive word.
RemoveWord(ctx context.Context, word string) error
// Reload reloads the sensitive-word dictionary.
Reload(ctx context.Context) error
// GetWordCount returns the number of sensitive words.
GetWordCount(ctx context.Context) int
}
// sensitiveServiceImpl is the SensitiveService implementation returned by
// NewSensitiveService.
type sensitiveServiceImpl struct {
// tree is the in-memory trie that Check and Replace match against.
tree *SensitiveWordTree
// db is the database handle used for persistence; database steps are
// skipped when it is nil.
db *gorm.DB
// redis is the Redis client used for synchronisation; Redis steps are
// skipped when it is nil.
redis *redisclient.Client
// config holds the service settings supplied at construction.
config *SensitiveConfig
// mu is a read/write mutex owned by the service. The trie carries its own
// lock for node access.
mu sync.RWMutex
// replaceStr is the replacement used when a caller passes an empty one.
replaceStr string
}
// SensitiveConfig holds the configuration of the sensitive-word service.
type SensitiveConfig struct {
// Enabled switches filtering on. When false, the service's Check reports
// no match and Replace returns the text unchanged.
Enabled bool `mapstructure:"enabled" yaml:"enabled"`
// ReplaceStr is the default replacement string. NewSensitiveService falls
// back to "***" when it is empty.
ReplaceStr string `mapstructure:"replace_str" yaml:"replace_str"`
// MinMatchLen is the minimum match length.
// NOTE(review): no code visible in this file reads this field; confirm
// whether it is consumed elsewhere.
MinMatchLen int `mapstructure:"min_match_len" yaml:"min_match_len"`
// LoadFromDB enables loading words from the database.
LoadFromDB bool `mapstructure:"load_from_db" yaml:"load_from_db"`
// LoadFromRedis enables loading words from Redis.
LoadFromRedis bool `mapstructure:"load_from_redis" yaml:"load_from_redis"`
// RedisKeyPrefix is the Redis key prefix; word keys have the form
// "<prefix>:<word>". Redis steps are skipped when it is empty.
RedisKeyPrefix string `mapstructure:"redis_key_prefix" yaml:"redis_key_prefix"`
}
// NewSensitiveService builds the sensitive-word service and performs the
// initial dictionary load.
//
// The default replacement string comes from config.ReplaceStr and falls back
// to "***" when that is empty. Words are loaded from the database when
// config.LoadFromDB is set, then from Redis when config.LoadFromRedis is set
// and redisClient is non-nil. A failed load is logged and does not prevent the
// service from being returned. config must not be nil.
func NewSensitiveService(db *gorm.DB, redisClient *redisclient.Client, config *SensitiveConfig) SensitiveService {
	replacement := config.ReplaceStr
	if replacement == "" {
		replacement = "***"
	}

	svc := &sensitiveServiceImpl{
		tree:       NewSensitiveWordTree(),
		db:         db,
		redis:      redisClient,
		config:     config,
		replaceStr: replacement,
	}

	// Initial load runs outside any request, hence the background context.
	ctx := context.Background()
	if config.LoadFromDB {
		if err := svc.loadFromDB(ctx); err != nil {
			log.Printf("Failed to load sensitive words from database: %v", err)
		}
	}
	if redisClient != nil && config.LoadFromRedis {
		if err := svc.loadFromRedis(ctx); err != nil {
			log.Printf("Failed to load sensitive words from redis: %v", err)
		}
	}

	return svc
}
// Check reports whether text contains sensitive words and returns the words
// found. It returns (false, nil) when the service is disabled or text is
// empty; otherwise the result comes from the trie.
func (s *sensitiveServiceImpl) Check(ctx context.Context, text string) (bool, []string) {
	if s.config.Enabled && text != "" {
		return s.tree.Check(text)
	}
	return false, nil
}
// Replace returns text with sensitive words replaced by repl. The text is
// returned unchanged when the service is disabled or text is empty. An empty
// repl selects the service's default replacement string.
func (s *sensitiveServiceImpl) Replace(ctx context.Context, text string, repl string) string {
	switch {
	case !s.config.Enabled, text == "":
		return text
	case repl == "":
		return s.tree.Replace(text, s.replaceStr)
	default:
		return s.tree.Replace(text, repl)
	}
}
// AddWord registers a sensitive word in the trie and, on a best-effort basis,
// in the database and Redis.
//
// level is normalized to model.SensitiveWordLevelLow when it is outside 1..3,
// and an empty category becomes model.SensitiveWordCategoryOther. The same
// normalized values are written to the trie, the database and Redis, so a
// later reload from either store reproduces what was added.
//
// The only error returned is for an empty word. Database and Redis failures
// are logged and do not fail the call.
func (s *sensitiveServiceImpl) AddWord(ctx context.Context, word string, category string, level int) error {
	if word == "" {
		return fmt.Errorf("word cannot be empty")
	}

	// Normalize the level.
	wordLevel := model.SensitiveWordLevel(level)
	if wordLevel < 1 || wordLevel > 3 {
		wordLevel = model.SensitiveWordLevelLow
	}

	// Normalize the category.
	wordCategory := model.SensitiveWordCategory(category)
	if wordCategory == "" {
		wordCategory = model.SensitiveWordCategoryOther
	}

	// Make the word effective immediately.
	s.tree.AddWord(word, wordLevel, wordCategory)

	// Persist to the database: update the row if the word exists, otherwise
	// create it.
	if s.db != nil {
		var existing model.SensitiveWord
		lookupErr := s.db.Where("word = ?", word).First(&existing).Error
		switch {
		case lookupErr == nil:
			existing.Category = wordCategory
			existing.Level = wordLevel
			existing.IsActive = true
			if err := s.db.Save(&existing).Error; err != nil {
				log.Printf("Failed to update sensitive word in database: %v", err)
			}
		case lookupErr == gorm.ErrRecordNotFound:
			sensitiveWord := model.SensitiveWord{
				Word:     word,
				Category: wordCategory,
				Level:    wordLevel,
				IsActive: true,
			}
			if err := s.db.Create(&sensitiveWord).Error; err != nil {
				log.Printf("Failed to save sensitive word to database: %v", err)
			}
		default:
			// Any other lookup failure used to be dropped silently, leaving
			// the word unpersisted without a trace.
			log.Printf("Failed to look up sensitive word in database: %v", lookupErr)
		}
	}

	// Mirror to Redis under "<prefix>:<word>" with no expiry.
	if s.redis != nil && s.config.RedisKeyPrefix != "" {
		key := fmt.Sprintf("%s:%s", s.config.RedisKeyPrefix, word)
		jsonData, err := json.Marshal(map[string]interface{}{
			"word":     word,
			"category": string(wordCategory),
			"level":    int(wordLevel),
		})
		if err != nil {
			log.Printf("Failed to encode sensitive word for redis: %v", err)
		} else {
			// NOTE(review): the result of Set is not inspected because the
			// wrapper's signature is not visible here; consider logging it.
			s.redis.Set(ctx, key, jsonData, 0)
		}
	}

	return nil
}
// RemoveWord removes word from the trie, marks its database row inactive and
// deletes its Redis key.
//
// The only error returned is for an empty word. A database failure is logged
// and does not fail the call. The database and Redis steps are skipped when
// the respective client is nil (and, for Redis, when no key prefix is set).
func (s *sensitiveServiceImpl) RemoveWord(ctx context.Context, word string) error {
	if len(word) == 0 {
		return fmt.Errorf("word cannot be empty")
	}

	s.tree.RemoveWord(word)

	// Soft-delete in the database: the row stays but is_active becomes false.
	if s.db != nil {
		err := s.db.Model(&model.SensitiveWord{}).Where("word = ?", word).Update("is_active", false).Error
		if err != nil {
			log.Printf("Failed to deactivate sensitive word in database: %v", err)
		}
	}

	// Drop the mirrored entry from Redis.
	if s.redis != nil && s.config.RedisKeyPrefix != "" {
		s.redis.Del(ctx, fmt.Sprintf("%s:%s", s.config.RedisKeyPrefix, word))
	}

	return nil
}
// Reload rebuilds the sensitive-word dictionary from the configured sources
// (database first, then Redis) and puts it into service.
//
// The new dictionary is built in a separate staging tree and only replaces the
// live one after every configured source has loaded successfully. While a
// reload is running, Check and Replace keep matching against the previous
// dictionary, and when a source fails the previous dictionary stays in place
// and the wrapped error is returned.
func (s *sensitiveServiceImpl) Reload(ctx context.Context) error {
	// The staging service shares the clients and configuration but owns a
	// fresh tree, so the existing loaders can fill it without touching the
	// live one.
	staging := &sensitiveServiceImpl{
		tree:       NewSensitiveWordTree(),
		db:         s.db,
		redis:      s.redis,
		config:     s.config,
		replaceStr: s.replaceStr,
	}

	// Load from the database.
	if s.config.LoadFromDB {
		if err := staging.loadFromDB(ctx); err != nil {
			return fmt.Errorf("failed to load from database: %w", err)
		}
	}

	// Load from Redis.
	if s.config.LoadFromRedis && s.redis != nil {
		if err := staging.loadFromRedis(ctx); err != nil {
			return fmt.Errorf("failed to load from redis: %w", err)
		}
	}

	// Publish the fully built tree. The lock orders concurrent reloads.
	// NOTE(review): the other methods in this file read s.tree without
	// holding mu, so this does not by itself synchronise readers.
	s.mu.Lock()
	s.tree = staging.tree
	s.mu.Unlock()

	return nil
}
// GetWordCount returns the number of sensitive words held by the current
// trie.
func (s *sensitiveServiceImpl) GetWordCount(ctx context.Context) int {
	tree := s.tree
	return tree.WordCount()
}
// loadFromDB adds every active sensitive word stored in the database to the
// service's trie and logs how many rows were loaded.
//
// The query is bound to ctx, so cancelling ctx or exceeding its deadline
// aborts the load and surfaces as the returned error. It is a no-op returning
// nil when no database handle is configured.
func (s *sensitiveServiceImpl) loadFromDB(ctx context.Context) error {
	if s.db == nil {
		return nil
	}

	var words []model.SensitiveWord
	if err := s.db.WithContext(ctx).Where("is_active = ?", true).Find(&words).Error; err != nil {
		return err
	}

	// Index instead of ranging by value to avoid copying each row.
	for i := range words {
		w := &words[i]
		s.tree.AddWord(w.Word, w.Level, w.Category)
	}

	log.Printf("Loaded %d sensitive words from database", len(words))
	return nil
}
// loadFromRedis adds the sensitive words stored in Redis under
// "<RedisKeyPrefix>:*" to the service's trie.
//
// Keys are enumerated with SCAN in batches rather than KEYS, to avoid blocking
// Redis. Each value is expected to be a JSON object with "word", "category"
// and "level" fields. Level and category are normalized the same way AddWord
// does (level outside 1..3 becomes model.SensitiveWordLevelLow, an empty
// category becomes model.SensitiveWordCategoryOther). Entries that cannot be
// fetched or decoded, or that carry no word, are skipped; the totals of loaded
// and skipped entries are logged. A SCAN failure aborts the load and is
// returned. It is a no-op returning nil when Redis or the key prefix is not
// configured.
func (s *sensitiveServiceImpl) loadFromRedis(ctx context.Context) error {
	if s.redis == nil || s.config.RedisKeyPrefix == "" {
		return nil
	}

	pattern := fmt.Sprintf("%s:*", s.config.RedisKeyPrefix)
	var (
		cursor  uint64
		loaded  int
		skipped int
	)

	for {
		keys, nextCursor, err := s.redis.GetClient().Scan(ctx, cursor, pattern, 100).Result()
		if err != nil {
			return err
		}

		for _, key := range keys {
			data, err := s.redis.Get(ctx, key)
			if err != nil {
				skipped++
				continue
			}

			var wordData map[string]interface{}
			if err := json.Unmarshal([]byte(data), &wordData); err != nil {
				skipped++
				continue
			}

			word, _ := wordData["word"].(string)
			if word == "" {
				skipped++
				continue
			}
			category, _ := wordData["category"].(string)
			// encoding/json decodes JSON numbers into float64.
			level, _ := wordData["level"].(float64)

			wordLevel := model.SensitiveWordLevel(int(level))
			if wordLevel < 1 || wordLevel > 3 {
				wordLevel = model.SensitiveWordLevelLow
			}
			wordCategory := model.SensitiveWordCategory(category)
			if wordCategory == "" {
				wordCategory = model.SensitiveWordCategoryOther
			}

			s.tree.AddWord(word, wordLevel, wordCategory)
			loaded++
		}

		// A zero cursor from SCAN means the iteration is complete.
		cursor = nextCursor
		if cursor == 0 {
			break
		}
	}

	log.Printf("Loaded %d sensitive word entries from redis", loaded)
	if skipped > 0 {
		log.Printf("Skipped %d unreadable sensitive word entries in redis", skipped)
	}
	return nil
}
// ==================== 辅助函数 ====================
// ContainsSensitiveWord reports whether text contains any word from tree. It
// returns false for a nil tree or an empty text.
func ContainsSensitiveWord(text string, tree *SensitiveWordTree) bool {
	if text == "" || tree == nil {
		return false
	}
	found, _ := tree.Check(text)
	return found
}
// FilterSensitiveWords returns text with the words found in tree replaced by
// repl. The text is returned unchanged for a nil tree or an empty text, and an
// empty repl defaults to "***".
func FilterSensitiveWords(text string, tree *SensitiveWordTree, repl string) string {
	switch {
	case tree == nil, text == "":
		return text
	case repl == "":
		return tree.Replace(text, "***")
	}
	return tree.Replace(text, repl)
}
// ValidateTextLength reports whether the length of text, counted in runes
// rather than bytes, lies within the inclusive range [minLen, maxLen].
func ValidateTextLength(text string, minLen, maxLen int) bool {
	n := utf8.RuneCountInString(text)
	if n < minLen {
		return false
	}
	return n <= maxLen
}
// sanitizeSpaceRe matches a run of one or more whitespace characters. In Go's
// regexp syntax `\s` covers the ASCII whitespace set [\t\n\f\r ] only. It is
// compiled once at package scope instead of on every SanitizeText call.
var sanitizeSpaceRe = regexp.MustCompile(`\s+`)

// SanitizeText collapses every run of whitespace in text into a single space
// and then trims leading and trailing whitespace.
func SanitizeText(text string) string {
	collapsed := sanitizeSpaceRe.ReplaceAllString(text, " ")
	return strings.TrimSpace(collapsed)
}
// ==================== 默认敏感词列表 ====================
// DefaultSensitiveWords returns a freshly allocated set of sample sensitive
// words. The entries are placeholders; real words are expected to come from
// the database or configuration.
func DefaultSensitiveWords() map[string]struct{} {
	samples := [...]string{
		"测试敏感词1",
		"测试敏感词2",
		"测试敏感词3",
	}
	words := make(map[string]struct{}, len(samples))
	for _, w := range samples {
		words[w] = struct{}{}
	}
	return words
}