Set up project files and add .gitignore to exclude local build/runtime artifacts. Made-with: Cursor
560 lines
13 KiB
Go
560 lines
13 KiB
Go
package service
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"fmt"
|
||
"log"
|
||
"regexp"
|
||
"strings"
|
||
"sync"
|
||
"time"
|
||
"unicode/utf8"
|
||
|
||
"carrot_bbs/internal/model"
|
||
redisclient "carrot_bbs/internal/pkg/redis"
|
||
|
||
"gorm.io/gorm"
|
||
)
|
||
|
||
// ==================== DFA 敏感词过滤实现 ====================
|
||
|
||
// SensitiveNode 敏感词树节点
|
||
type SensitiveNode struct {
|
||
// 子节点映射
|
||
Children map[rune]*SensitiveNode
|
||
// 是否为敏感词结尾
|
||
IsEnd bool
|
||
// 敏感词信息(仅在 IsEnd 为 true 时有效)
|
||
Word string
|
||
Level model.SensitiveWordLevel
|
||
Category model.SensitiveWordCategory
|
||
}
|
||
|
||
// NewSensitiveNode 创建新的敏感词节点
|
||
func NewSensitiveNode() *SensitiveNode {
|
||
return &SensitiveNode{
|
||
Children: make(map[rune]*SensitiveNode),
|
||
IsEnd: false,
|
||
}
|
||
}
|
||
|
||
// SensitiveWordTree 敏感词树
|
||
type SensitiveWordTree struct {
|
||
root *SensitiveNode
|
||
wordCount int
|
||
mu sync.RWMutex
|
||
lastReload time.Time
|
||
}
|
||
|
||
// NewSensitiveWordTree 创建新的敏感词树
|
||
func NewSensitiveWordTree() *SensitiveWordTree {
|
||
return &SensitiveWordTree{
|
||
root: NewSensitiveNode(),
|
||
wordCount: 0,
|
||
lastReload: time.Now(),
|
||
}
|
||
}
|
||
|
||
// AddWord 添加敏感词到树中
|
||
func (t *SensitiveWordTree) AddWord(word string, level model.SensitiveWordLevel, category model.SensitiveWordCategory) {
|
||
if word == "" {
|
||
return
|
||
}
|
||
|
||
t.mu.Lock()
|
||
defer t.mu.Unlock()
|
||
|
||
node := t.root
|
||
// 转换为小写进行匹配(不区分大小写)
|
||
lowerWord := strings.ToLower(word)
|
||
runes := []rune(lowerWord)
|
||
|
||
for _, r := range runes {
|
||
child, exists := node.Children[r]
|
||
if !exists {
|
||
child = NewSensitiveNode()
|
||
node.Children[r] = child
|
||
}
|
||
node = child
|
||
}
|
||
|
||
// 如果不是已存在的敏感词,则计数+1
|
||
if !node.IsEnd {
|
||
t.wordCount++
|
||
}
|
||
|
||
node.IsEnd = true
|
||
node.Word = word
|
||
node.Level = level
|
||
node.Category = category
|
||
}
|
||
|
||
// RemoveWord 从树中移除敏感词
|
||
func (t *SensitiveWordTree) RemoveWord(word string) {
|
||
if word == "" {
|
||
return
|
||
}
|
||
|
||
t.mu.Lock()
|
||
defer t.mu.Unlock()
|
||
|
||
lowerWord := strings.ToLower(word)
|
||
runes := []rune(lowerWord)
|
||
|
||
// 查找节点
|
||
node := t.root
|
||
for _, r := range runes {
|
||
child, exists := node.Children[r]
|
||
if !exists {
|
||
return // 敏感词不存在
|
||
}
|
||
node = child
|
||
}
|
||
|
||
if node.IsEnd {
|
||
node.IsEnd = false
|
||
node.Word = ""
|
||
t.wordCount--
|
||
}
|
||
}
|
||
|
||
// Check 检查文本是否包含敏感词,返回是否包含及敏感词列表
|
||
func (t *SensitiveWordTree) Check(text string) (bool, []string) {
|
||
if text == "" {
|
||
return false, nil
|
||
}
|
||
|
||
t.mu.RLock()
|
||
defer t.mu.RUnlock()
|
||
|
||
var foundWords []string
|
||
runes := []rune(strings.ToLower(text))
|
||
length := len(runes)
|
||
|
||
// 用于标记已找到的敏感词位置,避免重复计算
|
||
marked := make([]bool, length)
|
||
|
||
for i := 0; i < length; i++ {
|
||
// 从当前位置开始搜索
|
||
node := t.root
|
||
matchEnd := -1
|
||
matchWord := ""
|
||
|
||
for j := i; j < length; j++ {
|
||
child, exists := node.Children[runes[j]]
|
||
if !exists {
|
||
break
|
||
}
|
||
node = child
|
||
|
||
if node.IsEnd {
|
||
matchEnd = j
|
||
matchWord = node.Word
|
||
}
|
||
}
|
||
|
||
// 标记找到的敏感词位置
|
||
if matchEnd >= 0 && !marked[i] {
|
||
for k := i; k <= matchEnd; k++ {
|
||
marked[k] = true
|
||
}
|
||
foundWords = append(foundWords, matchWord)
|
||
}
|
||
}
|
||
|
||
return len(foundWords) > 0, foundWords
|
||
}
|
||
|
||
// Replace 替换文本中的敏感词
|
||
func (t *SensitiveWordTree) Replace(text string, repl string) string {
|
||
if text == "" {
|
||
return text
|
||
}
|
||
|
||
t.mu.RLock()
|
||
defer t.mu.RUnlock()
|
||
|
||
runes := []rune(text)
|
||
length := len(runes)
|
||
result := make([]rune, 0, length)
|
||
|
||
// 用于标记已替换的位置
|
||
marked := make([]bool, length)
|
||
|
||
for i := 0; i < length; i++ {
|
||
if marked[i] {
|
||
continue
|
||
}
|
||
|
||
// 从当前位置开始搜索
|
||
node := t.root
|
||
matchEnd := -1
|
||
|
||
for j := i; j < length; j++ {
|
||
child, exists := node.Children[runes[j]]
|
||
if !exists {
|
||
break
|
||
}
|
||
node = child
|
||
|
||
if node.IsEnd {
|
||
matchEnd = j
|
||
}
|
||
}
|
||
|
||
if matchEnd >= 0 {
|
||
// 标记已替换的位置
|
||
for k := i; k <= matchEnd; k++ {
|
||
marked[k] = true
|
||
}
|
||
// 追加替换符
|
||
replRunes := []rune(repl)
|
||
result = append(result, replRunes...)
|
||
// 跳过已匹配的字符
|
||
i = matchEnd
|
||
} else {
|
||
// 追加原字符
|
||
result = append(result, runes[i])
|
||
}
|
||
}
|
||
|
||
return string(result)
|
||
}
|
||
|
||
// WordCount 获取敏感词数量
|
||
func (t *SensitiveWordTree) WordCount() int {
|
||
t.mu.RLock()
|
||
defer t.mu.RUnlock()
|
||
return t.wordCount
|
||
}
|
||
|
||
// ==================== 敏感词服务实现 ====================
|
||
|
||
// SensitiveService is the application-facing API for sensitive-word
// detection, masking and word-list maintenance.
type SensitiveService interface {
	// Check reports whether text contains sensitive words and returns the
	// words that were found.
	Check(ctx context.Context, text string) (bool, []string)

	// Replace returns text with sensitive words replaced by repl.
	Replace(ctx context.Context, text string, repl string) string

	// AddWord registers a sensitive word with the given category and level.
	AddWord(ctx context.Context, word string, category string, level int) error

	// RemoveWord unregisters a sensitive word.
	RemoveWord(ctx context.Context, word string) error

	// Reload rebuilds the word list from its configured sources.
	Reload(ctx context.Context) error

	// GetWordCount returns the number of words currently loaded.
	GetWordCount(ctx context.Context) int
}
|
||
|
||
// sensitiveServiceImpl 敏感词服务实现
|
||
type sensitiveServiceImpl struct {
|
||
tree *SensitiveWordTree
|
||
db *gorm.DB
|
||
redis *redisclient.Client
|
||
config *SensitiveConfig
|
||
mu sync.RWMutex
|
||
replaceStr string
|
||
}
|
||
|
||
// SensitiveConfig holds the settings of the sensitive-word service.
type SensitiveConfig struct {
	// Enabled switches checking and replacing on; when false, Check and
	// Replace on the service are no-ops.
	Enabled bool `mapstructure:"enabled" yaml:"enabled"`
	// ReplaceStr is the default replacement text; the service falls back to
	// "***" when it is empty.
	ReplaceStr string `mapstructure:"replace_str" yaml:"replace_str"`
	// MinMatchLen is the minimum match length.
	// NOTE(review): no code visible in this file reads it - confirm usage.
	MinMatchLen int `mapstructure:"min_match_len" yaml:"min_match_len"`
	// LoadFromDB makes the service load words from the database.
	LoadFromDB bool `mapstructure:"load_from_db" yaml:"load_from_db"`
	// LoadFromRedis makes the service load words from Redis.
	LoadFromRedis bool `mapstructure:"load_from_redis" yaml:"load_from_redis"`
	// RedisKeyPrefix is the prefix of the Redis keys ("<prefix>:<word>");
	// Redis is skipped when it is empty.
	RedisKeyPrefix string `mapstructure:"redis_key_prefix" yaml:"redis_key_prefix"`
}
|
||
|
||
// NewSensitiveService 创建敏感词服务
|
||
func NewSensitiveService(db *gorm.DB, redisClient *redisclient.Client, config *SensitiveConfig) SensitiveService {
|
||
s := &sensitiveServiceImpl{
|
||
tree: NewSensitiveWordTree(),
|
||
db: db,
|
||
redis: redisClient,
|
||
config: config,
|
||
replaceStr: config.ReplaceStr,
|
||
}
|
||
|
||
// 如果未设置替换符,默认使用 ***
|
||
if s.replaceStr == "" {
|
||
s.replaceStr = "***"
|
||
}
|
||
|
||
// 初始化加载敏感词
|
||
if config.LoadFromDB {
|
||
if err := s.loadFromDB(context.Background()); err != nil {
|
||
log.Printf("Failed to load sensitive words from database: %v", err)
|
||
}
|
||
}
|
||
|
||
if config.LoadFromRedis && redisClient != nil {
|
||
if err := s.loadFromRedis(context.Background()); err != nil {
|
||
log.Printf("Failed to load sensitive words from redis: %v", err)
|
||
}
|
||
}
|
||
|
||
return s
|
||
}
|
||
|
||
// Check 检查文本是否包含敏感词
|
||
func (s *sensitiveServiceImpl) Check(ctx context.Context, text string) (bool, []string) {
|
||
if !s.config.Enabled {
|
||
return false, nil
|
||
}
|
||
if text == "" {
|
||
return false, nil
|
||
}
|
||
return s.tree.Check(text)
|
||
}
|
||
|
||
// Replace 替换敏感词
|
||
func (s *sensitiveServiceImpl) Replace(ctx context.Context, text string, repl string) string {
|
||
if !s.config.Enabled {
|
||
return text
|
||
}
|
||
if text == "" {
|
||
return text
|
||
}
|
||
|
||
// 如果未指定替换符,使用默认替换符
|
||
if repl == "" {
|
||
repl = s.replaceStr
|
||
}
|
||
|
||
return s.tree.Replace(text, repl)
|
||
}
|
||
|
||
// AddWord 添加敏感词
|
||
func (s *sensitiveServiceImpl) AddWord(ctx context.Context, word string, category string, level int) error {
|
||
if word == "" {
|
||
return fmt.Errorf("word cannot be empty")
|
||
}
|
||
|
||
// 转换为敏感词级别
|
||
wordLevel := model.SensitiveWordLevel(level)
|
||
if wordLevel < 1 || wordLevel > 3 {
|
||
wordLevel = model.SensitiveWordLevelLow
|
||
}
|
||
|
||
// 转换为敏感词分类
|
||
wordCategory := model.SensitiveWordCategory(category)
|
||
if wordCategory == "" {
|
||
wordCategory = model.SensitiveWordCategoryOther
|
||
}
|
||
|
||
// 添加到树
|
||
s.tree.AddWord(word, wordLevel, wordCategory)
|
||
|
||
// 持久化到数据库
|
||
if s.db != nil {
|
||
sensitiveWord := model.SensitiveWord{
|
||
Word: word,
|
||
Category: wordCategory,
|
||
Level: wordLevel,
|
||
IsActive: true,
|
||
}
|
||
|
||
// 使用 upsert 逻辑
|
||
var existing model.SensitiveWord
|
||
result := s.db.Where("word = ?", word).First(&existing)
|
||
if result.Error == gorm.ErrRecordNotFound {
|
||
if err := s.db.Create(&sensitiveWord).Error; err != nil {
|
||
log.Printf("Failed to save sensitive word to database: %v", err)
|
||
}
|
||
} else if result.Error == nil {
|
||
// 更新已存在的记录
|
||
existing.Category = wordCategory
|
||
existing.Level = wordLevel
|
||
existing.IsActive = true
|
||
if err := s.db.Save(&existing).Error; err != nil {
|
||
log.Printf("Failed to update sensitive word in database: %v", err)
|
||
}
|
||
}
|
||
}
|
||
|
||
// 同步到 Redis
|
||
if s.redis != nil && s.config.RedisKeyPrefix != "" {
|
||
key := fmt.Sprintf("%s:%s", s.config.RedisKeyPrefix, word)
|
||
data := map[string]interface{}{
|
||
"word": word,
|
||
"category": category,
|
||
"level": level,
|
||
}
|
||
jsonData, _ := json.Marshal(data)
|
||
s.redis.Set(ctx, key, jsonData, 0)
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// RemoveWord 移除敏感词
|
||
func (s *sensitiveServiceImpl) RemoveWord(ctx context.Context, word string) error {
|
||
if word == "" {
|
||
return fmt.Errorf("word cannot be empty")
|
||
}
|
||
|
||
// 从树中移除
|
||
s.tree.RemoveWord(word)
|
||
|
||
// 从数据库中标记为不活跃
|
||
if s.db != nil {
|
||
result := s.db.Model(&model.SensitiveWord{}).Where("word = ?", word).Update("is_active", false)
|
||
if result.Error != nil {
|
||
log.Printf("Failed to deactivate sensitive word in database: %v", result.Error)
|
||
}
|
||
}
|
||
|
||
// 从 Redis 中删除
|
||
if s.redis != nil && s.config.RedisKeyPrefix != "" {
|
||
key := fmt.Sprintf("%s:%s", s.config.RedisKeyPrefix, word)
|
||
s.redis.Del(ctx, key)
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// Reload 重新加载敏感词库
|
||
func (s *sensitiveServiceImpl) Reload(ctx context.Context) error {
|
||
// 清空现有树
|
||
s.tree = NewSensitiveWordTree()
|
||
|
||
// 从数据库加载
|
||
if s.config.LoadFromDB {
|
||
if err := s.loadFromDB(ctx); err != nil {
|
||
return fmt.Errorf("failed to load from database: %w", err)
|
||
}
|
||
}
|
||
|
||
// 从 Redis 加载
|
||
if s.config.LoadFromRedis && s.redis != nil {
|
||
if err := s.loadFromRedis(ctx); err != nil {
|
||
return fmt.Errorf("failed to load from redis: %w", err)
|
||
}
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// GetWordCount 获取敏感词数量
|
||
func (s *sensitiveServiceImpl) GetWordCount(ctx context.Context) int {
|
||
return s.tree.WordCount()
|
||
}
|
||
|
||
// loadFromDB 从数据库加载敏感词
|
||
func (s *sensitiveServiceImpl) loadFromDB(ctx context.Context) error {
|
||
if s.db == nil {
|
||
return nil
|
||
}
|
||
|
||
var words []model.SensitiveWord
|
||
if err := s.db.Where("is_active = ?", true).Find(&words).Error; err != nil {
|
||
return err
|
||
}
|
||
|
||
for _, word := range words {
|
||
s.tree.AddWord(word.Word, word.Level, word.Category)
|
||
}
|
||
|
||
log.Printf("Loaded %d sensitive words from database", len(words))
|
||
return nil
|
||
}
|
||
|
||
// loadFromRedis 从 Redis 加载敏感词
|
||
func (s *sensitiveServiceImpl) loadFromRedis(ctx context.Context) error {
|
||
if s.redis == nil || s.config.RedisKeyPrefix == "" {
|
||
return nil
|
||
}
|
||
|
||
// 使用 SCAN 命令代替 KEYS,避免阻塞
|
||
pattern := fmt.Sprintf("%s:*", s.config.RedisKeyPrefix)
|
||
var cursor uint64
|
||
for {
|
||
keys, nextCursor, err := s.redis.GetClient().Scan(ctx, cursor, pattern, 100).Result()
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
for _, key := range keys {
|
||
data, err := s.redis.Get(ctx, key)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
|
||
var wordData map[string]interface{}
|
||
if err := json.Unmarshal([]byte(data), &wordData); err != nil {
|
||
continue
|
||
}
|
||
|
||
word, _ := wordData["word"].(string)
|
||
category, _ := wordData["category"].(string)
|
||
level, _ := wordData["level"].(float64)
|
||
|
||
if word != "" {
|
||
s.tree.AddWord(word, model.SensitiveWordLevel(int(level)), model.SensitiveWordCategory(category))
|
||
}
|
||
}
|
||
|
||
cursor = nextCursor
|
||
if cursor == 0 {
|
||
break
|
||
}
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// ==================== 辅助函数 ====================
|
||
|
||
// ContainsSensitiveWord 快速检查文本是否包含敏感词
|
||
func ContainsSensitiveWord(text string, tree *SensitiveWordTree) bool {
|
||
if tree == nil || text == "" {
|
||
return false
|
||
}
|
||
hasSensitive, _ := tree.Check(text)
|
||
return hasSensitive
|
||
}
|
||
|
||
// FilterSensitiveWords 过滤敏感词并返回替换后的文本
|
||
func FilterSensitiveWords(text string, tree *SensitiveWordTree, repl string) string {
|
||
if tree == nil || text == "" {
|
||
return text
|
||
}
|
||
if repl == "" {
|
||
repl = "***"
|
||
}
|
||
return tree.Replace(text, repl)
|
||
}
|
||
|
||
// ValidateTextLength reports whether the length of text, counted in runes
// (not bytes), lies within the inclusive range [minLen, maxLen].
func ValidateTextLength(text string, minLen, maxLen int) bool {
	n := utf8.RuneCountInString(text)
	if n < minLen {
		return false
	}
	return n <= maxLen
}
|
||
|
||
// sanitizeSpaceRe matches a run of one or more whitespace characters as
// defined by the regexp class \s. It is compiled once at package
// initialization instead of on every SanitizeText call.
var sanitizeSpaceRe = regexp.MustCompile(`\s+`)

// SanitizeText normalizes whitespace in text: every run of characters matched
// by \s is collapsed into a single space, and leading and trailing white
// space is then removed with strings.TrimSpace.
func SanitizeText(text string) string {
	collapsed := sanitizeSpaceRe.ReplaceAllString(text, " ")
	return strings.TrimSpace(collapsed)
}
|
||
|
||
// ==================== 默认敏感词列表 ====================
|
||
|
||
// DefaultSensitiveWords returns a small built-in set of placeholder words.
// They are examples only; real word lists are expected to come from the
// database or configuration. A new map is returned on every call.
func DefaultSensitiveWords() map[string]struct{} {
	samples := []string{
		"测试敏感词1",
		"测试敏感词2",
		"测试敏感词3",
	}

	words := make(map[string]struct{}, len(samples))
	for _, w := range samples {
		words[w] = struct{}{}
	}
	return words
}
|