fix(wordcount): 修改分词模块至外部gse仓库 (#1165)
Some checks failed
打包最新版为 Docker Image / build docker (push) Waiting to run
最新版 / Build binary CI (386, linux) (push) Failing after 1s
最新版 / Build binary CI (386, windows) (push) Failing after 1s
最新版 / Build binary CI (amd64, linux) (push) Failing after 1s
最新版 / Build binary CI (amd64, windows) (push) Failing after 1s
最新版 / Build binary CI (arm, linux) (push) Failing after 1s
最新版 / Build binary CI (arm64, linux) (push) Failing after 1s
PushLint / lint (push) Failing after 1s

Co-authored-by: 源文雨 <41315874+fumiama@users.noreply.github.com>
This commit is contained in:
Dodoj 2025-05-13 22:05:24 +10:00 committed by GitHub
parent c888936489
commit 076b113455
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,202 +1,214 @@
// Package wordcount 聊天热词 // Package wordcount 聊天热词
package wordcount package wordcount
import ( import (
"fmt" "fmt"
"os" "os"
"regexp" "regexp"
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
"time" "time"
"github.com/FloatTech/floatbox/binary" "github.com/go-ego/gse"
fcext "github.com/FloatTech/floatbox/ctxext" "github.com/golang/freetype"
"github.com/FloatTech/floatbox/file" "github.com/sirupsen/logrus"
ctrl "github.com/FloatTech/zbpctrl" "github.com/tidwall/gjson"
"github.com/FloatTech/zbputils/control" "github.com/wcharczuk/go-chart/v2"
"github.com/FloatTech/zbputils/ctxext"
"github.com/FloatTech/zbputils/img/text" "github.com/FloatTech/floatbox/binary"
"github.com/golang/freetype" fcext "github.com/FloatTech/floatbox/ctxext"
"github.com/sirupsen/logrus" "github.com/FloatTech/floatbox/file"
"github.com/tidwall/gjson" ctrl "github.com/FloatTech/zbpctrl"
"github.com/wcharczuk/go-chart/v2" "github.com/FloatTech/zbputils/control"
zero "github.com/wdvxdr1123/ZeroBot" "github.com/FloatTech/zbputils/ctxext"
"github.com/wdvxdr1123/ZeroBot/message" "github.com/FloatTech/zbputils/img/text"
)
zero "github.com/wdvxdr1123/ZeroBot"
var ( "github.com/wdvxdr1123/ZeroBot/message"
re = regexp.MustCompile(`^[一-龥]+$`) "github.com/wdvxdr1123/ZeroBot/utils/helper"
stopwords []string )
)
var (
func init() { re = regexp.MustCompile(`^[一-龥]+$`)
engine := control.AutoRegister(&ctrl.Options[*zero.Ctx]{ stopwords []string
DisableOnDefault: false, seg gse.Segmenter
Brief: "聊天热词", )
Help: "- 热词 [群号] [消息数目]|热词 123456 1000",
PublicDataFolder: "WordCount", func init() {
}) engine := control.AutoRegister(&ctrl.Options[*zero.Ctx]{
cachePath := engine.DataFolder() + "cache/" DisableOnDefault: false,
_ = os.RemoveAll(cachePath) Brief: "聊天热词",
_ = os.MkdirAll(cachePath, 0755) Help: "- 热词 [群号] [消息数目]|热词 123456 1000",
engine.OnRegex(`^热词\s?(\d*)\s?(\d*)$`, zero.OnlyGroup, fcext.DoOnceOnSuccess(func(ctx *zero.Ctx) bool { PublicDataFolder: "WordCount",
_, err := engine.GetLazyData("stopwords.txt", false) })
if err != nil { cachePath := engine.DataFolder() + "cache/"
ctx.SendChain(message.Text("ERROR: ", err)) // 读取gse内置中文词典
return false err := seg.LoadDictEmbed()
} if err != nil {
data, err := os.ReadFile(engine.DataFolder() + "stopwords.txt") panic(err)
if err != nil { }
ctx.SendChain(message.Text("ERROR: ", err)) _ = os.RemoveAll(cachePath)
return false _ = os.MkdirAll(cachePath, 0755)
} engine.OnRegex(`^热词\s?(\d*)\s?(\d*)$`, zero.OnlyGroup, fcext.DoOnceOnSuccess(func(ctx *zero.Ctx) bool {
stopwords = strings.Split(strings.ReplaceAll(binary.BytesToString(data), "\r", ""), "\n") _, err := engine.GetLazyData("stopwords.txt", false)
sort.Strings(stopwords) if err != nil {
logrus.Infoln("[wordcount]加载", len(stopwords), "条停用词") ctx.SendChain(message.Text("ERROR: ", err))
return true return false
})).Limit(ctxext.LimitByUser).SetBlock(true). }
Handle(func(ctx *zero.Ctx) { data, err := os.ReadFile(engine.DataFolder() + "stopwords.txt")
_, err := file.GetLazyData(text.FontFile, control.Md5File, true) if err != nil {
if err != nil { ctx.SendChain(message.Text("ERROR: ", err))
ctx.SendChain(message.Text("ERROR: ", err)) return false
return }
} stopwords = strings.Split(strings.ReplaceAll(binary.BytesToString(data), "\r", ""), "\n")
b, err := os.ReadFile(text.FontFile) sort.Strings(stopwords)
if err != nil { logrus.Infoln("[wordcount]加载", len(stopwords), "条停用词")
ctx.SendChain(message.Text("ERROR: ", err)) return true
return })).Limit(ctxext.LimitByUser).SetBlock(true).
} Handle(func(ctx *zero.Ctx) {
font, err := freetype.ParseFont(b) _, err := file.GetLazyData(text.FontFile, control.Md5File, true)
if err != nil { if err != nil {
ctx.SendChain(message.Text("ERROR: ", err)) ctx.SendChain(message.Text("ERROR: ", err))
return return
} }
b, err := os.ReadFile(text.FontFile)
ctx.SendChain(message.Text("少女祈祷中...")) if err != nil {
gid, _ := strconv.ParseInt(ctx.State["regex_matched"].([]string)[1], 10, 64) ctx.SendChain(message.Text("ERROR: ", err))
p, _ := strconv.ParseInt(ctx.State["regex_matched"].([]string)[2], 10, 64) return
if p > 10000 { }
p = 10000 font, err := freetype.ParseFont(b)
} if err != nil {
if p == 0 { ctx.SendChain(message.Text("ERROR: ", err))
p = 1000 return
} }
if gid == 0 {
gid = ctx.Event.GroupID ctx.SendChain(message.Text("少女祈祷中..."))
} gid, _ := strconv.ParseInt(ctx.State["regex_matched"].([]string)[1], 10, 64)
group := ctx.GetGroupInfo(gid, false) p, _ := strconv.ParseInt(ctx.State["regex_matched"].([]string)[2], 10, 64)
if group.MemberCount == 0 { if p > 10000 {
ctx.SendChain(message.Text(zero.BotConfig.NickName[0], "未加入", group.Name, "(", gid, "),无法获得热词呢")) p = 10000
return }
} if p == 0 {
today := time.Now().Format("20060102") p = 1000
drawedFile := fmt.Sprintf("%s%d%s%dwordCount.png", cachePath, gid, today, p) }
if file.IsExist(drawedFile) { if gid == 0 {
ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile)) gid = ctx.Event.GroupID
return }
} group := ctx.GetGroupInfo(gid, false)
messageMap := make(map[string]int, 256) if group.MemberCount == 0 {
msghists := make(chan *gjson.Result, 256) ctx.SendChain(message.Text(zero.BotConfig.NickName[0], "未加入", group.Name, "(", gid, "),无法获得热词呢"))
go func() { return
h := ctx.GetLatestGroupMessageHistory(gid) }
messageSeq := h.Get("messages.0.message_seq").Int() today := time.Now().Format("20060102")
msghists <- &h drawedFile := fmt.Sprintf("%s%d%s%dwordCount.png", cachePath, gid, today, p)
for i := 1; i < int(p/20) && messageSeq != 0; i++ { if file.IsExist(drawedFile) {
h := ctx.GetGroupMessageHistory(gid, messageSeq) ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile))
msghists <- &h return
messageSeq = h.Get("messages.0.message_seq").Int() }
} messageMap := make(map[string]int, 256)
close(msghists) msghists := make(chan *gjson.Result, 256)
}() go func() {
var wg sync.WaitGroup h := ctx.GetLatestGroupMessageHistory(gid)
var mapmu sync.Mutex messageSeq := h.Get("messages.0.message_seq").Int()
for h := range msghists { msghists <- &h
wg.Add(1) for i := 1; i < int(p/20) && messageSeq != 0; i++ {
go func(h *gjson.Result) { h := ctx.GetGroupMessageHistory(gid, messageSeq)
for _, v := range h.Get("messages.#.message").Array() { msghists <- &h
tex := strings.TrimSpace(message.ParseMessageFromString(v.Str).ExtractPlainText()) messageSeq = h.Get("messages.0.message_seq").Int()
if tex == "" { }
continue close(msghists)
} }()
for _, t := range ctx.GetWordSlices(tex).Get("slices").Array() { var wg sync.WaitGroup
tex := strings.TrimSpace(t.Str) var mapmu sync.Mutex
i := sort.SearchStrings(stopwords, tex) for h := range msghists {
if re.MatchString(tex) && (i >= len(stopwords) || stopwords[i] != tex) { wg.Add(1)
mapmu.Lock() go func(h *gjson.Result) {
messageMap[tex]++ for _, v := range h.Get("messages.#.message").Array() {
mapmu.Unlock() tex := strings.TrimSpace(message.ParseMessageFromString(v.Str).ExtractPlainText())
} if tex == "" {
} continue
} }
wg.Done() segments := seg.Segment(helper.StringToBytes(tex))
}(h) words := gse.ToSlice(segments, true)
} for _, word := range words {
wg.Wait() word = strings.TrimSpace(word)
i := sort.SearchStrings(stopwords, word)
wc := rankByWordCount(messageMap) if re.MatchString(word) && (i >= len(stopwords) || stopwords[i] != word) {
if len(wc) > 20 { mapmu.Lock()
wc = wc[:20] messageMap[word]++
} mapmu.Unlock()
// 绘图 }
if len(wc) == 0 { }
ctx.SendChain(message.Text("ERROR: 历史消息为空或者无法获得历史消息")) }
return wg.Done()
} }(h)
bars := make([]chart.Value, len(wc)) }
for i, v := range wc { wg.Wait()
bars[i] = chart.Value{
Value: float64(v.Value), wc := rankByWordCount(messageMap)
Label: v.Key, if len(wc) > 20 {
} wc = wc[:20]
} }
graph := chart.BarChart{ // 绘图
Font: font, if len(wc) == 0 {
Title: fmt.Sprintf("%s(%d)在%s号的%d条消息的热词top20", group.Name, gid, time.Now().Format("2006-01-02"), p), ctx.SendChain(message.Text("ERROR: 历史消息为空或者无法获得历史消息"))
Background: chart.Style{ return
Padding: chart.Box{ }
Top: 40, bars := make([]chart.Value, len(wc))
}, for i, v := range wc {
}, bars[i] = chart.Value{
Height: 500, Value: float64(v.Value),
BarWidth: 25, Label: v.Key,
Bars: bars, }
} }
f, err := os.Create(drawedFile) graph := chart.BarChart{
if err != nil { Font: font,
ctx.SendChain(message.Text("ERROR: ", err)) Title: fmt.Sprintf("%s(%d)在%s号的%d条消息的热词top20", group.Name, gid, time.Now().Format("2006-01-02"), p),
return Background: chart.Style{
} Padding: chart.Box{
err = graph.Render(chart.PNG, f) Top: 40,
_ = f.Close() },
if err != nil { },
_ = os.Remove(drawedFile) Height: 500,
ctx.SendChain(message.Text("ERROR: ", err)) BarWidth: 25,
return Bars: bars,
} }
ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile)) f, err := os.Create(drawedFile)
}) if err != nil {
} ctx.SendChain(message.Text("ERROR: ", err))
return
func rankByWordCount(wordFrequencies map[string]int) pairlist { }
pl := make(pairlist, len(wordFrequencies)) err = graph.Render(chart.PNG, f)
i := 0 _ = f.Close()
for k, v := range wordFrequencies { if err != nil {
pl[i] = pair{k, v} _ = os.Remove(drawedFile)
i++ ctx.SendChain(message.Text("ERROR: ", err))
} return
sort.Sort(sort.Reverse(pl)) }
return pl ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile))
} })
}
type pair struct {
Key string func rankByWordCount(wordFrequencies map[string]int) pairlist {
Value int pl := make(pairlist, len(wordFrequencies))
} i := 0
for k, v := range wordFrequencies {
type pairlist []pair pl[i] = pair{k, v}
i++
func (p pairlist) Len() int { return len(p) } }
func (p pairlist) Less(i, j int) bool { return p[i].Value < p[j].Value } sort.Sort(sort.Reverse(pl))
func (p pairlist) Swap(i, j int) { p[i], p[j] = p[j], p[i] } return pl
}
// pair is one key/count entry; rankByWordCount creates one per entry of its
// word-frequency map.
type pair struct {
	Key   string // the word
	Value int    // the count associated with Key
}

// pairlist is a slice of pair implementing sort.Interface, ordered ascending
// by Value. Callers wanting the highest counts first wrap it in sort.Reverse.
type pairlist []pair

// Len returns the number of entries.
func (p pairlist) Len() int { return len(p) }

// Less reports whether entry i sorts before entry j: smaller Value first.
//
// Entries with equal Value are ordered by Key descending, so that under
// sort.Reverse ties come out in ascending Key order. Without this tie-break
// the order of equal counts would depend on map iteration order and on the
// unstable sort, making the result differ between runs on the same data.
func (p pairlist) Less(i, j int) bool {
	if p[i].Value != p[j].Value {
		return p[i].Value < p[j].Value
	}
	return p[i].Key > p[j].Key
}

// Swap exchanges entries i and j.
func (p pairlist) Swap(i, j int) { p[i], p[j] = p[j], p[i] }