Mirror of https://github.com/FloatTech/ZeroBot-Plugin.git (synced 2025-12-19 22:00:11 +08:00)
fix(wordcount): 修改分词模块至外部gse仓库 (#1165)
Some checks failed
打包最新版为 Docker Image / build docker (push) Waiting to run
最新版 / Build binary CI (386, linux) (push) Failing after 1s
最新版 / Build binary CI (386, windows) (push) Failing after 1s
最新版 / Build binary CI (amd64, linux) (push) Failing after 1s
最新版 / Build binary CI (amd64, windows) (push) Failing after 1s
最新版 / Build binary CI (arm, linux) (push) Failing after 1s
最新版 / Build binary CI (arm64, linux) (push) Failing after 1s
PushLint / lint (push) Failing after 1s
Some checks failed
打包最新版为 Docker Image / build docker (push) Waiting to run
最新版 / Build binary CI (386, linux) (push) Failing after 1s
最新版 / Build binary CI (386, windows) (push) Failing after 1s
最新版 / Build binary CI (amd64, linux) (push) Failing after 1s
最新版 / Build binary CI (amd64, windows) (push) Failing after 1s
最新版 / Build binary CI (arm, linux) (push) Failing after 1s
最新版 / Build binary CI (arm64, linux) (push) Failing after 1s
PushLint / lint (push) Failing after 1s
Co-authored-by: 源文雨 <41315874+fumiama@users.noreply.github.com>
This commit is contained in:
parent: c888936489
commit: 076b113455
@@ -1,202 +1,214 @@
|
|||||||
// Package wordcount 聊天热词
|
// Package wordcount 聊天热词
|
||||||
package wordcount
|
package wordcount
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/FloatTech/floatbox/binary"
|
"github.com/go-ego/gse"
|
||||||
fcext "github.com/FloatTech/floatbox/ctxext"
|
"github.com/golang/freetype"
|
||||||
"github.com/FloatTech/floatbox/file"
|
"github.com/sirupsen/logrus"
|
||||||
ctrl "github.com/FloatTech/zbpctrl"
|
"github.com/tidwall/gjson"
|
||||||
"github.com/FloatTech/zbputils/control"
|
"github.com/wcharczuk/go-chart/v2"
|
||||||
"github.com/FloatTech/zbputils/ctxext"
|
|
||||||
"github.com/FloatTech/zbputils/img/text"
|
"github.com/FloatTech/floatbox/binary"
|
||||||
"github.com/golang/freetype"
|
fcext "github.com/FloatTech/floatbox/ctxext"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/FloatTech/floatbox/file"
|
||||||
"github.com/tidwall/gjson"
|
ctrl "github.com/FloatTech/zbpctrl"
|
||||||
"github.com/wcharczuk/go-chart/v2"
|
"github.com/FloatTech/zbputils/control"
|
||||||
zero "github.com/wdvxdr1123/ZeroBot"
|
"github.com/FloatTech/zbputils/ctxext"
|
||||||
"github.com/wdvxdr1123/ZeroBot/message"
|
"github.com/FloatTech/zbputils/img/text"
|
||||||
)
|
|
||||||
|
zero "github.com/wdvxdr1123/ZeroBot"
|
||||||
var (
|
"github.com/wdvxdr1123/ZeroBot/message"
|
||||||
re = regexp.MustCompile(`^[一-龥]+$`)
|
"github.com/wdvxdr1123/ZeroBot/utils/helper"
|
||||||
stopwords []string
|
)
|
||||||
)
|
|
||||||
|
var (
|
||||||
func init() {
|
re = regexp.MustCompile(`^[一-龥]+$`)
|
||||||
engine := control.AutoRegister(&ctrl.Options[*zero.Ctx]{
|
stopwords []string
|
||||||
DisableOnDefault: false,
|
seg gse.Segmenter
|
||||||
Brief: "聊天热词",
|
)
|
||||||
Help: "- 热词 [群号] [消息数目]|热词 123456 1000",
|
|
||||||
PublicDataFolder: "WordCount",
|
func init() {
|
||||||
})
|
engine := control.AutoRegister(&ctrl.Options[*zero.Ctx]{
|
||||||
cachePath := engine.DataFolder() + "cache/"
|
DisableOnDefault: false,
|
||||||
_ = os.RemoveAll(cachePath)
|
Brief: "聊天热词",
|
||||||
_ = os.MkdirAll(cachePath, 0755)
|
Help: "- 热词 [群号] [消息数目]|热词 123456 1000",
|
||||||
engine.OnRegex(`^热词\s?(\d*)\s?(\d*)$`, zero.OnlyGroup, fcext.DoOnceOnSuccess(func(ctx *zero.Ctx) bool {
|
PublicDataFolder: "WordCount",
|
||||||
_, err := engine.GetLazyData("stopwords.txt", false)
|
})
|
||||||
if err != nil {
|
cachePath := engine.DataFolder() + "cache/"
|
||||||
ctx.SendChain(message.Text("ERROR: ", err))
|
// 读取gse内置中文词典
|
||||||
return false
|
err := seg.LoadDictEmbed()
|
||||||
}
|
if err != nil {
|
||||||
data, err := os.ReadFile(engine.DataFolder() + "stopwords.txt")
|
panic(err)
|
||||||
if err != nil {
|
}
|
||||||
ctx.SendChain(message.Text("ERROR: ", err))
|
_ = os.RemoveAll(cachePath)
|
||||||
return false
|
_ = os.MkdirAll(cachePath, 0755)
|
||||||
}
|
engine.OnRegex(`^热词\s?(\d*)\s?(\d*)$`, zero.OnlyGroup, fcext.DoOnceOnSuccess(func(ctx *zero.Ctx) bool {
|
||||||
stopwords = strings.Split(strings.ReplaceAll(binary.BytesToString(data), "\r", ""), "\n")
|
_, err := engine.GetLazyData("stopwords.txt", false)
|
||||||
sort.Strings(stopwords)
|
if err != nil {
|
||||||
logrus.Infoln("[wordcount]加载", len(stopwords), "条停用词")
|
ctx.SendChain(message.Text("ERROR: ", err))
|
||||||
return true
|
return false
|
||||||
})).Limit(ctxext.LimitByUser).SetBlock(true).
|
}
|
||||||
Handle(func(ctx *zero.Ctx) {
|
data, err := os.ReadFile(engine.DataFolder() + "stopwords.txt")
|
||||||
_, err := file.GetLazyData(text.FontFile, control.Md5File, true)
|
if err != nil {
|
||||||
if err != nil {
|
ctx.SendChain(message.Text("ERROR: ", err))
|
||||||
ctx.SendChain(message.Text("ERROR: ", err))
|
return false
|
||||||
return
|
}
|
||||||
}
|
stopwords = strings.Split(strings.ReplaceAll(binary.BytesToString(data), "\r", ""), "\n")
|
||||||
b, err := os.ReadFile(text.FontFile)
|
sort.Strings(stopwords)
|
||||||
if err != nil {
|
logrus.Infoln("[wordcount]加载", len(stopwords), "条停用词")
|
||||||
ctx.SendChain(message.Text("ERROR: ", err))
|
return true
|
||||||
return
|
})).Limit(ctxext.LimitByUser).SetBlock(true).
|
||||||
}
|
Handle(func(ctx *zero.Ctx) {
|
||||||
font, err := freetype.ParseFont(b)
|
_, err := file.GetLazyData(text.FontFile, control.Md5File, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ctx.SendChain(message.Text("ERROR: ", err))
|
ctx.SendChain(message.Text("ERROR: ", err))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
b, err := os.ReadFile(text.FontFile)
|
||||||
ctx.SendChain(message.Text("少女祈祷中..."))
|
if err != nil {
|
||||||
gid, _ := strconv.ParseInt(ctx.State["regex_matched"].([]string)[1], 10, 64)
|
ctx.SendChain(message.Text("ERROR: ", err))
|
||||||
p, _ := strconv.ParseInt(ctx.State["regex_matched"].([]string)[2], 10, 64)
|
return
|
||||||
if p > 10000 {
|
}
|
||||||
p = 10000
|
font, err := freetype.ParseFont(b)
|
||||||
}
|
if err != nil {
|
||||||
if p == 0 {
|
ctx.SendChain(message.Text("ERROR: ", err))
|
||||||
p = 1000
|
return
|
||||||
}
|
}
|
||||||
if gid == 0 {
|
|
||||||
gid = ctx.Event.GroupID
|
ctx.SendChain(message.Text("少女祈祷中..."))
|
||||||
}
|
gid, _ := strconv.ParseInt(ctx.State["regex_matched"].([]string)[1], 10, 64)
|
||||||
group := ctx.GetGroupInfo(gid, false)
|
p, _ := strconv.ParseInt(ctx.State["regex_matched"].([]string)[2], 10, 64)
|
||||||
if group.MemberCount == 0 {
|
if p > 10000 {
|
||||||
ctx.SendChain(message.Text(zero.BotConfig.NickName[0], "未加入", group.Name, "(", gid, "),无法获得热词呢"))
|
p = 10000
|
||||||
return
|
}
|
||||||
}
|
if p == 0 {
|
||||||
today := time.Now().Format("20060102")
|
p = 1000
|
||||||
drawedFile := fmt.Sprintf("%s%d%s%dwordCount.png", cachePath, gid, today, p)
|
}
|
||||||
if file.IsExist(drawedFile) {
|
if gid == 0 {
|
||||||
ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile))
|
gid = ctx.Event.GroupID
|
||||||
return
|
}
|
||||||
}
|
group := ctx.GetGroupInfo(gid, false)
|
||||||
messageMap := make(map[string]int, 256)
|
if group.MemberCount == 0 {
|
||||||
msghists := make(chan *gjson.Result, 256)
|
ctx.SendChain(message.Text(zero.BotConfig.NickName[0], "未加入", group.Name, "(", gid, "),无法获得热词呢"))
|
||||||
go func() {
|
return
|
||||||
h := ctx.GetLatestGroupMessageHistory(gid)
|
}
|
||||||
messageSeq := h.Get("messages.0.message_seq").Int()
|
today := time.Now().Format("20060102")
|
||||||
msghists <- &h
|
drawedFile := fmt.Sprintf("%s%d%s%dwordCount.png", cachePath, gid, today, p)
|
||||||
for i := 1; i < int(p/20) && messageSeq != 0; i++ {
|
if file.IsExist(drawedFile) {
|
||||||
h := ctx.GetGroupMessageHistory(gid, messageSeq)
|
ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile))
|
||||||
msghists <- &h
|
return
|
||||||
messageSeq = h.Get("messages.0.message_seq").Int()
|
}
|
||||||
}
|
messageMap := make(map[string]int, 256)
|
||||||
close(msghists)
|
msghists := make(chan *gjson.Result, 256)
|
||||||
}()
|
go func() {
|
||||||
var wg sync.WaitGroup
|
h := ctx.GetLatestGroupMessageHistory(gid)
|
||||||
var mapmu sync.Mutex
|
messageSeq := h.Get("messages.0.message_seq").Int()
|
||||||
for h := range msghists {
|
msghists <- &h
|
||||||
wg.Add(1)
|
for i := 1; i < int(p/20) && messageSeq != 0; i++ {
|
||||||
go func(h *gjson.Result) {
|
h := ctx.GetGroupMessageHistory(gid, messageSeq)
|
||||||
for _, v := range h.Get("messages.#.message").Array() {
|
msghists <- &h
|
||||||
tex := strings.TrimSpace(message.ParseMessageFromString(v.Str).ExtractPlainText())
|
messageSeq = h.Get("messages.0.message_seq").Int()
|
||||||
if tex == "" {
|
}
|
||||||
continue
|
close(msghists)
|
||||||
}
|
}()
|
||||||
for _, t := range ctx.GetWordSlices(tex).Get("slices").Array() {
|
var wg sync.WaitGroup
|
||||||
tex := strings.TrimSpace(t.Str)
|
var mapmu sync.Mutex
|
||||||
i := sort.SearchStrings(stopwords, tex)
|
for h := range msghists {
|
||||||
if re.MatchString(tex) && (i >= len(stopwords) || stopwords[i] != tex) {
|
wg.Add(1)
|
||||||
mapmu.Lock()
|
go func(h *gjson.Result) {
|
||||||
messageMap[tex]++
|
for _, v := range h.Get("messages.#.message").Array() {
|
||||||
mapmu.Unlock()
|
tex := strings.TrimSpace(message.ParseMessageFromString(v.Str).ExtractPlainText())
|
||||||
}
|
if tex == "" {
|
||||||
}
|
continue
|
||||||
}
|
}
|
||||||
wg.Done()
|
segments := seg.Segment(helper.StringToBytes(tex))
|
||||||
}(h)
|
words := gse.ToSlice(segments, true)
|
||||||
}
|
for _, word := range words {
|
||||||
wg.Wait()
|
word = strings.TrimSpace(word)
|
||||||
|
i := sort.SearchStrings(stopwords, word)
|
||||||
wc := rankByWordCount(messageMap)
|
if re.MatchString(word) && (i >= len(stopwords) || stopwords[i] != word) {
|
||||||
if len(wc) > 20 {
|
mapmu.Lock()
|
||||||
wc = wc[:20]
|
messageMap[word]++
|
||||||
}
|
mapmu.Unlock()
|
||||||
// 绘图
|
}
|
||||||
if len(wc) == 0 {
|
}
|
||||||
ctx.SendChain(message.Text("ERROR: 历史消息为空或者无法获得历史消息"))
|
}
|
||||||
return
|
wg.Done()
|
||||||
}
|
}(h)
|
||||||
bars := make([]chart.Value, len(wc))
|
}
|
||||||
for i, v := range wc {
|
wg.Wait()
|
||||||
bars[i] = chart.Value{
|
|
||||||
Value: float64(v.Value),
|
wc := rankByWordCount(messageMap)
|
||||||
Label: v.Key,
|
if len(wc) > 20 {
|
||||||
}
|
wc = wc[:20]
|
||||||
}
|
}
|
||||||
graph := chart.BarChart{
|
// 绘图
|
||||||
Font: font,
|
if len(wc) == 0 {
|
||||||
Title: fmt.Sprintf("%s(%d)在%s号的%d条消息的热词top20", group.Name, gid, time.Now().Format("2006-01-02"), p),
|
ctx.SendChain(message.Text("ERROR: 历史消息为空或者无法获得历史消息"))
|
||||||
Background: chart.Style{
|
return
|
||||||
Padding: chart.Box{
|
}
|
||||||
Top: 40,
|
bars := make([]chart.Value, len(wc))
|
||||||
},
|
for i, v := range wc {
|
||||||
},
|
bars[i] = chart.Value{
|
||||||
Height: 500,
|
Value: float64(v.Value),
|
||||||
BarWidth: 25,
|
Label: v.Key,
|
||||||
Bars: bars,
|
}
|
||||||
}
|
}
|
||||||
f, err := os.Create(drawedFile)
|
graph := chart.BarChart{
|
||||||
if err != nil {
|
Font: font,
|
||||||
ctx.SendChain(message.Text("ERROR: ", err))
|
Title: fmt.Sprintf("%s(%d)在%s号的%d条消息的热词top20", group.Name, gid, time.Now().Format("2006-01-02"), p),
|
||||||
return
|
Background: chart.Style{
|
||||||
}
|
Padding: chart.Box{
|
||||||
err = graph.Render(chart.PNG, f)
|
Top: 40,
|
||||||
_ = f.Close()
|
},
|
||||||
if err != nil {
|
},
|
||||||
_ = os.Remove(drawedFile)
|
Height: 500,
|
||||||
ctx.SendChain(message.Text("ERROR: ", err))
|
BarWidth: 25,
|
||||||
return
|
Bars: bars,
|
||||||
}
|
}
|
||||||
ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile))
|
f, err := os.Create(drawedFile)
|
||||||
})
|
if err != nil {
|
||||||
}
|
ctx.SendChain(message.Text("ERROR: ", err))
|
||||||
|
return
|
||||||
func rankByWordCount(wordFrequencies map[string]int) pairlist {
|
}
|
||||||
pl := make(pairlist, len(wordFrequencies))
|
err = graph.Render(chart.PNG, f)
|
||||||
i := 0
|
_ = f.Close()
|
||||||
for k, v := range wordFrequencies {
|
if err != nil {
|
||||||
pl[i] = pair{k, v}
|
_ = os.Remove(drawedFile)
|
||||||
i++
|
ctx.SendChain(message.Text("ERROR: ", err))
|
||||||
}
|
return
|
||||||
sort.Sort(sort.Reverse(pl))
|
}
|
||||||
return pl
|
ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile))
|
||||||
}
|
})
|
||||||
|
}
|
||||||
type pair struct {
|
|
||||||
Key string
|
func rankByWordCount(wordFrequencies map[string]int) pairlist {
|
||||||
Value int
|
pl := make(pairlist, len(wordFrequencies))
|
||||||
}
|
i := 0
|
||||||
|
for k, v := range wordFrequencies {
|
||||||
type pairlist []pair
|
pl[i] = pair{k, v}
|
||||||
|
i++
|
||||||
func (p pairlist) Len() int { return len(p) }
|
}
|
||||||
func (p pairlist) Less(i, j int) bool { return p[i].Value < p[j].Value }
|
sort.Sort(sort.Reverse(pl))
|
||||||
func (p pairlist) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
|
return pl
|
||||||
|
}
|
||||||
|
|
||||||
|
type pair struct {
|
||||||
|
Key string
|
||||||
|
Value int
|
||||||
|
}
|
||||||
|
|
||||||
|
type pairlist []pair
|
||||||
|
|
||||||
|
func (p pairlist) Len() int { return len(p) }
|
||||||
|
func (p pairlist) Less(i, j int) bool { return p[i].Value < p[j].Value }
|
||||||
|
func (p pairlist) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user