实现通过直播间获取furry等
用户信息新增直播间房间号字段 能用了,但不是很好用
This commit is contained in:
@@ -10,3 +10,4 @@
|
||||
[spider_core]
|
||||
host = "localhost"
|
||||
port = 9101
|
||||
bili_cookie = "buvid3=E114B569-ECF7-50D6-5874-7221C956F4F323116infoc; i-wanna-go-back=-1; _uuid=9DC109D51-C164-D1A8-D18D-CDB410A6B733223200infoc; buvid4=2EFA9D48-F352-9314-066E-800EAD1A3B3024163-022061118-7XDfT9HnZ75xcCl6GNqbKvvAaotbbCqK1dff9AggS2AFysI2RsiDgg%3D%3D; CURRENT_BLACKGAP=0; blackside_state=0; nostalgia_conf=-1; buvid_fp_plain=undefined; b_ut=5; LIVE_BUVID=AUTO2216549597211800; hit-dyn-v2=1; b_nut=100; fingerprint3=8b45c9c8e2c3bde2334046651927bc11; CURRENT_QUALITY=80; hit-new-style-dyn=0; CURRENT_FNVAL=4048; rpdid=|(J~R|~)~|Y)0J'uYY)Y|JmkY; dy_spec_agreed=1; fingerprint=64f6f507fa149d279783afea30316e2b; SESSDATA=0312b6e2%2C1687167210%2C5c48e%2Ac1; bili_jct=12bdeb674ff87c20afd2d245128d6734; DedeUserID=90931399; DedeUserID__ckMd5=507c5d481d60b7bb; sid=5n2lvshx; buvid_fp=64f6f507fa149d279783afea30316e2b; bp_t_offset_90931399=742134893995098144; PVID=2; share_source_origin=QQ; bsource=share_source_qqchat; b_lsid=4578914A_1853786E484; innersign=0"
|
||||
46
pkg/task/liveroom.go
Normal file
46
pkg/task/liveroom.go
Normal file
@@ -0,0 +1,46 @@
|
||||
package task
|
||||
|
||||
//func UpdateLiveRoomInfo() {
|
||||
// // 预查询数量
|
||||
// // TODO: 预查询数量和分页策略功能可复用
|
||||
// var count int64
|
||||
// tx := dao.DB().Model(&model.Furry{}).Count(&count)
|
||||
// if tx.Error != nil {
|
||||
// err := exception.ErrFetchFurries("获取furries数量失败:" + tx.Error.Error())
|
||||
// log.Logger().Errorf("获取furries数量失败:%s", err)
|
||||
// return
|
||||
// }
|
||||
//
|
||||
// pageSize := 32
|
||||
// maxPage := int(math.Ceil(float64(count) / float64(pageSize)))
|
||||
// for page := 0; page <= maxPage; page++ {
|
||||
// // 获取目标用户列表(分页)
|
||||
// users, err := logic.GetUsers(page, pageSize)
|
||||
// if err != nil {
|
||||
// log.Logger().Errorf("获取用户列表时发生错误:%s", err)
|
||||
// continue
|
||||
// }
|
||||
// // 已获取完毕
|
||||
// if users == nil {
|
||||
// return
|
||||
// }
|
||||
// // 抽取uid列表
|
||||
// var uids []uint
|
||||
// for _, user := range users {
|
||||
// uids = append(uids, user.UID)
|
||||
// }
|
||||
// // 通过API获取用户信息
|
||||
// infos := logic.BatchGetUserInfo(&uids)
|
||||
//
|
||||
// // 更新live_rooms表
|
||||
// c, err := logic.UpdateLiveRoom(infos)
|
||||
// if err != nil {
|
||||
// log.Logger().Errorf("更新数据时发生错误:%s", err)
|
||||
// continue
|
||||
// }
|
||||
// log.Logger().Infof("成功更新了%d个用户信息", c)
|
||||
// // delay
|
||||
// time.Sleep(2 * time.Second)
|
||||
// }
|
||||
//
|
||||
//}
|
||||
141
pkg/task/search.go
Normal file
141
pkg/task/search.go
Normal file
@@ -0,0 +1,141 @@
|
||||
package task
|
||||
|
||||
import (
|
||||
"github.com/eigeen/furryboard/spider-scheduler/pkg/conf"
|
||||
"github.com/eigeen/furryboard/spider-scheduler/pkg/dao/model"
|
||||
"github.com/eigeen/furryboard/spider-scheduler/pkg/log"
|
||||
"github.com/eigeen/furryboard/spider-scheduler/pkg/logic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// SearchUpsFromVideo
|
||||
// 通过关键词进行视频搜索的方式获取用户
|
||||
// 新用户即写入用户表,状态为StatusPending
|
||||
//
|
||||
// 策略:每10分钟抓取前3页(每页42条)视频,若抓取的内容中存在
|
||||
// 一条视频与上次抓取内容的最后一条视频相同,则判断为已到达结尾
|
||||
func SearchUpsFromVideo() {
|
||||
// 获取最后一次抓取的记录
|
||||
meta, err := logic.GetMetadata("SearchVideoCheckpoint")
|
||||
if err != nil {
|
||||
log.Logger().Warnf("获取上一次视频抓取记录失败:%s", err)
|
||||
err = nil
|
||||
}
|
||||
var history = ""
|
||||
if meta != nil {
|
||||
history = meta.Value
|
||||
}
|
||||
var newHistory = ""
|
||||
|
||||
// 逐页获取判断
|
||||
for page := 1; page <= 3; page++ {
|
||||
videos, err := logic.SearchNewestVideos("furry", conf.Conf.SpiderCore.BiliCookie, 1)
|
||||
if err != nil {
|
||||
log.Logger().Errorf("搜索视频失败:%s", err)
|
||||
return
|
||||
}
|
||||
if len(videos) == 0 {
|
||||
log.Logger().Info("视频搜索已达末页")
|
||||
break
|
||||
}
|
||||
// 第一次获取,将第一个视频bv号作为newHistory
|
||||
if page == 1 {
|
||||
newHistory = videos[0].Bvid
|
||||
}
|
||||
// 从videos列表抽取视频bv号 map索引
|
||||
var bvids = make(map[string]int, 0)
|
||||
for i, video := range videos {
|
||||
bvids[video.Bvid] = i
|
||||
}
|
||||
// 先判断是否到达上一次结尾
|
||||
var historyPos = 42
|
||||
if i, ok := bvids[history]; ok {
|
||||
historyPos = i
|
||||
}
|
||||
// 防越界
|
||||
if len(videos) < historyPos {
|
||||
historyPos = len(videos)
|
||||
}
|
||||
// 先判断再抽取uid列表,节省数据库资源
|
||||
// 从videos列表抽取uid(mid) map去重
|
||||
var uids = make(map[uint64]*model.Furry, 0)
|
||||
for i := 0; i < historyPos; i++ {
|
||||
uids[videos[i].Mid] = &model.Furry{
|
||||
UID: uint(videos[i].Mid),
|
||||
Name: videos[i].Author,
|
||||
Status: model.StatusPendingVideo,
|
||||
}
|
||||
}
|
||||
// 逐个判断用户是否存在于数据库,不存在则新增
|
||||
for uid, user := range uids {
|
||||
exists, err := logic.IsUserExistsByUID(uint(uid))
|
||||
if err != nil {
|
||||
log.Logger().Warnf("判断用户存在性时发生错误:%s", err)
|
||||
continue
|
||||
}
|
||||
if !exists {
|
||||
err = logic.AddUser(user)
|
||||
if err != nil {
|
||||
log.Logger().Warnf("新增用户[%d]时失败:%s", uid, err)
|
||||
continue
|
||||
}
|
||||
log.Logger().Infof("通过视频搜索新增用户:%s(%d)", user.Name, uid)
|
||||
}
|
||||
}
|
||||
// 已到达上一次结尾,中断循环,否则继续获取下一页
|
||||
if historyPos != 42 {
|
||||
break
|
||||
}
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
// 更新history
|
||||
err = logic.UpdateVideoCheckpoint(newHistory)
|
||||
if err != nil {
|
||||
log.Logger().Warnf("更新Metadata: SearchVideoCheckpoint时发生错误:%s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func SearchUpsFromLiveRoom() {
|
||||
// TODO: 多关键词可复用
|
||||
// 先获取页数,后逐页获取
|
||||
pageNum, err := logic.GetLiveRoomPageNum("furry", conf.Conf.SpiderCore.BiliCookie)
|
||||
if err != nil {
|
||||
log.Logger().Errorf("搜索直播间错误:%s", err)
|
||||
return
|
||||
}
|
||||
for page := 1; page <= pageNum; page++ {
|
||||
rooms, err := logic.SearchLiveRooms("furry", conf.Conf.SpiderCore.BiliCookie, uint(page))
|
||||
if err != nil {
|
||||
log.Logger().Errorf("搜索直播间错误:%s", err)
|
||||
return
|
||||
}
|
||||
count, err := logic.UpdateLiveRoomInfo(rooms)
|
||||
if err != nil {
|
||||
log.Logger().Errorf("更新直播间信息错误:%s", err)
|
||||
return
|
||||
}
|
||||
log.Logger().Infof("更新了%d个直播间信息", count)
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
|
||||
// 先获取页数,后逐页获取
|
||||
pageNum, err = logic.GetLiveRoomPageNum("冬聚", conf.Conf.SpiderCore.BiliCookie)
|
||||
if err != nil {
|
||||
log.Logger().Errorf("搜索直播间错误:%s", err)
|
||||
return
|
||||
}
|
||||
for page := 1; page <= pageNum; page++ {
|
||||
rooms, err := logic.SearchLiveRooms("冬聚", conf.Conf.SpiderCore.BiliCookie, uint(page))
|
||||
if err != nil {
|
||||
log.Logger().Errorf("搜索直播间错误:%s", err)
|
||||
return
|
||||
}
|
||||
count, err := logic.UpdateLiveRoomInfo(rooms)
|
||||
if err != nil {
|
||||
log.Logger().Errorf("更新直播间信息错误:%s", err)
|
||||
return
|
||||
}
|
||||
log.Logger().Infof("更新了%d个直播间信息", count)
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
}
|
||||
13
pkg/task/search_test.go
Normal file
13
pkg/task/search_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package task
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestSearchUntrackedUps(t *testing.T) {
|
||||
BeforeTesting()
|
||||
SearchUpsFromVideo()
|
||||
}
|
||||
|
||||
func TestSearchUpsFromLiveRoom(t *testing.T) {
|
||||
BeforeTesting()
|
||||
SearchUpsFromLiveRoom()
|
||||
}
|
||||
@@ -27,7 +27,7 @@ func UpdateUserInfo() {
|
||||
maxPage := int(math.Ceil(float64(count) / float64(pageSize)))
|
||||
for page := 0; page <= maxPage; page++ {
|
||||
// 获取目标用户列表(分页)
|
||||
users, err := logic.GetUsers(page, pageSize)
|
||||
users, err := logic.GetValidUsers(page, pageSize)
|
||||
if err != nil {
|
||||
log.Logger().Errorf("获取用户列表时发生错误:%s", err)
|
||||
continue
|
||||
@@ -56,11 +56,12 @@ func UpdateUserInfo() {
|
||||
}
|
||||
}
|
||||
|
||||
// UpdateFans 更新粉丝数,将记录写入表中,增量记录
|
||||
func UpdateFans() {
|
||||
// TODO: 与UpdateUserInfo有大量重复,可抽离复用代码
|
||||
// 预查询数量
|
||||
var count int64
|
||||
tx := dao.DB().Model(&model.Furry{}).Count(&count)
|
||||
tx := dao.DB().Model(&model.Furry{}).Where("status = 0").Count(&count)
|
||||
if tx.Error != nil {
|
||||
err := exception.ErrFetchFurries("获取furries数量失败:" + tx.Error.Error())
|
||||
log.Logger().Errorf("获取furries数量失败:%s", err)
|
||||
@@ -71,7 +72,7 @@ func UpdateFans() {
|
||||
maxPage := int(math.Ceil(float64(count) / float64(pageSize)))
|
||||
for page := 0; page <= maxPage; page++ {
|
||||
// 获取目标用户列表(分页)
|
||||
users, err := logic.GetUsers(page, pageSize)
|
||||
users, err := logic.GetValidUsers(page, pageSize)
|
||||
if err != nil {
|
||||
log.Logger().Errorf("获取用户列表时发生错误:%s", err)
|
||||
continue
|
||||
@@ -97,7 +98,3 @@ func UpdateFans() {
|
||||
time.Sleep(2 * time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
func UpdateFansAndInfo() {
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user