142 lines
4.2 KiB
Go
142 lines
4.2 KiB
Go
|
package task
|
|||
|
|
|||
|
import (
|
|||
|
"github.com/eigeen/furryboard/spider-scheduler/pkg/conf"
|
|||
|
"github.com/eigeen/furryboard/spider-scheduler/pkg/dao/model"
|
|||
|
"github.com/eigeen/furryboard/spider-scheduler/pkg/log"
|
|||
|
"github.com/eigeen/furryboard/spider-scheduler/pkg/logic"
|
|||
|
"time"
|
|||
|
)
|
|||
|
|
|||
|
// SearchUpsFromVideo
|
|||
|
// 通过关键词进行视频搜索的方式获取用户
|
|||
|
// 新用户即写入用户表,状态为StatusPending
|
|||
|
//
|
|||
|
// 策略:每10分钟抓取前3页(每页42条)视频,若抓取的内容中存在
|
|||
|
// 一条视频与上次抓取内容的最后一条视频相同,则判断为已到达结尾
|
|||
|
func SearchUpsFromVideo() {
|
|||
|
// 获取最后一次抓取的记录
|
|||
|
meta, err := logic.GetMetadata("SearchVideoCheckpoint")
|
|||
|
if err != nil {
|
|||
|
log.Logger().Warnf("获取上一次视频抓取记录失败:%s", err)
|
|||
|
err = nil
|
|||
|
}
|
|||
|
var history = ""
|
|||
|
if meta != nil {
|
|||
|
history = meta.Value
|
|||
|
}
|
|||
|
var newHistory = ""
|
|||
|
|
|||
|
// 逐页获取判断
|
|||
|
for page := 1; page <= 3; page++ {
|
|||
|
videos, err := logic.SearchNewestVideos("furry", conf.Conf.SpiderCore.BiliCookie, 1)
|
|||
|
if err != nil {
|
|||
|
log.Logger().Errorf("搜索视频失败:%s", err)
|
|||
|
return
|
|||
|
}
|
|||
|
if len(videos) == 0 {
|
|||
|
log.Logger().Info("视频搜索已达末页")
|
|||
|
break
|
|||
|
}
|
|||
|
// 第一次获取,将第一个视频bv号作为newHistory
|
|||
|
if page == 1 {
|
|||
|
newHistory = videos[0].Bvid
|
|||
|
}
|
|||
|
// 从videos列表抽取视频bv号 map索引
|
|||
|
var bvids = make(map[string]int, 0)
|
|||
|
for i, video := range videos {
|
|||
|
bvids[video.Bvid] = i
|
|||
|
}
|
|||
|
// 先判断是否到达上一次结尾
|
|||
|
var historyPos = 42
|
|||
|
if i, ok := bvids[history]; ok {
|
|||
|
historyPos = i
|
|||
|
}
|
|||
|
// 防越界
|
|||
|
if len(videos) < historyPos {
|
|||
|
historyPos = len(videos)
|
|||
|
}
|
|||
|
// 先判断再抽取uid列表,节省数据库资源
|
|||
|
// 从videos列表抽取uid(mid) map去重
|
|||
|
var uids = make(map[uint64]*model.Furry, 0)
|
|||
|
for i := 0; i < historyPos; i++ {
|
|||
|
uids[videos[i].Mid] = &model.Furry{
|
|||
|
UID: uint(videos[i].Mid),
|
|||
|
Name: videos[i].Author,
|
|||
|
Status: model.StatusPendingVideo,
|
|||
|
}
|
|||
|
}
|
|||
|
// 逐个判断用户是否存在于数据库,不存在则新增
|
|||
|
for uid, user := range uids {
|
|||
|
exists, err := logic.IsUserExistsByUID(uint(uid))
|
|||
|
if err != nil {
|
|||
|
log.Logger().Warnf("判断用户存在性时发生错误:%s", err)
|
|||
|
continue
|
|||
|
}
|
|||
|
if !exists {
|
|||
|
err = logic.AddUser(user)
|
|||
|
if err != nil {
|
|||
|
log.Logger().Warnf("新增用户[%d]时失败:%s", uid, err)
|
|||
|
continue
|
|||
|
}
|
|||
|
log.Logger().Infof("通过视频搜索新增用户:%s(%d)", user.Name, uid)
|
|||
|
}
|
|||
|
}
|
|||
|
// 已到达上一次结尾,中断循环,否则继续获取下一页
|
|||
|
if historyPos != 42 {
|
|||
|
break
|
|||
|
}
|
|||
|
time.Sleep(1 * time.Second)
|
|||
|
}
|
|||
|
// 更新history
|
|||
|
err = logic.UpdateVideoCheckpoint(newHistory)
|
|||
|
if err != nil {
|
|||
|
log.Logger().Warnf("更新Metadata: SearchVideoCheckpoint时发生错误:%s", err)
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
func SearchUpsFromLiveRoom() {
|
|||
|
// TODO: 多关键词可复用
|
|||
|
// 先获取页数,后逐页获取
|
|||
|
pageNum, err := logic.GetLiveRoomPageNum("furry", conf.Conf.SpiderCore.BiliCookie)
|
|||
|
if err != nil {
|
|||
|
log.Logger().Errorf("搜索直播间错误:%s", err)
|
|||
|
return
|
|||
|
}
|
|||
|
for page := 1; page <= pageNum; page++ {
|
|||
|
rooms, err := logic.SearchLiveRooms("furry", conf.Conf.SpiderCore.BiliCookie, uint(page))
|
|||
|
if err != nil {
|
|||
|
log.Logger().Errorf("搜索直播间错误:%s", err)
|
|||
|
return
|
|||
|
}
|
|||
|
count, err := logic.UpdateLiveRoomInfo(rooms)
|
|||
|
if err != nil {
|
|||
|
log.Logger().Errorf("更新直播间信息错误:%s", err)
|
|||
|
return
|
|||
|
}
|
|||
|
log.Logger().Infof("更新了%d个直播间信息", count)
|
|||
|
time.Sleep(1 * time.Second)
|
|||
|
}
|
|||
|
|
|||
|
// 先获取页数,后逐页获取
|
|||
|
pageNum, err = logic.GetLiveRoomPageNum("冬聚", conf.Conf.SpiderCore.BiliCookie)
|
|||
|
if err != nil {
|
|||
|
log.Logger().Errorf("搜索直播间错误:%s", err)
|
|||
|
return
|
|||
|
}
|
|||
|
for page := 1; page <= pageNum; page++ {
|
|||
|
rooms, err := logic.SearchLiveRooms("冬聚", conf.Conf.SpiderCore.BiliCookie, uint(page))
|
|||
|
if err != nil {
|
|||
|
log.Logger().Errorf("搜索直播间错误:%s", err)
|
|||
|
return
|
|||
|
}
|
|||
|
count, err := logic.UpdateLiveRoomInfo(rooms)
|
|||
|
if err != nil {
|
|||
|
log.Logger().Errorf("更新直播间信息错误:%s", err)
|
|||
|
return
|
|||
|
}
|
|||
|
log.Logger().Infof("更新了%d个直播间信息", count)
|
|||
|
time.Sleep(1 * time.Second)
|
|||
|
}
|
|||
|
}
|