forked from TomoeMami/S1PlainTextBackup
autorun.py · 98 lines (96 loc) · 4.6 KB
# -*- coding: UTF-8 -*-
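# Helper script from the S1PlainTextBackup project: scans the first index page of
# several Saraba1st (bbs.saraba1st.com) boards, marks threads with recent replies as
# active in RefreshingData.json, and finally drops entries that have been inactive
# for more than 14 days (unless they are long, high-page-count threads).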
import requests
from bs4 import BeautifulSoup
import re
import time
import sys
import io
import os
import json
#2022-09-19
old_number = 2095520
# old_number = 2095520++int((int(time.time())-1664434472)/86400)*175
#2021-01-01
# dead_number = 1980710
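# Collect candidate threads from one forum index page: each "normalthread_*" row
# contributes its thread id, last-reply timestamp, and page count ("level", the
# trailing page number in the row), but only for threads with more than 2 pages
# that were replied to within the last 3 days.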
def parse_html(html, threadict):
    # soup = BeautifulSoup(html, from_encoding="utf-8", features="lxml")
    soup = BeautifulSoup(html, 'html.parser')
    namelist = soup.find_all(name="tbody")
    for i in namelist:
        threadids = re.search(r'normalthread_\d{7}', str(i))
        if threadids:
            levels = re.search(r'\d{1,5}</a></span>', str(i))
            threadid = re.sub(r'normalthread_', '', str(threadids.group(0)))
            if levels:
                # check here whether the thread should be added
                # if int(threadid) > old_number:
                level = re.sub(r'</a></span>', '', str(levels.group(0)))
                lastreplytime = re.findall(r'\d{4}-\d{1,2}-\d{1,2} \d{2}:\d{2}', str(i))
                replytime = time.mktime(time.strptime(str(lastreplytime[1]), "%Y-%m-%d %H:%M"))
                if (int(level) > 2) and ((int(time.time()) - replytime) < 259200):
                    # replied to within the last 3 days
                    threadict[threadid] = {}
                    threadict[threadid]['replytime'] = replytime
                    threadict[threadid]['level'] = level
if __name__ == '__main__':
    blacklist = [2104652, 2056385, 1842868, 334540, 1971007, 1915305, 2023780, 2085181, 2105283, 2045161]
    high_level = 14
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')  # switch stdout to UTF-8
    # the cookie string obtained after logging in with a browser (the string copied earlier)
    with open('/home/ubuntu/s1cookie-1.txt', 'r', encoding='utf-8') as f:
        cookie_str1 = f.read()
    # cookie_str1 = os.getenv('S1_COOKIE')
    cookie_str = repr(cookie_str1)[1:-1]
    # turn the cookie string into a dict for later use
    cookies = {}
    for line in cookie_str.split(';'):
        key, value = line.split('=', 1)
        cookies[key] = value
    # set the request headers
    headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'}
    forumdict = {'外野': '75', '虚拟主播区专楼': '151', '游戏区': '4', '漫区': '6', '手游专楼': '135'}
    # forumdict = {'外野': '75', '游戏区': '4', '漫区': '6'}
    rootdir = "./"
    session = requests.session()
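    # Scan each configured board and merge the results into RefreshingData.json.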
    for k in forumdict.keys():
        threadict = {}
        # only fetch the first page of each board
        for i in range(1, 2):
            RURL = 'https://bbs.saraba1st.com/2b/forum-' + forumdict[k] + '-' + str(i) + '.html'
            s1 = session.get(RURL, headers=headers, cookies=cookies)
            # s1 = requests.get(RURL, headers=headers)
            # s1.encoding = 'utf-8'
            data = s1.content
            parse_html(data, threadict)
        with open(rootdir + 'RefreshingData.json', "r", encoding='utf-8') as f:
            thdata = json.load(f)
        ids = thdata.keys()
        for l in threadict.keys():
            if int(l) not in blacklist:
                if l in ids:
                    if thdata[l]['totalreply'] // 30 > high_level:
                        if thdata[l]['totalreply'] // 30 < int(threadict[l]['level']):
                            thdata[l]['active'] = True
                    else:
                        thdata[l]['active'] = True
                    thdata[l]['lastedit'] = int(threadict[l]['replytime'])
                else:
                    thdata[l] = {}
                    thdata[l]['totalreply'] = 0
                    thdata[l]['title'] = "待更新"      # placeholder: "pending update"
                    thdata[l]['newtitle'] = "待更新"
                    thdata[l]['lastedit'] = int(threadict[l]['replytime'])
                    thdata[l]['category'] = k
                    thdata[l]["active"] = True
                    print('add:' + l)
        with open(rootdir + 'RefreshingData.json', "w", encoding='utf-8') as f:
            f.write(json.dumps(thdata, indent=2, ensure_ascii=False))
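    # Prune: rewrite RefreshingData.json keeping only threads that are active,
    # were edited within the last 14 days, or exceed high_level pages.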
    activethdata = {}
    with open(rootdir + 'RefreshingData.json', "r", encoding='utf-8') as f:
        thdata = json.load(f)
    for i in thdata.keys():
        # if (thdata[i]['active']) or ((int(time.time()) - thdata[i]['lastedit']) < 1209600) or (int(i) > old_number) or (thdata[i]['totalreply'] // 30 > high_level):
        if (thdata[i]['active']) or ((int(time.time()) - thdata[i]['lastedit']) < 1209600) or (thdata[i]['totalreply'] // 30 > high_level):
            activethdata[i] = thdata[i]
    with open(rootdir + 'RefreshingData.json', "w", encoding='utf-8') as f:
        f.write(json.dumps(activethdata, indent=2, ensure_ascii=False))