165 lines
5.4 KiB
Python
165 lines
5.4 KiB
Python
|
import tomllib
|
||
|
import pymysql
|
||
|
import json
|
||
|
import os
|
||
|
import shutil
|
||
|
|
||
|
from t_parser import parser
|
||
|
|
||
|
|
||
|
# Announce the step, then pull every setting out of the [dump] table of
# config.toml.
print("Load config")

with open("config.toml", "rb") as config_file:
    cfg = tomllib.load(config_file)["dump"]

# Database endpoint and credentials.
server, port = cfg["server"], cfg["port"]
user, passwd = cfg["user"], cfg["passwd"]
charset = cfg["charset"]
# Schema name, and the prefix the queries below put in front of table names.
db, table = cfg["db"], cfg["table"]
# Truthy value requests that a previous dump directory be wiped first.
needclean = cfg["clean"]
|
||
|
|
||
|
# Open the MySQL connection described by the config, report the server
# version, then select the schema to dump.
print(f"Connect {server}:{port} as {user}")

connect_args = {
    "host": server,
    "port": port,
    "user": user,
    "password": passwd,
    "charset": charset,
}
conn = pymysql.connect(**connect_args)

server_version = conn.get_server_info()
print("Server version:" + server_version)

# One cursor is shared by every query in the script; the schema is chosen on
# the connection itself rather than passed to connect().
cursor = conn.cursor()
conn.select_db(db)
|
||
|
|
||
|
# Prepare the output directory. With `clean` enabled, any previous dump is
# discarded so that every later step starts from scratch.
if needclean:
    print("Init dump")
    if os.path.exists("dump"):
        shutil.rmtree("dump")

# Always make sure the directory exists: the steps below write into "dump/"
# whether or not a clean start was requested, and a bare os.mkdir would raise
# FileExistsError when the directory is already there from an earlier run.
os.makedirs("dump", exist_ok=True)
|
||
|
|
||
|
# Dump the forum list. The step is skipped when dump/forums.json is already
# present.
if not os.path.exists("dump/forums.json"):
    print("Load forums")
    cursor.execute(f'SELECT * FROM {table}forums')
    forum_rows = cursor.fetchall()

    # The first entry (id -1) is a synthetic bucket that is not read from
    # the database; the real forums follow in query order.
    forums_struct = [{"name": "杂项", "description": "无法归类的帖子", "id": -1}]
    forums_struct.extend(
        {"name": row[1], "description": row[2], "id": row[0]}
        for row in forum_rows
    )
    forum_id_list = [row[0] for row in forum_rows]
    print(f"Forums count:{len(forum_rows)}")

    # Human-readable forum descriptions.
    with open("dump/forums.json", 'w') as forums_file:
        json.dump(forums_struct, forums_file, ensure_ascii=False, indent=4)
    # Bare list of the real forum ids (the synthetic -1 entry is excluded).
    with open("dump/forums_tmp.json", 'w') as ids_file:
        json.dump(forum_id_list, ids_file, ensure_ascii=False)
|
||
|
|
||
|
# Dump topic headers grouped by forum: one JSON object per line in
# dump/forums/<forum id>.manifest. The step is skipped when the dump/forums
# directory already exists.
if not os.path.exists("dump/forums"):
    print("Load topics")
    os.mkdir("dump/forums")
    print("Load forums from json")
    with open("dump/forums_tmp.json", 'r') as file:
        forum_id_list = json.load(file)
    # A set gives O(1) membership tests in the per-topic loop below.
    known_forum_ids = set(forum_id_list)

    cursor.execute(f'SELECT COUNT(*) FROM {table}topics')
    counts = cursor.fetchall()[0][0]
    print(f"Topics count:{counts}")
    cursor.execute(f'SELECT * FROM {table}topics')
    result = cursor.fetchall()
    print("Get topics")

    # Collect the manifest lines per forum first so that every manifest is
    # opened once instead of once per topic. Row order is preserved inside
    # each forum.
    lines_by_forum = {}
    count = 0
    for row in result:
        count += 1
        forum_id = row[7]
        if forum_id not in known_forum_ids:
            # Topic points at a forum that was not dumped: file it under
            # the catch-all id -1.
            forum_id = -1
        entry = {"name": row[1], "time": row[3], "id": row[0]}
        lines_by_forum.setdefault(forum_id, []).append(
            json.dumps(entry, ensure_ascii=False) + '\n')
        if count % 100 == 0:
            # Carriage return keeps the progress counter on one line.
            print(f" load {count}", end="\r")

    for forum_id, lines in lines_by_forum.items():
        # Append mode creates the file when it is missing, so no separate
        # "create empty file" step is needed.
        with open(f"dump/forums/{forum_id}.manifest", 'a') as file:
            file.writelines(lines)
    print(f" load {count}")
|
||
|
|
||
|
def _user_description(row):
    """Return the rendered profile text for one row of the users table.

    Every optional profile column that is not NULL contributes one labelled
    line; the assembled text is then passed through ``parser.format``.

    The fields are appended one at a time on purpose. Chaining them as
    ``a if c1 else "" + b if c2 else "" + ...`` does not concatenate: a
    conditional expression binds more loosely than ``+``, so such a chain
    yields only the first field that is present.
    """
    sections = []
    if row[10] is not None:
        sections.append("签名(sig): " + row[10] + "\n")
    # Column 11 gates the e-mail line (presumably a "show e-mail publicly"
    # flag -- TODO confirm against the schema).
    if row[4] is not None and row[11] == 1:
        sections.append("邮箱(Email): " + row[4] + "\n")
    if row[6] is not None:
        sections.append(
            "网站(website): [url=" + row[6] + "]" + row[6] + "[/url]\n")
    if row[7] is not None:
        sections.append("状态(status): " + row[7] + "\n")
    if row[8] is not None:
        sections.append("属地(from): " + row[8] + "\n")
    if row[9] is not None:
        sections.append("爱好(interests): " + row[9] + "\n")
    return parser.format("".join(sections))


# Dump all users into one JSON object keyed by the user id (as a string).
# The step is skipped when dump/users.json is already present.
if not os.path.exists("dump/users.json"):
    print("Load users")
    cursor.execute(f'SELECT * FROM {table}users')
    result = cursor.fetchall()
    user_json = {
        str(row[0]): {
            "name": row[1],
            "time": row[2],
            "email": row[4] if row[4] is not None else "",
            "description": _user_description(row),
        }
        for row in result
    }
    # NOTE(review): the file is written with the platform default encoding
    # although ensure_ascii=False emits non-ASCII text. Pinning
    # encoding="utf-8" here also requires the code that reads this file
    # back to change -- confirm before doing so.
    with open("dump/users.json", 'w') as file:
        json.dump(user_json, file, ensure_ascii=False)
|
||
|
|
||
|
# Dump every post as one JSON line in dump/topics/<topic id>.manifest.
# The step is skipped when the dump/topics directory already exists.
if not os.path.exists("dump/topics"):

    def rend(text, en_bbcode):
        """Return *text* run through ``parser.format`` when en_bbcode is 1.

        Any other flag value returns the text unchanged.
        """
        if en_bbcode == 1:
            return parser.format(text)
        return text

    print("Load posts")
    os.mkdir("dump/topics")
    print("Load users from json")
    with open("dump/users.json", 'r') as file:
        user_json = json.load(file)
    cursor.execute(f'SELECT COUNT(*) FROM {table}posts')
    counts = cursor.fetchall()[0][0]
    print(f"Posts count:{counts}")

    page_size = 100
    oppsize = 0   # number of posts requested so far (may overshoot counts)
    last_id = -1  # highest post_id seen; the next page starts after it
    while oppsize < counts:
        cursor.execute(
            f'SELECT * FROM {table}posts WHERE `post_id` > {last_id} ORDER BY post_id LIMIT {page_size}')
        result = cursor.fetchall()
        if not result:
            # Fewer rows than COUNT(*) announced (e.g. rows deleted during
            # the dump): stop instead of failing on result[-1].
            break
        last_id = result[-1][0]
        oppsize += page_size

        for post in result:
            # The post body lives in a separate table; execute() returns
            # the number of matching rows.
            found = cursor.execute(
                f'SELECT * FROM {table}posts_text WHERE `post_id`={post[0]}')
            if found == 0:
                continue
            s_res = cursor.fetchall()[0]
            if s_res[1] is None:
                continue

            subject = ("主题:" + s_res[4] + "\n\n") if s_res[4] is not None else ""
            raw_text = subject + s_res[1]
            # Column 3 is stripped from the text as a ":<value>" suffix
            # (presumably the per-post BBCode uid -- TODO confirm). Skip
            # the replace when it is empty or NULL: replacing a bare ":"
            # would delete every colon in the post.
            uid = "" if s_res[3] is None else str(s_res[3])
            if uid:
                raw_text = raw_text.replace(":" + uid, "")
            s_text = rend(raw_text, post[10])

            sender_id = post[3]
            send_name = post[9] if post[9] is not None else ""
            if sender_id == -1:
                # Poster id -1 gets the "[未注册]" prefix on whatever name
                # was stored with the post.
                send_name = "[未注册]" + send_name
            elif send_name == "" and str(sender_id) in user_json:
                # No name stored with the post: fall back to the user dump.
                send_name = user_json[str(sender_id)]["name"]

            topic_id = post[1]
            with open(f"dump/topics/{topic_id}.manifest", 'a') as file:
                file.write(json.dumps(
                    {"text": s_text, "poster_id": sender_id,
                     "poster_name": send_name},
                    ensure_ascii=False) + "\n")
        # Carriage return keeps the progress counter on one line.
        print(f" load {oppsize}", end="\r")
    print(f" load {counts}")
|
||
|
|
||
|
|
||
|
# Release the database resources. The connection is closed even when closing
# the cursor raises.
try:
    cursor.close()
finally:
    conn.close()
|