bbcovert/dump.py

165 lines
5.4 KiB
Python

import tomllib
import pymysql
import json
import os
import shutil
from t_parser import parser
# Read the [dump] table of config.toml. Every key below is looked up
# unconditionally, so a missing key raises KeyError.
print("Load config")
with open("config.toml", "rb") as config_file:
    cfg = tomllib.load(config_file)["dump"]

# Connection settings, unpacked in the same order the keys are looked up.
# `table` is spliced in front of table names in the SQL further down.
server, port, user, passwd, charset, db, table = (
    cfg[key]
    for key in ("server", "port", "user", "passwd", "charset", "db", "table")
)
# Read from the "clean" key; checked below before the dump directory is reset.
needclean = cfg["clean"]
# Open the MySQL connection with the settings loaded from the config, report
# the server version, then create the cursor shared by every query below.
print(f"Connect {server}:{port} as {user}")
connect_kwargs = {
    "host": server,
    "port": port,
    "user": user,
    "password": passwd,
    "charset": charset,
}
conn = pymysql.connect(**connect_kwargs)
server_version = conn.get_server_info()
print("Server version:" + server_version)
cursor = conn.cursor()
# The database is selected after connecting rather than passed to connect().
conn.select_db(db)
# Prepare the output directory. With clean enabled, any previous dump is
# discarded so every section below is regenerated from the database.
if needclean:
    print("Init dump")
    if os.path.exists("dump"):
        shutil.rmtree("dump")
# Ensure the directory exists on every run, not only on a clean one: a first
# run with clean disabled would otherwise fail on the first file write.
os.makedirs("dump", exist_ok=True)
# Export the forum list. An existing dump/forums.json means this step already
# ran, so it is skipped.
if not os.path.exists("dump/forums.json"):
    print("Load forums")
    cursor.execute(f'SELECT * FROM {table}forums')
    result = cursor.fetchall()

    # The first entry is a catch-all forum with id -1; topics whose forum id is
    # not in the exported list are filed under it by the topics step.
    forums_struct = [{"name": "杂项", "description": "无法归类的帖子", "id": -1}]
    forums_struct.extend(
        {"name": row[1], "description": row[2], "id": row[0]} for row in result
    )
    forum_id_list = [row[0] for row in result]
    count = len(forum_id_list)
    print(f"Forums count:{count}")

    # NOTE(review): both files are opened with the platform default encoding
    # while ensure_ascii=False emits raw non-ASCII text — confirm what the
    # consumers of the dump expect before pinning an encoding.
    with open("dump/forums.json", 'w') as file:
        json.dump(forums_struct, file, ensure_ascii=False, indent=4)
    # Side file with only the real forum ids; the topics step reloads it.
    with open("dump/forums_tmp.json", 'w') as file:
        json.dump(forum_id_list, file, ensure_ascii=False)
# Export the topic list: one manifest per forum under dump/forums/, one JSON
# object per line. The directory itself is the "already done" marker, so the
# whole step is skipped when it exists.
if not os.path.exists("dump/forums"):
    print("Load topics")
    os.mkdir("dump/forums")
    print("Load forums from json")
    with open("dump/forums_tmp.json", 'r') as file:
        forum_id_list = json.load(file)
    # Built once so the per-topic membership test is O(1) instead of a list scan.
    known_forum_ids = set(forum_id_list)

    cursor.execute(f'SELECT COUNT(*) FROM {table}topics')
    counts = cursor.fetchall()[0][0]
    print(f"Topics count:{counts}")
    cursor.execute(f'SELECT * FROM {table}topics')
    result = cursor.fetchall()
    print("Get topics")

    count = 0  # stays 0 for the final report when the table is empty
    for count, i in enumerate(result, start=1):
        # Topics pointing at a forum that was not exported go to the
        # catch-all manifest with id -1.
        forums_ids = i[7] if i[7] in known_forum_ids else -1
        fpath = f"dump/forums/{forums_ids}.manifest"
        # Append mode creates the file on first use, so no separate
        # "create an empty file" step is needed.
        with open(fpath, 'a') as file:
            file.write(json.dumps(
                {"name": i[1], "time": i[3], "id": i[0]}, ensure_ascii=False)+'\n')
        if count % 100 == 0:
            print(f" load {count}", end="\r")
    print(f" load {count}")
def _profile_bbcode(row):
    """Return the BBCode profile text assembled from one users-table row.

    Every optional column that is not NULL contributes one labelled line, in
    this order: signature (10), e-mail (4), website (6), status (7),
    location (8), interests (9). The e-mail line is also gated on column 11
    being 1 (NOTE(review): presumably a "show e-mail" flag — confirm against
    the schema).

    Each line is built independently. A single chained
    ``a if c1 else "" + b if c2 else ...`` expression parses as nested
    conditionals and yields only the first field that is present.
    """
    lines = []
    if row[10] is not None:
        lines.append("签名(sig): " + row[10] + "\n")
    if row[4] is not None and row[11] == 1:
        lines.append("邮箱(Email): " + row[4] + "\n")
    if row[6] is not None:
        lines.append("网站(website): [url=" + row[6] + "]" + row[6] + "[/url]\n")
    if row[7] is not None:
        lines.append("状态(status): " + row[7] + "\n")
    if row[8] is not None:
        lines.append("属地(from): " + row[8] + "\n")
    if row[9] is not None:
        lines.append("爱好(interests): " + row[9] + "\n")
    return "".join(lines)


# Export all users into dump/users.json, keyed by the stringified value of
# column 0. An existing file means this step already ran and is skipped.
if not os.path.exists("dump/users.json"):
    print("Load users")
    cursor.execute(f'SELECT * FROM {table}users')
    result = cursor.fetchall()
    user_json = {
        str(i[0]): {
            "name": i[1],
            "time": i[2],
            # A NULL e-mail is exported as an empty string.
            "email": i[4] if i[4] is not None else "",
            "description": parser.format(_profile_bbcode(i)),
        }
        for i in result
    }
    with open("dump/users.json", 'w') as file:
        json.dump(user_json, file, ensure_ascii=False)
# Export every post into dump/topics/<topic_id>.manifest, one JSON object per
# line. The directory itself is the "already done" marker, so the whole step
# is skipped when it exists.
if not os.path.exists("dump/topics"):
    print("Load posts")
    os.mkdir("dump/topics")
    print("Load users from json")
    with open("dump/users.json", 'r') as file:
        user_json = json.load(file)
    cursor.execute(f'SELECT COUNT(*) FROM {table}posts')
    counts = cursor.fetchall()[0][0]
    print(f"Posts count:{counts}")

    def rend(text, en_bbcode):
        """Return *text* run through the BBCode parser when *en_bbcode* is 1, else unchanged."""
        if en_bbcode == 1:
            return parser.format(text)
        return text

    page_size = 100
    fetched_posts = 0
    last_id = -1
    # Keyset pagination on post_id. The loop ends when a page comes back empty
    # or short rather than trusting the COUNT(*) taken above, so a mismatch
    # between the two cannot index into an empty result.
    while True:
        cursor.execute(
            f'SELECT * FROM {table}posts WHERE `post_id` > {last_id} ORDER BY post_id LIMIT {page_size}')
        result = cursor.fetchall()
        if not result:
            break
        last_id = result[-1][0]
        fetched_posts += len(result)
        for i in result:
            s_retcode = cursor.execute(
                f'SELECT * FROM {table}posts_text WHERE `post_id`={i[0]}')
            # Posts with no text row, or with a NULL body, are not exported.
            if s_retcode == 0:
                continue
            s_res = cursor.fetchall()[0]
            if s_res[1] is None:
                continue

            # An optional subject line is put in front of the body.
            subject = ("主题:"+s_res[4]+"\n\n") if s_res[4] is not None else ""
            # Every ":<column 3>" occurrence is removed before rendering.
            # NOTE(review): presumably the per-post BBCode uid embedded in
            # the stored tags — confirm against the schema.
            raw_text = (subject + s_res[1]).replace(":"+str(s_res[3]), "")
            s_text = rend(raw_text, i[10])

            sender_id = i[3]
            # Prefer the name stored on the post; fall back to the exported
            # user list for registered posters.
            send_name = i[9] if i[9] is not None else ""
            if send_name == "" and sender_id != -1 and str(sender_id) in user_json:
                send_name = user_json[str(sender_id)]["name"]
            # Poster id -1 gets a visible "[未注册]" prefix on the name.
            if sender_id == -1:
                send_name = "[未注册]"+send_name

            topic_id = i[1]
            fpath = f"dump/topics/{topic_id}.manifest"
            with open(fpath, 'a') as file:
                file.write(json.dumps(
                    {"text": s_text, "poster_id": sender_id, "poster_name": send_name}, ensure_ascii=False)+"\n")
        print(f" load {fetched_posts}", end="\r")
        # A short page is the last one; skip the extra empty query.
        if len(result) < page_size:
            break
    print(f" load {fetched_posts}")
# Release the database resources: the cursor first, then its connection.
for db_resource in (cursor, conn):
    db_resource.close()