"""Dump forums, topics, users and posts from a MySQL database into ``dump/``.

Connection settings are read from the ``[dump]`` table of ``config.toml``.
Each stage is skipped when its output already exists, so an earlier run can be
continued. Set ``clean = true`` in the config to wipe ``dump/`` and start over.

NOTE: a stage interrupted half-way leaves its output directory behind, and the
stage is then skipped on the next run; use ``clean = true`` to rebuild it.
"""
import json
import os
import shutil
import tomllib
from collections import defaultdict

import pymysql

from t_parser import parser

# Number of posts fetched per keyset-pagination query.
PAGE_SIZE = 100


def _write_json(path, payload, **dump_kwargs):
    """Serialise *payload* to *path* as UTF-8 JSON without ASCII escaping.

    The encoding is explicit because the data contains non-ASCII text and the
    platform default encoding is not guaranteed to be able to represent it.
    """
    with open(path, "w", encoding="utf-8") as fh:
        json.dump(payload, fh, ensure_ascii=False, **dump_kwargs)


def _read_json(path):
    """Load and return the UTF-8 JSON document stored at *path*."""
    with open(path, "r", encoding="utf-8") as fh:
        return json.load(fh)


def _append_lines(path, lines):
    """Append the already newline-terminated *lines* to *path* as UTF-8."""
    with open(path, "a", encoding="utf-8") as fh:
        fh.writelines(lines)


def _user_description(row):
    """Build the raw (unformatted) profile description for one ``users`` row.

    Every column that is not NULL contributes one labelled line. The original
    expression chained ``a if c else "" + b if d else ...``; because a
    conditional expression binds looser than ``+`` it only ever produced the
    first available field instead of concatenating all of them.
    """
    parts = []
    if row[10] is not None:
        parts.append("签名(sig): " + row[10] + "\n")
    # row[11] == 1 is presumably the "e-mail is public" flag -- confirm
    # against the table schema.
    if row[4] is not None and row[11] == 1:
        parts.append("邮箱(Email): " + row[4] + "\n")
    if row[6] is not None:
        parts.append("网站(website): [url=" + row[6] + "]" + row[6] + "[/url]\n")
    if row[7] is not None:
        parts.append("状态(status): " + row[7] + "\n")
    if row[8] is not None:
        parts.append("属地(from): " + row[8] + "\n")
    if row[9] is not None:
        parts.append("爱好(interests): " + row[9] + "\n")
    return "".join(parts)


def _render_post(text_row, bbcode_enabled):
    """Return the post body, prefixed by its subject when one is present.

    The ``":" + str(text_row[3])`` marker is stripped from the text first
    (presumably the per-post BBCode uid -- confirm against the schema); the
    result goes through ``parser.format`` only when *bbcode_enabled* is 1.
    """
    subject = text_row[4]
    header = ("主题:" + subject + "\n\n") if subject is not None else ""
    raw = (header + text_row[1]).replace(":" + str(text_row[3]), "")
    if bbcode_enabled == 1:
        return parser.format(raw)
    return raw


def _poster_name(post, users):
    """Resolve the display name for the author of *post*.

    Posters with id -1 keep the name stored on the post, prefixed with the
    "[未注册]" tag. Other posters fall back to the name recorded in *users*
    (keyed by the stringified id) when the post carries no name of its own.
    """
    poster_id = post[3]
    name = post[9] if post[9] is not None else ""
    if poster_id == -1:
        return "[未注册]" + name
    if name == "" and str(poster_id) in users:
        name = users[str(poster_id)]["name"]
    return name


def _dump_forums(cursor, table):
    """Write ``dump/forums.json`` plus the id list in ``dump/forums_tmp.json``."""
    print("Load forums")
    cursor.execute(f"SELECT * FROM {table}forums")
    rows = cursor.fetchall()
    # Catch-all forum (id -1) for topics whose forum id is unknown.
    forums = [{"name": "杂项", "description": "无法归类的帖子", "id": -1}]
    forums.extend(
        {"name": row[1], "description": row[2], "id": row[0]} for row in rows
    )
    print(f"Forums count:{len(rows)}")
    _write_json("dump/forums.json", forums, indent=4)
    _write_json("dump/forums_tmp.json", [row[0] for row in rows])


def _dump_topic_manifests(cursor, table):
    """Write one ``dump/forums/<forum_id>.manifest`` (JSON lines) per forum."""
    print("Load topics")
    os.mkdir("dump/forums")
    print("Load forums from json")
    # A set gives O(1) membership tests for the per-topic lookup below.
    known_forums = set(_read_json("dump/forums_tmp.json"))
    cursor.execute(f"SELECT COUNT(*) FROM {table}topics")
    counts = cursor.fetchall()[0][0]
    print(f"Topics count:{counts}")
    cursor.execute(f"SELECT * FROM {table}topics")
    rows = cursor.fetchall()
    print("Get topics")
    # Group in memory so each manifest is opened once instead of once per
    # topic; the order of lines inside each manifest is unchanged.
    lines_by_forum = defaultdict(list)
    for row in rows:
        forum_id = row[7] if row[7] in known_forums else -1
        lines_by_forum[forum_id].append(
            json.dumps(
                {"name": row[1], "time": row[3], "id": row[0]},
                ensure_ascii=False,
            )
            + "\n"
        )
    for forum_id, lines in lines_by_forum.items():
        _append_lines(f"dump/forums/{forum_id}.manifest", lines)
    print(f" load {len(rows)}")


def _dump_users(cursor, table):
    """Write ``dump/users.json``, keyed by the stringified user id."""
    print("Load users")
    cursor.execute(f"SELECT * FROM {table}users")
    users = {
        str(row[0]): {
            "name": row[1],
            "time": row[2],
            "email": row[4] if row[4] is not None else "",
            "description": parser.format(_user_description(row)),
        }
        for row in cursor.fetchall()
    }
    _write_json("dump/users.json", users)


def _dump_posts(cursor, table):
    """Write one ``dump/topics/<topic_id>.manifest`` (JSON lines) per topic.

    Posts are read with keyset pagination on ``post_id``. Paging stops on an
    empty or short page rather than on a pre-computed row count, so a page
    that comes back empty no longer raises ``IndexError``.
    """
    print("Load posts")
    os.mkdir("dump/topics")
    print("Load users from json")
    users = _read_json("dump/users.json")
    cursor.execute(f"SELECT COUNT(*) FROM {table}posts")
    counts = cursor.fetchall()[0][0]
    print(f"Posts count:{counts}")

    last_id = -1
    done = 0
    while True:
        # The table prefix is an identifier and cannot be a bound parameter;
        # the values are passed as DB-API parameters.
        cursor.execute(
            f"SELECT * FROM {table}posts WHERE `post_id` > %s "
            "ORDER BY post_id LIMIT %s",
            (last_id, PAGE_SIZE),
        )
        page = cursor.fetchall()
        if not page:
            break
        last_id = page[-1][0]

        # Collect this page's lines per topic so each manifest is opened once
        # per page; per-topic ordering by post_id is preserved.
        lines_by_topic = defaultdict(list)
        for post in page:
            cursor.execute(
                f"SELECT * FROM {table}posts_text WHERE `post_id`=%s",
                (post[0],),
            )
            text_row = cursor.fetchone()
            # Skip posts that have no text row or a NULL body.
            if text_row is None or text_row[1] is None:
                continue
            record = {
                "text": _render_post(text_row, post[10]),
                "poster_id": post[3],
                "poster_name": _poster_name(post, users),
            }
            lines_by_topic[post[1]].append(
                json.dumps(record, ensure_ascii=False) + "\n"
            )
        for topic_id, lines in lines_by_topic.items():
            _append_lines(f"dump/topics/{topic_id}.manifest", lines)

        done += len(page)
        print(f" load {done}", end="\r")
        if len(page) < PAGE_SIZE:
            # A short page is the last one; skip the extra empty query.
            break
    print(f" load {done}")


print("Load config")
with open("config.toml", "rb") as file:
    cfg = tomllib.load(file)["dump"]
server = cfg["server"]
port = cfg["port"]
user = cfg["user"]
passwd = cfg["passwd"]
charset = cfg["charset"]
db = cfg["db"]
table = cfg["table"]  # table-name prefix placed in front of every table name
needclean = cfg["clean"]

print(f"Connect {server}:{port} as {user}")
conn = pymysql.connect(
    host=server,
    port=port,
    user=user,
    password=passwd,
    charset=charset,
)
# try/finally guarantees the cursor and connection are released even when a
# stage fails half-way.
try:
    print("Server version:" + conn.get_server_info())
    cursor = conn.cursor()
    try:
        conn.select_db(db)

        if needclean:
            print("Init dump")
            if os.path.exists("dump"):
                shutil.rmtree("dump")
        # Also covers a first run with clean = false, where the directory
        # does not exist yet.
        os.makedirs("dump", exist_ok=True)

        if not os.path.exists("dump/forums.json"):
            _dump_forums(cursor, table)
        if not os.path.exists("dump/forums"):
            _dump_topic_manifests(cursor, table)
        if not os.path.exists("dump/users.json"):
            _dump_users(cursor, table)
        if not os.path.exists("dump/topics"):
            _dump_posts(cursor, table)
    finally:
        cursor.close()
finally:
    conn.close()