From ac437a0f6ada62c2ee60253a0c98b35cc09e64d9 Mon Sep 17 00:00:00 2001 From: cxykevin Date: Sat, 22 Jun 2024 14:50:40 +0800 Subject: [PATCH] origin commit --- .gitignore | 6 ++ .vscode/launch.json | 29 +++++++ .vscode/settings.json | 3 + config.sample.toml | 14 ++++ dump.py | 164 +++++++++++++++++++++++++++++++++++++ readme.md | 35 ++++++++ render.py | 170 +++++++++++++++++++++++++++++++++++++++ requirements.txt | 4 + t_parser.py | 39 +++++++++ test.py | 7 ++ theme.sample/home.html | 25 ++++++ theme.sample/posts.html | 41 ++++++++++ theme.sample/topics.html | 35 ++++++++ theme.sample/user.html | 22 +++++ theme/home.html | 25 ++++++ theme/posts.html | 41 ++++++++++ theme/topics.html | 35 ++++++++ theme/user.html | 22 +++++ 18 files changed, 717 insertions(+) create mode 100644 .gitignore create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json create mode 100644 config.sample.toml create mode 100644 dump.py create mode 100644 readme.md create mode 100644 render.py create mode 100644 requirements.txt create mode 100644 t_parser.py create mode 100644 test.py create mode 100644 theme.sample/home.html create mode 100644 theme.sample/posts.html create mode 100644 theme.sample/topics.html create mode 100644 theme.sample/user.html create mode 100644 theme/home.html create mode 100644 theme/posts.html create mode 100644 theme/topics.html create mode 100644 theme/user.html diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..44872b5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +__pycache__/** +build +dump +config.toml + +venv diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..87939b0 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,29 @@ +{ + // 使用 IntelliSense 了解相关属性。 + // 悬停以查看现有属性的描述。 + // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python 调试: dump", + "type": "debugpy", + "request": "launch", + "program": "dump.py", + "console": "integratedTerminal" + }, + { + "name": "Python 调试: build", + "type": "debugpy", + "request": "launch", + "program": "render.py", + "console": "integratedTerminal" + }, + { + "name": "Python 调试: http server", + "type": "debugpy", + "request": "launch", + "program": "test.py", + "console": "integratedTerminal" + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..642ff51 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.REPL.enableREPLSmartSend": false +} \ No newline at end of file diff --git a/config.sample.toml b/config.sample.toml new file mode 100644 index 0000000..c0e9e51 --- /dev/null +++ b/config.sample.toml @@ -0,0 +1,14 @@ +[dump] +server = "127.0.0.1" # 服务器 IP,推荐本地数据库或者直接在服务器上运行脚本 +port = 3306 # 端口,一般不用改 +user = "root" # 用户名 +passwd = "your_passwd" # 密码,必填 +charset = "utf8mb4" # 字符集,出问题可以试试 `utf8` +db = "smf" # 数据库名,根据自己情况 +table = "phpbb2" # 表前缀,一般不用改 +clean = true # 是否清理先前生成,推荐打开 + +[render] +show_st = false # 显示详细信息,打开会降低生成速度 +clean = true # 是否清理先前生成,推荐打开 +page_split = 20 # 多少内容为一页,推荐 20,25,50 diff --git a/dump.py b/dump.py new file mode 100644 index 0000000..5b0da58 --- /dev/null +++ b/dump.py @@ -0,0 +1,164 @@ +import tomllib +import pymysql +import json +import os +import shutil + +from t_parser import parser + + +print("Load config") + +with open("config.toml", 'rb') as file: + cfg = tomllib.load(file)["dump"] +server = cfg["server"] +port = cfg["port"] +user = cfg["user"] +passwd = cfg["passwd"] +charset = cfg["charset"] +db = cfg["db"] +table = cfg["table"] +needclean = cfg["clean"] + +print(f"Connect {server}:{port} as {user}") + +conn = pymysql.connect( + host=server, + port=port, + user=user, + password=passwd, + charset=charset +) + +print("Server version:"+conn.get_server_info()) + +cursor = conn.cursor() +conn.select_db(db) + +if (needclean): + print("Init dump") + if os.path.exists("dump"): + shutil.rmtree("dump") + os.mkdir("dump") + +if not (os.path.exists("dump/forums.json")): + print("Load forums") + cursor.execute(f'SELECT * FROM {table}forums') + result = cursor.fetchall() + forums_struct = [{"name": "杂项", "description": "无法归类的帖子", "id": -1}] + count = 0 + forum_id_list = [] + for t in result: + count += 1 + forums_struct.append({"name": t[1], "description": t[2], "id": t[0]}) + forum_id_list.append(t[0]) + print(f"Forums count:{count}") + with open("dump/forums.json", 'w') as file: + json.dump(forums_struct, file, ensure_ascii=False, indent=4) + with open("dump/forums_tmp.json", 'w') as file: + json.dump(forum_id_list, file, ensure_ascii=False) + +if not (os.path.exists("dump/forums")): + print(f"Load topics") + os.mkdir("dump/forums") + print("Load forums from json") + with open("dump/forums_tmp.json", 'r') as file: + forum_id_list = json.load(file) + cursor.execute(f'SELECT COUNT(*) FROM {table}topics') + counts = cursor.fetchall()[0][0] + print(f"Topics count:{counts}") + cursor.execute(f'SELECT * FROM {table}topics') + result = cursor.fetchall() + print(f"Get topics") + count = 0 + for i in result: + count += 1 + forums_ids = i[7] + if (forums_ids not in forum_id_list): + forums_ids = -1 + fpath = f"dump/forums/{forums_ids}.manifest" + if not (os.path.exists(fpath)): + with open(fpath, "w") as file: + file.write("") + with open(fpath, 'a') as file: + file.write(json.dumps( + {"name": i[1], "time": i[3], "id": i[0]}, ensure_ascii=False)+'\n') + if (count % 100 == 0): + print(f" load {count}", end="\r") + print(f" load {count}") + +if not (os.path.exists("dump/users.json")): + print("Load users") + cursor.execute(f'SELECT * FROM {table}users') + result = cursor.fetchall() + user_json = { + str(i[0]): { + "name": i[1], + "time": i[2], + "email": (i[4]) if (i[4] is not None) else "", + "description": parser.format( + ("签名(sig): "+i[10]+"\n") if (i[10] is not None) else "" + + ("邮箱(Email): "+i[4]+"\n") if (i[4] is not None and i[11] == 1) else "" + + ("网站(website): [url="+i[6]+"]"+i[6]+"[/url]\n") if (i[6] is not None) else "" + + ("状态(status): "+i[7]+"\n") if (i[7] is not None) else "" + + ("属地(from): "+i[8]+"\n") if (i[8] is not None) else "" + + ("爱好(interests): "+i[9] + + "\n") if (i[9] is not None) else "" + ) + } for i in result + } + with open("dump/users.json", 'w') as file: + json.dump(user_json, file, ensure_ascii=False) + +if not (os.path.exists("dump/topics")): + print("Load posts") + os.mkdir("dump/topics") + print("Load users from json") + with open("dump/users.json", 'r') as file: + user_json = json.load(file) + cursor.execute(f'SELECT COUNT(*) FROM {table}posts') + counts = cursor.fetchall()[0][0] + print(f"Posts count:{counts}") + page_size = 100 + oppsize = 0 + last_id = -1 + while 1: + if (oppsize >= counts): + break + cursor.execute( + f'SELECT * FROM {table}posts WHERE `post_id` > {last_id} ORDER BY post_id LIMIT {page_size}') + result = cursor.fetchall() + last_id = result[-1][0] + oppsize += page_size + for i in result: + s_retcode = cursor.execute( + f'SELECT * FROM {table}posts_text WHERE `post_id`={i[0]}') + if (s_retcode == 0): + continue + s_res = cursor.fetchall()[0] + if (s_res[1] is None): + continue + + def rend(str, en_bbcode): + if (en_bbcode == 1): + return parser.format(str) + return str + s_text = rend(( + (("主题:"+s_res[4]+"\n\n") if s_res[4] is not None else "") + s_res[1]).replace(":"+str(s_res[3]), ""), i[10]) + sender_id = i[3] + send_name = (i[9] if (i[9] is not None) else "") + if (send_name == "" and sender_id != -1 and str(sender_id) in user_json): + send_name = user_json[str(sender_id)]["name"] + if (sender_id == -1): + send_name = "[未注册]"+send_name + topic_id = i[1] + fpath = f"dump/topics/{topic_id}.manifest" + with open(fpath, 'a') as file: + file.write(json.dumps( + {"text": s_text, "poster_id": sender_id, "poster_name": send_name}, ensure_ascii=False)+"\n") + print(f" load {oppsize}", end="\r") + print(f" load {counts}") + + +cursor.close() +conn.close() diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..5049b2e --- /dev/null +++ b/readme.md @@ -0,0 +1,35 @@ +# BBcovert + +> 本项目可以将phpbb论坛中的帖子和评论内容转换成纯静态页面 + +## 运行 + +> 请在字符集设置为utf-8的linux下运行! + +```sh +# 请先配置config.toml! +# 从sample复制: +cp config.sample.toml config.toml + +# 初始化虚拟环境,推荐先换源 +python -m venv venv +source venv/bin/activate +pip install -r requirements.txt + +# 下载数据库 +python dump.py + +# 生成静态页面 +python render.py + +# 测试生成 +python test.py +``` + +## 结构 + +`theme` 文件夹内是主题文件 + +数据库会被下载到 `dump` 文件夹 + +生成产物在 `build` 文件夹 diff --git a/render.py b/render.py new file mode 100644 index 0000000..1e82a74 --- /dev/null +++ b/render.py @@ -0,0 +1,170 @@ +import jinja2 +import os +import shutil +import json +import sys +import time +import tomllib +import math + +print("Load config") +with open("config.toml", 'rb') as file: + cfg = tomllib.load(file)["render"] +show_st = cfg["show_st"] +clean = cfg["clean"] +page_split = cfg["page_split"] + +print("Init build") + +if (clean): + if os.path.exists("build"): + shutil.rmtree("build") + os.mkdir("build") + +if not os.path.exists("theme"): + print("Connot find any themes") + sys.exit(1) + + +def render_func(filepath, extpath, **kwargs): + if (show_st): + print(f' render "{extpath}"') + with open(filepath, 'r') as file: + result = jinja2.Template(file.read()).render(**kwargs) + with open(extpath, 'w') as file: + file.write(result) + + +if not (os.path.exists("build/home.html")): + print("Render home page") + with open("dump/forums.json", 'r') as file: + json_res = json.load(file) + render_args = { + "forums": [ + { + "url": "/topics/"+str(i["id"])+"/1.html", + "name": i["name"], + "description": i["description"] + } for i in json_res + ] + } + render_func("theme/home.html", "build/index.html", **render_args) + +if not (os.path.exists("build/users")): + print("Render users") + os.mkdir("build/users") + with open("dump/users.json", 'r') as file: + user_json_res = json.load(file) + count = 0 + print(f"Users count: {len(user_json_res)}") + for ids, context in user_json_res.items(): + count += 1 + render_func("theme/user.html", "build/users/"+str(ids)+".html", user={ + "name": context["name"], + "description": context["description"], + "reg_time": time.strftime("%Y-%m-%d", time.localtime(context["time"])) + }) + if not (show_st): + if (count % 50 == 0): + print(f" render {count}", end='\r') + print(f" render {count}") + +if not (os.path.exists("build/topics")): + print("Render topics") + os.mkdir("build/topics") + with open("dump/forums.json", 'r') as file: + json_res = json.load(file) + count = 0 + for r in os.listdir("dump/forums"): + count += 1 + u_cfg = [] + with open("dump/forums/"+r, 'r') as file: + strs = file.readlines() + n_count = 0 + for i in strs: + n_count += 1 + line_st = i.rstrip("\r\n").lstrip("\r\n") + if (line_st == ""): + continue + line_json = json.loads(line_st) + u_cfg.append( + { + "id": n_count, + "name": line_json["name"], + "time": time.strftime("%Y-%m-%d", time.localtime(line_json["time"])), + "url": "/posts/"+str(line_json["id"])+"/1.html" + } + ) + ids = int(r.split(".")[0]) + os.mkdir("build/topics/" + str(ids)) + for page_counts in range(1, math.ceil(len(strs)/page_split)+1): + render_func("theme/topics.html", "build/topics/" + + str(ids)+"/"+str(page_counts)+".html", topics={ + "topics": u_cfg[page_split*(page_counts-1):page_split*page_counts], + "from_forum": list(filter(lambda d: d.get('id') == ids, json_res))[0]["name"], + "from_url": "/home.html" + }, page={ + "now": page_counts, + "count": math.ceil(len(strs)/page_split), + "first_url": "/topics/"+str(ids)+"/"+str(1)+".html", + "prev_url": ("/topics/"+str(ids)+"/"+str(page_counts-1)+".html") if (page_counts > 1) else "", + "next_url": ("/topics/"+str(ids)+"/"+str(page_counts+1)+".html") if ((page_counts) < math.ceil(len(strs)/page_split)) else "", + "end_url": "/topics/"+str(ids)+"/"+str(math.ceil(len(strs)/page_split))+".html" + }) + if not (show_st): + print(f" render {count}", end='\r') + print(f" render {count}") + +if not (os.path.exists("build/posts")): + print("Render posts") + os.mkdir("build/posts") + n_count = 0 + for r in os.listdir("dump/forums"): + with open("dump/forums/"+r, 'r') as file: + strs = file.readlines() + page_ncount = 0 + for i in strs: + page_ncount += 1 + line_st = i.rstrip("\r\n").lstrip("\r\n") + if (line_st == ""): + continue + n_json = json.loads(line_st) + n_id = n_json["id"] + if not (os.path.exists("dump/topics/"+str(n_id)+".manifest")): + continue + with open("dump/topics/"+str(n_id)+".manifest", 'r') as file: + strs_m = file.readlines() + u_cfg = [] + t_count = 0 + for j in strs_m: + n_count += 1 + t_count += 1 + m_st = j.rstrip("\r\n").lstrip("\r\n") + if (m_st == ""): + continue + m_json = json.loads(m_st) + ids = n_id + u_cfg.append({ + "id": t_count, + "text": m_json["text"], + "postername": m_json["poster_name"], + "posterurl": "/users/"+str(m_json["poster_id"])+".html" + }) + if (n_count % 10 == 0): + print(f" render {n_count}", end='\r') + os.mkdir("build/posts/" + str(ids)) + for page_counts in range(1, math.ceil(len(u_cfg)/page_split)+1): + render_func("theme/posts.html", "build/posts/" + + str(ids)+"/"+str(page_counts)+".html", posts={ + "posts": u_cfg[page_split*(page_counts-1):page_split*page_counts], + "from_topic": n_json["name"], + "from_url": "/topics/"+str(r.split(".")[0])+"/"+str((page_ncount-1)//page_split+1)+".html" + }, page={ + "now": page_counts, + "count": math.ceil(len(u_cfg)/page_split), + "first_url": "/posts/"+str(ids)+"/"+str(1)+".html", + "prev_url": ("/posts/"+str(ids)+"/"+str(page_counts-1)+".html") if (page_counts > 1) else "", + "next_url": ("/posts/"+str(ids)+"/"+str(page_counts+1)+".html") if ((page_counts) < math.ceil(len(u_cfg)/page_split)) else "", + "end_url": "/posts/"+str(ids)+"/"+str(math.ceil(len(u_cfg)/page_split))+".html" + }) + print(f" render {n_count}") diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..eb09114 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +bbcode==1.1.0 +Jinja2==3.1.4 +MarkupSafe==2.1.5 +PyMySQL==1.1.1 diff --git a/t_parser.py b/t_parser.py new file mode 100644 index 0000000..dfca76e --- /dev/null +++ b/t_parser.py @@ -0,0 +1,39 @@ +import bbcode + +# init parser +parser = bbcode.Parser() +parser.add_simple_formatter('hr', '
', standalone=True) +parser.add_simple_formatter('hl', '%(value)s') +parser.add_simple_formatter('sub', '%(value)s') +parser.add_simple_formatter('sup', '%(value)s') +parser.add_simple_formatter( + 'iframe', '') +parser.add_simple_formatter( + 'showhide', '--- 隐藏内容 ---
%(value)s
----------------') + + +def render_color(tag_name, value, options, parent, context): + colors = "#0099ff" + if 'color' in options: + colors = options['color'] + return f'{value}' + + +def render_bgcolor(tag_name, value, options, parent, context): + colors = "#0099ff" + if 'bgcolor' in options: + colors = options['bgcolor'] + return f'{value}' + + +def render_size(tag_name, value, options, parent, context): + if 'size' in options: + sizes = options['size'] + else: + return value + return f'{value}' + + +parser.add_formatter("color", render_color) +parser.add_formatter("bgcolor", render_bgcolor) +parser.add_formatter("size", render_size) diff --git a/test.py b/test.py new file mode 100644 index 0000000..425696d --- /dev/null +++ b/test.py @@ -0,0 +1,7 @@ +from http.server import test, SimpleHTTPRequestHandler as RH +from functools import partial +RH.extensions_map = {k: v+';charset=UTF-8' for k, + v in RH.extensions_map.items()} +RH.extensions_map['.html'] = 'text/html; charset=utf-8' +test(HandlerClass=partial(RH, + directory="build"), port=8000) diff --git a/theme.sample/home.html b/theme.sample/home.html new file mode 100644 index 0000000..1aaa0f7 --- /dev/null +++ b/theme.sample/home.html @@ -0,0 +1,25 @@ + + + + Home + + + +

Home Page

+
    + {% for forum in forums %} +
  1. + +
  2. + {% endfor %} +
+ + + \ No newline at end of file diff --git a/theme.sample/posts.html b/theme.sample/posts.html new file mode 100644 index 0000000..e064ec4 --- /dev/null +++ b/theme.sample/posts.html @@ -0,0 +1,41 @@ + + + + Posts -- from {{ posts.from_topic }} + + + +

Posts

+ from {{ posts.from_topic }} +
    + {% for posts in posts.posts %} +
  1. +
      +
    • +

      {{ posts.id }}

      +
    • +
    • +

      {{ posts.text | safe }}

      +
    • +
    • + Poster: {{ posts.postername }} + {% if posts.posterurl != "" %} + 主页 + {% endif %} +
    • +
    +
  2. + {% endfor %} + {{ page.now }}/{{ page.count }} 页 + 第一页 + {% if page.prev_url != "" %} + 上一页 + {% endif %} + {% if page.next_url != "" %} + 下一页 + {% endif %} + 最后一页 +
+ + + \ No newline at end of file diff --git a/theme.sample/topics.html b/theme.sample/topics.html new file mode 100644 index 0000000..fb7413f --- /dev/null +++ b/theme.sample/topics.html @@ -0,0 +1,35 @@ + + + + Topics -- from {{ topics.from_forum }} + + + +

Topics

+ from {{ topics.from_forum }} +
    + {% for topic in topics.topics %} +
  1. + +
  2. + {% endfor %} + {{ page.now }}/{{ page.count }} 页 + 第一页 + {% if page.prev_url != "" %} + 上一页 + {% endif %} + {% if page.next_url != "" %} + 下一页 + {% endif %} + 最后一页 +
+ + + \ No newline at end of file diff --git a/theme.sample/user.html b/theme.sample/user.html new file mode 100644 index 0000000..63bebd2 --- /dev/null +++ b/theme.sample/user.html @@ -0,0 +1,22 @@ + + + + User - {{ user.name }} + + + +

User Page

+
    +
  1. +

    {{ user.name }}

    +
  2. +
  3. + {{ user.description | safe }} +
  4. +
  5. + Register time: {{ user.reg_time }} +
  6. +
+ + + \ No newline at end of file diff --git a/theme/home.html b/theme/home.html new file mode 100644 index 0000000..1aaa0f7 --- /dev/null +++ b/theme/home.html @@ -0,0 +1,25 @@ + + + + Home + + + +

Home Page

+
    + {% for forum in forums %} +
  1. + +
  2. + {% endfor %} +
+ + + \ No newline at end of file diff --git a/theme/posts.html b/theme/posts.html new file mode 100644 index 0000000..e064ec4 --- /dev/null +++ b/theme/posts.html @@ -0,0 +1,41 @@ + + + + Posts -- from {{ posts.from_topic }} + + + +

Posts

+ from {{ posts.from_topic }} +
    + {% for posts in posts.posts %} +
  1. +
      +
    • +

      {{ posts.id }}

      +
    • +
    • +

      {{ posts.text | safe }}

      +
    • +
    • + Poster: {{ posts.postername }} + {% if posts.posterurl != "" %} + 主页 + {% endif %} +
    • +
    +
  2. + {% endfor %} + {{ page.now }}/{{ page.count }} 页 + 第一页 + {% if page.prev_url != "" %} + 上一页 + {% endif %} + {% if page.next_url != "" %} + 下一页 + {% endif %} + 最后一页 +
+ + + \ No newline at end of file diff --git a/theme/topics.html b/theme/topics.html new file mode 100644 index 0000000..fb7413f --- /dev/null +++ b/theme/topics.html @@ -0,0 +1,35 @@ + + + + Topics -- from {{ topics.from_forum }} + + + +

Topics

+ from {{ topics.from_forum }} +
    + {% for topic in topics.topics %} +
  1. + +
  2. + {% endfor %} + {{ page.now }}/{{ page.count }} 页 + 第一页 + {% if page.prev_url != "" %} + 上一页 + {% endif %} + {% if page.next_url != "" %} + 下一页 + {% endif %} + 最后一页 +
+ + + \ No newline at end of file diff --git a/theme/user.html b/theme/user.html new file mode 100644 index 0000000..63bebd2 --- /dev/null +++ b/theme/user.html @@ -0,0 +1,22 @@ + + + + User - {{ user.name }} + + + +

User Page

+
    +
  1. +

    {{ user.name }}

    +
  2. +
  3. + {{ user.description | safe }} +
  4. +
  5. + Register time: {{ user.reg_time }} +
  6. +
+ + + \ No newline at end of file