origin commit

This commit is contained in:
cxykevin 2024-06-22 14:50:40 +08:00
commit ac437a0f6a
18 changed files with 717 additions and 0 deletions

6
.gitignore vendored Normal file
View File

@ -0,0 +1,6 @@
__pycache__/**
build
dump
config.toml
venv

29
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,29 @@
{
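// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387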
"version": "0.2.0",
"configurations": [
{
"name": "Python 调试: dump",
"type": "debugpy",
"request": "launch",
"program": "dump.py",
"console": "integratedTerminal"
},
{
"name": "Python 调试: build",
"type": "debugpy",
"request": "launch",
"program": "render.py",
"console": "integratedTerminal"
},
{
"name": "Python 调试: http server",
"type": "debugpy",
"request": "launch",
"program": "test.py",
"console": "integratedTerminal"
}
]
}

3
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,3 @@
{
"python.REPL.enableREPLSmartSend": false
}

14
config.sample.toml Normal file
View File

@ -0,0 +1,14 @@
[dump]
server = "127.0.0.1" # 服务器 IP，推荐使用本地数据库，或者直接在服务器上运行脚本
port = 3306 # 端口,一般不用改
user = "root" # 用户名
passwd = "your_passwd" # 密码,必填
charset = "utf8mb4" # 字符集,出问题可以试试 `utf8`
db = "smf" # 数据库名,根据自己情况
table = "phpbb2" # 表前缀,一般不用改
clean = true # 是否清理先前生成,推荐打开
[render]
show_st = false # 显示详细信息,打开会降低生成速度
clean = true # 是否清理先前生成,推荐打开
page_split = 20 # 多少内容为一页，推荐 20、25、50

164
dump.py Normal file
View File

@ -0,0 +1,164 @@
import tomllib
import pymysql
import json
import os
import shutil
from t_parser import parser
# --- Configuration ---------------------------------------------------------
# Every setting is read from the [dump] table of config.toml.
print("Load config")
with open("config.toml", 'rb') as file:
    cfg = tomllib.load(file)["dump"]
server = cfg["server"]
port = cfg["port"]
user = cfg["user"]
passwd = cfg["passwd"]
charset = cfg["charset"]
db = cfg["db"]
table = cfg["table"]  # table-name prefix, interpolated into the queries below
needclean = cfg["clean"]

# --- Database connection ---------------------------------------------------
print(f"Connect {server}:{port} as {user}")
conn = pymysql.connect(
    host=server,
    port=port,
    user=user,
    password=passwd,
    charset=charset
)
print("Server version:"+conn.get_server_info())
cursor = conn.cursor()
conn.select_db(db)

# --- Output directory ------------------------------------------------------
if needclean:
    print("Init dump")
    if os.path.exists("dump"):
        shutil.rmtree("dump")
# Every later step writes below dump/, so the directory has to exist whether
# or not a clean was requested; exist_ok keeps a resumed run from failing.
os.makedirs("dump", exist_ok=True)
# Export the forum list. Skipped when dump/forums.json is already present.
if not os.path.exists("dump/forums.json"):
    print("Load forums")
    cursor.execute(f'SELECT * FROM {table}forums')
    forum_rows = cursor.fetchall()

    # The first entry is a synthetic catch-all forum with id -1; the rows
    # read from the database follow it in query order.
    forums_struct = [{"name": "杂项", "description": "无法归类的帖子", "id": -1}]
    forums_struct.extend(
        {"name": row[1], "description": row[2], "id": row[0]}
        for row in forum_rows
    )
    forum_id_list = [row[0] for row in forum_rows]
    count = len(forum_rows)
    print(f"Forums count:{count}")

    # forums.json is the human-readable list; forums_tmp.json holds only the
    # ids and is read back by the topic export.
    with open("dump/forums.json", 'w') as file:
        json.dump(forums_struct, file, ensure_ascii=False, indent=4)
    with open("dump/forums_tmp.json", 'w') as file:
        json.dump(forum_id_list, file, ensure_ascii=False)
# Export topics as one JSON-lines manifest per forum under dump/forums/.
# Skipped when dump/forums already exists.
if not os.path.exists("dump/forums"):
    print("Load topics")
    os.mkdir("dump/forums")
    print("Load forums from json")
    with open("dump/forums_tmp.json", 'r') as file:
        forum_id_list = json.load(file)
    # Built once: the membership test below runs for every topic row.
    known_forum_ids = set(forum_id_list)
    cursor.execute(f'SELECT COUNT(*) FROM {table}topics')
    counts = cursor.fetchall()[0][0]
    print(f"Topics count:{counts}")
    cursor.execute(f'SELECT * FROM {table}topics')
    result = cursor.fetchall()
    print("Get topics")
    count = 0
    for count, i in enumerate(result, start=1):
        forums_ids = i[7]
        # Topics whose forum id is unknown are filed under the catch-all
        # forum -1.
        if forums_ids not in known_forum_ids:
            forums_ids = -1
        fpath = f"dump/forums/{forums_ids}.manifest"
        # Append mode creates the file on first use, so no separate
        # "create if missing" step is needed.
        with open(fpath, 'a') as file:
            file.write(json.dumps(
                {"name": i[1], "time": i[3], "id": i[0]}, ensure_ascii=False)+'\n')
        if count % 100 == 0:
            print(f" load {count}", end="\r")
    print(f" load {count}")
def _user_description(row):
    """Return the BBCode profile text for one users-table row.

    Each non-null profile field contributes one labelled line. The e-mail
    line is only included when row[11] == 1 (presumably the "show e-mail"
    flag; confirm against the schema).

    The previous inline expression chained `x if c else "" + y if d else ...`.
    A conditional expression binds more loosely than `+`, so that expression
    stopped at the first non-null field and dropped all the others.
    """
    parts = []
    if row[10] is not None:
        parts.append("签名(sig): "+row[10]+"\n")
    if row[4] is not None and row[11] == 1:
        parts.append("邮箱(Email): "+row[4]+"\n")
    if row[6] is not None:
        parts.append("网站(website): [url="+row[6]+"]"+row[6]+"[/url]\n")
    if row[7] is not None:
        parts.append("状态(status): "+row[7]+"\n")
    if row[8] is not None:
        parts.append("属地(from): "+row[8]+"\n")
    if row[9] is not None:
        parts.append("爱好(interests): "+row[9]+"\n")
    return "".join(parts)


# Export users to dump/users.json, keyed by the stringified user id.
# Skipped when the file is already present.
if not os.path.exists("dump/users.json"):
    print("Load users")
    cursor.execute(f'SELECT * FROM {table}users')
    result = cursor.fetchall()
    user_json = {
        str(i[0]): {
            "name": i[1],
            "time": i[2],
            "email": i[4] if i[4] is not None else "",
            # The description is stored already rendered through the parser.
            "description": parser.format(_user_description(i)),
        } for i in result
    }
    with open("dump/users.json", 'w') as file:
        json.dump(user_json, file, ensure_ascii=False)
def rend(text, en_bbcode):
    """Return text run through the BBCode parser when en_bbcode is 1.

    Any other flag value returns the text unchanged.
    """
    if en_bbcode == 1:
        return parser.format(text)
    return text


# Export posts as one JSON-lines manifest per topic under dump/topics/.
# Skipped when dump/topics already exists.
if not os.path.exists("dump/topics"):
    print("Load posts")
    os.mkdir("dump/topics")
    print("Load users from json")
    with open("dump/users.json", 'r') as file:
        user_json = json.load(file)
    cursor.execute(f'SELECT COUNT(*) FROM {table}posts')
    counts = cursor.fetchall()[0][0]
    print(f"Posts count:{counts}")
    page_size = 100
    oppsize = 0
    last_id = -1
    # Keyset pagination: each page starts after the last post_id already seen.
    while True:
        cursor.execute(
            f'SELECT * FROM {table}posts WHERE `post_id` > {last_id} ORDER BY post_id LIMIT {page_size}')
        result = cursor.fetchall()
        # An empty page means the table is exhausted. Stopping here also
        # avoids indexing result[-1] on an empty result.
        if not result:
            break
        last_id = result[-1][0]
        oppsize += len(result)
        for i in result:
            s_retcode = cursor.execute(
                f'SELECT * FROM {table}posts_text WHERE `post_id`={i[0]}')
            if s_retcode == 0:
                continue
            s_res = cursor.fetchall()[0]
            if s_res[1] is None:
                continue

            subject = s_res[4]
            body = (("主题:"+subject+"\n\n")
                    if subject is not None else "") + s_res[1]
            # s_res[3] is removed from the text as a ":<value>" suffix
            # (presumably the per-post BBCode uid; confirm against schema).
            # With an empty value the pattern would be a bare ":" and every
            # colon in the post would be deleted, so only strip a real value.
            uid = s_res[3]
            if uid is not None and str(uid) != "":
                body = body.replace(":"+str(uid), "")
            s_text = rend(body, i[10])

            sender_id = i[3]
            send_name = i[9] if i[9] is not None else ""
            # Fall back to the exported user name when the post row carries
            # no name of its own.
            if send_name == "" and sender_id != -1 and str(sender_id) in user_json:
                send_name = user_json[str(sender_id)]["name"]
            # Sender id -1 is labelled as an unregistered poster.
            if sender_id == -1:
                send_name = "[未注册]"+send_name

            topic_id = i[1]
            fpath = f"dump/topics/{topic_id}.manifest"
            with open(fpath, 'a') as file:
                file.write(json.dumps(
                    {"text": s_text, "poster_id": sender_id, "poster_name": send_name}, ensure_ascii=False)+"\n")
        print(f" load {oppsize}", end="\r")
    print(f" load {counts}")
cursor.close()
conn.close()

35
readme.md Normal file
View File

@ -0,0 +1,35 @@
# BBcovert
> 本项目可以将phpbb论坛中的帖子和评论内容转换成纯静态页面
## 运行
> 请在字符集设置为utf-8的linux下运行
```sh
# 请先配置config.toml!
# 从sample复制
cp config.sample.toml config.toml
# 初始化虚拟环境,推荐先换源
python -m venv venv
source venv/bin/activate
pip install -r requirements.txt
# 下载数据库
python dump.py
# 生成静态页面
python render.py
# 测试生成
python test.py
```
## 结构
`theme` 文件夹内是主题文件
数据库会被下载到 `dump` 文件夹
生成产物在 `build` 文件夹

170
render.py Normal file
View File

@ -0,0 +1,170 @@
import jinja2
import os
import shutil
import json
import sys
import time
import tomllib
import math
# --- Configuration ---------------------------------------------------------
# Every setting is read from the [render] table of config.toml.
print("Load config")
with open("config.toml", 'rb') as file:
    cfg = tomllib.load(file)["render"]
show_st = cfg["show_st"]        # print every rendered file instead of a counter
clean = cfg["clean"]            # wipe build/ before rendering
page_split = cfg["page_split"]  # entries per generated page
if page_split < 1:
    # The page count is computed by dividing by page_split; reject values
    # that would divide by zero or yield no pages.
    print("page_split must be at least 1")
    sys.exit(1)

# --- Output directory ------------------------------------------------------
print("Init build")
if clean and os.path.exists("build"):
    shutil.rmtree("build")
# Needed with or without a clean; exist_ok lets a resumed run continue.
os.makedirs("build", exist_ok=True)
if not os.path.exists("theme"):
    print("Cannot find any themes")
    sys.exit(1)
# Compiled templates keyed by path. The same few theme files are rendered
# once per output page, so each is read and compiled only once.
_TEMPLATE_CACHE = {}


def _load_template(filepath):
    """Return the compiled Jinja2 template for filepath, compiling on first use."""
    template = _TEMPLATE_CACHE.get(filepath)
    if template is None:
        with open(filepath, 'r', encoding="utf-8") as file:
            # NOTE(review): autoescaping is off (the jinja2.Template default),
            # so values without "| safe" are emitted unescaped as well.
            # Confirm whether the dumped text is already HTML-escaped before
            # enabling it.
            template = jinja2.Template(file.read())
        _TEMPLATE_CACHE[filepath] = template
    return template


def render_func(filepath, extpath, **kwargs):
    """Render the template at filepath and write the result to extpath.

    Args:
        filepath: path of the Jinja2 template to render.
        extpath: path of the output file; overwritten if it exists.
        **kwargs: variables made available to the template.

    The output is written as UTF-8 regardless of the locale, which matches
    the charset the bundled test server announces for HTML files.
    """
    if show_st:
        print(f' render "{extpath}"')
    result = _load_template(filepath).render(**kwargs)
    with open(extpath, 'w', encoding="utf-8") as file:
        file.write(result)
# Render the forum index. The guard tests the file that is actually written
# (build/index.html); it used to test build/home.html, which is never created.
if not os.path.exists("build/index.html"):
    print("Render home page")
    with open("dump/forums.json", 'r') as file:
        json_res = json.load(file)
    render_args = {
        "forums": [
            {
                # NOTE(review): a topic directory is only generated for
                # forums that have a manifest in dump/forums, so this link
                # is dead for a forum without topics.
                "url": "/topics/"+str(i["id"])+"/1.html",
                "name": i["name"],
                "description": i["description"]
            } for i in json_res
        ]
    }
    render_func("theme/home.html", "build/index.html", **render_args)
# Render one profile page per exported user. Skipped when build/users exists.
if not os.path.exists("build/users"):
    print("Render users")
    os.mkdir("build/users")
    with open("dump/users.json", 'r') as file:
        user_json_res = json.load(file)
    print(f"Users count: {len(user_json_res)}")
    count = 0
    for count, (ids, context) in enumerate(user_json_res.items(), start=1):
        # The stored time is formatted as a local-time calendar date.
        registered = time.strftime("%Y-%m-%d", time.localtime(context["time"]))
        profile = {
            "name": context["name"],
            "description": context["description"],
            "reg_time": registered
        }
        target = "build/users/"+str(ids)+".html"
        render_func("theme/user.html", target, user=profile)
        # The running counter is only shown when per-file output is off.
        if not show_st and count % 50 == 0:
            print(f" render {count}", end='\r')
    print(f" render {count}")
# Render the paginated topic list of every forum that has a manifest in
# dump/forums. Skipped when build/topics exists.
if not os.path.exists("build/topics"):
    print("Render topics")
    os.mkdir("build/topics")
    with open("dump/forums.json", 'r') as file:
        json_res = json.load(file)
    # id -> name, built once instead of filtering the list for every page.
    # setdefault keeps the first entry for an id, like the former filter.
    forum_names = {}
    for forum in json_res:
        forum_names.setdefault(forum.get("id"), forum["name"])
    count = 0
    for r in os.listdir("dump/forums"):
        count += 1
        u_cfg = []
        with open("dump/forums/"+r, 'r') as file:
            strs = file.readlines()
        n_count = 0
        for i in strs:
            n_count += 1
            line_st = i.strip("\r\n")
            if line_st == "":
                continue
            line_json = json.loads(line_st)
            u_cfg.append(
                {
                    "id": n_count,
                    "name": line_json["name"],
                    "time": time.strftime("%Y-%m-%d", time.localtime(line_json["time"])),
                    "url": "/posts/"+str(line_json["id"])+"/1.html"
                }
            )
        # Manifest files are named "<forum id>.manifest".
        ids = int(r.split(".")[0])
        base_url = "/topics/"+str(ids)+"/"
        os.mkdir("build/topics/" + str(ids))
        # Pages are sliced from u_cfg, so the page count is derived from it
        # too; counting raw lines could add empty trailing pages.
        page_total = math.ceil(len(u_cfg)/page_split)
        for page_counts in range(1, page_total+1):
            render_func(
                "theme/topics.html",
                "build/topics/"+str(ids)+"/"+str(page_counts)+".html",
                topics={
                    "topics": u_cfg[page_split*(page_counts-1):page_split*page_counts],
                    "from_forum": forum_names[ids],
                    # The forum index is written to build/index.html; no
                    # home.html is ever generated.
                    "from_url": "/index.html"
                },
                page={
                    "now": page_counts,
                    "count": page_total,
                    "first_url": base_url+"1.html",
                    "prev_url": (base_url+str(page_counts-1)+".html") if page_counts > 1 else "",
                    "next_url": (base_url+str(page_counts+1)+".html") if page_counts < page_total else "",
                    "end_url": base_url+str(page_total)+".html"
                })
        if not show_st:
            print(f" render {count}", end='\r')
    print(f" render {count}")
# Render the paginated post list of every topic that has a manifest in
# dump/topics. Skipped when build/posts exists.
if not os.path.exists("build/posts"):
    print("Render posts")
    os.mkdir("build/posts")
    n_count = 0
    for r in os.listdir("dump/forums"):
        # Manifest files are named "<forum id>.manifest".
        forum_id = r.split(".")[0]
        with open("dump/forums/"+r, 'r') as file:
            strs = file.readlines()
        page_ncount = 0
        for i in strs:
            line_st = i.strip("\r\n")
            if line_st == "":
                continue
            # Position among the non-blank entries: the topic list is paged
            # over exactly these entries, so the back link computed from
            # this index lands on the page that lists the topic.
            page_ncount += 1
            n_json = json.loads(line_st)
            n_id = n_json["id"]
            manifest_path = "dump/topics/"+str(n_id)+".manifest"
            if not os.path.exists(manifest_path):
                continue
            with open(manifest_path, 'r') as file:
                strs_m = file.readlines()
            u_cfg = []
            t_count = 0
            for j in strs_m:
                n_count += 1
                t_count += 1
                m_st = j.strip("\r\n")
                if m_st == "":
                    continue
                m_json = json.loads(m_st)
                u_cfg.append({
                    "id": t_count,
                    "text": m_json["text"],
                    "postername": m_json["poster_name"],
                    "posterurl": "/users/"+str(m_json["poster_id"])+".html"
                })
                if n_count % 10 == 0:
                    print(f" render {n_count}", end='\r')
            # The topic id is used directly. It was previously copied into a
            # variable inside the loop above, which left it stale or unset
            # for a manifest without entries.
            base_url = "/posts/"+str(n_id)+"/"
            os.mkdir("build/posts/" + str(n_id))
            page_total = math.ceil(len(u_cfg)/page_split)
            for page_counts in range(1, page_total+1):
                render_func(
                    "theme/posts.html",
                    "build/posts/"+str(n_id)+"/"+str(page_counts)+".html",
                    posts={
                        "posts": u_cfg[page_split*(page_counts-1):page_split*page_counts],
                        "from_topic": n_json["name"],
                        "from_url": "/topics/"+str(forum_id)+"/"+str((page_ncount-1)//page_split+1)+".html"
                    },
                    page={
                        "now": page_counts,
                        "count": page_total,
                        "first_url": base_url+"1.html",
                        "prev_url": (base_url+str(page_counts-1)+".html") if page_counts > 1 else "",
                        "next_url": (base_url+str(page_counts+1)+".html") if page_counts < page_total else "",
                        "end_url": base_url+str(page_total)+".html"
                    })
    print(f" render {n_count}")

4
requirements.txt Normal file
View File

@ -0,0 +1,4 @@
bbcode==1.1.0
Jinja2==3.1.4
MarkupSafe==2.1.5
PyMySQL==1.1.1

39
t_parser.py Normal file
View File

@ -0,0 +1,39 @@
import bbcode
# init parser
# Shared BBCode parser, extended with the extra tags used by the forum.
parser = bbcode.Parser()
parser.add_simple_formatter('hr', '<hr />', standalone=True)
parser.add_simple_formatter('hl', '<mark>%(value)s</mark>')
parser.add_simple_formatter('sub', '<sub>%(value)s</sub>')
parser.add_simple_formatter('sup', '<sup>%(value)s</sup>')
# The tag content becomes the src attribute, so it must stay a plain URL:
# automatic link replacement would turn it into an <a> element and cosmetic
# replacement would rewrite sequences such as "--" inside it.
# NOTE(review): the URL scheme is not restricted here.
parser.add_simple_formatter(
    'iframe', '<iframe src="%(value)s">IFrame tag</iframe>',
    replace_links=False, replace_cosmetic=False)
parser.add_simple_formatter(
    'showhide', '--- 隐藏内容 ---<br />%(value)s<br /> ----------------')
def render_color(tag_name, value, options, parent, context):
    """Render [color=...] as a <span> with an inline text colour.

    Args:
        tag_name: name of the tag being rendered (unused).
        value: already-rendered inner content of the tag.
        options: tag options; the colour is read from options['color'].
        parent: enclosing tag (unused).
        context: render context (unused).

    Returns:
        The span markup. A missing colour, or one containing characters
        outside a conservative whitelist, falls back to "#0099ff" so that
        post content cannot break out of the style attribute.
    """
    import re  # local import keeps this function self-contained

    colors = "#0099ff"
    if 'color' in options:
        candidate = options['color'].strip()
        # Allows names, hex values and functional notation such as
        # rgb(1, 2, 3); rejects quotes, ';', ':' and angle brackets.
        if re.fullmatch(r"[#\w(),.%\s-]+", candidate, flags=re.ASCII):
            colors = candidate
    return f'<span style="color:{colors};">{value}</span>'
def render_bgcolor(tag_name, value, options, parent, context):
    """Render [bgcolor=...] as a <span> with an inline background colour.

    Args:
        tag_name: name of the tag being rendered (unused).
        value: already-rendered inner content of the tag.
        options: tag options; the colour is read from options['bgcolor'].
        parent: enclosing tag (unused).
        context: render context (unused).

    Returns:
        The span markup. A missing colour, or one containing characters
        outside a conservative whitelist, falls back to "#0099ff" so that
        post content cannot break out of the style attribute.
    """
    import re  # local import keeps this function self-contained

    colors = "#0099ff"
    if 'bgcolor' in options:
        candidate = options['bgcolor'].strip()
        # Allows names, hex values and functional notation such as
        # rgb(1, 2, 3); rejects quotes, ';', ':' and angle brackets.
        if re.fullmatch(r"[#\w(),.%\s-]+", candidate, flags=re.ASCII):
            colors = candidate
    return f'<span style="background-color:{colors};">{value}</span>'
def render_size(tag_name, value, options, parent, context):
    """Render [size=...] as a <span> with an inline font size.

    Args:
        tag_name: name of the tag being rendered (unused).
        value: already-rendered inner content of the tag.
        options: tag options; the size is read from options['size'].
        parent: enclosing tag (unused).
        context: render context (unused).

    Returns:
        The span markup, or value unchanged when no size is given or the
        size contains characters that could break out of the style
        attribute.
    """
    import re  # local import keeps this function self-contained

    if 'size' not in options:
        return value
    sizes = options['size'].strip()
    if re.fullmatch(r"\d+(?:\.\d+)?", sizes):
        # A CSS length needs a unit; a bare number is taken as pixels.
        sizes += "px"
    elif not re.fullmatch(r"[\w.%-]+", sizes, flags=re.ASCII):
        return value
    return f'<span style="font-size:{sizes};">{value}</span>'
# Register the custom render functions under their tag names.
for _tag, _renderer in (
    ("color", render_color),
    ("bgcolor", render_bgcolor),
    ("size", render_size),
):
    parser.add_formatter(_tag, _renderer)

7
test.py Normal file
View File

@ -0,0 +1,7 @@
from http.server import test, SimpleHTTPRequestHandler as RH
from functools import partial
# Announce UTF-8 for every mapped content type, then pin the exact header
# value used for HTML pages.
utf8_types = {}
for suffix, mime in RH.extensions_map.items():
    utf8_types[suffix] = mime+';charset=UTF-8'
utf8_types['.html'] = 'text/html; charset=utf-8'
RH.extensions_map = utf8_types

# Serve the generated site from build/ on port 8000.
build_handler = partial(RH, directory="build")
test(HandlerClass=build_handler, port=8000)

25
theme.sample/home.html Normal file
View File

@ -0,0 +1,25 @@
{#
  Forum index page.
  Context:
    forums - list of entries with url, name and description.
             description is emitted unescaped (| safe).
#}
<html>
<head>
    {# Declared in the page so that static hosts which send no charset header still decode it correctly. #}
    <meta charset="utf-8">
    <title>Home</title>
</head>
<body>
    <h1>Home Page</h1>
    <ol>
        {% for forum in forums %}
        <li>
            <ul>
                <li>
                    <h3><a href="{{ forum.url }}">{{ forum.name }}</a></h3>
                </li>
                <li>
                    {{ forum.description | safe }}
                </li>
            </ul>
        </li>
        {% endfor %}
    </ol>
</body>
</html>

41
theme.sample/posts.html Normal file
View File

@ -0,0 +1,41 @@
{#
  One page of the posts of a topic.
  Context:
    posts.posts      - entries with id, text, postername and posterurl.
                       text is emitted unescaped (| safe).
    posts.from_topic - title of the topic.
    posts.from_url   - link back to the topic list.
    page             - now, count, first_url, prev_url, next_url, end_url;
                       prev_url / next_url are "" when there is no such page.
#}
<html>
<head>
    {# Declared in the page so that static hosts which send no charset header still decode it correctly. #}
    <meta charset="utf-8">
    <title>Posts -- from {{ posts.from_topic }}</title>
</head>
<body>
    <h1>Posts</h1>
    <a href="{{ posts.from_url }}">from {{ posts.from_topic }}</a>
    <ol>
        {# The loop variable is named "post" so it does not shadow the "posts" context object. #}
        {% for post in posts.posts %}
        <li>
            <ul>
                <li>
                    <h3>{{ post.id }}</h3>
                </li>
                <li>
                    <h3>{{ post.text | safe }}</h3>
                </li>
                <li>
                    Poster: {{ post.postername }}
                    {% if post.posterurl != "" %}
                    <a href="{{ post.posterurl }}">主页</a>
                    {% endif %}
                </li>
            </ul>
        </li>
        {% endfor %}
    </ol>
    {# The pager sits outside the list: an <ol> may only contain <li> children. #}
    <div>
        {{ page.now }}/{{ page.count }} 页
        <a href="{{ page.first_url }}">第一页</a>
        {% if page.prev_url != "" %}
        <a href="{{ page.prev_url }}">上一页</a>
        {% endif %}
        {% if page.next_url != "" %}
        <a href="{{ page.next_url }}">下一页</a>
        {% endif %}
        <a href="{{ page.end_url }}">最后一页</a>
    </div>
</body>
</html>

35
theme.sample/topics.html Normal file
View File

@ -0,0 +1,35 @@
{#
  One page of the topics of a forum.
  Context:
    topics.topics     - entries with id, name, time and url.
    topics.from_forum - name of the forum.
    topics.from_url   - link back to the forum index.
    page              - now, count, first_url, prev_url, next_url, end_url;
                        prev_url / next_url are "" when there is no such page.
#}
<html>
<head>
    {# Declared in the page so that static hosts which send no charset header still decode it correctly. #}
    <meta charset="utf-8">
    <title>Topics -- from {{ topics.from_forum }}</title>
</head>
<body>
    <h1>Topics</h1>
    <a href="{{ topics.from_url }}">from {{ topics.from_forum }}</a>
    <ol>
        {% for topic in topics.topics %}
        <li>
            <ul>
                <li>
                    <h3>{{ topic.id }}<a href="{{ topic.url }}">{{ topic.name }}</a></h3>
                </li>
                <li>
                    Time: {{ topic.time }}
                </li>
            </ul>
        </li>
        {% endfor %}
    </ol>
    {# The pager sits outside the list: an <ol> may only contain <li> children. #}
    <div>
        {{ page.now }}/{{ page.count }} 页
        <a href="{{ page.first_url }}">第一页</a>
        {% if page.prev_url != "" %}
        <a href="{{ page.prev_url }}">上一页</a>
        {% endif %}
        {% if page.next_url != "" %}
        <a href="{{ page.next_url }}">下一页</a>
        {% endif %}
        <a href="{{ page.end_url }}">最后一页</a>
    </div>
</body>
</html>

22
theme.sample/user.html Normal file
View File

@ -0,0 +1,22 @@
{#
  Profile page of one user.
  Context:
    user.name        - display name.
    user.description - profile text, emitted unescaped (| safe).
    user.reg_time    - registration date, already formatted as text.
#}
<html>
<head>
    {# Declared in the page so that static hosts which send no charset header still decode it correctly. #}
    <meta charset="utf-8">
    <title>User - {{ user.name }}</title>
</head>
<body>
    <h1>User Page</h1>
    <ol>
        <li>
            <h3>{{ user.name }}</h3>
        </li>
        <li>
            {{ user.description | safe }}
        </li>
        <li>
            Register time: {{ user.reg_time }}
        </li>
    </ol>
</body>
</html>

25
theme/home.html Normal file
View File

@ -0,0 +1,25 @@
{#
  Forum index page.
  Context:
    forums - list of entries with url, name and description.
             description is emitted unescaped (| safe).
#}
<html>
<head>
    {# Declared in the page so that static hosts which send no charset header still decode it correctly. #}
    <meta charset="utf-8">
    <title>Home</title>
</head>
<body>
    <h1>Home Page</h1>
    <ol>
        {% for forum in forums %}
        <li>
            <ul>
                <li>
                    <h3><a href="{{ forum.url }}">{{ forum.name }}</a></h3>
                </li>
                <li>
                    {{ forum.description | safe }}
                </li>
            </ul>
        </li>
        {% endfor %}
    </ol>
</body>
</html>

41
theme/posts.html Normal file
View File

@ -0,0 +1,41 @@
{#
  One page of the posts of a topic.
  Context:
    posts.posts      - entries with id, text, postername and posterurl.
                       text is emitted unescaped (| safe).
    posts.from_topic - title of the topic.
    posts.from_url   - link back to the topic list.
    page             - now, count, first_url, prev_url, next_url, end_url;
                       prev_url / next_url are "" when there is no such page.
#}
<html>
<head>
    {# Declared in the page so that static hosts which send no charset header still decode it correctly. #}
    <meta charset="utf-8">
    <title>Posts -- from {{ posts.from_topic }}</title>
</head>
<body>
    <h1>Posts</h1>
    <a href="{{ posts.from_url }}">from {{ posts.from_topic }}</a>
    <ol>
        {# The loop variable is named "post" so it does not shadow the "posts" context object. #}
        {% for post in posts.posts %}
        <li>
            <ul>
                <li>
                    <h3>{{ post.id }}</h3>
                </li>
                <li>
                    <h3>{{ post.text | safe }}</h3>
                </li>
                <li>
                    Poster: {{ post.postername }}
                    {% if post.posterurl != "" %}
                    <a href="{{ post.posterurl }}">主页</a>
                    {% endif %}
                </li>
            </ul>
        </li>
        {% endfor %}
    </ol>
    {# The pager sits outside the list: an <ol> may only contain <li> children. #}
    <div>
        {{ page.now }}/{{ page.count }} 页
        <a href="{{ page.first_url }}">第一页</a>
        {% if page.prev_url != "" %}
        <a href="{{ page.prev_url }}">上一页</a>
        {% endif %}
        {% if page.next_url != "" %}
        <a href="{{ page.next_url }}">下一页</a>
        {% endif %}
        <a href="{{ page.end_url }}">最后一页</a>
    </div>
</body>
</html>

35
theme/topics.html Normal file
View File

@ -0,0 +1,35 @@
{#
  One page of the topics of a forum.
  Context:
    topics.topics     - entries with id, name, time and url.
    topics.from_forum - name of the forum.
    topics.from_url   - link back to the forum index.
    page              - now, count, first_url, prev_url, next_url, end_url;
                        prev_url / next_url are "" when there is no such page.
#}
<html>
<head>
    {# Declared in the page so that static hosts which send no charset header still decode it correctly. #}
    <meta charset="utf-8">
    <title>Topics -- from {{ topics.from_forum }}</title>
</head>
<body>
    <h1>Topics</h1>
    <a href="{{ topics.from_url }}">from {{ topics.from_forum }}</a>
    <ol>
        {% for topic in topics.topics %}
        <li>
            <ul>
                <li>
                    <h3>{{ topic.id }}<a href="{{ topic.url }}">{{ topic.name }}</a></h3>
                </li>
                <li>
                    Time: {{ topic.time }}
                </li>
            </ul>
        </li>
        {% endfor %}
    </ol>
    {# The pager sits outside the list: an <ol> may only contain <li> children. #}
    <div>
        {{ page.now }}/{{ page.count }} 页
        <a href="{{ page.first_url }}">第一页</a>
        {% if page.prev_url != "" %}
        <a href="{{ page.prev_url }}">上一页</a>
        {% endif %}
        {% if page.next_url != "" %}
        <a href="{{ page.next_url }}">下一页</a>
        {% endif %}
        <a href="{{ page.end_url }}">最后一页</a>
    </div>
</body>
</html>

22
theme/user.html Normal file
View File

@ -0,0 +1,22 @@
{#
  Profile page of one user.
  Context:
    user.name        - display name.
    user.description - profile text, emitted unescaped (| safe).
    user.reg_time    - registration date, already formatted as text.
#}
<html>
<head>
    {# Declared in the page so that static hosts which send no charset header still decode it correctly. #}
    <meta charset="utf-8">
    <title>User - {{ user.name }}</title>
</head>
<body>
    <h1>User Page</h1>
    <ol>
        <li>
            <h3>{{ user.name }}</h3>
        </li>
        <li>
            {{ user.description | safe }}
        </li>
        <li>
            Register time: {{ user.reg_time }}
        </li>
    </ol>
</body>
</html>
</html>