mapbook/build.py

72 lines
1.8 KiB
Python
Raw Permalink Normal View History

2024-06-29 19:23:55 +08:00
import os
2024-08-31 17:22:03 +08:00
import sys
2024-06-29 19:23:55 +08:00
####################################
##             CONFIG             ##
####################################
# Site base URL; collected page paths are appended directly to it.
root_url = "https://learn.study-area.org.cn"
# Root directory that is scanned; sitemap.xml and robots.txt are written here.
roots = "book"
# Directory names that are not descended into (also emitted as Disallow rules).
igrone_dir = [
    "css",
    "fonts",
    "FontAwesome",
    "asserts",
    #### Entries above are defaults; do not change ####
    "licenses",
]
# File names that are left out of the sitemap (also emitted as Disallow rules).
igrone_filename = [
    "print.html",
    "404.html",
    #### Entries above are defaults; do not change ####
]
####################################
# Site-relative paths of the pages found by dfs_search().
file_list = []
2024-08-31 17:22:03 +08:00
# Optional CLI override: exactly one argument replaces the configured base URL.
# Trailing slashes are stripped because every collected page path already
# starts with a separator; keeping them would produce "//" in each URL.
if len(sys.argv) == 2:
    root_url = sys.argv[1].rstrip("/")
2024-06-29 19:23:55 +08:00
def dfs_search(path: str, n_path: str) -> None:
    """Recursively collect the site-relative paths of HTML pages under *path*.

    Directories whose name is listed in ``igrone_dir`` are not descended
    into, and files whose name is listed in ``igrone_filename`` are skipped.
    Every other file ending in ``.html`` is appended to the module-level
    ``file_list``.

    Args:
        path: Filesystem directory to scan.
        n_path: URL path that corresponds to *path* ("" for the scan root).

    Returns:
        None. Results are accumulated in ``file_list``.
    """
    # Sort the entries so the generated output is stable across runs;
    # os.listdir() returns names in arbitrary order.
    for name in sorted(os.listdir(path)):
        full_path = os.path.join(path, name)
        if os.path.isdir(full_path):
            if name not in igrone_dir:
                dfs_search(full_path, n_path + "/" + name)
        elif name.endswith(".html") and name not in igrone_filename:
            # n_path is a URL path, so it is joined with "/" rather than
            # os.sep (which would yield backslashes on Windows).
            file_list.append(n_path + "/" + name)
# Scan the configured root directory, then echo every page that was collected.
dfs_search(roots, "")
print(f"found {len(file_list)} pages:")
if file_list:
    # One page path per line, same as printing each entry individually.
    print("\n".join(file_list))
print()
print("Write to sitemap.xml ...")
# Imported here because only the sitemap writer needs it.
from xml.sax.saxutils import escape

# The sitemap protocol requires <urlset> to declare the sitemap namespace.
xml_tmpl_start = """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
"""
# One <url> entry; the placeholder receives the absolute page URL.
xml_tmpl_link = """
<url>
<loc>{}</loc>
<changefreq>daily</changefreq>
<priority>0.9</priority>
</url>
"""
xml_tmpl_end = """</urlset>
"""
# Write explicitly as UTF-8 so the bytes on disk match the XML declaration
# regardless of the platform's default encoding.
with open(os.path.join(roots, "sitemap.xml"), "w", encoding="utf-8") as file:
    file.write(xml_tmpl_start)
    for page in file_list:
        # Escape &, < and > so unusual file names cannot break the XML.
        file.write(xml_tmpl_link.format(escape(root_url + page)))
    file.write(xml_tmpl_end)
print("Write robots.txt")
# Emit a single rule group for all crawlers. No blank lines are written
# between the rules: parsers that follow the original robots.txt convention
# treat a blank line as the end of the group, which would detach every
# following Disallow line from the "User-agent: *" header.
with open(os.path.join(roots, "robots.txt"), "w", encoding="utf-8") as file:
    file.write("User-agent: *\n")
    for name in igrone_dir:
        file.write("Disallow: /" + name + "\n")
    for name in igrone_filename:
        file.write("Disallow: /" + name + "\n")
print("Finish!")