mapbook/build.py

72 lines
1.8 KiB
Python

import os
import sys
from xml.sax.saxutils import escape
####################################
##             CONFIG             ##
####################################
# Site domain; prefixed to every page path written to the sitemap.
root_url = "https://learn.study-area.org.cn"

# Root directory that is scanned and that receives the generated files.
roots = "book"

# Directory names that are skipped while scanning and disallowed in robots.txt.
igrone_dir = [
    "css",
    "fonts",
    "FontAwesome",
    "asserts",
    #### entries above are the defaults, do not modify ####
    "licenses",
]

# File names that are skipped while scanning and disallowed in robots.txt.
igrone_filename = [
    "print.html",
    "404.html",
    #### entries above are the defaults, do not modify ####
]
####################################

# URL paths of the pages found by the scan; filled in by dfs_search().
file_list = []

# Exactly one command-line argument overrides the configured site domain.
if len(sys.argv) == 2:
    root_url = sys.argv[1]
def dfs_search(path, n_path):
    """Recursively collect the HTML pages under ``path`` into ``file_list``.

    Directories whose name appears in ``igrone_dir`` are not descended into,
    and files whose name appears in ``igrone_filename`` are skipped.

    Args:
        path: Filesystem directory to scan.
        n_path: URL path that corresponds to ``path`` ("" for the scan root).
            It is always "/"-separated, independent of the platform.

    Returns:
        None. Each matching page is appended to the module-level
        ``file_list`` as a URL path such as "/chapter/page.html".
    """
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # generated sitemap stable from one run to the next.
    for name in sorted(os.listdir(path)):
        full_path = os.path.join(path, name)
        if os.path.isdir(full_path):
            if name not in igrone_dir:
                dfs_search(full_path, n_path + "/" + name)
        elif name.endswith(".html") and name not in igrone_filename:
            # URL paths must use "/" (os.sep is "\\" on Windows). endswith()
            # also avoids matching a file that is literally named "html".
            file_list.append(n_path + "/" + name)
# Scan the whole tree below the configured root; results land in file_list.
dfs_search(roots, "")

# Report how many pages were collected, one path per line, then a blank line.
print(f"found {len(file_list)} pages:")
for page_path in file_list:
    print(page_path)
print("")
print("Write to sitemap.xml ...")
# The sitemap protocol requires the sitemaps.org namespace on <urlset>.
xml_tmpl_start = """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
"""
# One <url> entry; the placeholder receives the absolute page URL.
xml_tmpl_link = """
<url>
<loc>{}</loc>
<changefreq>daily</changefreq>
<priority>0.9</priority>
</url>
"""
xml_tmpl_end = """</urlset>
"""
# Page paths already start with "/", so strip trailing slashes from the
# domain to avoid "https://host//page.html".
site_base = root_url.rstrip("/")
# Write UTF-8 explicitly so the bytes match the encoding declared in the XML
# header even when the platform default encoding is something else.
with open(os.path.join(roots, "sitemap.xml"), "w", encoding="utf-8") as sitemap_file:
    sitemap_file.write(xml_tmpl_start)
    for page_path in file_list:
        # Entity-escape "&", "<" and ">" so the document stays well-formed.
        sitemap_file.write(xml_tmpl_link.format(escape(site_base + page_path)))
    sitemap_file.write(xml_tmpl_end)
print("Write robots.txt")
# Keep all rules in one contiguous record: parsers that follow the original
# robots exclusion standard treat a blank line as the end of the record, so a
# blank line after a Disallow would detach every later rule from "User-agent: *".
with open(os.path.join(roots, "robots.txt"), "w", encoding="utf-8") as robots_file:
    robots_file.write("User-agent: *\n")
    # Ignored directory names first, then ignored file names.
    for dir_name in igrone_dir:
        robots_file.write("Disallow: /" + dir_name + "\n")
    for file_name in igrone_filename:
        robots_file.write("Disallow: /" + file_name + "\n")
print("Finish!")