70 lines
1.7 KiB
Python
70 lines
1.7 KiB
Python
|
|
import os
from urllib.parse import quote
from xml.sax.saxutils import escape
|
|
####################################
##             CONFIG             ##
####################################
# NOTE: the "igrone_*" spelling is kept as-is because the rest of the script
# refers to these names.
root_url = "https://learn.study-area.org.cn"  # Site origin prepended to every page path
roots = "book"  # Root directory to scan
igrone_dir = [  # Directory names to skip
    "css",
    "fonts",
    "FontAwesome",
    "asserts",  # NOTE(review): possibly meant "assets" - confirm before changing
    #### Entries above are the defaults; do not change them ####
    "licenses",
]
igrone_filename = [  # File names to skip
    "print.html",
    "404.html",
    #### Entries above are the defaults; do not change them ####
]
####################################
|
|
|
|
|
|
file_list = []  # URL paths (relative to the site root) of every page found


def dfs_search(path, n_path, results=None, skip_dirs=None, skip_files=None):
    """Recursively collect the URL paths of the HTML pages below ``path``.

    Args:
        path: Filesystem directory to scan.
        n_path: URL path that corresponds to ``path`` ("" for the site root).
        results: List that receives the URL paths. Defaults to the
            module-level ``file_list``.
        skip_dirs: Directory names that are not descended into. Defaults to
            the module-level ``igrone_dir``.
        skip_files: File names that are left out. Defaults to the
            module-level ``igrone_filename``.

    Returns:
        The list the URL paths were appended to.

    Raises:
        OSError: If a directory cannot be listed.
    """
    if results is None:
        results = file_list
    if skip_dirs is None:
        skip_dirs = igrone_dir
    if skip_files is None:
        skip_files = igrone_filename

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # generated output stable from one run to the next.
    for name in sorted(os.listdir(path)):
        full_path = os.path.join(path, name)
        if os.path.isdir(full_path):
            if name not in skip_dirs:
                dfs_search(
                    full_path, n_path + "/" + name, results, skip_dirs, skip_files
                )
        elif name.endswith(".html") and name not in skip_files:
            # URL separators are always "/", so os.sep must not be used here
            # (it is a backslash on Windows).
            results.append(n_path + "/" + name)
    return results
|
|
|
|
|
|
SITEMAP_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9"


def build_sitemap_xml(base_url, paths):
    """Return the sitemap document listing ``base_url + path`` for each path.

    Args:
        base_url: Site origin without a trailing slash.
        paths: Iterable of URL paths, each starting with "/".

    Returns:
        The complete XML document as a string.
    """
    entries = []
    for path in paths:
        # Paths may still carry the platform separator; URLs always use "/".
        url_path = path.replace(os.sep, "/")
        # Percent-encode the path, then XML-escape the whole URL so that
        # characters such as "&" or non-ASCII file names stay valid.
        location = escape(base_url + quote(url_path))
        # Each page gets its own <url> element.
        entries.append(
            "  <url>\n"
            f"    <loc>{location}</loc>\n"
            "    <changefreq>daily</changefreq>\n"
            "    <priority>0.9</priority>\n"
            "  </url>\n"
        )
    return (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        f'<urlset xmlns="{SITEMAP_NAMESPACE}">\n'
        + "".join(entries)
        + "</urlset>\n"
    )


def build_robots_txt(disallowed_dirs, disallowed_files):
    """Return robots.txt content that disallows the given names for all agents.

    Args:
        disallowed_dirs: Directory names to emit as ``Disallow: /<name>``.
        disallowed_files: File names to emit as ``Disallow: /<name>``.

    Returns:
        The robots.txt text, one rule per line.
    """
    # No blank lines between rules: some parsers (for example Python's
    # urllib.robotparser) treat a blank line as the end of the User-agent
    # group, which would detach every rule after the first one.
    lines = ["User-agent: *"]
    lines.extend(f"Disallow: /{name}" for name in disallowed_dirs)
    lines.extend(f"Disallow: /{name}" for name in disallowed_files)
    return "\n".join(lines) + "\n"


def main():
    """Scan the site directory and write sitemap.xml and robots.txt into it."""
    dfs_search(roots, "")

    print(f"found {len(file_list)} pages:")
    for page in file_list:
        print(page)
    print()

    print("Write to sitemap.xml ...")
    # The XML header declares UTF-8, so the file must be written as UTF-8
    # regardless of the platform's default encoding.
    sitemap_path = os.path.join(roots, "sitemap.xml")
    with open(sitemap_path, "w", encoding="utf-8") as file:
        file.write(build_sitemap_xml(root_url, file_list))

    print("Write robots.txt")
    robots_path = os.path.join(roots, "robots.txt")
    with open(robots_path, "w", encoding="utf-8") as file:
        file.write(build_robots_txt(igrone_dir, igrone_filename))

    print("Finish!")


if __name__ == "__main__":
    main()
|