"""Generate ``sitemap.xml`` and ``robots.txt`` for the built book site.

Reconstructed from the patch that adds ``build.py`` (commit 8dff2f9,
author cxykevin, 2024-06-29, subject "add build.py").

Running the script scans ``roots`` recursively for ``*.html`` pages,
skipping the configured directory and file names, then writes
``sitemap.xml`` and ``robots.txt`` into ``roots``.
"""

import os
from urllib.parse import quote
from xml.sax.saxutils import escape

####################################
##             CONFIG             ##
####################################
# Site origin; page paths (which start with "/") are appended to it.
root_url = "https://learn.study-area.org.cn"
roots = "book"  # root directory to scan
igrone_dir = [  # directory names that are skipped at any depth
    "css",
    "fonts",
    "FontAwesome",
    "asserts",  # NOTE(review): possibly meant "assets" -- confirm
    #### defaults above; do not change ####
    "licenses",
]
igrone_filename = [  # file names that are skipped
    "print.html",
    "404.html",
    #### defaults above; do not change ####
]
####################################

# NOTE(review): the templates as found contained no element tags (they look
# stripped by an extraction step); the tags below are reconstructed from the
# sitemaps.org protocol -- confirm against the upstream build.py.
# The XML declaration must be the very first bytes of the document, so the
# start template has no leading newline.
xml_tmpl_start = """\
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
"""
xml_tmpl_link = """\
  <url>
    <loc>{}</loc>
    <changefreq>daily</changefreq>
    <priority>0.9</priority>
  </url>
"""
xml_tmpl_end = """\
</urlset>
"""

# Site-relative page paths ("/dir/page.html") collected by dfs_search().
file_list = []


def dfs_search(path: str, n_path: str) -> None:
    """Recursively collect HTML pages below *path* into ``file_list``.

    Args:
        path: Filesystem directory to scan.
        n_path: Site-relative URL prefix corresponding to *path*
            ("" for the root, "/guide" for a sub-directory, ...).

    Directories named in ``igrone_dir`` and files named in
    ``igrone_filename`` are skipped.  Entries are visited in sorted order
    so that the generated files are reproducible.
    """
    for name in sorted(os.listdir(path)):
        full_path = os.path.join(path, name)
        if os.path.isdir(full_path):
            if name not in igrone_dir:
                dfs_search(full_path, n_path + "/" + name)
        elif name.endswith(".html") and name not in igrone_filename:
            # URL paths always use "/", never the platform separator.
            file_list.append(n_path + "/" + name)


def build_sitemap(pages) -> str:
    """Return the sitemap XML document for the site-relative *pages*."""
    parts = [xml_tmpl_start]
    for page in pages:
        # Percent-encode the path, then entity-escape the whole URL so the
        # <loc> value is valid both as a URL and as XML text.
        parts.append(xml_tmpl_link.format(escape(root_url + quote(page))))
    parts.append(xml_tmpl_end)
    return "".join(parts)


def build_robots() -> str:
    """Return the robots.txt content disallowing the ignored names.

    All rules are kept in one record with no blank lines in between:
    parsers that treat a blank line as a record separator would otherwise
    drop every rule after the first.
    """
    lines = ["User-agent: *"]
    lines.extend("Disallow: /" + name for name in igrone_dir)
    lines.extend("Disallow: /" + name for name in igrone_filename)
    return "\n".join(lines) + "\n"


def main() -> None:
    """Scan ``roots`` and write ``sitemap.xml`` and ``robots.txt`` into it."""
    dfs_search(roots, "")

    print(f"found {len(file_list)} pages:")
    for page in file_list:
        print(page)
    print()
    print("Write to sitemap.xml ...")
    with open(os.path.join(roots, "sitemap.xml"), "w", encoding="utf-8") as file:
        file.write(build_sitemap(file_list))
    print("Write robots.txt")
    with open(os.path.join(roots, "robots.txt"), "w", encoding="utf-8") as file:
        file.write(build_robots())
    print("Finish!")


if __name__ == "__main__":
    main()