"""Generate ``sitemap.xml`` and ``robots.txt`` for a built static site.

The script walks the directory named by ``roots``, collects every ``.html``
page that is not excluded by the ignore lists below, and writes both files
into that same directory.
"""

import os
from xml.sax.saxutils import escape

####################################
##             CONFIG             ##
####################################
root_url = "https://learn.study-area.org.cn"  # site domain (no trailing slash)
roots = "book"  # root directory to scan

# NOTE: the "igrone_*" spelling is kept as-is because these are the
# configuration names users of this script already edit.
igrone_dir = [  # directory names to skip
    "css",
    "fonts",
    "FontAwesome",
    "asserts",
    #### entries above are the defaults, do not modify ####
    "licenses",
]
igrone_filename = [  # file names to skip
    "print.html",
    "404.html",
    #### entries above are the defaults, do not modify ####
]
####################################

# URL paths (each starting with "/") of the pages found by dfs_search().
file_list = []

# The templates as found carried no XML markup, which made the written file
# an invalid sitemap; the element structure below follows the sitemaps.org
# protocol. The XML declaration must be the very first bytes of the file.
xml_tmpl_start = """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
"""
xml_tmpl_link = """  <url>
    <loc>{}</loc>
    <changefreq>daily</changefreq>
    <priority>0.9</priority>
  </url>
"""
xml_tmpl_end = """</urlset>
"""


def dfs_search(path: str, n_path: str) -> None:
    """Recursively collect HTML pages under *path* into ``file_list``.

    Args:
        path: Filesystem directory to scan.
        n_path: URL path corresponding to *path* ("" for the site root).

    Directories listed in ``igrone_dir`` and files listed in
    ``igrone_filename`` are skipped. Entries are visited in sorted order so
    the result does not depend on the platform's directory ordering.
    """
    for entry in sorted(os.listdir(path)):
        full_path = os.path.join(path, entry)
        if os.path.isdir(full_path):
            if entry not in igrone_dir:
                dfs_search(full_path, n_path + "/" + entry)
        elif entry.endswith(".html") and entry not in igrone_filename:
            # URL paths always use "/", never the OS path separator.
            file_list.append(n_path + "/" + entry)


def build_sitemap_xml(pages) -> str:
    """Return the sitemap document for *pages* (an iterable of URL paths)."""
    parts = [xml_tmpl_start]
    for page in pages:
        # "&", "<" and ">" must be entity-escaped inside <loc>.
        parts.append(xml_tmpl_link.format(escape(root_url + page)))
    parts.append(xml_tmpl_end)
    return "".join(parts)


def build_robots_txt() -> str:
    """Return the robots.txt content that disallows the ignored paths.

    All rules are kept in one contiguous group: the original robots
    exclusion standard treats a blank line as a record separator, so a blank
    line between rules could detach later rules from ``User-agent: *``.
    """
    lines = ["User-agent: *\n"]
    for name in igrone_dir:
        lines.append("Disallow: /" + name + "\n")
    for name in igrone_filename:
        lines.append("Disallow: /" + name + "\n")
    return "".join(lines)


def main() -> None:
    """Scan ``roots`` and write ``sitemap.xml`` and ``robots.txt`` into it."""
    dfs_search(roots, "")

    print(f"found {len(file_list)} pages:")
    for page in file_list:
        print(page)
    print()

    print("Write to sitemap.xml ...")
    # Explicit UTF-8 so the bytes on disk match the XML declaration.
    with open(os.path.join(roots, "sitemap.xml"), "w", encoding="utf-8") as file:
        file.write(build_sitemap_xml(file_list))

    print("Write robots.txt")
    with open(os.path.join(roots, "robots.txt"), "w", encoding="utf-8") as file:
        file.write(build_robots_txt())

    print("Finish!")


if __name__ == "__main__":
    main()