#!/usr/bin/env python3
"""Generate a sitemap XML file for a directory tree of HTML pages.

The script walks ``html_path`` (relative to the current working directory),
maps every directory and every ``.html`` file it finds to a URL below
``http_path`` and writes the resulting sitemap to ``site_map_name`` in the
current working directory.
"""
import os
from xml.dom.minidom import Document
import time

# Directory (relative to the working directory) that holds the HTML tree.
html_path = '_book'
# URL prefix that replaces the local root directory in every sitemap entry.
http_path = 'http://www.zeekling.cn/book/ml'
# Name of the sitemap file written into the working directory.
site_map_name = 'ml.xml'


def dirlist(path, all_file=None):
    """Recursively collect the files and directories below *path*.

    Directories are recorded with a trailing ``'/'`` and are listed before
    their own contents. Any directory whose path ends with ``'gitbook'`` is
    skipped together with everything below it.

    Args:
        path: Directory to walk.
        all_file: Optional list to append to; a new list is created when it
            is omitted.

    Returns:
        The list that was appended to (``all_file`` itself when one was
        supplied).
    """
    if all_file is None:
        all_file = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # generated sitemap stable between runs and machines.
    for file_name in sorted(os.listdir(path)):
        file_path = os.path.join(path, file_name)
        if os.path.isdir(file_path):
            if file_path.endswith('gitbook'):
                continue
            all_file.append(file_path + '/')
            dirlist(file_path, all_file)
        else:
            all_file.append(file_path)
    return all_file


def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` as the last child of *parent*."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)


def write_xml(url_paths):
    """Write a sitemap containing *url_paths* to the working directory.

    Every URL becomes one ``<url>`` entry with a ``changefreq`` of
    ``weekly``, a ``priority`` of ``0.8`` and today's local date as
    ``lastmod``. The output file is ``site_map_name`` inside the current
    working directory and is always encoded as UTF-8.

    Args:
        url_paths: Iterable of URL strings to list in the sitemap.
    """
    doc = Document()
    url_set = doc.createElement('urlset')
    doc.appendChild(url_set)
    url_set.setAttribute(
        'xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9')
    date_str = time.strftime('%Y-%m-%d', time.localtime())
    for url_path in url_paths:
        url = doc.createElement('url')
        url_set.appendChild(url)
        _append_text_element(doc, url, 'loc', url_path)
        _append_text_element(doc, url, 'changefreq', 'weekly')
        _append_text_element(doc, url, 'priority', '0.8')
        _append_text_element(doc, url, 'lastmod', date_str)
    path = os.path.join(os.getcwd(), site_map_name)
    # Passing ``encoding`` makes minidom emit the encoding in the XML
    # declaration and return bytes, so the file content no longer depends on
    # the locale's default text encoding.
    with open(path, 'wb') as f:
        f.write(doc.toprettyxml(indent=' ', encoding='UTF-8'))


def _to_url(file_path, root, base_url):
    """Map *file_path*, located below *root*, to a URL below *base_url*."""
    # Strip only the leading root prefix; a plain str.replace() would also
    # rewrite any later occurrence of the root inside the path.
    relative = file_path[len(root):]
    # URLs always use forward slashes, whatever the OS path separator is.
    return base_url + relative.replace(os.sep, '/')


def main():
    """Build the sitemap for ``html_path`` under the working directory."""
    root = os.path.join(os.getcwd(), html_path)
    # Only HTML pages and directories (marked by a trailing '/') are listed.
    urls = [
        _to_url(file_name, root, http_path)
        for file_name in dirlist(root)
        if file_name.endswith(('.html', '/'))
    ]
    write_xml(urls)


if __name__ == '__main__':
    main()