72 lines
2.1 KiB
Python
72 lines
2.1 KiB
Python
|
#!/usr/bin/env python3
|
||
|
import os
|
||
|
from xml.dom.minidom import Document
|
||
|
import time
|
||
|
|
||
|
|
||
|
# Directory (relative to the current working directory) that holds the
# generated HTML pages to be listed in the sitemap.
html_path = "_book"

# Public URL prefix that replaces the local root directory in every entry.
http_path = "http://www.zeekling.cn/book/ml"

# File name of the sitemap written into the current working directory.
site_map_name = "ml.xml"
|
||
|
|
||
|
|
||
|
def dirlist(path, all_file=None):
    """Recursively collect every file and sub-directory found below ``path``.

    Directories are recorded with a trailing ``'/'`` and are immediately
    followed by their own contents. Any directory whose path ends with
    ``'gitbook'`` is skipped together with everything beneath it.

    Args:
        path: Directory to scan.
        all_file: Optional list that the found paths are appended to. A new
            list is created when it is omitted.

    Returns:
        The list the paths were appended to (``all_file`` itself when one
        was supplied).

    Raises:
        OSError: If ``path`` (or a nested directory) cannot be listed.
    """
    if all_file is None:
        all_file = []

    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # resulting sitemap stable from one run to the next.
    for file_name in sorted(os.listdir(path)):
        file_path = os.path.join(path, file_name)
        if not os.path.isdir(file_path):
            all_file.append(file_path)
            continue
        if file_path.endswith('gitbook'):
            # Skip this directory and its whole subtree.
            continue
        all_file.append(file_path + '/')
        dirlist(file_path, all_file)

    return all_file
|
||
|
|
||
|
|
||
|
def write_xml(url_paths):
    """Write a sitemap for ``url_paths`` to ``site_map_name`` in the cwd.

    Each URL becomes one ``<url>`` element under a ``<urlset>`` root and
    carries four children, in this order: ``<loc>`` (the URL itself),
    ``<changefreq>`` (always ``weekly``), ``<priority>`` (always ``0.8``)
    and ``<lastmod>`` (today's local date as ``YYYY-MM-DD``).

    Args:
        url_paths: Iterable of URL strings to list in the sitemap.

    Raises:
        OSError: If the output file cannot be opened or written.
    """
    doc = Document()
    doc.encoding = 'UTF-8'
    url_set = doc.createElement('urlset')
    url_set.setAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9')
    doc.appendChild(url_set)

    # Computed once so every entry carries the same date.
    date_str = time.strftime('%Y-%m-%d', time.localtime())

    for url_path in url_paths:
        url = doc.createElement('url')
        url_set.appendChild(url)
        for tag, text in (
            ('loc', url_path),
            ('changefreq', 'weekly'),
            ('priority', '0.8'),
            ('lastmod', date_str),
        ):
            node = doc.createElement(tag)
            node.appendChild(doc.createTextNode(text))
            url.appendChild(node)

    path = os.path.join(os.getcwd(), site_map_name)
    # The serialized XML declaration names no encoding, so parsers assume
    # UTF-8; write UTF-8 explicitly instead of the platform default. The
    # context manager closes the file even if the write fails.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(doc.toprettyxml(indent=' '))
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # Local root of the generated site: <cwd>/<html_path>.
    pwd = os.getcwd() + '/' + html_path
    all_file = dirlist(pwd, [])

    # Keep only HTML pages and directory entries (those end with '/').
    # Every path returned by dirlist() is built by joining onto pwd, so it
    # starts with pwd; swap only that leading prefix for the public URL
    # prefix rather than replacing every occurrence of pwd in the path.
    all_html_file = [
        http_path + file_name[len(pwd):]
        for file_name in all_file
        if file_name.endswith(('.html', '/'))
    ]

    write_xml(all_html_file)
|
||
|
|