通过爬取网站,生成sitemap.xml
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

29 lines
946 B

#!/usr/bin/env python3
# coding=utf-8
# -*- coding: utf-8 -*-
import datetime
import re
import get_url
def create_xml(filename, url_list):
    """Write a sitemap file listing every URL in *url_list*.

    The file at *filename* is (re)created as UTF-8 and contains a single
    ``<urlset>`` root element with one ``<url>`` entry per URL.  Every entry
    carries the same ``<lastmod>`` timestamp (taken once per call, in UTC),
    a ``weekly`` change frequency and a priority of ``0.8``.

    Args:
        filename: Path of the sitemap file to write; an existing file is
            overwritten.
        url_list: Iterable of URL strings.  An empty iterable produces an
            empty but well-formed ``<urlset>`` element.
    """
    # Function-scope import so the block does not depend on new file-level
    # imports.
    from xml.sax.saxutils import escape

    header = '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
    footer = "</urlset>"
    entry_template = (
        "  <url>\n"
        "    <loc>%s</loc>\n"
        "    <lastmod>%s</lastmod>\n"
        "    <changefreq>weekly</changefreq>\n"
        "    <priority>0.8</priority>\n"
        "  </url>\n"
    )
    # The timestamp is labelled "+00:00", so it has to be taken in UTC rather
    # than in the machine's local time zone.
    lastmod = datetime.datetime.now(datetime.timezone.utc).strftime(
        "%Y-%m-%dT%H:%M:%S+00:00"
    )

    # A single handle for the whole write: nothing is leaked, and the footer
    # is still written when url_list is empty.
    with open(filename, 'w', encoding='utf-8') as sitemap:
        sitemap.write(header)
        for url in url_list:
            # escape() covers '&', '<' and '>' so the <loc> text stays valid XML.
            sitemap.write(entry_template % (escape(url), lastmod))
        sitemap.write(footer)
if __name__ == '__main__':
    # Script entry point: run the crawler, report how many URLs were
    # collected, then write them out as sitemap.xml.
    # NOTE(review): parser() presumably fills get_url.url_res_final starting
    # from get_url.url_mine_list — confirm against the get_url module.
    get_url.parser(get_url.url_mine_list, 0)
    collected = get_url.url_res_final
    print('url size:', len(collected))
    create_xml(filename='sitemap.xml', url_list=collected)