通过爬取网站,生成sitemap.xml
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

30 lines
946 B

  1. #!/usr/bin/env python3
  2. # coding=utf-8
  3. # -*- coding: utf-8 -*-
  4. import datetime
  5. import re
  6. import get_url
  7. def create_xml(filename, url_list):
  8. header = '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
  9. file = open(filename, 'w', encoding='utf-8')
  10. file.writelines(header)
  11. file.close()
  12. times = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S+00:00")
  13. for url in url_list:
  14. urls = re.sub(r"&", "&amp;", url)
  15. ment = " <url>\n <loc>%s</loc>\n <lastmod>%s</lastmod>\n <changefreq>weekly</changefreq>\n <priority>0.8</priority>\n </url>\n" % (urls, times)
  16. file = open(filename, 'a', encoding='utf-8')
  17. file.writelines(ment)
  18. last = "</urlset>"
  19. file.writelines(last)
  20. file.close()
  21. if __name__ == '__main__':
  22. get_url.parser(get_url.url_mine_list, 0)
  23. print('url size:', len(get_url.url_res_final))
  24. create_xml('sitemap.xml', get_url.url_res_final)