2020-03-21 14:23:46 +00:00
|
|
|
|
## 简介
|
2020-03-21 12:55:40 +00:00
|
|
|
|
|
2020-03-21 14:10:41 +00:00
|
|
|
|
通过爬取网站生成 sitemap.xml,方便搜索引擎收录本站链接。
|
2020-03-21 14:23:46 +00:00
|
|
|
|
|
|
|
|
|
## 使用
|
|
|
|
|
|
|
|
|
|
安装依赖:
|
|
|
|
|
```sh
|
|
|
|
|
pip3 install -r requirement.txt
|
|
|
|
|
```
|
2020-03-22 05:22:31 +00:00
|
|
|
|
|
|
|
|
|
修改 get_url.py 中的配置:
|
|
|
|
|
```py
|
|
|
|
|
# 当前域名的http链接
|
|
|
|
|
url_root = 'https://git.zeekling.cn'
|
|
|
|
|
# 需要抓取的根链接,可以多写几个
|
|
|
|
|
url_mine_list = [
|
|
|
|
|
'https://git.zeekling.cn/',
|
|
|
|
|
'https://git.zeekling.cn/zeekling'
|
|
|
|
|
]
|
|
|
|
|
# 抓取的最大栈深度,默认为2
|
|
|
|
|
max_depth = 2
|
|
|
|
|
# 不需要写进sitemap.xml的链接
|
|
|
|
|
url_robot_arr = [
|
|
|
|
|
'/user/sign_up',
|
|
|
|
|
'/user/login',
|
|
|
|
|
'/user/forgot_password'
|
|
|
|
|
]
|
|
|
|
|
```
|
|
|
|
|
在 sitemap.py 中修改 sitemap.xml 的输出位置:
|
|
|
|
|
```py
|
|
|
|
|
# 第一个参数为sitemap.xml的位置
|
|
|
|
|
create_xml('sitemap.xml', get_url.url_res_final)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
修改完成后执行:
|
|
|
|
|
```sh
|
|
|
|
|
./sitemap.py
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|