#!/usr/bin/env python
# -*- coding: utf-8 -*-
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Root URL of the site to crawl; absolute links are treated as internal
# only when they start with this prefix.
urlmine = "https://git.zeekling.cn/"

# Browser-like HTTP request headers (a desktop Chrome User-Agent string).
headers = {
    'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}
def getlinks(url, timeout=10):
    """Download ``url`` and return the filtered links found on that page.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Defaults to 10.

    Returns:
        The list built by ``filterlinks`` from every ``<a>`` element on
        the page, with ``url`` as the base for relative links.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Send the module-level browser-like headers, and bound the wait:
    # without a timeout requests may block indefinitely on a stalled server.
    pages = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(pages.text, 'html.parser')
    return filterlinks(soup.find_all('a'), url)
def filterlinks(links, url_org, own_prefix=None):
    """Reduce anchor elements to a de-duplicated list of absolute URLs.

    Anchors are dropped when they have no (or an empty) ``href``, point to
    an absolute ``http...`` URL outside ``own_prefix``, are a pure fragment
    (``#...``), a pure query (``?...``), or the bare root ``/``. Remaining
    relative references are resolved against ``url_org``.

    Args:
        links: Iterable of anchor elements supporting ``.get('href')``
            (e.g. BeautifulSoup tags, or plain dicts).
        url_org: URL of the page the anchors were taken from; used as the
            base when resolving relative references.
        own_prefix: Prefix that marks an absolute URL as internal.
            Defaults to the module-level ``urlmine``.

    Returns:
        A list of unique URLs in first-seen order.
    """
    if own_prefix is None:
        own_prefix = urlmine

    seen = set()
    reslinks = []
    for link in links:
        href = link.get('href')
        if href is None:
            # <a> without an href (e.g. a named anchor) has no target.
            continue
        url = str(href).strip()
        if not url or url == '/' or url.startswith(('#', '?')):
            continue
        if url.startswith('http'):
            if not url.startswith(own_prefix):
                # Absolute link to a different site.
                continue
        else:
            # urljoin resolves the reference properly; plain concatenation
            # would yield e.g. "https://host//path" for a "/path" href.
            url = urljoin(url_org, url)
        if url not in seen:
            seen.add(url)
            reslinks.append(url)
    return reslinks
# Crawl the site's front page and print each collected link on its own line.
links = getlinks(urlmine)
for link in links:
    print(link)