#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

urlmine = "https://git.zeekling.cn/"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}


def getlinks(url):
    """Fetch the page at `url` and return its filtered outgoing links."""
    pages = requests.get(url, headers=headers)
    soup = BeautifulSoup(pages.text, 'html.parser')
    links = soup.find_all('a')
    return filterlinks(links, url)


def filterlinks(links, url_org):
    """Keep only links that stay on this site; resolve relative URLs."""
    tmplinks = []
    for link in links:
        url = link.get('href')
        # Skip <a> tags that have no href attribute.
        if url is None:
            continue
        url = str(url)
        ishttp = url.startswith('http')
        ismine = url.startswith(urlmine)
        # Drop absolute links that point to other sites.
        if ishttp and not ismine:
            continue
        # Drop in-page anchors, the bare root path, and query-only links.
        if url.startswith('#') or url.strip() == '/' or url.startswith('?'):
            continue
        # Resolve relative links against the originating page.
        if not ishttp:
            url = urljoin(url_org, url)
        tmplinks.append(url)
    # Deduplicate before returning.
    return list(set(tmplinks))


if __name__ == '__main__':
    for link in getlinks(urlmine):
        print(link)