import requests
from bs4 import BeautifulSoup


def fetch_and_parse(url):
    # Send an HTTP GET request to the given URL
    response = requests.get(url)
    # Check whether the request succeeded
    if response.status_code == 200:
        # Parse the HTML content with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')
        # Find all <li> elements whose class is "layout_li ajaxpost"
        url_cards = soup.find_all('li', {'class': 'layout_li ajaxpost'})
        print(len(url_cards))
        for url_card in url_cards:
            # Extract the article link
            link = url_card.find('a', {'class': 'imgeffect'})['href']
            # Extract the src attribute of the article cover image
            img_src = url_card.find('img', {'class': 'thumb'})['src']
            # Extract the article title
            title = url_card.find('h2').get_text().strip()
            # Extract the article category
            category = url_card.find('span', {'class': 'is_category'}).get_text().strip()
            print("Article link:", link)
            print("Article cover:", img_src)
            print("Article title:", title)
            print("Article category:", category)
            print("-" * 50)  # Separator line between cards
    else:
        print(f"Failed to access {url}, status code: {response.status_code}")
# Example: replace the placeholder URL below with the page you want to scrape
fetch_and_parse('https://www.xxx.com/vcat')
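# A minimal, more defensive sketch of the same extraction, assuming the target
# page uses the same "layout_li ajaxpost" card markup. It guards against missing
# tags/attributes instead of raising KeyError/AttributeError, and returns the
# results as a list of dicts rather than printing them. The helper names
# (extract_card, fetch_and_parse_safe) are illustrative, not from the original.

def extract_card(url_card):
    # Pull each field from one card, using None when a tag is absent
    link_tag = url_card.find('a', {'class': 'imgeffect'})
    img_tag = url_card.find('img', {'class': 'thumb'})
    title_tag = url_card.find('h2')
    category_tag = url_card.find('span', {'class': 'is_category'})
    return {
        'link': link_tag.get('href') if link_tag else None,
        'img_src': img_tag.get('src') if img_tag else None,
        'title': title_tag.get_text(strip=True) if title_tag else None,
        'category': category_tag.get_text(strip=True) if category_tag else None,
    }


def fetch_and_parse_safe(url):
    # raise_for_status() turns HTTP error codes into exceptions
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    cards = soup.find_all('li', {'class': 'layout_li ajaxpost'})
    return [extract_card(card) for card in cards]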