# Requirement: read the URLs listed in wangzhi.txt and save the wanted content of each page as text.
import requests
from bs4 import BeautifulSoup
# Load the list of URLs to scrape from wangzhi.txt: one URL per line, with
# surrounding whitespace removed and blank lines skipped.
with open('wangzhi.txt', 'r', encoding='utf-8') as url_file:
    stripped_lines = (line.strip() for line in url_file)
    # The comprehension is evaluated while the file is still open.
    url_list = [url for url in stripped_lines if url]
# Seconds to wait on a server before giving up; without a timeout,
# requests.get() can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT_SECONDS = 10

# Fetch every URL in url_list and write the extracted text to result.txt.
with open('result.txt', 'w', encoding='utf-8') as out_file:
    for inurl in url_list:
        try:
            response = requests.get(inurl, timeout=REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()
        except requests.RequestException as exc:
            # One unreachable or failing page must not abort the whole run
            # and leave result.txt truncated; report it and move on.
            print(f"请求失败,已跳过: {inurl} ({exc})")
            continue

        # The body is always decoded as UTF-8, overriding whatever encoding
        # requests derived from the response.
        response.encoding = "utf-8"
        soup = BeautifulSoup(response.text, features="html.parser")

        # Every <h1> element first, followed by every <div id="content">.
        pinzhong_items = (
            soup.find_all("h1") + soup.find_all("div", id="content")
        )
        for pinzhong_item in pinzhong_items:
            # Write the element's text, trimmed, on its own line.
            out_file.write(pinzhong_item.text.strip() + '\n')

        # NOTE(review): the original indentation was lost, so it is assumed
        # that the separator line is written once per URL rather than after
        # every extracted element - confirm against the intended output.
        out_file.write('=' * 50 + '\n')

print("结果已保存到 result.txt 文件")