from urllib import request
from lxml import etree
import time
# Crawl pages 330–399: fetch each page, extract the first image URL via
# XPath, and save the image to disk as <pagenum>.webp.
for pagenum in range(330, 400):
    # Spoof a browser User-Agent so the site does not reject the request.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36"}
    # URL of the current paginated page.
    url = r"https://www.baidu.com" + str(pagenum) + '.html'
    # Download the page HTML; the `with` block closes the connection
    # (the original leaked the response object).
    page_req = request.Request(url=url, headers=headers)
    with request.urlopen(page_req) as res:
        html = res.read().decode('utf-8')
    # First image address matched by the XPath expression.
    # NOTE(review): raises IndexError if the page has no matching <img> — TODO confirm acceptable.
    name_list = etree.HTML(html).xpath("/html/body/div[2]/div[1]/div[3]/div/p/img/@src")[0]
    # Download the image itself, again with the forged headers.
    img_req = request.Request(url=name_list, headers=headers)
    with request.urlopen(img_req) as img_res:
        img_data = img_res.read()
    # Write the image bytes to disk.
    with open(str(pagenum) + '.webp', 'wb') as f:
        f.write(img_data)
    # Wait 1 s so rapid requests do not get the client blocked.
    time.sleep(1)
# Run this script with Python 3: it visits each paginated page, extracts the
# image address contained in the page, and saves the image to disk.