# Task: given a keyword, download 100 matching images and save them locally,
# with one folder per keyword (GUI version).
# Description: e.g. entering "黑客" (hacker) should download 100 hacker-related
# images into a "黑客" folder; entering "python" should download 100
# python-related images into a local "python" folder.
# Setup notes:
#   pip install <module-name> [-i <simple index URL>]
#   pip install pyinstaller   # for packaging the program
#   pip install PyQt5         # GUI toolkit
import requests
from urllib.parse import quote
from pprint import pprint
from pdb import set_trace
class 批量爬取百度图片:
    """Crawl Baidu image search for a keyword and save the images locally.

    NOTE(review): despite the file header promising 100 images per keyword in a
    per-keyword folder, this version fetches a single result page (30 items)
    for the hard-coded keyword '黑客' and saves files into the current
    directory — TODO confirm whether the GUI wrapper supplies the rest.
    """

    # Browser headers captured from a real session; Baidu's acjson endpoint
    # needs at least the Referer and User-Agent to answer with JSON.
    headers = {
        'Accept': 'text/plain, */*; q=0.01',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        # 'Cookie': 'BDqhfp=%E9%BB%91%E5%AE%A2%26%26NaN-1undefined%26%265916%26%265; BIDUPSID=E695E9B2AF2F6BFFED9BD684584A8956; PSTM=1712380467; BAIDUID=34F3B544DDD48A4C76CCDD75A6DB9841:FG=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=40299_40377_40416_40459_40439_40510_40446_60026_60032_60046_40080; BAIDUID_BFESS=34F3B544DDD48A4C76CCDD75A6DB9841:FG=1; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; userFrom=null; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; ab_sr=1.0.1_Y2E1NjA2ODI1NmIyMWE0OGY3Y2Y5YzA1ZWE1MzBkY2YwMGQ0M2RjYWE5Mjk3YjdiNTUwMmEwZTk2ZGNiODZkNGI0NWVmYzAxODEwNTk5ZjA2NTA4ZTg0OTZhZjAzYjcwNjM3NjU5M2Y2MzY5YTRjNzJhY2MxNDc5MmMzN2ZhMTUwYTQ4MDVlZDViNWZlNDNhZGE1NjRlYjMyOWYwMzY1Mw==',
        'Referer': 'https://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&dyTabStr=MCwxLDMsMiw2LDQsNSw4LDcsOQ%3D%3D&word=%E9%BB%91%E5%AE%A2',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'sec-ch-ua': '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    def __init__(self):
        """Search for the keyword, then download every image link found."""
        # Sequential file-name counter for saved images (1.jpg, 2.jpg, ...).
        self.cont = 1
        url = 'https://image.baidu.com/search/acjson'
        word = quote('黑客')
        li = self.发送请求(url, word, 30)
        # Bug fix: guard against None/False — iterating a failed result used
        # to raise "TypeError: 'NoneType' object is not iterable".
        if li:
            for i in li:
                # i is a direct image URL, so this call takes the
                # download branch of 发送请求.
                self.发送请求(url=i)

    def 发送请求(self, url, word='黑客', pn=30):
        """Dual-purpose request helper.

        Baidu image links contain the substring 'JPEG' (from their 'f=JPEG'
        query parameter), which selects the download branch; any other URL is
        treated as the acjson search API.

        :param url: image URL or the acjson search endpoint
        :param word: URL-quoted search keyword (search branch only)
        :param pn: result offset, in steps of 30 (search branch only)
        :return: list of image URLs for a search request, None on failure,
                 None (implicitly) for a download request
        """
        if 'JPEG' in url:
            # Direct image link: fetch the raw bytes and save them to disk.
            response = requests.get(url, headers=self.headers, timeout=10).content
            self.存储数据(response)
        else:
            params = {
                "tn": "resultjson_com",
                "logid": "7871683271133482576",
                "ipn": "rj",
                "ct": "201326592",
                "is": "",
                "fp": "result",
                "fr": "",
                "word": word,
                "queryWord": word,
                "cl": "2",
                "lm": "-1",
                "ie": "utf-8",
                "oe": "utf-8",
                "adpicid": "",
                "st": "",
                "z": "",
                "ic": "",
                "hd": "",
                "latest": "",
                "copyright": "",
                "s": "",
                "se": "",
                "tab": "",
                "width": "",
                "height": "",
                "face": "",
                "istype": "",
                "qc": "",
                "nc": "1",
                "expermode": "",
                "nojc": "",
                "isAsync": "",
                "pn": pn,
                "rn": "30",
            }
            try:
                res = requests.get(url, params=params,
                                   headers=self.headers, timeout=10).json()
            except (requests.RequestException, ValueError):
                # Narrowed from a bare `except: pass` that hid every error:
                # catch only network failures and non-JSON responses, and
                # return an explicit failure marker.
                return None
            return self.解析源代码(res)

    def 解析源代码(self, res):
        """Extract the hoverURL image links from an acjson payload.

        :param res: decoded JSON dict from the search API (may be falsy)
        :return: list of non-empty hoverURL strings, or False when the
                 payload is empty/falsy
        """
        if not res:
            return False
        # `data` typically ends with an empty dict, so keep only entries
        # with a non-empty hoverURL. .get() avoids the KeyError that the
        # old bare except used to swallow.
        return [i['hoverURL'] for i in res.get('data', []) if i.get('hoverURL')]

    def 存储数据(self, res):
        """Save raw image bytes as '<cont>.jpg' in the current directory.

        Bug fix: the counter is now advanced after each write — previously
        it stayed at 1, so every image overwrote 1.jpg.
        """
        with open(str(self.cont) + '.jpg', 'wb') as f:
            f.write(res)
        self.cont += 1
# Run the crawler only when executed as a script — previously this fired
# network requests as a side effect of merely importing the module.
if __name__ == '__main__':
    批量爬取百度图片()




















