1. Page to crawl
https://pvp.qq.com/web201605/herolist.shtml
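This is the official 王者荣耀 (Honor of Kings) hero list page. Before running the full crawler, it is worth confirming that the hero links are present in the page's static HTML. A minimal sketch of such a check (the offset of 50 anchors mirrors the script in section 2, which skips the anchors matched by this selector that are not hero entries):

import requests
from bs4 import BeautifulSoup

header = {'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Mobile Safari/537.36'}
res = requests.get('https://pvp.qq.com/web201605/herolist.shtml', headers=header)
res.encoding = res.apparent_encoding
soup = BeautifulSoup(res.text, 'html.parser')
# print a few of the hero entries found by the same CSS selector the full script uses
for a in soup.select("div>div>ul>li>a")[50:55]:
    print(a.text, a.attrs.get('href'))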
2. Python code
import requests
from bs4 import BeautifulSoup
import os
def mul(x):
    # create the output directory if it does not exist yet
    if not os.path.exists(x):
        os.mkdir(x)
        print("Directory created")
# request headers: identify as a mobile Chrome browser
header={
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Mobile Safari/537.36 Edg/113.0.1774.57'
}
url1='https://pvp.qq.com/web201605/herolist.shtml'  # hero list page
url2='https://pvp.qq.com/web201605/'                # base URL for the relative hero-detail links
res=requests.get(url=url1,headers=header)
res.encoding = res.apparent_encoding                # avoid garbled Chinese text
soup = BeautifulSoup(res.text, 'html.parser')
title=soup.find('title').text[13:17]                # slice the game name "王者荣耀" out of the page title, used as the folder name
f=soup.select("div>div>ul>li>a")                    # every <a> nested under div>div>ul>li
wy_list=[]    # relative links to each hero's detail page
name_list=[]  # hero names
# the first 50 anchors matched by the selector are not hero entries, so skip them
for i in f[50:]:
    wy_list.append(i.attrs['href'])
    name_list.append(i.text)
def wy(x,y):
    # visit each hero's detail page and download its big-skin image
    for i in range(len(x)):
        resa=requests.get(url=url2+x[i],headers=header)
        soup = BeautifulSoup(resa.text, 'html.parser')
        f=soup.find(class_="zk-con1 zk-con")        # banner div whose inline style carries the skin image URL
        wy1="https:"+f.attrs['style'][16:90]        # cut the image URL out of the style attribute (hard-coded slice, fragile)
        res=requests.get(url=wy1,headers=header)
        with open(title+"/"+str(y[i])+".png",mode='wb') as file:   # save into the folder created by mul(title)
            file.write(res.content)
            print("{} image downloaded".format(y[i]))
if __name__ == "__main__":
    mul(title)                  # create the 王者荣耀 output folder
    wy(wy_list,name_list)       # download one skin image per hero
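The script above downloads the images one at a time. The same work can be spread over a small pool of worker threads with a Queue; the following is a minimal sketch, assuming it runs after the script above so that header, url2, title, wy_list and name_list already exist and the output folder has been created (the pool size of 4 threads is an arbitrary choice):

import threading
from queue import Queue

def worker(q):
    while True:
        item = q.get()
        if item is None:                 # sentinel: no more work for this thread
            q.task_done()
            break
        href, name = item
        page = requests.get(url=url2 + href, headers=header)
        detail = BeautifulSoup(page.text, 'html.parser')
        banner = detail.find(class_="zk-con1 zk-con")
        img_url = "https:" + banner.attrs['style'][16:90]
        img = requests.get(url=img_url, headers=header)
        with open(title + "/" + name + ".png", mode='wb') as file:
            file.write(img.content)
        print("{} image downloaded".format(name))
        q.task_done()

q = Queue()
for pair in zip(wy_list, name_list):
    q.put(pair)
threads = [threading.Thread(target=worker, args=(q,)) for _ in range(4)]
for t in threads:
    t.start()
for _ in threads:
    q.put(None)                          # one sentinel per worker so every thread exits
q.join()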
3. Results
Running the script creates a 王者荣耀 folder and saves one .png file per hero, printing a message for each image it downloads.