前言
嗨喽,大家好呀~这里是爱看美女的茜茜呐

开发环境 & 第三方模块:
-  解释器版本: python 3.8 
-  代码编辑器: pycharm 2021.2 
-  requests: pip install requests 爬虫 
-  pyecharts: pip install pyecharts 数据可视化 
-  pandas: pip install pandas 数据分析 
基本流程
一. 思路分析
采集什么数据 怎么采集
找到数据来源: 从network当中去找到数据所在的位置
二. 代码实现(正常情况下有四个步骤)
-  发送请求 
-  获取数据 
-  解析数据 
-  保存数据 
采集数据代码
导入模块
import requests     # 第三方模块
import csv
创建表格
# Open the output CSV in write mode so every run starts from a clean file.
# Append mode ('a') would add another header row (and duplicate stock rows)
# each time the script is re-run, which breaks later parsing of the file.
# newline='' lets the csv module control line endings itself.
# The handle stays open at module level because rows are written further
# down in the script through csv_writer.
f = open('股票.csv', mode='w', newline='', encoding='utf-8')
csv_writer = csv.writer(f)
# Header row; the column order matches the row written for each stock.
csv_writer.writerow(['股票代码','股票名称','当前价','涨跌额','涨跌幅','年初至今','成交量','成交额','换手率','市盈率(TTM)','股息率','市值'])
伪装
# Request headers sent with every page request, built up key by key.
headers = dict()
# User identity information (the session cookie), assembled from its
# individual "name=value" pairs joined with "; ".
headers['cookie'] = '; '.join([
    's=bq119wflib',
    'device_id=90ec0683f24e4d1dd28a383d87fa03c5',
    'xq_a_token=df4b782b118f7f9cabab6989b39a24cb04685f95',
    'xqat=df4b782b118f7f9cabab6989b39a24cb04685f95',
    'xq_r_token=3ae1ada2a33de0f698daa53fb4e1b61edf335952',
    'xq_id_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJ1aWQiOi0xLCJpc3MiOiJ1YyIsImV4cCI6MTY3MjE4Njc1MSwiY3RtIjoxNjcwNTAxMjA5MTEyLCJjaWQiOiJkOWQwbjRBWnVwIn0.iuLASkwB6LkAYhG8X8HE-M7AM0n0QUULimW1C4bmkko-wwnPv8YgdakTC1Ju6TPQLjGhMqHuSXqiWdOqVIjy_OMEj9L_HScDot-7kn63uc2lZbEdGnjyF3sDrqGBCpocuxTTwuSFuQoQ1lL7ZWLYOcvz2pRgCw64I0zLZ9LogQU8rNP-a_1Nc91V8moONFqPWD5Lt3JxqcuyJbmb86OpfJZRycnh1Gjnl0Aj1ltGa4sNGSMXoY2iNM8NB56LLIp9dztEwExiRSWlWZifpl9ERTIIpHFBq6L2lSTRKqXKb0V3McmgwQ1X0_MdNdLAZaLZjSIIcQgBU26T8Z4YBZ39dA',
    'u=511670501221348',
    'Hm_lvt_1db88642e346389874251b5a1eded6e3=1667994737,1670480781,1670501222',
    'Hm_lpvt_1db88642e346389874251b5a1eded6e3=1670501922',
])
# Anti-hotlinking: the page the request claims to come from.
headers['referer'] = 'https://****'
# Basic browser information.
headers['user-agent'] = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/108.0.0.0 Safari/537.36'
)
多页采集
# JSON keys read from every stock record, in the same order as the CSV
# header columns.
fields = (
    'symbol', 'name', 'current', 'chg', 'percent', 'current_year_percent',
    'volume', 'amount', 'turnover_rate', 'pe_ttm', 'dividend_yield',
    'market_capital',
)

# Multi-page crawl: request pages 1..166 of the listing, 30 records per page.
for page in range(1, 167):
    url = f'https://*****/v5/stock/screener/quote/list.json?page={page}&size=30&order=desc&orderby=percent&order_by=percent&market=CN&type=sh_sz'
    # Send the request. A timeout keeps one stalled connection from hanging
    # the whole crawl (requests waits indefinitely by default).
    response = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of trying to decode an error page.
    response.raise_for_status()
    # Get the data: decode the response body as JSON.
    json_data = response.json()
    # Parse the data: the stock records are under data -> list.
    data_list = json_data['data']['list']
    if not data_list:
        # An empty page means there is nothing further to fetch.
        break
    for item in data_list:
        row = [item[key] for key in fields]
        print(*row)
        # Save the data: one CSV row per stock.
        csv_writer.writerow(row)

# Close the output file so all buffered rows reach disk before the CSV is
# read back later in the script.
f.close()
扩展小知识
-  <Response [200]>: 请求成功
-  404: 访问不到资源
-  什么是json数据: 以 {}/[] 所包裹起来的数据 {"":"", "":""}
-  .text: 字符串 文本内容
-  .content: 二进制 图片/音频/视频
-  .json(): 把响应内容按 JSON 解析, 取出来的数据本身就已经是 Python 里面的数据了(通常是字典, 也可能是列表)
-  列表: []包裹的内容 [{},{},{},{},{},{},{}]
成交量图表
import pandas as pd         # 做表格数据处理模块 第三方的
from pyecharts.charts import Bar    # 可视化模块 第三方模块
from pyecharts import options as opts   # 可视化模块里面的设置模块(图表样式)
# 1. Read the data written by the crawler.
df = pd.read_csv('股票.csv')
# Series.tolist() converts the column to native Python values. pyecharts
# expects native types; NumPy scalars such as numpy.int64 (which
# list(series.values) produces) may fail when the chart is serialized.
x = df['股票名称'].tolist()
y = df['成交量'].tolist()
# 2. Draw a bar chart of the first ten rows and write it to an HTML file.
# NOTE(review): the title/subtitle text looks like it was carried over from a
# pyecharts example rather than describing this chart; confirm the wording.
c = (
    Bar()
    .add_xaxis(x[:10])
    # The values come from the '成交量' (volume) column, so the series is
    # labelled to match; it was previously mislabelled as '成交额' (amount).
    .add_yaxis("成交量", y[:10])
    .set_global_opts(
        # Rotate the x-axis labels so long stock names do not overlap.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)),
        title_opts=opts.TitleOpts(title="Bar-旋转X轴标签", subtitle="解决标签名字过长的问题"),
    )
    .render("成交量图表.html")
)
# c = (
#     Bar()
#     .add_xaxis(x[:10])
#     .add_yaxis('成交量情况', y[:10])
#     .add_yaxis('成交额情况', y2[:10])
#     .set_global_opts(
#         title_opts=opts.TitleOpts(title='成交量图表'),
#         datazoom_opts=opts.DataZoomOpts()
#     )
# )
# c.render('成交量图表.html')
尾语
感谢你观看我的文章呐~本次航班到这里就结束啦 🛬
希望本篇文章能对你有所帮助 🎉,让你学到一点知识~
躲起来的星星🍥也在努力发光,你也要努力加油(让我们一起努力叭)。




















