一、问题出现

# import requests
# url = 'https://www.regulations.gov/search?sortBy=postedDate&sortDirection=desc'
# response = requests.get(url)
# print(response.text)
直接请求页面地址时，响应文本中并没有在浏览器页面上看到的内容。
按 F12 打开浏览器开发者工具，在网络（Network）面板中找到真正返回数据的接口地址：
import requests
# headers = {
# 'accept': 'application/vnd.api+json',
# 'accept-language': 'zh-CN,zh;q=0.9',
# # 'origin': 'https://www.regulations.gov',
# 'priority': 'u=1, i',
# 'referer': 'https://www.regulations.gov/',
# 'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
# 'sec-ch-ua-mobile': '?0',
# 'sec-ch-ua-platform': '"Windows"',
# 'sec-fetch-dest': 'empty',
# 'sec-fetch-mode': 'cors',
# 'sec-fetch-site': 'same-site',
# 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
# 'x-api-key': '5F20SbTVakeYfU9i5gX1dxx96sw4KELUQxAHhcHa',
# }
#
# params = {
# 'page[number]': '1',
# 'sort': '-postedDate',
# }
#
# response = requests.get('https://api.regulations.gov/v4/documents', params=params, headers=headers)
# # print(response.status_code)
这样请求后响应内容出现了，但还有一个问题：用于反爬校验的内容放在请求头（headers）中：
'x-api-key': '5F20SbTVakeYfU9i5gX1dxx96sw4KELUQxAHhcHa',
'referer': 'https://www.regulations.gov/',（referer 表示发起请求的来源页面）
二、如何解决这个问题
通过搜索发现，x-api-key 的值就包含在搜索页面（https://www.regulations.gov/search）的源码中，
因此可以先请求该页面，再从响应文本中把它提取出来：
# import requests
#
# headers = {
# 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
# 'accept-language': 'zh-CN,zh;q=0.9',
# 'cache-control': 'max-age=0',
# # 'cookie': '_gid=GA1.2.2002642573.1724280773; _ga_CSLL4ZEK4L=GS1.1.1724280769.1.1.1724281416.0.0.0; _ga=GA1.2.708221610.1724280770; _ga_JGTFN0MKWK=GS1.2.1724280773.1.1.1724281419.0.0.0',
# 'if-modified-since': 'Wed, 22 May 2024 21:16:53 GMT',
# 'if-none-match': 'W/"664e60c5-d68"',
# 'priority': 'u=0, i',
# 'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
# 'sec-ch-ua-mobile': '?0',
# 'sec-ch-ua-platform': '"Windows"',
# 'sec-fetch-dest': 'document',
# 'sec-fetch-mode': 'navigate',
# 'sec-fetch-site': 'same-origin',
# 'sec-fetch-user': '?1',
# 'upgrade-insecure-requests': '1',
# 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
# }
# response = requests.get('https://www.regulations.gov/search?sortBy=postedDate&sortDirection=desc', headers=headers)
# print(response.text)
#
# # apiKey%22%3A%22 5F20SbTVakeYfU9i5gX1dxx96sw4KELUQxAHhcHa %22%2C%22apiCommentKey
# import re
# api_key= re.search(r"apiKey%22%3A%22(.*?)%22%2C%22apiCommentKey",response.text).group(1)
# print(api_key)
# headers.update({"x-api-key":api_key})
# # apiKey%22%3A%22 5F20SbTVakeYfU9i5gX1dxx96sw4KELUQxAHhcHa %22%2C%22apiCommentKey
这里要用到正则：re.search(r"apiKey%22%3A%22(.*?)%22%2C%22apiCommentKey",response.text).group(1)
页面源码中的这段内容经过了 URL 编码（%22 即双引号 "，%3A 即冒号 :，%2C 即逗号 ,），所以正则要按编码后的文本来写。
然后用提取到的值替换请求头中 x-api-key 的值，注意键名和字典格式要写对：
# headers.update({"x-api-key":api_key})
三、代码实现
import re

import requests

# Seconds to wait for each HTTP request; without a timeout, requests can
# block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 10

# Request headers copied from the browser's XHR call to api.regulations.gov.
headers = {
    'accept': 'application/vnd.api+json',
    'accept-language': 'zh-CN,zh;q=0.9',
    # 'origin': 'https://www.regulations.gov',
    'priority': 'u=1, i',
    'referer': 'https://www.regulations.gov/',
    'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
    # Placeholder only: replaced below with the key scraped from the search page.
    'x-api-key': '2',
}

# The x-api-key value is embedded (URL-encoded) in the HTML of the search
# page, so fetch that page first and extract the key instead of hard-coding it.
response1 = requests.get(
    'https://www.regulations.gov/search?sortBy=postedDate&sortDirection=desc',
    headers=headers,
    timeout=REQUEST_TIMEOUT,
)

# In the page source the key appears as  apiKey":"<key>","apiCommentKey  with
# the punctuation percent-encoded (%22 is '"', %3A is ':', %2C is ',').
match = re.search(r"apiKey%22%3A%22(.*?)%22%2C%22apiCommentKey", response1.text)
if match is None:
    # Fail with a clear message rather than AttributeError on None.group().
    raise RuntimeError(
        "apiKey not found in search page "
        f"(HTTP {response1.status_code}); the page layout may have changed"
    )
api_key = match.group(1)
headers.update({"x-api-key": api_key})

# Call the JSON API with the freshly extracted key and report the status code.
response = requests.get(
    'https://api.regulations.gov/v4/documents',
    headers=headers,
    timeout=REQUEST_TIMEOUT,
)
print(response.status_code)
若打印出的状态码为 200，说明请求成功，接口可以正常访问。










![[ACL 2024] Revisiting Knowledge Distillation for Autoregressive Language Models](https://i-blog.csdnimg.cn/direct/4da5c1c555904ab9835e9150bf61edb2.png#pic_center)








