Java中使用四叶天动态代理IP构建代理池——HttpClient与Jsoup爬虫实战
本文档详细介绍如何使用四叶天动态代理IP服务，在Java中构建高效的IP代理池，并结合HttpClient和Jsoup实现高可用的网络爬虫。

## 1. 为什么需要动态代理IP池

### 1.1 爬虫被封的痛点

做过爬虫开发的都知道：同一个IP频繁请求目标网站，轻则被限速、返回403/429，重则直接被拉黑。实测数据显示，使用同一IP连续请求某电商平台，平均在第37次请求时就会被限制访问。

### 1.2 动态代理IP池的价值

- IP轮换：每次请求更换不同的出口IP，让目标网站以为是不同用户在访问，降低封禁风险
- 分散请求来源：避免单一IP负载过高
- 提高采集效率：可并行使用多个IP，突破并发限制
- 地理分布：可模拟不同地区的用户访问

### 1.3 四叶天动态代理简介

四叶天提供高质量的动态代理IP服务，具有以下特点：
- 覆盖全国200城市
- 支持HTTP/HTTPS协议
- 高匿代理，不泄露真实IP
- API接口实时获取可用IP

## 2. 准备工作

### 2.1 注册四叶天账号

1. 访问四叶天官网注册账号
2. 获取API接口地址和认证凭证
3. 选择合适的套餐（建议先试用测试）

### 2.2 项目依赖配置

```xml
<dependencies>
    <!-- Apache HttpClient -->
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpclient</artifactId>
        <version>4.5.14</version>
    </dependency>
    <!-- Jsoup HTML解析器 -->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.17.2</version>
    </dependency>
    <!-- 日志框架 -->
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
        <version>1.7.36</version>
    </dependency>
    <dependency>
        <groupId>ch.qos.logback</groupId>
        <artifactId>logback-classic</artifactId>
        <version>1.2.12</version>
    </dependency>
    <!-- JSON处理 -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>2.0.52</version>
    </dependency>
    <!-- 注：原文此处重复声明了一次 httpclient 依赖（注释为“连接池”）；
         同一 groupId/artifactId/version 的重复声明是冗余的，已合并到上方的 httpclient 声明中。
         连接池功能由 httpclient 自带的 PoolingHttpClientConnectionManager 提供，无需额外依赖。 -->
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpmime</artifactId>
        <version>4.5.14</version>
    </dependency>
</dependencies>
```

## 3.
四叶天API集成3.1 API调用基础封装javaimport com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedReader; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; import java.util.ArrayList; import java.util.List; /** * 四叶天代理API客户端 */ public class SiYeTianProxyClient { private static final Logger logger LoggerFactory.getLogger(SiYeTianProxyClient.class); // 四叶天API地址实际使用时替换为真实地址 private static final String API_URL https://api.siyetian.com/proxy/get; // 用户认证信息 private final String apiKey; private final String secretKey; public SiYeTianProxyClient(String apiKey, String secretKey) { this.apiKey apiKey; this.secretKey secretKey; } /** * 从四叶天API获取代理IP列表 * param count 获取数量 * param protocol 协议类型http/https * return 代理IP列表格式ip:port */ public ListString fetchProxies(int count, String protocol) { ListString proxyList new ArrayList(); try { // 构建请求URL String urlStr API_URL ?api_key apiKey secret secretKey num count protocol protocol formatjson; URL url new URL(urlStr); HttpURLConnection conn (HttpURLConnection) url.openConnection(); conn.setRequestMethod(GET); conn.setConnectTimeout(5000); conn.setReadTimeout(10000); // 读取响应 BufferedReader reader new BufferedReader( new InputStreamReader(conn.getInputStream(), UTF-8)); StringBuilder response new StringBuilder(); String line; while ((line reader.readLine()) ! 
null) { response.append(line); } reader.close(); // 解析JSON响应 JSONObject json JSONObject.parseObject(response.toString()); if (json.getInteger(code) 200) { JSONArray data json.getJSONArray(data); for (int i 0; i data.size(); i) { JSONObject proxy data.getJSONObject(i); String ip proxy.getString(ip); Integer port proxy.getInteger(port); proxyList.add(ip : port); } logger.info(成功获取{}个代理IP, proxyList.size()); } else { logger.error(API返回错误: {}, json.getString(msg)); } } catch (Exception e) { logger.error(获取代理IP失败, e); } return proxyList; } /** * 获取单个代理IP */ public String fetchOneProxy(String protocol) { ListString proxies fetchProxies(1, protocol); return proxies.isEmpty() ? null : proxies.get(0); } }3.2 代理IP实体类java/** * 代理IP实体类 */ public class ProxyIP { private String ip; private int port; private String protocol; // http / https private long createTime; private int successCount; private int failCount; private long lastUsedTime; public ProxyIP(String ip, int port, String protocol) { this.ip ip; this.port port; this.protocol protocol; this.createTime System.currentTimeMillis(); this.successCount 0; this.failCount 0; this.lastUsedTime 0; } public static ProxyIP parse(String proxyStr, String protocol) { String[] parts proxyStr.split(:); if (parts.length 2) { return new ProxyIP(parts[0], Integer.parseInt(parts[1]), protocol); } return null; } /** * 获取代理的HttpHost对象用于HttpClient */ public org.apache.http.HttpHost toHttpHost() { return new org.apache.http.HttpHost(ip, port, protocol); } /** * 获取代理的Proxy对象用于Jsoup */ public java.net.Proxy toJavaProxy() { java.net.InetSocketAddress address new java.net.InetSocketAddress(ip, port); return new java.net.Proxy(java.net.Proxy.Type.HTTP, address); } /** * 计算健康度成功率 */ public double getHealthRate() { int total successCount failCount; if (total 0) return 1.0; return (double) successCount / total; } /** * 判断是否可用失败次数过多则不可用 */ public boolean isAvailable() { // 失败超过3次则标记为不可用 return failCount 3; } public void recordSuccess() { successCount; 
lastUsedTime System.currentTimeMillis(); } public void recordFailure() { failCount; lastUsedTime System.currentTimeMillis(); } // getters and setters public String getIp() { return ip; } public int getPort() { return port; } public String getProtocol() { return protocol; } public String getProxyString() { return ip : port; } Override public String toString() { return protocol :// ip : port; } }4. 代理IP池管理器4.1 核心代理池实现javaimport org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.*; import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicInteger; /** * 四叶天代理IP池管理器 * 实现IP的自动获取、轮换、健康检查和失效剔除 */ public class ProxyPoolManager { private static final Logger logger LoggerFactory.getLogger(ProxyPoolManager.class); // IP池队列线程安全 private final QueueProxyIP availableProxyQueue new ConcurrentLinkedQueue(); // 所有IP的集合用于快速查找 private final SetString proxySet ConcurrentHashMap.newKeySet(); // 健康度统计 private final MapString, ProxyIP proxyStats new ConcurrentHashMap(); // 四叶天API客户端 private final SiYeTianProxyClient proxyClient; // 配置参数 private final int poolSize; // 池大小 private final int maxFailCount; // 最大失败次数 private final long healthCheckInterval; // 健康检查间隔毫秒 // 轮询索引 private final AtomicInteger roundRobinIndex new AtomicInteger(0); // 定时任务 private ScheduledExecutorService scheduler; public ProxyPoolManager(SiYeTianProxyClient proxyClient, int poolSize) { this(proxyClient, poolSize, 3, 300000); // 默认失败3次剔除5分钟检查一次 } public ProxyPoolManager(SiYeTianProxyClient proxyClient, int poolSize, int maxFailCount, long healthCheckInterval) { this.proxyClient proxyClient; this.poolSize poolSize; this.maxFailCount maxFailCount; this.healthCheckInterval healthCheckInterval; // 初始化IP池 initializePool(); // 启动健康检查定时任务 startHealthCheck(); } /** * 初始化IP池 */ private void initializePool() { logger.info(初始化代理IP池目标大小: {}, poolSize); refreshPool(); } /** * 刷新IP池 */ public synchronized void refreshPool() { // 计算需要补充的数量 int needCount poolSize - availableProxyQueue.size(); if 
(needCount 0) { return; } logger.info(刷新IP池需要补充 {} 个IP, needCount); // 从四叶天API获取新IP ListString newProxies proxyClient.fetchProxies(needCount, http); for (String proxyStr : newProxies) { if (!proxySet.contains(proxyStr)) { ProxyIP proxy ProxyIP.parse(proxyStr, http); if (proxy ! null) { availableProxyQueue.offer(proxy); proxySet.add(proxyStr); proxyStats.put(proxyStr, proxy); } } } logger.info(IP池刷新完成当前可用IP数: {}, availableProxyQueue.size()); } /** * 获取一个可用的代理IP轮询算法 */ public synchronized ProxyIP getProxy() { if (availableProxyQueue.isEmpty()) { logger.warn(代理IP池为空尝试刷新); refreshPool(); if (availableProxyQueue.isEmpty()) { logger.error(代理IP池仍为空返回null); return null; } } // 轮询获取 ProxyIP proxy availableProxyQueue.poll(); if (proxy ! null) { // 重新放回队列末尾实现轮询 availableProxyQueue.offer(proxy); } return proxy; } /** * 获取代理带轮询索引 */ public ProxyIP getProxyByRoundRobin() { ListProxyIP proxyList new ArrayList(availableProxyQueue); if (proxyList.isEmpty()) { refreshPool(); proxyList new ArrayList(availableProxyQueue); if (proxyList.isEmpty()) { return null; } } int index Math.abs(roundRobinIndex.getAndIncrement() % proxyList.size()); return proxyList.get(index); } /** * 报告代理使用成功 */ public void reportSuccess(ProxyIP proxy) { if (proxy ! 
null) { proxy.recordSuccess(); logger.debug(代理 {} 使用成功成功率: {:.2f}%, proxy.getProxyString(), proxy.getHealthRate() * 100); } } /** * 报告代理使用失败 */ public void reportFailure(ProxyIP proxy) { if (proxy null) return; proxy.recordFailure(); logger.warn(代理 {} 使用失败失败次数: {}, proxy.getProxyString(), proxy.getFailCount()); // 失败次数超过阈值从池中移除 if (!proxy.isAvailable()) { removeProxy(proxy); logger.warn(代理 {} 已被移除失败次数过多, proxy.getProxyString()); } } /** * 从池中移除代理 */ private void removeProxy(ProxyIP proxy) { String proxyStr proxy.getProxyString(); availableProxyQueue.remove(proxy); proxySet.remove(proxyStr); proxyStats.remove(proxyStr); } /** * 启动健康检查定时任务 */ private void startHealthCheck() { scheduler Executors.newSingleThreadScheduledExecutor(); scheduler.scheduleAtFixedRate(() - { try { healthCheck(); } catch (Exception e) { logger.error(健康检查异常, e); } }, healthCheckInterval, healthCheckInterval, TimeUnit.MILLISECONDS); } /** * 健康检查验证代理IP是否仍然可用 */ private void healthCheck() { logger.debug(开始代理IP健康检查); ListProxyIP proxiesToCheck new ArrayList(availableProxyQueue); int validCount 0; for (ProxyIP proxy : proxiesToCheck) { if (testProxy(proxy)) { validCount; } else { removeProxy(proxy); logger.info(代理 {} 健康检查失败已移除, proxy.getProxyString()); } } logger.info(健康检查完成有效代理: {}/{}, validCount, proxiesToCheck.size()); // 如果池子太小补充新IP if (availableProxyQueue.size() poolSize / 2) { refreshPool(); } } /** * 测试代理是否可用 */ private boolean testProxy(ProxyIP proxy) { try { // 使用Jsoup测试代理 org.jsoup.Connection connection Jsoup.connect(http://httpbin.org/ip) .proxy(proxy.toJavaProxy()) .timeout(5000); org.jsoup.nodes.Document doc connection.get(); String body doc.body().text(); // 验证返回的IP是否与代理IP匹配 return body ! 
null body.contains(proxy.getIp()); } catch (Exception e) { logger.debug(代理测试失败: {}, proxy.getProxyString()); return false; } } /** * 获取池状态 */ public MapString, Object getPoolStatus() { MapString, Object status new HashMap(); status.put(poolSize, availableProxyQueue.size()); status.put(totalRequested, proxyStats.size()); double avgHealth proxyStats.values().stream() .mapToDouble(ProxyIP::getHealthRate) .average() .orElse(0); status.put(avgHealthRate, String.format(%.2f%%, avgHealth * 100)); return status; } /** * 关闭资源 */ public void shutdown() { if (scheduler ! null) { scheduler.shutdown(); try { scheduler.awaitTermination(10, TimeUnit.SECONDS); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } logger.info(代理池已关闭); } }5. HttpClient与Jsoup代理爬虫实现5.1 基础爬虫类javaimport org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.concurrent.atomic.AtomicInteger; /** * 基于代理池的爬虫实现 * 支持HttpClient和Jsoup两种方式 */ public class ProxyCrawler { private static final Logger logger LoggerFactory.getLogger(ProxyCrawler.class); private final ProxyPoolManager proxyPool; private final AtomicInteger requestCount new AtomicInteger(0); private final AtomicInteger successCount new AtomicInteger(0); private final AtomicInteger failCount new AtomicInteger(0); // 重试配置 private int maxRetries 3; private long retryDelay 1000; public ProxyCrawler(ProxyPoolManager proxyPool) { this.proxyPool proxyPool; } /** * 使用HttpClient发送请求带代理 */ public String fetchWithHttpClient(String url) throws IOException { return fetchWithHttpClient(url, maxRetries); } /** * 使用HttpClient发送请求支持重试 */ public String 
fetchWithHttpClient(String url, int retries) throws IOException { ProxyIP proxy proxyPool.getProxy(); if (proxy null) { throw new IOException(无可用代理IP); } requestCount.incrementAndGet(); // 创建带代理的HttpClient CloseableHttpClient httpClient HttpClients.custom() .setProxy(proxy.toHttpHost()) .build(); HttpGet httpGet new HttpGet(url); httpGet.setHeader(User-Agent, getRandomUserAgent()); httpGet.setHeader(Accept, text/html,application/xhtmlxml,application/xml;q0.9,*/*;q0.8); httpGet.setHeader(Accept-Language, zh-CN,zh;q0.9,en;q0.8); httpGet.setHeader(Connection, keep-alive); try (CloseableHttpResponse response httpClient.execute(httpGet)) { int statusCode response.getStatusLine().getStatusCode(); if (statusCode 200) { String result EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); proxyPool.reportSuccess(proxy); successCount.incrementAndGet(); logger.info(请求成功: {} [{}], url, proxy.getProxyString()); return result; } else { throw new IOException(HTTP状态码异常: statusCode); } } catch (Exception e) { proxyPool.reportFailure(proxy); failCount.incrementAndGet(); logger.warn(请求失败: {} [{}], 错误: {}, url, proxy.getProxyString(), e.getMessage()); // 重试 if (retries 0) { logger.info(重试请求: {}, 剩余重试次数: {}, url, retries - 1); Thread.sleep(retryDelay); return fetchWithHttpClient(url, retries - 1); } throw new IOException(请求失败已重试 maxRetries 次, e); } finally { httpClient.close(); } } /** * 使用Jsoup发送请求带代理 */ public Document fetchWithJsoup(String url) throws IOException { return fetchWithJsoup(url, maxRetries); } /** * 使用Jsoup发送请求支持重试 */ public Document fetchWithJsoup(String url, int retries) throws IOException { ProxyIP proxy proxyPool.getProxy(); if (proxy null) { throw new IOException(无可用代理IP); } requestCount.incrementAndGet(); try { // Jsoup设置代理的方式直接使用proxy()方法[citation:2][citation:5][citation:8] Document doc Jsoup.connect(url) .proxy(proxy.toJavaProxy()) .userAgent(getRandomUserAgent()) .header(Accept, text/html,application/xhtmlxml,application/xml;q0.9,*/*;q0.8) 
.header(Accept-Language, zh-CN,zh;q0.9) .timeout(10000) .get(); proxyPool.reportSuccess(proxy); successCount.incrementAndGet(); logger.info(Jsoup请求成功: {} [{}], url, proxy.getProxyString()); return doc; } catch (Exception e) { proxyPool.reportFailure(proxy); failCount.incrementAndGet(); logger.warn(Jsoup请求失败: {} [{}], 错误: {}, url, proxy.getProxyString(), e.getMessage()); if (retries 0) { logger.info(重试请求: {}, 剩余重试次数: {}, url, retries - 1); Thread.sleep(retryDelay); return fetchWithJsoup(url, retries - 1); } throw new IOException(请求失败已重试 maxRetries 次, e); } } /** * 随机User-Agent */ private static final ListString USER_AGENTS Arrays.asList( Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36, Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36, Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36, Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0 ); private String getRandomUserAgent() { return USER_AGENTS.get(new Random().nextInt(USER_AGENTS.size())); } /** * 获取统计信息 */ public MapString, Object getStats() { MapString, Object stats new HashMap(); stats.put(totalRequests, requestCount.get()); stats.put(successCount, successCount.get()); stats.put(failCount, failCount.get()); stats.put(successRate, String.format(%.2f%%, successCount.get() * 100.0 / Math.max(1, requestCount.get()))); stats.put(poolStatus, proxyPool.getPoolStatus()); return stats; } public void setMaxRetries(int maxRetries) { this.maxRetries maxRetries; } public void setRetryDelay(long retryDelay) { this.retryDelay retryDelay; } }5.2 完整使用示例javaimport org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import 
java.util.concurrent.TimeUnit; /** * 完整示例使用四叶天代理池爬取数据 */ public class CrawlerDemo { private static final Logger logger LoggerFactory.getLogger(CrawlerDemo.class); public static void main(String[] args) { // 1. 初始化四叶天代理客户端 SiYeTianProxyClient proxyClient new SiYeTianProxyClient(your_api_key, your_secret_key); // 2. 创建代理池池大小20失败3次剔除5分钟健康检查 ProxyPoolManager proxyPool new ProxyPoolManager(proxyClient, 20, 3, 300000); // 3. 创建爬虫实例 ProxyCrawler crawler new ProxyCrawler(proxyPool); crawler.setMaxRetries(3); crawler.setRetryDelay(2000); // 4. 执行爬取任务 try { // 单次请求示例 String targetUrl https://httpbin.org/ip; String result crawler.fetchWithHttpClient(targetUrl); logger.info(响应内容: {}, result); // Jsoup解析示例 String parseUrl https://www.example.com; Document doc crawler.fetchWithJsoup(parseUrl); String title doc.title(); logger.info(页面标题: {}, title); // 5. 多线程并发爬取 multiThreadCrawl(crawler); } catch (Exception e) { logger.error(爬取失败, e); } finally { // 6. 关闭资源 proxyPool.shutdown(); } } /** * 多线程爬取示例 */ private static void multiThreadCrawl(ProxyCrawler crawler) { ListString urls Arrays.asList( https://httpbin.org/ip, https://httpbin.org/headers, https://httpbin.org/user-agent, https://httpbin.org/get ); ExecutorService executor Executors.newFixedThreadPool(5); for (String url : urls) { executor.submit(() - { try { String result crawler.fetchWithHttpClient(url); logger.info(爬取结果: {}, result.substring(0, Math.min(100, result.length()))); } catch (Exception e) { logger.error(爬取失败: {}, url, e); } }); } executor.shutdown(); try { executor.awaitTermination(30, TimeUnit.SECONDS); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } // 打印统计信息 logger.info(爬虫统计: {}, crawler.getStats()); } /** * 实际业务示例爬取商品信息 */ public static void crawlProductInfo(ProxyCrawler crawler, String productUrl) { try { Document doc crawler.fetchWithJsoup(productUrl); // 提取商品信息示例选择器实际根据页面结构调整 String title doc.select(h1.product-title).text(); String price doc.select(span.price).text(); String 
description doc.select(div.description).text(); // 提取图片 Elements images doc.select(img.product-image); for (Element img : images) { String imgUrl img.attr(abs:src); logger.info(图片地址: {}, imgUrl); } logger.info(商品: {} - 价格: {}, title, price); } catch (Exception e) { logger.error(解析商品信息失败: {}, productUrl, e); } } }6. 高级优化策略6.1 IP预加载与智能路由java/** * IP预加载机制提前获取一批IP避免实时请求API的延迟[citation:9] */ public class SmartProxyPool extends ProxyPoolManager { private final ScheduledExecutorService preloadScheduler; private final BlockingQueueProxyIP preloadQueue; public SmartProxyPool(SiYeTianProxyClient proxyClient, int poolSize) { super(proxyClient, poolSize); this.preloadQueue new LinkedBlockingQueue(); this.preloadScheduler Executors.newSingleThreadScheduledExecutor(); // 每5分钟预加载一批IP preloadScheduler.scheduleAtFixedRate(this::preloadProxies, 5, 5, TimeUnit.MINUTES); } /** * 预加载代理IP */ private void preloadProxies() { try { ListString proxies proxyClient.fetchProxies(50, http); for (String proxyStr : proxies) { ProxyIP proxy ProxyIP.parse(proxyStr, http); if (proxy ! null testProxy(proxy)) { preloadQueue.offer(proxy); } } logger.info(预加载完成预存IP数: {}, preloadQueue.size()); } catch (Exception e) { logger.error(预加载失败, e); } } /** * 智能获取代理优先从预加载队列获取 */ Override public synchronized ProxyIP getProxy() { ProxyIP proxy preloadQueue.poll(); if (proxy null) { return super.getProxy(); } return proxy; } }6.2 地理位置路由java/** * 根据目标网站地理位置选择代理[citation:3] */ public class GeoAwareProxyPool extends ProxyPoolManager { private final MapString, ListProxyIP geoProxyMap new ConcurrentHashMap(); public GeoAwareProxyPool(SiYeTianProxyClient proxyClient, int poolSize) { super(proxyClient, poolSize); } /** * 获取指定地区的代理 * param city 城市名称 */ public ProxyIP getProxyByCity(String city) { ListProxyIP cityProxies geoProxyMap.get(city); if (cityProxies ! 
null !cityProxies.isEmpty()) { // 轮询返回该城市代理 return cityProxies.get(new Random().nextInt(cityProxies.size())); } return getProxy(); // 降级到普通代理 } /** * 批量导入带地理信息的代理 */ public void addProxyWithGeo(ProxyIP proxy, String city) { geoProxyMap.computeIfAbsent(city, k - new CopyOnWriteArrayList()).add(proxy); } }6.3 并发控制与限流javaimport java.util.concurrent.Semaphore; /** * 带限流功能的爬虫包装器 */ public class RateLimitedCrawler { private final ProxyCrawler crawler; private final Semaphore semaphore; // 并发控制 private final RateLimiter rateLimiter; // 频率限制 public RateLimitedCrawler(ProxyCrawler crawler, int maxConcurrent, double requestsPerSecond) { this.crawler crawler; this.semaphore new Semaphore(maxConcurrent); this.rateLimiter RateLimiter.create(requestsPerSecond); } public String fetch(String url) throws Exception { semaphore.acquire(); try { rateLimiter.acquire(); return crawler.fetchWithHttpClient(url); } finally { semaphore.release(); } } public Document fetchDoc(String url) throws Exception { semaphore.acquire(); try { rateLimiter.acquire(); return crawler.fetchWithJsoup(url); } finally { semaphore.release(); } } }6.4 故障转移与熔断java/** * 带熔断机制的代理池 */ public class CircuitBreakerProxyPool extends ProxyPoolManager { private final MapString, CircuitBreaker breakers new ConcurrentHashMap(); private final int failureThreshold 5; // 失败阈值 private final long timeoutMs 60000; // 熔断超时1分钟 Override public ProxyIP getProxy() { ProxyIP proxy super.getProxy(); if (proxy null) return null; CircuitBreaker breaker breakers.computeIfAbsent( proxy.getProxyString(), k - new CircuitBreaker(failureThreshold, timeoutMs) ); // 如果熔断器打开跳过此代理 if (breaker.isOpen()) { logger.debug(代理 {} 已熔断跳过, proxy.getProxyString()); removeProxy(proxy); return getProxy(); // 递归获取下一个 } return proxy; } Override public void reportFailure(ProxyIP proxy) { super.reportFailure(proxy); CircuitBreaker breaker breakers.get(proxy.getProxyString()); if (breaker ! 
null) { breaker.recordFailure(); } } /** * 简单熔断器实现 */ private static class CircuitBreaker { private final int threshold; private final long timeoutMs; private int failureCount 0; private long openTime 0; private State state State.CLOSED; enum State { CLOSED, OPEN, HALF_OPEN } CircuitBreaker(int threshold, long timeoutMs) { this.threshold threshold; this.timeoutMs timeoutMs; } synchronized void recordFailure() { if (state State.CLOSED) { failureCount; if (failureCount threshold) { state State.OPEN; openTime System.currentTimeMillis(); } } } synchronized boolean isOpen() { if (state State.OPEN) { if (System.currentTimeMillis() - openTime timeoutMs) { state State.HALF_OPEN; return false; } return true; } return false; } } }7. 常见问题与解决方案7.1 代理IP失效问题问题刚获取的代理IP立即失效解决方案使用四叶天的粘性会话功能设置session_ttl参数启用健康检查机制定期验证IP可用性java// 设置代理的存活时间 public void setProxyTTL(ProxyIP proxy, int ttlSeconds) { // 存储到缓存设置过期时间 redisTemplate.opsForValue().set(proxy: proxy.getProxyString(), proxy, ttlSeconds, TimeUnit.SECONDS); }
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/2469929.html
如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!