csv文件生成与读取
1.csv 文件解析入库方法(少量)private boolean doProcessNew(FileInputStream fileInputStream) { long start System.currentTimeMillis(); ListJtComplaintPredictionResultEntity csvFileList new ArrayList(); try (BufferedReader reader new BufferedReader(new InputStreamReader(fileInputStream, StandardCharsets.UTF_8)); CSVReader csvReader new CSVReaderBuilder(reader).build()) { log.info(开始解析CSV文件); IteratorString[] iterator csvReader.iterator(); // 跳过表头 if (iterator.hasNext()) { iterator.next(); log.info(已跳过表头); } int lineNum 1; while (iterator.hasNext()) { lineNum; String[] next iterator.next(); // 空行跳过 if (next null || next.length 0) { log.warn(第{}行空行跳过, lineNum); continue; } // 字段不足跳过 if (next.length 4) { log.warn(第{}行字段数量不足跳过, lineNum); continue; } try { JtComplaintPredictionResultEntity entity new JtComplaintPredictionResultEntity(); entity.setAccount(next[0] ! null ? next[0].trim() : ); entity.setAccountTv(next[1] ! null ? next[1].trim() : ); entity.setPred(next[2] ! null ? next[2].trim() : ); entity.setGreaterThanT(next[3] ! null ? next[3].trim() : ); csvFileList.add(entity); } catch (Exception e) { log.error(第{}行解析异常{}, lineNum, e.getMessage()); } } // 批量入库 jtService.saveBatch(csvFileList); log.info(解析完成成功入库{} 条总耗时{} ms, csvFileList.size(), System.currentTimeMillis() - start); return true; } catch (Exception e) { log.error(CSV解析失败, e); return false; } }2.csv 文件解析入库方法(大量)全部解析到内存几十万以内没问题分成小批次10003000 条线程池异步批量入库不要无限制开线程// 注入线程池建议用自定义线程池不要用Executors Resource private ThreadPoolTaskExecutor asyncExecutor; private boolean doProcessBigFile(FileInputStream fileInputStream) { long start System.currentTimeMillis(); ListJtComplaintPredictionResultEntity totalList new ArrayList(); // 1. 先完整解析 try (CSVReader csvReader new CSVReaderBuilder( new BufferedReader(new InputStreamReader(fileInputStream, StandardCharsetsets.UTF_8))).build()) { IteratorString[] iterator csvReader.iterator(); if (iterator.hasNext()) iterator.next(); // 跳过表头 int line 1; while (iterator.hasNext()) { line; String[] next iterator.next(); if (next null || next.length 4) continue; JtComplaintPredictionResultEntity entity new JtComplaintPredictionResultEntity(); entity.setAccount(next[0].trim()); entity.setAccountTv(next[1].trim()); entity.setPred(next[2].trim()); entity.setGreaterThanT(next[3].trim()); totalList.add(entity); } } catch (Exception e) { log.error(解析失败, e); return false; } // 2. 分批次 多线程入库 int batchSize 3000; ListListJtComplaintPredictionResultEntity partitions Lists.partition(totalList, batchSize); log.info(总数据量{}分为{}批入库, totalList.size(), partitions.size()); CountDownLatch latch new CountDownLatch(partitions.size()); for (ListJtComplaintPredictionResultEntity batch : partitions) { asyncExecutor.execute(() - { try { jtService.saveBatch(batch); log.info(批量入库成功{} 条, batch.size()); } catch (Exception e) { log.error(批量入库失败, e); } finally { latch.countDown(); } }); } try { latch.await(); // 等待所有批次完成 } catch (InterruptedException e) { Thread.currentThread().interrupt(); } log.info(全部入库完成总耗时{} ms, System.currentTimeMillis() - start); return true; }3.少量 CSV 生成String dateNow DateTimeFormatter.ofPattern(yyyyMMddHHmmss).format(LocalDateTime.now()); MapString, Object param new HashMap(); param.put(startrow, 0); param.put(endrow, 50); long startTime System.currentTimeMillis(); ListMapString, Object dataList dao.listJtComplaintPrediction(param); log.info(查询完成{} 条耗时{} ms, dataList.size(), System.currentTimeMillis() - startTime); // 写入文件流式写不占内存 String fileName _LIST_ dateNow .csv; File csvFile new File(fileName); try (BufferedWriter writer new BufferedWriter(new OutputStreamWriter(new FileOutputStream(csvFile), StandardCharsets.UTF_8))) { // 写入表头 writer.write(account|tv_account|care_num); writer.newLine(); // 写入行 for (MapString, Object map : dataList) { String account nullToEmpty(map.get(ACCOUNT)); String careNum nullToEmpty(map.get(CARE_NUM)); // 拼接行|分隔 writer.write(account | careNum); writer.newLine(); } } log.info(CSV生成完成{}, csvFile.getName()); // 工具方法 private String nullToEmpty(Object obj) { return obj null ? : obj.toString().trim(); }#4.大量 CSV 生成分页查询流式逐页写入一边查一边写不占内存单线程写文件CSV 必须顺序写可多线程分页查询但必须排队写入public void generateBigCsv() { String fileName BIG_DATA_ System.currentTimeMillis() .csv; File csvFile new File(fileName); int pageSize 5000; // 每页查5000 int pageNum 1; try (BufferedWriter writer new BufferedWriter(new OutputStreamWriter(new FileOutputStream(csvFile), StandardCharsetsets.UTF_8))) { // 写表头 writer.write(account|tv_account|care_num); writer.newLine(); while (true) { // 分页查询 MapString, Object param new HashMap(); param.put(pageNum, pageNum); param.put(pageSize, pageSize); ListMapString, Object dataList dao.listJtComplaintPredictionPage(param); if (CollUtil.isEmpty(dataList)) { break; } // 流式写入 for (MapString, Object map : dataList) { String account nullToEmpty(map.get(ACCOUNT)); String careNum nullToEmpty(map.get(CARE_NUM)); writer.write(account | careNum); writer.newLine(); } log.info(已写入第{}页{}条, pageNum, dataList.size()); pageNum; } } catch (Exception e) { log.error(大文件生成失败, e); } log.info(超大CSV生成完成{}, csvFile.getName()); }5.最终结论解析 CSV少量直接解析 → 批量入库大量全部解析 → 分批次 → 线程池异步入库批次大小10003000 最优生成 CSV少量流式写不要 StringBuffer 拼接大量分页查询 流式写入严禁多线程写入同一个 CSV
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/2499393.html
如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!