easyexcel导出oom问题
1、项目中遇到了导出oom的问题,经排查原因主要有以下两个个
使用while(true)循环从数据库中查询记录,并不断使用List.addAll 导致最终的对象异常庞大
未对同时下载任务进行控制,导致同一时间任务过多
下面对这个问题进行本地模拟研究
2、造数据
该表为A股上市公司的信息表,总计将近1万条,利用下面的语句对数据量进行扩充值1千万条。
insert into stock(ts_code,symbol,name,fullname,exchange,list_status,list_date,delist_date)
select ts_code,symbol,name,fullname,exchange,list_status,list_date,delist_date from stock
3、搭项目
基于springboot+mysql+jdbc+easyexcel
核心实体
package com.taoge.oom.excel.entity;
import lombok.Data;
import java.util.Date;
/**
* Desc:
*
* @author taoxuefeng
* @date 2021/11/4
*/
@Data
public class Stock {
private Long id;
private String tsCode;
private String symbol;
private String name;
private String fullName;
private String exchange;
private Date listDate;
private Date deListDate;
}
核心dao层
package com.taoge.oom.excel.dao;
import com.taoge.oom.excel.entity.Stock;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.dao.DataAccessException;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.ResultSetExtractor;
import org.springframework.stereotype.Repository;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
/**
* Desc:
*
* @author taoxuefeng
* @date 2021/11/4
*/
@Repository
@Slf4j
public class StockRepository {
@Autowired
private JdbcTemplate jdbcTemplate;
public List<Stock> query(String startDate, String endDate, int page, int size) {
long start = (page - 1) * size;
long end = page * size;
String sql = "select * from stock where list_date>='"+startDate+"' and list_date<='"+endDate+"' limit "+start+","+end;
log.info("正在下载"+page+"页");
log.info(sql);
List<Stock> stockList = jdbcTemplate.query(sql, new ResultSetExtractor<List<Stock>>() {
@Override
public List<Stock> extractData(ResultSet rs) throws SQLException, DataAccessException {
List<Stock> result = new ArrayList();
while (rs.next()) {
Stock row = new Stock();
row.setId(rs.getLong("id"));
row.setTsCode(rs.getString("ts_code"));
row.setName(rs.getString("name"));
row.setFullName(rs.getString("fullname"));
row.setSymbol(rs.getString("symbol"));
row.setExchange(rs.getString("exchange"));
row.setListDate(rs.getDate("list_date"));
row.setDeListDate(rs.getDate("delist_date"));
result.add(row);
}
return result;
}
});
return stockList;
}
public Long count(String startDate, String endDate) {
String sql = "select count(*) from stock where list_date>='"+startDate+"' and list_date<='"+endDate+"' ";
log.info(sql);
Long query = jdbcTemplate.query(sql, new ResultSetExtractor<Long>() {
@Override
public Long extractData(ResultSet rs) throws SQLException, DataAccessException {
while (rs.next()) {
long aLong = rs.getLong(0);
return aLong;
}
return 0L;
}
});
return query;
}
}
核心service
package com.taoge.oom.excel.service;
import cn.hutool.core.collection.CollectionUtil;
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.write.metadata.WriteSheet;
import com.taoge.oom.excel.dao.StockRepository;
import com.taoge.oom.excel.entity.Stock;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.List;
/**
* Desc:
*
* @author taoxuefeng
* @date 2021/11/4
*/
@Service
public class StockService {
@Autowired
private StockRepository stockRepository;
/**
* 一次性全部查出来
* @param startDate
* @param endDate
*/
public void download(String startDate, String endDate) {
int page = 1;
int size = 1000;
List<Stock> data = new ArrayList<>();
while (true) {
List<Stock> query = stockRepository.query(startDate, endDate, page, size);
if (CollectionUtil.isNotEmpty(query)) {
data.addAll(query);
} else {
break;
}
page++;
}
long timeMillis = System.currentTimeMillis();
File file = new File("/Users/taoxuefeng/log/"+timeMillis + ".xlsx");
FileOutputStream fout = null;
try {
fout = new FileOutputStream(file);
EasyExcel.write(fout, Stock.class).sheet("上市公司").doWrite(data);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
/**
* 分配写入方案
* @param startDate
* @param endDate
*/
public void download2(String startDate, String endDate) {
int page = 1;
int size = 1000;
long millis = System.currentTimeMillis();
File file = new File("/Users/taoxuefeng/log/"+millis + ".xlsx");
FileOutputStream fout = null;
ExcelWriter excelWriter = null;
WriteSheet writeSheet = null;
try {
fout = new FileOutputStream(file);
excelWriter = EasyExcel.write(fout, Stock.class).build();
writeSheet = EasyExcel.writerSheet("上市公司").build();
} catch (FileNotFoundException e) {
e.printStackTrace();
}
while (true) {
List<Stock> query = stockRepository.query(startDate, endDate, page, size);
if (CollectionUtil.isNotEmpty(query)) {
excelWriter.write(query, writeSheet);
} else {
excelWriter.finish();
break;
}
page++;
}
}
}
4、启动测试
-Xmx100m
-Xms100m
-XX:+HeapDumpOnOutOfMemoryError
-XX:HeapDumpPath=/Users/taoxuefeng/log/m.hprof
-XX:+PrintGCDetails
-XX:+PrintGCDateStamps
-Xloggc:/Users/taoxuefeng/log/gc.txt
测试第一种方案(全部查出来)
仅仅调用一次就oom了(每页1000条,不到20页就报oom了)
问题很明显 大List导致内存溢出
测试第二种方案
总共46页,已经正常导出
接下来测试多个任务
根据定位,jvm内部仍然有大List存在,这些大List就是导出记录的集合,正常来说1000条的记录是没有这么大的,可能是easyexcel内部存储了一份,具体需要查阅原理才能确定。