package com.taover.easyexcel.analysis.v07;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbookPr;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

import com.taover.easyexcel.analysis.ExcelReadExecutor;
import com.taover.easyexcel.analysis.v07.handlers.sax.SharedStringsTableHandler;
import com.taover.easyexcel.analysis.v07.handlers.sax.XlsxRowHandler;
import com.taover.easyexcel.analysis.v07.workbook.WorkbookAnalyser;
import com.taover.easyexcel.analysis.v07.workbook.WorkbookAnalyserImpl;
import com.taover.easyexcel.cache.ReadCache;
import com.taover.easyexcel.context.xlsx.XlsxReadContext;
import com.taover.easyexcel.enums.CellExtraTypeEnum;
import com.taover.easyexcel.exception.ExcelAnalysisException;
import com.taover.easyexcel.exception.SheetNotSelectedException;
import com.taover.easyexcel.metadata.CellExtra;
import com.taover.easyexcel.read.metadata.ReadSheet;
import com.taover.easyexcel.read.metadata.holder.ReadWorkbookHolder;
import com.taover.easyexcel.read.metadata.holder.xlsx.XlsxReadWorkbookHolder;
import com.taover.easyexcel.util.CollectionUtils;
import com.taover.easyexcel.util.FileUtils;
import com.taover.easyexcel.util.StringUtils;

/**
 * SAX-based read executor for xlsx workbooks.
 *
 * <p>The constructor opens the OPC package, loads the shared-strings table into the read cache,
 * resolves the 1904 date setting and collects one input stream (and optionally one comments
 * table) per sheet. {@link #execute()} then streams the selected sheets through
 * {@link XlsxRowHandler}.
 *
 * @author jipengfei
 */
public class XlsxSaxAnalyser implements ExcelReadExecutor {

    private static final Logger LOGGER = LoggerFactory.getLogger(XlsxSaxAnalyser.class);

    private final XlsxReadContext xlsxReadContext;

    /** Every sheet found in the workbook, in iteration order. */
    private final List<ReadSheet> sheetList;

    /** Sheet XML streams. key: sheetNo, value: the sheet's input stream. */
    private final Map<Integer, InputStream> sheetMap;

    /** Excel comments. key: sheetNo, value: CommentsTable. */
    private final Map<Integer, CommentsTable> commentsTableMap;

    /**
     * Opens the workbook and indexes its sheets.
     *
     * @param xlsxReadContext the read context this analyser works on
     * @param decryptedStream the already-decrypted workbook content, or {@code null} to read from
     *        the file / input stream configured on the workbook holder
     * @throws ExcelAnalysisException if the workbook contains no sheet
     * @throws Exception if the package cannot be opened or parsed
     */
    public XlsxSaxAnalyser(XlsxReadContext xlsxReadContext, InputStream decryptedStream) throws Exception {
        this.xlsxReadContext = xlsxReadContext;
        XlsxReadWorkbookHolder xlsxReadWorkbookHolder = xlsxReadContext.xlsxReadWorkbookHolder();

        OPCPackage pkg = readOpcPackage(xlsxReadWorkbookHolder, decryptedStream);
        xlsxReadWorkbookHolder.setOpcPackage(pkg);

        List<PackagePart> packageParts = pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
        if (!CollectionUtils.isEmpty(packageParts)) {
            PackagePart sharedStringsTablePackagePart = packageParts.get(0);
            // Specify default cache
            defaultReadCache(xlsxReadWorkbookHolder, sharedStringsTablePackagePart);
            // Analysis sharedStringsTable.xml
            analysisSharedStringsTable(sharedStringsTablePackagePart.getInputStream(), xlsxReadWorkbookHolder);
        }

        XSSFReader xssfReader = new XSSFReader(pkg);
        analysisUse1904WindowDate(xssfReader, xlsxReadWorkbookHolder);
        xlsxReadWorkbookHolder.setStylesTable(xssfReader.getStylesTable());

        sheetList = new ArrayList<>();
        sheetMap = new HashMap<>();
        commentsTableMap = new HashMap<>();

        XSSFReader.SheetIterator ite = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        if (!ite.hasNext()) {
            throw new ExcelAnalysisException("Can not find any sheet!");
        }

        WorkbookAnalyser wbAnalyser = new WorkbookAnalyserImpl(xssfReader.getWorkbookData());
        // The extra-read configuration does not change while iterating, so look it up once.
        boolean collectComments =
            xlsxReadContext.readWorkbookHolder().getExtraReadSet().contains(CellExtraTypeEnum.COMMENT);

        int index = 0;
        while (ite.hasNext()) {
            InputStream inputStream = ite.next();
            boolean isHidden = wbAnalyser.isHiddenSheet(index);
            boolean isActive = (index == wbAnalyser.getActiveTabIndexInSheetList());
            sheetList.add(new ReadSheet(index, ite.getSheetName(), isActive, isHidden));
            sheetMap.put(index, inputStream);
            if (collectComments) {
                CommentsTable commentsTable = ite.getSheetComments();
                if (commentsTable != null) {
                    commentsTableMap.put(index, commentsTable);
                }
            }
            index++;
        }
    }

    /**
     * Selects the read cache for the shared-strings part, stores it on the holder and initialises it.
     */
    private void defaultReadCache(XlsxReadWorkbookHolder xlsxReadWorkbookHolder,
        PackagePart sharedStringsTablePackagePart) {
        ReadCache readCache = xlsxReadWorkbookHolder.getReadCacheSelector().readCache(sharedStringsTablePackagePart);
        xlsxReadWorkbookHolder.setReadCache(readCache);
        readCache.init(xlsxReadContext);
    }

    /**
     * Fills in the global {@code use1904windowing} flag from {@code workbook.xml} unless it has
     * already been configured explicitly.
     */
    private void analysisUse1904WindowDate(XSSFReader xssfReader, XlsxReadWorkbookHolder xlsxReadWorkbookHolder)
        throws Exception {
        if (xlsxReadWorkbookHolder.globalConfiguration().getUse1904windowing() != null) {
            return;
        }
        boolean use1904;
        // Close the workbook stream once parsed; it was previously left open.
        try (InputStream workbookXml = xssfReader.getWorkbookData()) {
            WorkbookDocument ctWorkbook = WorkbookDocument.Factory.parse(workbookXml);
            CTWorkbook wb = ctWorkbook.getWorkbook();
            CTWorkbookPr workbookPr = wb.getWorkbookPr();
            use1904 = workbookPr != null && workbookPr.getDate1904();
        }
        xlsxReadWorkbookHolder.getGlobalConfiguration().setUse1904windowing(use1904 ? Boolean.TRUE : Boolean.FALSE);
    }

    /**
     * Parses {@code sharedStrings.xml} into the holder's read cache and marks the cache as complete.
     */
    private void analysisSharedStringsTable(InputStream sharedStringsTableInputStream,
        XlsxReadWorkbookHolder xlsxReadWorkbookHolder) throws Exception {
        ContentHandler handler = new SharedStringsTableHandler(xlsxReadWorkbookHolder.getReadCache());
        parseXmlSource(sharedStringsTableInputStream, handler);
        xlsxReadWorkbookHolder.getReadCache().putFinished();
    }

    /**
     * Opens the OPC package from the best available source.
     *
     * <ol>
     * <li>No decrypted stream and a file is configured: open the file directly.</li>
     * <li>Stream use is mandatory: open the decrypted stream, or else the holder's input stream.</li>
     * <li>Otherwise: copy the stream to a temporary file (registered on the holder) and open that
     * read-only.</li>
     * </ol>
     */
    private OPCPackage readOpcPackage(XlsxReadWorkbookHolder xlsxReadWorkbookHolder, InputStream decryptedStream)
        throws Exception {
        if (decryptedStream == null && xlsxReadWorkbookHolder.getFile() != null) {
            return OPCPackage.open(xlsxReadWorkbookHolder.getFile());
        }
        if (xlsxReadWorkbookHolder.getMandatoryUseInputStream()) {
            InputStream source = decryptedStream != null ? decryptedStream : xlsxReadWorkbookHolder.getInputStream();
            return OPCPackage.open(source);
        }
        File readTempFile = FileUtils.createCacheTmpFile();
        xlsxReadWorkbookHolder.setTempFile(readTempFile);
        File tempFile = new File(readTempFile.getPath(), UUID.randomUUID().toString() + ".xlsx");
        InputStream source = decryptedStream != null ? decryptedStream : xlsxReadWorkbookHolder.getInputStream();
        FileUtils.writeToFile(tempFile, source);
        return OPCPackage.open(tempFile, PackageAccess.READ);
    }

    /**
     * @return every sheet found in the workbook
     */
    @Override
    public List<ReadSheet> sheetList() {
        return sheetList;
    }

    /**
     * Streams {@code inputStream} through a hardened SAX parser into {@code handler} and always
     * closes the stream afterwards.
     *
     * <p>A {@link SAXException} wrapping a {@link SheetNotSelectedException} is logged and treated
     * as a normal end of parsing. Any other failure is reported as an
     * {@link ExcelAnalysisException}.
     *
     * @param inputStream the XML part to parse; closed by this method
     * @param handler receives the SAX events
     * @throws ExcelAnalysisException if parsing fails or the stream cannot be closed
     */
    private void parseXmlSource(InputStream inputStream, ContentHandler handler) {
        // For debugging, wrap the stream with printInputSource(inputStream) to dump the raw XML.
        InputSource inputSource = new InputSource(inputStream);
        try {
            SAXParserFactory saxFactory;
            String xlsxSAXParserFactoryName = xlsxReadContext.xlsxReadWorkbookHolder().getSaxParserFactoryName();
            if (StringUtils.isEmpty(xlsxSAXParserFactoryName)) {
                saxFactory = SAXParserFactory.newInstance();
            } else {
                saxFactory = SAXParserFactory.newInstance(xlsxSAXParserFactoryName, null);
            }
            // XXE hardening: forbid DOCTYPE declarations and external entities.
            saxFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            saxFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            saxFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            SAXParser saxParser = saxFactory.newSAXParser();
            XMLReader xmlReader = saxParser.getXMLReader();
            xmlReader.setContentHandler(handler);
            xmlReader.parse(inputSource);
        } catch (SAXException e) {
            if (e.getException() instanceof SheetNotSelectedException) {
                LOGGER.warn(e.getMessage());
            } else {
                // Malformed XML or an unsupported parser feature must not be dropped silently,
                // otherwise the caller would see an empty or truncated sheet without any error.
                throw new ExcelAnalysisException(e);
            }
        } catch (ExcelAnalysisException e) {
            throw e;
        } catch (Exception e) {
            throw new ExcelAnalysisException(e);
        } finally {
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (IOException e) {
                    throw new ExcelAnalysisException("Can not close 'inputStream'!");
                }
            }
        }
    }

    /**
     * Debug helper: reads {@code inputStream} fully, logs its content at debug level and returns a
     * replayable in-memory copy. The given stream is closed.
     *
     * <p>If reading fails, the failure is logged and the bytes read so far are returned.
     */
    private InputStream printInputSource(InputStream inputStream) {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        byte[] buffer = new byte[1000 * 1024];
        try (BufferedInputStream bis = new BufferedInputStream(inputStream)) {
            int len;
            while ((len = bis.read(buffer)) != -1) {
                bos.write(buffer, 0, len);
            }
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug(">>>parseXmlSource(inputStream)");
                LOGGER.debug(bos.toString("UTF-8"));
            }
        } catch (IOException e) {
            LOGGER.warn("Can not buffer 'inputStream' for debugging", e);
        }
        return new ByteArrayInputStream(bos.toByteArray());
    }

    /**
     * Reads every selected sheet: rows first, then comments, then the end-of-sheet notification.
     */
    @Override
    public void execute() {
        List<ReadSheet> readSheetList = this.filterSheetList(sheetList, xlsxReadContext);
        for (ReadSheet readSheet : readSheetList) {
            xlsxReadContext.currentSheet(readSheet);
            parseXmlSource(sheetMap.get(readSheet.getSheetNo()), new XlsxRowHandler(xlsxReadContext));
            // Read comments
            readComments(readSheet);
            // The current sheet has been read completely
            xlsxReadContext.analysisEventProcessor().endSheet(xlsxReadContext);
        }
    }

    /**
     * Chooses the sheets to read.
     *
     * <ul>
     * <li>{@code readAll}: every sheet that is not hidden.</li>
     * <li>{@code readJustSelected}: every sheet flagged as selected.</li>
     * <li>otherwise: every entry of {@code sheetList} that matches a workbook sheet by number or
     * by (optionally trimmed) name.</li>
     * </ul>
     * If nothing qualifies, the first workbook sheet is returned.
     *
     * <p>NOTE(review): the only caller in this class passes the workbook's own sheet list as
     * {@code sheetList}, so in the third mode every sheet matches itself. Confirm whether a
     * user-supplied list was intended here.
     *
     * @param sheetList the candidate sheets to match against the workbook sheets
     * @param analysisContext supplies the workbook-level read settings
     * @return the sheets to read; empty only if the workbook sheet list is empty
     */
    private List<ReadSheet> filterSheetList(List<ReadSheet> sheetList, XlsxReadContext analysisContext) {
        ReadWorkbookHolder readWorkbookHolder = analysisContext.readWorkbookHolder();
        List<ReadSheet> sheetData = this.sheetList;
        List<ReadSheet> result = new ArrayList<>();
        if (sheetData.isEmpty()) {
            return result;
        }

        for (ReadSheet item : sheetData) {
            if (readWorkbookHolder.getReadAll()) {
                if (!item.getSheetHidden()) {
                    result.add(item);
                }
            } else if (readWorkbookHolder.getReadJustSelected()) {
                if (item.getSheetSelected()) {
                    result.add(item);
                }
            } else {
                for (ReadSheet innerItem : sheetList) {
                    if (matchesSheet(item, innerItem, analysisContext)) {
                        item.copyBasicParameter(innerItem);
                        result.add(innerItem);
                    }
                }
            }
        }

        if (result.isEmpty()) {
            result.add(sheetData.get(0));
        }
        return result;
    }

    /**
     * Tells whether {@code item} and {@code innerItem} denote the same sheet: equal sheet numbers,
     * or else equal names after trimming {@code item}'s name when auto-trim applies.
     */
    private boolean matchesSheet(ReadSheet item, ReadSheet innerItem, XlsxReadContext analysisContext) {
        if (item.getSheetNo() != null && item.getSheetNo().equals(innerItem.getSheetNo())) {
            return true;
        }
        String parameterSheetName = item.getSheetName();
        if (StringUtils.isEmpty(parameterSheetName)) {
            return false;
        }
        // The sheet-level auto-trim setting wins; the global one applies only when it is unset.
        boolean autoTrim = (item.getAutoTrim() != null && item.getAutoTrim())
            || (item.getAutoTrim() == null
                && analysisContext.readWorkbookHolder().getGlobalConfiguration().getAutoTrim());
        if (autoTrim) {
            parameterSheetName = parameterSheetName.trim();
        }
        return parameterSheetName.equals(innerItem.getSheetName());
    }

    /**
     * Publishes every comment of {@code readSheet} as a {@link CellExtra} event. Does nothing when
     * comment reading is not enabled or no comments table was collected for the sheet.
     */
    private void readComments(ReadSheet readSheet) {
        if (!xlsxReadContext.readWorkbookHolder().getExtraReadSet().contains(CellExtraTypeEnum.COMMENT)) {
            return;
        }
        CommentsTable commentsTable = commentsTableMap.get(readSheet.getSheetNo());
        if (commentsTable == null) {
            return;
        }
        Map<CellAddress, XSSFComment> cellComments = commentsTable.getCellComments();
        for (XSSFComment xssfComment : cellComments.values()) {
            CellExtra cellExtra = new CellExtra(CellExtraTypeEnum.COMMENT, xssfComment.getString().toString(),
                xssfComment.getRow(), xssfComment.getColumn());
            xlsxReadContext.readSheetHolder().setCellExtra(cellExtra);
            xlsxReadContext.analysisEventProcessor().extra(xlsxReadContext);
        }
    }
}