package com.taover.easyexcel.analysis.v03;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.CellRecord;
import org.apache.poi.hssf.record.EOFRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.HyperlinkRecord;
import org.apache.poi.hssf.record.IndexRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.MergeCellsRecord;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.ObjRecord;
import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.RowRecord;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.record.TextObjectRecord;
import org.apache.poi.hssf.record.WindowOneRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.taover.easyexcel.analysis.ExcelReadExecutor;
import com.taover.easyexcel.analysis.v03.handlers.BlankRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.BofRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.BoolErrRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.BoundSheetRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.DummyRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.EofRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.FormulaRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.HyperlinkRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.IndexRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.LabelRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.LabelSstRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.MergeCellsRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.NoteRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.NumberRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.ObjRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.RkRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.SstRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.StringRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.TextObjectRecordHandler;
import com.taover.easyexcel.context.xls.XlsReadContext;
import com.taover.easyexcel.exception.ExcelAnalysisException;
import com.taover.easyexcel.exception.ExcelAnalysisStopException;
import com.taover.easyexcel.read.metadata.ReadSheet;
import com.taover.easyexcel.read.metadata.holder.xls.XlsReadWorkbookHolder;
/**
 * Event-driven reader for the binary {@code .xls} format.
 *
 * <p>The analyser registers itself as an {@link HSSFListener}; every record delivered by the POI
 * event factory is routed, by its sid, to one of the {@link XlsRecordHandler} instances held in
 * {@link #XLS_RECORD_HANDLER_MAP}. Before dispatching, the analyser tracks which sheet is being
 * streamed and drops rows/cells that the workbook settings ask to skip (hidden rows, sheets other
 * than the active one).
 *
 * <p>Original header note: a text extractor for Excel files. Returns the textual content of the
 * file, suitable for indexing by something like Lucene, but not really intended for display to
 * the user. To turn an excel file into a CSV or similar, see the XLS2CSVmra example.
 *
 * <p>Instances keep per-run mutable state and make no attempt at synchronization.
 *
 * @see XLS2CSVmra
 *
 * @author jipengfei
 */
public class XlsSaxAnalyser implements HSSFListener, ExcelReadExecutor {
    private static final Logger LOGGER = LoggerFactory.getLogger(XlsSaxAnalyser.class);

    /**
     * Key under which the {@link DummyRecordHandler} is registered.
     * NOTE(review): expected to be the sid reported by the placeholder records that
     * {@link MissingRecordAwareHSSFListener} injects -- confirm against the POI version in use.
     */
    private static final short DUMMY_RECORD_SID = -1;

    /** Record sid -> handler responsible for that record type. Populated once, then only read. */
    private static final Map<Short, XlsRecordHandler> XLS_RECORD_HANDLER_MAP =
        new HashMap<Short, XlsRecordHandler>(32);

    static {
        XLS_RECORD_HANDLER_MAP.put(BlankRecord.sid, new BlankRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(BOFRecord.sid, new BofRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(BoolErrRecord.sid, new BoolErrRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(BoundSheetRecord.sid, new BoundSheetRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(DUMMY_RECORD_SID, new DummyRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(EOFRecord.sid, new EofRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(FormulaRecord.sid, new FormulaRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(HyperlinkRecord.sid, new HyperlinkRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(IndexRecord.sid, new IndexRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(LabelRecord.sid, new LabelRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(LabelSSTRecord.sid, new LabelSstRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(MergeCellsRecord.sid, new MergeCellsRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(NoteRecord.sid, new NoteRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(NumberRecord.sid, new NumberRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(ObjRecord.sid, new ObjRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(RKRecord.sid, new RkRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(SSTRecord.sid, new SstRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(StringRecord.sid, new StringRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(TextObjectRecord.sid, new TextObjectRecordHandler());
    }

    /** Read context shared with every handler invocation. */
    private final XlsReadContext xlsReadContext;

    /**
     * Row numbers of the current sheet whose cells must be dropped (rows reported as zero-height
     * while hidden rows are not to be read). Cleared on every {@link EOFRecord}.
     */
    List<Integer> skipCellRowIndexList = new ArrayList<Integer>();

    /** Active sheet index taken from the last {@link WindowOneRecord}; {@code null} until one is seen. */
    private Integer activeSheetIndex = null;

    /**
     * Number of {@link EOFRecord}s seen in the current run, minus one. Used as the index of the
     * sheet currently being streamed when compared with {@link #activeSheetIndex}.
     */
    private int currSheetIndex = -1;

    /**
     * Creates an analyser bound to the given read context.
     *
     * @param xlsReadContext context giving access to the workbook/sheet holders
     */
    public XlsSaxAnalyser(XlsReadContext xlsReadContext) {
        this.xlsReadContext = xlsReadContext;
    }

    /**
     * Returns the sheet list stored in the workbook holder, running an
     * {@link XlsListSheetListener} first when the holder has no list yet.
     *
     * @return whatever {@code getActualSheetDataList()} holds after the optional listing pass
     */
    @Override
    public List<ReadSheet> sheetList() {
        try {
            if (xlsReadContext.readWorkbookHolder().getActualSheetDataList() == null) {
                new XlsListSheetListener(xlsReadContext).execute();
            }
        } catch (ExcelAnalysisStopException e) {
            // A stop request during the listing pass is not an error; fall through and return
            // whatever the holder contains.
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Custom stop!");
            }
        }
        return xlsReadContext.readWorkbookHolder().getActualSheetDataList();
    }

    /**
     * Streams the workbook through the POI event factory, delivering every record to
     * {@link #processRecord(Record)}.
     *
     * @throws ExcelAnalysisException wrapping any {@link IOException} raised while reading
     */
    @Override
    public void execute() {
        // Sheet tracking is per run: without a reset, a second execute() on the same instance
        // (or one following an aborted run) would start from a stale sheet counter.
        resetRunState();

        XlsReadWorkbookHolder xlsReadWorkbookHolder = xlsReadContext.xlsReadWorkbookHolder();
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
        xlsReadWorkbookHolder.setFormatTrackingHSSFListener(new FormatTrackingHSSFListener(listener));
        EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener =
            new EventWorkbookBuilder.SheetRecordCollectingListener(
                xlsReadWorkbookHolder.getFormatTrackingHSSFListener());
        xlsReadWorkbookHolder.setHssfWorkbook(workbookBuildingListener.getStubHSSFWorkbook());
        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();
        request.addListenerForAllRecords(xlsReadWorkbookHolder.getFormatTrackingHSSFListener());
        try {
            factory.processWorkbookEvents(request, xlsReadWorkbookHolder.getPoifsFileSystem());
        } catch (IOException e) {
            throw new ExcelAnalysisException(e);
        }
    }

    /**
     * Handles one record: updates sheet tracking, applies the skip rules, then hands the record
     * to the handler registered for its sid (if any, and if that handler supports it).
     *
     * @param record record delivered by the listener chain
     */
    @Override
    public void processRecord(Record record) {
        // Tracking must see every record, including ones that are skipped below.
        this.initGlobalXlsData(record);
        if (this.needSkip(xlsReadContext, record)) {
            return;
        }
        XlsRecordHandler handler = XLS_RECORD_HANDLER_MAP.get(record.getSid());
        if (handler == null) {
            return;
        }
        boolean ignoreRecord = (handler instanceof IgnorableXlsRecordHandler)
            && xlsReadContext.xlsReadSheetHolder() != null
            && xlsReadContext.xlsReadWorkbookHolder().getIgnoreRecord();
        if (ignoreRecord) {
            // No need to read the current sheet
            return;
        }
        if (!handler.support(xlsReadContext, record)) {
            return;
        }
        handler.processRecord(xlsReadContext, record);
    }

    /** Puts the sheet-tracking fields back to their initial values. */
    private void resetRunState() {
        this.skipCellRowIndexList.clear();
        this.activeSheetIndex = null;
        this.currSheetIndex = -1;
    }

    /**
     * Updates sheet tracking from structural records: an {@link EOFRecord} clears the hidden-row
     * list and advances the sheet counter; a {@link WindowOneRecord} supplies the active sheet
     * index.
     */
    private void initGlobalXlsData(Record record) {
        if (record.getSid() == EOFRecord.sid) {
            this.skipCellRowIndexList.clear();
            ++this.currSheetIndex;
        } else if (record.getSid() == WindowOneRecord.sid) {
            WindowOneRecord window = (WindowOneRecord) record;
            this.activeSheetIndex = window.getActiveSheetIndex();
        }
    }

    /**
     * Decides whether a record should be dropped before dispatch.
     *
     * <ul>
     *   <li>A {@link RowRecord} with zero height is dropped, and its row number remembered, when
     *       the workbook holder's {@code getReadHiddenRow()} is explicitly {@code false}.</li>
     *   <li>A {@link CellRecord} is dropped when {@code getReadJustSelected()} is {@code true},
     *       an active sheet index is known and the current sheet is a different one, or when
     *       the cell's row was remembered as hidden.</li>
     * </ul>
     *
     * @param xlsReadContext context providing the workbook holder settings
     * @param record record about to be dispatched
     * @return {@code true} when the record must not be handled
     */
    public boolean needSkip(XlsReadContext xlsReadContext, Record record) {
        if (record.getSid() == RowRecord.sid) {
            RowRecord rowRec = (RowRecord) record;
            Boolean readHiddenRow = xlsReadContext.xlsReadWorkbookHolder().getReadHiddenRow();
            // A null setting means "not configured" and is treated like "read hidden rows".
            if (readHiddenRow != null && !readHiddenRow && rowRec.getZeroHeight()) {
                skipCellRowIndexList.add(rowRec.getRowNumber());
                return true;
            }
        } else if (record instanceof CellRecord) {
            CellRecord cellRec = (CellRecord) record;
            Boolean justReadActiveSheet = xlsReadContext.xlsReadWorkbookHolder().getReadJustSelected();
            boolean onInactiveSheet = justReadActiveSheet != null
                && justReadActiveSheet
                && this.activeSheetIndex != null
                && this.currSheetIndex != this.activeSheetIndex;
            if (onInactiveSheet) {
                return true;
            }
            if (skipCellRowIndexList.contains(cellRec.getRow())) {
                return true;
            }
        }
        return false;
    }
}