XlsSaxAnalyser.java 9.51 KB
package com.taover.easyexcel.analysis.v03;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.CellRecord;
import org.apache.poi.hssf.record.EOFRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.HyperlinkRecord;
import org.apache.poi.hssf.record.IndexRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.MergeCellsRecord;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.ObjRecord;
import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.RowRecord;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.record.TextObjectRecord;
import org.apache.poi.hssf.record.WindowOneRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.taover.easyexcel.analysis.ExcelReadExecutor;
import com.taover.easyexcel.analysis.v03.handlers.BlankRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.BofRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.BoolErrRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.BoundSheetRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.DummyRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.EofRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.FormulaRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.HyperlinkRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.IndexRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.LabelRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.LabelSstRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.MergeCellsRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.NoteRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.NumberRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.ObjRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.RkRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.SstRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.StringRecordHandler;
import com.taover.easyexcel.analysis.v03.handlers.TextObjectRecordHandler;
import com.taover.easyexcel.context.xls.XlsReadContext;
import com.taover.easyexcel.exception.ExcelAnalysisException;
import com.taover.easyexcel.exception.ExcelAnalysisStopException;
import com.taover.easyexcel.read.metadata.ReadSheet;
import com.taover.easyexcel.read.metadata.holder.xls.XlsReadWorkbookHolder;

/**
 * A text extractor for Excel files.
 * <p>
 * Returns the textual content of the file, suitable for indexing by something like Lucene, but not really
 * intended for display to the user.
 * </p>
 * <p>
 * To turn an Excel file into a CSV or similar, see the XLS2CSVmra example.
 * </p>
 *
 * @see <a href=
 * "http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java">XLS2CSVmra</a>
 *
 * @author jipengfei
 */
public class XlsSaxAnalyser implements HSSFListener, ExcelReadExecutor {
    private static final Logger LOGGER = LoggerFactory.getLogger(XlsSaxAnalyser.class);

    /** Key under which {@link DummyRecordHandler} is registered in the handler map. */
    private static final short DUMMY_RECORD_SID = -1;

    /** Record sid -> handler; records whose sid is absent from this map are dropped by {@link #processRecord}. */
    private static final Map<Short, XlsRecordHandler> XLS_RECORD_HANDLER_MAP = new HashMap<Short, XlsRecordHandler>(32);

    private final XlsReadContext xlsReadContext;

    /**
     * Row numbers of the zero-height (hidden) rows seen in the current top-level substream. Only filled when
     * the workbook holder's {@code readHiddenRow} option is {@code false}.
     */
    List<Integer> skipCellRowIndexList = new ArrayList<Integer>();

    /** Active sheet index taken from the last {@link WindowOneRecord}; {@code null} until one has been seen. */
    private Integer activeSheetIndex = null;

    /**
     * Number of top-level substreams closed so far, minus one. It starts at -1 and is incremented each time a
     * top-level substream ends, so it is 0 once the first substream has been closed.
     */
    private int currSheetIndex = -1;

    /** Current BOF/EOF nesting depth; 0 means "not inside any substream". */
    private int substreamDepth = 0;

    static {
        XLS_RECORD_HANDLER_MAP.put(BlankRecord.sid, new BlankRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(BOFRecord.sid, new BofRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(BoolErrRecord.sid, new BoolErrRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(BoundSheetRecord.sid, new BoundSheetRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(DUMMY_RECORD_SID, new DummyRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(EOFRecord.sid, new EofRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(FormulaRecord.sid, new FormulaRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(HyperlinkRecord.sid, new HyperlinkRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(IndexRecord.sid, new IndexRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(LabelRecord.sid, new LabelRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(LabelSSTRecord.sid, new LabelSstRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(MergeCellsRecord.sid, new MergeCellsRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(NoteRecord.sid, new NoteRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(NumberRecord.sid, new NumberRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(ObjRecord.sid, new ObjRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(RKRecord.sid, new RkRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(SSTRecord.sid, new SstRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(StringRecord.sid, new StringRecordHandler());
        XLS_RECORD_HANDLER_MAP.put(TextObjectRecord.sid, new TextObjectRecordHandler());
    }

    /**
     * @param xlsReadContext read context handed to every record handler and used to look up read options
     */
    public XlsSaxAnalyser(XlsReadContext xlsReadContext) {
        this.xlsReadContext = xlsReadContext;
    }

    /**
     * Returns the workbook's sheet list, running an {@link XlsListSheetListener} first when the workbook
     * holder has no sheet data yet.
     *
     * @return whatever {@code getActualSheetDataList()} holds after the optional listing pass
     */
    @Override
    public List<ReadSheet> sheetList() {
        try {
            if (xlsReadContext.readWorkbookHolder().getActualSheetDataList() == null) {
                new XlsListSheetListener(xlsReadContext).execute();
            }
        } catch (ExcelAnalysisStopException e) {
            // A stop request raised during the listing pass is not an error; fall through and return the list.
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Custom stop!");
            }
        }
        return xlsReadContext.readWorkbookHolder().getActualSheetDataList();
    }

    /**
     * Streams every record of the workbook through this listener.
     *
     * @throws ExcelAnalysisException if reading the underlying file system fails with an {@link IOException}
     */
    @Override
    public void execute() {
        // Every run replays the record stream from the beginning, so the per-run bookkeeping must start from
        // scratch. Without this, a second run on the same instance (or a run following an aborted one) would
        // continue counting from the previous sheet index and compare it against a stale active sheet.
        resetStreamState();

        XlsReadWorkbookHolder xlsReadWorkbookHolder = xlsReadContext.xlsReadWorkbookHolder();
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
        xlsReadWorkbookHolder.setFormatTrackingHSSFListener(new FormatTrackingHSSFListener(listener));
        EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener =
            new EventWorkbookBuilder.SheetRecordCollectingListener(
                xlsReadWorkbookHolder.getFormatTrackingHSSFListener());
        xlsReadWorkbookHolder.setHssfWorkbook(workbookBuildingListener.getStubHSSFWorkbook());
        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();
        request.addListenerForAllRecords(xlsReadWorkbookHolder.getFormatTrackingHSSFListener());
        try {
            factory.processWorkbookEvents(request, xlsReadWorkbookHolder.getPoifsFileSystem());
        } catch (IOException e) {
            throw new ExcelAnalysisException(e);
        }
    }

    /**
     * Updates the stream bookkeeping for {@code record}, then dispatches it to the handler registered for its
     * sid unless the record is skipped, has no handler, is ignorable for the current sheet, or is not
     * supported by its handler.
     *
     * @param record the record delivered by the listener chain
     */
    @Override
    public void processRecord(Record record) {
        // Bookkeeping first: it must see every record, including the ones that are skipped below.
        trackStreamState(record);

        if (needSkip(xlsReadContext, record)) {
            return;
        }

        XlsRecordHandler handler = XLS_RECORD_HANDLER_MAP.get(record.getSid());
        if (handler == null) {
            return;
        }
        boolean ignoreRecord = (handler instanceof IgnorableXlsRecordHandler)
            && xlsReadContext.xlsReadSheetHolder() != null && xlsReadContext.xlsReadWorkbookHolder().getIgnoreRecord();
        if (ignoreRecord) {
            // No need to read the current sheet
            return;
        }
        if (!handler.support(xlsReadContext, record)) {
            return;
        }
        handler.processRecord(xlsReadContext, record);
    }

    /**
     * Decides whether {@code record} must be dropped before handler dispatch.
     * <ul>
     * <li>A {@link RowRecord} with zero height is skipped, and its row number remembered, when the workbook
     * holder's {@code readHiddenRow} option is {@code false}.</li>
     * <li>A cell record is skipped when {@code readJustSelected} is {@code true} and the current sheet is not
     * the active one, or when its row was remembered as hidden.</li>
     * </ul>
     *
     * @param xlsReadContext context supplying the workbook-level read options
     * @param record         the record under consideration
     * @return {@code true} if the record must not be processed
     */
    public boolean needSkip(XlsReadContext xlsReadContext, Record record) {
        if (record.getSid() == RowRecord.sid) {
            RowRecord rowRecord = (RowRecord) record;
            Boolean readHiddenRow = xlsReadContext.xlsReadWorkbookHolder().getReadHiddenRow();
            // An unset (null) option leaves hidden rows readable.
            if (Boolean.FALSE.equals(readHiddenRow) && rowRecord.getZeroHeight()) {
                skipCellRowIndexList.add(rowRecord.getRowNumber());
                return true;
            }
            return false;
        }

        Integer cellRow = cellRowOf(record);
        if (cellRow == null) {
            // Neither a row nor a cell record: never skipped here.
            return false;
        }

        Boolean justReadActiveSheet = xlsReadContext.xlsReadWorkbookHolder().getReadJustSelected();
        if (Boolean.TRUE.equals(justReadActiveSheet)
            && activeSheetIndex != null
            && currSheetIndex != activeSheetIndex.intValue()) {
            return true;
        }
        return skipCellRowIndexList.contains(cellRow);
    }

    /** Restores the per-run stream bookkeeping to its initial values. */
    private void resetStreamState() {
        skipCellRowIndexList.clear();
        activeSheetIndex = null;
        currSheetIndex = -1;
        substreamDepth = 0;
    }

    /**
     * Maintains the sheet counter, the hidden-row list and the active sheet index from the record stream.
     * <p>
     * The sheet counter advances only when a top-level substream closes. BOF/EOF pairs nested inside another
     * substream are balanced through {@link #substreamDepth}, so they neither advance the counter nor clear
     * the hidden-row list. An EOF that arrives without a matching BOF is treated as top-level.
     * </p>
     */
    private void trackStreamState(Record record) {
        short sid = record.getSid();
        if (sid == BOFRecord.sid) {
            ++substreamDepth;
        } else if (sid == EOFRecord.sid) {
            if (substreamDepth > 0) {
                --substreamDepth;
            }
            if (substreamDepth == 0) {
                skipCellRowIndexList.clear();
                ++currSheetIndex;
            }
        } else if (sid == WindowOneRecord.sid) {
            activeSheetIndex = ((WindowOneRecord) record).getActiveSheetIndex();
        }
    }

    /**
     * Returns the row of a cell-bearing record, or {@code null} when {@code record} is not one.
     * {@link LabelRecord} is checked explicitly in addition to {@link CellRecord} so that records of that
     * type are subject to the same skipping rules.
     */
    private static Integer cellRowOf(Record record) {
        if (record instanceof CellRecord) {
            return Integer.valueOf(((CellRecord) record).getRow());
        }
        if (record instanceof LabelRecord) {
            return Integer.valueOf(((LabelRecord) record).getRow());
        }
        return null;
    }
}