Java tutorial
/* * Copyright (c) 2014 Red Hat, Inc. and/or its affiliates. * * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Cheng Fang - Initial API and implementation */ package org.jberet.support.io; import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import javax.batch.api.BatchProperty; import javax.batch.api.chunk.ItemReader; import javax.enterprise.context.Dependent; import javax.inject.Inject; import javax.inject.Named; import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFEventFactory; import org.apache.poi.hssf.eventusermodel.HSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFRequest; import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener; import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord; import org.apache.poi.hssf.record.BOFRecord; import org.apache.poi.hssf.record.BlankRecord; import org.apache.poi.hssf.record.BoolErrRecord; import org.apache.poi.hssf.record.BoundSheetRecord; import org.apache.poi.hssf.record.EOFRecord; import org.apache.poi.hssf.record.FormulaRecord; import org.apache.poi.hssf.record.LabelRecord; import org.apache.poi.hssf.record.LabelSSTRecord; import org.apache.poi.hssf.record.NumberRecord; import org.apache.poi.hssf.record.Record; import org.apache.poi.hssf.record.SSTRecord; import org.apache.poi.hssf.record.StringRecord; import org.apache.poi.poifs.filesystem.DocumentInputStream; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.jberet.support._private.SupportLogger; /** * An implementation of {@code javax.batch.api.chunk.ItemReader} for reading binary Excel files (.xls) based on * Apache POI event model API. Compared to {@link ExcelUserModelItemReader}, this reader implementation has smaller * memory footprint and is suitable for reading large binary excel files. * * @see ExcelUserModelItemReader * @see ExcelStreamingItemReader * @see ExcelItemReaderWriterBase * @since 1.1.0 */ @Named @Dependent public class ExcelEventItemReader extends ExcelUserModelItemReader implements ItemReader { /** * Maximum worksheet row numbers for Excel 2003: 65,536 (2 ** 16) * http://office.microsoft.com/en-us/excel-help/excel-specifications-and-limits-HP005199291.aspx */ protected static final int MAX_WORKSHEET_ROWS = 65536; /** * the capacity of the queue used by {@code org.apache.poi.hssf.eventusermodel.HSSFListener} to hold pre-fetched * data rows. Optional property and defaults to {@link #MAX_WORKSHEET_ROWS} (65536). */ @Inject @BatchProperty protected int queueCapacity; private BlockingQueue<Object> queue; private DocumentInputStream documentInputStream; private FormatTrackingHSSFListener formatListener; @Override public Object readItem() throws Exception { final Object result = queue.take(); if (result instanceof Exception) { if (result instanceof ReadCompletedException) { return null; } throw (Exception) result; } return result; } @Override public Serializable checkpointInfo() throws Exception { return currentRowNum; } @Override public void close() throws Exception { super.close(); if (documentInputStream != null) { try { documentInputStream.close(); } catch (final Exception e) { SupportLogger.LOGGER.tracef(e, "Failed to close DocumentInputStream for %s%n", resource); } } } @Override protected void initWorkbookAndSheet(final int startRowNumber) throws Exception { queue = new ArrayBlockingQueue<Object>(queueCapacity == 0 ? MAX_WORKSHEET_ROWS : queueCapacity); final POIFSFileSystem poifs = new POIFSFileSystem(inputStream); // get the Workbook (excel part) stream in a InputStream documentInputStream = poifs.createDocumentInputStream("Workbook"); final HSSFRequest req = new HSSFRequest(); final MissingRecordAwareHSSFListener missingRecordAwareHSSFListener = new MissingRecordAwareHSSFListener( new HSSFListenerImpl(this)); /* * Need to use English locale her because Jackson double parsing might break in certain regions * where ',' is used as decimal separator instead of '.'. */ formatListener = new FormatTrackingHSSFListener(missingRecordAwareHSSFListener, Locale.ENGLISH); req.addListenerForAllRecords(formatListener); final HSSFEventFactory factory = new HSSFEventFactory(); if (objectMapper == null) { initJsonFactoryAndObjectMapper(); } new Thread(new Runnable() { @Override public void run() { try { factory.processEvents(req, documentInputStream); } catch (final ReadCompletedException e) { SupportLogger.LOGGER.tracef("Completed reading %s%n", resource); } } }).start(); } private static final class HSSFListenerImpl implements HSSFListener { private final ExcelEventItemReader itemReader; //to store 1 row of data Map<String, String> resultMap = new HashMap<String, String>(); private SSTRecord sstrec; private String currentSheetName; private int currentSheetIndex = -1; private final ArrayList<BoundSheetRecord> boundSheetRecords = new ArrayList<BoundSheetRecord>(); private BoundSheetRecord[] orderedBSRs; private final Map<Integer, String> headerIndexToLabelMapping = new HashMap<Integer, String>(); private boolean readingHeaderRow; private boolean readingDataRow; /** * true if we are reading a sheet and this sheet is the target sheet */ private boolean readingTargetSheet; private HSSFListenerImpl(final ExcelEventItemReader itemReader) { this.itemReader = itemReader; if (itemReader.header != null) { for (int i = 0; i < itemReader.header.length; ++i) { headerIndexToLabelMapping.put(i, itemReader.header[i]); } } } @Override public void processRecord(final Record record) { String keyForNextStringRecord = null; try { if (currentSheetName == null || itemReader.sheetName.equals(currentSheetName)) { switch (record.getSid()) { case BoundSheetRecord.sid: final BoundSheetRecord sheetRec = (BoundSheetRecord) record; boundSheetRecords.add(sheetRec); break; case BOFRecord.sid: final BOFRecord bofRecord = (BOFRecord) record; if (bofRecord.getType() != BOFRecord.TYPE_WORKBOOK) { currentSheetIndex++; } if (bofRecord.getType() == BOFRecord.TYPE_WORKSHEET) { orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords); currentSheetName = orderedBSRs[currentSheetIndex].getSheetname(); if (itemReader.sheetName != null) { readingTargetSheet = currentSheetName.equals(itemReader.sheetName); } else { if (currentSheetIndex == itemReader.sheetIndex) { itemReader.sheetName = currentSheetName; readingTargetSheet = true; } else { readingTargetSheet = false; } } } break; case SSTRecord.sid: sstrec = (SSTRecord) record; break; case BlankRecord.sid: if (readingTargetSheet) { final BlankRecord rec = (BlankRecord) record; readCellValues(rec.getRow(), (int) rec.getColumn(), null); } break; case BoolErrRecord.sid: if (readingTargetSheet) { final BoolErrRecord rec = (BoolErrRecord) record; final String val = rec.isError() ? String.valueOf(rec.getErrorValue()) : String.valueOf(rec.getBooleanValue()); readCellValues(rec.getRow(), (int) rec.getColumn(), val); } break; case FormulaRecord.sid: if (readingTargetSheet) { final FormulaRecord rec = (FormulaRecord) record; final int row = rec.getRow(); final int column = rec.getColumn(); String val; if (Double.isNaN(rec.getValue())) { // Formula result is a string This is stored in the next record keyForNextStringRecord = headerIndexToLabelMapping.get(column); } else { val = itemReader.formatListener.formatNumberDateCell(rec); readCellValues(row, column, val); } } break; case StringRecord.sid: if (readingTargetSheet) { if (keyForNextStringRecord != null) { // String for formula final StringRecord rec = (StringRecord) record; final String val = rec.getString(); resultMap.put(keyForNextStringRecord, val); keyForNextStringRecord = null; } } break; case LabelRecord.sid: if (readingTargetSheet) { final LabelRecord rec = (LabelRecord) record; readCellValues(rec.getRow(), rec.getColumn(), rec.getValue()); } break; case LabelSSTRecord.sid: if (readingTargetSheet) { final LabelSSTRecord rec = (LabelSSTRecord) record; final String val = sstrec.getString(rec.getSSTIndex()).toString(); readCellValues(rec.getRow(), rec.getColumn(), val); } break; case NumberRecord.sid: if (readingTargetSheet) { final NumberRecord rec = (NumberRecord) record; final double val = rec.getValue(); readCellValues(rec.getRow(), rec.getColumn(), String.valueOf(val)); } break; case EOFRecord.sid: if (readingTargetSheet && readingDataRow) { queueRowData(null, true); } break; default: break; } } // Handle end of row if (readingTargetSheet && record instanceof LastCellOfRowDummyRecord) { final LastCellOfRowDummyRecord lastCellOfRowDummyRecord = (LastCellOfRowDummyRecord) record; final int row = lastCellOfRowDummyRecord.getRow(); if (readingHeaderRow) { itemReader.headerMapping = new HashMap<String, String>(); for (final Map.Entry<Integer, String> e : headerIndexToLabelMapping.entrySet()) { itemReader.headerMapping.put(String.valueOf(e.getKey()), e.getValue()); } if (itemReader.header == null) { final List<String> headerList = new ArrayList<String>(); final int headerColumnCount = headerIndexToLabelMapping.size(); for (int i = 0; headerList.size() < headerColumnCount; i++) { final String val = headerIndexToLabelMapping.get(i); if (val != null) { headerList.add(val); } } itemReader.header = headerList.toArray(new String[headerColumnCount]); } readingHeaderRow = false; } else if (readingDataRow) { queueRowData(null, false); } if (row >= itemReader.end) { queueRowData(null, true); } itemReader.currentRowNum = row; } } catch (final Exception e) { if (readingTargetSheet) { queueRowData(e, false); } } } private void readCellValues(final int row, final int column, final String val) { if (itemReader.header == null && row == itemReader.headerRow) { readingHeaderRow = true; readingDataRow = false; headerIndexToLabelMapping.put(column, val); } else if (row >= itemReader.start) { readingDataRow = true; readingHeaderRow = false; resultMap.put(headerIndexToLabelMapping.get(column), val); } } /** * puts data, {@link #resultMap} for regular row data, and {@link ReadCompletedException} to indicate the end * of data stream. It also re-initialize {@link #resultMap} * * @param exception any exception occurred during event record processing * @param eof true if reached the end of data stream */ private void queueRowData(final Exception exception, final boolean eof) throws ReadCompletedException { try { if (eof) { final ReadCompletedException readCompletedException = new ReadCompletedException(); itemReader.queue.put(readCompletedException); throw readCompletedException; } else if (exception != null) { itemReader.queue.put(exception); resultMap = new HashMap<String, String>(); } else { final Object obj; if (itemReader.beanType == List.class) { final List<String> resultList = new ArrayList<String>(); for (int i = 0; i < itemReader.header.length; ++i) { resultList.add(resultMap.get(itemReader.header[i])); } obj = resultList; } else if (itemReader.beanType == Map.class) { obj = resultMap; } else { obj = itemReader.objectMapper.convertValue(resultMap, itemReader.beanType); if (!itemReader.skipBeanValidation) { ItemReaderWriterBase.validate(obj); } } itemReader.queue.put(obj); resultMap = new HashMap<String, String>(); } } catch (final InterruptedException e) { Thread.currentThread().interrupt(); } } } /** * Exception to forcefully end excel parsing and reading. {@code org.apache.poi.hssf.eventusermodel.AbortableHSSFListener} * cannot be chained with {@code MissingRecordAwareHSSFListener}, so this exception has to be thrown from * {@link HSSFListenerImpl#processRecord(org.apache.poi.hssf.record.Record)} to indicate the end. */ private static final class ReadCompletedException extends RuntimeException { private static final long serialVersionUID = -8693208957107027254L; } }