List of usage examples for org.apache.poi.poifs.filesystem POIFSFileSystem createDocumentInputStream
public DocumentInputStream createDocumentInputStream(final String documentName) throws IOException
From source file:ambit.test.io.POItest.java
License:Open Source License
/** * Read an excel file and spit out what we find. * //from w w w . ja va 2 s . c o m * @param file * Expect one argument that is the file to read. * @throws IOException * When there is an error processing the file. */ public void readXLSFile(String file) throws IOException { // create a new file input stream with the input file specified // at the command line FileInputStream fin = new FileInputStream(file); // create a new org.apache.poi.poifs.filesystem.Filesystem POIFSFileSystem poifs = new POIFSFileSystem(fin); // get the Workbook (excel part) stream in a InputStream InputStream din = poifs.createDocumentInputStream("Workbook"); // construct out HSSFRequest object HSSFRequest req = new HSSFRequest(); // lazy listen for ALL records with the listener shown above req.addListenerForAllRecords(new POIExample()); // create our event factory HSSFEventFactory factory = new HSSFEventFactory(); // process our events based on the document input stream factory.processEvents(req, din); // once all the events are processed close our file input stream fin.close(); // and our document input stream (don't want to leak these!) din.close(); System.out.println("done."); }
From source file:ambit2.core.test.io.POItest.java
License:Open Source License
/** * Read an excel file and spit out what we find. * //from w ww . j a v a2s . c o m * @param file * Expect one argument that is the file to read. * @throws IOException * When there is an error processing the file. */ public void readXLSFile(String file) throws Exception { // create a new file input stream with the input file specified // at the command line FileInputStream fin = new FileInputStream(getClass().getClassLoader().getResource(file).getFile()); // create a new org.apache.poi.poifs.filesystem.Filesystem POIFSFileSystem poifs = new POIFSFileSystem(fin); // get the Workbook (excel part) stream in a InputStream InputStream din = poifs.createDocumentInputStream("Workbook"); // construct out HSSFRequest object HSSFRequest req = new HSSFRequest(); // lazy listen for ALL records with the listener shown above req.addListenerForAllRecords(new POIListener()); // create our event factory HSSFEventFactory factory = new HSSFEventFactory(); // process our events based on the document input stream factory.processEvents(req, din); // once all the events are processed close our file input stream fin.close(); // and our document input stream (don't want to leak these!) din.close(); }
From source file:com.daphne.es.showcase.excel.service.ExcelDataService.java
License:Apache License
/** * excel 2003 biff?/*from www . j a v a 2s .c om*/ * xml? ?SAX * @param user * @param is */ @Async public void importExcel2003(final User user, final InputStream is) { ExcelDataService proxy = ((ExcelDataService) AopContext.currentProxy()); BufferedInputStream bis = null; InputStream dis = null; try { long beginTime = System.currentTimeMillis(); List<ExcelData> dataList = Lists.newArrayList(); //? bis = new BufferedInputStream(is); // org.apache.poi.poifs.filesystem.Filesystem POIFSFileSystem poifs = new POIFSFileSystem(bis); // ? Workbook(excel )? dis = poifs.createDocumentInputStream("Workbook"); // HSSFRequest HSSFRequest req = new HSSFRequest(); // ? req.addListenerForAllRecords(new Excel2003ImportListener(proxy, dataList, batchSize)); // HSSFEventFactory factory = new HSSFEventFactory(); // ??? factory.processEvents(req, dis); //??batchSize? if (dataList.size() > 0) { proxy.doBatchSave(dataList); } long endTime = System.currentTimeMillis(); Map<String, Object> context = Maps.newHashMap(); context.put("seconds", (endTime - beginTime) / 1000); notificationApi.notify(user.getId(), "excelImportSuccess", context); } catch (Exception e) { log.error("excel import error", e); Map<String, Object> context = Maps.newHashMap(); context.put("error", e.getMessage()); notificationApi.notify(user.getId(), "excelImportError", context); } finally { // ? IOUtils.closeQuietly(bis); // ? IOUtils.closeQuietly(dis); } }
From source file:com.hp.octane.integrations.uft.UftTestDiscoveryUtils.java
License:Apache License
private static String extractXmlContentFromTspFile(InputStream stream) throws IOException { POIFSFileSystem poiFS = new POIFSFileSystem(stream); DirectoryNode root = poiFS.getRoot(); String xmlData = ""; for (Entry entry : root) { String name = entry.getName(); if ("ComponentInfo".equals(name)) { if (entry instanceof DirectoryEntry) { System.out.println(entry); } else if (entry instanceof DocumentEntry) { byte[] content = new byte[((DocumentEntry) entry).getSize()]; int readBytes = poiFS.createDocumentInputStream("ComponentInfo").read(content); if (readBytes < content.length) { // [YG] probably should handle this case and continue to read logger.warn("expected to read " + content.length + " bytes, but read and stopped after " + readBytes); }/*w w w . j a v a2s .c o m*/ String fromUnicodeLE = StringUtil.getFromUnicodeLE(content); xmlData = fromUnicodeLE.substring(fromUnicodeLE.indexOf('<')).replaceAll("\u0000", ""); } } } return xmlData; }
From source file:com.hpe.application.automation.tools.octane.actions.UFTTestUtil.java
License:Open Source License
public static String decodeXmlContent(InputStream stream) throws IOException { POIFSFileSystem poiFS = new POIFSFileSystem(stream); DirectoryNode root = poiFS.getRoot(); String xmlData = ""; for (Entry entry : root) { String name = entry.getName(); if ("ComponentInfo".equals(name)) { if (entry instanceof DirectoryEntry) { System.out.println(entry); } else if (entry instanceof DocumentEntry) { byte[] content = new byte[((DocumentEntry) entry).getSize()]; poiFS.createDocumentInputStream("ComponentInfo").read(content); String fromUnicodeLE = StringUtil.getFromUnicodeLE(content); xmlData = fromUnicodeLE.substring(fromUnicodeLE.indexOf('<')).replaceAll("\u0000", ""); }//from w w w . ja v a 2 s . c o m } } return xmlData; }
From source file:com.krawler.esp.fileparser.word.ExtractWordFile.java
License:Open Source License
public String extractText(String filepath) throws FastSavedException, IOException { InputStream iStream = new BufferedInputStream(new FileInputStream(filepath)); POIFSFileSystem fsys = new POIFSFileSystem(iStream); // load our POIFS document streams. DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument"); DocumentInputStream din = fsys.createDocumentInputStream("WordDocument"); byte[] header = new byte[headerProps.getSize()]; din.read(header);/*from w w w .j a v a2s . c o m*/ din.close(); int info = LittleEndian.getShort(header, 0xa); if ((info & 0x4) != 0) { throw new FastSavedException("Fast-saved files are unsupported at this time"); } if ((info & 0x100) != 0) { System.out.println("This document is password protected"); } // determine the version of Word this document came from. int nFib = LittleEndian.getShort(header, 0x2); switch (nFib) { case 101: case 102: case 103: case 104: // this is a Word 6.0 doc send it to the extractor for that version. Word6Extractor oldExtractor = new Word6Extractor(); return oldExtractor.extractText(header); } // Get the information we need from the header boolean useTable1 = (info & 0x200) != 0; // get the location of the piece table int complexOffset = LittleEndian.getInt(header, 0x1a2); // determine which table stream we must use. 
String tableName = null; if (useTable1) { tableName = "1Table"; } else { tableName = "0Table"; } DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName); byte[] tableStream = new byte[table.getSize()]; din = fsys.createDocumentInputStream(tableName); din.read(tableStream); din.close(); int chpOffset = LittleEndian.getInt(header, 0xfa); int chpSize = LittleEndian.getInt(header, 0xfe); int fcMin = LittleEndian.getInt(header, 0x18); // load our text pieces and our character runs ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin); TextPieceTable tpt = cft.getTextPieceTable(); List textPieces = tpt.getTextPieces(); CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin, tpt); // make the POIFS objects available for garbage collection din = null; fsys = null; table = null; headerProps = null; List textRuns = cbt.getTextRuns(); Iterator runIt = textRuns.iterator(); Iterator textIt = textPieces.iterator(); TextPiece currentPiece = (TextPiece) textIt.next(); int currentTextStart = currentPiece.getStart(); int currentTextEnd = currentPiece.getEnd(); WordTextBuffer finalTextBuf = new WordTextBuffer(); // iterate through all text runs extract the text only if they haven't // been // deleted while (runIt.hasNext()) { CHPX chpx = (CHPX) runIt.next(); boolean deleted = isDeleted(chpx.getGrpprl()); if (deleted) { continue; } int runStart = chpx.getStart(); int runEnd = chpx.getEnd(); while (runStart >= currentTextEnd) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); currentTextEnd = currentPiece.getEnd(); } if (runEnd < currentTextEnd) { String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); finalTextBuf.append(str); } else if (runEnd > currentTextEnd) { while (runEnd > currentTextEnd) { String str = currentPiece.substring(runStart - currentTextStart, currentTextEnd - currentTextStart); finalTextBuf.append(str); if 
(textIt.hasNext()) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); runStart = currentTextStart; currentTextEnd = currentPiece.getEnd(); } else { return finalTextBuf.toString(); } } String str = currentPiece.substring(0, runEnd - currentTextStart); finalTextBuf.append(str); } else { String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); if (textIt.hasNext()) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); currentTextEnd = currentPiece.getEnd(); } finalTextBuf.append(str); } } return finalTextBuf.toString(); }
From source file:com.krawler.esp.fileparser.wordparser.ExtractWordFile.java
License:Open Source License
public String extractText(String filepath) throws FastSavedException, IOException { InputStream iStream = new BufferedInputStream(new FileInputStream(filepath)); ArrayList text = new ArrayList(); POIFSFileSystem fsys = new POIFSFileSystem(iStream); // load our POIFS document streams. DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument"); DocumentInputStream din = fsys.createDocumentInputStream("WordDocument"); byte[] header = new byte[headerProps.getSize()]; din.read(header);//ww w . jav a 2 s. c o m din.close(); int info = LittleEndian.getShort(header, 0xa); if ((info & 0x4) != 0) { throw new FastSavedException("Fast-saved files are unsupported at this time"); } if ((info & 0x100) != 0) { System.out.println("This document is password protected"); } // determine the version of Word this document came from. int nFib = LittleEndian.getShort(header, 0x2); // Get the information we need from the header boolean useTable1 = (info & 0x200) != 0; // get the location of the piece table int complexOffset = LittleEndian.getInt(header, 0x1a2); // determine which table stream we must use. String tableName = null; if (useTable1) { tableName = "1Table"; } else { tableName = "0Table"; } DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName); byte[] tableStream = new byte[table.getSize()]; din = fsys.createDocumentInputStream(tableName); din.read(tableStream); din.close(); int chpOffset = LittleEndian.getInt(header, 0xfa); int chpSize = LittleEndian.getInt(header, 0xfe); int fcMin = LittleEndian.getInt(header, 0x18); ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin); TextPieceTable tpt = cft.getTextPieceTable(); switch (nFib) { case 101: case 102: case 103: case 104: // this is a Word 6.0 doc send it to the extractor for that version. 
Word6Extractor oldExtractor = new Word6Extractor(); return oldExtractor.extractText(header, tpt); } CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin, tpt); // load our text pieces and our character runs List textPieces = tpt.getTextPieces(); // make the POIFS objects available for garbage collection din = null; fsys = null; table = null; headerProps = null; List textRuns = cbt.getTextRuns(); Iterator runIt = textRuns.iterator(); Iterator textIt = textPieces.iterator(); TextPiece currentPiece = (TextPiece) textIt.next(); int currentTextStart = currentPiece.getStart(); int currentTextEnd = currentPiece.getEnd(); WordTextBuffer finalTextBuf = new WordTextBuffer(); // iterate through all text runs extract the text only if they haven't // been // deleted while (runIt.hasNext()) { CHPX chpx = (CHPX) runIt.next(); boolean deleted = isDeleted(chpx.getGrpprl()); if (deleted) { continue; } int runStart = chpx.getStart(); int runEnd = chpx.getEnd(); while (runStart >= currentTextEnd) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); currentTextEnd = currentPiece.getEnd(); } if (runEnd < currentTextEnd) { String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); finalTextBuf.append(str); } else if (runEnd > currentTextEnd) { while (runEnd > currentTextEnd) { String str = currentPiece.substring(runStart - currentTextStart, currentTextEnd - currentTextStart); finalTextBuf.append(str); if (textIt.hasNext()) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); runStart = currentTextStart; currentTextEnd = currentPiece.getEnd(); } else { return finalTextBuf.toString(); } } String str = currentPiece.substring(0, runEnd - currentTextStart); finalTextBuf.append(str); } else { String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); if (textIt.hasNext()) { currentPiece = (TextPiece) textIt.next(); 
currentTextStart = currentPiece.getStart(); currentTextEnd = currentPiece.getEnd(); } finalTextBuf.append(str); } } return finalTextBuf.toString(); }
From source file:com.progdan.doc2txt.WordExtractor.java
License:Apache License
/** * Gets the text from a Word document./*from w w w.j a v a 2 s. c o m*/ * * @param in The InputStream representing the Word file. */ public String extractText(InputStream in) throws Exception { ArrayList text = new ArrayList(); POIFSFileSystem fsys = new POIFSFileSystem(in); // load our POIFS document streams. DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument"); DocumentInputStream din = fsys.createDocumentInputStream("WordDocument"); byte[] header = new byte[headerProps.getSize()]; din.read(header); din.close(); int info = LittleEndian.getShort(header, 0xa); if ((info & 0x4) != 0) { throw new FastSavedException("Fast-saved files are unsupported at this time"); } if ((info & 0x100) != 0) { throw new PasswordProtectedException("This document is password protected"); } // determine the version of Word this document came from. int nFib = LittleEndian.getShort(header, 0x2); switch (nFib) { case 101: case 102: case 103: case 104: // this is a Word 6.0 doc send it to the extractor for that version. Word6Extractor oldExtractor = new Word6Extractor(); return oldExtractor.extractText(header); } //Get the information we need from the header boolean useTable1 = (info & 0x200) != 0; //get the location of the piece table int complexOffset = LittleEndian.getInt(header, 0x1a2); // determine which table stream we must use. 
String tableName = null; if (useTable1) { tableName = "1Table"; } else { tableName = "0Table"; } DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName); byte[] tableStream = new byte[table.getSize()]; din = fsys.createDocumentInputStream(tableName); din.read(tableStream); din.close(); int chpOffset = LittleEndian.getInt(header, 0xfa); int chpSize = LittleEndian.getInt(header, 0xfe); int fcMin = LittleEndian.getInt(header, 0x18); CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin); // load our text pieces and our character runs ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin); TextPieceTable tpt = cft.getTextPieceTable(); List textPieces = tpt.getTextPieces(); // make the POIFS objects available for garbage collection din = null; fsys = null; table = null; headerProps = null; List textRuns = cbt.getTextRuns(); Iterator runIt = textRuns.iterator(); Iterator textIt = textPieces.iterator(); TextPiece currentPiece = (TextPiece) textIt.next(); int currentTextStart = currentPiece.getStart(); int currentTextEnd = currentPiece.getEnd(); WordTextBuffer finalTextBuf = new WordTextBuffer(); // iterate through all text runs extract the text only if they haven't been // deleted while (runIt.hasNext()) { CHPX chpx = (CHPX) runIt.next(); boolean deleted = isDeleted(chpx.getGrpprl()); if (deleted) { continue; } int runStart = chpx.getStart(); int runEnd = chpx.getEnd(); while (runStart >= currentTextEnd) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); currentTextEnd = currentPiece.getEnd(); } if (runEnd < currentTextEnd) { String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); finalTextBuf.append(str); } else if (runEnd > currentTextEnd) { while (runEnd > currentTextEnd) { String str = currentPiece.substring(runStart - currentTextStart, currentTextEnd - currentTextStart); finalTextBuf.append(str); if (textIt.hasNext()) { 
currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); runStart = currentTextStart; currentTextEnd = currentPiece.getEnd(); } else { return finalTextBuf.toString(); } } String str = currentPiece.substring(0, runEnd - currentTextStart); finalTextBuf.append(str); } else { String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); if (textIt.hasNext()) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); currentTextEnd = currentPiece.getEnd(); } finalTextBuf.append(str); } } return finalTextBuf.toString(); }
From source file:com.sonicle.webtop.contacts.io.input.ContactExcelFileReader.java
License:Open Source License
/**
 * Runs an {@code XlsRowsProcessor} over the "Workbook" document of the
 * given .xls file, passing it an {@code XlsRowHandler} built from this
 * reader, the file's column indexes and {@code beanHandler}.
 *
 * @param file        the .xls file to read
 * @param beanHandler handler passed on to the {@code XlsRowHandler}
 * @throws IOException         if the file cannot be opened or read
 * @throws FileReaderException declared for failures raised while listing
 *                             columns or processing rows
 */
private void readXlsContacts(File file, BeanHandler beanHandler) throws IOException, FileReaderException {
    // the column lookup is resolved first, before the workbook is opened
    final HashMap<String, Integer> indexesByColumn = listXlsColumnIndexes(file);

    POIFSFileSystem fileSystem = null;
    InputStream workbook = null;
    try {
        fileSystem = new POIFSFileSystem(file);
        workbook = fileSystem.createDocumentInputStream("Workbook");

        final XlsRowHandler handler = new XlsRowHandler(this, indexesByColumn, beanHandler);
        new XlsRowsProcessor(workbook, headersRow, firstDataRow, lastDataRow, sheet, handler).process();
    } finally {
        // close the document stream first, then the file system that owns it
        IOUtils.closeQuietly(workbook);
        IOUtils.closeQuietly(fileSystem);
    }
}
From source file:com.sonicle.webtop.core.io.input.ExcelFileReader.java
License:Open Source License
/**
 * Lists the sheet names collected by an {@code XlsSheetsProcessor} run over
 * the "Workbook" document of the given .xls file.
 *
 * @param file the .xls file to inspect
 * @return the processor's {@code sheetNames} after processing
 * @throws IOException         if the file cannot be opened or read
 * @throws FileReaderException declared for failures raised while processing
 */
public List<String> listXlsSheets(File file) throws IOException, FileReaderException {
    POIFSFileSystem fileSystem = null;
    InputStream workbook = null;
    try {
        fileSystem = new POIFSFileSystem(file);
        workbook = fileSystem.createDocumentInputStream("Workbook");

        final XlsSheetsProcessor sheets = new XlsSheetsProcessor(workbook);
        sheets.process();
        return sheets.sheetNames;
    } finally {
        // close the document stream first, then the file system that owns it
        IOUtils.closeQuietly(workbook);
        IOUtils.closeQuietly(fileSystem);
    }
}