Example usage for org.apache.poi.poifs.filesystem POIFSFileSystem createDocumentInputStream

Introduction

On this page you can find example usages of org.apache.poi.poifs.filesystem.POIFSFileSystem.createDocumentInputStream.

Prototype

public DocumentInputStream createDocumentInputStream(final String documentName) throws IOException

Source Link

Document

Opens a document in the root entry's list of entries.

Usage

From source file:ambit.test.io.POItest.java

License:Open Source License

/**
 * Reads an Excel (.xls) file through the POI event API and hands every
 * record to a {@code POIExample} listener.
 *
 * @param file
 *            path of the file to read.
 * @throws IOException
 *             when there is an error opening or processing the file.
 */
public void readXLSFile(String file) throws IOException {
    // open the input file; released in the outer finally even if parsing fails
    FileInputStream fin = new FileInputStream(file);
    try {
        // wrap the file in an OLE2 file system
        POIFSFileSystem poifs = new POIFSFileSystem(fin);
        // get the "Workbook" (excel part) stream
        InputStream din = poifs.createDocumentInputStream("Workbook");
        try {
            // register the listener for ALL record types
            HSSFRequest req = new HSSFRequest();
            req.addListenerForAllRecords(new POIExample());
            // drive the listener from the document input stream
            HSSFEventFactory factory = new HSSFEventFactory();
            factory.processEvents(req, din);
        } finally {
            // close the document stream on success and on failure alike
            din.close();
        }
    } finally {
        fin.close();
    }
    System.out.println("done.");
}

From source file:ambit2.core.test.io.POItest.java

License:Open Source License

/**
 * Reads an Excel (.xls) classpath resource through the POI event API and
 * hands every record to a {@code POIListener}.
 *
 * @param file
 *            name of the resource to read, resolved through this class's
 *            class loader.
 * @throws Exception
 *             when the resource cannot be found or there is an error
 *             processing it.
 */
public void readXLSFile(String file) throws Exception {

    // Open the resource as a stream rather than via URL.getFile(): the URL
    // path is percent-encoded, so opening it as a file fails for locations
    // containing spaces, and a missing resource would otherwise be an NPE.
    InputStream fin = getClass().getClassLoader().getResourceAsStream(file);
    if (fin == null) {
        throw new java.io.FileNotFoundException("Resource not found on classpath: " + file);
    }
    try {
        // wrap the resource in an OLE2 file system
        POIFSFileSystem poifs = new POIFSFileSystem(fin);
        // get the "Workbook" (excel part) stream
        InputStream din = poifs.createDocumentInputStream("Workbook");
        try {
            // register the listener for ALL record types
            HSSFRequest req = new HSSFRequest();
            req.addListenerForAllRecords(new POIListener());
            // drive the listener from the document input stream
            HSSFEventFactory factory = new HSSFEventFactory();
            factory.processEvents(req, din);
        } finally {
            // close the document stream on success and on failure alike
            din.close();
        }
    } finally {
        fin.close();
    }
}

From source file:com.daphne.es.showcase.excel.service.ExcelDataService.java

License:Apache License

/**
 *  excel 2003 biff?/*from  www  . j a v  a 2s .c om*/
 * xml? ?SAX
 * @param user
 * @param is
 */
@Async
public void importExcel2003(final User user, final InputStream is) {

    ExcelDataService proxy = ((ExcelDataService) AopContext.currentProxy());

    BufferedInputStream bis = null;
    InputStream dis = null;
    try {
        long beginTime = System.currentTimeMillis();

        List<ExcelData> dataList = Lists.newArrayList();

        //?
        bis = new BufferedInputStream(is);
        //  org.apache.poi.poifs.filesystem.Filesystem
        POIFSFileSystem poifs = new POIFSFileSystem(bis);
        // ?  Workbook(excel )?
        dis = poifs.createDocumentInputStream("Workbook");
        //  HSSFRequest
        HSSFRequest req = new HSSFRequest();

        // ?
        req.addListenerForAllRecords(new Excel2003ImportListener(proxy, dataList, batchSize));
        //  
        HSSFEventFactory factory = new HSSFEventFactory();
        // ???
        factory.processEvents(req, dis);

        //??batchSize?
        if (dataList.size() > 0) {
            proxy.doBatchSave(dataList);
        }

        long endTime = System.currentTimeMillis();

        Map<String, Object> context = Maps.newHashMap();
        context.put("seconds", (endTime - beginTime) / 1000);
        notificationApi.notify(user.getId(), "excelImportSuccess", context);
    } catch (Exception e) {
        log.error("excel import error", e);
        Map<String, Object> context = Maps.newHashMap();
        context.put("error", e.getMessage());
        notificationApi.notify(user.getId(), "excelImportError", context);
    } finally {
        // ?
        IOUtils.closeQuietly(bis);
        // ?
        IOUtils.closeQuietly(dis);
    }
}

From source file:com.hp.octane.integrations.uft.UftTestDiscoveryUtils.java

License:Apache License

/**
 * Extracts the XML text stored in the root-level "ComponentInfo" document of
 * an OLE2 (POIFS) container.
 * <p>
 * The document bytes are decoded as little-endian Unicode; everything before
 * the first '&lt;' is dropped and NUL characters are removed.
 *
 * @param stream stream positioned at the start of the OLE2 container
 * @return the extracted XML, or an empty string when there is no
 *         "ComponentInfo" document or it contains no '&lt;'
 * @throws IOException if the container or the document cannot be read
 */
private static String extractXmlContentFromTspFile(InputStream stream) throws IOException {
    POIFSFileSystem poiFS = new POIFSFileSystem(stream);
    DirectoryNode root = poiFS.getRoot();
    String xmlData = "";

    for (Entry entry : root) {
        if (!"ComponentInfo".equals(entry.getName())) {
            continue;
        }
        if (entry instanceof DirectoryEntry) {
            System.out.println(entry);
        } else if (entry instanceof DocumentEntry) {
            byte[] content = new byte[((DocumentEntry) entry).getSize()];
            int readBytes = 0;
            InputStream componentInfo = poiFS.createDocumentInputStream("ComponentInfo");
            try {
                // a single read() may return fewer bytes than requested, so keep
                // reading until the buffer is full or the stream ends
                while (readBytes < content.length) {
                    int count = componentInfo.read(content, readBytes, content.length - readBytes);
                    if (count <= 0) {
                        break;
                    }
                    readBytes += count;
                }
            } finally {
                componentInfo.close();
            }
            if (readBytes < content.length) {
                logger.warn("expected to read " + content.length + " bytes, but read and stopped after "
                        + readBytes);
            }
            String fromUnicodeLE = StringUtil.getFromUnicodeLE(content);
            int xmlStart = fromUnicodeLE.indexOf('<');
            if (xmlStart < 0) {
                // substring(-1) would throw; treat "no markup" as "no XML"
                logger.warn("no XML content found in ComponentInfo");
            } else {
                xmlData = fromUnicodeLE.substring(xmlStart).replace("\u0000", "");
            }
        }
    }
    return xmlData;
}

From source file:com.hpe.application.automation.tools.octane.actions.UFTTestUtil.java

License:Open Source License

/**
 * Extracts the XML text stored in the root-level "ComponentInfo" document of
 * an OLE2 (POIFS) container.
 * <p>
 * The document bytes are decoded as little-endian Unicode; everything before
 * the first '&lt;' is dropped and NUL characters are removed.
 *
 * @param stream stream positioned at the start of the OLE2 container
 * @return the extracted XML, or an empty string when there is no
 *         "ComponentInfo" document or it contains no '&lt;'
 * @throws IOException if the container or the document cannot be read
 */
public static String decodeXmlContent(InputStream stream) throws IOException {
    POIFSFileSystem poiFS = new POIFSFileSystem(stream);
    DirectoryNode root = poiFS.getRoot();
    String xmlData = "";

    for (Entry entry : root) {
        if (!"ComponentInfo".equals(entry.getName())) {
            continue;
        }
        if (entry instanceof DirectoryEntry) {
            System.out.println(entry);
        } else if (entry instanceof DocumentEntry) {
            byte[] content = new byte[((DocumentEntry) entry).getSize()];
            InputStream componentInfo = poiFS.createDocumentInputStream("ComponentInfo");
            try {
                // a single read() may return fewer bytes than requested, so keep
                // reading until the buffer is full or the stream ends
                int filled = 0;
                while (filled < content.length) {
                    int count = componentInfo.read(content, filled, content.length - filled);
                    if (count <= 0) {
                        break;
                    }
                    filled += count;
                }
            } finally {
                componentInfo.close();
            }
            String fromUnicodeLE = StringUtil.getFromUnicodeLE(content);
            int xmlStart = fromUnicodeLE.indexOf('<');
            // substring(-1) would throw; without any '<' there is no XML to return
            if (xmlStart >= 0) {
                xmlData = fromUnicodeLE.substring(xmlStart).replace("\u0000", "");
            }
        }
    }
    return xmlData;
}

From source file:com.krawler.esp.fileparser.word.ExtractWordFile.java

License:Open Source License

/**
 * Extracts the text of a Word document stored in an OLE2 file.
 * <p>
 * Word 6.0 documents (nFib 101-104) are delegated to {@code Word6Extractor}.
 * For later versions the text pieces are walked alongside the character
 * runs, and runs that {@code isDeleted} reports as deleted are skipped.
 *
 * @param filepath path of the Word file to read
 * @return the extracted text
 * @throws FastSavedException if the document was fast-saved
 * @throws IOException if the file cannot be read or a document stream ends early
 */
public String extractText(String filepath) throws FastSavedException, IOException {
    POIFSFileSystem fsys;
    InputStream iStream = new BufferedInputStream(new FileInputStream(filepath));
    try {
        fsys = new POIFSFileSystem(iStream);
    } finally {
        // the file system has been loaded from the stream by now, so release
        // the file handle instead of leaking it
        iStream.close();
    }

    // load our POIFS document streams.
    byte[] header = readDocumentFully(fsys, "WordDocument");

    int info = LittleEndian.getShort(header, 0xa);
    if ((info & 0x4) != 0) {
        throw new FastSavedException("Fast-saved files are unsupported at this time");
    }
    if ((info & 0x100) != 0) {
        System.out.println("This document is password protected");
    }

    // determine the version of Word this document came from.
    int nFib = LittleEndian.getShort(header, 0x2);
    switch (nFib) {
    case 101:
    case 102:
    case 103:
    case 104:
        // this is a Word 6.0 doc send it to the extractor for that version.
        Word6Extractor oldExtractor = new Word6Extractor();
        return oldExtractor.extractText(header);
    }

    // Get the information we need from the header
    boolean useTable1 = (info & 0x200) != 0;

    // get the location of the piece table
    int complexOffset = LittleEndian.getInt(header, 0x1a2);

    // determine which table stream we must use.
    String tableName = useTable1 ? "1Table" : "0Table";
    byte[] tableStream = readDocumentFully(fsys, tableName);

    // make the POIFS objects available for garbage collection
    fsys = null;

    int chpOffset = LittleEndian.getInt(header, 0xfa);
    int chpSize = LittleEndian.getInt(header, 0xfe);
    int fcMin = LittleEndian.getInt(header, 0x18);

    // load our text pieces and our character runs
    ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin);
    TextPieceTable tpt = cft.getTextPieceTable();
    List textPieces = tpt.getTextPieces();

    CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin, tpt);

    List textRuns = cbt.getTextRuns();
    Iterator runIt = textRuns.iterator();
    Iterator textIt = textPieces.iterator();

    // a document without any text piece has no text to extract
    if (!textIt.hasNext()) {
        return "";
    }

    TextPiece currentPiece = (TextPiece) textIt.next();
    int currentTextStart = currentPiece.getStart();
    int currentTextEnd = currentPiece.getEnd();

    WordTextBuffer finalTextBuf = new WordTextBuffer();

    // iterate through all text runs extract the text only if they haven't
    // been deleted
    while (runIt.hasNext()) {
        CHPX chpx = (CHPX) runIt.next();
        boolean deleted = isDeleted(chpx.getGrpprl());
        if (deleted) {
            continue;
        }

        int runStart = chpx.getStart();
        int runEnd = chpx.getEnd();

        while (runStart >= currentTextEnd) {
            // running out of pieces ends the extraction, same as the
            // multi-piece branch below
            if (!textIt.hasNext()) {
                return finalTextBuf.toString();
            }
            currentPiece = (TextPiece) textIt.next();
            currentTextStart = currentPiece.getStart();
            currentTextEnd = currentPiece.getEnd();
        }

        if (runEnd < currentTextEnd) {
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else if (runEnd > currentTextEnd) {
            while (runEnd > currentTextEnd) {
                String str = currentPiece.substring(runStart - currentTextStart,
                        currentTextEnd - currentTextStart);
                finalTextBuf.append(str);
                if (textIt.hasNext()) {
                    currentPiece = (TextPiece) textIt.next();
                    currentTextStart = currentPiece.getStart();
                    runStart = currentTextStart;
                    currentTextEnd = currentPiece.getEnd();
                } else {
                    return finalTextBuf.toString();
                }
            }
            String str = currentPiece.substring(0, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else {
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            if (textIt.hasNext()) {
                currentPiece = (TextPiece) textIt.next();
                currentTextStart = currentPiece.getStart();
                currentTextEnd = currentPiece.getEnd();
            }
            finalTextBuf.append(str);
        }
    }
    return finalTextBuf.toString();
}

/**
 * Reads the complete contents of a root-level document into a byte array,
 * looping because a single read() may return fewer bytes than requested.
 */
private static byte[] readDocumentFully(POIFSFileSystem fsys, String name) throws IOException {
    DocumentEntry entry = (DocumentEntry) fsys.getRoot().getEntry(name);
    byte[] data = new byte[entry.getSize()];
    DocumentInputStream din = fsys.createDocumentInputStream(name);
    try {
        int offset = 0;
        while (offset < data.length) {
            int count = din.read(data, offset, data.length - offset);
            if (count <= 0) {
                throw new IOException("Unexpected end of document '" + name + "' after " + offset
                        + " of " + data.length + " bytes");
            }
            offset += count;
        }
    } finally {
        din.close();
    }
    return data;
}

From source file:com.krawler.esp.fileparser.wordparser.ExtractWordFile.java

License:Open Source License

/**
 * Extracts the text of a Word document stored in an OLE2 file.
 * <p>
 * Word 6.0 documents (nFib 101-104) are delegated to {@code Word6Extractor}
 * together with the text piece table. For later versions the text pieces are
 * walked alongside the character runs, and runs that {@code isDeleted}
 * reports as deleted are skipped.
 *
 * @param filepath path of the Word file to read
 * @return the extracted text
 * @throws FastSavedException if the document was fast-saved
 * @throws IOException if the file cannot be read or a document stream ends early
 */
public String extractText(String filepath) throws FastSavedException, IOException {
    POIFSFileSystem fsys;
    InputStream iStream = new BufferedInputStream(new FileInputStream(filepath));
    try {
        fsys = new POIFSFileSystem(iStream);
    } finally {
        // the file system has been loaded from the stream by now, so release
        // the file handle instead of leaking it
        iStream.close();
    }

    // load our POIFS document streams.
    byte[] header = readDocumentFully(fsys, "WordDocument");

    int info = LittleEndian.getShort(header, 0xa);
    if ((info & 0x4) != 0) {
        throw new FastSavedException("Fast-saved files are unsupported at this time");
    }
    if ((info & 0x100) != 0) {
        System.out.println("This document is password protected");
    }

    // determine the version of Word this document came from.
    int nFib = LittleEndian.getShort(header, 0x2);
    // Get the information we need from the header
    boolean useTable1 = (info & 0x200) != 0;

    // get the location of the piece table
    int complexOffset = LittleEndian.getInt(header, 0x1a2);

    // determine which table stream we must use.
    String tableName = useTable1 ? "1Table" : "0Table";
    byte[] tableStream = readDocumentFully(fsys, tableName);

    // make the POIFS objects available for garbage collection
    fsys = null;

    int chpOffset = LittleEndian.getInt(header, 0xfa);
    int chpSize = LittleEndian.getInt(header, 0xfe);
    int fcMin = LittleEndian.getInt(header, 0x18);

    ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin);
    TextPieceTable tpt = cft.getTextPieceTable();
    switch (nFib) {
    case 101:
    case 102:
    case 103:
    case 104:
        // this is a Word 6.0 doc send it to the extractor for that version.
        Word6Extractor oldExtractor = new Word6Extractor();
        return oldExtractor.extractText(header, tpt);
    }
    CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin, tpt);

    // load our text pieces and our character runs
    List textPieces = tpt.getTextPieces();

    List textRuns = cbt.getTextRuns();
    Iterator runIt = textRuns.iterator();
    Iterator textIt = textPieces.iterator();

    // a document without any text piece has no text to extract
    if (!textIt.hasNext()) {
        return "";
    }

    TextPiece currentPiece = (TextPiece) textIt.next();
    int currentTextStart = currentPiece.getStart();
    int currentTextEnd = currentPiece.getEnd();

    WordTextBuffer finalTextBuf = new WordTextBuffer();

    // iterate through all text runs extract the text only if they haven't
    // been deleted
    while (runIt.hasNext()) {
        CHPX chpx = (CHPX) runIt.next();
        boolean deleted = isDeleted(chpx.getGrpprl());
        if (deleted) {
            continue;
        }

        int runStart = chpx.getStart();
        int runEnd = chpx.getEnd();

        while (runStart >= currentTextEnd) {
            // running out of pieces ends the extraction, same as the
            // multi-piece branch below
            if (!textIt.hasNext()) {
                return finalTextBuf.toString();
            }
            currentPiece = (TextPiece) textIt.next();
            currentTextStart = currentPiece.getStart();
            currentTextEnd = currentPiece.getEnd();
        }

        if (runEnd < currentTextEnd) {
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else if (runEnd > currentTextEnd) {
            while (runEnd > currentTextEnd) {
                String str = currentPiece.substring(runStart - currentTextStart,
                        currentTextEnd - currentTextStart);
                finalTextBuf.append(str);
                if (textIt.hasNext()) {
                    currentPiece = (TextPiece) textIt.next();
                    currentTextStart = currentPiece.getStart();
                    runStart = currentTextStart;
                    currentTextEnd = currentPiece.getEnd();
                } else {
                    return finalTextBuf.toString();
                }
            }
            String str = currentPiece.substring(0, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else {
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            if (textIt.hasNext()) {
                currentPiece = (TextPiece) textIt.next();
                currentTextStart = currentPiece.getStart();
                currentTextEnd = currentPiece.getEnd();
            }
            finalTextBuf.append(str);
        }
    }
    return finalTextBuf.toString();
}

/**
 * Reads the complete contents of a root-level document into a byte array,
 * looping because a single read() may return fewer bytes than requested.
 */
private static byte[] readDocumentFully(POIFSFileSystem fsys, String name) throws IOException {
    DocumentEntry entry = (DocumentEntry) fsys.getRoot().getEntry(name);
    byte[] data = new byte[entry.getSize()];
    DocumentInputStream din = fsys.createDocumentInputStream(name);
    try {
        int offset = 0;
        while (offset < data.length) {
            int count = din.read(data, offset, data.length - offset);
            if (count <= 0) {
                throw new IOException("Unexpected end of document '" + name + "' after " + offset
                        + " of " + data.length + " bytes");
            }
            offset += count;
        }
    } finally {
        din.close();
    }
    return data;
}

From source file:com.progdan.doc2txt.WordExtractor.java

License:Apache License

/**
 * Gets the text from a Word document./*from w w  w.j a v a  2  s. c  o m*/
 *
 * @param in The InputStream representing the Word file.
 */
public String extractText(InputStream in) throws Exception {
    ArrayList text = new ArrayList();
    POIFSFileSystem fsys = new POIFSFileSystem(in);

    // load our POIFS document streams.
    DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument");
    DocumentInputStream din = fsys.createDocumentInputStream("WordDocument");
    byte[] header = new byte[headerProps.getSize()];

    din.read(header);
    din.close();

    int info = LittleEndian.getShort(header, 0xa);
    if ((info & 0x4) != 0) {
        throw new FastSavedException("Fast-saved files are unsupported at this time");
    }
    if ((info & 0x100) != 0) {
        throw new PasswordProtectedException("This document is password protected");
    }

    // determine the version of Word this document came from.
    int nFib = LittleEndian.getShort(header, 0x2);
    switch (nFib) {
    case 101:
    case 102:
    case 103:
    case 104:
        // this is a Word 6.0 doc send it to the extractor for that version.
        Word6Extractor oldExtractor = new Word6Extractor();
        return oldExtractor.extractText(header);
    }

    //Get the information we need from the header
    boolean useTable1 = (info & 0x200) != 0;

    //get the location of the piece table
    int complexOffset = LittleEndian.getInt(header, 0x1a2);

    // determine which table stream we must use.
    String tableName = null;
    if (useTable1) {
        tableName = "1Table";
    } else {
        tableName = "0Table";
    }

    DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName);
    byte[] tableStream = new byte[table.getSize()];

    din = fsys.createDocumentInputStream(tableName);

    din.read(tableStream);
    din.close();

    int chpOffset = LittleEndian.getInt(header, 0xfa);
    int chpSize = LittleEndian.getInt(header, 0xfe);
    int fcMin = LittleEndian.getInt(header, 0x18);
    CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin);

    // load our text pieces and our character runs
    ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin);
    TextPieceTable tpt = cft.getTextPieceTable();
    List textPieces = tpt.getTextPieces();

    // make the POIFS objects available for garbage collection
    din = null;
    fsys = null;
    table = null;
    headerProps = null;

    List textRuns = cbt.getTextRuns();
    Iterator runIt = textRuns.iterator();
    Iterator textIt = textPieces.iterator();

    TextPiece currentPiece = (TextPiece) textIt.next();
    int currentTextStart = currentPiece.getStart();
    int currentTextEnd = currentPiece.getEnd();

    WordTextBuffer finalTextBuf = new WordTextBuffer();

    // iterate through all text runs extract the text only if they haven't been
    // deleted
    while (runIt.hasNext()) {
        CHPX chpx = (CHPX) runIt.next();
        boolean deleted = isDeleted(chpx.getGrpprl());
        if (deleted) {
            continue;
        }

        int runStart = chpx.getStart();
        int runEnd = chpx.getEnd();

        while (runStart >= currentTextEnd) {
            currentPiece = (TextPiece) textIt.next();
            currentTextStart = currentPiece.getStart();
            currentTextEnd = currentPiece.getEnd();
        }

        if (runEnd < currentTextEnd) {
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else if (runEnd > currentTextEnd) {
            while (runEnd > currentTextEnd) {
                String str = currentPiece.substring(runStart - currentTextStart,
                        currentTextEnd - currentTextStart);
                finalTextBuf.append(str);
                if (textIt.hasNext()) {
                    currentPiece = (TextPiece) textIt.next();
                    currentTextStart = currentPiece.getStart();
                    runStart = currentTextStart;
                    currentTextEnd = currentPiece.getEnd();
                } else {
                    return finalTextBuf.toString();
                }
            }
            String str = currentPiece.substring(0, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else {
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            if (textIt.hasNext()) {
                currentPiece = (TextPiece) textIt.next();
                currentTextStart = currentPiece.getStart();
                currentTextEnd = currentPiece.getEnd();
            }
            finalTextBuf.append(str);
        }
    }
    return finalTextBuf.toString();
}

From source file:com.sonicle.webtop.contacts.io.input.ContactExcelFileReader.java

License:Open Source License

/**
 * Reads the rows of the "Workbook" stream of an .xls file and passes them,
 * through an {@code XlsRowHandler}, to the supplied bean handler.
 *
 * @param file        the .xls file to read
 * @param beanHandler receiver handed to the row handler
 * @throws IOException         if the file cannot be opened or read
 * @throws FileReaderException declared by this reader; raised by the
 *                             processing helpers it calls
 */
private void readXlsContacts(File file, BeanHandler beanHandler) throws IOException, FileReaderException {
    // resolve the column positions before the workbook is opened
    final HashMap<String, Integer> indexesByColumn = listXlsColumnIndexes(file);

    final POIFSFileSystem fileSystem = new POIFSFileSystem(file);
    try {
        final InputStream workbook = fileSystem.createDocumentInputStream("Workbook");
        try {
            final XlsRowHandler handler = new XlsRowHandler(this, indexesByColumn, beanHandler);
            final XlsRowsProcessor processor = new XlsRowsProcessor(workbook, headersRow, firstDataRow,
                    lastDataRow, sheet, handler);
            processor.process();
        } finally {
            // the document stream is released before its owning file system
            IOUtils.closeQuietly(workbook);
        }
    } finally {
        IOUtils.closeQuietly(fileSystem);
    }
}

From source file:com.sonicle.webtop.core.io.input.ExcelFileReader.java

License:Open Source License

/**
 * Lists the sheet names found in the "Workbook" stream of an .xls file.
 *
 * @param file the .xls file to inspect
 * @return the sheet names collected by the {@code XlsSheetsProcessor}
 * @throws IOException         if the file cannot be opened or read
 * @throws FileReaderException declared by this reader; raised by the
 *                             processing helpers it calls
 */
public List<String> listXlsSheets(File file) throws IOException, FileReaderException {
    final POIFSFileSystem fileSystem = new POIFSFileSystem(file);
    try {
        final InputStream workbook = fileSystem.createDocumentInputStream("Workbook");
        try {
            final XlsSheetsProcessor sheets = new XlsSheetsProcessor(workbook);
            sheets.process();
            return sheets.sheetNames;
        } finally {
            // the document stream is released before its owning file system
            IOUtils.closeQuietly(workbook);
        }
    } finally {
        IOUtils.closeQuietly(fileSystem);
    }
}