Example usage for org.apache.poi.poifs.filesystem POIFSFileSystem getRoot

Introduction

This page collects example usages of org.apache.poi.poifs.filesystem.POIFSFileSystem.getRoot().

Prototype

public DirectoryNode getRoot()

Source Link

Document

Gets the root entry of the file system.

Usage

From source file:NewEmptyJUnitTest.java

/**
 * Test that we can get data from two different
 *  embeded word documents/*from ww  w .  ja  v a  2 s.  com*/
 * @throws Exception
 */
public void testExtractFromEmbeded() throws Exception {
    POIFSFileSystem fs = new POIFSFileSystem(
            POIDataSamples.getSpreadSheetInstance().openResourceAsStream(filename3));
    HWPFDocument doc;
    WordExtractor extractor3;

    DirectoryNode dirA = (DirectoryNode) fs.getRoot().getEntry("MBD0000A3B7");
    DirectoryNode dirB = (DirectoryNode) fs.getRoot().getEntry("MBD0000A3B2");

    // Should have WordDocument and 1Table
    assertNotNull(dirA.getEntry("1Table"));
    assertNotNull(dirA.getEntry("WordDocument"));

    assertNotNull(dirB.getEntry("1Table"));
    assertNotNull(dirB.getEntry("WordDocument"));

    // Check each in turn
    doc = new HWPFDocument(dirA, fs);
    extractor3 = new WordExtractor(doc);

    assertNotNull(extractor3.getText());
    assertTrue(extractor3.getText().length() > 20);
    assertEquals("I am a sample document\r\nNot much on me\r\nI am document 1\r\n", extractor3.getText());
    assertEquals("Sample Doc 1", extractor3.getSummaryInformation().getTitle());
    assertEquals("Sample Test", extractor3.getSummaryInformation().getSubject());

    doc = new HWPFDocument(dirB, fs);
    extractor3 = new WordExtractor(doc);

    assertNotNull(extractor3.getText());
    assertTrue(extractor3.getText().length() > 20);
    assertEquals("I am another sample document\r\nNot much on me\r\nI am document 2\r\n", extractor3.getText());
    assertEquals("Sample Doc 2", extractor3.getSummaryInformation().getTitle());
    assertEquals("Another Sample Test", extractor3.getSummaryInformation().getSubject());
}

From source file:NewEmptyJUnitTest.java

/**
 * Regression test for Bug 51686 [RESOLVED FIXED]: updating to POI 3.8 beta 4
 * caused a ConcurrentModificationException in Tika's OfficeParser.
 *
 * Iterates over the root entries and, for each "WordDocument" entry, runs a
 * {@link WordExtractor} over the whole file system while the iteration is
 * still in progress.
 *
 * @throws IOException if the sample document cannot be read
 */
public void testBug51686() throws IOException {
    POIFSFileSystem fs = new POIFSFileSystem(
            POIDataSamples.getDocumentInstance().openResourceAsStream("Bug51686.doc"));

    String extracted = null;

    for (Entry child : fs.getRoot()) {
        if (!"WordDocument".equals(child.getName())) {
            continue;
        }
        // Extract (and close) inside the loop on purpose: this is the scenario under test
        WordExtractor wordExtractor = new WordExtractor(fs);
        try {
            extracted = wordExtractor.getText();
        } finally {
            wordExtractor.close();
        }
    }

    assertNotNull(extracted);
}

From source file:com.auxilii.msgparser.MsgParser.java

License:Open Source License

/**
 * Parses a .msg file supplied as an input stream.
 *
 * The .msg file is organised like a file system (directories containing
 * documents), so the stream is opened as a POIFS file system and the
 * directory-based overload is invoked on its root entry.
 *
 * @param msgFileStream The .msg file as an InputStream.
 * @return A {@link Message} object representing the .msg file.
 * @throws IOException Thrown if the file could not be loaded or parsed.
 */
public Message parseMsg(InputStream msgFileStream) throws IOException {
    DirectoryEntry root = new POIFSFileSystem(msgFileStream).getRoot();
    Message parsed = new Message();
    // Walk the complete 'filesystem' starting at the root node
    parseMsg(root, parsed);
    return parsed;
}

From source file:com.healthmarketscience.jackcess.util.OleBlobTest.java

License:Apache License

/**
 * Verifies that every entry of the given compound content has the same bytes
 * as the equally named document in the attachment's compound file.
 *
 * The attachment data is written to a temporary file, opened read-only as a
 * POIFS file system, and compared entry by entry. Entries that are not found
 * in the attachment file system are skipped.
 *
 * @param cc     compound OLE content whose entries are checked
 * @param attach attachment providing the reference file data
 * @throws Exception if writing, opening or reading the data fails
 */
private static void checkCompoundStorage(OleBlob.CompoundContent cc, Attachment attach) throws Exception {
    File tmpData = File.createTempFile("attach_", ".dat");
    POIFSFileSystem attachFs = null;

    try {
        FileOutputStream fout = new FileOutputStream(tmpData);
        try {
            fout.write(attach.getFileData());
        } finally {
            // release the file handle even when the write fails
            fout.close();
        }

        attachFs = new POIFSFileSystem(tmpData, true);

        for (OleBlob.CompoundContent.Entry e : cc) {
            DocumentEntry attachE = null;
            try {
                attachE = CompoundOleUtil.getDocumentEntry(e.getName(), attachFs.getRoot());
            } catch (FileNotFoundException fnfe) {
                // ignored, the ole data has extra entries
                continue;
            }

            byte[] attachEBytes = toByteArray(new DocumentInputStream(attachE), attachE.getSize());
            byte[] entryBytes = toByteArray(e.getStream(), e.length());

            assertTrue(Arrays.equals(attachEBytes, entryBytes));
        }

    } finally {
        // close the file system before deleting: it was opened on the temp
        // file itself, and a failed assertion must not leave it open
        if (attachFs != null) {
            ByteUtil.closeQuietly(attachFs);
        }
        tmpData.delete();
    }
}

From source file:com.hp.octane.integrations.uft.UftTestDiscoveryUtils.java

License:Apache License

/**
 * Extracts the XML text stored in the "ComponentInfo" document of a POIFS
 * container.
 *
 * The document bytes are decoded with {@code StringUtil.getFromUnicodeLE};
 * everything before the first '<' is dropped and NUL characters are removed.
 *
 * @param stream stream over the POIFS container
 * @return the XML content, or an empty string when no "ComponentInfo"
 *         document entry is found under the root
 * @throws IOException if the container or the document cannot be read
 */
private static String extractXmlContentFromTspFile(InputStream stream) throws IOException {
    POIFSFileSystem poiFS = new POIFSFileSystem(stream);
    DirectoryNode root = poiFS.getRoot();
    String xmlData = "";

    for (Entry entry : root) {
        String name = entry.getName();
        if ("ComponentInfo".equals(name)) {
            if (entry instanceof DirectoryEntry) {
                System.out.println(entry);
            } else if (entry instanceof DocumentEntry) {
                byte[] content = new byte[((DocumentEntry) entry).getSize()];
                int readBytes = 0;
                InputStream documentStream = poiFS.createDocumentInputStream("ComponentInfo");
                try {
                    // a single read() may return fewer bytes than requested, so keep
                    // reading until the buffer is full or the stream ends
                    while (readBytes < content.length) {
                        int chunk = documentStream.read(content, readBytes, content.length - readBytes);
                        if (chunk < 0) {
                            break;
                        }
                        readBytes += chunk;
                    }
                } finally {
                    documentStream.close();
                }
                if (readBytes < content.length) {
                    logger.warn("expected to read " + content.length + " bytes, but read and stopped after "
                            + readBytes);
                }
                String fromUnicodeLE = StringUtil.getFromUnicodeLE(content);
                // NOTE(review): substring throws if the content has no '<' - confirm that is intended
                xmlData = fromUnicodeLE.substring(fromUnicodeLE.indexOf('<')).replaceAll("\u0000", "");
            }
        }
    }
    return xmlData;
}

From source file:com.hpe.application.automation.tools.octane.actions.UFTTestUtil.java

License:Open Source License

/**
 * Decodes the XML text stored in the "ComponentInfo" document of a POIFS
 * container.
 *
 * The document bytes are decoded with {@code StringUtil.getFromUnicodeLE};
 * everything before the first '<' is dropped and NUL characters are removed.
 *
 * @param stream stream over the POIFS container
 * @return the XML content, or an empty string when no "ComponentInfo"
 *         document entry is found under the root
 * @throws IOException if the container or the document cannot be read
 */
public static String decodeXmlContent(InputStream stream) throws IOException {
    POIFSFileSystem poiFS = new POIFSFileSystem(stream);
    DirectoryNode root = poiFS.getRoot();
    String xmlData = "";

    for (Entry entry : root) {
        String name = entry.getName();
        if ("ComponentInfo".equals(name)) {
            if (entry instanceof DirectoryEntry) {
                System.out.println(entry);
            } else if (entry instanceof DocumentEntry) {
                byte[] content = new byte[((DocumentEntry) entry).getSize()];
                InputStream documentStream = poiFS.createDocumentInputStream("ComponentInfo");
                try {
                    // a single read() may return fewer bytes than requested, so keep
                    // reading until the buffer is full or the stream ends
                    int filled = 0;
                    while (filled < content.length) {
                        int chunk = documentStream.read(content, filled, content.length - filled);
                        if (chunk < 0) {
                            break;
                        }
                        filled += chunk;
                    }
                } finally {
                    documentStream.close();
                }
                String fromUnicodeLE = StringUtil.getFromUnicodeLE(content);
                // NOTE(review): substring throws if the content has no '<' - confirm that is intended
                xmlData = fromUnicodeLE.substring(fromUnicodeLE.indexOf('<')).replaceAll("\u0000", "");
            }
        }
    }
    return xmlData;
}

From source file:com.krawler.esp.fileparser.word.ExtractWordFile.java

License:Open Source License

/**
 * Extracts the text of the Word file at the given path.
 *
 * The method loads the "WordDocument" stream and one of the "0Table" /
 * "1Table" streams from the POIFS container, builds the text piece table and
 * the character run table from them, and then appends the text covered by
 * every character run that {@code isDeleted} does not report as deleted.
 * Files whose nFib value is 101-104 are handed to {@link Word6Extractor}.
 *
 * @param filepath path of the Word file to read
 * @return the extracted text
 * @throws FastSavedException if bit 0x4 of the flags at header offset 0xa is set
 * @throws IOException if the file or one of its streams cannot be read
 */
public String extractText(String filepath) throws FastSavedException, IOException {
    InputStream iStream = new BufferedInputStream(new FileInputStream(filepath));
    POIFSFileSystem fsys = new POIFSFileSystem(iStream);

    // load our POIFS document streams.
    DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument");
    DocumentInputStream din = fsys.createDocumentInputStream("WordDocument");
    byte[] header = new byte[headerProps.getSize()];

    // NOTE(review): the number of bytes actually read is not checked
    din.read(header);
    din.close();

    // flag bits at header offset 0xa: 0x4 = fast-saved, 0x100 = password protected
    int info = LittleEndian.getShort(header, 0xa);
    if ((info & 0x4) != 0) {
        throw new FastSavedException("Fast-saved files are unsupported at this time");
    }
    if ((info & 0x100) != 0) {
        System.out.println("This document is password protected");
    }

    // determine the version of Word this document came from.
    int nFib = LittleEndian.getShort(header, 0x2);
    switch (nFib) {
    case 101:
    case 102:
    case 103:
    case 104:
        // this is a Word 6.0 doc send it to the extractor for that version.
        Word6Extractor oldExtractor = new Word6Extractor();
        return oldExtractor.extractText(header);
    }

    // Get the information we need from the header
    // (flag bit 0x200 selects "1Table" over "0Table" below)
    boolean useTable1 = (info & 0x200) != 0;

    // get the location of the piece table
    int complexOffset = LittleEndian.getInt(header, 0x1a2);

    // determine which table stream we must use.
    String tableName = null;
    if (useTable1) {
        tableName = "1Table";
    } else {
        tableName = "0Table";
    }

    DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName);
    byte[] tableStream = new byte[table.getSize()];

    din = fsys.createDocumentInputStream(tableName);

    // NOTE(review): the number of bytes actually read is not checked
    din.read(tableStream);
    din.close();

    int chpOffset = LittleEndian.getInt(header, 0xfa);
    int chpSize = LittleEndian.getInt(header, 0xfe);
    int fcMin = LittleEndian.getInt(header, 0x18);

    // load our text pieces and our character runs
    ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin);
    TextPieceTable tpt = cft.getTextPieceTable();
    List textPieces = tpt.getTextPieces();

    CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin, tpt);

    // make the POIFS objects available for garbage collection
    din = null;
    fsys = null;
    table = null;
    headerProps = null;

    List textRuns = cbt.getTextRuns();
    Iterator runIt = textRuns.iterator();
    Iterator textIt = textPieces.iterator();

    // NOTE(review): next() is called without hasNext(); an empty piece list would throw here
    TextPiece currentPiece = (TextPiece) textIt.next();
    int currentTextStart = currentPiece.getStart();
    int currentTextEnd = currentPiece.getEnd();

    WordTextBuffer finalTextBuf = new WordTextBuffer();

    // iterate through all text runs extract the text only if they haven't
    // been
    // deleted
    while (runIt.hasNext()) {
        CHPX chpx = (CHPX) runIt.next();
        boolean deleted = isDeleted(chpx.getGrpprl());
        if (deleted) {
            continue;
        }

        int runStart = chpx.getStart();
        int runEnd = chpx.getEnd();

        // advance to the text piece that contains the start of this run
        while (runStart >= currentTextEnd) {
            currentPiece = (TextPiece) textIt.next();
            currentTextStart = currentPiece.getStart();
            currentTextEnd = currentPiece.getEnd();
        }

        if (runEnd < currentTextEnd) {
            // run ends inside the current piece
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else if (runEnd > currentTextEnd) {
            // run spans several pieces: take the rest of each piece, then move on
            while (runEnd > currentTextEnd) {
                String str = currentPiece.substring(runStart - currentTextStart,
                        currentTextEnd - currentTextStart);
                finalTextBuf.append(str);
                if (textIt.hasNext()) {
                    currentPiece = (TextPiece) textIt.next();
                    currentTextStart = currentPiece.getStart();
                    runStart = currentTextStart;
                    currentTextEnd = currentPiece.getEnd();
                } else {
                    // no more pieces: return what has been collected so far
                    return finalTextBuf.toString();
                }
            }
            String str = currentPiece.substring(0, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else {
            // run ends exactly at the end of the current piece
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            if (textIt.hasNext()) {
                currentPiece = (TextPiece) textIt.next();
                currentTextStart = currentPiece.getStart();
                currentTextEnd = currentPiece.getEnd();
            }
            finalTextBuf.append(str);
        }
    }
    return finalTextBuf.toString();
}

From source file:com.krawler.esp.fileparser.wordparser.ExtractWordFile.java

License:Open Source License

/**
 * Extracts the text of the Word file at the given path.
 *
 * The method loads the "WordDocument" stream and one of the "0Table" /
 * "1Table" streams from the POIFS container and builds the text piece table
 * from them. Files whose nFib value is 101-104 are handed to
 * {@link Word6Extractor} together with that piece table. For all other files
 * the character run table is built and the text covered by every run that
 * {@code isDeleted} does not report as deleted is appended to the result.
 *
 * @param filepath path of the Word file to read
 * @return the extracted text
 * @throws FastSavedException if bit 0x4 of the flags at header offset 0xa is set
 * @throws IOException if the file or one of its streams cannot be read
 */
public String extractText(String filepath) throws FastSavedException, IOException {
    InputStream iStream = new BufferedInputStream(new FileInputStream(filepath));

    POIFSFileSystem fsys = new POIFSFileSystem(iStream);

    // load our POIFS document streams.
    DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument");
    DocumentInputStream din = fsys.createDocumentInputStream("WordDocument");
    byte[] header = new byte[headerProps.getSize()];

    // NOTE(review): the number of bytes actually read is not checked
    din.read(header);
    din.close();

    // flag bits at header offset 0xa: 0x4 = fast-saved, 0x100 = password protected
    int info = LittleEndian.getShort(header, 0xa);
    if ((info & 0x4) != 0) {
        throw new FastSavedException("Fast-saved files are unsupported at this time");
    }
    if ((info & 0x100) != 0) {
        System.out.println("This document is password protected");
    }

    // determine the version of Word this document came from.
    int nFib = LittleEndian.getShort(header, 0x2);
    // Get the information we need from the header
    // (flag bit 0x200 selects "1Table" over "0Table" below)
    boolean useTable1 = (info & 0x200) != 0;

    // get the location of the piece table
    int complexOffset = LittleEndian.getInt(header, 0x1a2);

    // determine which table stream we must use.
    String tableName = null;
    if (useTable1) {
        tableName = "1Table";
    } else {
        tableName = "0Table";
    }

    DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName);
    byte[] tableStream = new byte[table.getSize()];

    din = fsys.createDocumentInputStream(tableName);

    // NOTE(review): the number of bytes actually read is not checked
    din.read(tableStream);
    din.close();

    int chpOffset = LittleEndian.getInt(header, 0xfa);
    int chpSize = LittleEndian.getInt(header, 0xfe);
    int fcMin = LittleEndian.getInt(header, 0x18);

    // the piece table is built before the version switch because the
    // Word 6.0 extractor call below takes it as an argument
    ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin);
    TextPieceTable tpt = cft.getTextPieceTable();
    switch (nFib) {
    case 101:
    case 102:
    case 103:
    case 104:
        // this is a Word 6.0 doc send it to the extractor for that version.
        Word6Extractor oldExtractor = new Word6Extractor();
        return oldExtractor.extractText(header, tpt);
    }
    CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin, tpt);
    // load our text pieces and our character runs

    List textPieces = tpt.getTextPieces();

    // make the POIFS objects available for garbage collection
    din = null;
    fsys = null;
    table = null;
    headerProps = null;

    List textRuns = cbt.getTextRuns();
    Iterator runIt = textRuns.iterator();
    Iterator textIt = textPieces.iterator();

    // NOTE(review): next() is called without hasNext(); an empty piece list would throw here
    TextPiece currentPiece = (TextPiece) textIt.next();
    int currentTextStart = currentPiece.getStart();
    int currentTextEnd = currentPiece.getEnd();

    WordTextBuffer finalTextBuf = new WordTextBuffer();

    // iterate through all text runs extract the text only if they haven't
    // been
    // deleted
    while (runIt.hasNext()) {
        CHPX chpx = (CHPX) runIt.next();
        boolean deleted = isDeleted(chpx.getGrpprl());
        if (deleted) {
            continue;
        }

        int runStart = chpx.getStart();
        int runEnd = chpx.getEnd();

        // advance to the text piece that contains the start of this run
        while (runStart >= currentTextEnd) {
            currentPiece = (TextPiece) textIt.next();
            currentTextStart = currentPiece.getStart();
            currentTextEnd = currentPiece.getEnd();
        }

        if (runEnd < currentTextEnd) {
            // run ends inside the current piece
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else if (runEnd > currentTextEnd) {
            // run spans several pieces: take the rest of each piece, then move on
            while (runEnd > currentTextEnd) {
                String str = currentPiece.substring(runStart - currentTextStart,
                        currentTextEnd - currentTextStart);
                finalTextBuf.append(str);
                if (textIt.hasNext()) {
                    currentPiece = (TextPiece) textIt.next();
                    currentTextStart = currentPiece.getStart();
                    runStart = currentTextStart;
                    currentTextEnd = currentPiece.getEnd();
                } else {
                    // no more pieces: return what has been collected so far
                    return finalTextBuf.toString();
                }
            }
            String str = currentPiece.substring(0, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else {
            // run ends exactly at the end of the current piece
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            if (textIt.hasNext()) {
                currentPiece = (TextPiece) textIt.next();
                currentTextStart = currentPiece.getStart();
                currentTextEnd = currentPiece.getEnd();
            }
            finalTextBuf.append(str);
        }
    }
    return finalTextBuf.toString();
}

From source file:com.oneis.graphics.ThumbnailFinder.java

License:Mozilla Public License

/**
 * Try and get a thumbnail from an old Microsoft Office document.
 *
 * Opens {@code inFilename} as a POIFS file system, reads the summary
 * information stream and, if it carries thumbnail data, passes the
 * thumbnail's WMF bytes to {@code tryWMFFormat}. Any exception is
 * passed to {@code logIgnoredException} and not propagated.
 */
private void findFromOldMSOffice() {
    try {
        File poiFilesystem = new File(inFilename);

        // Open the POI filesystem.
        POIFSFileSystem poifs;
        InputStream is = new FileInputStream(poiFilesystem);
        try {
            poifs = new POIFSFileSystem(is);
        } finally {
            // close the file even when it is not a valid POIFS container
            is.close();
        }

        // Read the summary information.
        DirectoryEntry dir = poifs.getRoot();
        DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
        PropertySet ps;
        DocumentInputStream dis = new DocumentInputStream(siEntry);
        try {
            ps = new PropertySet(dis);
        } finally {
            dis.close();
        }
        SummaryInformation si = new SummaryInformation(ps);
        byte[] thumbnailData = si.getThumbnail();
        if (thumbnailData != null) {
            Thumbnail thumbnail = new Thumbnail(thumbnailData);
            byte[] wmf = thumbnail.getThumbnailAsWMF();
            // Got something!
            thumbnailDimensions = tryWMFFormat(new ByteArrayInputStream(wmf), outFilename, outFormat,
                    maxDimension);
        }
    } catch (Exception e) {
        logIgnoredException("ThumbnailFinder Apache POI file reading failed", e);
    }
}

From source file:com.orange.ocara.model.export.docx.AuditDocxExporter.java

License:Mozilla Public License

/**
 * Create an OleObject using a sample.
 *
 * Loads the sample {@code word/embeddings/oleObject.bin} from the template
 * directory, replaces its {@code Ole10Native} entry with one built from the
 * content of {@code from}, and writes the resulting file system to {@code to}.
 *
 * @param from File to embed
 * @param to   Destination file
 * @throws IOException          if the sample, the file to embed or the destination cannot be read or written
 * @throws Ole10NativeException declared for the Ole10Native handling
 */
private void createOleObject(File from, File to) throws IOException, Ole10NativeException {
    File existingOleObject = new File(templateDirectory, "word/embeddings/oleObject.bin");

    OutputStream os = null;
    try {
        POIFSFileSystem fs = new POIFSFileSystem(FileUtils.openInputStream(existingOleObject));

        // drop the sample's native payload; a new one is created below
        fs.getRoot().getEntry(Ole10Native.OLE10_NATIVE).delete();

        // readFileToByteArray closes the file itself, so no stream is left open
        Ole10Native ole = new Ole10Native(from.getName(), from.getName(), from.getName(),
                FileUtils.readFileToByteArray(from));

        ByteArrayOutputStream stream = new ByteArrayOutputStream();
        ole.writeOut(stream);

        fs.getRoot().createDocument(Ole10Native.OLE10_NATIVE, new ByteArrayInputStream(stream.toByteArray()));

        os = FileUtils.openOutputStream(to);
        fs.writeFilesystem(os);

    } finally {
        IOUtils.closeQuietly(os);
    }
}