Example usage for org.apache.poi.poifs.filesystem DocumentEntry getSize

List of usage examples for org.apache.poi.poifs.filesystem DocumentEntry getSize

Introduction

In this page you can find the example usage for org.apache.poi.poifs.filesystem DocumentEntry getSize.

Prototype


public int getSize();

Source Link

Document

Get the size of the document, in bytes.

Usage

From source file:ReadOLE2Entry.java

License:Open Source License

/**
 * Reads the entire contents of a POIFS document entry and decodes it as an
 * array of little-endian 64-bit IEEE doubles.
 *
 * @param document the POIFS document entry to read
 * @return the decoded doubles; trailing bytes beyond a multiple of 8 are ignored
 * @throws IOException if the document stream cannot be read in full
 */
public static double[] ReadDouble(DocumentEntry document) throws IOException {
    int len = document.getSize();
    byte[] buf = new byte[len];
    // try-with-resources guarantees the stream is closed even if the read fails
    try (DocumentInputStream stream = new DocumentInputStream(document)) {
        stream.readFully(buf, 0, len);
    }
    double[] bufDbl = new double[len / 8];
    ByteBuffer.wrap(buf).order(ByteOrder.LITTLE_ENDIAN).asDoubleBuffer().get(bufDbl);
    return bufDbl;
}

From source file:ReadOLE2Entry.java

License:Open Source License

/**
 * Reads the entire contents of a POIFS document entry and decodes it as an
 * array of little-endian 32-bit integers.
 *
 * @param document the POIFS document entry to read
 * @return the decoded ints; trailing bytes beyond a multiple of 4 are ignored
 * @throws IOException if the document stream cannot be read in full
 */
public static int[] ReadInt(DocumentEntry document) throws IOException {
    int len = document.getSize();
    byte[] buf = new byte[len];
    // try-with-resources guarantees the stream is closed even if the read fails
    try (DocumentInputStream stream = new DocumentInputStream(document)) {
        stream.readFully(buf, 0, len);
    }
    int[] bufInt = new int[len / 4];
    ByteBuffer.wrap(buf).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer().get(bufInt);
    return bufInt;
}

From source file:ReadOLE2Entry.java

License:Open Source License

/**
 * Reads the entire contents of a POIFS document entry and decodes it as an
 * array of little-endian 16-bit shorts.
 *
 * @param document the POIFS document entry to read
 * @return the decoded shorts; a trailing odd byte is ignored
 * @throws IOException if the document stream cannot be read in full
 */
public static short[] ReadShort(DocumentEntry document) throws IOException {
    int len = document.getSize();
    byte[] buf = new byte[len];
    // try-with-resources guarantees the stream is closed even if the read fails
    try (DocumentInputStream stream = new DocumentInputStream(document)) {
        stream.readFully(buf, 0, len);
    }
    short[] bufShort = new short[len / 2];
    ByteBuffer.wrap(buf).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(bufShort);
    return bufShort;
}

From source file:ReadOLE2Entry.java

License:Open Source License

/**
 * Reads the entire contents of a POIFS document entry as a raw byte array.
 *
 * @param document the POIFS document entry to read
 * @return the document contents, exactly {@code document.getSize()} bytes
 * @throws IOException if the document stream cannot be read in full
 */
public static byte[] ReadBytes(DocumentEntry document) throws IOException {
    int len = document.getSize();
    byte[] buf = new byte[len];
    // try-with-resources guarantees the stream is closed even if the read fails
    try (DocumentInputStream stream = new DocumentInputStream(document)) {
        stream.readFully(buf, 0, len);
    }
    return buf;
}

From source file:ReadOLE2Entry.java

License:Open Source License

/**
 * Reads the entire contents of a POIFS document entry and decodes it as a
 * UTF-8 string.
 *
 * <p>The original relied on {@code new String(byte[])}, which uses the
 * platform-default charset (pre-Java 18) and therefore produced different
 * results on different machines; the charset is now explicit.
 *
 * @param document the POIFS document entry to read
 * @return the document contents decoded as UTF-8 text
 * @throws IOException if the document stream cannot be read in full
 */
public static String ReadString(DocumentEntry document) throws IOException {
    int len = document.getSize();
    byte[] buf = new byte[len];
    // try-with-resources guarantees the stream is closed even if the read fails
    try (DocumentInputStream stream = new DocumentInputStream(document)) {
        stream.readFully(buf, 0, len);
    }
    // NOTE(review): assumes the stored text is UTF-8 (or ASCII) — confirm the
    // actual encoding of the documents this is used with.
    return new String(buf, java.nio.charset.StandardCharsets.UTF_8);
}

From source file:com.ezdi.rtf.testRTFParser.RTFObjDataParser.java

License:Apache License

/**
 * Extracts the payload of an embedded POIFS (OLE2) container found inside an
 * RTF {@code \objdata} blob.
 *
 * <p>Resolution order, based on the container's root directory:
 * <ol>
 *   <li>a "Package" entry (OOXML wrapped in OLE2) — its bytes are returned;</li>
 *   <li>an OLE10Native record — its unwrapped data buffer is returned;</li>
 *   <li>a COMP_OBJ container — the "CONTENTS"/"Contents" stream is returned;</li>
 *   <li>anything else — the original input is copied verbatim and filename /
 *       content-type metadata is recorded.</li>
 * </ol>
 *
 * @param is the raw embedded-object bytes; must support {@code reset()} for
 *           the fallback branch
 * @param metadata receives resource-name and content-type for unrecognized types
 * @param unknownFilenameCount counter used to synthesize unique filenames
 * @return the extracted payload, or {@code null} if nothing could be extracted
 * @throws IOException if reading the container fails
 */
private byte[] handleEmbeddedPOIFS(InputStream is, Metadata metadata, AtomicInteger unknownFilenameCount)
        throws IOException {

    byte[] ret = null;
    try (NPOIFSFileSystem fs = new NPOIFSFileSystem(is)) {

        DirectoryNode root = fs.getRoot();

        if (root == null) {
            return ret;
        }

        if (root.hasEntry("Package")) {
            Entry ooxml = root.getEntry("Package");
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            // try-with-resources: the original leaked this TikaInputStream
            try (TikaInputStream stream =
                    TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml))) {
                IOUtils.copy(stream, out);
            }
            ret = out.toByteArray();
        } else {
            // try poifs
            POIFSDocumentType type = POIFSDocumentType.detectType(root);
            if (type == POIFSDocumentType.OLE10_NATIVE) {
                try {
                    // Try to un-wrap the OLE10Native record:
                    Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(root);
                    ret = ole.getDataBuffer();
                } catch (Ole10NativeException ex) {
                    // Not a valid OLE10Native record, skip it
                }
            } else if (type == POIFSDocumentType.COMP_OBJ) {

                // The stream name's case varies between producers.
                DocumentEntry contentsEntry;
                try {
                    contentsEntry = (DocumentEntry) root.getEntry("CONTENTS");
                } catch (FileNotFoundException ioe) {
                    contentsEntry = (DocumentEntry) root.getEntry("Contents");
                }

                try (DocumentInputStream inp = new DocumentInputStream(contentsEntry)) {
                    ret = new byte[contentsEntry.getSize()];
                    inp.readFully(ret);
                }
            } else {
                // Unknown container type: pass the raw bytes through and record
                // a synthetic filename plus the detected content type.
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                is.reset();
                IOUtils.copy(is, out);
                ret = out.toByteArray();
                metadata.set(Metadata.RESOURCE_NAME_KEY,
                        "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension());
                metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
            }
        }
    }
    return ret;
}

From source file:com.healthmarketscience.jackcess.util.OleBlobTest.java

License:Apache License

/**
 * Verifies that every entry of a compound OLE blob matches the corresponding
 * stream inside the attachment's compound-file data.
 *
 * <p>The attachment bytes are written to a temp file, opened read-only as a
 * POIFS filesystem, and each {@code cc} entry is compared byte-for-byte with
 * the POIFS document of the same name. Entries present only in the OLE data
 * are skipped.
 *
 * <p>Fixes two leaks in the original: the {@code FileOutputStream} was not
 * closed if {@code write} threw, and {@code attachFs} was not closed if a
 * comparison failed.
 *
 * @param cc the compound content to verify
 * @param attach the attachment holding the expected compound-file bytes
 * @throws Exception on I/O failure or assertion failure
 */
private static void checkCompoundStorage(OleBlob.CompoundContent cc, Attachment attach) throws Exception {
    File tmpData = File.createTempFile("attach_", ".dat");

    try {
        // Dump the attachment bytes to disk so POIFS can open them.
        try (FileOutputStream fout = new FileOutputStream(tmpData)) {
            fout.write(attach.getFileData());
        }

        POIFSFileSystem attachFs = new POIFSFileSystem(tmpData, true);
        try {
            for (OleBlob.CompoundContent.Entry e : cc) {
                DocumentEntry attachE = null;
                try {
                    attachE = CompoundOleUtil.getDocumentEntry(e.getName(), attachFs.getRoot());
                } catch (FileNotFoundException fnfe) {
                    // ignored, the ole data has extra entries
                    continue;
                }

                byte[] attachEBytes = toByteArray(new DocumentInputStream(attachE), attachE.getSize());
                byte[] entryBytes = toByteArray(e.getStream(), e.length());

                assertTrue(Arrays.equals(attachEBytes, entryBytes));
            }
        } finally {
            // Close even when an assertion above fails.
            ByteUtil.closeQuietly(attachFs);
        }
    } finally {
        tmpData.delete();
    }
}

From source file:com.krawler.esp.fileparser.word.ExtractWordFile.java

License:Open Source License

/**
 * Extracts the plain text from a Word binary (.doc) file on disk.
 *
 * <p>Reads the "WordDocument" stream's FIB header, dispatches Word 6.0 files
 * to {@code Word6Extractor}, locates the piece table in the "0Table"/"1Table"
 * stream, and walks the character runs, appending each run's text unless it
 * is marked deleted.
 *
 * <p>NOTE(review): {@code iStream} is never closed, and the intermediate
 * {@code din.read(...)} calls are not guaranteed to fill the buffer
 * ({@code read} may return fewer bytes than requested) — consider
 * {@code readFully} and a try-with-resources. Left unchanged here.
 *
 * @param filepath path to the .doc file to extract
 * @return the extracted document text
 * @throws FastSavedException if the document is fast-saved (unsupported)
 * @throws IOException if the file cannot be read
 */
public String extractText(String filepath) throws FastSavedException, IOException {
    InputStream iStream = new BufferedInputStream(new FileInputStream(filepath));
    POIFSFileSystem fsys = new POIFSFileSystem(iStream);

    // load our POIFS document streams.
    DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument");
    DocumentInputStream din = fsys.createDocumentInputStream("WordDocument");
    byte[] header = new byte[headerProps.getSize()];

    din.read(header);
    din.close();

    // FIB flag word: bit 0x4 = fast-saved, bit 0x100 = encrypted.
    int info = LittleEndian.getShort(header, 0xa);
    if ((info & 0x4) != 0) {
        throw new FastSavedException("Fast-saved files are unsupported at this time");
    }
    if ((info & 0x100) != 0) {
        System.out.println("This document is password protected");
    }

    // determine the version of Word this document came from.
    int nFib = LittleEndian.getShort(header, 0x2);
    switch (nFib) {
    case 101:
    case 102:
    case 103:
    case 104:
        // this is a Word 6.0 doc send it to the extractor for that version.
        Word6Extractor oldExtractor = new Word6Extractor();
        return oldExtractor.extractText(header);
    }

    // Get the information we need from the header
    boolean useTable1 = (info & 0x200) != 0;

    // get the location of the piece table
    int complexOffset = LittleEndian.getInt(header, 0x1a2);

    // determine which table stream we must use.
    String tableName = null;
    if (useTable1) {
        tableName = "1Table";
    } else {
        tableName = "0Table";
    }

    DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName);
    byte[] tableStream = new byte[table.getSize()];

    din = fsys.createDocumentInputStream(tableName);

    din.read(tableStream);
    din.close();

    // Offsets into the FIB for the CHP bin table and the start of text.
    int chpOffset = LittleEndian.getInt(header, 0xfa);
    int chpSize = LittleEndian.getInt(header, 0xfe);
    int fcMin = LittleEndian.getInt(header, 0x18);

    // load our text pieces and our character runs
    ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin);
    TextPieceTable tpt = cft.getTextPieceTable();
    List textPieces = tpt.getTextPieces();

    CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin, tpt);

    // make the POIFS objects available for garbage collection
    din = null;
    fsys = null;
    table = null;
    headerProps = null;

    List textRuns = cbt.getTextRuns();
    Iterator runIt = textRuns.iterator();
    Iterator textIt = textPieces.iterator();

    // Track the text piece the current run falls into.
    TextPiece currentPiece = (TextPiece) textIt.next();
    int currentTextStart = currentPiece.getStart();
    int currentTextEnd = currentPiece.getEnd();

    WordTextBuffer finalTextBuf = new WordTextBuffer();

    // iterate through all text runs extract the text only if they haven't
    // been
    // deleted
    while (runIt.hasNext()) {
        CHPX chpx = (CHPX) runIt.next();
        boolean deleted = isDeleted(chpx.getGrpprl());
        if (deleted) {
            continue;
        }

        int runStart = chpx.getStart();
        int runEnd = chpx.getEnd();

        // Advance to the text piece that contains the start of this run.
        while (runStart >= currentTextEnd) {
            currentPiece = (TextPiece) textIt.next();
            currentTextStart = currentPiece.getStart();
            currentTextEnd = currentPiece.getEnd();
        }

        if (runEnd < currentTextEnd) {
            // Run lies entirely inside the current piece.
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else if (runEnd > currentTextEnd) {
            // Run spans multiple pieces: emit piece by piece until it ends.
            while (runEnd > currentTextEnd) {
                String str = currentPiece.substring(runStart - currentTextStart,
                        currentTextEnd - currentTextStart);
                finalTextBuf.append(str);
                if (textIt.hasNext()) {
                    currentPiece = (TextPiece) textIt.next();
                    currentTextStart = currentPiece.getStart();
                    runStart = currentTextStart;
                    currentTextEnd = currentPiece.getEnd();
                } else {
                    return finalTextBuf.toString();
                }
            }
            String str = currentPiece.substring(0, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else {
            // Run ends exactly at the piece boundary; pre-fetch the next piece.
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            if (textIt.hasNext()) {
                currentPiece = (TextPiece) textIt.next();
                currentTextStart = currentPiece.getStart();
                currentTextEnd = currentPiece.getEnd();
            }
            finalTextBuf.append(str);
        }
    }
    return finalTextBuf.toString();
}

From source file:com.krawler.esp.fileparser.wordparser.ExtractWordFile.java

License:Open Source License

/**
 * Extracts the plain text from a Word binary (.doc) file on disk.
 *
 * <p>Variant of the extractor above: the piece table is built before the
 * Word 6.0 dispatch so that {@code Word6Extractor} can receive it.
 *
 * <p>NOTE(review): {@code iStream} is never closed, {@code din.read(...)} is
 * not guaranteed to fill the buffer, and the local {@code text} list is
 * created but never used. Left unchanged here.
 *
 * @param filepath path to the .doc file to extract
 * @return the extracted document text
 * @throws FastSavedException if the document is fast-saved (unsupported)
 * @throws IOException if the file cannot be read
 */
public String extractText(String filepath) throws FastSavedException, IOException {
    InputStream iStream = new BufferedInputStream(new FileInputStream(filepath));

    // NOTE(review): unused — candidate for removal.
    ArrayList text = new ArrayList();
    POIFSFileSystem fsys = new POIFSFileSystem(iStream);

    // load our POIFS document streams.
    DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument");
    DocumentInputStream din = fsys.createDocumentInputStream("WordDocument");
    byte[] header = new byte[headerProps.getSize()];

    din.read(header);
    din.close();

    // FIB flag word: bit 0x4 = fast-saved, bit 0x100 = encrypted.
    int info = LittleEndian.getShort(header, 0xa);
    if ((info & 0x4) != 0) {
        throw new FastSavedException("Fast-saved files are unsupported at this time");
    }
    if ((info & 0x100) != 0) {
        System.out.println("This document is password protected");
    }

    // determine the version of Word this document came from.
    int nFib = LittleEndian.getShort(header, 0x2);
    // Get the information we need from the header
    boolean useTable1 = (info & 0x200) != 0;

    // get the location of the piece table
    int complexOffset = LittleEndian.getInt(header, 0x1a2);

    // determine which table stream we must use.
    String tableName = null;
    if (useTable1) {
        tableName = "1Table";
    } else {
        tableName = "0Table";
    }

    DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName);
    byte[] tableStream = new byte[table.getSize()];

    din = fsys.createDocumentInputStream(tableName);

    din.read(tableStream);
    din.close();

    // Offsets into the FIB for the CHP bin table and the start of text.
    int chpOffset = LittleEndian.getInt(header, 0xfa);
    int chpSize = LittleEndian.getInt(header, 0xfe);
    int fcMin = LittleEndian.getInt(header, 0x18);

    // Build the piece table first: the Word 6.0 path below needs it.
    ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin);
    TextPieceTable tpt = cft.getTextPieceTable();
    switch (nFib) {
    case 101:
    case 102:
    case 103:
    case 104:
        // this is a Word 6.0 doc send it to the extractor for that version.
        Word6Extractor oldExtractor = new Word6Extractor();
        return oldExtractor.extractText(header, tpt);
    }
    CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin, tpt);
    // load our text pieces and our character runs

    List textPieces = tpt.getTextPieces();

    // make the POIFS objects available for garbage collection
    din = null;
    fsys = null;
    table = null;
    headerProps = null;

    List textRuns = cbt.getTextRuns();
    Iterator runIt = textRuns.iterator();
    Iterator textIt = textPieces.iterator();

    // Track the text piece the current run falls into.
    TextPiece currentPiece = (TextPiece) textIt.next();
    int currentTextStart = currentPiece.getStart();
    int currentTextEnd = currentPiece.getEnd();

    WordTextBuffer finalTextBuf = new WordTextBuffer();

    // iterate through all text runs extract the text only if they haven't
    // been
    // deleted
    while (runIt.hasNext()) {
        CHPX chpx = (CHPX) runIt.next();
        boolean deleted = isDeleted(chpx.getGrpprl());
        if (deleted) {
            continue;
        }

        int runStart = chpx.getStart();
        int runEnd = chpx.getEnd();

        // Advance to the text piece that contains the start of this run.
        while (runStart >= currentTextEnd) {
            currentPiece = (TextPiece) textIt.next();
            currentTextStart = currentPiece.getStart();
            currentTextEnd = currentPiece.getEnd();
        }

        if (runEnd < currentTextEnd) {
            // Run lies entirely inside the current piece.
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else if (runEnd > currentTextEnd) {
            // Run spans multiple pieces: emit piece by piece until it ends.
            while (runEnd > currentTextEnd) {
                String str = currentPiece.substring(runStart - currentTextStart,
                        currentTextEnd - currentTextStart);
                finalTextBuf.append(str);
                if (textIt.hasNext()) {
                    currentPiece = (TextPiece) textIt.next();
                    currentTextStart = currentPiece.getStart();
                    runStart = currentTextStart;
                    currentTextEnd = currentPiece.getEnd();
                } else {
                    return finalTextBuf.toString();
                }
            }
            String str = currentPiece.substring(0, runEnd - currentTextStart);
            finalTextBuf.append(str);
        } else {
            // Run ends exactly at the piece boundary; pre-fetch the next piece.
            String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
            if (textIt.hasNext()) {
                currentPiece = (TextPiece) textIt.next();
                currentTextStart = currentPiece.getStart();
                currentTextEnd = currentPiece.getEnd();
            }
            finalTextBuf.append(str);
        }
    }
    return finalTextBuf.toString();
}

From source file:com.pnf.plugin.ole.parser.streams.OleProcessor.java

License:Apache License

/**
 * Recursively mirrors a POIFS directory tree into the plugin's stream model.
 *
 * <p>Directory entries become {@code ContainerStream}s (or
 * {@code VbaContainerStream}s for VBA storages) and are descended into;
 * document entries are read fully into memory and wrapped as
 * {@code DocumentStream}s. Read failures are recorded via the stream's error
 * flag rather than aborting the walk.
 *
 * @param parent the container to attach new children to
 * @param root the POIFS directory to walk
 * @throws IOException propagated from directory traversal
 */
private void addAll(ContainerStream parent, DirectoryEntry root) throws IOException {
    for (Entry entry : root) {
        if (entry instanceof DirectoryEntry) {
            DirectoryEntry dir = (DirectoryEntry) entry;
            ClassID clsid = dir.getStorageClsid();
            String dirName = dir.getName();

            // VBA storages get a specialized container; everything else a generic one.
            if (Stream.isVbaStorage(dirName)) {
                addAll(new VbaContainerStream(parent, clsid, dirName), dir);
            } else {
                addAll(new ContainerStream(parent, clsid, dirName), dir);
            }
        } else if (entry instanceof DocumentEntry) {
            DocumentEntry docEntry = (DocumentEntry) entry;

            // Buffer sized from the entry's declared length.
            byte[] contents = new byte[docEntry.getSize()];

            // Best-effort read: remember the failure instead of propagating it.
            boolean failed = false;
            try (DocumentInputStream in = new DocumentInputStream(docEntry)) {
                try {
                    in.readFully(contents);
                } catch (IndexOutOfBoundsException oob) {
                    failed = true;
                }
            } catch (IOException readError) {
                failed = true;
            }

            // The constructor registers the new child with its parent.
            new DocumentStream(parent, entry.getName(), ByteBuffer.wrap(contents), failed);
        }
    }
}