Usage examples for org.apache.poi.poifs.filesystem.DocumentEntry.getSize()
/**
 * Returns the size of this document entry.
 *
 * NOTE(review): the callers shown in this listing use the returned value as the
 * byte length of the entry's stream (they allocate {@code new byte[getSize()]}
 * and then read the stream into it) -- confirm the unit against the POI Javadoc.
 */
public int getSize();
From source file:ReadOLE2Entry.java
License:Open Source License
/**
 * Loads every byte of the given OLE2 document entry and reinterprets the bytes
 * as a sequence of little-endian 64-bit floating point values.
 *
 * @param document entry whose stream is read from its start for {@code getSize()} bytes
 * @return {@code getSize() / 8} doubles; trailing bytes that do not fill a whole
 *         double are left out of the result
 * @throws IOException if opening, reading or closing the entry's stream fails
 */
public static double[] ReadDouble(DocumentEntry document) throws IOException {
    DocumentInputStream in = new DocumentInputStream(document);
    final int byteCount = document.getSize();
    final byte[] raw = new byte[byteCount];
    final double[] values = new double[byteCount / 8];
    try {
        in.readFully(raw);
    } finally {
        in.close();
    }

    ByteBuffer view = ByteBuffer.wrap(raw);
    view.order(ByteOrder.LITTLE_ENDIAN);
    view.asDoubleBuffer().get(values);
    return values;
}
From source file:ReadOLE2Entry.java
License:Open Source License
/**
 * Loads every byte of the given OLE2 document entry and reinterprets the bytes
 * as a sequence of little-endian 32-bit integers.
 *
 * @param document entry whose stream is read from its start for {@code getSize()} bytes
 * @return {@code getSize() / 4} ints; trailing bytes that do not fill a whole
 *         int are left out of the result
 * @throws IOException if opening, reading or closing the entry's stream fails
 */
public static int[] ReadInt(DocumentEntry document) throws IOException {
    DocumentInputStream in = new DocumentInputStream(document);
    final int byteCount = document.getSize();
    final byte[] raw = new byte[byteCount];
    final int[] values = new int[byteCount / 4];
    try {
        in.readFully(raw);
    } finally {
        in.close();
    }

    ByteBuffer view = ByteBuffer.wrap(raw);
    view.order(ByteOrder.LITTLE_ENDIAN);
    view.asIntBuffer().get(values);
    return values;
}
From source file:ReadOLE2Entry.java
License:Open Source License
public static short[] ReadShort(DocumentEntry document) throws IOException { DocumentInputStream stream = new DocumentInputStream(document); int len = document.getSize(); byte[] buf = new byte[len]; short[] bufShort = new short[len / 2]; try {//from w w w .j a va2s . c o m stream.readFully(buf, 0, len); } finally { stream.close(); } ByteBuffer.wrap(buf).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(bufShort); return bufShort; }
From source file:ReadOLE2Entry.java
License:Open Source License
public static byte[] ReadBytes(DocumentEntry document) throws IOException { DocumentInputStream stream = new DocumentInputStream(document); int len = document.getSize(); byte[] buf = new byte[len]; try {//from ww w. j av a 2s .c o m stream.readFully(buf, 0, len); } finally { stream.close(); } return buf; }
From source file:ReadOLE2Entry.java
License:Open Source License
/**
 * Reads the whole OLE2 document entry and decodes its bytes as text.
 *
 * <p>The bytes are decoded with the charset-less {@code String(byte[])}
 * constructor, i.e. with the platform default charset.
 * NOTE(review): the result therefore depends on the JVM's default encoding;
 * confirm which charset the stored strings actually use before pinning one.
 *
 * @param document entry whose stream is read from its start for {@code getSize()} bytes
 * @return the entry's bytes decoded with the platform default charset
 * @throws IOException if opening, reading or closing the entry's stream fails
 */
public static String ReadString(DocumentEntry document) throws IOException {
    DocumentInputStream in = new DocumentInputStream(document);
    final byte[] raw = new byte[document.getSize()];
    try {
        in.readFully(raw);
    } finally {
        in.close();
    }
    return new String(raw);
}
From source file:com.ezdi.rtf.testRTFParser.RTFObjDataParser.java
License:Apache License
private byte[] handleEmbeddedPOIFS(InputStream is, Metadata metadata, AtomicInteger unknownFilenameCount) throws IOException { byte[] ret = null; try (NPOIFSFileSystem fs = new NPOIFSFileSystem(is)) { DirectoryNode root = fs.getRoot(); if (root == null) { return ret; }//w w w. j av a 2 s . co m if (root.hasEntry("Package")) { Entry ooxml = root.getEntry("Package"); TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml)); ByteArrayOutputStream out = new ByteArrayOutputStream(); IOUtils.copy(stream, out); ret = out.toByteArray(); } else { // try poifs POIFSDocumentType type = POIFSDocumentType.detectType(root); if (type == POIFSDocumentType.OLE10_NATIVE) { try { // Try to un-wrap the OLE10Native record: Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(root); ret = ole.getDataBuffer(); } catch (Ole10NativeException ex) { // Not a valid OLE10Native record, skip it } } else if (type == POIFSDocumentType.COMP_OBJ) { DocumentEntry contentsEntry; try { contentsEntry = (DocumentEntry) root.getEntry("CONTENTS"); } catch (FileNotFoundException ioe) { contentsEntry = (DocumentEntry) root.getEntry("Contents"); } try (DocumentInputStream inp = new DocumentInputStream(contentsEntry)) { ret = new byte[contentsEntry.getSize()]; inp.readFully(ret); } } else { ByteArrayOutputStream out = new ByteArrayOutputStream(); is.reset(); IOUtils.copy(is, out); ret = out.toByteArray(); metadata.set(Metadata.RESOURCE_NAME_KEY, "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension()); metadata.set(Metadata.CONTENT_TYPE, type.getType().toString()); } } } return ret; }
From source file:com.healthmarketscience.jackcess.util.OleBlobTest.java
License:Apache License
private static void checkCompoundStorage(OleBlob.CompoundContent cc, Attachment attach) throws Exception { File tmpData = File.createTempFile("attach_", ".dat"); try {// w w w .j a v a 2s . co m FileOutputStream fout = new FileOutputStream(tmpData); fout.write(attach.getFileData()); fout.close(); POIFSFileSystem attachFs = new POIFSFileSystem(tmpData, true); for (OleBlob.CompoundContent.Entry e : cc) { DocumentEntry attachE = null; try { attachE = CompoundOleUtil.getDocumentEntry(e.getName(), attachFs.getRoot()); } catch (FileNotFoundException fnfe) { // ignored, the ole data has extra entries continue; } byte[] attachEBytes = toByteArray(new DocumentInputStream(attachE), attachE.getSize()); byte[] entryBytes = toByteArray(e.getStream(), e.length()); assertTrue(Arrays.equals(attachEBytes, entryBytes)); } ByteUtil.closeQuietly(attachFs); } finally { tmpData.delete(); } }
From source file:com.krawler.esp.fileparser.word.ExtractWordFile.java
License:Open Source License
public String extractText(String filepath) throws FastSavedException, IOException { InputStream iStream = new BufferedInputStream(new FileInputStream(filepath)); POIFSFileSystem fsys = new POIFSFileSystem(iStream); // load our POIFS document streams. DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument"); DocumentInputStream din = fsys.createDocumentInputStream("WordDocument"); byte[] header = new byte[headerProps.getSize()]; din.read(header);/*from w w w.j a v a 2 s . c o m*/ din.close(); int info = LittleEndian.getShort(header, 0xa); if ((info & 0x4) != 0) { throw new FastSavedException("Fast-saved files are unsupported at this time"); } if ((info & 0x100) != 0) { System.out.println("This document is password protected"); } // determine the version of Word this document came from. int nFib = LittleEndian.getShort(header, 0x2); switch (nFib) { case 101: case 102: case 103: case 104: // this is a Word 6.0 doc send it to the extractor for that version. Word6Extractor oldExtractor = new Word6Extractor(); return oldExtractor.extractText(header); } // Get the information we need from the header boolean useTable1 = (info & 0x200) != 0; // get the location of the piece table int complexOffset = LittleEndian.getInt(header, 0x1a2); // determine which table stream we must use. 
String tableName = null; if (useTable1) { tableName = "1Table"; } else { tableName = "0Table"; } DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName); byte[] tableStream = new byte[table.getSize()]; din = fsys.createDocumentInputStream(tableName); din.read(tableStream); din.close(); int chpOffset = LittleEndian.getInt(header, 0xfa); int chpSize = LittleEndian.getInt(header, 0xfe); int fcMin = LittleEndian.getInt(header, 0x18); // load our text pieces and our character runs ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin); TextPieceTable tpt = cft.getTextPieceTable(); List textPieces = tpt.getTextPieces(); CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin, tpt); // make the POIFS objects available for garbage collection din = null; fsys = null; table = null; headerProps = null; List textRuns = cbt.getTextRuns(); Iterator runIt = textRuns.iterator(); Iterator textIt = textPieces.iterator(); TextPiece currentPiece = (TextPiece) textIt.next(); int currentTextStart = currentPiece.getStart(); int currentTextEnd = currentPiece.getEnd(); WordTextBuffer finalTextBuf = new WordTextBuffer(); // iterate through all text runs extract the text only if they haven't // been // deleted while (runIt.hasNext()) { CHPX chpx = (CHPX) runIt.next(); boolean deleted = isDeleted(chpx.getGrpprl()); if (deleted) { continue; } int runStart = chpx.getStart(); int runEnd = chpx.getEnd(); while (runStart >= currentTextEnd) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); currentTextEnd = currentPiece.getEnd(); } if (runEnd < currentTextEnd) { String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); finalTextBuf.append(str); } else if (runEnd > currentTextEnd) { while (runEnd > currentTextEnd) { String str = currentPiece.substring(runStart - currentTextStart, currentTextEnd - currentTextStart); finalTextBuf.append(str); if 
(textIt.hasNext()) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); runStart = currentTextStart; currentTextEnd = currentPiece.getEnd(); } else { return finalTextBuf.toString(); } } String str = currentPiece.substring(0, runEnd - currentTextStart); finalTextBuf.append(str); } else { String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); if (textIt.hasNext()) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); currentTextEnd = currentPiece.getEnd(); } finalTextBuf.append(str); } } return finalTextBuf.toString(); }
From source file:com.krawler.esp.fileparser.wordparser.ExtractWordFile.java
License:Open Source License
public String extractText(String filepath) throws FastSavedException, IOException { InputStream iStream = new BufferedInputStream(new FileInputStream(filepath)); ArrayList text = new ArrayList(); POIFSFileSystem fsys = new POIFSFileSystem(iStream); // load our POIFS document streams. DocumentEntry headerProps = (DocumentEntry) fsys.getRoot().getEntry("WordDocument"); DocumentInputStream din = fsys.createDocumentInputStream("WordDocument"); byte[] header = new byte[headerProps.getSize()]; din.read(header);//from w w w. ja v a 2s . c o m din.close(); int info = LittleEndian.getShort(header, 0xa); if ((info & 0x4) != 0) { throw new FastSavedException("Fast-saved files are unsupported at this time"); } if ((info & 0x100) != 0) { System.out.println("This document is password protected"); } // determine the version of Word this document came from. int nFib = LittleEndian.getShort(header, 0x2); // Get the information we need from the header boolean useTable1 = (info & 0x200) != 0; // get the location of the piece table int complexOffset = LittleEndian.getInt(header, 0x1a2); // determine which table stream we must use. String tableName = null; if (useTable1) { tableName = "1Table"; } else { tableName = "0Table"; } DocumentEntry table = (DocumentEntry) fsys.getRoot().getEntry(tableName); byte[] tableStream = new byte[table.getSize()]; din = fsys.createDocumentInputStream(tableName); din.read(tableStream); din.close(); int chpOffset = LittleEndian.getInt(header, 0xfa); int chpSize = LittleEndian.getInt(header, 0xfe); int fcMin = LittleEndian.getInt(header, 0x18); ComplexFileTable cft = new ComplexFileTable(header, tableStream, complexOffset, fcMin); TextPieceTable tpt = cft.getTextPieceTable(); switch (nFib) { case 101: case 102: case 103: case 104: // this is a Word 6.0 doc send it to the extractor for that version. 
Word6Extractor oldExtractor = new Word6Extractor(); return oldExtractor.extractText(header, tpt); } CHPBinTable cbt = new CHPBinTable(header, tableStream, chpOffset, chpSize, fcMin, tpt); // load our text pieces and our character runs List textPieces = tpt.getTextPieces(); // make the POIFS objects available for garbage collection din = null; fsys = null; table = null; headerProps = null; List textRuns = cbt.getTextRuns(); Iterator runIt = textRuns.iterator(); Iterator textIt = textPieces.iterator(); TextPiece currentPiece = (TextPiece) textIt.next(); int currentTextStart = currentPiece.getStart(); int currentTextEnd = currentPiece.getEnd(); WordTextBuffer finalTextBuf = new WordTextBuffer(); // iterate through all text runs extract the text only if they haven't // been // deleted while (runIt.hasNext()) { CHPX chpx = (CHPX) runIt.next(); boolean deleted = isDeleted(chpx.getGrpprl()); if (deleted) { continue; } int runStart = chpx.getStart(); int runEnd = chpx.getEnd(); while (runStart >= currentTextEnd) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); currentTextEnd = currentPiece.getEnd(); } if (runEnd < currentTextEnd) { String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); finalTextBuf.append(str); } else if (runEnd > currentTextEnd) { while (runEnd > currentTextEnd) { String str = currentPiece.substring(runStart - currentTextStart, currentTextEnd - currentTextStart); finalTextBuf.append(str); if (textIt.hasNext()) { currentPiece = (TextPiece) textIt.next(); currentTextStart = currentPiece.getStart(); runStart = currentTextStart; currentTextEnd = currentPiece.getEnd(); } else { return finalTextBuf.toString(); } } String str = currentPiece.substring(0, runEnd - currentTextStart); finalTextBuf.append(str); } else { String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); if (textIt.hasNext()) { currentPiece = (TextPiece) textIt.next(); 
currentTextStart = currentPiece.getStart(); currentTextEnd = currentPiece.getEnd(); } finalTextBuf.append(str); } } return finalTextBuf.toString(); }
From source file:com.pnf.plugin.ole.parser.streams.OleProcessor.java
License:Apache License
private void addAll(ContainerStream parent, DirectoryEntry root) throws IOException { // Iterate through all entries in the current fs directory for (Entry e : root) { // If it's another directory entry, recurse deeper if (e instanceof DirectoryEntry) { // Recurse and parse files, if any, within the current directory DirectoryEntry d = (DirectoryEntry) e; ClassID id = d.getStorageClsid(); if (Stream.isVbaStorage(d.getName())) { VbaContainerStream vba = new VbaContainerStream(parent, id, d.getName()); addAll(vba, d);//from w w w. ja v a 2s .c om } else { ContainerStream currDir = new ContainerStream(parent, id, d.getName()); addAll(currDir, d); } } else if (e instanceof DocumentEntry) { // Retrieve chained representation of files in image DocumentEntry doc = (DocumentEntry) e; // Create byte array around contents of file. byte[] data = new byte[doc.getSize()]; // Read data from image file into buffer boolean error = false; try (DocumentInputStream stream = new DocumentInputStream(doc)) { try { stream.readFully(data); } catch (IndexOutOfBoundsException i) { error = true; } } catch (IOException e1) { error = true; } ByteBuffer buff = ByteBuffer.wrap(data); new DocumentStream(parent, e.getName(), buff, error); // constructor takes care of notifying parent of a new child } } }