List of usage examples for the available() method of org.apache.poi.poifs.filesystem.DocumentInputStream
@Override
public int available()
From source file:com.duroty.lucene.parser.MSPowerPointParser.java
License:Open Source License
/** * DOCUMENT ME!/* w w w .j av a2 s. co m*/ * * @param event DOCUMENT ME! */ public void processPOIFSReaderEvent(POIFSReaderEvent event) { try { if (!event.getName().equalsIgnoreCase("PowerPoint Document")) { return; } DocumentInputStream input = event.getStream(); byte[] buffer = new byte[input.available()]; input.read(buffer, 0, input.available()); byte[] espace = new String("\n\n").getBytes(); for (int i = 0; i < (buffer.length - 20); i++) { long type = LittleEndian.getUShort(buffer, i + 2); long size = LittleEndian.getUInt(buffer, i + 4); if (type == 4008) { writer.write(buffer, i + 4 + 1, (int) size + 3); writer.write(espace); i = (i + 4 + 1 + (int) size) - 1; } /*if (sleep > 0) { try { Thread.sleep(sleep); } catch (Exception ex) { } }*/ } } catch (Exception ex) { } }
From source file:com.flexive.extractor.PowerpointExtractor.java
License:Open Source License
@Override public void processPOIFSReaderEvent(POIFSReaderEvent event) { try {/*w w w. jav a 2s . c om*/ if (event.getName().equalsIgnoreCase("PowerPoint Document")) { DocumentInputStream input = event.getStream(); byte[] buffer = new byte[input.available()]; //noinspection ResultOfMethodCallIgnored input.read(buffer, 0, input.available()); processContent(buffer, 0, buffer.length); } else if (event.getName().equals("\005SummaryInformation")) { SummaryInformation si = (SummaryInformation) PropertySetFactory.create(event.getStream()); fxsi = new FxSummaryInformation(si); } } catch (Exception ex) { ex.printStackTrace(); } }
From source file:lius.index.powerpoint.PPTIndexer.java
License:Apache License
public void processPOIFSReaderEvent(POIFSReaderEvent event) { try {//w w w. j a v a 2 s . c o m if (!event.getName().equalsIgnoreCase("PowerPoint Document")) return; DocumentInputStream input = event.getStream(); byte[] buffer = new byte[input.available()]; input.read(buffer, 0, input.available()); for (int i = 0; i < buffer.length - 20; i++) { long type = LittleEndian.getUShort(buffer, i + 2); long size = LittleEndian.getUInt(buffer, i + 4); if (type == 4008L) { writer.write(buffer, i + 4 + 1, (int) size + 3); i = i + 4 + 1 + (int) size - 1; } } } catch (Exception ex) { logger.error(ex.getMessage()); } }
From source file:net.freeutils.tnef.msg.Msg.java
License:Open Source License
protected static RawInputStream toRawInputStream(DocumentEntry entry) throws IOException { DocumentInputStream dis = new DocumentInputStream(entry); ByteArrayOutputStream bais = new ByteArrayOutputStream(dis.available()); try {//from w ww . j a v a 2 s . c o m byte[] bytes = new byte[4096]; int count; while ((count = dis.read(bytes)) > -1) bais.write(bytes, 0, count); } finally { dis.close(); } return new RawInputStream(bais.toByteArray()); }
From source file:net.sf.mmm.content.parser.impl.poi.ContentParserPpt.java
License:Apache License
/** * {@inheritDoc}//from ww w . ja va2s. c o m */ @Override protected String extractText(POIFSFileSystem poiFs, long filesize, ContentParserOptions options) throws Exception { // PowerPointExtractor pptExtractor = new PowerPointExtractor(poiFs); // return pptExtractor.getText(); DocumentInputStream docStream = poiFs.createDocumentInputStream(POIFS_POWERPOINT_DOC); int length = docStream.available(); int maximumBufferSize = options.getMaximumBufferSize(); if (maximumBufferSize < length) { length = maximumBufferSize; } int capacity = length / 10; StringBuffer textBuffer = new StringBuffer(capacity); byte[] buffer = new byte[length]; docStream.read(buffer); docStream.close(); extractRecursive(buffer, 0, length, textBuffer); return textBuffer.toString(); }
From source file:net.sf.mpxj.utility.MppClean.java
License:Open Source License
/** * Extracts a block of data from the MPP file, and iterates through the map * of find/replace pairs to make the data anonymous. * /*from w w w . java 2s . c o m*/ * @param parentDirectory parent directory object * @param fileName target file name * @param replacements find/replace data * @param unicode true for double byte text * @throws IOException */ private void processReplacements(DirectoryEntry parentDirectory, String fileName, Map<String, String> replacements, boolean unicode) throws IOException { // // Populate a list of keys and sort into descending order of length // List<String> keys = new ArrayList<String>(replacements.keySet()); Collections.sort(keys, new Comparator<String>() { @Override public int compare(String o1, String o2) { return (o2.length() - o1.length()); } }); // // Extract the raw file data // DocumentEntry targetFile = (DocumentEntry) parentDirectory.getEntry(fileName); DocumentInputStream dis = new DocumentInputStream(targetFile); int dataSize = dis.available(); byte[] data = new byte[dataSize]; dis.read(data); // // Replace the text // for (String findText : keys) { String replaceText = replacements.get(findText); replaceData(data, findText, replaceText, unicode); } // // Remove the document entry // targetFile.delete(); // // Replace it with a new one // parentDirectory.createDocument(fileName, new ByteArrayInputStream(data)); }
From source file:net.sf.mpxj.utility.MppCleanUtility.java
License:Open Source License
/** * Extracts a block of data from the MPP file, and iterates through the map * of find/replace pairs to make the data anonymous. * /*www. j av a 2s . c o m*/ * @param parentDirectory parent directory object * @param fileName target file name * @param replacements find/replace data * @param unicode true for double byte text * @throws IOException */ private void processReplacements(DirectoryEntry parentDirectory, String fileName, Map<String, String> replacements, boolean unicode) throws IOException { // // Populate a list of keys and sort into descending order of length // List<String> keys = new ArrayList<String>(replacements.keySet()); Collections.sort(keys, new Comparator<String>() { @Override public int compare(String o1, String o2) { return (o2.length() - o1.length()); } }); // // Extract the raw file data // DocumentEntry targetFile = (DocumentEntry) parentDirectory.getEntry(fileName); DocumentInputStream dis = new DocumentInputStream(targetFile); int dataSize = dis.available(); byte[] data = new byte[dataSize]; dis.read(data); dis.close(); // // Replace the text // for (String findText : keys) { String replaceText = replacements.get(findText); replaceData(data, findText, replaceText, unicode); } // // Remove the document entry // targetFile.delete(); // // Replace it with a new one // parentDirectory.createDocument(fileName, new ByteArrayInputStream(data)); }
From source file:nz.govt.natlib.adapter.excel.ExcelAdapter.java
License:Apache License
public void readDocument(POIFSFileSystem fs, DocumentEntry doc) throws Exception { // load file system DocumentInputStream stream = new DocumentInputStream(doc); if (stream.available() > 256) { return;//from w w w . j a v a 2 s . c o m } // process data from stream byte[] content = new byte[stream.available()]; stream.read(content); stream.close(); for (int i = 0; i < content.length; i++) { int c = content[i]; if (c < 0) { c = 0x100 + c; } } }
From source file:nz.govt.natlib.adapter.powerpoint.PowerPointAdapter.java
License:Apache License
public void readDocument(POIFSFileSystem fs, DocumentEntry doc) throws Exception { // load file system DocumentInputStream stream = new DocumentInputStream(doc); if (stream.available() > 256) { return;/*w w w .ja v a 2s .c o m*/ } // process data from stream byte[] content = new byte[stream.available()]; stream.read(content); stream.close(); for (int i = 0; i < content.length; i++) { int c = content[i]; if (c < 0) { c = 0x100 + c; } System.out.println(i + ", " + Integer.toString(c) + "\t" + Integer.toHexString(c) + "\t" + (char) c); } }
From source file:nz.govt.natlib.adapter.works.DocAdapter.java
License:Apache License
public ArrayList readDocument(POIFSFileSystem fs, DocumentEntry doc) throws Exception { // load file system DocumentInputStream stream = new DocumentInputStream(doc); ArrayList words = new ArrayList(); if (stream.available() > 256) { // System.out.println("Too big "); return words; }//from w w w.ja v a2 s. c om // process data from stream byte[] content = new byte[stream.available()]; stream.read(content); stream.close(); // System.out.println("Read :"+content.length); // Fancy Harvester for finding strings 16/8 bit length or terminated... String candidate = ""; int runLong = 0; for (int i = 0; i < content.length; i++) { char c = (char) content[i]; if (c > 32 && c < 127) { candidate += c; runLong = 0; } else { runLong++; } // terminate... if (runLong >= 2) { // weed out anything less than 2 char if (candidate.trim().length() > 2) { words.add(candidate); } runLong = 0; candidate = ""; } } // for (int i=0;i<content.length;i++) { // int c = content[i]; // if (c<0) { // c = 0x100 + c; // } // System.out.println(i+", // "+Integer.toString(c)+"\t"+Integer.toHexString(c)+"\t"+(char)c); // } return words; }