Example usage for org.apache.poi.poifs.filesystem DocumentInputStream available

List of usage examples for org.apache.poi.poifs.filesystem DocumentInputStream available

Introduction

On this page you can find usage examples for the available() method of org.apache.poi.poifs.filesystem.DocumentInputStream.

Prototype

@Override
    public int available() 

Source Link

Usage

From source file:com.duroty.lucene.parser.MSPowerPointParser.java

License:Open Source License

/**
 * DOCUMENT ME!/* w  w  w  .j av a2  s.  co  m*/
 *
 * @param event DOCUMENT ME!
 */
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
    try {
        if (!event.getName().equalsIgnoreCase("PowerPoint Document")) {
            return;
        }

        DocumentInputStream input = event.getStream();

        byte[] buffer = new byte[input.available()];
        input.read(buffer, 0, input.available());

        byte[] espace = new String("\n\n").getBytes();

        for (int i = 0; i < (buffer.length - 20); i++) {
            long type = LittleEndian.getUShort(buffer, i + 2);
            long size = LittleEndian.getUInt(buffer, i + 4);

            if (type == 4008) {
                writer.write(buffer, i + 4 + 1, (int) size + 3);
                writer.write(espace);
                i = (i + 4 + 1 + (int) size) - 1;
            }

            /*if (sleep > 0) {
                try {
                    Thread.sleep(sleep);
                } catch (Exception ex) {
                }
            }*/
        }
    } catch (Exception ex) {
    }
}

From source file:com.flexive.extractor.PowerpointExtractor.java

License:Open Source License

/**
 * Dispatches a POIFS reader event according to the name of its stream.
 *
 * <p>The "PowerPoint Document" stream (matched case-insensitively) is loaded
 * into a byte array and handed to {@code processContent}. The
 * "\005SummaryInformation" stream (matched exactly) is turned into a
 * {@code SummaryInformation} property set and stored in {@code fxsi}.
 * Any exception is printed to standard error and otherwise ignored.
 *
 * @param event the reader event providing the stream name and its data
 */
@Override
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
    try {
        final String streamName = event.getName();

        if (streamName.equalsIgnoreCase("PowerPoint Document")) {
            final DocumentInputStream stream = event.getStream();
            final byte[] content = new byte[stream.available()];
            //noinspection ResultOfMethodCallIgnored
            stream.read(content, 0, stream.available());
            processContent(content, 0, content.length);
            return;
        }

        if (streamName.equals("\005SummaryInformation")) {
            final SummaryInformation summary =
                    (SummaryInformation) PropertySetFactory.create(event.getStream());
            fxsi = new FxSummaryInformation(summary);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}

From source file:lius.index.powerpoint.PPTIndexer.java

License:Apache License

/**
 * Handles a POIFS reader event by scanning the "PowerPoint Document" stream
 * and copying matching records to {@code writer}.
 *
 * <p>Events whose name is not "PowerPoint Document" (case-insensitive) are
 * ignored. Any exception raised while reading or writing is logged together
 * with its stack trace and not rethrown.
 *
 * @param event the reader event providing the stream name and its data
 */
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
    try {
        if (!event.getName().equalsIgnoreCase("PowerPoint Document")) {
            return;
        }
        DocumentInputStream input = event.getStream();

        // Keep reading until the buffer is full or the stream ends: a single
        // read() call is not required to fill the array.
        byte[] buffer = new byte[input.available()];
        int filled = 0;
        while (filled < buffer.length) {
            int count = input.read(buffer, filled, buffer.length - filled);
            if (count <= 0) {
                break;
            }
            filled += count;
        }

        for (int i = 0; i < buffer.length - 20; i++) {
            long type = LittleEndian.getUShort(buffer, i + 2);
            long size = LittleEndian.getUInt(buffer, i + 4);
            // NOTE(review): 4008 is presumably the PowerPoint text-bytes
            // record type - confirm against the binary format specification.
            if (type == 4008L) {
                // Writes size + 3 bytes starting at i + 5, exactly as the
                // original did.
                writer.write(buffer, i + 4 + 1, (int) size + 3);
                // Jump past the record; the loop's i++ supplies the final step.
                i = i + 4 + 1 + (int) size - 1;
            }
        }
    } catch (Exception ex) {
        // Pass the exception itself so the stack trace is not lost.
        logger.error(ex.getMessage(), ex);
    }
}

From source file:net.freeutils.tnef.msg.Msg.java

License:Open Source License

/**
 * Reads the complete content of a document entry into memory and wraps it
 * in a {@code RawInputStream}.
 *
 * @param entry the document entry to read
 * @return a raw input stream over a copy of the entry's bytes
 * @throws IOException if reading the entry fails
 */
protected static RawInputStream toRawInputStream(DocumentEntry entry) throws IOException {
    final DocumentInputStream source = new DocumentInputStream(entry);
    // Pre-size the sink with the number of bytes the stream reports as available.
    final ByteArrayOutputStream sink = new ByteArrayOutputStream(source.available());
    try {
        final byte[] chunk = new byte[4096];
        for (int read = source.read(chunk); read > -1; read = source.read(chunk)) {
            sink.write(chunk, 0, read);
        }
    } finally {
        source.close();
    }
    return new RawInputStream(sink.toByteArray());
}

From source file:net.sf.mmm.content.parser.impl.poi.ContentParserPpt.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>This implementation opens the {@code POIFS_POWERPOINT_DOC} stream, loads
 * at most {@code options.getMaximumBufferSize()} bytes of it into memory and
 * passes them to {@code extractRecursive}, which appends the text it finds to
 * the result. The stream is closed even if reading fails.
 */
@Override
protected String extractText(POIFSFileSystem poiFs, long filesize, ContentParserOptions options)
        throws Exception {

    DocumentInputStream docStream = poiFs.createDocumentInputStream(POIFS_POWERPOINT_DOC);

    int length;
    byte[] buffer;
    try {
        // Cap the amount of data loaded by the configured maximum buffer size.
        length = docStream.available();
        int maximumBufferSize = options.getMaximumBufferSize();
        if (maximumBufferSize < length) {
            length = maximumBufferSize;
        }
        buffer = new byte[length];

        // Keep reading until the buffer is full or the stream ends: a single
        // read() call is not required to fill the array.
        int filled = 0;
        while (filled < length) {
            int count = docStream.read(buffer, filled, length - filled);
            if (count <= 0) {
                break;
            }
            filled += count;
        }
    } finally {
        docStream.close();
    }

    // Initial capacity is a tenth of the number of bytes loaded.
    StringBuffer textBuffer = new StringBuffer(length / 10);
    extractRecursive(buffer, 0, length, textBuffer);
    return textBuffer.toString();
}

From source file:net.sf.mpxj.utility.MppClean.java

License:Open Source License

/**
 * Extracts a block of data from the MPP file, and iterates through the map
 * of find/replace pairs to make the data anonymous.
 * /*from w  w  w  . java 2s  . c o m*/
 * @param parentDirectory parent directory object
 * @param fileName target file name
 * @param replacements find/replace data
 * @param unicode true for double byte text
 * @throws IOException
 */
private void processReplacements(DirectoryEntry parentDirectory, String fileName,
        Map<String, String> replacements, boolean unicode) throws IOException {
    //
    // Populate a list of keys and sort into descending order of length
    //
    List<String> keys = new ArrayList<String>(replacements.keySet());
    Collections.sort(keys, new Comparator<String>() {
        @Override
        public int compare(String o1, String o2) {
            return (o2.length() - o1.length());
        }
    });

    //
    // Extract the raw file data
    //
    DocumentEntry targetFile = (DocumentEntry) parentDirectory.getEntry(fileName);
    DocumentInputStream dis = new DocumentInputStream(targetFile);
    int dataSize = dis.available();
    byte[] data = new byte[dataSize];
    dis.read(data);

    //
    // Replace the text
    //
    for (String findText : keys) {
        String replaceText = replacements.get(findText);
        replaceData(data, findText, replaceText, unicode);
    }

    //
    // Remove the document entry
    //
    targetFile.delete();

    //
    // Replace it with a new one
    //
    parentDirectory.createDocument(fileName, new ByteArrayInputStream(data));
}

From source file:net.sf.mpxj.utility.MppCleanUtility.java

License:Open Source License

/**
 * Extracts a block of data from the MPP file, and iterates through the map
 * of find/replace pairs to make the data anonymous.
 * /*www. j av a  2s  . c o m*/
 * @param parentDirectory parent directory object
 * @param fileName target file name
 * @param replacements find/replace data
 * @param unicode true for double byte text
 * @throws IOException
 */
private void processReplacements(DirectoryEntry parentDirectory, String fileName,
        Map<String, String> replacements, boolean unicode) throws IOException {
    //
    // Populate a list of keys and sort into descending order of length
    //
    List<String> keys = new ArrayList<String>(replacements.keySet());
    Collections.sort(keys, new Comparator<String>() {
        @Override
        public int compare(String o1, String o2) {
            return (o2.length() - o1.length());
        }
    });

    //
    // Extract the raw file data
    //
    DocumentEntry targetFile = (DocumentEntry) parentDirectory.getEntry(fileName);
    DocumentInputStream dis = new DocumentInputStream(targetFile);
    int dataSize = dis.available();
    byte[] data = new byte[dataSize];
    dis.read(data);
    dis.close();

    //
    // Replace the text
    //
    for (String findText : keys) {
        String replaceText = replacements.get(findText);
        replaceData(data, findText, replaceText, unicode);
    }

    //
    // Remove the document entry
    //
    targetFile.delete();

    //
    // Replace it with a new one
    //
    parentDirectory.createDocument(fileName, new ByteArrayInputStream(data));
}

From source file:nz.govt.natlib.adapter.excel.ExcelAdapter.java

License:Apache License

/**
 * Opens the given document entry and, when it reports at most 256 available
 * bytes, reads its content into memory.
 *
 * <p>The bytes read are not used any further in this method. The stream is
 * closed on every path, including the early return for larger documents.
 *
 * @param fs the file system containing the document (not used here)
 * @param doc the document entry to read
 * @throws Exception if opening or reading the document fails
 */
public void readDocument(POIFSFileSystem fs, DocumentEntry doc) throws Exception {
    // load file system
    DocumentInputStream stream = new DocumentInputStream(doc);
    try {
        // Documents reporting more than 256 bytes are skipped entirely.
        if (stream.available() > 256) {
            return;
        }

        // process data from stream
        byte[] content = new byte[stream.available()];
        stream.read(content);
        // The original looped over the bytes converting each to an unsigned
        // value and then discarded it; that no-op loop has been removed.
    } finally {
        stream.close();
    }
}

From source file:nz.govt.natlib.adapter.powerpoint.PowerPointAdapter.java

License:Apache License

/**
 * Opens the given document entry and, when it reports at most 256 available
 * bytes, dumps every byte to standard output.
 *
 * <p>Each output line contains the byte index, the unsigned decimal value,
 * the hexadecimal value and the corresponding character. The stream is
 * closed on every path, including the early return for larger documents.
 *
 * @param fs the file system containing the document (not used here)
 * @param doc the document entry to read
 * @throws Exception if opening or reading the document fails
 */
public void readDocument(POIFSFileSystem fs, DocumentEntry doc) throws Exception {
    // load file system
    DocumentInputStream stream = new DocumentInputStream(doc);

    byte[] content;
    try {
        // Documents reporting more than 256 bytes are skipped entirely.
        if (stream.available() > 256) {
            return;
        }

        // process data from stream
        content = new byte[stream.available()];
        stream.read(content);
    } finally {
        stream.close();
    }

    for (int i = 0; i < content.length; i++) {
        // Convert the signed byte to its unsigned value (0..255).
        int c = content[i] & 0xFF;
        System.out.println(i + ", " + Integer.toString(c) + "\t" + Integer.toHexString(c) + "\t" + (char) c);
    }
}

From source file:nz.govt.natlib.adapter.works.DocAdapter.java

License:Apache License

/**
 * Opens the given document entry and, when it reports at most 256 available
 * bytes, harvests runs of printable ASCII characters from its content.
 *
 * <p>Bytes in the range 33..126 are appended to the current candidate word.
 * Two consecutive bytes outside that range terminate the candidate, which is
 * kept only if it is longer than two characters. A candidate still pending
 * when the content ends is not added. The stream is closed on every path,
 * including the early return for larger documents.
 *
 * @param fs the file system containing the document (not used here)
 * @param doc the document entry to read
 * @return the harvested words; empty when the document reports more than
 *         256 bytes or contains no qualifying run
 * @throws Exception if opening or reading the document fails
 */
public ArrayList readDocument(POIFSFileSystem fs, DocumentEntry doc) throws Exception {
    // load file system
    DocumentInputStream stream = new DocumentInputStream(doc);
    ArrayList<String> words = new ArrayList<String>();

    byte[] content;
    try {
        // Documents reporting more than 256 bytes are skipped entirely.
        if (stream.available() > 256) {
            return words;
        }

        // process data from stream
        content = new byte[stream.available()];
        stream.read(content);
    } finally {
        stream.close();
    }

    // Harvester for strings that end at a run of non-printable bytes.
    StringBuilder candidate = new StringBuilder();
    int runLong = 0;
    for (int i = 0; i < content.length; i++) {
        // Negative bytes become chars above 127 and so count as non-printable.
        char c = (char) content[i];
        if (c > 32 && c < 127) {
            candidate.append(c);
            runLong = 0;
        } else {
            runLong++;
        }

        // terminate...
        if (runLong >= 2) {
            // Keep only candidates of at least three characters. The candidate
            // never contains whitespace, so no trimming is needed.
            if (candidate.length() > 2) {
                words.add(candidate.toString());
            }
            runLong = 0;
            candidate.setLength(0);
        }
    }

    return words;
}