Example usage for org.apache.poi.poifs.eventfilesystem POIFSReader POIFSReader

List of usage examples for the constructor POIFSReader() of org.apache.poi.poifs.eventfilesystem.POIFSReader

Introduction

On this page you can find example usages of the POIFSReader() constructor of org.apache.poi.poifs.eventfilesystem.POIFSReader.

Prototype

POIFSReader

Source Link

Usage

From source file:com.armorize.hackalert.extractor.msword.MSExtractor.java

License:Apache License

/**
 * Extracts properties and text from an MS Document input stream.
 * <p>
 * The stream is rewound with {@code reset()} before each of the two passes
 * (properties, then text), so it has to support being reset.
 *
 * @param input the document stream
 * @throws Exception if resetting, reading or text extraction fails
 */
protected void extract(InputStream input) throws Exception {
    // Pass 1: summary-information properties, collected through a listener.
    this.reader = new POIFSReader();
    this.properties = new PropertiesBroker();
    final PropertiesReaderListener summaryListener = new PropertiesReaderListener(this.properties);
    this.reader.registerListener(summaryListener, SummaryInformation.DEFAULT_STREAM_NAME);

    input.reset();
    final boolean hasData = input.available() > 0;
    if (hasData) {
        this.reader.read(input);
    }

    // Pass 2: the document text, again from the start of the stream.
    input.reset();
    this.text = extractText(input);
}

From source file:com.bluecubs.xinco.index.filetypes.XincoIndexMicrosoftPowerpoint.java

License:Apache License

/**
 * Reads the given file through a {@code POIFSReader} and returns the text
 * gathered by the registered listener.
 *
 * @param f the file to read
 * @return the listener's event text, or {@code null} if anything goes wrong
 *         while opening or reading the file
 */
public String getFileContentString(File f) {
    FileInputStream in = null;
    try {
        POIFSReader r = new POIFSReader();
        XincoIndexMicrosoftPowerpointPOIFSReaderListener ximpprl = new XincoIndexMicrosoftPowerpointPOIFSReaderListener();
        r.registerListener(ximpprl);
        in = new FileInputStream(f);
        r.read(in);
        return ximpprl.getEventText();
    } catch (Exception e) {
        // Best effort by design: any failure is reported to the caller as "no text".
        return null;
    } finally {
        // Always release the file handle; previously the stream was never closed.
        if (in != null) {
            try {
                in.close();
            } catch (Exception ignored) {
                // A failed close must not replace the result computed above.
            }
        }
    }
}

From source file:com.duroty.lucene.parser.MSPowerPointParser.java

License:Open Source License

/**
 * Feeds the input {@code in} to a {@code POIFSReader} that has this object
 * registered as its listener, and returns the content of {@code writer}
 * afterwards (presumably filled by this object's listener callback).
 *
 * @return the content of {@code writer}, converted with {@code toString()}
 *
 * @throws ParserException wrapping any exception raised while reading
 */
private String getContents() throws ParserException {
    try {
        final POIFSReader poifsReader = new POIFSReader();
        writer = new ByteArrayOutputStream();
        poifsReader.registerListener(this);
        poifsReader.read(in);
        return writer.toString();
    } catch (Exception ex) {
        throw new ParserException(ex);
    }
}

From source file:com.flexive.extractor.ExcelExtractor.java

License:Open Source License

/**
 * Extracts the text information from the Excel file.
 * <p>
 * The wrapped input stream is closed before this method returns.
 *
 * @param in the input stream to read from
 * @return the extracted information, or null if no text extraction was possible
 */
public ExtractedData extract(final InputStream in) {
    BufferedInputStream buffered = null;
    try {
        writer = new ByteArrayOutputStream();

        // The data is consumed twice (summary info, then workbook), so wrap the
        // stream and mark its start in order to rewind between the passes.
        buffered = new BufferedInputStream(in);
        buffered.mark(Integer.MAX_VALUE);

        // Pass 1: summary information, delivered to this object as listener
        final POIFSReader summaryReader = new POIFSReader();
        summaryReader.registerListener(this, "\005SummaryInformation");
        summaryReader.read(buffered);
        buffered.reset();

        // Pass 2: text of every sheet in the workbook
        final HSSFWorkbook workbook = new HSSFWorkbook(buffered);
        int sheetIndex = 0;
        while (sheetIndex < workbook.getNumberOfSheets()) {
            processSheet(workbook.getSheetAt(sheetIndex));
            sheetIndex++;
        }

        // Summary info, when available, is appended to the extracted text
        if (fxsi != null) {
            writer.write(FxSharedUtils.getBytes(fxsi.getFTIndexInformations()));
        }
        writer.flush();

        return new ExtractedData(fxsi, writer.toString());
    } catch (Exception exc) {
        exc.printStackTrace();
        return null;
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (Exception exc) {
            /* ignore */
        }
        try {
            if (buffered != null) {
                buffered.close();
            }
        } catch (Exception exc) {
            /* ignore */
        }
    }
}

From source file:com.flexive.extractor.FxSummaryInformation.java

License:Open Source License

/**
 * Reads the summary information from a document.
 *
 * @param input the input stream to read from, will not be closed at the end
 * @return the summary information, or null if none was found or reading failed
 */
public static FxSummaryInformation getSummaryInformation(InputStream input) {
    try {
        final POIFSReader poifsReader = new POIFSReader();

        // One-element holder so the anonymous listener can hand its result back.
        final FxSummaryInformation[] found = new FxSummaryInformation[1];
        final POIFSReaderListener summaryListener = new POIFSReaderListener() {
            /**
             * Processes the Summary section.
             *
             * @param event the summary section event.
             */
            @Override
            public void processPOIFSReaderEvent(POIFSReaderEvent event) {
                try {
                    SummaryInformation si = (SummaryInformation) PropertySetFactory.create(event.getStream());
                    found[0] = new FxSummaryInformation(si);
                } catch (Exception ex) {
                    /* ignore */
                }
            }
        };

        poifsReader.registerListener(summaryListener, "\005SummaryInformation");
        poifsReader.read(input);
        return found[0];
    } catch (Exception ex) {
        return null;
    }
}

From source file:com.flexive.extractor.PowerpointExtractor.java

License:Open Source License

/**
 * Extracts the text information from the PowerPoint file.
 *
 * @param in the input stream to read from
 * @return the extracted information, or null if no text extraction was possible
 */
public ExtractedData extract(final InputStream in) {
    try {
        writer = new ByteArrayOutputStream();

        // This object is registered as listener for everything the reader finds.
        final POIFSReader poifsReader = new POIFSReader();
        poifsReader.registerListener(this);
        poifsReader.read(in);

        // Summary info, when available, is appended to the extracted text
        if (fxsi != null) {
            writer.write(FxSharedUtils.getBytes(fxsi.getFTIndexInformations()));
        }
        writer.flush();

        return new ExtractedData(fxsi, writer.toString());
    } catch (Exception ex) {
        return null;
    } finally {
        try {
            writer.close();
        } catch (Exception exc) {
            /* ignore */
        }
    }
}

From source file:com.flexive.extractor.WordExtractor.java

License:Open Source License

/**
 * Extracts the text information from the Word file.
 * <p>
 * The wrapped input stream is closed before this method returns.
 *
 * @param in the input stream to read from
 * @return the extracted information, or null if no text extraction was possible
 */
public ExtractedData extract(final InputStream in) {
    ByteArrayOutputStream textBytes = null;
    PrintWriter textOut = null;
    BufferedInputStream buffered = null;
    try {
        textBytes = new ByteArrayOutputStream();
        textOut = new PrintWriter(textBytes);

        // The data is consumed twice (summary info, then document text), so wrap
        // the stream and mark its start in order to rewind between the passes.
        buffered = new BufferedInputStream(in);
        buffered.mark(Integer.MAX_VALUE);

        // Pass 1: summary information, delivered to this object as listener
        final POIFSReader summaryReader = new POIFSReader();
        summaryReader.registerListener(this, "\005SummaryInformation");
        summaryReader.read(buffered);
        buffered.reset();

        // Pass 2: the document text, followed by the summary info if present
        final WordDocument document = new WordDocument(buffered);
        document.writeAllText(textOut);
        if (fxsi != null) {
            textOut.write(fxsi.getFTIndexInformations());
        }
        textOut.flush();

        return new ExtractedData(fxsi, textBytes.toString());
    } catch (Exception exc) {
        return null;
    } finally {
        try {
            if (textOut != null) {
                textOut.close();
            }
        } catch (Exception exc) {
            /* ignore */
        }
        try {
            if (textBytes != null) {
                textBytes.close();
            }
        } catch (Exception exc) {
            /* ignore */
        }
        try {
            if (buffered != null) {
                buffered.close();
            }
        } catch (Exception exc) {
            /* ignore */
        }
    }
}

From source file:com.knowgate.ole.OLEDocument.java

License:Open Source License

/**
 * Creates the reader and the listener used by this document and registers
 * the listener for the summary-information stream.
 */
public OLEDocument() {
    this.oReader = new POIFSReader();
    this.oListener = new OLEListener();
    this.oReader.registerListener(this.oListener, "\005SummaryInformation");
}

From source file:com.krawler.esp.fileparser.word.ExtractWordFile.java

License:Open Source License

/**
 * Reads the summary-information stream of the given document and returns
 * the properties gathered by the listener's broker.
 *
 * @param in the document stream; it is not closed here
 * @return the properties held by the broker after reading
 * @throws IOException if reading the stream fails
 */
public Properties extractProperties(InputStream in) throws IOException {
    final PropertiesBroker broker = new PropertiesBroker();
    final POIFSReader poifsReader = new POIFSReader();
    final PropertiesReaderListener summaryListener = new PropertiesReaderListener(broker);

    poifsReader.registerListener(summaryListener, "\005SummaryInformation");
    poifsReader.read(in);

    return broker.getProperties();
}

From source file:com.villemos.ispace.aperture.enricher.MicrosoftPropertyReader.java

License:Open Source License

/**
 * Reads the file referenced by {@code io.hasUri} through a
 * {@code POIFSReader}, if it exists and its name ends with ".doc", and copies
 * every entry of {@code msProperties} into {@code io.metadata}.
 * <p>
 * This object is registered as the reader's listener; {@code msProperties}
 * is presumably filled by that callback (not visible here).
 * Failures are logged and otherwise ignored.
 *
 * @param io      the information object to enrich
 * @param headers the message headers (not used by this method)
 */
@Handler
public void addMSProperties(@Body InformationObject io, @Headers Map<String, Object> headers) {

    File file = new File(io.hasUri);

    if (file.exists() && io.hasUri.endsWith(".doc")) {
        POIFSReader r = new POIFSReader();
        r.registerListener(this);
        try {
            FileInputStream inStream = new FileInputStream(file);
            try {
                r.read(inStream);

                for (Entry<String, String> entry : msProperties.entrySet()) {
                    io.metadata.put(entry.getKey(), entry.getValue());
                }
            } finally {
                // Close on every path; previously a failing read leaked the file handle.
                inStream.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
            LOG.error("Failed to get properties for .doc file '" + file.getName() + "'.");
        }
    }
}