Example usage for org.apache.poi.poifs.eventfilesystem POIFSReader read

List of usage examples for org.apache.poi.poifs.eventfilesystem POIFSReader read

Introduction

On this page you can find example usages of the read method of org.apache.poi.poifs.eventfilesystem.POIFSReader.

Prototype

public void read(final POIFSFileSystem poifs) throws IOException 

Source Link

Document

Read from a POIFSFileSystem and process the documents we get

Usage

From source file:com.bluecubs.xinco.index.filetypes.XincoIndexMicrosoftPowerpoint.java

License:Apache License

/**
 * Reads the given file through a {@link POIFSReader} and returns the text gathered by a
 * {@code XincoIndexMicrosoftPowerpointPOIFSReaderListener} registered on that reader.
 *
 * @param f the file to read
 * @return the listener's event text, or {@code null} if any exception occurs while opening,
 *         reading or closing the file
 */
public String getFileContentString(File f) {
    // try-with-resources releases the file handle on every path; previously the stream
    // was created inline and never closed.
    try (FileInputStream in = new FileInputStream(f)) {
        POIFSReader r = new POIFSReader();
        XincoIndexMicrosoftPowerpointPOIFSReaderListener ximpprl = new XincoIndexMicrosoftPowerpointPOIFSReaderListener();
        r.registerListener(ximpprl);
        r.read(in);
        return ximpprl.getEventText();
    } catch (Exception e) {
        // Best-effort extraction: callers receive null when the content cannot be read.
        return null;
    }
}

From source file:com.duroty.lucene.parser.MSPowerPointParser.java

License:Open Source License

/**
 * Feeds the {@code in} stream through a {@link POIFSReader} that has this object registered
 * as its listener, and returns the string form of {@code writer} afterwards.
 *
 * @return the string form of {@code writer} once reading has finished
 *
 * @throws ParserException wrapping any exception raised while setting up or reading
 */
private String getContents() throws ParserException {
    try {
        final POIFSReader poifsReader = new POIFSReader();
        // A fresh buffer is installed on every call before the listener can be invoked.
        writer = new ByteArrayOutputStream();
        poifsReader.registerListener(this);
        poifsReader.read(in);
        return writer.toString();
    } catch (Exception cause) {
        throw new ParserException(cause);
    }
}

From source file:com.flexive.extractor.ExcelExtractor.java

License:Open Source License

/**
 * Extracts the text information from the Excel file.
 *
 * <p>The stream is consumed twice: once by a {@link POIFSReader} listening for the
 * {@code \005SummaryInformation} entry, and once by {@code HSSFWorkbook} to process every
 * sheet. The input is therefore wrapped in a {@link BufferedInputStream} that is marked at
 * the start and reset between the two passes.
 *
 * @param in the input stream to read from
 * @return the extracted information, or null if no text extraction was possible
 */
public ExtractedData extract(final InputStream in) {
    ExtractedData extracted = null;
    BufferedInputStream rewindable = null;
    try {
        writer = new ByteArrayOutputStream();

        // Mark the very beginning so the stream can be replayed for the second pass.
        rewindable = new BufferedInputStream(in);
        rewindable.mark(Integer.MAX_VALUE);

        // First pass: summary information only.
        final POIFSReader summaryReader = new POIFSReader();
        summaryReader.registerListener(this, "\005SummaryInformation");
        summaryReader.read(rewindable);
        rewindable.reset();

        // Second pass: text of every sheet.
        final HSSFWorkbook workbook = new HSSFWorkbook(rewindable);
        for (int sheetIdx = 0; sheetIdx < workbook.getNumberOfSheets(); sheetIdx++) {
            processSheet(workbook.getSheetAt(sheetIdx));
        }

        // The summary info, when present, is appended after the sheet text.
        if (fxsi != null) {
            writer.write(FxSharedUtils.getBytes(fxsi.getFTIndexInformations()));
        }
        writer.flush();

        extracted = new ExtractedData(fxsi, writer.toString());
    } catch (Exception exc) {
        // Any failure leaves the result as null.
        exc.printStackTrace();
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (Exception exc) {
            /* ignore */
        }
        try {
            if (rewindable != null) {
                rewindable.close();
            }
        } catch (Exception exc) {
            /* ignore */
        }
    }
    return extracted;
}

From source file:com.flexive.extractor.FxSummaryInformation.java

License:Open Source License

/**
 * Reads the summary information from a document.
 *
 * @param input the input stream to read from, will not be closed at the end
 * @return the summary information/* ww  w. ja v a2  s. co  m*/
 */
public static FxSummaryInformation getSummaryInformation(InputStream input) {
    class SummaryStore implements POIFSReaderListener {
        private FxSummaryInformation fxsi = null;

        /**
         * Processes the Summary section.
         *
         * @param event the summary section event.
         */
        @Override
        public void processPOIFSReaderEvent(POIFSReaderEvent event) {
            try {
                SummaryInformation si = (SummaryInformation) PropertySetFactory.create(event.getStream());
                fxsi = new FxSummaryInformation(si);
            } catch (Exception ex) {
                /* ignore */
            }
        }

        protected FxSummaryInformation getFxSummaryInformation() {
            return fxsi;
        }
    }
    try {
        POIFSReader reader = new POIFSReader();
        SummaryStore st = new SummaryStore();
        reader.registerListener(st, "\005SummaryInformation");
        reader.read(input);
        return st.getFxSummaryInformation();
    } catch (Exception ex) {
        return null;
    }
}

From source file:com.flexive.extractor.PowerpointExtractor.java

License:Open Source License

/**
 * Extracts the text information from the PowerPoint file.
 *
 * <p>This object is registered as listener on a {@link POIFSReader}; after reading, the
 * index information of {@code fxsi} (when set) is appended to {@code writer}.
 *
 * @param in the input stream to read from
 * @return the extracted information, or null if no text extraction was possible
 */
public ExtractedData extract(final InputStream in) {
    ExtractedData extracted = null;
    try {
        writer = new ByteArrayOutputStream();
        final POIFSReader poifsReader = new POIFSReader();
        poifsReader.registerListener(this);
        poifsReader.read(in);
        if (fxsi != null) {
            writer.write(FxSharedUtils.getBytes(fxsi.getFTIndexInformations()));
        }
        writer.flush();
        extracted = new ExtractedData(fxsi, writer.toString());
    } catch (Exception ex) {
        // Any failure leaves the result as null.
    } finally {
        try {
            writer.close();
        } catch (Exception exc) {
            /* ignore */
        }
    }
    return extracted;
}

From source file:com.flexive.extractor.WordExtractor.java

License:Open Source License

/**
 * Extracts the text information from the Word file.
 *
 * <p>The stream is consumed twice: once by a {@link POIFSReader} listening for the
 * {@code \005SummaryInformation} entry, and once by {@code WordDocument} to write out all
 * text. The input is therefore wrapped in a {@link BufferedInputStream} that is marked at
 * the start and reset between the two passes.
 *
 * @param in the input stream to read from
 * @return the extracted information, or null if no text extraction was possible
 */
public ExtractedData extract(final InputStream in) {
    ExtractedData extracted = null;
    ByteArrayOutputStream buffer = null;
    PrintWriter out = null;
    BufferedInputStream rewindable = null;
    try {
        buffer = new ByteArrayOutputStream();
        out = new PrintWriter(buffer);

        // Mark the very beginning so the stream can be replayed for the second pass.
        rewindable = new BufferedInputStream(in);
        rewindable.mark(Integer.MAX_VALUE);

        // First pass: summary information only.
        final POIFSReader summaryReader = new POIFSReader();
        summaryReader.registerListener(this, "\005SummaryInformation");
        summaryReader.read(rewindable);
        rewindable.reset();

        // Second pass: the document text, followed by the summary info when present.
        final WordDocument document = new WordDocument(rewindable);
        document.writeAllText(out);
        if (fxsi != null) {
            out.write(fxsi.getFTIndexInformations());
        }
        out.flush();

        extracted = new ExtractedData(fxsi, buffer.toString());
    } catch (Exception exc) {
        // Any failure leaves the result as null.
    } finally {
        try {
            if (out != null) {
                out.close();
            }
        } catch (Exception exc) {
            /* ignore */
        }
        try {
            if (buffer != null) {
                buffer.close();
            }
        } catch (Exception exc) {
            /* ignore */
        }
        try {
            if (rewindable != null) {
                rewindable.close();
            }
        } catch (Exception exc) {
            /* ignore */
        }
    }
    return extracted;
}

From source file:com.krawler.esp.fileparser.word.ExtractWordFile.java

License:Open Source License

/**
 * Reads the given stream with a {@link POIFSReader} whose listener is registered for the
 * {@code \005SummaryInformation} entry, and returns the properties held by the broker that
 * was handed to that listener.
 *
 * @param in the input stream to read from
 * @return the properties obtained from the broker after reading
 * @throws IOException if reading the stream fails
 */
public Properties extractProperties(InputStream in) throws IOException {
    final PropertiesBroker broker = new PropertiesBroker();
    final POIFSReader poifsReader = new POIFSReader();
    final PropertiesReaderListener summaryListener = new PropertiesReaderListener(broker);

    poifsReader.registerListener(summaryListener, "\005SummaryInformation");
    poifsReader.read(in);

    return broker.getProperties();
}

From source file:com.villemos.ispace.aperture.enricher.MicrosoftPropertyReader.java

License:Open Source License

/**
 * Copies the entries of {@code msProperties} into the metadata of the given information
 * object, after reading the file named by {@code io.hasUri} through a {@link POIFSReader}
 * that has this object registered as listener.
 *
 * <p>Nothing happens unless the file exists and the URI ends with {@code ".doc"}. Failures
 * while reading are logged and otherwise ignored.
 *
 * @param io      the information object whose {@code hasUri} names the file and whose
 *                {@code metadata} receives the entries
 * @param headers the message headers (not used by this method)
 */
@Handler
public void addMSProperties(@Body InformationObject io, @Headers Map<String, Object> headers) {

    File file = new File(io.hasUri);

    if (!file.exists() || !io.hasUri.endsWith(".doc")) {
        return;
    }

    POIFSReader r = new POIFSReader();
    r.registerListener(this);

    // try-with-resources closes the stream on every path; previously close() was only
    // reached when reading and copying succeeded, leaking the handle on failure.
    try (FileInputStream inStream = new FileInputStream(file)) {
        r.read(inStream);

        Iterator<Entry<String, String>> it = msProperties.entrySet().iterator();
        while (it.hasNext()) {
            Entry<String, String> entry = it.next();
            io.metadata.put(entry.getKey(), entry.getValue());
        }
    } catch (Exception e) {
        e.printStackTrace();
        LOG.error("Failed to get properties for .doc file '" + file.getName() + "'.");
    }
}

From source file:lius.index.powerpoint.PPTIndexer.java

License:Apache License

/**
 * Reads the stream returned by {@code getStreamToIndex()} through a {@link POIFSReader}
 * that has this object registered as listener, and returns the string form of
 * {@code writer} afterwards.
 *
 * @return the string form of {@code writer}, or an empty string if an exception occurred
 *         (the exception message is logged)
 */
public String getContent() {
    try {
        final POIFSReader poifsReader = new POIFSReader();
        // A fresh buffer is installed on every call before the listener can be invoked.
        writer = new ByteArrayOutputStream();
        poifsReader.registerListener(this);
        poifsReader.read(getStreamToIndex());
        return writer.toString();
    } catch (Exception ex) {
        logger.error(ex.getMessage());
    }
    return "";
}

From source file:net.sourceforge.docfetcher.model.parse.MSOfficeParser.java

License:Open Source License

/**
 * Builds a parse result from the rendered text of the file and then tries to enrich it
 * with the title, author, subject, keywords and comments collected by a listener
 * registered for the {@code \005SummaryInformation} entry.
 *
 * @param file    the file to parse
 * @param context the parse context; its filename is passed on to {@code renderText}
 * @return the parse result, with metadata added when it could be read
 * @throws ParseException if the reader rejects the file with an
 *                        {@link IllegalArgumentException}; {@code renderText} may
 *                        presumably raise it as well
 */
protected final ParseResult parse(File file, ParseContext context) throws ParseException {
    final ParseResult result = new ParseResult(renderText(file, context.getFilename()));

    final POIFSReader poifsReader = new POIFSReader();
    final MyReaderListener summary = new MyReaderListener();
    poifsReader.registerListener(summary, "\005SummaryInformation"); //$NON-NLS-1$

    InputStream stream = null;
    try {
        stream = new FileInputStream(file);
        try {
            poifsReader.read(stream);
        } catch (IllegalArgumentException badName) {
            // Bug #3537738: "IllegalArgumentException: name cannot be empty"
            throw new ParseException(badName);
        }

        result.setTitle(summary.title).addAuthor(summary.author).addMiscMetadata(summary.subject)
                .addMiscMetadata(summary.keywords).addMiscMetadata(summary.comments);
    } catch (IOException ioFailure) {
        // Metadata is optional: report the problem and return the text-only result.
        Util.printErr(ioFailure);
    } finally {
        Closeables.closeQuietly(stream);
    }

    return result;
}